深入探究C++虚函数表
以下讨论均基于Windows x64平台MSVC v14.42.34433环境。
关于虚函数表
众所周知,虚函数主要被用来实现多态机制,即通过基类指针或引用访问虚函数时,被访问的必须是该指针或引用所指对象的动态类型中该函数的最终重写版本。简而言之,运行时调用哪个函数,取决于对象的真实类型,而不是指针或引用的静态类型。
需要注意的是,C++标准规定了虚函数的行为,但并没有规定具体实现(C++标准在很多方面都只定义“做什么”(what),而不规定“怎么做”(how))。而虚函数表则是各大主流编译器都采用的一种成熟有效的实现方案。
单继承下的虚函数表
派生类未重写基类的虚函数
测试代码:
#include <iostream>
using namespace std;
// Polymorphic base class of the single-inheritance demo.
// It declares three virtual functions; each prints its own qualified name so
// the output shows which function a call actually reached.
// The declaration order matters: main() invokes them by vtable slot index.
class Base {
public:
    virtual void bFunc1()
    {
        std::cout << "Base::bFunc1()" << std::endl;
    }

    virtual void bFunc2()
    {
        std::cout << "Base::bFunc2()" << std::endl;
    }

    virtual void bFunc3()
    {
        std::cout << "Base::bFunc3()" << std::endl;
    }
};
// Derived class of the "no override" demo: it inherits Base unchanged and
// introduces exactly one additional virtual function of its own.
class Derived : public Base {
public:
    virtual void dFunc1()
    {
        std::cout << "Derived::dFunc1()" << std::endl;
    }
};
// Prints sizeof(Derived) and then calls the first four entries of the object's
// virtual function table directly, by slot index.
// NOTE: reading the vtable out of the object and calling its entries as plain
// `void()` functions is not portable C++; it depends on the object layout of
// the toolchain this article targets.
int main() {
    Derived obj;
    std::cout << "sizeof(Derived) = " << sizeof(Derived) << std::endl;

    // The first pointer-sized word of obj is interpreted as the address of the
    // virtual function table.
    uintptr_t *vftable = reinterpret_cast<uintptr_t *>(*reinterpret_cast<uintptr_t *>(&obj));

    // Each table entry is interpreted as the address of a function taking no
    // arguments; slots 0 through 3 are called in order.
    for (int slot = 0; slot < 4; ++slot) {
        reinterpret_cast<void (*)()>(vftable[slot])();
    }
    return 0;
}
执行输出:
sizeof(Derived) = 8
Base::bFunc1()
Base::bFunc2()
Base::bFunc3()
Derived::dFunc1()
查看class Base
和class Derived
内存布局:
class Base size(8):
+---
0 | {vfptr}
+---
Base::$vftable@:
| &Base_meta
| 0
0 | &Base::bFunc1
1 | &Base::bFunc2
2 | &Base::bFunc3
class Derived size(8):
+---
0 | +--- (base class Base)
0 | | {vfptr}
| +---
+---
Derived::$vftable@:
| &Derived_meta
| 0
0 | &Base::bFunc1
1 | &Base::bFunc2
2 | &Base::bFunc3
3 | &Derived::dFunc1
可以发现,内存布局与代码输出相互印证:派生类的虚表指针{vfptr}
位于对象的起始地址;虚函数表中,先是基类虚函数按声明顺序依次排列,然后派生类新增的虚函数紧随其后。
派生类重写基类的虚函数
测试代码:
#include <iostream>
using namespace std;
// Polymorphic base class of the single-inheritance "override" demo.
// It declares three virtual functions; each prints its own qualified name so
// the output shows which function a call actually reached.
// The declaration order matters: main() invokes them by vtable slot index.
class Base {
public:
    virtual void bFunc1()
    {
        std::cout << "Base::bFunc1()" << std::endl;
    }

    virtual void bFunc2()
    {
        std::cout << "Base::bFunc2()" << std::endl;
    }

    virtual void bFunc3()
    {
        std::cout << "Base::bFunc3()" << std::endl;
    }
};
class Derived : public Base {
public:
virtual void dFunc1() { cout << "Derived::dFunc1()" << endl; }
virtual void bFunc2() { cout << "Derived::bFunc2()" << endl; }
};
// Prints sizeof(Derived) and then calls the first four entries of the object's
// virtual function table directly, by slot index, to show which entry was
// replaced by the override in Derived.
// NOTE: reading the vtable out of the object and calling its entries as plain
// `void()` functions is not portable C++; it depends on the object layout of
// the toolchain this article targets.
int main() {
    Derived obj;
    std::cout << "sizeof(Derived) = " << sizeof(Derived) << std::endl;

    // The first pointer-sized word of obj is interpreted as the address of the
    // virtual function table.
    uintptr_t *vftable = reinterpret_cast<uintptr_t *>(*reinterpret_cast<uintptr_t *>(&obj));

    // Each table entry is interpreted as the address of a function taking no
    // arguments; slots 0 through 3 are called in order.
    for (int slot = 0; slot < 4; ++slot) {
        reinterpret_cast<void (*)()>(vftable[slot])();
    }
    return 0;
}
执行输出:
sizeof(Derived) = 8
Base::bFunc1()
Derived::bFunc2()
Base::bFunc3()
Derived::dFunc1()
查看class Base和class Derived内存布局:
class Base size(8):
+---
0 | {vfptr}
+---
Base::$vftable@:
| &Base_meta
| 0
0 | &Base::bFunc1
1 | &Base::bFunc2
2 | &Base::bFunc3
class Derived size(8):
+---
0 | +--- (base class Base)
0 | | {vfptr}
| +---
+---
Derived::$vftable@:
| &Derived_meta
| 0
0 | &Base::bFunc1
1 | &Derived::bFunc2
2 | &Base::bFunc3
3 | &Derived::dFunc1
可以发现对于派生类重写的虚函数,在派生类的虚函数表中会把原本基类的虚函数直接替换掉。
多继承下的虚函数表
派生类未重写基类的虚函数
测试代码:
#include <iostream>
using namespace std;
// First base class of the multiple-inheritance demo.
// Three virtual functions, each printing its own qualified name, plus one
// int data member so the class carries data next to its vtable pointer.
class Base1 {
public:
    virtual void b1Func1()
    {
        std::cout << "Base1::b1Func1()" << std::endl;
    }

    virtual void b1Func2()
    {
        std::cout << "Base1::b1Func2()" << std::endl;
    }

    virtual void b1Func3()
    {
        std::cout << "Base1::b1Func3()" << std::endl;
    }

private:
    int b1Data1 = 0;
};
// Second base class of the multiple-inheritance demo.
// Three virtual functions, each printing its own qualified name, plus one
// int data member so the class carries data next to its vtable pointer.
class Base2 {
public:
    virtual void b2Func1()
    {
        std::cout << "Base2::b2Func1()" << std::endl;
    }

    virtual void b2Func2()
    {
        std::cout << "Base2::b2Func2()" << std::endl;
    }

    virtual void b2Func3()
    {
        std::cout << "Base2::b2Func3()" << std::endl;
    }

private:
    int b2Data1 = 0;
};
// Third base class of the multiple-inheritance demo.
// Three virtual functions, each printing its own qualified name, plus one
// int data member so the class carries data next to its vtable pointer.
class Base3 {
public:
    virtual void b3Func1()
    {
        std::cout << "Base3::b3Func1()" << std::endl;
    }

    virtual void b3Func2()
    {
        std::cout << "Base3::b3Func2()" << std::endl;
    }

    virtual void b3Func3()
    {
        std::cout << "Base3::b3Func3()" << std::endl;
    }

private:
    int b3Data1 = 0;
};
// Derived class of the multiple-inheritance "no override" demo.
// It inherits from Base1, Base2 and Base3 (in that order), adds one new
// virtual function and one int data member, and overrides nothing.
class Derived : public Base1, public Base2, public Base3 {
public:
    virtual void dFunc1()
    {
        std::cout << "Derived::dFunc1()" << std::endl;
    }

private:
    int d1Data1 = 0;
};
int main() {
Derived obj;
cout << "sizeof(Derived) = " << sizeof(Derived) << endl;
uintptr_t *vftableBase1 = (uintptr_t *) *(uintptr_t *)((uint8_t *) &obj + 0);
((void (*)()) *(vftableBase1 + 0))();
((void (*)()) *(vftableBase1 + 1))();
((void (*)()) *(vftableBase1 + 2))();
((void (*)()) *(vftableBase1 + 3))();
uintptr_t *vftableBase2 = (uintptr_t *) *(uintptr_t *)((uint8_t *) &obj + 16);
((void (*)()) *(vftableBase2 + 0))();
((void (*)()) *(vftableBase2 + 1))();
((void (*)()) *(vftableBase2 + 2))();
uintptr_t *vftableBase3 = (uintptr_t *) *(uintptr_t *)((uint8_t *) &obj + 32);
((void (*)()) *(vftableBase3 + 0))();
((void (*)()) *(vftableBase3 + 1))();
((void (*)()) *(vftableBase3 + 2))();
return 0;
}
多继承要复杂一点,为了更复杂一点,上述测试代码还给每个类增加了一个成员变量,先看class Derived
的内存布局:
class Derived size(56):
+---
0 | +--- (base class Base1)
0 | | {vfptr}
8 | | b1Data1
| | <alignment member> (size=4)
| +---
16 | +--- (base class Base2)
16 | | {vfptr}
24 | | b2Data1
| | <alignment member> (size=4)
| +---
32 | +--- (base class Base3)
32 | | {vfptr}
40 | | b3Data1
| | <alignment member> (size=4)
| +---
48 | d1Data1
| <alignment member> (size=4)
+---
Derived::$vftable@Base1@:
| &Derived_meta
| 0
0 | &Base1::b1Func1
1 | &Base1::b1Func2
2 | &Base1::b1Func3
3 | &Derived::dFunc1
Derived::$vftable@Base2@:
| -16
0 | &Base2::b2Func1
1 | &Base2::b2Func2
2 | &Base2::b2Func3
Derived::$vftable@Base3@:
| -32
0 | &Base3::b3Func1
1 | &Base3::b3Func2
2 | &Base3::b3Func3
可以看到:首先,每个被继承的基类都有各自的虚函数表和虚表指针;其次,三个基类子对象按继承声明的顺序依次排列;此外,派生类“复用”了第一个被继承的基类的虚表指针,并将派生类新增的虚函数追加到了该指针所指向的虚函数表的末尾(这一点和单继承一样)。
执行输出:
sizeof(Derived) = 56
Base1::b1Func1()
Base1::b1Func2()
Base1::b1Func3()
Derived::dFunc1()
Base2::b2Func1()
Base2::b2Func2()
Base2::b2Func3()
Base3::b3Func1()
Base3::b3Func2()
Base3::b3Func3()
派生类重写基类的虚函数
测试代码:
#include <iostream>
using namespace std;
// First base class of the multiple-inheritance "override" demo.
// Three virtual functions, each printing its own qualified name, plus one
// int data member so the class carries data next to its vtable pointer.
class Base1 {
public:
    virtual void b1Func1()
    {
        std::cout << "Base1::b1Func1()" << std::endl;
    }

    virtual void b1Func2()
    {
        std::cout << "Base1::b1Func2()" << std::endl;
    }

    virtual void b1Func3()
    {
        std::cout << "Base1::b1Func3()" << std::endl;
    }

private:
    int b1Data1 = 0;
};
// Second base class of the multiple-inheritance "override" demo.
// Three virtual functions, each printing its own qualified name, plus one
// int data member so the class carries data next to its vtable pointer.
class Base2 {
public:
    virtual void b2Func1()
    {
        std::cout << "Base2::b2Func1()" << std::endl;
    }

    virtual void b2Func2()
    {
        std::cout << "Base2::b2Func2()" << std::endl;
    }

    virtual void b2Func3()
    {
        std::cout << "Base2::b2Func3()" << std::endl;
    }

private:
    int b2Data1 = 0;
};
// Third base class of the multiple-inheritance "override" demo.
// Three virtual functions, each printing its own qualified name, plus one
// int data member so the class carries data next to its vtable pointer.
class Base3 {
public:
    virtual void b3Func1()
    {
        std::cout << "Base3::b3Func1()" << std::endl;
    }

    virtual void b3Func2()
    {
        std::cout << "Base3::b3Func2()" << std::endl;
    }

    virtual void b3Func3()
    {
        std::cout << "Base3::b3Func3()" << std::endl;
    }

private:
    int b3Data1 = 0;
};
class Derived : public Base1, public Base2, public Base3 {
public:
virtual void dFunc1() { cout << "Derived::dFunc1()" << endl; }
virtual void b1Func2() { cout << "Derived::b1Func2()" << endl; }
virtual void b2Func2() { cout << "Derived::b2Func2()" << endl; }
virtual void b3Func2() { cout << "Derived::b3Func2()" << endl; }
private:
int d1Data1 = 0;
};
int main() {
Derived obj;
cout << "sizeof(Derived) = " << sizeof(Derived) << endl;
uintptr_t *vftableBase1 = (uintptr_t *) *(uintptr_t *)((uint8_t *) &obj + 0);
((void (*)()) *(vftableBase1 + 0))();
((void (*)()) *(vftableBase1 + 1))();
((void (*)()) *(vftableBase1 + 2))();
((void (*)()) *(vftableBase1 + 3))();
uintptr_t *vftableBase2 = (uintptr_t *) *(uintptr_t *)((uint8_t *) &obj + 16);
((void (*)()) *(vftableBase2 + 0))();
((void (*)()) *(vftableBase2 + 1))();
((void (*)()) *(vftableBase2 + 2))();
uintptr_t *vftableBase3 = (uintptr_t *) *(uintptr_t *)((uint8_t *) &obj + 32);
((void (*)()) *(vftableBase3 + 0))();
((void (*)()) *(vftableBase3 + 1))();
((void (*)()) *(vftableBase3 + 2))();
return 0;
}
还是先看class Derived
的内存布局:
class Derived size(56):
+---
0 | +--- (base class Base1)
0 | | {vfptr}
8 | | b1Data1
| | <alignment member> (size=4)
| +---
16 | +--- (base class Base2)
16 | | {vfptr}
24 | | b2Data1
| | <alignment member> (size=4)
| +---
32 | +--- (base class Base3)
32 | | {vfptr}
40 | | b3Data1
| | <alignment member> (size=4)
| +---
48 | d1Data1
| <alignment member> (size=4)
+---
Derived::$vftable@Base1@:
| &Derived_meta
| 0
0 | &Base1::b1Func1
1 | &Derived::b1Func2
2 | &Base1::b1Func3
3 | &Derived::dFunc1
Derived::$vftable@Base2@:
| -16
0 | &Base2::b2Func1
1 | &Derived::b2Func2
2 | &Base2::b2Func3
Derived::$vftable@Base3@:
| -32
0 | &Base3::b3Func1
1 | &Derived::b3Func2
2 | &Base3::b3Func3
可以看到每个被继承的基类的虚函数表中对应的虚函数都被派生类重写后的虚函数替换了(与单继承规律一致)。
执行输出:
sizeof(Derived) = 56
Base1::b1Func1()
Derived::b1Func2()
Base1::b1Func3()
Derived::dFunc1()
Base2::b2Func1()
Derived::b2Func2()
Base2::b2Func3()
Base3::b3Func1()
Derived::b3Func2()
Base3::b3Func3()
看完多继承的内存布局之后,有没有发现一个问题?被继承的几个基类的虚函数表不是连续的,而指向这几个虚函数表的虚表指针也是分散的,只有第一个被继承的基类Base1
的虚表指针位于派生类的起始地址,那么当试图通过Base2
和Base3
的指针或引用访问派生类重写后的虚函数时会发生什么呢?经过尝试,仍然能正常访问到被派生类重写后的虚函数,这是因为编译器自动处理了地址跳转(Thunk),注意内存布局输出中的-16
和-32
,这表明当使用Base2
或Base3
的指针或引用访问虚函数时,this
指针会-16
或-32
字节来得到派生类Derived
的起始地址,这其中的转换由编译器实现,且对开发者是透明的。实际上,当试图将Derived
的指针转换为Base2
或Base3
的指针时,编译器也会进行类似的指针调整,只是方向相反:在原地址上加上16或32字节的偏移,使其指向对应的基类子对象。
写在最后
C++标准仅仅规定了虚函数是什么,以及虚函数的行为,并没有规定虚函数要如何实现,且虚表指针、虚函数表等概念也并不存在于C++标准中,具体实现取决于编译器及其所遵循的ABI,所以上述所有结论仅适用于MSVC编译环境。最后,尽量避免依赖内存布局去编写需要移植的代码!!!