在编程中,有时会把多个函数串在一起依次调用,以达到特定的目的,在这里我们把这样的调用方式形象地称为函数链调用。函数链中的函数有些是独立的,有些则只用在函数组合中,不会单独调用。对象的连续配置和组合是函数链调用比较常用的场合;去除语法糖之后,LINQ也是函数链的一种运用。下面通过即时编译中的一个例子,来看看函数链的用法。
几种热门的语言C#、java、js都采用了即时编译的方式,即时编译需要有相应的汇编类库供调用,以便把VM代码转换成本地机器代码。
Apple的汇编类库是比较常用的,被firefox,webkit用于js的即时编译,下面的代码是Apple汇编类库的一般调用方式:
// Emits a short instruction sequence through the assembler object `masm`:
// one member-function call per machine instruction.
masm.push(ecx);
masm.move(ebp, edx);
masm.add32(0x12 ,edx);
masm.push(edx);
// Memory operands have to be wrapped in MacroAssembler::Address(...).
masm.load32(MacroAssembler::Address(edx),edx);
masm.push(edx);
masm.load32(MacroAssembler::Address(edx),edx);
// NOTE(review): `r` is not declared in this excerpt - presumably a register
// defined by the surrounding code; confirm.
masm.add32(edx,r);
masm.call(MacroAssembler::Address(r));
masm.pop(ecx);
下面再看看chrome中v8的调用习惯:
// `__` abbreviates `masm()->` so that each emitting line below reads like an
// assembly statement.
#define __ masm()->
__ mov(ebx, Operand(esp, kSavedRegistersAreaSize));
__ Set(ecx, Immediate(0));
__ lea(edx, Operand(esp, kSavedRegistersAreaSize + 1 * kPointerSize));
__ sub(edx, Operand(ebp));
__ neg(edx);
// A label object is declared and then handed to bind(); presumably this
// attaches it to the current emit position - confirm against the assembler API.
Label pop_loop;
__ bind(&pop_loop);
__ pop(Operand(edx, 0));
// The shorthand is removed as soon as the emitting code is finished.
#undef __
与前面的调用方式差别不大,只是通过宏替换使得汇编调用看得更直观,并遵循了宏定义用过即取消定义的习惯。
从上面的代码可以看出,普通的函数调用方式,大部分的汇编码调用还是比较整洁,涉及到内存调用的部分显得有些不太直观,对于 mov [ebx + 2 * ecx + 0x1000],eax这样的语句写起来会有些复杂。下面我们试着看看有没有更直观的方式来表现。
在这里我们可以看到,函数与真实汇编之间存在一定程度的失配:汇编语言本身是描述性的,具有较强的组合能力,而用单个函数去模拟这样的能力,往往有点力不从心,这种失配的结果就是功能的简化和简洁性的减弱。利用多个函数一起协同的能力,函数链可以用于解决这样的失配问题,使得调用代码书写得像汇编一样简洁。
下面的代码是一些准备工作,定义了汇编要用到的一些结构,如寄存器、地址、操作还有卷标(label)。寄存器和卷标的代码都非常简单,操作和地址的代码复杂一些,主要是定义了一些操作符的重载,这些函数体现了函数链中函数的特点:要么返回自身,要么返回新对象,以备后续调用。另外还有一些宏定义,这些宏都比较简单。为简单起见,在这里程序并不作实际的本地代码转换工作,只保证书写的代码能编译通过。具体的代码如下:
View Code
// Common empty base of every operand kind (TInt, TReg, TAdr), so that TOp can
// hold any operand through a TNode pointer.
struct TNode {};
struct TOp;

// A code label. Writing `label (instruction)` in front of an instruction
// chain calls this operator, which hands the very same instruction back so the
// rest of the chain keeps applying to it. The label itself records nothing.
struct TLabel
{
    TOp & operator () (TOp & r)
    {
        return r;
    }
};
// Integer constant operand.
struct TInt : TNode
{
    int val;    // value of the constant

    // Implicitly convertible from int.
    TInt(int v)
        : val(v)
    {
    }
};
// Register operand, identified by its register number.
struct TReg : TNode
{
    int reg;    // register number

    TReg(int r):reg(r){}
    // Defaults to register number 0.
    TReg():reg(0){}

    // Comparisons work on the register number. They are const and take a
    // const reference so that const registers and temporaries (the eax/ecx/...
    // macros expand to temporaries) can be compared as well.
    bool operator != (const TReg & l) const { return reg != l.reg; }
    bool operator == (const TReg & l) const { return reg == l.reg; }
    bool operator >  (const TReg & l) const { return reg >  l.reg; }
    bool operator <  (const TReg & l) const { return reg <  l.reg; }
};
// Memory-address operand assembled from a base register, a scaled index
// register and a constant displacement. Registers are referenced by pointer
// and are not owned by this node.
struct TAdr : TNode
{
    int typ;        // initialised to 0; not used elsewhere in this listing
    TReg* base;     // base register, NULL when absent
    int scale;      // multiplier for the index register (set by `int * reg`)
    TReg* index;    // index register, NULL when absent
    int direct;     // constant displacement (set by `... + int`)

    // Starts out as an empty address: no registers, zero scale and displacement.
    TAdr()
        : typ(0)
        , base(NULL)
        , scale(0)
        , index(NULL)
        , direct(0)
    {
    }
};
// Single place where operand nodes are created and destroyed.
// Nodes returned by allocAdr()/allocReg() are created with `new`; the matching
// free() overload releases them with `delete`.
struct TAlloc
{
    // Returns a new default-constructed address node.
    static TAdr* allocAdr()
    {
        TAdr* node = new TAdr();
        return node;
    }

    // Returns a new default-constructed register node (register number 0).
    static TReg* allocReg()
    {
        TReg* node = new TReg();
        return node;
    }

    // Destroys an address node; passing NULL is harmless.
    static void free(TAdr* p) { delete p; }

    // Destroys a register node; passing NULL is harmless.
    static void free(TReg* p) { delete p; }
};
// reg + reg: creates an address node with `l` as base and `r` as index.
// The node comes from TAlloc::allocAdr() and is never freed here. Only the
// operands' addresses are stored, so both registers must outlive the node.
inline TAdr & operator + (TReg & l,TReg & r)
{
    TAdr & node = *TAlloc::allocAdr();
    node.index = &r;
    node.base = &l;
    return node;
}
// scale * reg: creates an address node whose index register is `r` with
// multiplier `l`; no base register is set.
// The node comes from TAlloc::allocAdr() and is never freed here. Only the
// address of `r` is stored, so the register must outlive the node.
inline TAdr & operator * (int l,TReg & r)
{
    TAdr & node = *TAlloc::allocAdr();
    node.index = &r;
    node.scale = l;
    return node;
}
// reg + constant: creates an address node with base register `r` and
// displacement `l`.
// The node comes from TAlloc::allocAdr() and is never freed here. Only the
// address of `r` is stored, so the register must outlive the node.
inline TAdr & operator + (TReg & r,int l)
{
    TAdr & node = *TAlloc::allocAdr();
    node.direct = l;
    node.base = &r;
    return node;
}
// address + constant: adds `l` to the displacement of an existing address
// node and returns that same node (no new node is allocated).
// The displacement is accumulated rather than overwritten, so chained
// constants such as `reg + 4 + 8` end up with a displacement of 12.
inline TAdr & operator + (TAdr & adr,int l)
{
    adr.direct += l;
    return adr;
}
// reg + address: creates a new address node with `l` as base that takes over
// the index register, scale and displacement of `r`.
// `r` itself is neither modified nor released, and its own base register (if
// any) is not carried over. The new node comes from TAlloc::allocAdr().
inline TAdr & operator + (TReg & l,TAdr & r)
{
    TAdr & node = *TAlloc::allocAdr();
    node.base = &l;
    node.scale = r.scale;
    node.index = r.index;
    node.direct += r.direct;
    return node;
}
struct TOp
{
int op;
TNode* left;
TNode* right;
TOp(int _op):op(_op),left(NULL),right(NULL){}
inline TOp & operator () (TReg & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator () (TAdr & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator () (TInt & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator () (int r)
{
if(left)
right = &TInt(r);
else
left = &TInt(r);
return *this;
};
inline TOp & operator [] (TAdr & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator [] (TReg & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator [] (int r)
{
if(left)
right = &TInt(r);
else
left = &TInt(r);
return *this;
};
inline TOp & operator + (TLabel r)
{
return *this;
}
};
// Numeric opcode identifiers used to tag a TOp. Values run from 1 to 10.
struct TOpcode
{
    // data movement and stack
    static const unsigned char mov  = 1;
    static const unsigned char push = 7;
    static const unsigned char pop  = 8;

    // arithmetic
    static const unsigned char add  = 2;
    static const unsigned char sub  = 3;
    static const unsigned char mul  = 4;
    static const unsigned char div  = 5;

    // control flow
    static const unsigned char jmp  = 6;
    static const unsigned char call = 9;
    static const unsigned char ret  = 10;
};
// Each ncode_<mnemonic> macro expands to a parenthesised temporary TOp tagged
// with the matching TOpcode constant. Operands are then chained onto that
// temporary with operator() / operator[].
#define ncode_mov (TOp(TOpcode::mov))
#define ncode_add (TOp(TOpcode::add))
#define ncode_sub (TOp(TOpcode::sub))
#define ncode_mul (TOp(TOpcode::mul))
#define ncode_div (TOp(TOpcode::div))
#define ncode_push (TOp(TOpcode::push))
#define ncode_pop (TOp(TOpcode::pop))
// jmp ends with a trailing '+', so the label written after it is consumed by
// TOp::operator+(TLabel) instead of by a call operator.
#define ncode_jmp (TOp(TOpcode::jmp)) +
#define ncode_call (TOp(TOpcode::call))
#define ncode_ret (TOp(TOpcode::ret))
// _(mnemonic operand, operand): "ncode_" is pasted onto the first token of the
// first macro argument (the mnemonic). The rest of that argument and
// everything after the first comma follow unchanged; the comma is dropped.
// Example: _(mov ebp,esp) becomes ncode_mov ebp esp.
#define _(x,...) ncode_##x __VA_ARGS__
// Register names. Every use expands to a NEW temporary TReg carrying the
// register number (0..7 in the order eax, ecx, edx, ebx, esp, ebp, esi, edi).
// NOTE(review): the operand operators store the address of the TReg they
// receive, so a pointer obtained from one of these temporaries is only valid
// until the end of the enclosing full-expression.
#define eax (TReg(0))
#define ecx (TReg(1))
#define edx (TReg(2))
#define ebx (TReg(3))
#define esp (TReg(4))
#define ebp (TReg(5))
#define esi (TReg(6))
#define edi (TReg(7))
通过上面的准备,现在可以书写汇编调用代码了:
// Demo entry point: writes a small routine (prologue, a few moves, a call,
// two jumps, epilogue) with the _() macro. Nothing is encoded or executed;
// each statement only builds a temporary TOp and discards it.
int _tmain(int argc, _TCHAR* argv[])
{
    TLabel L1,L2;

    // prologue
    _(push ebp);
    _(mov ebp,esp);
    _(push esi);
    _(push edi);

    // body: register, memory and labelled forms
    _(mov ebx, eax);
    _(mov eax,[ebx + 2 * ecx]);
    _(mov [ebx + 2 * ecx + 0x1000],eax);
    L1 _(mov eax,[eax]);
    L2 _(mov eax,[0x1234]);
    _(call eax);
    _(jmp L1);
    _(jmp L2);

    // epilogue
    _(pop edi);
    _(pop esi);
    _(mov esp,ebp);
    _(pop ebp);
    _(ret );

    // Nothing above can fail, so report success (status 0).
    return 0;
}
是不是看起来像嵌入式汇编代码,但只是形似而已,这里是函数调用,而嵌入式汇编码是执行码。现在看起来是否更直观,YY一下。
现在再看看宏展开后的实际代码,是不是都是一些函数链调用?
// The demo entry point after macro expansion: every statement is a temporary
// TOp followed by a chain of operator() / operator[] / operator+ calls.
int wmain(int argc, _TCHAR* argv[])
{
    TLabel L1,L2;

    // prologue
    (TOp(TOpcode::push)) (TReg(5));
    (TOp(TOpcode::mov)) (TReg(5)) (TReg(4));
    (TOp(TOpcode::push)) (TReg(6));
    (TOp(TOpcode::push)) (TReg(7));

    // body: register, memory and labelled forms
    (TOp(TOpcode::mov)) (TReg(3)) (TReg(0));
    (TOp(TOpcode::mov)) (TReg(0)) [(TReg(3)) + 2 * (TReg(1))];
    (TOp(TOpcode::mov)) [(TReg(3)) + 2 * (TReg(1)) + 0x1000] (TReg(0));
    L1 (TOp(TOpcode::mov)) (TReg(0)) [(TReg(0))];
    L2 (TOp(TOpcode::mov)) (TReg(0)) [0x1234];
    (TOp(TOpcode::call)) (TReg(0));
    (TOp(TOpcode::jmp)) + L1;
    (TOp(TOpcode::jmp)) + L2;

    // epilogue
    (TOp(TOpcode::pop)) (TReg(7));
    (TOp(TOpcode::pop)) (TReg(6));
    (TOp(TOpcode::mov)) (TReg(4)) (TReg(5));
    (TOp(TOpcode::pop)) (TReg(5));
    (TOp(TOpcode::ret));

    // Nothing above can fail, so report success (status 0).
    return 0;
}
有头晕的感觉吧?正好应了一点,简单的背后是复杂。
下面是完整的示例代码:
View Code
#include <stdio.h>
#include <stdlib.h>
#include <tchar.h>
// Common empty base of every operand kind (TInt, TReg, TAdr), so that TOp can
// hold any operand through a TNode pointer.
struct TNode {};
struct TOp;

// A code label. Writing `label (instruction)` in front of an instruction
// chain calls this operator, which hands the very same instruction back so the
// rest of the chain keeps applying to it. The label itself records nothing.
struct TLabel
{
    TOp & operator () (TOp & r)
    {
        return r;
    }
};
// Integer constant operand.
struct TInt : TNode
{
    int val;    // value of the constant

    // Implicitly convertible from int.
    TInt(int v)
        : val(v)
    {
    }
};
// Register operand, identified by its register number.
struct TReg : TNode
{
    int reg;    // register number

    TReg(int r):reg(r){}
    // Defaults to register number 0.
    TReg():reg(0){}

    // Comparisons work on the register number. They are const and take a
    // const reference so that const registers and temporaries (the eax/ecx/...
    // macros expand to temporaries) can be compared as well.
    bool operator != (const TReg & l) const { return reg != l.reg; }
    bool operator == (const TReg & l) const { return reg == l.reg; }
    bool operator >  (const TReg & l) const { return reg >  l.reg; }
    bool operator <  (const TReg & l) const { return reg <  l.reg; }
};
// Memory-address operand assembled from a base register, a scaled index
// register and a constant displacement. Registers are referenced by pointer
// and are not owned by this node.
struct TAdr : TNode
{
    int typ;        // initialised to 0; not used elsewhere in this listing
    TReg* base;     // base register, NULL when absent
    int scale;      // multiplier for the index register (set by `int * reg`)
    TReg* index;    // index register, NULL when absent
    int direct;     // constant displacement (set by `... + int`)

    // Starts out as an empty address: no registers, zero scale and displacement.
    TAdr()
        : typ(0)
        , base(NULL)
        , scale(0)
        , index(NULL)
        , direct(0)
    {
    }
};
// Single place where operand nodes are created and destroyed.
// Nodes returned by allocAdr()/allocReg() are created with `new`; the matching
// free() overload releases them with `delete`.
struct TAlloc
{
    // Returns a new default-constructed address node.
    static TAdr* allocAdr()
    {
        TAdr* node = new TAdr();
        return node;
    }

    // Returns a new default-constructed register node (register number 0).
    static TReg* allocReg()
    {
        TReg* node = new TReg();
        return node;
    }

    // Destroys an address node; passing NULL is harmless.
    static void free(TAdr* p) { delete p; }

    // Destroys a register node; passing NULL is harmless.
    static void free(TReg* p) { delete p; }
};
// reg + reg: creates an address node with `l` as base and `r` as index.
// The node comes from TAlloc::allocAdr() and is never freed here. Only the
// operands' addresses are stored, so both registers must outlive the node.
inline TAdr & operator + (TReg & l,TReg & r)
{
    TAdr & node = *TAlloc::allocAdr();
    node.index = &r;
    node.base = &l;
    return node;
}
// scale * reg: creates an address node whose index register is `r` with
// multiplier `l`; no base register is set.
// The node comes from TAlloc::allocAdr() and is never freed here. Only the
// address of `r` is stored, so the register must outlive the node.
inline TAdr & operator * (int l,TReg & r)
{
    TAdr & node = *TAlloc::allocAdr();
    node.index = &r;
    node.scale = l;
    return node;
}
// reg + constant: creates an address node with base register `r` and
// displacement `l`.
// The node comes from TAlloc::allocAdr() and is never freed here. Only the
// address of `r` is stored, so the register must outlive the node.
inline TAdr & operator + (TReg & r,int l)
{
    TAdr & node = *TAlloc::allocAdr();
    node.direct = l;
    node.base = &r;
    return node;
}
// address + constant: adds `l` to the displacement of an existing address
// node and returns that same node (no new node is allocated).
// The displacement is accumulated rather than overwritten, so chained
// constants such as `reg + 4 + 8` end up with a displacement of 12.
inline TAdr & operator + (TAdr & adr,int l)
{
    adr.direct += l;
    return adr;
}
// reg + address: creates a new address node with `l` as base that takes over
// the index register, scale and displacement of `r`.
// `r` itself is neither modified nor released, and its own base register (if
// any) is not carried over. The new node comes from TAlloc::allocAdr().
inline TAdr & operator + (TReg & l,TAdr & r)
{
    TAdr & node = *TAlloc::allocAdr();
    node.base = &l;
    node.scale = r.scale;
    node.index = r.index;
    node.direct += r.direct;
    return node;
}
struct TOp
{
int op;
TNode* left;
TNode* right;
TOp(int _op):op(_op),left(NULL),right(NULL){}
inline TOp & operator () (TReg & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator () (TAdr & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator () (TInt & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator () (int r)
{
if(left)
right = &TInt(r);
else
left = &TInt(r);
return *this;
};
inline TOp & operator [] (TAdr & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator [] (TReg & r)
{
if(left)
right = &r;
else
left = &r;
return *this;
};
inline TOp & operator [] (int r)
{
if(left)
right = &TInt(r);
else
left = &TInt(r);
return *this;
};
inline TOp & operator + (TLabel r)
{
return *this;
}
};
// Numeric opcode identifiers used to tag a TOp. Values run from 1 to 10.
struct TOpcode
{
    // data movement and stack
    static const unsigned char mov  = 1;
    static const unsigned char push = 7;
    static const unsigned char pop  = 8;

    // arithmetic
    static const unsigned char add  = 2;
    static const unsigned char sub  = 3;
    static const unsigned char mul  = 4;
    static const unsigned char div  = 5;

    // control flow
    static const unsigned char jmp  = 6;
    static const unsigned char call = 9;
    static const unsigned char ret  = 10;
};
// Each ncode_<mnemonic> macro expands to a parenthesised temporary TOp tagged
// with the matching TOpcode constant. Operands are then chained onto that
// temporary with operator() / operator[].
#define ncode_mov (TOp(TOpcode::mov))
#define ncode_add (TOp(TOpcode::add))
#define ncode_sub (TOp(TOpcode::sub))
#define ncode_mul (TOp(TOpcode::mul))
#define ncode_div (TOp(TOpcode::div))
#define ncode_push (TOp(TOpcode::push))
#define ncode_pop (TOp(TOpcode::pop))
// jmp ends with a trailing '+', so the label written after it is consumed by
// TOp::operator+(TLabel) instead of by a call operator.
#define ncode_jmp (TOp(TOpcode::jmp)) +
#define ncode_call (TOp(TOpcode::call))
#define ncode_ret (TOp(TOpcode::ret))
// _(mnemonic operand, operand): "ncode_" is pasted onto the first token of the
// first macro argument (the mnemonic). The rest of that argument and
// everything after the first comma follow unchanged; the comma is dropped.
// Example: _(mov ebp,esp) becomes ncode_mov ebp esp.
#define _(x,...) ncode_##x __VA_ARGS__
// Register names. Every use expands to a NEW temporary TReg carrying the
// register number (0..7 in the order eax, ecx, edx, ebx, esp, ebp, esi, edi).
// NOTE(review): the operand operators store the address of the TReg they
// receive, so a pointer obtained from one of these temporaries is only valid
// until the end of the enclosing full-expression.
#define eax (TReg(0))
#define ecx (TReg(1))
#define edx (TReg(2))
#define ebx (TReg(3))
#define esp (TReg(4))
#define ebp (TReg(5))
#define esi (TReg(6))
#define edi (TReg(7))
// Demo entry point: writes a small routine (prologue, a few moves, a call,
// two jumps, epilogue) with the _() macro. Nothing is encoded or executed;
// each statement only builds a temporary TOp and discards it.
int _tmain(int argc, _TCHAR* argv[])
{
    TLabel L1,L2;

    // prologue
    _(push ebp);
    _(mov ebp,esp);
    _(push esi);
    _(push edi);

    // body: register, memory and labelled forms
    _(mov ebx, eax);
    _(mov eax,[ebx + 2 * ecx]);
    _(mov [ebx + 2 * ecx + 0x1000],eax);
    L1 _(mov eax,[eax]);
    L2 _(mov eax,[0x1234]);
    _(call eax);
    _(jmp L1);
    _(jmp L2);

    // epilogue
    _(pop edi);
    _(pop esi);
    _(mov esp,ebp);
    _(pop ebp);
    _(ret );

    // Nothing above can fail, so report success (status 0).
    return 0;
}
-----复杂,并不会因奥卡姆剃刀而减少。