Thursday, July 15, 2010

Just In Time Compiler for Managed Platform- Part 2: Generate native method

Today we'll design a small block of native code that is equivalent to a corresponding Java method.

Since the generated native executable code will be used only by our VM, we are free to define our own structure and calling convention for it. We generate one native function for each Java method. Each generated native function will take only one parameter, the one required for its operation: a pointer to a RuntimeEnvironment structure:

// One slot of the VM stack (RuntimeEnvironment::stack is an array of these).
// All members share the same storage; which one is meaningful depends on
// what was last stored in the slot.
// NOTE(review): u1/u2/u4/f4 and Object are declared elsewhere; the code
// emitted by BiPush indexes the stack with a scale of 8, so it presumes
// sizeof(Variable) == 8 -- confirm against the definition of Object.
union Variable
{
    u1 charValue;
    u2 shortValue;
    u4 intValue;
    f4 floatValue;
    u4* ptrValue;
    Object object;
};

// State handed to every generated native function as its single argument.
// The machine code emitted by BiPush reads `stack` at offset 0 and
// `stackTop` at offset 4 of this structure, so the order of these two
// fields must not change without updating the emitted bytes.
struct RuntimeEnvironment
{
    Variable *stack;   // base of the VM stack
    int stackTop;      // index of the next free slot in `stack`
    // More fields will be added later as they are needed
};

The return type will be int:

// Shape of every generated native function: it receives the runtime
// environment by pointer and returns an int.
int ExecuteMethod(RuntimeEnvironment *pRE);

To generate the final method we need a number of helper functions. We define them with this signature:
// code: buffer receiving the machine code; ip: write offset into code,
// advanced by the number of bytes the helper emits.
void HelperFunction(u1* code, int& ip);
Each of these functions takes a code block, writes machine code into it at offset ip, and advances the code pointer (ip) past the bytes it wrote.

First let us define the Prolog and Epilog and return 0 helper functions for this function prototype:

// Emits the function prologue at code[ip] and advances ip past it.
// The prologue sets up an ebp-based stack frame, reserves 0C0h bytes of
// local stack space and saves ebx, esi and edi.
void Prolog(u1* code, int& ip)
{
    static const u1 prologue[] = {
        0x55,                               // push ebp
        0x8B, 0xEC,                         // mov  ebp,esp
        0x81, 0xEC, 0xC0, 0x00, 0x00, 0x00, // sub  esp,0C0h
        0x53,                               // push ebx
        0x56,                               // push esi
        0x57                                // push edi
    };
    const int length = (int)sizeof(prologue);

    memcpy(code + ip, prologue, length);
    ip += length;
}

// Emits the function epilogue at code[ip] and advances ip past it.
// The epilogue restores edi, esi and ebx, tears down the ebp-based stack
// frame and returns to the caller.
void Epilog(u1* code, int& ip)
{
    static const u1 epilogue[] = {
        0x5F,       // pop edi
        0x5E,       // pop esi
        0x5B,       // pop ebx
        0x8B, 0xE5, // mov esp,ebp
        0x5D,       // pop ebp
        0xC3        // ret
    };
    const int length = (int)sizeof(epilogue);

    memcpy(code + ip, epilogue, length);
    ip += length;
}

// Emits "xor eax,eax" (33 C0) at code[ip], which makes the generated
// function return 0 in eax, and advances ip by the two bytes written.
void Return0(u1* code, int& ip)
{
    u1* out = code + ip;

    out[0] = 0x33; // xor opcode
    out[1] = 0xC0; // ModRM: eax,eax
    ip += 2;
}

Now, we want to generate machine code for the following simple function-

// Returns the constant 17; compiles to the two instructions
// "bipush 17" and "ireturn" shown in the listing below.
public static int SimpleCall()
{
    return 17;
}

Here is the generated java byte code:
Signature: ()I
  Code:
    0:   bipush  17
    2:   ireturn

Now we start writing a helper function for each Java Virtual Machine instruction.

For bipush [value] we need to push the [value] on the VM stack:

// Emits native code for the JVM instruction `bipush <value>` at code[ip]
// and advances ip past the emitted bytes.
//
// code  - buffer receiving the machine code
// ip    - write offset into code; incremented by the number of bytes emitted
// value - constant to push; it is sign-extended to 32 bits before encoding
//
// The JVM defines bipush as pushing the immediate sign-extended to an int,
// so the emitted code stores the full 32-bit value into the slot. A 16-bit
// store would leave whatever was previously in the upper half of the slot
// and would make negative constants read back wrong through intValue.
void BiPush(u1* code, int& ip, short value)
{
    // C++ equivalent
    // pRE->stack[pRE->stackTop++].intValue = value;

    // Byte offset inside c[] of the imm32 operand of "mov edx,imm32".
    const int immOffset = 12;

    // pRE is the function's only argument and is read from [ebp+8];
    // stack is at offset 0 and stackTop at offset 4 of *pRE.
    u1 c[] = {
         0x8B, 0x45, 0x08, //         mov         eax,dword ptr [pRE] 
         0x8B, 0x48, 0x04, //         mov         ecx,dword ptr [eax+4] 
         0x8B, 0x55, 0x08, //         mov         edx,dword ptr [pRE] 
         0x8B, 0x02, //            mov         eax,dword ptr [edx] 
         0xBA, 0x00, 0x00, 0x00, 0x00, //   mov         edx,value 
         0x89, 0x14, 0xC8, //         mov         dword ptr [eax+ecx*8],edx 
         0x8B, 0x45, 0x08, //         mov         eax,dword ptr [pRE] 
         0x8B, 0x48, 0x04, //         mov         ecx,dword ptr [eax+4] 
         0x83, 0xC1, 0x01, //         add         ecx,1 
         0x8B, 0x55, 0x08, //         mov         edx,dword ptr [pRE] 
         0x89, 0x4A, 0x04,  //       mov         dword ptr [edx+4],ecx 
    };

    // Patch the placeholder 00 00 00 00 with the encoded constant.
    // NOTE(review): EncodeByte4 is defined elsewhere; presumably it writes
    // the int as 4 little-endian bytes -- confirm.
    u1 encVal[4];
    EncodeByte4((int)value, encVal);
    memcpy(c + immOffset, encVal, sizeof(encVal));
    memcpy(&code[ip], c, sizeof(c));
    ip+=sizeof(c);
}

That's it for the simple Java function. We can now test this:

int main() 
{ 
    int codeBlockSize = 4096;    
    int (*SimpleCall)(RuntimeEnvironment *)=(int (*)(RuntimeEnvironment *)) VirtualAlloc(NULL, codeBlockSize,  MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    u1* codes = (u1*) SimpleCall;
    int ip=0;
    memset(codes, 0, codeBlockSize);

    Prolog(codes, ip);
    BiPush(codes, ip, 17);
    Return0(codes, ip);
    Epilog(codes, ip);

    //No lets test if it is really pushing value 17 on the VM stack
    RuntimeEnvironment *pRE = new RuntimeEnvironment();;
    pRE->stack = new Variable[20];    
    memset(pRE->stack, 0, sizeof(Variable)*20);
    int retVal = (*SimpleCall)(pRE);
    printf("pRE->stack[0].intValue = %d", pRE->stack[0].intValue);    
    
    return 0;
}

That's cool: we have generated our first native function that actually executes some bytecode.

No comments:

Post a Comment