Friday, July 30, 2010

Just In Time Compiler for Managed Platform- Part 4: Basic Conditional Branch

Today I'll generate code that can handle conditional branch. This one is really gian step- since once we have conditional branching enabled we are ready to execute almost all instructions. This is relatively complex to handle than earlier things.

First let us define the java class with a condition:

public class Math
{
    public static int add(int x, int y)
    {
        int r;
        r= x+y;
        return r;
    }

    public static int SimpleCall()
    {
        return 17;
    }
    public static int SimpleCall2()
    {
        return add(17, 29);
    }

    public static int SimpleCall4(int p)
    {
        if(p>100) return 100;
        return add(p, 29);
    }

    public static int SimpleCall5(int p)
    {
        return SimpleCall4(101);
    }

    public static int SimpleCall3()
    {        
        return SimpleCall4(17);
    }

    public static int Main()
    {
        return SimpleCall3();
    }
}


And the generated byte code:

public class Math extends java.lang.Object{
public Math();
  Signature: ()V
  Code:
   0:   aload_0
   1:   invokespecial   #1; //Method java/lang/Object."init":()V
   4:   return

public static int add(int, int);
  Signature: (II)I
  Code:
   0:   iload_0
   1:   iload_1
   2:   iadd
   3:   istore_2
   4:   iload_2
   5:   ireturn

public static int SimpleCall();
  Signature: ()I
  Code:
   0:   bipush  17
   2:   ireturn

public static int SimpleCall2();
  Signature: ()I
  Code:
   0:   bipush  17
   2:   bipush  29
   4:   invokestatic    #2; //Method add:(II)I
   7:   ireturn

public static int SimpleCall4(int);
  Signature: (I)I
  Code:
   0:   iload_0
   1:   bipush  100
   3:   if_icmple       9
   6:   bipush  100
   8:   ireturn
   9:   iload_0
   10:  bipush  29
   12:  invokestatic    #2; //Method add:(II)I
   15:  ireturn

public static int SimpleCall5(int);
  Signature: (I)I
  Code:
   0:   bipush  101
   2:   invokestatic    #3; //Method SimpleCall4:(I)I
   5:   ireturn

public static int SimpleCall3();
  Signature: ()I
  Code:
   0:   bipush  17
   2:   invokestatic    #3; //Method SimpleCall4:(I)I
   5:   ireturn

public static int Main();
  Signature: ()I
  Code:
   0:   invokestatic    #4; //Method SimpleCall3:()I
   3:   ireturn

}

The SimpleCall4 method has a conditional branch instruction if_icmple. Note that we deal with static methiods so far- so dont care the init method yet.

Let us first define the helper method for the instruction to generate machine code:

void IfIcmple(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
    u1 c[] = {
         //if((pRE->stack[pRE->stackTop-2].intValue > pRE->stack[pRE->stackTop-1].intValue))

         0x8B, 0x45, 0x08, //         mov         eax,dword ptr [pRE] 
         0x8B, 0x48, 0x04, //         mov         ecx,dword ptr [eax+4] 
         0x8B, 0x55, 0x08, //         mov         edx,dword ptr [pRE] 
         0x8B, 0x02, //            mov         eax,dword ptr [edx] 
         0x8B, 0x55, 0x08, //         mov         edx,dword ptr [pRE] 
         0x8B, 0x52, 0x04, //         mov         edx,dword ptr [edx+4] 
         0x8B, 0x75, 0x08, //         mov         esi,dword ptr [pRE] 
         0x8B, 0x36, //            mov         esi,dword ptr [esi] 
         0x8B, 0x44, 0xC8, 0xF0, //      mov         eax,dword ptr [eax+ecx*8-10h] 
         0x3B, 0x44, 0xD6, 0xF8, //      cmp         eax,dword ptr [esi+edx*8-8] 
         0x76, 0x00, 0x90, 0x90, 0x90, // JBE  nop nop nop                                                        


         //pRE->stackTop -= 2;
         0x8B, 0x45, 0x08, //         mov         eax,dword ptr [pRE] 
         0x8B, 0x48, 0x04, //         mov         ecx,dword ptr [eax+4] 
         0x83, 0xE9, 0x02, //         sub         ecx,2 
         0x8B, 0x55, 0x08, //         mov         edx,dword ptr [pRE] 
         0x89, 0x4A, 0x04, //         mov         dword ptr [edx+4],ecx 
    };

    memcpy(&code[ip], c, sizeof(c));
    ip+=sizeof(c);

    LinkedListNode *pNode = new LinkedListNode(&code[ip-20], NULL);
    
    LOG(_T("PC = 0x%X Jmp Inst Offset = 0x%X Inst = 0x%X\n"), targetpc, (int)(&code[ip-20])-(int)(code), code[ip-20]);

    JmpTarget *pJmpTarget = NULL;
    if(!pJmpTargetMap->Lookup((void *) targetpc, (void *&) pJmpTarget))
    {
        pJmpTarget = new JmpTarget();
        pJmpTarget->pTargetList = pNode;
        pJmpTarget->pc = targetpc;
        pJmpTargetMap->SetAt((void *)targetpc, pJmpTarget);
    }
    else
    {
        pNode->pNext = pJmpTarget->pTargetList;
    }

    pJmpTarget->pTargetList = pNode;
}

The instruction if_icmple checks value on top of stack and brances if first argument is greater than second argument. We do same in the machine code. Since we can not calculate address of target instruction without first generating code that is behind the target- we keep the target address empty and fix all jump address after we generate machine code all the instructions. To do this we maintain a map of jmp instruction locations and also a map of native vs managed code locations.

Since one instruction can be target of multiple jmp instructions we keep a linked list of jmp instructions we need to fix for a specific managed target instruction-

struct LinkedListNode
{
 void *pData;
 LinkedListNode *pNext;
 LinkedListNode(void *pData, LinkedListNode *pNext)
 {
  this->pData = pData;
  this->pNext = pNext;
 }

 LinkedListNode()
 {
  this->pData = NULL;
  this->pNext = NULL;
 }
};

Here we need to fix out ireturn helper- since after return from method we must not execute any instruction we must return but before that we must fix the native stack (epilog) and then return. Since we add epilog at the end of each native function we just jmp to that location for the cleanup if there is multiple retutn from managed code. To track thios we insert an unconditional jmp instruction to the epilog-

// ireturn instruction takes the value from stack top and push
// to stack[0] position. 
void IReturn(u1* code, int& ip, CMapPtrToPtr *pJmpTargetMap)
{
    u1 c[] = {
         //pRE->stack[0].intValue=pRE->stack[pRE->stackTop-1].intValue;
         0x8B, 0x45, 0x08, //         mov         eax,dword ptr [pRE] 
         0x8B, 0x48, 0x04, //         mov         ecx,dword ptr [eax+4] 
         0x8B, 0x55, 0x08, //         mov         edx,dword ptr [pRE] 
         0x8B, 0x02,       //         mov         eax,dword ptr [edx] 
         0x8B, 0x55, 0x08, //         mov         edx,dword ptr [pRE] 
         0x8B, 0x12,       //         mov         edx,dword ptr [edx] 
         0x8B, 0x44, 0xC8, 0xF8, //         mov         eax,dword ptr [eax+ecx*8-8] 
         0x89, 0x02,       //         mov         dword ptr [edx],eax 
    };

    memcpy(&code[ip], c, sizeof(c));
    ip+=sizeof(c);    

    u1 c1[] = {
        0xE9, 0x00, 0x00, 0x00, 0x00  //JMP , nop ,nop
    };

    memcpy(&code[ip], c1, sizeof(c1));
    ip+=sizeof(c1);    

    LinkedListNode *pNode = new LinkedListNode(&code[ip-5], NULL);
    
    LOG(_T("PC = RETURN Jmp Inst Offset = 0x%X Inst = 0x%X\n"), (int)(&code[ip-5])-(int)(code), code[ip-5]);

    JmpTarget *pJmpTarget = NULL;
    if(!pJmpTargetMap->Lookup((void *) 0, (void *&) pJmpTarget))
    {
        pJmpTarget = new JmpTarget();
        pJmpTarget->pTargetList = pNode;
        pJmpTarget->pc = 0;
        pJmpTargetMap->SetAt((void *)0, pJmpTarget);
    }
    else
    {
        pNode->pNext = pJmpTarget->pTargetList;
    }

    pJmpTarget->pTargetList = pNode;

}


Lets now see how we fix the address-

void FixJmpLocations(u1 *codes, CMapPtrToPtr *pJmpTargetMap, CMapPtrToPtr *pManagedtoNativeMap, u1* retAddress)
{
    LOG(_T("Fixing Jmp Target\n"));

    //Iterate through the entire map,
    for (POSITION pos = pJmpTargetMap->GetStartPosition(); pos != NULL;)
    {
        JmpTarget *pJmpTarget;
        int pc;
        pJmpTargetMap->GetNextAssoc(pos, (void *&)pc, (void *&)pJmpTarget);

        ASSERT(pc == pJmpTarget->pc);
        int target;

        if(0 == pJmpTarget->pc)
        {
            target = (int)retAddress;
        }
        else
        {
            target = (int) pManagedtoNativeMap->GetValueAt((void *&)pJmpTarget->pc);
        }

        LinkedListNode *pTargetList = pJmpTarget->pTargetList;

        do{
            int offset=0;
            if(0xE9 == codes[(int)((int)pTargetList->pData-(int)codes)])
            {
                offset = target - (int)pTargetList->pData-5; //1 for inst 4 for 4 byte offset = -5
                memcpy(&codes[(int)((int)pTargetList->pData-(int)codes)+1], &offset, sizeof(offset));
            }
            else
            {
                offset = target - (int)pTargetList->pData - 2;  //1 for inst 1 for 1 byte offset = -2 
                codes[(int)((int)pTargetList->pData-(int)codes)+1]=offset;
            }

            LOG(_T("Fixed 0x%X with Native Address Offset 0x%X\n"), (int)pTargetList->pData - (int)codes, offset);

            pTargetList = pTargetList->pNext;
        }while(NULL != pTargetList);
    }
}

Please note that the map pManagedtoNativeMap is polulated the Compile function like this in the giant for loop for each instruction-

pManagedtoNativeMap->SetAt((void *)pc, &codes[ip]);

Thats it. We are now ready to test the vodes we generate-

int main()
{
    Context *pRE = new Context();;
    pRE->stack = new Variable[STACK_SIZE];    
    pRE->stackTop = 0;
    memset(pRE->stack, 0, sizeof(Variable)*STACK_SIZE);

    pRE->pVMEnv = new VMEnvironment();
    pRE->pVMEnv->pClassHeap = new ClassHeap();
    pRE->pVMEnv->pObjectHeap = new ObjectHeap();

    pRE->pVMEnv->ppHelperMethods = HelperMethods;

    ClassHeap* pClsHeap =  pRE->pVMEnv->pClassHeap; 

    JavaClass jc;
    pClsHeap->LoadClass("Math", &jc);
    JavaClass *pVirtualClass =&jc,  *pClass1 = &jc;

    int mindex=pClass1->GetMethodIndex(_T("Main"),_T("()I"),pVirtualClass); 

    method_info_ex *pMethod = &pVirtualClass->methods[mindex];

    MethodLink *pMethodLink = new MethodLink();
    pMethodLink->pClass = pVirtualClass;
    pMethodLink->pMethod = pMethod;    

    ((void (*)(MethodLink *pMethodLink, Context *pRE))pRE->pVMEnv->ppHelperMethods[CALL_METHOD_HELPER_INDEX])(pMethodLink, pRE);

    LOG(_T("Ret = %d"), pRE->stack[0].intValue);

    return 0;
}

Thats it. Our generated native code can handle branching!

Tuesday, July 27, 2010

Just In Time Compiler for Managed Platform- Part 3: Call a method

Today I'll try to extend the simple JIT compiler to the point where we can call a method from another method.

First lets create a simple java class:

public class Math
{
    public static int add(int x, int y)
    {
        int r;
        r= x+y;
        return r;
    }

    public static int SimpleCall2()
    {
        return add(17, 29);
    }
}

Here we call add method from SimpleCall2 method. Our JIT compiler will supply mechanism to handle this. When we compile using java compiler we get following class and method byte code:

public class Math extends java.lang.Object{
public Math();
  Signature: ()V
  Code:
   0:   aload_0
   1:   invokespecial   #1; 
   4:   return

public static int add(int, int);
  Signature: (II)I
  Code:
   0:   iload_0
   1:   iload_1
   2:   iadd
   3:   istore_2
   4:   iload_2
   5:   ireturn

public static int SimpleCall2();
  Signature: ()I
  Code:
   0:   bipush  17
   2:   bipush  29
   4:   invokestatic    #2; //Method add:(II)I
   7:   ireturn
}


[Note: I do not describe the Java Virtual Machine basics here again- You may look at my article for basic understanding: Home Made Java Virtual Machine]

First we need to extend our Context structure to hold some more values. We should add members at the bottom- otherwise the code we generated so far will be invalid.

struct VMEnvironment
{
    ObjectHeap *pObjectHeap;
    ClassHeap *pClassHeap;
    void **ppHelperMethods;
};

struct Context
{
    Variable *stack;
    int stackTop;
    JavaClass *pClass;
    Context *pCallerContext; 
    VMEnvironment *pVMEnv;
};


Also we want to keep track of native codes we generate. So we need another structure.

struct MethodLink
{
    JavaClass *pClass;
    method_info_ex *pMethod;
    void *pNativeBlock;
};


Now we need a helper class to keep track of the methods we work with. We use a simple string to pointer map. The key string is generated from classname, method name and method desc. So key looks like "Math::add(II)I".

MethodLink* GetMethod(JavaClass *pClass, method_info_ex *pMethod, u4 pc)
{
    static CMapStringToPtr methodsMap;

    u2 mi=getu2(&pMethod->pCode_attr->code[pc+1]);
    char *pConstPool = (char *)pClass->constant_pool[mi];
    
    u2 classIndex = getu2(&pConstPool[1]);
    u2 nameAndTypeIndex = getu2(&pConstPool[3]);

    //get class at pool index 
    pConstPool = (char *)pClass->constant_pool[classIndex];

    ASSERT(pConstPool[0] == CONSTANT_Class);

    u2 ni=getu2(&pConstPool[1]);

    CString strClassName;
    pClass->GetStringFromConstPool(ni, strClassName);

    ClassHeap *pClassHeap = new ClassHeap();

    JavaClass *pClassCallee=pClassHeap->GetClass(strClassName);

    pConstPool = (char *)pClassCallee->constant_pool[nameAndTypeIndex];
    ASSERT(pConstPool[0] == CONSTANT_NameAndType);
    
    u2 name_index = getu2(&pConstPool[1]);
    u2 descriptor_index = getu2(&pConstPool[3]);

    CString strMethodName, strMethodDesc;
    pClassCallee->GetStringFromConstPool(name_index, strMethodName);
    pClassCallee->GetStringFromConstPool(descriptor_index, strMethodDesc);
    
    JavaClass *pVirtualClass=pClassCallee;
    int nIndex=pClassCallee->GetMethodIndex(strMethodName, strMethodDesc, pVirtualClass);
    
    method_info_ex *pCalleeMethod = &pClassCallee->methods[nIndex];    

    /*
    if( ACC_SUPER & pCalleeMethod->access_flags)
    {
        pCalleeMethod = pClassCallee->GetSuperClass();
    }    
    */

    CString sign(strClassName+"::"+strMethodName+strMethodDesc);
    MethodLink *pLink=NULL;
    if(!methodsMap.Lookup(sign, (void *&)pLink))
    {
        pLink = new MethodLink();
        pLink->pClass = pClassCallee;
        pLink->pMethod = pCalleeMethod;
        pLink->pNativeBlock = NULL;
        methodsMap.SetAt(sign, pLink);    
    }

    return pLink;
}


To call a method we do not generate the statck preparation code using machine code for now to keep the things simple. We'll do that after we finish all type of code generation. So from native code we call back to a C++ method that again calls into generated codes-

void CallMethod(MethodLink *pMethodLink, Context *pRE)
{
    LOG(_T("CallMethod\n"));

    int codeBlockSize = pMethodLink->pMethod->pCode_attr->code_length*2;   //todo guess better

    int (*NativeBlock)(Context *)=(int (*)(Context *)) VirtualAlloc(NULL, codeBlockSize,  MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    u1* codes = (u1*) NativeBlock;

    int ip =0;

    JavaClass *pClass = pMethodLink->pClass;

    if(NULL == pMethodLink->pNativeBlock)
    {            
        Compile(pMethodLink->pClass, pMethodLink->pMethod, codes, ip);
        pMethodLink->pNativeBlock = codes;
    }

    CString strName, strDesc;
    pMethodLink->pClass->GetStringFromConstPool(pMethodLink->pMethod->name_index, strName);
    pMethodLink->pClass->GetStringFromConstPool(pMethodLink->pMethod->descriptor_index, strDesc);

    int params=GetMethodParametersStackCount(strDesc)+1;
    
    //invokestatic: we are only dealing with static methods so far

    int nDiscardStack =params;
    if(pMethodLink->pMethod->access_flags & ACC_NATIVE)
    {
    }
    else 
    {
        nDiscardStack+= pMethodLink->pMethod->pCode_attr->max_locals; 
    }    

    pRE->stackTop+=(nDiscardStack-1);
    LOG(_T("Invoking method %s%s, \n"), strName, strDesc);

    (*NativeBlock)(pRE);

    //if returns then get on stack    
    if(strDesc.Find(_T(")V")) < 0)
    {
        nDiscardStack--;
        if(strDesc.Find(_T(")J")) < 0)
        {            
        }
        else
        {
            nDiscardStack--;
        }
    }

    pRE->stackTop-=nDiscardStack;    
    LOG(_T("~CallMethod\n"));
}


OK, thats the callbacks we need for now. Now we generate the actual machine code that will use the MethodLink* value to call back to the CallMethod function. To do this we use a function pointer list and store it in the context environment-

#define CALL_METHOD_HELPER_INDEX 0

void*  HelperMethods[] = {
    CallMethod,
};

Let us now define the InvokeStatic helper method.

void InvokeStatic(JavaClass *pClass, method_info_ex *pMethod, u4 pc, u1* codes, int &ip)
{
    MethodLink* pLink = GetMethod(pClass, pMethod, pc);
    EmitCallMethod(codes, ip, pLink);
}

void EmitCallMethod(u1* code, int &ip, void* pLinkAddress)
{
    //((void (*)(MethodLink *pMethodLink))pRE->pVMEnv->ppHelperMethods[CALL_METHOD_HELPER_INDEX])(pLinkAddress, pRE);
    u1 c[] = {
         0x8B, 0x45, 0x08, //         mov         eax,dword ptr [pRE] 
         0x50, //              push        eax  
         0x68, 0x00, 0x00, 0x00, 0x00, //   push        pLinkAddress 
         0x8B, 0x4D, 0x08, //         mov         ecx,dword ptr [pRE] 
         0x8B, 0x51, 0x10, //         mov         edx,dword ptr [ecx+10h] 
         0x8B, 0x42, 0x08, //         mov         eax,dword ptr [edx+8] 
         0x8B, 0x08, //            mov         ecx,dword ptr [eax] 
         0xFF, 0xD1, //            call        ecx  
         0x83, 0xC4, 0x08, //         add         esp,8 
    };

    memcpy(c+5, &pLinkAddress, 4);
    memcpy(&code[ip], c, sizeof(c));
    ip+=sizeof(c);
}


To compile the methods we define a function that generates machine code for java byte codes. This function does not handle branch instructions right now. To handle branch we probably need two pass- since we would not know the exact address during first pass. So, here is a large while loop to do basic things:

u4 Compile(JavaClass *pClass, method_info_ex *pMethod, u1 *codes, int &ip)
{
    if(pMethod->access_flags & ACC_NATIVE)
    {
        return 1;
    }

    Prolog(codes, ip);    

    u4 pc=0;
    u1 *bc=pMethod->pCode_attr->code;    
    
    i4 error=0;
    
    CString strMethod;
    pClass->GetStringFromConstPool(pMethod->name_index, strMethod);
   
    i4 index=0;
    while(pMethod->pCode_attr->code_length>pc)
    {
        LOG(_T("Opcode = %s\n"),OpcodeDesc[(u1)bc[pc]]); 

        switch(bc[pc])
        {
        case nop:
            pc++;
            break;

        case bipush:// 16 /*(0x10)*/
            BiPush(codes, ip, (u1)bc[pc+1]);
            pc+=2;
            break;
   
        case iload_0: //26 Load int from local variable 0 
            ILoad_0(codes, ip);
            pc++;            
            break;

        case iload_1: //27 Load int from local variable 1 
            ILoad_1(codes, ip);
            pc++;            
            break;
        case iload_2: //28 Load int from local variable 2 
            ILoad_2(codes, ip);
            pc++;            
            break;
        case iload_3: //29 Load int from local variable 3 
            ILoad_3(codes, ip);
            pc++;            
            break;

        case istore_2: // 61 /*(0x3d) */
            IStore_2(codes, ip);
            pc++;
            break;

        case iadd: //96
            IAdd(codes, ip);
            pc++;
            break;

        case invokestatic:// 184 
            InvokeStatic(pClass, pMethod, pc, codes, ip);
            pc+=3;
            break;
        case ireturn: //172 (0xac)            
            IReturn(codes, ip);
            pc++;
            break;

        default:
            error=1;
            break;
        }

        if(error) break;
    }

    Return0(codes, ip);
    Epilog(codes, ip);

    return error;
}


OK, we are now ready to test out code:

int main()
{
    Context *pRE = new Context();;
    pRE->stack = new Variable[STACK_SIZE];    
    pRE->stackTop = 0;
    memset(pRE->stack, 0, sizeof(Variable)*STACK_SIZE);

    pRE->pVMEnv = new VMEnvironment();
    pRE->pVMEnv->pClassHeap = new ClassHeap();
    pRE->pVMEnv->pObjectHeap = new ObjectHeap();

    pRE->pVMEnv->ppHelperMethods = HelperMethods;

    ClassHeap* pClsHeap =  pRE->pVMEnv->pClassHeap;
    
    JavaClass jc;
    pClsHeap->LoadClass("Math", &jc);
    JavaClass *pVirtualClass =&jc,  *pClass1 = &jc;

    int mindex=pClass1->GetMethodIndex(_T("SimpleCall2"),_T("()I"),pVirtualClass); 

    method_info_ex *pMethod = &pVirtualClass->methods[mindex];

    MethodLink *pMethodLink = new MethodLink();
    pMethodLink->pClass = pVirtualClass;
    pMethodLink->pMethod = pMethod;    

    ((void (*)(MethodLink *pMethodLink, Context *pRE))pRE->pVMEnv->ppHelperMethods[CALL_METHOD_HELPER_INDEX])(pMethodLink, pRE);
    LOG(_T("Return Value = %d"), pRE->stack[0].intValue);

    return 0;
}

Do you see value 46 on the stack as return value? Cool!

Thursday, July 15, 2010

Just In Time Compiler for Managed Platform- Part 2: Generate native method

Today we'll design a small block of code that is equivallent to a corresponding java method.

Since the generated native executable code will be used only by our VM we are free to define out own structure and calling convention for it. We generate one native function for each Java method. Each generated native function will have only parameter that is required for operation- a pointer to a structure RuntimeEnvironment:

union Variable
{
    u1 charValue;
    u2 shortValue;
    u4 intValue;
    f4 floatValue;
    u4* ptrValue;
    Object object;
};

struct RuntimeEnvironment
{
    Variable *stack;
    int stackTop;
    //We'll add more as we require later
};

The return type will be int:

int ExecuteMethod(RuntimeEnvironment *pRE);

To generate the final method we need a lot of helper function. We define those as:
void HelperFunction(u1* code, int& ip);
These functions will take a code block and insert code in the block and fix the code pointer (ip).

First let us define the Prolog and Epilog and return 0 helper functions for this function prototype:

void Prolog(u1* code, int& ip)
{
    u1 c[]= {
        0x55,//               push        ebp  
        0x8B, 0xEC,//            mov         ebp,esp 
        0x81, 0xEC, 0xC0, 0x00, 0x00, 0x00,// sub         esp,0C0h 
        0x53,//               push        ebx  
        0x56,//               push        esi  
        0x57,  //             push        edi 
    };

    memcpy(&code[ip], c, sizeof(c));
    ip+=sizeof(c);
}

void Epilog(u1* code, int& ip)
{
    u1 c[]= {
        0x5F,//               pop         edi  
        0x5E,//               pop         esi  
        0x5B,//               pop         ebx  
        0x8B, 0xE5,//            mov         esp,ebp 
        0x5D,//               pop         ebp  
        0xC3,//               ret              
    };

    memcpy(&code[ip], c, sizeof(c));
    ip+=sizeof(c);
}

void Return0(u1* code, int& ip)
{
    //33 C0  xor         eax,eax 
    code[ip++]=0x33;
    code[ip++]=0xC0;
}

Now, we want to generate machine code for the following simple function-

public static int SimpleCall()
{
    return 17;
}

Here is the generated java byte code:
Signature: ()I
  Code:
    0:   bipush  17
    2:   ireturn

Here we start to generate helper function for Java Virtual Machine instruction.

For bipush [value] we need to push the [value] on the VM stack:

void BiPush(u1* code, int& ip, short value)
{
    // C++ equivallent
    // pRE->stack[pRE->stackTop++].shortValue = value;

    u1 c[] = {
         0x8B, 0x45, 0x08, //         mov         eax,dword ptr [pRE] 
         0x8B, 0x48, 0x04, //         mov         ecx,dword ptr [eax+4] 
         0x8B, 0x55, 0x08, //         mov         edx,dword ptr [pRE] 
         0x8B, 0x02, //            mov         eax,dword ptr [edx] 
         0xBA, 0x00, 0x00, 0x00, 0x00, //   mov         edx,value 
         0x66, 0x89, 0x14, 0xC8, //      mov         word ptr [eax+ecx*8],dx 
         0x8B, 0x45, 0x08, //         mov         eax,dword ptr [pRE] 
         0x8B, 0x48, 0x04, //         mov         ecx,dword ptr [eax+4] 
         0x83, 0xC1, 0x01, //         add         ecx,1 
         0x8B, 0x55, 0x08, //         mov         edx,dword ptr [pRE] 
         0x89, 0x4A, 0x04,  //       mov         dword ptr [edx+4],ecx 
    };

    //We need to encode value and set it to the 00 00 00 00 position 
    u1 encVal[4];
    EncodeByte4((int)value, encVal);
    memcpy(c + 12, encVal, 4); 
    memcpy(&code[ip], c, sizeof(c));
    ip+=sizeof(c);
}

Thats it for the simple java function. We can now test this:

int main() 
{ 
    int codeBlockSize = 4096;    
    int (*SimpleCall)(RuntimeEnvironment *)=(int (*)(RuntimeEnvironment *)) VirtualAlloc(NULL, codeBlockSize,  MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    u1* codes = (u1*) SimpleCall;
    int ip=0;
    memset(codes, 0, codeBlockSize);

    Prolog(codes, ip);
    BiPush(codes, ip, 17);
    Return0(codes, ip);
    Epilog(codes, ip);

    //No lets test if it is really pushing value 17 on the VM stack
    RuntimeEnvironment *pRE = new RuntimeEnvironment();;
    pRE->stack = new Variable[20];    
    memset(pRE->stack, 0, sizeof(Variable)*20);
    int retVal = (*SimpleCall)(pRE);
    printf("pRE->stack[0].intValue = %d", pRE->stack[0].intValue);    
    
    return 0;
}

Thats cool- we have generated our first native function that actually does some byte code execution.

Saturday, July 10, 2010

Just In Time Compiler for Managed Platform- Part 1: Code Generation

First we need to generate executable code block.

So let us write some code in C++.
int add(int x, int y) 
{ 
    int r; 
    r= x+y; 
    return r; 
}

int main()
{
    int r1 = add(13, 23);
    printf("Returned value = %d", r1);
}

Great! It returns the right value. OK, but that is very basic. We want to generate the function from data in a simple buffer-

First we need a machine equivallent code for the function above:

unsigned char addcode[] = {  
    0x55,          //push        ebp  
    0x8B, 0xEC,        //mov         ebp,esp 
    0x81, 0xEC, 0xC0, 0x00, 0x00, 0x00,  //sub         esp,0C0h 
    0x53,          //push        ebx  
    0x56,          //push        esi  
    0x57,          //push        edi  

    //r=x+y;
    0x8B, 0x45, 0x08,       //mov         eax,dword ptr [x] 
    0x03, 0x45, 0x0C,       //add         eax,dword ptr [y] 
    0x89, 0x45, 0xF8,       //mov         dword ptr [r],eax 

    //return r;
    0x8B, 0x45, 0xF8,       //mov         eax,dword ptr [r] 

    0x5F,          //pop         edi  
    0x5E,          //pop         esi  
    0x5B,          //pop         ebx  
    0x8B, 0xE5,        //mov         esp,ebp 
    0x5D,          //pop         ebp  

    0xC3          //ret              
   };

OK, this code is generated by Visual Studio compiler. We use it to generate our own code block in memory.

To get a memory block we can use to generate executable code we use the following Windows API:

LPVOID WINAPI VirtualAlloc(
    __in_opt LPVOID lpAddress,
    __in SIZE_T dwSize,
    __in DWORD flAllocationType,
    __in DWORD flProtect
);

First we allocate a 4096 byte executable code block and put it in a function pointer:

int (*addfn)(int, int) = (int (*)(int, int)) VirtualAlloc(NULL, 4096,  MEM_COMMIT, PAGE_EXECUTE_READWRITE);

Then we copy our executable code to this memory block:

memcpy(addfn, addcode, sizeof(addcode)); 

Now the majic - we call the function and print the return value:

int r1 = (*addfn)(13,23); 
 printf("Returned value = %d", r1);

Thats easy- right?

Now we release the memory since we are gentle citizen-

VirtualFree(addfn, NULL, MEM_RELEASE);

Thats it. here is the full code:
#include [windows.h][stdio.h]...

unsigned char addcode[] = {  
    0x55,          //push        ebp  
    0x8B, 0xEC,        //mov         ebp,esp 
    0x81, 0xEC, 0xC0, 0x00, 0x00, 0x00,  //sub         esp,0C0h 
    0x53,          //push        ebx  
    0x56,          //push        esi  
    0x57,          //push        edi  

    //r=x+y;
    0x8B, 0x45, 0x08,       //mov         eax,dword ptr [x] 
    0x03, 0x45, 0x0C,       //add         eax,dword ptr [y] 
    0x89, 0x45, 0xF8,       //mov         dword ptr [r],eax 

    //return r;
    0x8B, 0x45, 0xF8,       //mov         eax,dword ptr [r] 

    0x5F,          //pop         edi  
    0x5E,          //pop         esi  
    0x5B,          //pop         ebx  
    0x8B, 0xE5,        //mov         esp,ebp 
    0x5D,          //pop         ebp  

    0xC3          //ret              
   };

int main()
{
    int (*addfn)(int, int) = (int (*)(int, int)) VirtualAlloc(NULL, 4096,  MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    memcpy(addfn, addcode, sizeof(addcode)); 
    
    int r1 = (*addfn)(13,23); 
    printf("Returned value = %d", r1);

    VirtualFree(addfn, NULL, MEM_RELEASE);

    return 0;
}

Thats all for now. We can now generate code in memory and execute it. First step to design a JIT compiler.