Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added JIT CNV4 compiler for ppc64le, ppc64 and ppc #5540

Closed
wants to merge 11 commits into from
164 changes: 161 additions & 3 deletions src/crypto/CryptonightR_JIT.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
#include "CryptonightR_JIT.h"
#include "CryptonightR_template.h"

static const uint8_t prologue[] = {
//x86 prologue and epilogue
#if defined __i386 || defined __x86_64__

static const uint8_t prologue[] = {
0x4C, 0x8B, 0xD7, // mov r10, rdi
0x53, // push rbx
0x55, // push rbp
Expand All @@ -30,11 +32,9 @@ static const uint8_t prologue[] = {
0x41, 0x8B, 0x42, 0x18, // mov eax, DWORD PTR [r10+24]
0x41, 0x8B, 0x52, 0x1C, // mov edx, DWORD PTR [r10+28]
0x45, 0x8B, 0x4A, 0x20, // mov r9d, DWORD PTR [r10+32]
#endif
};

static const uint8_t epilogue[] = {
#if defined __i386 || defined __x86_64__
0x49, 0x8B, 0xE3, // mov rsp, r11
0x41, 0x89, 0x1A, // mov DWORD PTR [r10], ebx
0x41, 0x89, 0x72, 0x04, // mov DWORD PTR [r10+4], esi
Expand All @@ -44,9 +44,93 @@ static const uint8_t epilogue[] = {
0x5D, // pop rbp
0x5B, // pop rbx
0xC3, // ret
};
#endif

#if defined __PPC__ || defined __PPC64__
#include <endian.h>
/*
 * PowerPC entry sequence for the generated function, one 32-bit instruction
 * word per element. It saves the link register, then loads nine consecutive
 * 32-bit words from the address in r3 into r4..r12.
 * r3 is presumably the data pointer passed as the first argument (it is the
 * first integer argument register in the PowerPC ABIs) -- confirm against the
 * v4_random_math_JIT_func signature.
 * NOTE(review): std and the 16(r1) slot are 64-bit forms, yet this table is
 * also compiled when only __PPC__ is defined -- confirm behaviour on 32-bit
 * targets.
 */
static const uint32_t prologue[] = {
0x7c0802a6, // mflr r0        copy the link register into r0
0xf8010010, // std r0,16(r1)  store r0 (the saved LR) at offset 16 from the stack pointer
0x80830000,// lwz r4,0(r3)    r4  <- 32-bit word at r3+0
0x80A30004,// lwz r5,4(r3)    r5  <- 32-bit word at r3+4
0x80C30008,// lwz r6,8(r3)    r6  <- 32-bit word at r3+8
0x80E3000C,// lwz r7,12(r3)   r7  <- 32-bit word at r3+12
0x81030010,// lwz r8,16(r3)   r8  <- 32-bit word at r3+16
0x81230014,// lwz r9,20(r3)   r9  <- 32-bit word at r3+20
0x81430018,// lwz r10,24(r3)  r10 <- 32-bit word at r3+24
0x8163001C,// lwz r11,28(r3)  r11 <- 32-bit word at r3+28
0x81830020,// lwz r12,32(r3)  r12 <- 32-bit word at r3+32
};

/*
 * PowerPC exit sequence for the generated function, one 32-bit instruction
 * word per element. It stores r4..r12 back to the nine 32-bit words at
 * r3+0 .. r3+32 (the same slots the prologue loaded them from), reloads the
 * link register from 16(r1) and returns to the caller.
 * NOTE(review): ld is a 64-bit instruction, yet this table is also compiled
 * when only __PPC__ is defined -- confirm behaviour on 32-bit targets.
 */
static const uint32_t epilogue[] ={
0x90830000,// stw r4,0(r3)    32-bit word at r3+0  <- r4
0x90A30004,// stw r5,4(r3)    32-bit word at r3+4  <- r5
0x90C30008,// stw r6,8(r3)    32-bit word at r3+8  <- r6
0x90E3000C,// stw r7,12(r3)   32-bit word at r3+12 <- r7
0x91030010,// stw r8,16(r3)   32-bit word at r3+16 <- r8
0x91230014,// stw r9,20(r3)   32-bit word at r3+20 <- r9
0x91430018,// stw r10,24(r3)  32-bit word at r3+24 <- r10
0x9163001C,// stw r11,28(r3)  32-bit word at r3+28 <- r11
0x91830020,// stw r12,32(r3)  32-bit word at r3+32 <- r12
0xe8010010, // ld r0,16(r1)   reload the saved link register value into r0
0x7c0803a6, // mtlr r0        move r0 back into the link register
0x4e800020, // blr            return to the address in the link register
};

/*
 * Field placement helpers for 32-bit PowerPC instruction words.
 * Every parameter is parenthesized and widened to uint32_t before shifting,
 * so compound arguments (e.g. ppcD(x | y)) expand correctly and the shifts
 * never overflow a signed int.
 */
#define ppcD(d) ((uint32_t)(d) << 21)       /* RT: target register field */
#define ppcS(s) ((uint32_t)(s) << 21)       /* RS: source register field (same bits as RT) */
#define ppcA(a) ((uint32_t)(a) << 16)       /* RA field */
#define ppcB(b) ((uint32_t)(b) << 11)       /* RB field */
#define ppcMC(c) ((uint32_t)(c) << 6)       /* MB: rotate mask begin */
#define ppcME(e) ((uint32_t)(e) << 1)       /* ME: rotate mask end */
#define ppcIMM(imm) ((uint32_t)(imm) & 0xffffu) /* 16-bit immediate field */

#define ppcHI(opcode) ((uint32_t)(opcode) << 26) /* primary opcode */
#define ppcLO(opcode) ((uint32_t)(opcode) << 1)  /* extended opcode */

/* Base instruction words (all operand fields zero); also used as selectors for ppcgen_op(). */
#define ppcADD   ((uint32_t)(ppcHI(31) | ppcLO(266)))
#define ppcADDI  ((uint32_t)(ppcHI(14)))
#define ppcADDIS ((uint32_t)(ppcHI(15)))
#define ppcMULLW ((uint32_t)(ppcHI(31) | ppcLO(235)))
#define ppcNEG   ((uint32_t)(ppcHI(31) | ppcLO(104)))
#define ppcSUBF  ((uint32_t)(ppcHI(31) | ppcLO(40)))
#define ppcXOR   ((uint32_t)(ppcHI(31) | ppcLO(316)))
#define ppcROTLW ((uint32_t)(ppcHI(23)))

/*
 * Assemble one PowerPC instruction word.
 *
 * op  one of the ppcXXX base words defined above.
 * a0  first operand; placed in the bits 21..25 field, except for ppcROTLW
 *     where it is placed in the RA field.
 * a1  second operand; placed in the RA field, except for ppcROTLW where it
 *     is placed in the RS field.
 * a2  RB register for the three-register forms, the 16-bit immediate for
 *     ppcADDI / ppcADDIS (only the low 16 bits are used), ignored for ppcNEG.
 *
 * Register operands are truncated to 8 bits but not validated: callers must
 * pass register numbers in 0..31, otherwise neighbouring fields are corrupted.
 *
 * Returns the encoded instruction, or `op` unchanged when it is not one of
 * the supported base words.
 */
uint32_t ppcgen_op(uint32_t op, uint32_t a0, uint32_t a1, uint32_t a2)
{
    const uint32_t first  = (uint8_t)a0;
    const uint32_t second = (uint8_t)a1;
    const uint32_t third  = (uint8_t)a2;

    switch (op) {
    case ppcADD:
    case ppcSUBF:
    case ppcMULLW:
    case ppcXOR:
        /*
         * Three-register form: a0 -> bits 21..25, a1 -> RA, a2 -> RB.
         * NOTE(review): for xor the ISA operand order is RA,RS,RB with RA as
         * the result, so here the result register is a1 and a0 is a source.
         * The visible caller passes a0 == a1, where this makes no difference.
         */
        return op | ppcD(first) | ppcA(second) | ppcB(third);

    case ppcNEG:
        /* Two-register form: a2 is not encoded. */
        return op | ppcD(first) | ppcA(second);

    case ppcADDIS:
    case ppcADDI:
        /* Register + immediate form: the low 16 bits of a2 are the immediate. */
        return op | ppcD(first) | ppcA(second) | ppcIMM((uint16_t)a2);

    case ppcROTLW:
        /* rlwnm with mask 0..31, i.e. a full 32-bit rotate left by register a2. */
        return op | ppcS(second) | ppcA(first) | ppcB(third)
                  | ppcMC((uint8_t)0) | ppcME((uint8_t)31);

    default:
        /* Unknown selector: hand the value back untouched. */
        return op;
    }
}

#endif //end ppc helper functions


#define APPEND_CODE(src, size) \
do { \
if (JIT_code + (size) > JIT_code_end) \
Expand All @@ -57,6 +141,9 @@ static const uint8_t epilogue[] = {

int v4_generate_JIT_code(const struct V4_Instruction* code, v4_random_math_JIT_func buf, const size_t buf_size)
{
if(buf == NULL){
return -1;
}
#if defined __i386 || defined __x86_64__
uint8_t* JIT_code = (uint8_t*) buf;
const uint8_t* JIT_code_end = JIT_code + buf_size;
Expand Down Expand Up @@ -117,7 +204,78 @@ int v4_generate_JIT_code(const struct V4_Instruction* code, v4_random_math_JIT_f
__builtin___clear_cache((char*)buf, (char*)JIT_code);

return 0;
#elif defined(__PPC__) || defined(__PPC64__)
uint8_t* JIT_code = (uint8_t*) buf;
const uint8_t* JIT_code_end = JIT_code + buf_size;
static const uint8_t regN[] = {4,5,6,7,8,9,10,11,12};
static const uint8_t r0 = 0;

#if __BYTE_ORDER == __BIG_ENDIAN
//use end of buffer to store pointer to beginning of buffer (weirdness of the BE ppc abi)
size_t l = buf_size/sizeof(void*);
if (sizeof(void*) == sizeof(uint32_t)){
*((uint32_t*)buf+l-1) = (uint32_t)buf;
}
else if(sizeof(void*) == sizeof(uint64_t)){
*((uint64_t*)buf+l-1) = (uint64_t)buf;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That can also be a single op.

#endif

APPEND_CODE(prologue,sizeof(prologue));
for (uint32_t i = 0;; ++i)
{
uint8_t dst = code[i].dst_index;
uint8_t src = code[i].src_index;
uint32_t tmp[] = {0,0,0,0};
int16_t* C;
switch (code[i].opcode)
{
case MUL:
tmp[0] = ppcgen_op(ppcMULLW,regN[dst],regN[dst],regN[src]);
APPEND_CODE(tmp,1*sizeof(uint32_t));
break;
case ADD:
C = (int16_t*)&code[i].C;
#if __BYTE_ORDER == __BIG_ENDIAN
uint16_t hi = (uint16_t)C[0];
int16_t lo = C[1];
#else
uint16_t hi = (uint16_t)C[1];
int16_t lo = C[0];
#endif
tmp[0] = ppcgen_op(ppcADD,regN[dst],regN[dst],regN[src]);
if(lo < 0) hi+=1; //compensate ADDI sign extension to 32bits
tmp[1] = ppcgen_op(ppcADDIS,regN[dst],regN[dst],hi); // sum upper 16bits
tmp[2] = ppcgen_op(ppcADDI,regN[dst],regN[dst],lo); // sum lower 16bits
APPEND_CODE(tmp,3*sizeof(uint32_t));
break;
case SUB:
tmp[0] = ppcgen_op(ppcSUBF,regN[dst],regN[src],regN[dst]);
APPEND_CODE(tmp,1*sizeof(uint32_t));
break;
case ROR:
tmp[0] = ppcgen_op(ppcNEG,r0,regN[src],0);
tmp[1] = ppcgen_op(ppcROTLW,regN[dst],regN[dst],r0);
APPEND_CODE(tmp,2*sizeof(uint32_t));
break;
case ROL:
tmp[0] = ppcgen_op(ppcROTLW,regN[dst],regN[dst],regN[src]);
APPEND_CODE(tmp,1*sizeof(uint32_t));
break;
case XOR:
tmp[0] = ppcgen_op(ppcXOR,regN[dst],regN[dst],regN[src]);
APPEND_CODE(tmp,1*sizeof(uint32_t));
break;
case RET:
APPEND_CODE(epilogue,sizeof(epilogue));
return 0;
default:
return -1;
}
}
//end ppc64
#else
return 1;
#endif
return 1;
}
5 changes: 5 additions & 0 deletions src/crypto/CryptonightR_JIT.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ __attribute__((sysv_abi))
#endif
;

#if defined __PPC__ || defined __PPC64__
// Assembles one 32-bit PowerPC instruction word.
// op is one of the ppcXXX base opcode words defined in CryptonightR_JIT.c;
// a0 and a1 are register numbers; a2 is a register number or, for the
// immediate forms, a 16-bit immediate.
// Returns the encoded instruction, or op unchanged if it is not a supported opcode.
uint32_t ppcgen_op(uint32_t op,uint32_t a0, uint32_t a1, uint32_t a2 );
#endif


// Given the random math sequence, generates machine code for it (x86-64, or PowerPC when built for __PPC__ / __PPC64__)
// Returns 0 if code was generated successfully
// Returns -1 if provided buffer was too small
Expand Down
Loading