Skip to content

Commit

Permalink
Update CALL relocs - try JMP (to epilogue) then CALL (target func) th…
Browse files Browse the repository at this point in the history
…en JMP (back from epilogue) instead of CALL (into epilogue) then JMP (into target func), since if a traceback hits while we're in the epilogue, the funcForPC for the current PC will not be aligned with the link register (regardless of whether the LR is virtual or real), and it will appear that the callee and caller are the same `_func`
  • Loading branch information
Anonymous authored and eh-steve committed Dec 7, 2023
1 parent 2133bbf commit 684a15f
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
strategy:
fail-fast: false
matrix:
go-version: [ 1.18.X, 1.19.X, 1.20.X , 1.21.0-rc.3 ]
go-version: [ 1.18.X, 1.19.X, 1.20.X , 1.21.X ]
os: [ubuntu-latest, windows-latest, macos-latest, [self-hosted, Linux, ARM64], [self-hosted, macOS, ARM64]]
cgo-enabled: ["CGO_ENABLED=0", "CGO_ENABLED=1"]
dynlink: ["JIT_GC_DYNLINK=1", "JIT_GC_DYNLINK=0"]
Expand Down
11 changes: 7 additions & 4 deletions asm_bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@ const (

// x86/amd64
var (
x86amd64NOPcode = byte(0x90)
x86amd64JMPLcode = []byte{0xff, 0x25, 0x00, 0x00, 0x00, 0x00} // JMPL *ADDRESS
x86amd64JMPNearCode = []byte{0xE9, 0x00, 0x00, 0x00, 0x00} // JMP (PCREL offset)+4
x86amd64JMPShortCode = []byte{0xEB, 0x00} // JMP (PCREL offset)+1
x86amd64NOPcode = byte(0x90)
x86amd64JMPLcode = []byte{0xff, 0x25, 0x00, 0x00, 0x00, 0x00} // JMPL *ADDRESS
x86amd64JMPNearCode = []byte{0xE9, 0x00, 0x00, 0x00, 0x00} // JMP (PCREL offset)+4
x86amd64JMPShortCode = []byte{0xEB, 0x00} // JMP (PCREL offset)+1
x86amd64CALLFarCode = []byte{
0xff, 0x15, 0x00, 0x00, 0x00, 0x00, // CALL ptr [RIP]
}
x86amd64replaceCMPLcode = []byte{
0x50, // PUSH RAX
0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // MOVABS RAX, imm64 (64 bit)
Expand Down
2 changes: 1 addition & 1 deletion jit/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ require (

require github.com/opentracing/opentracing-go v1.2.0 // indirect

//replace github.com/eh-steve/goloader => ../
replace github.com/eh-steve/goloader => ../
//replace github.com/eh-steve/goloader/jit/testdata => ./testdata
5 changes: 3 additions & 2 deletions jit/jit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ func TestJitEmbeddedStruct(t *testing.T) {
}

func TestSchedule(t *testing.T) {
t.Skip("this takes forever")
conf := baseConfig
data := testData{
files: []string{"./testdata/test_schedule/test.go"},
Expand Down Expand Up @@ -496,7 +497,7 @@ func TestPatchMultipleModuleItabs(t *testing.T) {
}
fmt.Println(len(result2))
afterCall := runtime.NumGoroutine()
for afterCall != start {
for afterCall > start {
time.Sleep(100 * time.Millisecond)
runtime.GC()
afterCall = runtime.NumGoroutine()
Expand Down Expand Up @@ -668,7 +669,7 @@ func TestJitGoroutines(t *testing.T) {
time.Sleep(100 * time.Millisecond)
afterStop := runtime.NumGoroutine()
sleepCount := 0
for afterStop != before {
for afterStop > before {
time.Sleep(100 * time.Millisecond)
runtime.GC()
afterStop = runtime.NumGoroutine()
Expand Down
19 changes: 16 additions & 3 deletions ld.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,11 +330,14 @@ func (linker *Linker) addSymbol(name string, globalSymPtr map[string]uintptr) (s
case x86amd64CALL2code: // CGo FF 15 PCREL call
if instructionBytes[1] == 0x15 {
epilogueSize = maxExtraInstructionBytesPCRELxCALL2
break
}
fallthrough // Might be FF XX E8/E9 ...
default:
switch instructionBytes[1] {
case x86amd64CALLcode:
epilogueSize = maxExtraInstructionBytesPCRELxCALL
opcode = x86amd64CALLcode
epilogueSize = maxExtraInstructionBytesCALLNear
case x86amd64JMPcode:
epilogueSize = maxExtraInstructionBytesPCRELxJMP
}
Expand All @@ -350,6 +353,10 @@ func (linker *Linker) addSymbol(name string, globalSymPtr map[string]uintptr) (s
if shortJmp {
epilogueSize = maxExtraInstructionBytesPCRELxCMPLShort
}
case x86amd64CALLcode:
if shortJmp {
epilogueSize = maxExtraInstructionBytesCALLShort
}
}
objsym.Reloc[i].EpilogueSize = epilogueSize
linker.code = append(linker.code, createArchNops(linker.Arch, epilogueSize)...)
Expand All @@ -364,9 +371,15 @@ func (linker *Linker) addSymbol(name string, globalSymPtr map[string]uintptr) (s
alignment := alignof(len(linker.code)-symbol.Offset, PtrSize) - (len(linker.code) - symbol.Offset)
linker.code = append(linker.code, createArchNops(linker.Arch, objsym.Reloc[i].EpilogueSize+alignment)...)
case reloctype.R_CALL, reloctype.R_CALL | reloctype.R_WEAK:
epilogueSize := maxExtraInstructionBytesCALLNear
returnOffset := (reloc.Offset + reloc.Size) - (objsym.Reloc[i].EpilogueOffset + epilogueSize) - len(x86amd64JMPShortCode) // assumes short jump, adjusts if not
shortJmp := returnOffset < 0 && returnOffset > -0x80
objsym.Reloc[i].EpilogueOffset = len(linker.code) - symbol.Offset
objsym.Reloc[i].EpilogueSize = maxExtraInstructionBytesCALL
linker.code = append(linker.code, createArchNops(linker.Arch, maxExtraInstructionBytesCALL)...)
if shortJmp {
epilogueSize = maxExtraInstructionBytesCALLShort
}
objsym.Reloc[i].EpilogueSize = epilogueSize
linker.code = append(linker.code, createArchNops(linker.Arch, epilogueSize)...)
}
bytearrayAlignNops(linker.Arch, &linker.code, PtrSize)
}
Expand Down
47 changes: 38 additions & 9 deletions relocate.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ var (
maxExtraInstructionBytesPCRELxMOVNear = len(x86amd64replaceMOVQcode) + len(x86amd64JMPNearCode)
maxExtraInstructionBytesPCRELxCMPLShort = len(x86amd64replaceCMPLcode) + len(x86amd64JMPShortCode)
maxExtraInstructionBytesPCRELxCMPLNear = len(x86amd64replaceCMPLcode) + len(x86amd64JMPNearCode)
maxExtraInstructionBytesPCRELxCALL = len(x86amd64JMPLcode) + PtrSize
maxExtraInstructionBytesPCRELxCALL2 = PtrSize
maxExtraInstructionBytesPCRELxJMP = len(x86amd64JMPLcode) + PtrSize
maxExtraInstructionBytesCALL = len(x86amd64JMPLcode) + PtrSize
maxExtraInstructionBytesCALLShort = len(x86amd64CALLFarCode) + len(x86amd64JMPShortCode) + PtrSize
maxExtraInstructionBytesCALLNear = len(x86amd64CALLFarCode) + len(x86amd64JMPNearCode) + PtrSize
maxExtraInstructionBytesGOTPCREL = PtrSize
maxExtraInstructionBytesARM64GOTPCREL = PtrSize
)
Expand Down Expand Up @@ -140,14 +140,28 @@ func (linker *Linker) relocateCALL(addr uintptr, loc obj.Reloc, segment *segment
copy(segment.codeByte[epilogueOffset:epilogueOffset+loc.EpilogueSize], createX86Nops(loc.EpilogueSize))

if offset > 0x7FFFFFFF || offset < -0x80000000 || (linker.options.ForceTestRelocationEpilogues && loc.EpilogueSize > 0) {
// "CALL" into the epilogue, then immediately JMPL into the actual func using a PCREL 8 byte
// address immediately after the epilogue - the RET will bring us straight back to the call site
// JMP into the epilogue, then CALL into the actual func using a PCREL 8 byte address placed after the JMP back from the epilogue
if loc.EpilogueSize == 0 {
return fmt.Errorf("relocation epilogue not available but got a >32-bit CALL reloc (x86 code: %x) with offset %d: %s", relocByte[loc.Offset-2:loc.Offset+loc.Size], offset, loc.Sym.Name)
}
offset = (segment.codeBase + epilogueOffset) - (addrBase + loc.Offset + loc.Size)
copy(segment.codeByte[epilogueOffset:], x86amd64JMPLcode)
epilogueOffset += len(x86amd64JMPLcode)
// Replace the E8 CALL with an E9 JMP into the epilogue, then CALL the function, then JMP (short or near) back
relocByte[loc.Offset-1] = x86amd64JMPcode
offset = (segment.codeBase + epilogueOffset) - (addrBase + loc.Offset + loc.Size) // Point the JMP offset at the epilogue
copy(segment.codeByte[epilogueOffset:], x86amd64CALLFarCode)
epilogueOffset += len(x86amd64CALLFarCode)
returnOffset := (loc.Offset + loc.Size) - epilogueOffset - len(x86amd64JMPShortCode) // assumes short jump - if we need a near jump, we'll adjust
if returnOffset > -0x80 && returnOffset < 0 {
byteorder.PutUint32(relocByte[epilogueOffset-4:], uint32(len(x86amd64JMPShortCode))) // Read the 8 bytes after the length of the JMP back
copy(segment.codeByte[epilogueOffset:], x86amd64JMPShortCode)
segment.codeByte[epilogueOffset+1] = uint8(returnOffset)
epilogueOffset += len(x86amd64JMPShortCode)
} else {
byteorder.PutUint32(relocByte[epilogueOffset-4:], uint32(len(x86amd64JMPNearCode))) // Read the 8 bytes after the length of the JMP back
returnOffset -= len(x86amd64JMPNearCode) - len(x86amd64JMPShortCode)
copy(segment.codeByte[epilogueOffset:], x86amd64JMPNearCode)
byteorder.PutUint32(segment.codeByte[epilogueOffset+1:], uint32(returnOffset))
epilogueOffset += len(x86amd64JMPNearCode)
}
putAddressAddOffset(byteorder, segment.codeByte, &epilogueOffset, uint64(addr)+uint64(loc.Add))
}
byteorder.PutUint32(relocByte[loc.Offset:], uint32(offset))
Expand Down Expand Up @@ -233,8 +247,23 @@ func (linker *Linker) relocatePCREL(addr uintptr, loc obj.Reloc, segment *segmen
epilogueOffset += len(x86amd64replaceMOVQcode)
}
case x86amd64CALLcode:
copy(segment.codeByte[epilogueOffset:], x86amd64JMPLcode)
epilogueOffset += len(x86amd64JMPLcode)
bytes[1] = x86amd64JMPcode
offset = (segment.codeBase + epilogueOffset) - (addrBase + loc.Offset + loc.Size) // Point the JMP offset at the epilogue
copy(segment.codeByte[epilogueOffset:], x86amd64CALLFarCode)
epilogueOffset += len(x86amd64CALLFarCode)
returnOffset := (loc.Offset + loc.Size) - epilogueOffset - len(x86amd64JMPShortCode) // assumes short jump - if we need a near jump, we'll adjust
if returnOffset > -0x80 && returnOffset < 0 {
byteorder.PutUint32(relocByte[epilogueOffset-4:], uint32(len(x86amd64JMPShortCode))) // Read the 8 bytes after the length of the JMP back
copy(segment.codeByte[epilogueOffset:], x86amd64JMPShortCode)
segment.codeByte[epilogueOffset+1] = uint8(returnOffset)
epilogueOffset += len(x86amd64JMPShortCode)
} else {
byteorder.PutUint32(relocByte[epilogueOffset-4:], uint32(len(x86amd64JMPNearCode))) // Read the 8 bytes after the length of the JMP back
returnOffset -= len(x86amd64JMPNearCode) - len(x86amd64JMPShortCode)
copy(segment.codeByte[epilogueOffset:], x86amd64JMPNearCode)
byteorder.PutUint32(segment.codeByte[epilogueOffset+1:], uint32(returnOffset))
epilogueOffset += len(x86amd64JMPNearCode)
}
putAddressAddOffset(byteorder, segment.codeByte, &epilogueOffset, uint64(addr)+uint64(loc.Add))
case x86amd64CALL2code:
putAddressAddOffset(byteorder, segment.codeByte, &epilogueOffset, uint64(addr)+uint64(loc.Add))
Expand Down
2 changes: 1 addition & 1 deletion utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func createARM64Nops(size int) []byte {
}

nops := make([]byte, size)
for i := 0; i < size/4; i += 4 {
for i := 0; i < size; i += 4 {
copy(nops[i:], arm64NopCode)
}
return nops
Expand Down

0 comments on commit 684a15f

Please sign in to comment.