diff --git a/CHANGELOG.md b/CHANGELOG.md index 7821b2b2a..c6245b178 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ OpenTelemetry Go Automatic Instrumentation adheres to [Semantic Versioning](http - Close `proc` file when done discovering PID. ([#649](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/649)) - Use `debug` packages to parse Go and modules' versions. ([#653](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/653)) - Clean up warn in otelglobal `SetStatus()` when grabbing the status code. ([#675](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/675)) +- Avoid using runtime.NumCPU to get the number of CPUs on the system before remote mmap ([#680](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/680)) ## [v0.10.1-alpha] - 2024-01-10 diff --git a/internal/include/alloc.h b/internal/include/alloc.h index f4d94b09b..4cf71b25e 100644 --- a/internal/include/alloc.h +++ b/internal/include/alloc.h @@ -22,7 +22,7 @@ #define MIN_BUFFER_SIZE 1 // Injected in init -volatile const u32 total_cpus; +volatile const u64 total_cpus; volatile const u64 start_addr; volatile const u64 end_addr; @@ -108,6 +108,12 @@ static __always_inline void *write_target_data(void *data, s32 size) { target += dist_to_next_page; } + u64 target_u = (u64)target; + if (target_u > end_addr || target_u < start_addr) { + bpf_printk("TARGET ADDRESS IS OUT OF BOUNDS: 0x%llx", target); + return NULL; + } + long success = bpf_probe_write_user(target, data, size); if (success == 0) { diff --git a/internal/pkg/inject/consts.go b/internal/pkg/inject/consts.go index 4db5644a4..d881bfee2 100644 --- a/internal/pkg/inject/consts.go +++ b/internal/pkg/inject/consts.go @@ -19,7 +19,6 @@ import ( "encoding/json" "errors" "fmt" - "runtime" "github.com/cilium/ebpf" "github.com/hashicorp/go-version" @@ -34,8 +33,6 @@ var ( offsets = structfield.NewIndex() errNotFound = errors.New("offset not found") - - nCPU = uint32(runtime.NumCPU()) ) const ( @@ -114,7 +111,7 @@ func WithRegistersABI(value bool) Option { // "start_addr", and "end_addr". func WithAllocationDetails(details process.AllocationDetails) Option { return option{ - keyTotalCPUs: nCPU, + keyTotalCPUs: details.NumCPU, keyStartAddr: details.StartAddr, keyEndAddr: details.EndAddr, } diff --git a/internal/pkg/inject/consts_test.go b/internal/pkg/inject/consts_test.go index 7e4ef9367..6eeeb81bd 100644 --- a/internal/pkg/inject/consts_test.go +++ b/internal/pkg/inject/consts_test.go @@ -37,10 +37,11 @@ func TestWithRegistersABI(t *testing.T) { } func TestWithAllocationDetails(t *testing.T) { - const start, end uint64 = 1, 2 + const start, end, nCPU uint64 = 1, 2, 3 details := process.AllocationDetails{ StartAddr: start, EndAddr: end, + NumCPU: nCPU, } opts := []Option{WithAllocationDetails(details)} @@ -51,8 +52,8 @@ func TestWithAllocationDetails(t *testing.T) { require.Contains(t, got, keyEndAddr) v := got[keyTotalCPUs] - require.IsType(t, *(new(uint32)), v) - assert.Equal(t, nCPU, v.(uint32)) + require.IsType(t, *(new(uint64)), v) + assert.Equal(t, nCPU, v.(uint64)) v = got[keyStartAddr] require.IsType(t, *(new(uint64)), v) diff --git a/internal/pkg/instrumentation/utils/kernel.go b/internal/pkg/instrumentation/utils/kernel.go index e233c7d42..e398f66a9 100644 --- a/internal/pkg/instrumentation/utils/kernel.go +++ b/internal/pkg/instrumentation/utils/kernel.go @@ -16,6 +16,7 @@ package utils import ( "bufio" + "errors" "fmt" "os" "strconv" @@ -96,3 +97,90 @@ func KernelLockdownMode() KernelLockdown { return KernelLockdownNone } + +// Injectable for tests, this file is one of the files LSCPU looks for. +var cpuPresentPath = "/sys/devices/system/cpu/present" + +func GetCPUCountFromSysDevices() (int, error) { + rawFile, err := os.ReadFile(cpuPresentPath) + if err != nil { + return 0, err + } + + cpuCount, err := parseCPUList(string(rawFile)) + if err != nil { + return 0, err + } + + return cpuCount, nil +} + +func parseCPUList(raw string) (int, error) { + listPart := strings.Split(raw, ",") + count := 0 + for _, v := range listPart { + if strings.Contains(v, "-") { + rangeC, err := parseCPURange(v) + if err != nil { + return 0, fmt.Errorf("error parsing line %s: %w", v, err) + } + count = count + rangeC + } else { + count++ + } + } + return count, nil +} + +func parseCPURange(cpuRange string) (int, error) { + var first, last int + _, err := fmt.Sscanf(cpuRange, "%d-%d", &first, &last) + if err != nil { + return 0, fmt.Errorf("error reading from range %s: %w", cpuRange, err) + } + + return (last - first) + 1, nil +} + +var procInfoPath = "/proc/cpuinfo" + +func GetCPUCountFromProc() (int, error) { + file, err := os.Open(procInfoPath) + if err != nil { + return 0, err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + count := 0 + for scanner.Scan() { + if strings.Contains(scanner.Text(), "processor") { + count++ + } + } + + if err := scanner.Err(); err != nil { + return 0, err + } + + return count, nil +} + +func GetCPUCount() (int, error) { + var err error + // First try to get the CPU count from /sys/devices + cpuCount, e := GetCPUCountFromSysDevices() + if e == nil { + return cpuCount, nil + } + err = errors.Join(err, e) + + // If that fails, try to get the CPU count from /proc + cpuCount, e = GetCPUCountFromProc() + if e == nil { + return cpuCount, nil + } + err = errors.Join(err, e) + + return 0, err +} diff --git a/internal/pkg/instrumentation/utils/kernel_test.go b/internal/pkg/instrumentation/utils/kernel_test.go index 10b8260cd..c6e449457 100644 --- a/internal/pkg/instrumentation/utils/kernel_test.go +++ b/internal/pkg/instrumentation/utils/kernel_test.go @@ -85,28 +85,28 @@ func TestLockdownParsing(t *testing.T) { // Setup for testing lockdownPath = path - setIntegrity(t, path, "none [integrity] confidentiality\n") + setContent(t, path, "none [integrity] confidentiality\n") assert.Equal(t, KernelLockdownIntegrity, KernelLockdownMode()) - setIntegrity(t, path, "[none] integrity confidentiality\n") + setContent(t, path, "[none] integrity confidentiality\n") assert.Equal(t, KernelLockdownNone, KernelLockdownMode()) - setIntegrity(t, path, "none integrity [confidentiality]\n") + setContent(t, path, "none integrity [confidentiality]\n") assert.Equal(t, KernelLockdownConfidentiality, KernelLockdownMode()) - setIntegrity(t, path, "whatever\n") + setContent(t, path, "whatever\n") assert.Equal(t, KernelLockdownOther, KernelLockdownMode()) - setIntegrity(t, path, "") + setContent(t, path, "") assert.Equal(t, KernelLockdownIntegrity, KernelLockdownMode()) - setIntegrity(t, path, "[none] integrity confidentiality\n") + setContent(t, path, "[none] integrity confidentiality\n") setNotReadable(t, path) assert.Equal(t, KernelLockdownIntegrity, KernelLockdownMode()) } // Utils. -func setIntegrity(t *testing.T, path, text string) { +func setContent(t *testing.T, path, text string) { err := os.WriteFile(path, []byte(text), 0o644) assert.NoError(t, err) } @@ -115,3 +115,108 @@ func setNotReadable(t *testing.T, path string) { err := os.Chmod(path, 0o00) assert.NoError(t, err) } + +func TestGetCPUCountFromSysDevices(t *testing.T) { + noFile, err := os.CreateTemp("", "not_existent_fake_cpu_present") + assert.NoError(t, err) + notPath, err := filepath.Abs(noFile.Name()) + assert.NoError(t, err) + assert.NoError(t, noFile.Close()) + assert.NoError(t, os.Remove(noFile.Name())) + + // Setup for testing file that doesn't exist + cpuPresentPath = notPath + ncpu, err := GetCPUCountFromSysDevices() + assert.Error(t, err) + assert.Equal(t, 0, ncpu) + + tempFile, err := os.CreateTemp("", "fake_cpu_present") + assert.NoError(t, err) + path, err := filepath.Abs(tempFile.Name()) + assert.NoError(t, err) + assert.NoError(t, tempFile.Close()) + + defer os.Remove(tempFile.Name()) + // Setup for testing + cpuPresentPath = path + + setContent(t, path, "0-7") + ncpu, err = GetCPUCountFromSysDevices() + assert.NoError(t, err) + assert.Equal(t, 8, ncpu) + + setContent(t, path, "0-7,10-15") + ncpu, err = GetCPUCountFromSysDevices() + assert.NoError(t, err) + assert.Equal(t, 14, ncpu) + + setContent(t, path, "0-7,10-15,20-23") + ncpu, err = GetCPUCountFromSysDevices() + assert.NoError(t, err) + assert.Equal(t, 18, ncpu) + + setContent(t, path, "0-") + ncpu, err = GetCPUCountFromSysDevices() + assert.Error(t, err) + assert.Equal(t, 0, ncpu) + + setNotReadable(t, path) + ncpu, err = GetCPUCountFromSysDevices() + assert.Error(t, err) + assert.Equal(t, 0, ncpu) +} + +func TestGetCPUCountFromProc(t *testing.T) { + noFile, err := os.CreateTemp("", "not_existent_fake_cpuinfo") + assert.NoError(t, err) + notPath, err := filepath.Abs(noFile.Name()) + assert.NoError(t, err) + assert.NoError(t, noFile.Close()) + assert.NoError(t, os.Remove(noFile.Name())) + + // Setup for testing file that doesn't exist + procInfoPath = notPath + ncpu, err := GetCPUCountFromProc() + assert.Error(t, err) + assert.Equal(t, 0, ncpu) + + tempFile, err := os.CreateTemp("", "fake_cpuinfo") + assert.NoError(t, err) + path, err := filepath.Abs(tempFile.Name()) + assert.NoError(t, err) + assert.NoError(t, tempFile.Close()) + + defer os.Remove(tempFile.Name()) + // Setup for testing + procInfoPath = path + + setContent(t, path, "processor : 0") + ncpu, err = GetCPUCountFromProc() + assert.NoError(t, err) + assert.Equal(t, 1, ncpu) + + setContent(t, path, "processor : 0\nprocessor : 1") + ncpu, err = GetCPUCountFromProc() + assert.NoError(t, err) + assert.Equal(t, 2, ncpu) + + setContent(t, path, "processor : 0\nprocessor : 1\nprocessor : 2") + ncpu, err = GetCPUCountFromProc() + assert.NoError(t, err) + assert.Equal(t, 3, ncpu) + + setContent(t, path, "processor : 0\nprocessor : 1\nprocessor : 2\nprocessor : 3") + ncpu, err = GetCPUCountFromProc() + assert.NoError(t, err) + assert.Equal(t, 4, ncpu) + + setContent(t, path, "processor : 0\n some text \nprocessor : 1") + ncpu, err = GetCPUCountFromProc() + assert.NoError(t, err) + assert.Equal(t, 2, ncpu) + + setNotReadable(t, path) + ncpu, err = GetCPUCountFromProc() + assert.Error(t, err) + assert.Equal(t, 0, ncpu) +} diff --git a/internal/pkg/process/allocate.go b/internal/pkg/process/allocate.go index 8a68b2c47..5697ec86e 100644 --- a/internal/pkg/process/allocate.go +++ b/internal/pkg/process/allocate.go @@ -22,6 +22,7 @@ import ( "github.com/go-logr/logr" + "go.opentelemetry.io/auto/internal/pkg/instrumentation/utils" "go.opentelemetry.io/auto/internal/pkg/process/ptrace" ) @@ -29,13 +30,30 @@ import ( type AllocationDetails struct { StartAddr uint64 EndAddr uint64 + NumCPU uint64 } // Allocate allocates memory for the instrumented process. func Allocate(logger logr.Logger, pid int) (*AllocationDetails, error) { logger = logger.WithName("Allocate") - mapSize := uint64(os.Getpagesize() * runtime.NumCPU() * 8) + // runtime.NumCPU doesn't query any kind of hardware or OS state, + // but merely uses affinity APIs to count what CPUs the given go process is available to run on. + // Go's implementation of runtime.NumCPU (https://github.com/golang/go/blob/48d899dcdbed4534ed942f7ec2917cf86b18af22/src/runtime/os_linux.go#L97) + // uses sched_getaffinity to count the number of CPUs the process is allowed to run on. + // We are interested in the number of CPUs available to the system. + nCPU, err := utils.GetCPUCount() + if err != nil { + return nil, err + } + + mapSize := uint64(os.Getpagesize() * nCPU * 8) + logger.Info( + "Requesting memory allocation", + "size", mapSize, + "page size", os.Getpagesize(), + "cpu count", nCPU) + addr, err := remoteAllocate(logger, pid, mapSize) if err != nil { return nil, err @@ -43,13 +61,14 @@ func Allocate(logger logr.Logger, pid int) (*AllocationDetails, error) { logger.Info( "mmaped remote memory", - "start_addr", fmt.Sprintf("%X", addr), - "end_addr", fmt.Sprintf("%X", addr+mapSize), + "start_addr", fmt.Sprintf("0x%x", addr), + "end_addr", fmt.Sprintf("0x%x", addr+mapSize), ) return &AllocationDetails{ StartAddr: addr, EndAddr: addr + mapSize, + NumCPU: uint64(nCPU), }, nil }