open-telemetry · edeNFed · Feb 20, 2024 · Feb 19, 2024 · Feb 19, 2024
@@ -18,6 +18,7 @@ OpenTelemetry Go Automatic Instrumentation adheres to [Semantic Versioning](http
 - Close `proc` file when done discovering PID. ([#649](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/649))
 - Use `debug` packages to parse Go and modules' versions. ([#653](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/653))
 - Clean up warn in otelglobal `SetStatus()` when grabbing the status code. ([#675](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/675))
+- Avoid using runtime.NumCPU to get the number of CPUs on the system before remote mmap ([#680](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/680))
 
 ## [v0.10.1-alpha] - 2024-01-10
 

@@ -22,7 +22,7 @@
 #define MIN_BUFFER_SIZE 1
 
 // Injected in init
-volatile const u32 total_cpus;
+volatile const u64 total_cpus;
 volatile const u64 start_addr;
 volatile const u64 end_addr;
 
@@ -108,6 +108,12 @@ static __always_inline void *write_target_data(void *data, s32 size)
     {
         target += dist_to_next_page;
     }
+    u64 target_u = (u64)target;
+    if (target_u > end_addr || target_u < start_addr) {
+        bpf_printk("TARGET ADDRESS IS OUT OF BOUNDS: 0x%llx", target);
+        return NULL;
+    }
+
     long success = bpf_probe_write_user(target, data, size);
     if (success == 0)
     {

@@ -19,7 +19,6 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"runtime"
 
 	"github.com/cilium/ebpf"
 	"github.com/hashicorp/go-version"
@@ -34,8 +33,6 @@ var (
 
 	offsets     = structfield.NewIndex()
 	errNotFound = errors.New("offset not found")
-
-	nCPU = uint32(runtime.NumCPU())
 )
 
 const (
@@ -114,7 +111,7 @@ func WithRegistersABI(value bool) Option {
 // "start_addr", and "end_addr".
 func WithAllocationDetails(details process.AllocationDetails) Option {
 	return option{
-		keyTotalCPUs: nCPU,
+		keyTotalCPUs: details.NumCPU,
 		keyStartAddr: details.StartAddr,
 		keyEndAddr:   details.EndAddr,
 	}

@@ -37,10 +37,11 @@ func TestWithRegistersABI(t *testing.T) {
 }
 
 func TestWithAllocationDetails(t *testing.T) {
-	const start, end uint64 = 1, 2
+	const start, end, nCPU uint64 = 1, 2, 3
 	details := process.AllocationDetails{
 		StartAddr: start,
 		EndAddr:   end,
+		NumCPU:    nCPU,
 	}
 
 	opts := []Option{WithAllocationDetails(details)}
@@ -51,8 +52,8 @@ func TestWithAllocationDetails(t *testing.T) {
 	require.Contains(t, got, keyEndAddr)
 
 	v := got[keyTotalCPUs]
-	require.IsType(t, *(new(uint32)), v)
-	assert.Equal(t, nCPU, v.(uint32))
+	require.IsType(t, *(new(uint64)), v)
+	assert.Equal(t, nCPU, v.(uint64))
 
 	v = got[keyStartAddr]
 	require.IsType(t, *(new(uint64)), v)

@@ -16,6 +16,7 @@ package utils
 
 import (
 	"bufio"
+	"errors"
 	"fmt"
 	"os"
 	"strconv"
@@ -96,3 +97,90 @@ func KernelLockdownMode() KernelLockdown {
 
 	return KernelLockdownNone
 }
+
+// Injectable for tests, this file is one of the files LSCPU looks for.
+var cpuPresentPath = "/sys/devices/system/cpu/present"
+
+func GetCPUCountFromSysDevices() (int, error) {
+	rawFile, err := os.ReadFile(cpuPresentPath)
+	if err != nil {
+		return 0, err
+	}
+
+	cpuCount, err := parseCPUList(string(rawFile))
+	if err != nil {
+		return 0, err
+	}
+
+	return cpuCount, nil
+}
+
+func parseCPUList(raw string) (int, error) {
+	listPart := strings.Split(raw, ",")
+	count := 0
+	for _, v := range listPart {
+		if strings.Contains(v, "-") {
+			rangeC, err := parseCPURange(v)
+			if err != nil {
+				return 0, fmt.Errorf("error parsing line %s: %w", v, err)
+			}
+			count = count + rangeC
+		} else {
+			count++
+		}
+	}
+	return count, nil
+}
+
+func parseCPURange(cpuRange string) (int, error) {
+	var first, last int
+	_, err := fmt.Sscanf(cpuRange, "%d-%d", &first, &last)
+	if err != nil {
+		return 0, fmt.Errorf("error reading from range %s: %w", cpuRange, err)
+	}
+
+	return (last - first) + 1, nil
+}
+
+var procInfoPath = "/proc/cpuinfo"
+
+func GetCPUCountFromProc() (int, error) {
+	file, err := os.Open(procInfoPath)
+	if err != nil {
+		return 0, err
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	count := 0
+	for scanner.Scan() {
+		if strings.Contains(scanner.Text(), "processor") {
+			count++
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return 0, err
+	}
+
+	return count, nil
+}
+
+func GetCPUCount() (int, error) {
+	var err error
+	// First try to get the CPU count from /sys/devices
+	cpuCount, e := GetCPUCountFromSysDevices()
+	if e == nil {
+		return cpuCount, nil
+	}
+	err = errors.Join(err, e)
+
+	// If that fails, try to get the CPU count from /proc
+	cpuCount, e = GetCPUCountFromProc()
+	if e == nil {
+		return cpuCount, nil
+	}
+	err = errors.Join(err, e)
+
+	return 0, err
+}
@@ -85,28 +85,28 @@ func TestLockdownParsing(t *testing.T) {
 	// Setup for testing
 	lockdownPath = path
 
-	setIntegrity(t, path, "none [integrity] confidentiality\n")
+	setContent(t, path, "none [integrity] confidentiality\n")
 	assert.Equal(t, KernelLockdownIntegrity, KernelLockdownMode())
 
-	setIntegrity(t, path, "[none] integrity confidentiality\n")
+	setContent(t, path, "[none] integrity confidentiality\n")
 	assert.Equal(t, KernelLockdownNone, KernelLockdownMode())
 
-	setIntegrity(t, path, "none integrity [confidentiality]\n")
+	setContent(t, path, "none integrity [confidentiality]\n")
 	assert.Equal(t, KernelLockdownConfidentiality, KernelLockdownMode())
 
-	setIntegrity(t, path, "whatever\n")
+	setContent(t, path, "whatever\n")
 	assert.Equal(t, KernelLockdownOther, KernelLockdownMode())
 
-	setIntegrity(t, path, "")
+	setContent(t, path, "")
 	assert.Equal(t, KernelLockdownIntegrity, KernelLockdownMode())
 
-	setIntegrity(t, path, "[none] integrity confidentiality\n")
+	setContent(t, path, "[none] integrity confidentiality\n")
 	setNotReadable(t, path)
 	assert.Equal(t, KernelLockdownIntegrity, KernelLockdownMode())
 }
 
 // Utils.
-func setIntegrity(t *testing.T, path, text string) {
+func setContent(t *testing.T, path, text string) {
 	err := os.WriteFile(path, []byte(text), 0o644)
 	assert.NoError(t, err)
 }
@@ -115,3 +115,108 @@ func setNotReadable(t *testing.T, path string) {
 	err := os.Chmod(path, 0o00)
 	assert.NoError(t, err)
 }
+
+func TestGetCPUCountFromSysDevices(t *testing.T) {
+	noFile, err := os.CreateTemp("", "not_existent_fake_cpu_present")
+	assert.NoError(t, err)
+	notPath, err := filepath.Abs(noFile.Name())
+	assert.NoError(t, err)
+	assert.NoError(t, noFile.Close())
+	assert.NoError(t, os.Remove(noFile.Name()))
+
+	// Setup for testing file that doesn't exist
+	cpuPresentPath = notPath
+	ncpu, err := GetCPUCountFromSysDevices()
+	assert.Error(t, err)
+	assert.Equal(t, 0, ncpu)
+
+	tempFile, err := os.CreateTemp("", "fake_cpu_present")
+	assert.NoError(t, err)
+	path, err := filepath.Abs(tempFile.Name())
+	assert.NoError(t, err)
+	assert.NoError(t, tempFile.Close())
+
+	defer os.Remove(tempFile.Name())
+	// Setup for testing
+	cpuPresentPath = path
+
+	setContent(t, path, "0-7")
+	ncpu, err = GetCPUCountFromSysDevices()
+	assert.NoError(t, err)
+	assert.Equal(t, 8, ncpu)
+
+	setContent(t, path, "0-7,10-15")
+	ncpu, err = GetCPUCountFromSysDevices()
+	assert.NoError(t, err)
+	assert.Equal(t, 14, ncpu)
+
+	setContent(t, path, "0-7,10-15,20-23")
+	ncpu, err = GetCPUCountFromSysDevices()
+	assert.NoError(t, err)
+	assert.Equal(t, 18, ncpu)
+
+	setContent(t, path, "0-")
+	ncpu, err = GetCPUCountFromSysDevices()
+	assert.Error(t, err)
+	assert.Equal(t, 0, ncpu)
+
+	setNotReadable(t, path)
+	ncpu, err = GetCPUCountFromSysDevices()
+	assert.Error(t, err)
+	assert.Equal(t, 0, ncpu)
+}
+
+func TestGetCPUCountFromProc(t *testing.T) {
+	noFile, err := os.CreateTemp("", "not_existent_fake_cpuinfo")
+	assert.NoError(t, err)
+	notPath, err := filepath.Abs(noFile.Name())
+	assert.NoError(t, err)
+	assert.NoError(t, noFile.Close())
+	assert.NoError(t, os.Remove(noFile.Name()))
+
+	// Setup for testing file that doesn't exist
+	procInfoPath = notPath
+	ncpu, err := GetCPUCountFromProc()
+	assert.Error(t, err)
+	assert.Equal(t, 0, ncpu)
+
+	tempFile, err := os.CreateTemp("", "fake_cpuinfo")
+	assert.NoError(t, err)
+	path, err := filepath.Abs(tempFile.Name())
+	assert.NoError(t, err)
+	assert.NoError(t, tempFile.Close())
+
+	defer os.Remove(tempFile.Name())
+	// Setup for testing
+	procInfoPath = path
+
+	setContent(t, path, "processor	: 0")
+	ncpu, err = GetCPUCountFromProc()
+	assert.NoError(t, err)
+	assert.Equal(t, 1, ncpu)
+
+	setContent(t, path, "processor	: 0\nprocessor	: 1")
+	ncpu, err = GetCPUCountFromProc()
+	assert.NoError(t, err)
+	assert.Equal(t, 2, ncpu)
+
+	setContent(t, path, "processor	: 0\nprocessor	: 1\nprocessor	: 2")
+	ncpu, err = GetCPUCountFromProc()
+	assert.NoError(t, err)
+	assert.Equal(t, 3, ncpu)
+
+	setContent(t, path, "processor	: 0\nprocessor	: 1\nprocessor	: 2\nprocessor	: 3")
+	ncpu, err = GetCPUCountFromProc()
+	assert.NoError(t, err)
+	assert.Equal(t, 4, ncpu)
+
+	setContent(t, path, "processor	: 0\n some text \nprocessor	: 1")
+	ncpu, err = GetCPUCountFromProc()
+	assert.NoError(t, err)
+	assert.Equal(t, 2, ncpu)
+
+	setNotReadable(t, path)
+	ncpu, err = GetCPUCountFromProc()
+	assert.Error(t, err)
+	assert.Equal(t, 0, ncpu)
+}
@@ -22,34 +22,53 @@ import (
 
 	"github.com/go-logr/logr"
 
+	"go.opentelemetry.io/auto/internal/pkg/instrumentation/utils"
 	"go.opentelemetry.io/auto/internal/pkg/process/ptrace"
 )
 
 // AllocationDetails are the details about allocated memory.
 type AllocationDetails struct {
 	StartAddr uint64
 	EndAddr   uint64
+	NumCPU    uint64
 }
 
 // Allocate allocates memory for the instrumented process.
 func Allocate(logger logr.Logger, pid int) (*AllocationDetails, error) {
 	logger = logger.WithName("Allocate")
 
-	mapSize := uint64(os.Getpagesize() * runtime.NumCPU() * 8)
+	// runtime.NumCPU doesn't query any kind of hardware or OS state,
+	// but merely uses affinity APIs to count what CPUs the given go process is available to run on.
+	// Go's implementation of runtime.NumCPU (https://github.com/golang/go/blob/48d899dcdbed4534ed942f7ec2917cf86b18af22/src/runtime/os_linux.go#L97)
+	// uses sched_getaffinity to count the number of CPUs the process is allowed to run on.
+	// We are interested in the number of CPUs available to the system.
+	nCPU, err := utils.GetCPUCount()
+	if err != nil {
+		return nil, err
+	}
+
+	mapSize := uint64(os.Getpagesize() * nCPU * 8)
+	logger.Info(
+		"Requesting memory allocation",
+		"size", mapSize,
+		"page size", os.Getpagesize(),
+		"cpu count", nCPU)
+
 	addr, err := remoteAllocate(logger, pid, mapSize)
 	if err != nil {
 		return nil, err
 	}
 
 	logger.Info(
 		"mmaped remote memory",
-		"start_addr", fmt.Sprintf("%X", addr),
-		"end_addr", fmt.Sprintf("%X", addr+mapSize),
+		"start_addr", fmt.Sprintf("0x%x", addr),
+		"end_addr", fmt.Sprintf("0x%x", addr+mapSize),
 	)
 
 	return &AllocationDetails{
 		StartAddr: addr,
 		EndAddr:   addr + mapSize,
+		NumCPU:    uint64(nCPU),
 	}, nil
 }