Skip to content

Commit

Permalink
Pre-allocate list based on source length (#206)
Browse files Browse the repository at this point in the history
  • Loading branch information
atifaziz authored Sep 25, 2024
1 parent aa4d6d6 commit 5423d6b
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 33 deletions.
15 changes: 13 additions & 2 deletions src/CSnakes.Runtime/CPython/List.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,19 @@ internal static nint PyList_GetItem(PyObject obj, nint pos)
[LibraryImport(PythonLibraryName, EntryPoint = "PyList_GetItem")]
private static partial nint PyList_GetItem_(PyObject obj, nint pos);

[LibraryImport(PythonLibraryName)]
internal static partial int PyList_Append(PyObject obj, PyObject o);
/// <summary>
/// Stores <paramref name="o"/> at index <paramref name="pos"/> of the list
/// <paramref name="ob"/> while leaving the caller's own reference to
/// <paramref name="o"/> intact.
/// </summary>
/// <param name="ob">Raw pointer to the list object.</param>
/// <param name="pos">Index of the slot to set.</param>
/// <param name="o">Raw pointer to the item to store.</param>
/// <returns>The value returned by <c>PyList_SetItem</c>; -1 indicates failure.</returns>
internal static int PyList_SetItemRaw(nint ob, nint pos, nint o)
{
    // PyList_SetItem steals a reference to the item, and it does so even when it
    // fails. Take the extra reference up front so that the caller's reference is
    // never the one being consumed; incrementing only after a successful call
    // would leave the caller one reference short on the failure path.
    Py_IncRefRaw(o);
    return PyList_SetItem_(ob, pos, o);
}

/// <summary>
/// Direct binding to the native <c>PyList_SetItem</c> entry point, operating on raw pointers.
/// <c>PyList_SetItemRaw</c> wraps this call and adjusts the item's reference count around it.
/// </summary>
/// <returns>The native return code; callers in this file treat -1 as failure.</returns>
[LibraryImport(PythonLibraryName, EntryPoint = "PyList_SetItem")]
internal static partial int PyList_SetItem_(nint obj, nint pos, nint o);

internal static bool IsPyList(PyObject p)
{
Expand Down
11 changes: 7 additions & 4 deletions src/CSnakes.Runtime/CPython/Tuple.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ internal unsafe partial class CPythonAPI
private static nint PyTupleType = IntPtr.Zero;
private static nint PyEmptyTuple = IntPtr.Zero;

/// <summary>
/// Returns the cached empty-tuple pointer after adding a reference to it,
/// so the caller receives a reference of its own.
/// </summary>
/// <returns>The value of <c>PyEmptyTuple</c>.</returns>
public static nint GetPyEmptyTuple()
{
    // NOTE(review): PyEmptyTuple is presumably populated during runtime
    // initialisation elsewhere in this class - confirm it is non-zero here.
    nint emptyTuple = PyEmptyTuple;
    Py_IncRefRaw(emptyTuple);
    return emptyTuple;
}

/// <summary>
/// Create a PyTuple from the PyObject pointers in `items`.
/// Function handles the reference increments to items.
Expand All @@ -17,10 +23,7 @@ internal static nint PackTuple(Span<IntPtr> items)
{
// This is a shortcut to a CPython optimization. Keep an empty tuple and reuse it.
if (items.Length == 0)
{
Py_IncRefRaw(PyEmptyTuple);
return PyEmptyTuple;
}
return GetPyEmptyTuple();

nint tuple = PyTuple_New(items.Length);
for (int i = 0; i < items.Length; i++)
Expand Down
166 changes: 139 additions & 27 deletions src/CSnakes.Runtime/Python/Pack.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
using CSnakes.Runtime.CPython;
using System.Runtime.InteropServices.Marshalling;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using PyObjectMarshaller = System.Runtime.InteropServices.Marshalling.SafeHandleMarshaller<CSnakes.Runtime.Python.PyObject>;

namespace CSnakes.Runtime.Python;

Expand All @@ -10,46 +13,155 @@ namespace CSnakes.Runtime.Python;
/// </summary>
internal static class Pack
{
internal static PyObject CreateTuple(Span<PyObject> items)
internal static PyObject CreateTuple(Span<PyObject> items) =>
PyObject.Create(CreateListOrTuple<TupleBuilder>(items));

/// <summary>
/// Builds a Python list holding <paramref name="items"/> and wraps the
/// resulting raw handle in a <see cref="PyObject"/>.
/// </summary>
/// <param name="items">The objects to place in the list, in order.</param>
/// <returns>A <see cref="PyObject"/> created from the new list's handle.</returns>
internal static PyObject CreateList(Span<PyObject> items)
{
    nint listHandle = CreateListOrTuple<ListBuilder>(items);
    return PyObject.Create(listHandle);
}

private interface IListOrTupleBuilder
{
List<SafeHandleMarshaller<PyObject>.ManagedToUnmanagedIn> marshallers = new(items.Length);
try
static abstract nint New(nint size);
static abstract int SetItemRaw(nint ob, nint pos, nint o);
}

/// <summary>
/// <see cref="IListOrTupleBuilder"/> implementation that targets Python lists.
/// </summary>
private sealed class ListBuilder : IListOrTupleBuilder
{
    // Private constructor: the type is only used as a generic type argument.
    private ListBuilder() { }

    // As per Python/C API docs for `PyList_New`:
    //
    // > If len is greater than zero, the returned list object's items are set to `NULL`. Thus
    // > you cannot use abstract API functions such as `PySequence_SetItem()` or expose the
    // > object to Python code before setting all items to a real object with
    // > `PyList_SetItem()`.
    //
    // Source: https://docs.python.org/3/c-api/list.html#c.PyList_New

    /// <summary>Creates a new list via <c>PyList_New</c> with the given size.</summary>
    public static nint New(nint size)
    {
        return CPythonAPI.PyList_New(size);
    }

    /// <summary>Sets the item at <paramref name="pos"/> via <c>PyList_SetItemRaw</c>.</summary>
    public static int SetItemRaw(nint ob, nint pos, nint o)
    {
        return CPythonAPI.PyList_SetItemRaw(ob, pos, o);
    }
}

/// <summary>
/// <see cref="IListOrTupleBuilder"/> implementation that targets Python tuples.
/// </summary>
private sealed class TupleBuilder : IListOrTupleBuilder
{
    // Private constructor: the type is only used as a generic type argument.
    private TupleBuilder() { }

    /// <summary>
    /// Returns the shared empty tuple when <paramref name="size"/> is zero;
    /// otherwise creates a new tuple via <c>PyTuple_New</c>.
    /// </summary>
    public static nint New(nint size)
    {
        if (size == 0)
        {
            return CPythonAPI.GetPyEmptyTuple();
        }

        return CPythonAPI.PyTuple_New(size);
    }

    /// <summary>Sets the item at <paramref name="pos"/> via <c>PyTuple_SetItemRaw</c>.</summary>
    public static int SetItemRaw(nint ob, nint pos, nint o)
    {
        return CPythonAPI.PyTuple_SetItemRaw(ob, pos, o);
    }
}

const int FixedArrayLength = 8;

/// <summary>
/// Inline array of <c>FixedArrayLength</c> elements of <typeparamref name="T"/>,
/// stored directly inside the struct.
/// </summary>
[InlineArray(FixedArrayLength)]
private struct ArrayOf8<T>
{
    // The single field that InlineArray repeats; never accessed by name.
    private T _element0;
}

private static nint CreateListOrTuple<TBuilder>(Span<PyObject> items)
where TBuilder : IListOrTupleBuilder
{
// Allocate initial space for the handles and marshallers on the stack.
// If the number of items exceeds the stack space, allocate and spill
// the rest into an array on the heap.
// TODO Consider using an array pool for the spilled handles and marshallers.

const int stackSpillThreshold = FixedArrayLength;
var spillLength = Math.Max(0, items.Length - stackSpillThreshold);

Span<nint> initialHandles = stackalloc nint[Math.Min(stackSpillThreshold, items.Length)];
nint[]? spilledHandles = spillLength > 0 ? new nint[spillLength] : null;

var initialMarshallers = new ArrayOf8<PyObjectMarshaller.ManagedToUnmanagedIn>();
PyObjectMarshaller.ManagedToUnmanagedIn[]? spilledMarshallers =
spillLength > 0
? new PyObjectMarshaller.ManagedToUnmanagedIn[spillLength]
: null;

var uninitializedMarshallers = MemoryMarshal.CreateSpan(ref Unsafe.As<ArrayOf8<PyObjectMarshaller.ManagedToUnmanagedIn>, PyObjectMarshaller.ManagedToUnmanagedIn>(ref initialMarshallers), stackSpillThreshold);
var uninitializedHandles = initialHandles;

// The following loop initializes the marshallers and handles for each
// item in the input span. It is assumed that no exceptions are thrown
// during this loop. The marshallers are freed in the finally block of
// the actual list/tuple initialization.

foreach (var item in items)
{
var handles = items.Length < 18 // .NET tuples are max 17 items. This is a performance optimization.
? stackalloc IntPtr[items.Length]
: new IntPtr[items.Length];
PyObjectMarshaller.ManagedToUnmanagedIn m = default;
m.FromManaged(item);

for (int i = 0; i < items.Length; i++)
if (uninitializedMarshallers.IsEmpty)
{
SafeHandleMarshaller<PyObject>.ManagedToUnmanagedIn m = default;
m.FromManaged(items[i]);
marshallers.Add(m);
handles[i] = m.ToUnmanaged();
Debug.Assert(spilledMarshallers is not null);
uninitializedMarshallers = spilledMarshallers;
}
return PyObject.Create(CPythonAPI.PackTuple(handles));
}
finally
{
foreach (var m in marshallers)

uninitializedMarshallers[0] = m;
uninitializedMarshallers = uninitializedMarshallers[1..];

if (uninitializedHandles.IsEmpty)
{
m.Free();
Debug.Assert(spilledHandles is not null);
uninitializedHandles = spilledHandles;
}

uninitializedHandles[0] = m.ToUnmanaged();
uninitializedHandles = uninitializedHandles[1..];
}
}

internal static PyObject CreateList(Span<PyObject> items)
{
PyObject pyList = PyObject.Create(CPythonAPI.PyList_New(0)); // TODO: preallocate based on items.Length and use PyList_SetItem
nint obj = 0;

foreach (var item in items)
try
{
int result = CPythonAPI.PyList_Append(pyList, item);
if (result == -1)
obj = TBuilder.New(items.Length);
SetItems(obj, spilledHandles, SetItems(obj, initialHandles, 0));

return obj;

static int SetItems(nint obj, Span<nint> handles, int i)
{
throw PyObject.ThrowPythonExceptionAsClrException();
foreach (var handle in handles)
{
int result = TBuilder.SetItemRaw(obj, i++, handle);
if (result == -1)
{
throw PyObject.ThrowPythonExceptionAsClrException();
}
}

return i;
}
}
catch
{
if (obj != 0)
{
CPythonAPI.Py_DecRefRaw(obj);
}

throw;
}
finally
{
if (spilledMarshallers is null)
{
foreach (var m in initialMarshallers[..items.Length])
{
m.Free();
}
}
else
{
foreach (var m in initialMarshallers)
{
m.Free();
}

return pyList;
foreach (var m in spilledMarshallers)
{
m.Free();
}
}
}
}
}

0 comments on commit 5423d6b

Please sign in to comment.