PyAV-Org · WyattBlue · Dec 25, 2023 · Dec 24, 2022 · Jul 23, 2023 · Dec 24, 2023
diff --git a/av/video/frame.pxd b/av/video/frame.pxd
@@ -11,6 +11,7 @@ cdef class VideoFrame(Frame):
     # This is the buffer that is used to back everything in the AVFrame.
     # We don't ever actually access it directly.
     cdef uint8_t *_buffer
+    cdef object _np_buffer
 
     cdef VideoReformatter reformatter
 

diff --git a/av/video/frame.pyx b/av/video/frame.pyx
@@ -6,7 +6,7 @@ from av.enum cimport define_enum
 from av.error cimport err_check
 from av.utils cimport check_ndarray, check_ndarray_shape
 from av.video.format cimport get_pix_fmt, get_video_format
-from av.video.plane cimport VideoPlane
+from av.video.plane cimport VideoPlane, YUVPlanes
 
 
 cdef object _cinit_bypass_sentinel
@@ -118,6 +118,8 @@ cdef class VideoFrame(Frame):
         # The `self._buffer` member is only set if *we* allocated the buffer in `_init`,
         # as opposed to a buffer allocated by a decoder.
         lib.av_freep(&self._buffer)
+        # Let go of the reference from the numpy buffers if we made one
+        self._np_buffer = None
 
     def __repr__(self):
         return '<av.%s #%d, pts=%s %s %dx%d at 0x%x>' % (
@@ -150,7 +152,6 @@ cdef class VideoFrame(Frame):
         cdef int plane_count = 0
         while plane_count < max_plane_count and self.ptr.extended_data[plane_count]:
             plane_count += 1
-
         return tuple([VideoPlane(self, i) for i in range(plane_count)])
 
     property width:
@@ -290,11 +291,21 @@ cdef class VideoFrame(Frame):
         if frame.format.name in ('yuv420p', 'yuvj420p'):
             assert frame.width % 2 == 0
             assert frame.height % 2 == 0
-            return np.hstack((
-                useful_array(frame.planes[0]),
-                useful_array(frame.planes[1]),
-                useful_array(frame.planes[2])
-            )).reshape(-1, frame.width)
+            # Fast path for the case that the entire YUV data is contiguous
+            if (
+                frame.planes[0].line_size == frame.planes[0].width and
+                frame.planes[1].line_size == frame.planes[1].width and
+                frame.planes[2].line_size == frame.planes[2].width
+            ):
+                yuv_planes = YUVPlanes(frame, 0)
+                return useful_array(yuv_planes).reshape(frame.height * 3 // 2, frame.width)
+            else:
+                # Otherwise, we need to copy the data through the use of np.hstack
+                return np.hstack((
+                    useful_array(frame.planes[0]),
+                    useful_array(frame.planes[1]),
+                    useful_array(frame.planes[2])
+                )).reshape(-1, frame.width)
         elif frame.format.name in ('yuv444p', 'yuvj444p'):
             return np.hstack((
                 useful_array(frame.planes[0]),
@@ -369,6 +380,86 @@ cdef class VideoFrame(Frame):
 
         return frame
 
+    @staticmethod
+    def from_numpy_buffer(array, format="rgb24"):
+        """Wrap a C-contiguous uint8 ndarray as a VideoFrame without copying its data.
+
+        Raises ValueError for an unsupported format or a non-C-contiguous array."""
+        if format in ("rgb24", "bgr24"):
+            check_ndarray(array, "uint8", 3)
+            check_ndarray_shape(array, array.shape[2] == 3)
+            height, width = array.shape[:2]
+        elif format in ("gray", "gray8", "rgb8", "bgr8"):
+            check_ndarray(array, "uint8", 2)
+            height, width = array.shape[:2]
+        elif format in ("yuv420p", "yuvj420p", "nv12"):
+            check_ndarray(array, "uint8", 2)
+            check_ndarray_shape(array, array.shape[0] % 3 == 0)
+            check_ndarray_shape(array, array.shape[1] % 2 == 0)
+            height, width = array.shape[0] * 2 // 3, array.shape[1]  # array rows == height * 3 // 2
+        else:
+            raise ValueError(f"Conversion from numpy array with format `{format}` is not yet supported")
+        if not array.flags["C_CONTIGUOUS"]:
+            raise ValueError("provided array must be C_CONTIGUOUS")
+        frame = alloc_video_frame()
+        frame._image_fill_pointers_numpy(array, width, height, format)
+        return frame
+
+    def _image_fill_pointers_numpy(self, buffer, width, height, format):
+        """Point this frame's planes at the memory of the numpy array ``buffer``.
+
+        No pixel data is copied; a reference to ``buffer`` is stored on the
+        frame so that the memory outlives the pointers into it.
+
+        :param buffer: array backing the frame. Line sizes are derived from
+            ``format`` and ``width`` alone, so rows are assumed tightly packed
+            (``from_numpy_buffer`` requires a C-contiguous array).
+        :param int width: frame width in pixels.
+        :param int height: frame height in pixels.
+        :param str format: pixel format name, resolved with ``get_pix_fmt``.
+        """
+        cdef lib.AVPixelFormat c_format
+        cdef uint8_t * c_ptr
+        cdef size_t c_data
+        cdef int res
+
+        # ``buffer.ctypes.data`` gives the address as a Python int, which avoids
+        # the numpy C-API (and so a compile-time dependency on numpy). The
+        # address must go through ``size_t`` first: casting the Python object
+        # directly to ``uint8_t*`` fails with "TypeError: expected bytes, int found".
+        c_data = buffer.ctypes.data
+        c_ptr = <uint8_t*> (c_data)
+        c_format = get_pix_fmt(format)
+
+        # Release any buffer this frame allocated for itself before re-pointing it.
+        lib.av_freep(&self._buffer)
+
+        # Hold on to a reference for the numpy buffer
+        # so that it doesn't get accidentally garbage collected
+        self._np_buffer = buffer
+        self.ptr.format = c_format
+        self.ptr.width = width
+        self.ptr.height = height
+
+        res = lib.av_image_fill_linesizes(
+            self.ptr.linesize,
+            <lib.AVPixelFormat>self.ptr.format,
+            width,
+        )
+        if res:
+            err_check(res)
+
+        res = lib.av_image_fill_pointers(
+            self.ptr.data,
+            <lib.AVPixelFormat>self.ptr.format,
+            self.ptr.height,
+            c_ptr,
+            self.ptr.linesize,
+        )
+        if res:
+            err_check(res)
+        self._init_user_attributes()
+
     @staticmethod
     def from_ndarray(array, format='rgb24'):
         """

diff --git a/av/video/plane.pxd b/av/video/plane.pxd
@@ -6,3 +6,7 @@ cdef class VideoPlane(Plane):
 
     cdef readonly size_t buffer_size
     cdef readonly unsigned int width, height
+
+
+cdef class YUVPlanes(VideoPlane):  # declares no C-level fields of its own; implemented in plane.pyx
+    pass
diff --git a/av/video/plane.pyx b/av/video/plane.pyx
@@ -2,11 +2,9 @@ from av.video.frame cimport VideoFrame
 
 
 cdef class VideoPlane(Plane):
-
     def __cinit__(self, VideoFrame frame, int index):
-
         # The palette plane has no associated component or linesize; set fields manually
-        if frame.format.name == 'pal8' and index == 1:
+        if frame.format.name == "pal8" and index == 1:
             self.width = 256
             self.height = 1
             self.buffer_size = 256 * 4
@@ -19,7 +17,7 @@ cdef class VideoPlane(Plane):
                 self.height = component.height
                 break
         else:
-            raise RuntimeError('could not find plane %d of %r' % (index, frame.format))
+            raise RuntimeError("could not find plane %d of %r" % (index, frame.format))
 
         # Sometimes, linesize is negative (and that is meaningful). We are only
         # insisting that the buffer size be based on the extent of linesize, and
@@ -37,3 +35,34 @@ cdef class VideoPlane(Plane):
         """
         def __get__(self):
             return self.frame.ptr.linesize[self.index]
+
+
+cdef class YUVPlanes(VideoPlane):
+    """All three planes of a yuv420p/yuvj420p frame exposed as a single buffer.
+
+    The buffer starts at plane 0 and spans ``frame.height * 3 // 2`` rows of
+    ``linesize[0]`` bytes.
+
+    NOTE(review): this presumes the U and V planes directly follow the Y plane
+    in memory with no row padding; nothing here verifies that, so callers must
+    check it before constructing one.
+    """
+
+    def __cinit__(self, VideoFrame frame, int index):
+        if index != 0:
+            raise RuntimeError("YUVPlanes only supports index 0")
+        if frame.format.name not in ("yuvj420p", "yuv420p"):
+            raise RuntimeError("YUVPlanes only supports yuv420p and yuvj420p")
+        if frame.ptr.linesize[0] < 0:
+            raise RuntimeError("YUVPlanes only supports positive linesize")
+        # Bind the frame before using it, so that nothing below depends on a
+        # base class having already stored it.
+        self.frame = frame
+        self.width = frame.width
+        self.height = frame.height * 3 // 2
+        # linesize[0] is non-negative here (checked above), so no abs() is needed.
+        self.buffer_size = self.height * frame.ptr.linesize[0]
+
+    cdef void* _buffer_ptr(self):
+        # __cinit__ only accepts index 0, so this is presumably the start of plane 0.
+        return self.frame.ptr.extended_data[self.index]
diff --git a/include/libavutil/avutil.pxd b/include/libavutil/avutil.pxd
@@ -297,6 +297,18 @@ cdef extern from "libavutil/imgutils.h" nogil:
         AVPixelFormat pix_fmt,
         int align
     )
+    cdef int av_image_fill_pointers(  # set the per-plane data pointers into the image buffer at ptr
+        uint8_t *pointers[4],
+        AVPixelFormat pix_fmt,
+        int height,
+        uint8_t *ptr,
+        const int linesizes[4]
+    )
+    cdef int av_image_fill_linesizes(  # compute the per-plane line sizes for pix_fmt at this width
+        int linesizes[4],
+        AVPixelFormat pix_fmt,
+        int width,
+    )
 
 
 cdef extern from "libavutil/log.h" nogil:

diff --git a/tests/test_videoframe.py b/tests/test_videoframe.py
@@ -491,6 +491,51 @@ def test_ndarray_nv12_align(self):
         self.assertNdarraysEqual(frame.to_ndarray(), array)
 
 
+class TestVideoFrameNumpyBuffer(TestCase):
+    def _assert_shares_memory(self, shape, format):
+        """Check that a frame made by ``from_numpy_buffer`` tracks its source array.
+
+        Builds a random uint8 array of ``shape``, wraps it as ``format``, and
+        verifies ``to_ndarray()`` matches it both before and after the array is
+        overwritten in place.
+        """
+        array = numpy.random.randint(0, 256, size=shape, dtype=numpy.uint8)
+        frame = VideoFrame.from_numpy_buffer(array, format)
+        self.assertNdarraysEqual(frame.to_ndarray(), array)
+
+        # Overwrite the contents in place; the array object itself is unchanged.
+        array[...] = numpy.random.randint(0, 256, size=shape, dtype=numpy.uint8)
+        # The frame must reflect the new contents.
+        self.assertNdarraysEqual(frame.to_ndarray(), array)
+
+    def test_shares_memory_gray(self):
+        self._assert_shares_memory((357, 318), "gray")
+
+    def test_shares_memory_gray8(self):
+        self._assert_shares_memory((357, 318), "gray8")
+
+    def test_shares_memory_rgb8(self):
+        self._assert_shares_memory((357, 318), "rgb8")
+
+    def test_shares_memory_bgr8(self):
+        self._assert_shares_memory((357, 318), "bgr8")
+
+    def test_shares_memory_rgb24(self):
+        self._assert_shares_memory((357, 318, 3), "rgb24")
+
+    def test_shares_memory_yuv420p(self):
+        self._assert_shares_memory((512 * 6 // 4, 256), "yuv420p")
+
+    def test_shares_memory_yuvj420p(self):
+        self._assert_shares_memory((512 * 6 // 4, 256), "yuvj420p")
+
+    def test_shares_memory_nv12(self):
+        self._assert_shares_memory((512 * 6 // 4, 256), "nv12")
+
+    def test_shares_memory_bgr24(self):
+        self._assert_shares_memory((357, 318, 3), "bgr24")
+
+
 class TestVideoFrameTiming(TestCase):
     def test_reformat_pts(self):
         frame = VideoFrame(640, 480, "rgb24")