Skip to content

Commit

Permalink
feat: AVFoundation encoder uses EncodingController
Browse files Browse the repository at this point in the history
  • Loading branch information
yuto-trd committed Aug 2, 2024
1 parent 7212d7e commit 81e129c
Show file tree
Hide file tree
Showing 8 changed files with 497 additions and 551 deletions.
148 changes: 148 additions & 0 deletions src/Beutl.Extensions.AVFoundation/Encoding/AVFAudioEncoderSettings.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
using Beutl.Media.Encoding;

namespace Beutl.Extensions.AVFoundation.Encoding;

/// <summary>
/// Audio encoder settings for the AVFoundation encoding extension.
/// </summary>
/// <remarks>
/// Each option is backed by a <c>CoreProperty</c> that is registered in the static
/// constructor. The inherited bitrate default is overridden to <c>-1</c>;
/// <c>AVFEncodingController</c> passes no explicit encoder bitrate when it sees that value.
/// </remarks>
public sealed class AVFAudioEncoderSettings : AudioEncoderSettings
{
    public static readonly CoreProperty<AudioFormatType> FormatProperty;
    public static readonly CoreProperty<BitDepth> LinearPcmBitDepthProperty;
    public static readonly CoreProperty<bool> LinearPcmBigEndianProperty;
    public static readonly CoreProperty<bool> LinearPcmFloatProperty;
    public static readonly CoreProperty<bool> LinearPcmNonInterleavedProperty;
    public static readonly CoreProperty<AudioQuality> QualityProperty;
    public static readonly CoreProperty<AudioQuality> SampleRateConverterQualityProperty;

    static AVFAudioEncoderSettings()
    {
        // Registration order is kept exactly as declared above.
        FormatProperty =
            ConfigureProperty<AudioFormatType, AVFAudioEncoderSettings>(nameof(Format))
                .DefaultValue(AudioFormatType.MPEG4AAC)
                .Register();

        LinearPcmBitDepthProperty =
            ConfigureProperty<BitDepth, AVFAudioEncoderSettings>(nameof(LinearPcmBitDepth))
                .DefaultValue(BitDepth.Bits16)
                .Register();

        LinearPcmBigEndianProperty =
            ConfigureProperty<bool, AVFAudioEncoderSettings>(nameof(LinearPcmBigEndian))
                .DefaultValue(false)
                .Register();

        LinearPcmFloatProperty =
            ConfigureProperty<bool, AVFAudioEncoderSettings>(nameof(LinearPcmFloat))
                .DefaultValue(false)
                .Register();

        LinearPcmNonInterleavedProperty =
            ConfigureProperty<bool, AVFAudioEncoderSettings>(nameof(LinearPcmNonInterleaved))
                .DefaultValue(false)
                .Register();

        QualityProperty =
            ConfigureProperty<AudioQuality, AVFAudioEncoderSettings>(nameof(Quality))
                .DefaultValue(AudioQuality.Default)
                .Register();

        SampleRateConverterQualityProperty =
            ConfigureProperty<AudioQuality, AVFAudioEncoderSettings>(nameof(SampleRateConverterQuality))
                .DefaultValue(AudioQuality.Default)
                .Register();

        // -1 is the "not specified" bitrate for this settings type.
        BitrateProperty.OverrideDefaultValue<AVFAudioEncoderSettings>(-1);
    }

    /// <summary>Target audio format. Defaults to <see cref="AudioFormatType.MPEG4AAC"/>.</summary>
    public AudioFormatType Format
    {
        get { return GetValue(FormatProperty); }
        set { SetValue(FormatProperty, value); }
    }

    /// <summary>Bit depth for linear PCM output. Defaults to <see cref="BitDepth.Bits16"/>.</summary>
    public BitDepth LinearPcmBitDepth
    {
        get { return GetValue(LinearPcmBitDepthProperty); }
        set { SetValue(LinearPcmBitDepthProperty, value); }
    }

    /// <summary>Big-endian flag for linear PCM output. Defaults to <c>false</c>.</summary>
    public bool LinearPcmBigEndian
    {
        get { return GetValue(LinearPcmBigEndianProperty); }
        set { SetValue(LinearPcmBigEndianProperty, value); }
    }

    /// <summary>Floating-point flag for linear PCM output. Defaults to <c>false</c>.</summary>
    public bool LinearPcmFloat
    {
        get { return GetValue(LinearPcmFloatProperty); }
        set { SetValue(LinearPcmFloatProperty, value); }
    }

    /// <summary>Non-interleaved flag for linear PCM output. Defaults to <c>false</c>.</summary>
    public bool LinearPcmNonInterleaved
    {
        get { return GetValue(LinearPcmNonInterleavedProperty); }
        set { SetValue(LinearPcmNonInterleavedProperty, value); }
    }

    /// <summary>Encoder quality. Defaults to <see cref="AudioQuality.Default"/>.</summary>
    public AudioQuality Quality
    {
        get { return GetValue(QualityProperty); }
        set { SetValue(QualityProperty, value); }
    }

    /// <summary>Sample-rate converter quality. Defaults to <see cref="AudioQuality.Default"/>.</summary>
    public AudioQuality SampleRateConverterQuality
    {
        get { return GetValue(SampleRateConverterQualityProperty); }
        set { SetValue(SampleRateConverterQualityProperty, value); }
    }

    /// <summary>Selectable linear PCM bit depths; each member's value is its bit count.</summary>
    public enum BitDepth
    {
        Bits8 = 8,
        Bits16 = 16,
        Bits24 = 24,
        Bits32 = 32
    }

    /// <summary>
    /// Audio quality levels. <see cref="Default"/> (-1) means "not specified".
    /// </summary>
    public enum AudioQuality
    {
        Default = -1,
        Min = 0x00,    // 0
        Low = 0x20,    // 32
        Medium = 0x40, // 64
        High = 0x60,   // 96
        Max = 0x7F,    // 127
    }

    /// <summary>
    /// Audio format identifiers. Values are written in hexadecimal; the decimal
    /// equivalent is given in the trailing comment.
    /// </summary>
    public enum AudioFormatType
    {
        MPEGLayer1 = 0x2E6D7031,           // 778924081
        MPEGLayer2 = 0x2E6D7032,           // 778924082
        MPEGLayer3 = 0x2E6D7033,           // 778924083
        Audible = 0x41554442,              // 1096107074
        MACE3 = 0x4D414333,                // 1296122675
        MACE6 = 0x4D414336,                // 1296122678
        QDesign2 = 0x51444D32,             // 1363430706
        QDesign = 0x51444D43,              // 1363430723
        QUALCOMM = 0x51636C70,             // 1365470320
        MPEG4AAC = 0x61616320,             // 1633772320
        MPEG4AAC_ELD = 0x61616365,         // 1633772389
        MPEG4AAC_ELD_SBR = 0x61616366,     // 1633772390
        MPEG4AAC_ELD_V2 = 0x61616367,      // 1633772391
        MPEG4AAC_HE = 0x61616368,          // 1633772392
        MPEG4AAC_LD = 0x6161636C,          // 1633772396
        MPEG4AAC_HE_V2 = 0x61616370,       // 1633772400
        MPEG4AAC_Spatial = 0x61616373,     // 1633772403
        AC3 = 0x61632D33,                  // 1633889587
        AES3 = 0x61657333,                 // 1634038579
        AppleLossless = 0x616C6163,        // 1634492771
        ALaw = 0x616C6177,                 // 1634492791
        ParameterValueStream = 0x61707673, // 1634760307
        CAC3 = 0x63616333,                 // 1667326771
        MPEG4CELP = 0x63656C70,            // 1667591280
        MPEG4HVXC = 0x68767863,            // 1752594531
        iLBC = 0x696C6263,                 // 1768710755
        AppleIMA4 = 0x696D6134,            // 1768775988
        LinearPCM = 0x6C70636D,            // 1819304813
        MIDIStream = 0x6D696469,           // 1835623529
        DVIIntelIMA = 0x6D730011,          // 1836253201
        MicrosoftGSM = 0x6D730031,         // 1836253233
        AMR = 0x73616D72,                  // 1935764850
        TimeCode = 0x74696D65,             // 1953066341
        MPEG4TwinVQ = 0x74777671,          // 1953986161
        ULaw = 0x756C6177,                 // 1970037111
    }
}
48 changes: 0 additions & 48 deletions src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs

This file was deleted.

170 changes: 170 additions & 0 deletions src/Beutl.Extensions.AVFoundation/Encoding/AVFEncodingController.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
using System.Runtime.Versioning;
using Beutl.Extensibility;
using Beutl.Media.Encoding;
using MonoMac.AudioToolbox;
using MonoMac.AVFoundation;
using MonoMac.CoreFoundation;
using MonoMac.CoreMedia;
using MonoMac.CoreVideo;
using MonoMac.Foundation;

namespace Beutl.Extensions.AVFoundation.Encoding;

/// <summary>
/// Encoding controller that writes rendered video frames and audio samples to
/// <c>OutputFile</c> through an AVFoundation <see cref="AVAssetWriter"/>.
/// </summary>
[SupportedOSPlatform("macos")]
public class AVFEncodingController(string outputFile) : EncodingController(outputFile)
{
    // Number of audio samples requested from the sample provider per appended sample buffer.
    private const int AudioSamplesPerChunk = 1024;

    public override AVFVideoEncoderSettings VideoSettings { get; } = new();

    public override AVFAudioEncoderSettings AudioSettings { get; } = new();

    /// <summary>
    /// Creates the compressed video input and its pixel-buffer adaptor from
    /// <see cref="VideoSettings"/> and attaches the input to <paramref name="writer"/>.
    /// </summary>
    /// <param name="writer">Asset writer that receives the new input.</param>
    /// <param name="videoInput">The created video input.</param>
    /// <param name="adaptor">Adaptor used to append pixel buffers to <paramref name="videoInput"/>.</param>
    private void ConfigureVideoInput(
        AVAssetWriter writer,
        out AVAssetWriterInput videoInput, out AVAssetWriterInputPixelBufferAdaptor adaptor)
    {
        videoInput = AVAssetWriterInput.Create(AVMediaType.Video, new AVVideoSettingsCompressed
        {
            Width = VideoSettings.DestinationSize.Width,
            Height = VideoSettings.DestinationSize.Height,
            Codec = VideoSettings.Codec.ToAVVideoCodec(),
            CodecSettings = new AVVideoCodecSettings
            {
                // -1 (or a negative JPEG quality) means "not specified": pass null so no explicit value is set.
                AverageBitRate = VideoSettings.Bitrate == -1 ? null : VideoSettings.Bitrate,
                MaxKeyFrameInterval = VideoSettings.KeyframeRate == -1 ? null : VideoSettings.KeyframeRate,
                JPEGQuality = VideoSettings.JPEGQuality < 0 ? null : VideoSettings.JPEGQuality,
                ProfileLevelH264 = VideoSettings.ProfileLevelH264.ToAVVideoProfileLevelH264(),
            },
        });
        videoInput.ExpectsMediaDataInRealTime = true;
        adaptor = AVAssetWriterInputPixelBufferAdaptor.Create(videoInput,
            new CVPixelBufferAttributes
            {
                PixelFormatType = CVPixelFormatType.CV32ARGB,
                Width = VideoSettings.SourceSize.Width,
                // Fixed: the height was previously taken from SourceSize.Width.
                Height = VideoSettings.SourceSize.Height,
            });
        writer.AddInput(videoInput);
    }

    /// <summary>
    /// Creates the audio input from <see cref="AudioSettings"/> and attaches it to
    /// <paramref name="writer"/>.
    /// </summary>
    /// <param name="writer">Asset writer that receives the new input.</param>
    /// <param name="audioInput">The created audio input.</param>
    private void ConfigureAudioInput(
        AVAssetWriter writer,
        out AVAssetWriterInput audioInput)
    {
        var audioSettings = new AudioSettings
        {
            SampleRate = AudioSettings.SampleRate,
            // -1 / Default mean "not specified": pass null so no explicit value is set.
            EncoderBitRate = AudioSettings.Bitrate == -1 ? null : AudioSettings.Bitrate,
            NumberChannels = AudioSettings.Channels,
            Format = AudioSettings.Format.ToAudioFormatType(),
            AudioQuality =
                AudioSettings.Quality == AVFAudioEncoderSettings.AudioQuality.Default
                    ? null
                    : (AVAudioQuality?)AudioSettings.Quality,
            SampleRateConverterAudioQuality =
                AudioSettings.SampleRateConverterQuality == AVFAudioEncoderSettings.AudioQuality.Default
                    ? null
                    : (AVAudioQuality?)AudioSettings.SampleRateConverterQuality,
        };

        // The linear PCM options are only forwarded when the output format is linear PCM.
        if (audioSettings.Format == AudioFormatType.LinearPCM)
        {
            audioSettings.LinearPcmFloat = AudioSettings.LinearPcmFloat;
            audioSettings.LinearPcmBigEndian = AudioSettings.LinearPcmBigEndian;
            audioSettings.LinearPcmBitDepth = (int?)AudioSettings.LinearPcmBitDepth;
            audioSettings.LinearPcmNonInterleaved = AudioSettings.LinearPcmNonInterleaved;
        }

        audioInput = AVAssetWriterInput.Create(AVMediaType.Audio, audioSettings);
        audioInput.ExpectsMediaDataInRealTime = true;
        writer.AddInput(audioInput);
    }

    /// <summary>
    /// Samples <paramref name="length"/> audio samples starting at <paramref name="offset"/>,
    /// wraps them in a sample buffer and appends it to <paramref name="input"/>.
    /// </summary>
    /// <param name="sampleProvider">Source of audio samples.</param>
    /// <param name="input">Audio input of the asset writer.</param>
    /// <param name="offset">Index of the first sample; also used as the presentation time in sample-rate units.</param>
    /// <param name="length">Number of samples to request.</param>
    /// <returns>The result of <c>AppendSampleBuffer</c>.</returns>
    /// <exception cref="InvalidOperationException">
    /// The source format could not be completed or the sample buffer could not be created.
    /// </exception>
    private async ValueTask<bool> WriteAudioFrame(ISampleProvider sampleProvider, AVAssetWriterInput input, long offset,
        long length)
    {
        using var sound = await sampleProvider.Sample(offset, length);

        // Describe the sampled data as packed 32-bit float linear PCM.
        var sourceFormat =
            AudioStreamBasicDescription.CreateLinearPCM(sound.SampleRate, (uint)sound.NumChannels);
        sourceFormat.FormatFlags = AudioFormatFlags.IsFloat | AudioFormatFlags.IsPacked;
        sourceFormat.BitsPerChannel = 32;
        var fmtError = AudioStreamBasicDescription.GetFormatInfo(ref sourceFormat);
        if (fmtError != AudioFormatError.None)
        {
            throw new InvalidOperationException(fmtError.ToString());
        }

        uint inputDataSize = (uint)(sound.SampleSize * sound.NumSamples);
        var time = new CMTime(offset, sound.SampleRate);

        // NOTE(review): dataBuffer and formatDescription come from project helpers whose
        // ownership rules are not visible here, so they are left to the finalizer — confirm
        // whether they can be disposed deterministically as well.
        var dataBuffer = InternalMethods.CreateCMBlockBufferWithMemoryBlock(
            inputDataSize, sound.Data, CMBlockBufferFlags.AlwaysCopyData);

        var formatDescription = InternalMethods.CreateAudioFormatDescription(sourceFormat);

        // Release the managed reference to the native sample buffer as soon as it has been appended.
        using var sampleBuffer = CMSampleBuffer.CreateWithPacketDescriptions(dataBuffer, formatDescription,
            sound.NumSamples, time, null, out var sampleBufferError);
        if (sampleBufferError != CMSampleBufferError.None)
        {
            throw new InvalidOperationException(sampleBufferError.ToString());
        }

        return input.AppendSampleBuffer(sampleBuffer);
    }

    /// <summary>
    /// Renders frame <paramref name="frame"/>, converts it to a pixel buffer and appends it
    /// through <paramref name="adaptor"/>.
    /// </summary>
    /// <param name="frameProvider">Source of rendered frames.</param>
    /// <param name="adaptor">Pixel-buffer adaptor of the video input.</param>
    /// <param name="frame">Zero-based frame index.</param>
    /// <returns>
    /// <c>false</c> when the image could not be converted to a pixel buffer; otherwise the
    /// result of <c>AppendPixelBufferWithPresentationTime</c>.
    /// </returns>
    private async ValueTask<bool> WriteVideoFrame(IFrameProvider frameProvider,
        AVAssetWriterInputPixelBufferAdaptor adaptor, long frame)
    {
        using var image = await frameProvider.RenderFrame(frame);

        // Presentation time = frame * (denominator / numerator) seconds, expressed as a rational.
        var time = new CMTime(frame * VideoSettings.FrameRate.Denominator,
            (int)VideoSettings.FrameRate.Numerator);

        // Dispose the pixel buffer after appending so a full frame of native memory is not
        // kept alive until finalization for every encoded frame.
        using CVPixelBuffer? pixelBuffer = AVFSampleUtilities.ConvertToCVPixelBuffer(image);
        if (pixelBuffer == null)
        {
            return false;
        }

        return adaptor.AppendPixelBufferWithPresentationTime(pixelBuffer, time);
    }

    /// <summary>
    /// Encodes all frames and samples from the given providers into <c>OutputFile</c> as an MPEG-4 file.
    /// </summary>
    /// <param name="frameProvider">Source of video frames.</param>
    /// <param name="sampleProvider">Source of audio samples.</param>
    /// <param name="cancellationToken">
    /// When cancellation is requested the loop stops and the data written so far is finalized.
    /// </param>
    /// <exception cref="InvalidOperationException">
    /// The writer could not be created or started, or finished in the failed state.
    /// </exception>
    public override async ValueTask Encode(IFrameProvider frameProvider, ISampleProvider sampleProvider,
        CancellationToken cancellationToken)
    {
        var url = NSUrl.FromFilename(OutputFile);
        var writer = AVAssetWriter.FromUrl(url, AVFileType.Mpeg4, out var error);
        if (error != null)
        {
            throw new InvalidOperationException(error.LocalizedDescription);
        }

        ConfigureVideoInput(writer, out var videoInput, out var videoAdaptor);
        ConfigureAudioInput(writer, out var audioInput);

        if (!writer.StartWriting())
        {
            throw new InvalidOperationException("Failed to start writing");
        }

        writer.StartSessionAtSourceTime(CMTime.Zero);
        bool encodeAudio = true;
        bool encodeVideo = true;
        long sampleCount = 0;
        long frameCount = 0;

        while ((encodeVideo || encodeAudio) && !cancellationToken.IsCancellationRequested)
        {
            // Both timestamps are whole seconds (integer division), so the two streams are
            // interleaved in runs of roughly one second each.
            long videoTs = frameCount * VideoSettings.FrameRate.Denominator / VideoSettings.FrameRate.Numerator;
            long audioTs = sampleCount / sampleProvider.SampleRate;
            if (encodeVideo &&
                (!encodeAudio || videoTs <= audioTs))
            {
                encodeVideo = await WriteVideoFrame(frameProvider, videoAdaptor, frameCount);
                frameCount++;
                if (frameCount >= frameProvider.FrameCount)
                {
                    videoInput.MarkAsFinished();
                    encodeVideo = false;
                }
            }
            else
            {
                encodeAudio = await WriteAudioFrame(sampleProvider, audioInput, sampleCount, AudioSamplesPerChunk);
                sampleCount += AudioSamplesPerChunk;
                if (sampleCount >= sampleProvider.SampleCount)
                {
                    audioInput.MarkAsFinished();
                    encodeAudio = false;
                }
            }
        }

        // The parameterless FinishWriting() is obsolete; use the completion-handler overload
        // and await it so this method still returns only after the file has been finalized.
        var finished = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
        writer.FinishWriting(() => finished.TrySetResult());
        await finished.Task;

        // Surface a failed write instead of silently leaving a broken file behind.
        if (writer.Status == AVAssetWriterStatus.Failed)
        {
            throw new InvalidOperationException(
                writer.Error?.LocalizedDescription ?? "Failed to finish writing");
        }
    }
}
Loading

0 comments on commit 81e129c

Please sign in to comment.