From 253ac40312f3a803e426540324c6388eb3fd72c8 Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 4 Feb 2017 14:57:07 +0800 Subject: [PATCH] for #738, parse avcc/sps/pps/asc from mp4. --- trunk/research/librtmp/srs_ingest_mp4.c | 49 ++++++----- trunk/src/kernel/srs_kernel_codec.cpp | 29 +++++++ trunk/src/kernel/srs_kernel_codec.hpp | 11 ++- trunk/src/kernel/srs_kernel_error.hpp | 1 + trunk/src/kernel/srs_kernel_mp4.cpp | 103 ++++++++++++++++++++++-- trunk/src/kernel/srs_kernel_mp4.hpp | 41 ++++++++++ trunk/src/libs/srs_librtmp.cpp | 94 ++++++++++++++++++++- trunk/src/libs/srs_librtmp.hpp | 54 +++++++++++++ 8 files changed, 346 insertions(+), 36 deletions(-) diff --git a/trunk/research/librtmp/srs_ingest_mp4.c b/trunk/research/librtmp/srs_ingest_mp4.c index fa9f6fc681..d244481113 100644 --- a/trunk/research/librtmp/srs_ingest_mp4.c +++ b/trunk/research/librtmp/srs_ingest_mp4.c @@ -180,35 +180,34 @@ int do_proxy(srs_mp4_t mp4, srs_rtmp_t ortmp, int64_t re, int32_t* pstarttime, u { int ret = 0; - // packet data - char type; - int size; - char* data = NULL; - srs_human_trace("start ingest mp4 to RTMP stream"); for (;;) { -#if 0 - // tag header - if ((ret = srs_flv_read_tag_header(flv, &type, &size, ptimestamp)) != 0) { - if (srs_flv_is_eof(ret)) { - srs_human_trace("parse completed."); - return 0; - } - srs_human_trace("flv get packet failed. ret=%d", ret); - return ret; - } + // packet data + char type; + int32_t size; + char* data = NULL; - if (size <= 0) { - srs_human_trace("invalid size=%d", size); - break; - } - - // TODO: FIXME: mem leak when error. - data = (char*)malloc(size); - if ((ret = srs_flv_read_tag_data(flv, data, size)) != 0) { - return ret; + // Read a mp4 sample and convert to flv tag. + if (1) { + srs_mp4_sample_t sample; + if ((ret = srs_mp4_read_sample(mp4, &sample)) != 0) { + if (srs_mp4_is_eof(ret)) { + srs_human_trace("parse completed."); + return 0; + } + srs_human_trace("mp4 get sample failed. 
ret=%d", ret); + return ret; + } + + size = srs_mp4_sizeof(mp4, &sample); + data = (char*)malloc(size); + + if ((ret = srs_mp4_to_flv_tag(mp4, &sample, &type, ptimestamp, data, size)) != 0) { + return ret; + } + + srs_mp4_free_sample(&sample); } -#endif uint32_t timestamp = *ptimestamp; if ((ret = srs_human_print_rtmp_packet(type, timestamp, data, size)) != 0) { diff --git a/trunk/src/kernel/srs_kernel_codec.cpp b/trunk/src/kernel/srs_kernel_codec.cpp index 4b5a0ea389..6404321b83 100644 --- a/trunk/src/kernel/srs_kernel_codec.cpp +++ b/trunk/src/kernel/srs_kernel_codec.cpp @@ -162,6 +162,17 @@ string srs_codec_avc_level2str(SrsAvcLevel level) } } +string srs_codec_audio_samplerate2str(SrsCodecAudioSampleRate v) +{ + switch (v) { + case SrsCodecAudioSampleRate5512: return "5512"; + case SrsCodecAudioSampleRate11025: return "11025"; + case SrsCodecAudioSampleRate22050: return "22050"; + case SrsCodecAudioSampleRate44100: return "44100"; + default: return "Other"; + } +} + /** * the public data, event HLS disable, others can use it. */ @@ -288,6 +299,24 @@ bool SrsFlvCodec::video_is_acceptable(char* data, int size) return true; } +string srs_codec_audio_samplesize2str(SrsCodecAudioSampleSize v) +{ + switch (v) { + case SrsCodecAudioSampleSize16bit: return "16bits"; + case SrsCodecAudioSampleSize8bit: return "8bits"; + default: return "Other"; + } +} + +string srs_codec_audio_channels2str(SrsCodecAudioSoundType v) +{ + switch (v) { + case SrsCodecAudioSoundTypeStereo: return "Stereo"; + case SrsCodecAudioSoundTypeMono: return "Mono"; + default: return "Other"; + } +} + string srs_codec_avc_nalu2str(SrsAvcNaluType nalu_type) { switch (nalu_type) { diff --git a/trunk/src/kernel/srs_kernel_codec.hpp b/trunk/src/kernel/srs_kernel_codec.hpp index 3c715ca09d..9f67723788 100644 --- a/trunk/src/kernel/srs_kernel_codec.hpp +++ b/trunk/src/kernel/srs_kernel_codec.hpp @@ -42,6 +42,7 @@ enum SrsCodecAudioType { // set to the max value to reserved, for array map. 
SrsCodecAudioTypeReserved = 2, + SrsCodecAudioTypeForbidden = 2, SrsCodecAudioTypeSequenceHeader = 0, SrsCodecAudioTypeRawData = 1, @@ -59,6 +60,7 @@ enum SrsCodecVideoAVCFrame { // set to the zero to reserved, for array map. SrsCodecVideoAVCFrameReserved = 0, + SrsCodecVideoAVCFrameForbidden = 0, SrsCodecVideoAVCFrameReserved1 = 6, SrsCodecVideoAVCFrameKeyFrame = 1, @@ -78,6 +80,7 @@ enum SrsCodecVideoAVCType { // set to the max value to reserved, for array map. SrsCodecVideoAVCTypeReserved = 3, + SrsCodecVideoAVCTypeForbidden = 3, SrsCodecVideoAVCTypeSequenceHeader = 0, SrsCodecVideoAVCTypeNALU = 1, @@ -170,12 +173,14 @@ enum SrsCodecAudioSampleRate { // set to the max value to reserved, for array map. SrsCodecAudioSampleRateReserved = 4, + SrsCodecAudioSampleRateForbidden = 4, SrsCodecAudioSampleRate5512 = 0, SrsCodecAudioSampleRate11025 = 1, SrsCodecAudioSampleRate22050 = 2, SrsCodecAudioSampleRate44100 = 3, }; +std::string srs_codec_audio_samplerate2str(SrsCodecAudioSampleRate v); /** * E.4.1 FLV Tag, page 75 @@ -263,10 +268,12 @@ enum SrsCodecAudioSampleSize { // set to the max value to reserved, for array map. SrsCodecAudioSampleSizeReserved = 2, + SrsCodecAudioSampleSizeForbidden = 2, SrsCodecAudioSampleSize8bit = 0, SrsCodecAudioSampleSize16bit = 1, }; +std::string srs_codec_audio_samplesize2str(SrsCodecAudioSampleSize v); /** * the FLV/RTMP supported audio sound type/channel. @@ -277,11 +284,13 @@ enum SrsCodecAudioSampleSize enum SrsCodecAudioSoundType { // set to the max value to reserved, for array map. 
- SrsCodecAudioSoundTypeReserved = 2, + SrsCodecAudioSoundTypeReserved = 2, + SrsCodecAudioSoundTypeForbidden = 2, SrsCodecAudioSoundTypeMono = 0, SrsCodecAudioSoundTypeStereo = 1, }; +std::string srs_codec_audio_channels2str(SrsCodecAudioSoundType v); /** * Table 7-1 - NAL unit type codes, syntax element categories, and NAL unit type classes diff --git a/trunk/src/kernel/srs_kernel_error.hpp b/trunk/src/kernel/srs_kernel_error.hpp index 909aeef721..c792ce4c01 100644 --- a/trunk/src/kernel/srs_kernel_error.hpp +++ b/trunk/src/kernel/srs_kernel_error.hpp @@ -250,6 +250,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define ERROR_MP4_BOX_ILLEGAL_BRAND 3074 #define ERROR_MP4_ESDS_SL_Config 3075 #define ERROR_MP4_ILLEGAL_MOOV 3076 +#define ERROR_MP4_ILLEGAL_HANDLER 3077 /////////////////////////////////////////////////////// // HTTP/StreamCaster/KAFKA protocol error. diff --git a/trunk/src/kernel/srs_kernel_mp4.cpp b/trunk/src/kernel/srs_kernel_mp4.cpp index 15c22b3737..458c5196e0 100644 --- a/trunk/src/kernel/srs_kernel_mp4.cpp +++ b/trunk/src/kernel/srs_kernel_mp4.cpp @@ -2963,12 +2963,22 @@ SrsMp4Decoder::SrsMp4Decoder() brand = SrsMp4BoxBrandForbidden; buf = new char[SRS_MP4_BUF_SIZE]; stream = new SrsSimpleStream(); + vcodec = SrsCodecVideoForbidden; + acodec = SrsCodecAudioForbidden; + nb_asc = nb_avcc = 0; + pasc = pavcc = NULL; + asc_written = avcc_written = false; + sample_rate = SrsCodecAudioSampleRateForbidden; + sound_bits = SrsCodecAudioSampleSizeForbidden; + channels = SrsCodecAudioSoundTypeForbidden; } SrsMp4Decoder::~SrsMp4Decoder() { srs_freepa(buf); srs_freep(stream); + srs_freepa(pasc); + srs_freepa(pavcc); } int SrsMp4Decoder::initialize(ISrsReadSeeker* rs) @@ -2978,7 +2988,7 @@ int SrsMp4Decoder::initialize(ISrsReadSeeker* rs) srs_assert(rs); rsio = rs; - // For mdat before moov, we must reset the io. + // For mdat before moov, we must reset the offset to the mdat. 
off_t offset = -1; while (true) { @@ -3016,7 +3026,7 @@ int SrsMp4Decoder::initialize(ISrsReadSeeker* rs) return ret; } - // Reset the io to the start to reparse the general MP4. + // Set the offset to the mdat. if (offset >= 0) { return rsio->lseek(offset, SEEK_SET, NULL); } @@ -3024,6 +3034,44 @@ int SrsMp4Decoder::initialize(ISrsReadSeeker* rs) return ret; } +int SrsMp4Decoder::read_sample(SrsMp4HandlerType* pht, + uint16_t* pft, uint16_t* pct, uint32_t* pdts, uint32_t* ppts, uint8_t** psample, uint32_t* pnb_sample) +{ + int ret = ERROR_SUCCESS; + + if (!avcc_written && nb_avcc) { + avcc_written = true; + *pdts = *ppts = 0; + *pht = SrsMp4HandlerTypeVIDE; + + uint32_t nb_sample = *pnb_sample = nb_avcc; + uint8_t* sample = *psample = new uint8_t[nb_sample]; + memcpy(sample, pavcc, nb_sample); + + *pft = SrsCodecVideoAVCFrameKeyFrame; + *pct = SrsCodecVideoAVCTypeSequenceHeader; + + return ret; + } + + if (!asc_written && nb_asc) { + asc_written = true; + *pdts = *ppts = 0; + *pht = SrsMp4HandlerTypeSOUN; + + uint32_t nb_sample = *pnb_sample = nb_asc; + uint8_t* sample = *psample = new uint8_t[nb_sample]; + memcpy(sample, pasc, nb_sample); + + *pft = 0x00; + *pct = SrsCodecAudioTypeSequenceHeader; + + return ret; + } + + return ret; +} + int SrsMp4Decoder::parse_ftyp(SrsMp4FileTypeBox* ftyp) { int ret = ERROR_SUCCESS; @@ -3069,6 +3117,32 @@ int SrsMp4Decoder::parse_moov(SrsMp4MovieBox* moov) return ret; } + SrsMp4AudioSampleEntry* mp4a = soun? 
soun->mp4a():NULL; + if (mp4a) { + uint32_t sr = mp4a->samplerate>>16; + if (sr >= 44100) { + sample_rate = SrsCodecAudioSampleRate44100; + } else if (sr >= 22050) { + sample_rate = SrsCodecAudioSampleRate22050; + } else if (sr >= 11025) { + sample_rate = SrsCodecAudioSampleRate11025; + } else { + sample_rate = SrsCodecAudioSampleRate5512; + } + + if (mp4a->samplesize == 16) { + sound_bits = SrsCodecAudioSampleSize16bit; + } else { + sound_bits = SrsCodecAudioSampleSize8bit; + } + + if (mp4a->channelcount == 2) { + channels = SrsCodecAudioSoundTypeStereo; + } else { + channels = SrsCodecAudioSoundTypeMono; + } + } + SrsMp4AvccBox* avcc = vide? vide->avcc():NULL; SrsMp4DecoderSpecificInfo* asc = soun? soun->asc():NULL; if (vide && !avcc) { @@ -3082,16 +3156,33 @@ int SrsMp4Decoder::parse_moov(SrsMp4MovieBox* moov) return ret; } + vcodec = vide?vide->vide_codec():SrsCodecVideoForbidden; + acodec = soun?soun->soun_codec():SrsCodecAudioForbidden; + + if (avcc && avcc->nb_config) { + nb_avcc = avcc->nb_config; + pavcc = new uint8_t[nb_avcc]; + memcpy(pavcc, avcc->avc_config, nb_avcc); + } + if (asc && asc->nb_asc) { + nb_asc = asc->nb_asc; + pasc = new uint8_t[nb_asc]; + memcpy(pasc, asc->asc, nb_asc); + } + stringstream ss; ss << "dur=" << mvhd->duration() << "ms"; // video codec. ss << ", vide=" << moov->nb_vide_tracks() << "(" - << srs_codec_video2str(vide?vide->vide_codec():SrsCodecVideoForbidden) - << "," << (avcc? avcc->nb_config:0) << "BSH" << ")"; + << srs_codec_video2str(vcodec) << "," << nb_avcc << "BSH" + << ")"; // audio codec. ss << ", soun=" << moov->nb_soun_tracks() << "(" - << srs_codec_audio2str(soun?soun->soun_codec():SrsCodecAudioForbidden) - << "," << (asc? 
asc->nb_asc:0) << "BSH" << ")"; + << srs_codec_audio2str(acodec) << "," << nb_asc << "BSH" + << "," << srs_codec_audio_channels2str(channels) + << "," << srs_codec_audio_samplesize2str(sound_bits) + << "," << srs_codec_audio_samplerate2str(sample_rate) + << ")"; srs_trace("MP4 moov %s", ss.str().c_str()); diff --git a/trunk/src/kernel/srs_kernel_mp4.hpp b/trunk/src/kernel/srs_kernel_mp4.hpp index b82b4c01fb..0a99fef774 100644 --- a/trunk/src/kernel/srs_kernel_mp4.hpp +++ b/trunk/src/kernel/srs_kernel_mp4.hpp @@ -400,6 +400,7 @@ class SrsMp4TrackBox : public SrsMp4Box virtual SrsMp4SampleTableBox* stbl(); // Get the sample description box virtual SrsMp4SampleDescriptionBox* stsd(); +public: // For H.264/AVC, get the avc1 box. virtual SrsMp4VisualSampleEntry* avc1(); // For AAC, get the mp4a box. @@ -1344,6 +1345,32 @@ class SrsMp4Decoder private: // The major brand of decoder, parse from ftyp. SrsMp4BoxBrand brand; +public: + // The video codec of first track, generally there is zero or one track. + // Forbidden if no video stream. + SrsCodecVideo vcodec; +private: + // For H.264/AVC, the avcc contains the sps/pps. + int nb_avcc; + uint8_t* pavcc; + // Whether avcc is written to reader. + bool avcc_written; +public: + // The audio codec of first track, generally there is zero or one track. + // Forbidden if no audio stream. + SrsCodecAudio acodec; + // The audio sample rate. + SrsCodecAudioSampleRate sample_rate; + // The audio sound bits. + SrsCodecAudioSampleSize sound_bits; + // The audio sound type. + SrsCodecAudioSoundType channels; +private: + // For AAC, the asc in esds box. + int nb_asc; + uint8_t* pasc; + // Whether asc is written to reader. + bool asc_written; private: // Underlayer reader and seeker. // @remark The demuxer must use seeker for general MP4 to seek the moov. @@ -1363,6 +1390,20 @@ class SrsMp4Decoder * the decoder just read data from the reader. */ virtual int initialize(ISrsReadSeeker* rs); + /** + * Read a sample from mp4. 
+ * @param pht The sample type, audio/soun or video/vide. + * @param pft The frame type. For video, it's SrsCodecVideoAVCFrame. + * @param pct The codec type. For video, it's SrsCodecVideoAVCType. For audio, it's SrsCodecAudioType. + * @param pdts The output dts in milliseconds. + * @param ppts The output pts in milliseconds. + * @param pnb_sample The output size of payload. + * @param psample The output payload, user must free it. + * @remark The decoder will generate the first two audio/video sequence headers. + */ + virtual int read_sample(SrsMp4HandlerType* pht, uint16_t* pft, uint16_t* pct, + uint32_t* pdts, uint32_t* ppts, uint8_t** psample, uint32_t* pnb_sample + ); private: virtual int parse_ftyp(SrsMp4FileTypeBox* ftyp); virtual int parse_moov(SrsMp4MovieBox* moov); diff --git a/trunk/src/libs/srs_librtmp.cpp b/trunk/src/libs/srs_librtmp.cpp index d15d1b6700..c5452ea08c 100644 --- a/trunk/src/libs/srs_librtmp.cpp +++ b/trunk/src/libs/srs_librtmp.cpp @@ -1548,7 +1548,6 @@ srs_bool srs_h264_startswith_annexb(char* h264_raw_data, int h264_raw_size, int* struct Mp4Context { - bool non_seekable; SrsFileReader reader; SrsMp4Decoder dec; }; @@ -1558,7 +1557,6 @@ srs_mp4_t srs_mp4_open_read(const char* file) int ret = ERROR_SUCCESS; Mp4Context* mp4 = new Mp4Context(); - mp4->non_seekable = false; if ((ret = mp4->reader.open(file)) != ERROR_SUCCESS) { srs_freep(mp4); @@ -1584,12 +1582,100 @@ int srs_mp4_init_demuxer(srs_mp4_t mp4) return ret; } - // OK, it's legal mp4 for live streaming. 
- context->non_seekable = true; + return ret; +} + +int srs_mp4_read_sample(srs_mp4_t mp4, srs_mp4_sample_t* s) +{ + s->sample = NULL; + + int ret = ERROR_SUCCESS; + + Mp4Context* context = (Mp4Context*)mp4; + SrsMp4Decoder* dec = &context->dec; + + SrsMp4HandlerType ht = SrsMp4HandlerTypeForbidden; + if ((ret = dec->read_sample(&ht, &s->frame_type, &s->codec_type, &s->dts, &s->pts, &s->sample, &s->nb_sample)) != ERROR_SUCCESS) { + return ret; + } + + if (ht == SrsMp4HandlerTypeForbidden) { + return ERROR_MP4_ILLEGAL_HANDLER; + } + + if (ht == SrsMp4HandlerTypeSOUN) { + s->codec = dec->acodec; + s->sample_rate = dec->sample_rate; + s->channels = dec->channels; + s->sound_bits = dec->sound_bits; + } else { + s->codec = dec->vcodec; + } + s->handler_type = (uint32_t)ht; + + return ret; +} + +void srs_mp4_free_sample(srs_mp4_sample_t* s) +{ + srs_freepa(s->sample); +} + +int32_t srs_mp4_sizeof(srs_mp4_t mp4, srs_mp4_sample_t* s) +{ + if (s->handler_type == SrsMp4HandlerTypeSOUN) { + if (s->codec == SrsCodecAudioAAC) { + return s->nb_sample + 2; + } + return s->nb_sample + 1; + } + + if (s->codec == SrsCodecVideoAVC) { + return s->nb_sample + 5; + } + return s->nb_sample + 1; +} + +int srs_mp4_to_flv_tag(srs_mp4_t mp4, srs_mp4_sample_t* s, char* type, uint32_t* time, char* data, int32_t size) +{ + int ret = ERROR_SUCCESS; + + *time = s->dts; + + SrsBuffer p(data, size); + if (s->handler_type == SrsMp4HandlerTypeSOUN) { + *type = SRS_RTMP_TYPE_AUDIO; + + // E.4.2.1 AUDIODATA, flv_v10_1.pdf, page 3 + p.write_1bytes(uint8_t(s->codec << 4) | uint8_t(s->sample_rate << 2) | uint8_t(s->sound_bits << 1) | s->channels); + if (s->codec == SrsCodecAudioAAC) { + p.write_1bytes(uint8_t(s->codec_type == SrsCodecAudioTypeSequenceHeader? 
0:1)); + } + + p.write_bytes((char*)s->sample, s->nb_sample); + return ret; + } + + // E.4.3.1 VIDEODATA, flv_v10_1.pdf, page 5 + p.write_1bytes(uint8_t(s->frame_type<<4) | s->codec); + if (s->codec == SrsCodecVideoAVC) { + *type = SRS_RTMP_TYPE_VIDEO; + + p.write_1bytes(uint8_t(s->codec_type == SrsCodecVideoAVCTypeSequenceHeader? 0:1)); + // cts = pts - dts, where dts = flvheader->timestamp. + uint32_t cts = s->pts - s->dts; + p.write_3bytes(cts); + } + p.write_bytes((char*)s->sample, s->nb_sample); return ret; } +srs_bool srs_mp4_is_eof(int error_code) +{ + return error_code == ERROR_SYSTEM_FILE_EOF; +} + struct FlvContext { SrsFileReader reader; diff --git a/trunk/src/libs/srs_librtmp.hpp b/trunk/src/libs/srs_librtmp.hpp index ab891bd312..8be0d116f3 100644 --- a/trunk/src/libs/srs_librtmp.hpp +++ b/trunk/src/libs/srs_librtmp.hpp @@ -513,6 +513,39 @@ extern srs_bool srs_h264_startswith_annexb( ************************************************************* *************************************************************/ typedef void* srs_mp4_t; +// The sample struct of mp4. +typedef struct { + // The handler type, it's SrsMp4HandlerType. + uint32_t handler_type; + + // The dts in milliseconds. + uint32_t dts; + // The codec type. + // video: SrsCodecVideo. + // audio: SrsCodecAudio. + uint16_t codec; + // The codec type: + // video: SrsCodecVideoAVCType. + // audio: SrsCodecAudioType. + uint16_t codec_type; + + // The video pts in milliseconds. Ignore for audio. + uint32_t pts; + // The video frame type, it's SrsCodecVideoAVCFrame. + uint16_t frame_type; + + // The audio sample rate, it's SrsCodecAudioSampleRate. + uint8_t sample_rate; + // The audio sound bits, it's SrsCodecAudioSampleSize. + uint8_t sound_bits; + // The audio sound type, it's SrsCodecAudioSoundType. + uint8_t channels; + + // The size of sample payload in bytes. + uint32_t nb_sample; + // The output sample data, user must free it by srs_mp4_free_sample. 
+ uint8_t* sample; +} srs_mp4_sample_t; /* Open mp4 file for muxer(write) or demuxer(read). */ extern srs_mp4_t srs_mp4_open_read(const char* file); extern void srs_mp4_close(srs_mp4_t mp4); @@ -522,6 +555,27 @@ extern void srs_mp4_close(srs_mp4_t mp4); * For the live streaming, we must feed stream frame by frame. */ extern int srs_mp4_init_demuxer(srs_mp4_t mp4); +/** + * Read a sample from mp4. + * @remark User can use srs_mp4_to_flv_tag to convert mp4 sample to flv tag. + * Use the srs_mp4_sizeof to calc the flv tag data size to alloc. + */ +extern int srs_mp4_read_sample(srs_mp4_t mp4, srs_mp4_sample_t* sample); +/** + * Free the allocated mp4 sample. + */ +extern void srs_mp4_free_sample(srs_mp4_sample_t* sample); +/** + * Calc the size of flv tag, for the mp4 sample to convert to. + */ +extern int32_t srs_mp4_sizeof(srs_mp4_t mp4, srs_mp4_sample_t* sample); +/** + * Convert mp4 sample to flv tag. + */ +extern int srs_mp4_to_flv_tag(srs_mp4_t mp4, srs_mp4_sample_t* sample, char* type, uint32_t* time, char* data, int32_t size); +/* error code */ +/* whether the error code indicates EOF */ +extern srs_bool srs_mp4_is_eof(int error_code); /************************************************************* **************************************************************