Skip to content

Commit d1b9916

Browse files
committed
feat(avformat): fmp4 支持封装 webvtt
1 parent 8326c4a commit d1b9916

File tree

1 file changed

+94
-87
lines changed

1 file changed

+94
-87
lines changed

src/avformat/formats/OIsobmffFormat.ts

Lines changed: 94 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -453,22 +453,22 @@ export default class OIsobmffFormat extends OFormat {
453453
}
454454
else if (stream.codecpar.frameSize > 0) {
455455
track.sampleDurations.push(static_cast<double>(avRescaleQ(
456-
static_cast<int64>(stream.codecpar.frameSize / stream.codecpar.sampleRate * AV_TIME_BASE),
456+
static_cast<int64>((stream.codecpar.frameSize / stream.codecpar.sampleRate * AV_TIME_BASE) as double),
457457
AV_TIME_BASE_Q,
458458
stream.timeBase
459459
)))
460460
}
461461
else if (stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_AAC) {
462462
track.sampleDurations.push(static_cast<double>(avRescaleQ(
463-
static_cast<int64>(1024 / stream.codecpar.sampleRate * AV_TIME_BASE),
463+
static_cast<int64>((1024 / stream.codecpar.sampleRate * AV_TIME_BASE) as double),
464464
AV_TIME_BASE_Q,
465465
stream.timeBase
466466
)))
467467
}
468468
else {
469469
// 随便猜一个?每帧一个 fragment 没有 sampleDuration QuickTime 无法播放
470470
track.sampleDurations.push(static_cast<double>(avRescaleQ(
471-
static_cast<int64>(1024 / stream.codecpar.sampleRate * AV_TIME_BASE),
471+
static_cast<int64>((1024 / stream.codecpar.sampleRate * AV_TIME_BASE) as double),
472472
AV_TIME_BASE_Q,
473473
stream.timeBase
474474
)))
@@ -676,6 +676,79 @@ export default class OIsobmffFormat extends OFormat {
676676
}
677677
}
678678

679+
/**
 * Write the payload of one packet to `ioWriter` and return the number of
 * bytes emitted for that packet's sample.
 *
 * Packets of any codec other than WebVTT are written verbatim. A WebVTT
 * packet is wrapped in a `BoxType.VTTC` box that holds an optional 'iden'
 * box (identifier side data), an optional 'sttg' box (settings side data)
 * and a `BoxType.PAYL` box with the packet data.
 *
 * When the previous cue ended more than 200 ms before `dts`, an 8-byte
 * `BoxType.VTTE` box is written first and recorded as its own sample in the
 * current fragment track or in the stream's sample tables. The stream
 * context's lastPts / lastDts / lastDuration are then moved to describe
 * that empty sample.
 *
 * @param ioWriter writer that receives the sample bytes
 * @param avpacket packet to write
 * @param dts decode timestamp of the packet, compared against the stream
 *            context's lastDts and rescaled from stream.timeBase
 * @param stream stream the packet belongs to
 * @returns size in bytes of the sample written for `avpacket`; the size of
 *          an inserted empty-cue sample is recorded separately and is not
 *          included
 */
private writeTrackData(ioWriter: IOWriter, avpacket: pointer<AVPacket>, dts: int64, stream: AVStream) {
  // Everything except WebVTT goes out untouched.
  if (stream.codecpar.codecId !== AVCodecID.AV_CODEC_ID_WEBVTT) {
    ioWriter.writeBuffer(getAVPacketData(avpacket))
    return avpacket.size
  }

  const streamContext = stream.privData as IsobmffStreamContext

  if (streamContext.lastDuration > 0) {
    const previousCueEnd = streamContext.lastDts + static_cast<int64>(streamContext.lastDuration)
    // Only fill the hole with an empty cue when it is longer than 200 ms.
    const needsEmptyCue = previousCueEnd < dts
      && avRescaleQ(dts - previousCueEnd, stream.timeBase, AV_MILLI_TIME_BASE_Q) > 200

    if (needsEmptyCue) {
      // A box consisting of nothing but its 8-byte header.
      ioWriter.writeUint32(8)
      ioWriter.writeString(BoxType.VTTE)

      const gapDuration = static_cast<double>((dts - previousCueEnd) as int64)

      if (!this.context.fragment) {
        // Non-fragmented output: account for the empty sample in the chunk
        // and the stts run-length tables.
        const deltas = static_cast<double>((previousCueEnd - streamContext.lastDts) as int64)
        this.context.currentChunk.sampleCount++
        streamContext.sampleSizes.push(8)
        if (streamContext.sttsSampleDeltas[streamContext.sttsSampleDeltas.length - 1] !== deltas) {
          streamContext.sttsSampleCounts.push(1)
          streamContext.sttsSampleDeltas.push(deltas)
        }
        else {
          streamContext.sttsSampleCounts[streamContext.sttsSampleCounts.length - 1]++
        }
      }
      else {
        // Fragmented output: account for the empty sample in the fragment track.
        const fragmentTrack = this.context.currentFragment.tracks.find((candidate) => {
          return candidate.streamIndex === avpacket.streamIndex
        })
        fragmentTrack.sampleCount++
        if (fragmentTrack.sampleSizes.length === 0) {
          // The empty cue is the first sample of this fragment track.
          fragmentTrack.baseMediaDecodeTime = previousCueEnd
        }
        fragmentTrack.sampleDurations.push(gapDuration)
        fragmentTrack.sampleSizes.push(8)
      }

      // From here on the empty cue is the "previous sample" of the stream.
      streamContext.lastPts = previousCueEnd
      streamContext.lastDts = previousCueEnd
      streamContext.lastDuration = gapDuration
    }
  }

  const identifier = getAVPacketSideData(avpacket, AVPacketSideDataType.AV_PKT_DATA_WEBVTT_IDENTIFIER)
  const settings = getAVPacketSideData(avpacket, AVPacketSideDataType.AV_PKT_DATA_WEBVTT_SETTINGS)

  // Size of the cue box content: payload box plus the optional child boxes,
  // each carrying its own 8-byte header.
  let contentSize = avpacket.size + 8
  if (identifier) {
    contentSize += 8 + static_cast<int32>(identifier.size)
  }
  if (settings) {
    contentSize += 8 + static_cast<int32>(settings.size)
  }
  const cueBoxSize = 8 + contentSize

  ioWriter.writeUint32(cueBoxSize)
  ioWriter.writeString(BoxType.VTTC)

  if (identifier) {
    ioWriter.writeUint32(8 + identifier.size)
    ioWriter.writeString('iden')
    ioWriter.writeBuffer(mapUint8Array(identifier.data, reinterpret_cast<size>(identifier.size)))
  }
  if (settings) {
    ioWriter.writeUint32(8 + settings.size)
    ioWriter.writeString('sttg')
    ioWriter.writeBuffer(mapUint8Array(settings.data, reinterpret_cast<size>(settings.size)))
  }

  ioWriter.writeUint32(8 + avpacket.size)
  ioWriter.writeString(BoxType.PAYL)
  ioWriter.writeBuffer(getAVPacketData(avpacket))

  return cueBoxSize
}
751+
679752
public writeAVPacket(formatContext: AVOFormatContext, avpacket: pointer<AVPacket>): number {
680753

681754
if (!avpacket.size) {
@@ -692,13 +765,13 @@ export default class OIsobmffFormat extends OFormat {
692765

693766
const streamContext = stream.privData as IsobmffStreamContext
694767

695-
let dts = avRescaleQ2(avpacket.dts, addressof(avpacket.timeBase), stream.timeBase)
696-
let pts = avRescaleQ2(avpacket.pts !== NOPTS_VALUE_BIGINT ? avpacket.pts : avpacket.dts, addressof(avpacket.timeBase), stream.timeBase)
697-
const duration = avpacket.duration !== NOPTS_VALUE_BIGINT ? avRescaleQ2(avpacket.duration, addressof(avpacket.timeBase), stream.timeBase) : NOPTS_VALUE_BIGINT
768+
let dts: int64 = avRescaleQ2(avpacket.dts, addressof(avpacket.timeBase), stream.timeBase)
769+
let pts: int64 = avRescaleQ2(avpacket.pts !== NOPTS_VALUE_BIGINT ? avpacket.pts : avpacket.dts, addressof(avpacket.timeBase), stream.timeBase)
770+
const duration: int64 = avpacket.duration !== NOPTS_VALUE_BIGINT ? avRescaleQ2(avpacket.duration, addressof(avpacket.timeBase), stream.timeBase) : NOPTS_VALUE_BIGINT
698771

699772
if ((stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_H264
700-
|| stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_HEVC
701-
|| stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_VVC
773+
|| stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_HEVC
774+
|| stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_VVC
702775
)
703776
&& (avpacket.flags & AVPacketFlags.AV_PKT_FLAG_H26X_ANNEXB)
704777
) {
@@ -707,7 +780,7 @@ export default class OIsobmffFormat extends OFormat {
707780
avpacket = this.avpacket
708781
}
709782
else if ((stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_AC3
710-
|| stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_EAC3
783+
|| stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_EAC3
711784
)
712785
&& (!this.context.ac3Info || !this.context.ac3Info.done)
713786
) {
@@ -750,7 +823,7 @@ export default class OIsobmffFormat extends OFormat {
750823
}
751824
}
752825

753-
track.ioWriter.writeBuffer(getAVPacketData(avpacket))
826+
const dataSize = this.writeTrackData(track.ioWriter, avpacket, dts, stream)
754827

755828
if (!track.sampleSizes.length) {
756829
track.baseMediaDecodeTime = dts
@@ -763,14 +836,10 @@ export default class OIsobmffFormat extends OFormat {
763836
) {
764837
track.sampleDurations[track.sampleSizes.length - 1] = static_cast<double>(dts - streamContext.lastDts)
765838
}
766-
if (avpacket.duration > 0) {
767-
track.sampleDurations.push(static_cast<double>(avRescaleQ(
768-
avpacket.duration,
769-
avpacket.timeBase,
770-
stream.timeBase
771-
)))
839+
if (duration > 0) {
840+
track.sampleDurations.push(static_cast<double>(duration))
772841
}
773-
track.sampleSizes.push(avpacket.size)
842+
track.sampleSizes.push(dataSize)
774843

775844
if (stream.codecpar.codecType === AVMediaType.AVMEDIA_TYPE_VIDEO) {
776845
let flag = 0
@@ -781,7 +850,6 @@ export default class OIsobmffFormat extends OFormat {
781850
flag |= (SampleFlags.DEPENDS_YES | SampleFlags.IS_NON_SYN)
782851
}
783852
track.sampleCompositionTimeOffset.push(static_cast<double>((pts !== NOPTS_VALUE_BIGINT ? pts : dts) - dts))
784-
785853
track.sampleFlags.push(flag)
786854
}
787855

@@ -823,6 +891,9 @@ export default class OIsobmffFormat extends OFormat {
823891
track.sampleCount++
824892
streamContext.lastPts = bigint.max(streamContext.lastPts, pts + (duration !== NOPTS_VALUE_BIGINT ? duration : 0n))
825893
streamContext.lastDts = dts
894+
if (duration > 0) {
895+
streamContext.lastDuration = static_cast<double>(duration)
896+
}
826897
this.context.currentFragment.firstWrote = true
827898
}
828899
else {
@@ -852,65 +923,7 @@ export default class OIsobmffFormat extends OFormat {
852923
currentChunk.sampleCount++
853924
}
854925

855-
if (stream.codecpar.codecId === AVCodecID.AV_CODEC_ID_WEBVTT) {
856-
if (streamContext.lastDuration > 0) {
857-
const lastCueEndTimestamp = streamContext.lastDts + static_cast<int64>(streamContext.lastDuration)
858-
// add empty sample vtte
859-
if (lastCueEndTimestamp < dts && avRescaleQ(dts - lastCueEndTimestamp, stream.timeBase, AV_MILLI_TIME_BASE_Q) > 200) {
860-
formatContext.ioWriter.writeUint32(8)
861-
formatContext.ioWriter.writeString(BoxType.VTTE)
862-
currentChunk.sampleCount++
863-
streamContext.sampleSizes.push(8)
864-
const deltas = static_cast<double>(lastCueEndTimestamp - streamContext.lastDts)
865-
if (!streamContext.sttsSampleCounts.length) {
866-
streamContext.sttsSampleCounts.push(1)
867-
streamContext.sttsSampleDeltas.push(deltas)
868-
}
869-
else {
870-
if (streamContext.sttsSampleDeltas[streamContext.sttsSampleDeltas.length - 1] === deltas) {
871-
streamContext.sttsSampleCounts[streamContext.sttsSampleCounts.length - 1]++
872-
}
873-
else {
874-
streamContext.sttsSampleCounts.push(1)
875-
streamContext.sttsSampleDeltas.push(deltas)
876-
}
877-
}
878-
streamContext.lastPts = lastCueEndTimestamp
879-
streamContext.lastDts = lastCueEndTimestamp
880-
streamContext.lastDuration = static_cast<double>((dts - lastCueEndTimestamp) as int64)
881-
}
882-
}
883-
const identifier = getAVPacketSideData(avpacket, AVPacketSideDataType.AV_PKT_DATA_WEBVTT_IDENTIFIER)
884-
const settings = getAVPacketSideData(avpacket, AVPacketSideDataType.AV_PKT_DATA_WEBVTT_SETTINGS)
885-
886-
let size = avpacket.size + 8
887-
if (identifier) {
888-
size += 8 + static_cast<int32>(identifier.size)
889-
}
890-
if (settings) {
891-
size += 8 + static_cast<int32>(settings.size)
892-
}
893-
formatContext.ioWriter.writeUint32(8 + size)
894-
formatContext.ioWriter.writeString(BoxType.VTTC)
895-
if (identifier) {
896-
formatContext.ioWriter.writeUint32(8 + identifier.size)
897-
formatContext.ioWriter.writeString('iden')
898-
formatContext.ioWriter.writeBuffer(mapUint8Array(identifier.data, reinterpret_cast<size>(identifier.size)))
899-
}
900-
if (settings) {
901-
formatContext.ioWriter.writeUint32(8 + settings.size)
902-
formatContext.ioWriter.writeString('sttg')
903-
formatContext.ioWriter.writeBuffer(mapUint8Array(settings.data, reinterpret_cast<size>(settings.size)))
904-
}
905-
formatContext.ioWriter.writeUint32(8 + avpacket.size)
906-
formatContext.ioWriter.writeString(BoxType.PAYL)
907-
formatContext.ioWriter.writeBuffer(getAVPacketData(avpacket))
908-
streamContext.sampleSizes.push(8 + size)
909-
}
910-
else {
911-
formatContext.ioWriter.writeBuffer(getAVPacketData(avpacket))
912-
streamContext.sampleSizes.push(avpacket.size)
913-
}
926+
streamContext.sampleSizes.push(this.writeTrackData(formatContext.ioWriter, avpacket, dts, stream))
914927

915928
if (stream.codecpar.codecType === AVMediaType.AVMEDIA_TYPE_VIDEO
916929
&& avpacket.flags & AVPacketFlags.AV_PKT_FLAG_KEY
@@ -925,18 +938,12 @@ export default class OIsobmffFormat extends OFormat {
925938
}
926939
else {
927940
const deltas = static_cast<double>(dts - streamContext.lastDts)
928-
if (!streamContext.sttsSampleCounts.length) {
929-
streamContext.sttsSampleCounts.push(1)
930-
streamContext.sttsSampleDeltas.push(deltas)
941+
if (streamContext.sttsSampleDeltas[streamContext.sttsSampleDeltas.length - 1] === deltas) {
942+
streamContext.sttsSampleCounts[streamContext.sttsSampleCounts.length - 1]++
931943
}
932944
else {
933-
if (streamContext.sttsSampleDeltas[streamContext.sttsSampleDeltas.length - 1] === deltas) {
934-
streamContext.sttsSampleCounts[streamContext.sttsSampleCounts.length - 1]++
935-
}
936-
else {
937-
streamContext.sttsSampleCounts.push(1)
938-
streamContext.sttsSampleDeltas.push(deltas)
939-
}
945+
streamContext.sttsSampleCounts.push(1)
946+
streamContext.sttsSampleDeltas.push(deltas)
940947
}
941948
}
942949

0 commit comments

Comments
 (0)