diff --git a/trunk/doc/CHANGELOG.md b/trunk/doc/CHANGELOG.md index 36e815e91..97de3a803 100644 --- a/trunk/doc/CHANGELOG.md +++ b/trunk/doc/CHANGELOG.md @@ -7,6 +7,7 @@ The changelog for SRS. ## SRS 7.0 Changelog +* v7.0, 2025-08-12, Merge [#4230](https://github.com/ossrs/srs/pull/4230): MP4 DVR: Fix audio/video synchronization issues in WebRTC recordings. v7.0.52 (#4230) * v7.0, 2025-08-12, Merge [#4301](https://github.com/ossrs/srs/pull/4301): Valgrind: Return error for unsupported check=new on Valgrind < 3.21. v7.0.52 (#4301) * v7.0, 2025-08-12, Merge [#4431](https://github.com/ossrs/srs/pull/4431): fix srt cmake 4.x compiling error. v7.0.52 (#4431) * v7.0, 2025-08-11, Merge [#4433](https://github.com/ossrs/srs/pull/4433): Use clang format. v7.0.52 (#4433) @@ -65,6 +66,7 @@ The changelog for SRS. ## SRS 6.0 Changelog +* v6.0, 2025-08-12, Merge [#4230](https://github.com/ossrs/srs/pull/4230): MP4 DVR: Fix audio/video synchronization issues in WebRTC recordings. v6.0.172 (#4230) * v6.0, 2025-08-11, Merge [#4432](https://github.com/ossrs/srs/pull/4432): AI: HTTP-FLV: Fix heap-use-after-free crash during stream unmount. v6.0.171 (#4432) * v6.0, 2025-07-28, Merge [#4245](https://github.com/ossrs/srs/pull/4245): Allow Forward to be configured with Env Var. v6.0.170 (#4245) * v6.0, 2025-07-10, Merge [#4414](https://github.com/ossrs/srs/pull/4414): Fix H.264 B-frame detection logic to comply with specification. 
v6.0.169 (#4414)
diff --git a/trunk/src/core/srs_core_version6.hpp b/trunk/src/core/srs_core_version6.hpp
index af5bdf5b1..89a09a264 100644
--- a/trunk/src/core/srs_core_version6.hpp
+++ b/trunk/src/core/srs_core_version6.hpp
@@ -9,6 +9,6 @@
 
 #define VERSION_MAJOR 6
 #define VERSION_MINOR 0
-#define VERSION_REVISION 171
+#define VERSION_REVISION 172
 
 #endif
diff --git a/trunk/src/core/srs_core_version7.hpp b/trunk/src/core/srs_core_version7.hpp
index 60b16270f..21fd136c6 100644
--- a/trunk/src/core/srs_core_version7.hpp
+++ b/trunk/src/core/srs_core_version7.hpp
@@ -9,6 +9,6 @@
 
 #define VERSION_MAJOR 7
 #define VERSION_MINOR 0
-#define VERSION_REVISION 51
+#define VERSION_REVISION 52
 
 #endif
\ No newline at end of file
diff --git a/trunk/src/kernel/srs_kernel_mp4.cpp b/trunk/src/kernel/srs_kernel_mp4.cpp
index 10b648b7b..d52b22fcf 100644
--- a/trunk/src/kernel/srs_kernel_mp4.cpp
+++ b/trunk/src/kernel/srs_kernel_mp4.cpp
@@ -5549,12 +5549,74 @@ uint32_t SrsMp4Sample::pts_ms()
     return (uint32_t)(pts * 1000 / tbn) + adjust;
 }
 
+SrsMp4DvrJitter::SrsMp4DvrJitter()
+{
+    reset();
+}
+
+SrsMp4DvrJitter::~SrsMp4DvrJitter()
+{
+}
+
+// Remember the DTS of the first audio sample and of the first video sample.
+// Later samples of a type that was already seen leave the state untouched.
+void SrsMp4DvrJitter::on_sample(SrsMp4Sample *sample)
+{
+    if (!has_first_audio_ && sample->type == SrsFrameTypeAudio) {
+        has_first_audio_ = true;
+        audio_start_dts_ = sample->dts;
+    }
+
+    if (!has_first_video_ && sample->type == SrsFrameTypeVideo) {
+        has_first_video_ = true;
+        video_start_dts_ = sample->dts;
+    }
+}
+
+// Return the STTS delta for the first sample of the track: the amount by which
+// the track's first DTS is later than the other track's first DTS, or 0 when it
+// is not later. Any track other than audio or video gets 0.
+uint32_t SrsMp4DvrJitter::get_first_sample_delta(SrsFrameType track)
+{
+    // An offset only exists once both tracks have started. The start DTS of a
+    // track that was never seen is still 0 from reset(), so comparing against
+    // it would report the absolute DTS of the only track as its first delta.
+    if (!is_initialized()) {
+        return 0;
+    }
+
+    if (track == SrsFrameTypeVideo) {
+        return video_start_dts_ > audio_start_dts_ ? (uint32_t)(video_start_dts_ - audio_start_dts_) : 0;
+    } else if (track == SrsFrameTypeAudio) {
+        return audio_start_dts_ > video_start_dts_ ? (uint32_t)(audio_start_dts_ - video_start_dts_) : 0;
+    }
+    return 0;
+}
+
+// Forget both start DTS values so the next samples are treated as the first ones.
+void SrsMp4DvrJitter::reset()
+{
+    video_start_dts_ = 0;
+    audio_start_dts_ = 0;
+    has_first_video_ = false;
+    has_first_audio_ = false;
+}
+
+// Whether the first sample of both the audio and the video track has been seen.
+bool SrsMp4DvrJitter::is_initialized()
+{
+    return has_first_video_ && has_first_audio_;
+}
+
 SrsMp4SampleManager::SrsMp4SampleManager()
 {
+    jitter_ = new SrsMp4DvrJitter();
 }
 
 SrsMp4SampleManager::~SrsMp4SampleManager()
 {
+    srs_freep(jitter_);
+
     vector<SrsMp4Sample *>::iterator it;
     for (it = samples.begin(); it != samples.end(); ++it) {
         SrsMp4Sample *sample = *it;
@@ -5631,6 +5693,7 @@ SrsMp4Sample *SrsMp4SampleManager::at(uint32_t index)
 
 void SrsMp4SampleManager::append(SrsMp4Sample *sample)
 {
+    jitter_->on_sample(sample);
     samples.push_back(sample);
 }
 
@@ -5805,6 +5868,7 @@ srs_error_t SrsMp4SampleManager::write_track(SrsFrameType track,
             } else {
                 // The first sample always in the STTS table.
                 stts_entry.sample_count++;
+                stts_entry.sample_delta = jitter_->get_first_sample_delta(track);
             }
         }
 
diff --git a/trunk/src/kernel/srs_kernel_mp4.hpp b/trunk/src/kernel/srs_kernel_mp4.hpp
index 8df73ef55..b1c903ae9 100644
--- a/trunk/src/kernel/srs_kernel_mp4.hpp
+++ b/trunk/src/kernel/srs_kernel_mp4.hpp
@@ -2434,6 +2434,34 @@ public:
     virtual uint32_t pts_ms();
 };
 
+// MP4 DVR jitter for audio/video synchronization in DVR recordings.
+// Handles timing offset between audio and video tracks to ensure proper A/V sync in MP4 files.
+class SrsMp4DvrJitter
+{
+private:
+    uint64_t video_start_dts_; // DTS of the first video sample seen, 0 until then.
+    uint64_t audio_start_dts_; // DTS of the first audio sample seen, 0 until then.
+    bool has_first_video_;     // Set once a video sample has been recorded.
+    bool has_first_audio_;     // Set once an audio sample has been recorded.
+
+public:
+    SrsMp4DvrJitter();
+    virtual ~SrsMp4DvrJitter();
+
+public:
+    // Record the DTS of the first audio and the first video sample; later samples of a seen type are ignored.
+    virtual void on_sample(SrsMp4Sample *sample);
+    // Calculate the initial STTS delta for the first sample of a track: how much later
+    // its first DTS is than the other track's first DTS, or 0 if it is not later.
+    virtual uint32_t get_first_sample_delta(SrsFrameType track);
+
+private:
+    // Clear the recorded start DTS values and the "first sample seen" flags.
+    virtual void reset();
+    // Check if both audio and video start times have been captured
+    virtual bool is_initialized();
+};
+
 // Build samples from moov, or write samples to moov.
 // One or more sample are grouped to a chunk, each track contains one or more chunks.
 // The offset of chunk is specified by stco.
@@ -2445,6 +2473,9 @@ public:
 // The keyframe is specified by stss.
 class SrsMp4SampleManager
 {
+private:
+    SrsMp4DvrJitter *jitter_; // MP4 A/V sync jitter handler
+
 public:
     std::vector samples;
 
diff --git a/trunk/src/utest/srs_utest_mp4.cpp b/trunk/src/utest/srs_utest_mp4.cpp
index 76cf7d18f..e167c696f 100644
--- a/trunk/src/utest/srs_utest_mp4.cpp
+++ b/trunk/src/utest/srs_utest_mp4.cpp
@@ -2454,3 +2454,144 @@ VOID TEST(KernelMp4Test, SrsMp4M2tsInitEncoder)
         EXPECT_TRUE(fw.filesize() > 0);
     }
 }
+
+VOID TEST(KernelMp4Test, SrsMp4DvrJitter)
+{
+    // Test basic initialization: a fresh jitter has seen no samples.
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        // Should not be initialized yet. NOTE(review): is_initialized() and reset() are declared private; presumably the utest build exposes private members - confirm.
+        EXPECT_FALSE(jitter.is_initialized());
+
+        // Delta should be 0 for uninitialized jitter
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+
+    // Test audio first scenario
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        // Create audio sample that arrives first
+        SrsMp4Sample audio_sample;
+        audio_sample.type = SrsFrameTypeAudio;
+        audio_sample.dts = 1000; // Audio starts at dts 1000
+
+        // Create video sample that arrives later
+        SrsMp4Sample video_sample;
+        video_sample.type = SrsFrameTypeVideo;
+        video_sample.dts = 2000; // Video starts at dts 2000
+
+        // Process samples
+        jitter.on_sample(&audio_sample);
+        jitter.on_sample(&video_sample);
+
+        // Should be initialized now
+        EXPECT_TRUE(jitter.is_initialized());
+
+        // Video should have delta = video_start - audio_start = 2000 - 1000 = 1000
+        EXPECT_EQ(1000, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+
+        // Audio should have delta = 0 (since audio started first)
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+
+    // Test video first scenario
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        // Create video sample that arrives first
+        SrsMp4Sample video_sample;
+        video_sample.type = SrsFrameTypeVideo;
+        video_sample.dts = 500; // Video starts at dts 500
+
+        // Create audio sample that arrives later
+        SrsMp4Sample audio_sample;
+        audio_sample.type = SrsFrameTypeAudio;
+        audio_sample.dts = 1500; // Audio starts at dts 1500
+
+        // Process samples
+        jitter.on_sample(&video_sample);
+        jitter.on_sample(&audio_sample);
+
+        // Should be initialized now
+        EXPECT_TRUE(jitter.is_initialized());
+
+        // Audio should have delta = audio_start - video_start = 1500 - 500 = 1000
+        EXPECT_EQ(1000, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+
+        // Video should have delta = 0 (since video started first)
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+    }
+
+    // Test same start time scenario
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        // Create samples with same start time
+        SrsMp4Sample audio_sample;
+        audio_sample.type = SrsFrameTypeAudio;
+        audio_sample.dts = 1000;
+
+        SrsMp4Sample video_sample;
+        video_sample.type = SrsFrameTypeVideo;
+        video_sample.dts = 1000;
+
+        // Process samples
+        jitter.on_sample(&audio_sample);
+        jitter.on_sample(&video_sample);
+
+        // Should be initialized now
+        EXPECT_TRUE(jitter.is_initialized());
+
+        // Both should have delta = 0 (same start time)
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+
+    // Test reset functionality
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        // Record one audio sample so there is state to clear
+        SrsMp4Sample audio_sample;
+        audio_sample.type = SrsFrameTypeAudio;
+        audio_sample.dts = 1000;
+
+        jitter.on_sample(&audio_sample);
+
+        // Reset and verify that the state matches a fresh jitter
+        jitter.reset();
+        EXPECT_FALSE(jitter.is_initialized());
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+
+    // Test multiple samples of same type (should only record first)
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        // Create multiple audio samples
+        SrsMp4Sample audio1;
+        audio1.type = SrsFrameTypeAudio;
+        audio1.dts = 1000;
+
+        SrsMp4Sample audio2;
+        audio2.type = SrsFrameTypeAudio;
+        audio2.dts = 2000; // This should be ignored
+
+        SrsMp4Sample video1;
+        video1.type = SrsFrameTypeVideo;
+        video1.dts = 1500;
+
+        // Process samples
+        jitter.on_sample(&audio1);
+        jitter.on_sample(&audio2); // Should be ignored
+        jitter.on_sample(&video1);
+
+        // Should use first audio sample (1000) not second (2000)
+        EXPECT_EQ(500, jitter.get_first_sample_delta(SrsFrameTypeVideo)); // 1500 - 1000 = 500
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+}