diff --git a/trunk/doc/CHANGELOG.md b/trunk/doc/CHANGELOG.md
index 36e815e91..97de3a803 100644
--- a/trunk/doc/CHANGELOG.md
+++ b/trunk/doc/CHANGELOG.md
@@ -7,6 +7,7 @@ The changelog for SRS.
## SRS 7.0 Changelog
+* v7.0, 2025-08-12, Merge [#4230](https://github.com/ossrs/srs/pull/4230): MP4 DVR: Fix audio/video synchronization issues in WebRTC recordings. v7.0.52 (#4230)
* v7.0, 2025-08-12, Merge [#4301](https://github.com/ossrs/srs/pull/4301): Valgrind: Return error for unsupported check=new on Valgrind < 3.21. v7.0.52 (#4301)
* v7.0, 2025-08-12, Merge [#4431](https://github.com/ossrs/srs/pull/4431): fix srt cmake 4.x compiling error. v7.0.52 (#4431)
* v7.0, 2025-08-11, Merge [#4433](https://github.com/ossrs/srs/pull/4433): Use clang format. v7.0.52 (#4433)
@@ -65,6 +66,7 @@ The changelog for SRS.
## SRS 6.0 Changelog
+* v6.0, 2025-08-12, Merge [#4230](https://github.com/ossrs/srs/pull/4230): MP4 DVR: Fix audio/video synchronization issues in WebRTC recordings. v6.0.172 (#4230)
* v6.0, 2025-08-11, Merge [#4432](https://github.com/ossrs/srs/pull/4432): AI: HTTP-FLV: Fix heap-use-after-free crash during stream unmount. v6.0.171 (#4432)
* v6.0, 2025-07-28, Merge [#4245](https://github.com/ossrs/srs/pull/4245): Allow Forward to be configured with Env Var. v6.0.170 (#4245)
* v6.0, 2025-07-10, Merge [#4414](https://github.com/ossrs/srs/pull/4414): Fix H.264 B-frame detection logic to comply with specification. v6.0.169 (#4414)
diff --git a/trunk/src/core/srs_core_version6.hpp b/trunk/src/core/srs_core_version6.hpp
index af5bdf5b1..89a09a264 100644
--- a/trunk/src/core/srs_core_version6.hpp
+++ b/trunk/src/core/srs_core_version6.hpp
@@ -9,6 +9,6 @@
#define VERSION_MAJOR 6
#define VERSION_MINOR 0
-#define VERSION_REVISION 171
+#define VERSION_REVISION 172
#endif
diff --git a/trunk/src/core/srs_core_version7.hpp b/trunk/src/core/srs_core_version7.hpp
index 60b16270f..21fd136c6 100644
--- a/trunk/src/core/srs_core_version7.hpp
+++ b/trunk/src/core/srs_core_version7.hpp
@@ -9,6 +9,6 @@
#define VERSION_MAJOR 7
#define VERSION_MINOR 0
-#define VERSION_REVISION 51
+#define VERSION_REVISION 52
#endif
\ No newline at end of file
diff --git a/trunk/src/kernel/srs_kernel_mp4.cpp b/trunk/src/kernel/srs_kernel_mp4.cpp
index 10b648b7b..d52b22fcf 100644
--- a/trunk/src/kernel/srs_kernel_mp4.cpp
+++ b/trunk/src/kernel/srs_kernel_mp4.cpp
@@ -5549,12 +5549,60 @@ uint32_t SrsMp4Sample::pts_ms()
return (uint32_t)(pts * 1000 / tbn) + adjust;
}
+// Create a jitter that has not seen any sample yet.
+SrsMp4DvrJitter::SrsMp4DvrJitter()
+{
+    // During construction a virtual call resolves to this class's own version anyway;
+    // the qualified name just makes that explicit.
+    SrsMp4DvrJitter::reset();
+}
+
+SrsMp4DvrJitter::~SrsMp4DvrJitter()
+{ // Intentionally empty: the members are two integer timestamps and two flags.
+}
+
+// Remember the DTS of the first audio sample and of the first video sample that pass through.
+// Once a track's start has been captured, further samples of that track leave it untouched;
+// samples that are neither audio nor video are ignored.
+void SrsMp4DvrJitter::on_sample(SrsMp4Sample *sample)
+{
+    const bool is_audio = (sample->type == SrsFrameTypeAudio);
+    const bool is_video = (sample->type == SrsFrameTypeVideo);
+
+    if (is_audio && !has_first_audio_) {
+        audio_start_dts_ = sample->dts;
+        has_first_audio_ = true;
+    } else if (is_video && !has_first_video_) {
+        video_start_dts_ = sample->dts;
+        has_first_video_ = true;
+    }
+}
+
+// Compute the STTS delta to use for the first sample of a track so both tracks line up.
+// @param track The track being written, SrsFrameTypeVideo or SrsFrameTypeAudio.
+// @return How much later this track's first DTS is than the other track's first DTS, in the
+//      same units as SrsMp4Sample::dts and truncated to the 32-bit return type. Returns 0 if
+//      this track does not start later, if track is neither audio nor video, or if either
+//      track has not delivered a sample yet.
+uint32_t SrsMp4DvrJitter::get_first_sample_delta(SrsFrameType track)
+{
+    // Until both tracks have been seen there is nothing to align against: the missing track's
+    // start is still the 0 placeholder from reset(), so subtracting it would report this
+    // track's absolute first DTS as an offset (e.g. in an audio-only or video-only recording).
+    if (!is_initialized()) {
+        return 0;
+    }
+
+    if (track == SrsFrameTypeVideo && video_start_dts_ > audio_start_dts_) {
+        return (uint32_t)(video_start_dts_ - audio_start_dts_);
+    }
+    if (track == SrsFrameTypeAudio && audio_start_dts_ > video_start_dts_) {
+        return (uint32_t)(audio_start_dts_ - video_start_dts_);
+    }
+
+    // Either this track starts first (or at the same time), or the type is not audio/video.
+    return 0;
+}
+
+// Forget both recorded track starts and go back to the "no sample seen" state.
+void SrsMp4DvrJitter::reset()
+{
+    has_first_audio_ = has_first_video_ = false;
+    audio_start_dts_ = video_start_dts_ = 0;
+}
+
+// Whether the first sample of both the video track and the audio track has been recorded.
+bool SrsMp4DvrJitter::is_initialized()
+{
+    if (!has_first_video_) {
+        return false;
+    }
+    return has_first_audio_;
+}
+
SrsMp4SampleManager::SrsMp4SampleManager()
{
+ jitter_ = new SrsMp4DvrJitter(); // Allocated here; the destructor releases it with srs_freep().
}
SrsMp4SampleManager::~SrsMp4SampleManager()
{
+ srs_freep(jitter_);
+
vector::iterator it;
for (it = samples.begin(); it != samples.end(); ++it) {
SrsMp4Sample *sample = *it;
@@ -5631,6 +5679,7 @@ SrsMp4Sample *SrsMp4SampleManager::at(uint32_t index)
void SrsMp4SampleManager::append(SrsMp4Sample *sample)
{
+ jitter_->on_sample(sample); // Show every appended sample to the jitter so it can capture each track's first DTS.
samples.push_back(sample);
}
@@ -5805,6 +5854,7 @@ srs_error_t SrsMp4SampleManager::write_track(SrsFrameType track,
} else {
// The first sample always in the STTS table.
stts_entry.sample_count++;
+ stts_entry.sample_delta = jitter_->get_first_sample_delta(track);
}
}
diff --git a/trunk/src/kernel/srs_kernel_mp4.hpp b/trunk/src/kernel/srs_kernel_mp4.hpp
index 8df73ef55..b1c903ae9 100644
--- a/trunk/src/kernel/srs_kernel_mp4.hpp
+++ b/trunk/src/kernel/srs_kernel_mp4.hpp
@@ -2434,6 +2434,34 @@ public:
virtual uint32_t pts_ms();
};
+// MP4 DVR jitter for audio/video synchronization in DVR recordings.
+// Remembers the first DTS seen on the audio and video tracks and reports how much later one track starts than the other.
+class SrsMp4DvrJitter
+{
+private:
+ uint64_t video_start_dts_; // DTS of the first video sample seen; 0 until then.
+ uint64_t audio_start_dts_; // DTS of the first audio sample seen; 0 until then.
+ bool has_first_video_; // Set once the first video sample has been recorded.
+ bool has_first_audio_; // Set once the first audio sample has been recorded.
+
+public:
+ SrsMp4DvrJitter();
+ virtual ~SrsMp4DvrJitter();
+
+public:
+ // Record the first sample timestamp (DTS) for each track type; later samples of an already-recorded track are ignored.
+ virtual void on_sample(SrsMp4Sample *sample);
+ // Calculate the initial STTS delta for the first sample of a track: how much later its first DTS is
+ // than the other track's, or 0 when it is not later or the track is neither video nor audio.
+ virtual uint32_t get_first_sample_delta(SrsFrameType track);
+
+private:
+ // Reset the jitter state to "no sample seen" (called by the constructor).
+ virtual void reset();
+ // Check if both audio and video start times have been captured
+ virtual bool is_initialized();
+};
+
// Build samples from moov, or write samples to moov.
// One or more sample are grouped to a chunk, each track contains one or more chunks.
// The offset of chunk is specified by stco.
@@ -2445,6 +2473,9 @@ public:
// The keyframe is specified by stss.
class SrsMp4SampleManager
{
+private:
+ SrsMp4DvrJitter *jitter_; // MP4 A/V sync jitter handler
+
public:
std::vector samples;
diff --git a/trunk/src/utest/srs_utest_mp4.cpp b/trunk/src/utest/srs_utest_mp4.cpp
index 76cf7d18f..e167c696f 100644
--- a/trunk/src/utest/srs_utest_mp4.cpp
+++ b/trunk/src/utest/srs_utest_mp4.cpp
@@ -2454,3 +2454,144 @@ VOID TEST(KernelMp4Test, SrsMp4M2tsInitEncoder)
EXPECT_TRUE(fw.filesize() > 0);
}
}
+
+// Fill in the only two fields SrsMp4DvrJitter reads from a sample.
+static void srs_utest_mp4_jitter_sample(SrsMp4Sample &sample, SrsFrameType type, uint64_t dts)
+{
+    sample.type = type;
+    sample.dts = dts;
+}
+
+VOID TEST(KernelMp4Test, SrsMp4DvrJitter)
+{
+    // NOTE(review): is_initialized() and reset() are declared private in SrsMp4DvrJitter; this
+    // test presumably relies on the utest build exposing private members. Confirm.
+
+    // A fresh jitter has seen nothing, so it reports no offset for either track.
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        EXPECT_FALSE(jitter.is_initialized());
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+
+    // Audio starts first: video carries the gap, audio carries none.
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        SrsMp4Sample audio;
+        srs_utest_mp4_jitter_sample(audio, SrsFrameTypeAudio, 1000);
+        SrsMp4Sample video;
+        srs_utest_mp4_jitter_sample(video, SrsFrameTypeVideo, 2000);
+
+        jitter.on_sample(&audio);
+        jitter.on_sample(&video);
+        EXPECT_TRUE(jitter.is_initialized());
+
+        // Video delta is 2000 - 1000.
+        EXPECT_EQ(1000, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+
+    // Video starts first: audio carries the gap, video carries none.
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        SrsMp4Sample video;
+        srs_utest_mp4_jitter_sample(video, SrsFrameTypeVideo, 500);
+        SrsMp4Sample audio;
+        srs_utest_mp4_jitter_sample(audio, SrsFrameTypeAudio, 1500);
+
+        jitter.on_sample(&video);
+        jitter.on_sample(&audio);
+        EXPECT_TRUE(jitter.is_initialized());
+
+        // Audio delta is 1500 - 500.
+        EXPECT_EQ(1000, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+    }
+
+    // Both tracks start at the same DTS: neither needs an offset.
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        SrsMp4Sample audio;
+        srs_utest_mp4_jitter_sample(audio, SrsFrameTypeAudio, 1000);
+        SrsMp4Sample video;
+        srs_utest_mp4_jitter_sample(video, SrsFrameTypeVideo, 1000);
+
+        jitter.on_sample(&audio);
+        jitter.on_sample(&video);
+        EXPECT_TRUE(jitter.is_initialized());
+
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+
+    // reset() discards whatever was recorded before it.
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        SrsMp4Sample audio;
+        srs_utest_mp4_jitter_sample(audio, SrsFrameTypeAudio, 1000);
+        jitter.on_sample(&audio);
+
+        jitter.reset();
+
+        EXPECT_FALSE(jitter.is_initialized());
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+
+    // Only the first sample of a track counts; later samples of that track are ignored.
+    if (true) {
+        SrsMp4DvrJitter jitter;
+
+        SrsMp4Sample audio1;
+        srs_utest_mp4_jitter_sample(audio1, SrsFrameTypeAudio, 1000);
+        SrsMp4Sample audio2;
+        srs_utest_mp4_jitter_sample(audio2, SrsFrameTypeAudio, 2000); // Must not replace audio1.
+        SrsMp4Sample video1;
+        srs_utest_mp4_jitter_sample(video1, SrsFrameTypeVideo, 1500);
+
+        jitter.on_sample(&audio1);
+        jitter.on_sample(&audio2);
+        jitter.on_sample(&video1);
+
+        // Measured against audio1 (1000), not audio2 (2000): 1500 - 1000.
+        EXPECT_EQ(500, jitter.get_first_sample_delta(SrsFrameTypeVideo));
+        EXPECT_EQ(0, jitter.get_first_sample_delta(SrsFrameTypeAudio));
+    }
+}