MP4 DVR: Fix audio/video synchronization issues in WebRTC recordings. v6.0.172 v7.0.52 (#4230)
Fixes #3993 - WebRTC streams recorded to MP4 via DVR exhibit audio/video synchronization issues, with audio typically ahead of video. **Note: This issue is specific to MP4 format; FLV recordings are not affected.**

When WebRTC streams are converted to RTMP and then muxed to MP4, the audio and video tracks may start at different timestamps. The MP4 muxer was not accounting for this timing offset between the first audio and video samples in the STTS (Decoding Time-to-Sample) table, causing the tracks to be misaligned in the final MP4 file.

Introduces `SrsMp4DvrJitter` class specifically for MP4 audio/video synchronization:

- **Timestamp Tracking**: Records the DTS of the first audio and video samples
- **Offset Calculation**: Computes the timing difference between track start times
- **MP4 STTS Correction**: Sets appropriate `sample_delta` values in the MP4 STTS table to maintain proper A/V sync

- Added `SrsMp4DvrJitter` class in `srs_kernel_mp4.hpp/cpp`
- Integrated jitter correction into `SrsMp4SampleManager::write_track()` for MP4 format only
- Added comprehensive unit tests covering various timing scenarios
- **Scope**: Changes are isolated to MP4 kernel code and do not affect FLV processing

This fix ensures that MP4 DVR recordings from WebRTC streams maintain proper audio/video synchronization regardless of the relative timing of the first audio and video frames, while leaving FLV format processing unchanged.

---------

Co-authored-by: Haibo Chen <495810242@qq.com>
Co-authored-by: john <hondaxiao@tencent.com>
Co-authored-by: winlin <winlinvip@gmail.com>
Co-authored-by: OSSRS-AI <winlinam@gmail.com>
This commit is contained in:
parent
db5e43967c
commit
30ea67f5f2
|
|
@ -7,6 +7,7 @@ The changelog for SRS.
|
|||
<a name="v7-changes"></a>
|
||||
|
||||
## SRS 7.0 Changelog
|
||||
* v7.0, 2025-08-12, Merge [#4230](https://github.com/ossrs/srs/pull/4230): MP4 DVR: Fix audio/video synchronization issues in WebRTC recordings. v7.0.52 (#4230)
|
||||
* v7.0, 2025-08-12, Merge [#4301](https://github.com/ossrs/srs/pull/4301): Valgrind: Return error for unsupported check=new on Valgrind < 3.21. v7.0.52 (#4301)
|
||||
* v7.0, 2025-08-12, Merge [#4431](https://github.com/ossrs/srs/pull/4431): fix srt cmake 4.x compiling error. v7.0.52 (#4431)
|
||||
* v7.0, 2025-08-11, Merge [#4433](https://github.com/ossrs/srs/pull/4433): Use clang format. v7.0.52 (#4433)
|
||||
|
|
@ -65,6 +66,7 @@ The changelog for SRS.
|
|||
<a name="v6-changes"></a>
|
||||
|
||||
## SRS 6.0 Changelog
|
||||
* v6.0, 2025-08-12, Merge [#4230](https://github.com/ossrs/srs/pull/4230): MP4 DVR: Fix audio/video synchronization issues in WebRTC recordings. v6.0.172 (#4230)
|
||||
* v6.0, 2025-08-11, Merge [#4432](https://github.com/ossrs/srs/pull/4432): AI: HTTP-FLV: Fix heap-use-after-free crash during stream unmount. v6.0.171 (#4432)
|
||||
* v6.0, 2025-07-28, Merge [#4245](https://github.com/ossrs/srs/pull/4245): Allow Forward to be configured with Env Var. v6.0.170 (#4245)
|
||||
* v6.0, 2025-07-10, Merge [#4414](https://github.com/ossrs/srs/pull/4414): Fix H.264 B-frame detection logic to comply with specification. v6.0.169 (#4414)
|
||||
|
|
|
|||
|
|
@ -9,6 +9,6 @@
|
|||
|
||||
#define VERSION_MAJOR 6
|
||||
#define VERSION_MINOR 0
|
||||
#define VERSION_REVISION 171
|
||||
#define VERSION_REVISION 172
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -9,6 +9,6 @@
|
|||
|
||||
#define VERSION_MAJOR 7
|
||||
#define VERSION_MINOR 0
|
||||
#define VERSION_REVISION 51
|
||||
#define VERSION_REVISION 52
|
||||
|
||||
#endif
|
||||
|
|
@ -5549,12 +5549,60 @@ uint32_t SrsMp4Sample::pts_ms()
|
|||
return (uint32_t)(pts * 1000 / tbn) + adjust;
|
||||
}
|
||||
|
||||
// Start with no track observed: both start timestamps are zero and neither
// first-sample flag is set.
SrsMp4DvrJitter::SrsMp4DvrJitter()
    : video_start_dts_(0), audio_start_dts_(0), has_first_video_(false), has_first_audio_(false)
{
}
|
||||
|
||||
// Nothing to release: the jitter only holds plain timestamps and flags.
SrsMp4DvrJitter::~SrsMp4DvrJitter()
{
}
|
||||
|
||||
// Remember the DTS of the first audio and of the first video sample seen.
// Later samples of a track whose start is already recorded are ignored, and
// so are samples that are neither audio nor video.
void SrsMp4DvrJitter::on_sample(SrsMp4Sample *sample)
{
    switch (sample->type) {
    case SrsFrameTypeAudio:
        if (!has_first_audio_) {
            has_first_audio_ = true;
            audio_start_dts_ = sample->dts;
        }
        break;
    case SrsFrameTypeVideo:
        if (!has_first_video_) {
            has_first_video_ = true;
            video_start_dts_ = sample->dts;
        }
        break;
    default:
        break;
    }
}
|
||||
|
||||
// Get the STTS delta to use for the first sample of the given track, so that
// the track which starts later is shifted by the gap between the two tracks.
//
// @param track The track to query, SrsFrameTypeVideo or SrsFrameTypeAudio.
// @return The first DTS of this track minus the first DTS of the other track,
//      in the same units as SrsMp4Sample::dts. Returns 0 when this track does
//      not start later than the other one, when the track is neither audio
//      nor video, or when a first sample has not been seen for both tracks.
uint32_t SrsMp4DvrJitter::get_first_sample_delta(SrsFrameType track)
{
    // Without a first sample from both tracks there is no offset to measure.
    // The missing track's start time is still the zero set by reset(), so
    // subtracting it would yield the absolute DTS of the only track seen,
    // for example in an audio-only or video-only recording.
    if (!has_first_video_ || !has_first_audio_) {
        return 0;
    }

    // The start times are 64-bit while the STTS delta is 32-bit, so make the
    // narrowing explicit.
    if (track == SrsFrameTypeVideo && video_start_dts_ > audio_start_dts_) {
        return (uint32_t)(video_start_dts_ - audio_start_dts_);
    }
    if (track == SrsFrameTypeAudio && audio_start_dts_ > video_start_dts_) {
        return (uint32_t)(audio_start_dts_ - video_start_dts_);
    }

    return 0;
}
|
||||
|
||||
void SrsMp4DvrJitter::reset()
|
||||
{
|
||||
video_start_dts_ = 0;
|
||||
audio_start_dts_ = 0;
|
||||
has_first_video_ = false;
|
||||
has_first_audio_ = false;
|
||||
}
|
||||
|
||||
bool SrsMp4DvrJitter::is_initialized()
|
||||
{
|
||||
return has_first_video_ && has_first_audio_;
|
||||
}
|
||||
|
||||
// Create the jitter that records the first audio/video DTS of the samples
// appended to this manager.
SrsMp4SampleManager::SrsMp4SampleManager() : jitter_(new SrsMp4DvrJitter())
{
}
|
||||
|
||||
SrsMp4SampleManager::~SrsMp4SampleManager()
|
||||
{
|
||||
srs_freep(jitter_);
|
||||
|
||||
vector<SrsMp4Sample *>::iterator it;
|
||||
for (it = samples.begin(); it != samples.end(); ++it) {
|
||||
SrsMp4Sample *sample = *it;
|
||||
|
|
@ -5631,6 +5679,7 @@ SrsMp4Sample *SrsMp4SampleManager::at(uint32_t index)
|
|||
|
||||
// Add a sample to the manager.
void SrsMp4SampleManager::append(SrsMp4Sample *sample)
{
    // Let the jitter see the sample first, so the first DTS of each track
    // gets recorded.
    jitter_->on_sample(sample);

    // Then keep the pointer at the end of the sample list.
    samples.push_back(sample);
}
|
||||
|
||||
|
|
@ -5805,6 +5854,7 @@ srs_error_t SrsMp4SampleManager::write_track(SrsFrameType track,
|
|||
} else {
|
||||
// The first sample always in the STTS table.
|
||||
stts_entry.sample_count++;
|
||||
stts_entry.sample_delta = jitter_->get_first_sample_delta(track);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2434,6 +2434,34 @@ public:
|
|||
virtual uint32_t pts_ms();
|
||||
};
|
||||
|
||||
// MP4 DVR jitter for audio/video synchronization in DVR recordings.
|
||||
// Handles timing offset between audio and video tracks to ensure proper A/V sync in MP4 files.
|
||||
class SrsMp4DvrJitter
|
||||
{
|
||||
private:
|
||||
uint64_t video_start_dts_;
|
||||
uint64_t audio_start_dts_;
|
||||
bool has_first_video_;
|
||||
bool has_first_audio_;
|
||||
|
||||
public:
|
||||
SrsMp4DvrJitter();
|
||||
virtual ~SrsMp4DvrJitter();
|
||||
|
||||
public:
|
||||
// Record the first sample timestamp for each track type
|
||||
virtual void on_sample(SrsMp4Sample *sample);
|
||||
// Calculate the initial STTS delta for the first sample of a track
|
||||
// to maintain A/V synchronization in MP4 files
|
||||
virtual uint32_t get_first_sample_delta(SrsFrameType track);
|
||||
|
||||
private:
|
||||
// Reset the jitter state (useful for new recording sessions)
|
||||
virtual void reset();
|
||||
// Check if both audio and video start times have been captured
|
||||
virtual bool is_initialized();
|
||||
};
|
||||
|
||||
// Build samples from moov, or write samples to moov.
|
||||
// One or more sample are grouped to a chunk, each track contains one or more chunks.
|
||||
// The offset of chunk is specified by stco.
|
||||
|
|
@ -2445,6 +2473,9 @@ public:
|
|||
// The keyframe is specified by stss.
|
||||
class SrsMp4SampleManager
|
||||
{
|
||||
private:
|
||||
SrsMp4DvrJitter *jitter_; // MP4 A/V sync jitter handler
|
||||
|
||||
public:
|
||||
std::vector<SrsMp4Sample *> samples;
|
||||
|
||||
|
|
|
|||
|
|
@ -2454,3 +2454,144 @@ VOID TEST(KernelMp4Test, SrsMp4M2tsInitEncoder)
|
|||
EXPECT_TRUE(fw.filesize() > 0);
|
||||
}
|
||||
}
|
||||
|
||||
VOID TEST(KernelMp4Test, SrsMp4DvrJitter)
{
    // A fresh jitter has seen no track, so it reports no offset.
    {
        SrsMp4DvrJitter jitter;

        EXPECT_FALSE(jitter.is_initialized());

        EXPECT_EQ(0u, jitter.get_first_sample_delta(SrsFrameTypeVideo));
        EXPECT_EQ(0u, jitter.get_first_sample_delta(SrsFrameTypeAudio));
    }

    // Audio starts first: video carries the gap, audio carries none.
    {
        SrsMp4DvrJitter jitter;

        SrsMp4Sample first_audio;
        first_audio.type = SrsFrameTypeAudio;
        first_audio.dts = 1000;

        SrsMp4Sample first_video;
        first_video.type = SrsFrameTypeVideo;
        first_video.dts = 2000;

        jitter.on_sample(&first_audio);
        jitter.on_sample(&first_video);

        EXPECT_TRUE(jitter.is_initialized());

        // Video delta is 2000 - 1000.
        EXPECT_EQ(1000u, jitter.get_first_sample_delta(SrsFrameTypeVideo));
        EXPECT_EQ(0u, jitter.get_first_sample_delta(SrsFrameTypeAudio));
    }

    // Video starts first: audio carries the gap, video carries none.
    {
        SrsMp4DvrJitter jitter;

        SrsMp4Sample first_video;
        first_video.type = SrsFrameTypeVideo;
        first_video.dts = 500;

        SrsMp4Sample first_audio;
        first_audio.type = SrsFrameTypeAudio;
        first_audio.dts = 1500;

        jitter.on_sample(&first_video);
        jitter.on_sample(&first_audio);

        EXPECT_TRUE(jitter.is_initialized());

        // Audio delta is 1500 - 500.
        EXPECT_EQ(1000u, jitter.get_first_sample_delta(SrsFrameTypeAudio));
        EXPECT_EQ(0u, jitter.get_first_sample_delta(SrsFrameTypeVideo));
    }

    // Both tracks start at the same DTS: no delta for either track.
    {
        SrsMp4DvrJitter jitter;

        SrsMp4Sample first_audio;
        first_audio.type = SrsFrameTypeAudio;
        first_audio.dts = 1000;

        SrsMp4Sample first_video;
        first_video.type = SrsFrameTypeVideo;
        first_video.dts = 1000;

        jitter.on_sample(&first_audio);
        jitter.on_sample(&first_video);

        EXPECT_TRUE(jitter.is_initialized());

        EXPECT_EQ(0u, jitter.get_first_sample_delta(SrsFrameTypeVideo));
        EXPECT_EQ(0u, jitter.get_first_sample_delta(SrsFrameTypeAudio));
    }

    // After reset() the jitter behaves like a fresh one.
    {
        SrsMp4DvrJitter jitter;

        SrsMp4Sample first_audio;
        first_audio.type = SrsFrameTypeAudio;
        first_audio.dts = 1000;

        jitter.on_sample(&first_audio);

        jitter.reset();
        EXPECT_FALSE(jitter.is_initialized());
        EXPECT_EQ(0u, jitter.get_first_sample_delta(SrsFrameTypeVideo));
        EXPECT_EQ(0u, jitter.get_first_sample_delta(SrsFrameTypeAudio));
    }

    // Only the first sample of a track is recorded; later ones are ignored.
    {
        SrsMp4DvrJitter jitter;

        SrsMp4Sample audio_a;
        audio_a.type = SrsFrameTypeAudio;
        audio_a.dts = 1000;

        // This second audio sample must not replace the recorded start.
        SrsMp4Sample audio_b;
        audio_b.type = SrsFrameTypeAudio;
        audio_b.dts = 2000;

        SrsMp4Sample video_a;
        video_a.type = SrsFrameTypeVideo;
        video_a.dts = 1500;

        jitter.on_sample(&audio_a);
        jitter.on_sample(&audio_b);
        jitter.on_sample(&video_a);

        // Video delta is 1500 - 1000, measured from the first audio sample.
        EXPECT_EQ(500u, jitter.get_first_sample_delta(SrsFrameTypeVideo));
        EXPECT_EQ(0u, jitter.get_first_sample_delta(SrsFrameTypeAudio));
    }
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user