Compare commits

...

22 Commits

Author SHA1 Message Date
Cédric Verstraeten
e270223968 Merge pull request #238 from kerberos-io/fix/docker-build-release-action
fix/docker-build-release-action
2026-02-13 22:17:33 +01:00
cedricve
01ab1a9218 Disable build provenance in Docker builds
Add --provenance=false to docker build invocations in .github/workflows/release-create.yml (both default and arm64 steps) to suppress Docker provenance metadata during CI builds.
2026-02-13 22:16:23 +01:00
Cédric Verstraeten
6f0794b09c Merge pull request #237 from kerberos-io/feature/fix-quicktime-duration
feature/fix-quicktime-duration
2026-02-13 21:55:41 +01:00
cedricve
1ae6a46d88 Embed build version into binaries
Pass VERSION from CI into Docker builds and embed it into the Go binary via ldflags. Updated .github workflow to supply --build-arg VERSION for both architectures. Added ARG VERSION and logic in Dockerfile and Dockerfile.arm64 to derive the version from git (git describe --tags) or fall back to the provided build-arg, then set it with -X during go build. Changed VERSION in machinery/src/utils/main.go from a const to a var defaulting to "0.0.0" and documented that it is overridden at build time. This ensures released images contain the correct agent version while local/dev builds keep a sensible default.
2026-02-13 21:50:09 +01:00
cedricve
9d83cab5cc Set mdhd.Duration to 0 for fragmented MP4
Uncomment and explicitly set mdhd.Duration = 0 in machinery/src/video/mp4.go for relevant tracks (video H264/H265 and audio track). This ensures mdhd.Duration is zero for fragmented MP4 so players derive duration from fragments (avoiding QuickTime adding fragment durations and doubling the reported duration).
2026-02-13 21:46:32 +01:00
cedricve
6f559c2f00 Align MP4 headers to fragment durations
Compute actual video duration from SegmentDurations and ensure container headers reflect fragment durations. Set mvhd.Duration and mvex/mehd.FragmentDuration to the maximum of video (sum of segments) and audio durations so the overall mvhd matches the longest track. Use the summed segment duration for track tkhd.Duration and keep mdhd.Duration at 0 for fragmented MP4s (to avoid double-counting). Add a warning log when accumulated video duration differs from the recorded VideoTotalDuration. Harden fingerprint generation and private key handling with nil checks.

Add mp4_duration_test.go: unit test that creates a simulated H.264 fragmented MP4 (150 frames at 40ms), closes it, parses the output and verifies that mvhd/mehd and trun sample durations are consistent and that mdhd.Duration is zero.
2026-02-13 21:35:57 +01:00
cedricve
c147944f5a Convert MP4 timestamps to Mac HFS epoch
Add MacEpochOffset constant and convert mp4.StartTime to Mac HFS time for QuickTime compatibility. Compute macTime = mp4.StartTime + MacEpochOffset and use it for mvhd CreationTime/ModificationTime, as well as track tkhd and mdhd creation/modification timestamps for video and audio tracks. Also set mvhd Rate, Volume and NextTrackID. These changes ensure generated MP4s use QuickTime-compatible epoch and include proper mvhd metadata.
2026-02-13 21:01:45 +01:00
Cédric Verstraeten
e8ca776e4e Merge pull request #236 from kerberos-io/fix/debugging-lost-keyframes
fix/debugging-lost-keyframes
2026-02-11 16:51:07 +01:00
Cédric Verstraeten
de5c4b6e0a Merge branch 'master' into fix/debugging-lost-keyframes 2026-02-11 16:48:08 +01:00
Cédric Verstraeten
9ba64de090 add additional logging 2026-02-11 16:48:01 +01:00
Cédric Verstraeten
7ceeebe76e Merge pull request #235 from kerberos-io/fix/debugging-lost-keyframes
fix/debugging-lost-keyframes
2026-02-11 16:15:57 +01:00
Cédric Verstraeten
bd7dbcfcf2 Enhance FPS tracking and logging for keyframes in gortsplib and mp4 modules 2026-02-11 15:11:52 +00:00
Cédric Verstraeten
8c7a46e3ae Merge pull request #234 from kerberos-io/fix/fps-gop-size
fix/fps-gop-size
2026-02-11 15:05:31 +01:00
Cédric Verstraeten
57ccfaabf5 Merge branch 'fix/fps-gop-size' of github.com:kerberos-io/agent into fix/fps-gop-size 2026-02-11 14:59:34 +01:00
Cédric Verstraeten
4a9cb51e95 Update machinery/src/capture/gortsplib.go
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-02-11 14:59:15 +01:00
Cédric Verstraeten
ab6f621e76 Merge branch 'fix/fps-gop-size' of github.com:kerberos-io/agent into fix/fps-gop-size 2026-02-11 14:58:44 +01:00
Cédric Verstraeten
c365ae5af2 Ensure thread-safe closure of peer connections in InitializeWebRTCConnection 2026-02-11 13:58:29 +00:00
Cédric Verstraeten
b05c3d1baa Update machinery/src/capture/gortsplib.go
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-02-11 14:52:40 +01:00
Cédric Verstraeten
c7c7203fad Merge branch 'master' into fix/fps-gop-size 2026-02-11 14:48:05 +01:00
Cédric Verstraeten
d93f85b4f3 Refactor FPS calculation to use per-stream trackers for improved accuracy 2026-02-11 13:45:07 +00:00
Cédric Verstraeten
031212b98c Merge pull request #232 from kerberos-io/fix/fps-gop-size
fix/fps-gop-size
2026-02-11 14:27:18 +01:00
Cédric Verstraeten
a4837b3cb3 Implement PTS-based FPS calculation and GOP size adjustments 2026-02-11 13:14:29 +00:00
8 changed files with 480 additions and 142 deletions

View File

@@ -34,7 +34,7 @@ jobs:
length: 7
- name: Run Build
run: |
docker build -t ${{matrix.architecture}} .
docker build --provenance=false --build-arg VERSION=${{github.event.inputs.tag || github.ref_name}} -t ${{matrix.architecture}} .
CID=$(docker create ${{matrix.architecture}})
docker cp ${CID}:/home/agent ./output-${{matrix.architecture}}
docker rm ${CID}
@@ -71,7 +71,7 @@ jobs:
length: 7
- name: Run Build
run: |
docker build -t ${{matrix.architecture}} -f Dockerfile.arm64 .
docker build --provenance=false --build-arg VERSION=${{github.event.inputs.tag || github.ref_name}} -t ${{matrix.architecture}} -f Dockerfile.arm64 .
CID=$(docker create ${{matrix.architecture}})
docker cp ${CID}:/home/agent ./output-${{matrix.architecture}}
docker rm ${CID}

View File

@@ -1,5 +1,6 @@
ARG BASE_IMAGE_VERSION=amd64-ddbe40e
ARG VERSION=0.0.0
FROM kerberos/base:${BASE_IMAGE_VERSION} AS build-machinery
LABEL AUTHOR=uug.ai
@@ -34,7 +35,8 @@ RUN cat /go/src/github.com/kerberos-io/agent/machinery/version
RUN cd /go/src/github.com/kerberos-io/agent/machinery && \
go mod download && \
go build -tags timetzdata,netgo,osusergo --ldflags '-s -w -extldflags "-static -latomic"' main.go && \
VERSION=$(cd /go/src/github.com/kerberos-io/agent && git describe --tags --always 2>/dev/null || echo "${VERSION}") && \
go build -tags timetzdata,netgo,osusergo --ldflags "-s -w -X github.com/kerberos-io/agent/machinery/src/utils.VERSION=${VERSION} -extldflags '-static -latomic'" main.go && \
mkdir -p /agent && \
mv main /agent && \
mv version /agent && \

View File

@@ -1,5 +1,6 @@
ARG BASE_IMAGE_VERSION=arm64-ddbe40e
ARG VERSION=0.0.0
FROM kerberos/base:${BASE_IMAGE_VERSION} AS build-machinery
LABEL AUTHOR=uug.ai
@@ -34,7 +35,8 @@ RUN cat /go/src/github.com/kerberos-io/agent/machinery/version
RUN cd /go/src/github.com/kerberos-io/agent/machinery && \
go mod download && \
go build -tags timetzdata,netgo,osusergo --ldflags '-s -w -extldflags "-static -latomic"' main.go && \
VERSION=$(cd /go/src/github.com/kerberos-io/agent && git describe --tags --always 2>/dev/null || echo "${VERSION}") && \
go build -tags timetzdata,netgo,osusergo --ldflags "-s -w -X github.com/kerberos-io/agent/machinery/src/utils.VERSION=${VERSION} -extldflags '-static -latomic'" main.go && \
mkdir -p /agent && \
mv main /agent && \
mv version /agent && \

View File

@@ -85,12 +85,8 @@ type Golibrtsp struct {
Streams []packets.Stream
// FPS calculation fields
lastFrameTime time.Time
frameTimeBuffer []time.Duration
frameBufferSize int
frameBufferIndex int
fpsMutex sync.Mutex
// Per-stream FPS calculation (keyed by stream index)
fpsTrackers map[int8]*fpsTracker
// I-frame interval tracking fields
packetsSinceLastKeyframe int
@@ -101,6 +97,78 @@ type Golibrtsp struct {
keyframeMutex sync.Mutex
}
// fpsTracker holds per-stream state for PTS-based FPS calculation.
// Each video stream (H264 / H265) gets its own tracker so PTS
// samples from different codecs never interleave.
type fpsTracker struct {
	mu              sync.Mutex
	lastPTS         time.Duration   // PTS of the previously recorded frame
	hasPTS          bool            // whether lastPTS holds a valid sample
	frameTimeBuffer []time.Duration // ring buffer of recent frame intervals
	bufferSize      int
	bufferIndex     int
	cachedFPS       float64 // latest computed FPS
}

// newFPSTracker returns a tracker that averages FPS over the last
// bufferSize frame intervals. A non-positive bufferSize is clamped to 1
// so that update() never performs a modulo/divide by zero.
func newFPSTracker(bufferSize int) *fpsTracker {
	if bufferSize < 1 {
		bufferSize = 1
	}
	return &fpsTracker{
		frameTimeBuffer: make([]time.Duration, bufferSize),
		bufferSize:      bufferSize,
	}
}

// update records a new PTS sample and returns the latest FPS estimate.
// It must be called once per complete decoded frame (after Decode()
// succeeds), not on every RTP packet fragment. The first call only
// seeds lastPTS and returns 0; invalid intervals (<= 0 or > 5s,
// indicating a PTS discontinuity or wrap) leave the estimate unchanged.
func (ft *fpsTracker) update(pts time.Duration) float64 {
	ft.mu.Lock()
	defer ft.mu.Unlock()

	if !ft.hasPTS {
		ft.lastPTS = pts
		ft.hasPTS = true
		return 0
	}

	interval := pts - ft.lastPTS
	ft.lastPTS = pts

	// Skip invalid intervals (zero, negative, or very large which
	// indicate a PTS discontinuity or wrap).
	if interval <= 0 || interval > 5*time.Second {
		return ft.cachedFPS
	}

	ft.frameTimeBuffer[ft.bufferIndex] = interval
	ft.bufferIndex = (ft.bufferIndex + 1) % ft.bufferSize

	// Average over the valid (non-zero) samples currently in the ring.
	var totalInterval time.Duration
	validSamples := 0
	for _, iv := range ft.frameTimeBuffer {
		if iv > 0 {
			totalInterval += iv
			validSamples++
		}
	}
	if validSamples == 0 {
		return ft.cachedFPS
	}
	avgInterval := totalInterval / time.Duration(validSamples)
	if avgInterval == 0 {
		return ft.cachedFPS
	}
	ft.cachedFPS = float64(time.Second) / float64(avgInterval)
	return ft.cachedFPS
}

// fps returns the most recent FPS estimate without recording a new sample.
func (ft *fpsTracker) fps() float64 {
	ft.mu.Lock()
	defer ft.mu.Unlock()
	return ft.cachedFPS
}
// Init function
var H264FrameDecoder *Decoder
var H265FrameDecoder *Decoder
@@ -548,18 +616,17 @@ func (g *Golibrtsp) Start(ctx context.Context, streamType string, queue *packets
if len(rtppkt.Payload) > 0 {
// decode timestamp
pts, ok := g.Client.PacketPTS(g.VideoH264Media, rtppkt)
pts2, ok := g.Client.PacketPTS2(g.VideoH264Media, rtppkt)
if !ok {
log.Log.Debug("capture.golibrtsp.Start(): " + "unable to get PTS")
// decode timestamps — validate each call separately
pts, okPTS := g.Client.PacketPTS(g.VideoH264Media, rtppkt)
pts2, okPTS2 := g.Client.PacketPTS2(g.VideoH264Media, rtppkt)
if !okPTS2 {
log.Log.Debug("capture.golibrtsp.Start(): unable to get PTS2 from PacketPTS2")
return
}
// Extract access units from RTP packets
// We need to do this, because the decoder expects a full
// access unit. Once we have a full access unit, we can
// decode it, and know if it's a keyframe or not.
// Extract access units from RTP packets.
// We need a complete access unit to determine whether
// this is a keyframe.
au, errDecode := g.VideoH264Decoder.Decode(rtppkt)
if errDecode != nil {
if errDecode != rtph264.ErrNonStartingPacketAndNoPrevious && errDecode != rtph264.ErrMorePacketsNeeded {
@@ -568,6 +635,18 @@ func (g *Golibrtsp) Start(ctx context.Context, streamType string, queue *packets
return
}
// Frame is complete — update per-stream FPS from PTS.
if okPTS {
ft := g.fpsTrackers[g.VideoH264Index]
if ft == nil {
ft = newFPSTracker(30)
g.fpsTrackers[g.VideoH264Index] = ft
}
if ptsFPS := ft.update(pts); ptsFPS > 0 && ptsFPS <= 120 {
g.Streams[g.VideoH264Index].FPS = ptsFPS
}
}
// We'll need to read out a few things.
// prepend an AUD. This is required by some players
filteredAU = [][]byte{
@@ -578,8 +657,10 @@ func (g *Golibrtsp) Start(ctx context.Context, streamType string, queue *packets
nonIDRPresent := false
idrPresent := false
var naluTypes []string
for _, nalu := range au {
typ := h264.NALUType(nalu[0] & 0x1F)
naluTypes = append(naluTypes, fmt.Sprintf("%s(%d,sz=%d)", typ.String(), int(typ), len(nalu)))
switch typ {
case h264.NALUTypeAccessUnitDelimiter:
continue
@@ -626,6 +707,11 @@ func (g *Golibrtsp) Start(ctx context.Context, streamType string, queue *packets
return
}
if idrPresent {
log.Log.Debug(fmt.Sprintf("capture.golibrtsp.Start(%s): IDR frame NALUs: [%s]",
streamType, fmt.Sprintf("%v", naluTypes)))
}
// Convert to packet.
enc, err := h264.AnnexBMarshal(filteredAU)
if err != nil {
@@ -651,7 +737,11 @@ func (g *Golibrtsp) Start(ctx context.Context, streamType string, queue *packets
keyframeInterval := g.trackKeyframeInterval(idrPresent)
if idrPresent && keyframeInterval > 0 {
avgInterval := g.getAverageKeyframeInterval()
gopDuration := float64(keyframeInterval) / g.Streams[g.VideoH265Index].FPS
fps := g.Streams[g.VideoH264Index].FPS
if fps <= 0 {
fps = 25.0 // Default fallback FPS
}
gopDuration := float64(keyframeInterval) / fps
gopSize := int(avgInterval) // Store GOP size in a separate variable
g.Streams[g.VideoH264Index].GopSize = gopSize
log.Log.Debug(fmt.Sprintf("capture.golibrtsp.Start(%s): Keyframe interval=%d packets, Avg=%.1f, GOP=%.1fs, GOPSize=%d",
@@ -716,18 +806,17 @@ func (g *Golibrtsp) Start(ctx context.Context, streamType string, queue *packets
if len(rtppkt.Payload) > 0 {
// decode timestamp
pts, ok := g.Client.PacketPTS(g.VideoH265Media, rtppkt)
pts2, ok := g.Client.PacketPTS2(g.VideoH265Media, rtppkt)
if !ok {
log.Log.Debug("capture.golibrtsp.Start(): " + "unable to get PTS")
// decode timestamps — validate each call separately
pts, okPTS := g.Client.PacketPTS(g.VideoH265Media, rtppkt)
pts2, okPTS2 := g.Client.PacketPTS2(g.VideoH265Media, rtppkt)
if !okPTS2 {
log.Log.Debug("capture.golibrtsp.Start(): unable to get PTS")
return
}
// Extract access units from RTP packets
// We need to do this, because the decoder expects a full
// access unit. Once we have a full access unit, we can
// decode it, and know if it's a keyframe or not.
// Extract access units from RTP packets.
// We need a complete access unit to determine whether
// this is a keyframe.
au, errDecode := g.VideoH265Decoder.Decode(rtppkt)
if errDecode != nil {
if errDecode != rtph265.ErrNonStartingPacketAndNoPrevious && errDecode != rtph265.ErrMorePacketsNeeded {
@@ -736,6 +825,18 @@ func (g *Golibrtsp) Start(ctx context.Context, streamType string, queue *packets
return
}
// Frame is complete — update per-stream FPS from PTS.
if okPTS {
ft := g.fpsTrackers[g.VideoH265Index]
if ft == nil {
ft = newFPSTracker(30)
g.fpsTrackers[g.VideoH265Index] = ft
}
if ptsFPS := ft.update(pts); ptsFPS > 0 && ptsFPS <= 120 {
g.Streams[g.VideoH265Index].FPS = ptsFPS
}
}
filteredAU = [][]byte{
{byte(h265.NALUType_AUD_NUT) << 1, 1, 0x50},
}
@@ -796,7 +897,11 @@ func (g *Golibrtsp) Start(ctx context.Context, streamType string, queue *packets
keyframeInterval := g.trackKeyframeInterval(isRandomAccess)
if isRandomAccess && keyframeInterval > 0 {
avgInterval := g.getAverageKeyframeInterval()
gopDuration := float64(keyframeInterval) / g.Streams[g.VideoH265Index].FPS
fps := g.Streams[g.VideoH265Index].FPS
if fps <= 0 {
fps = 25.0 // Default fallback FPS
}
gopDuration := float64(keyframeInterval) / fps
gopSize := int(avgInterval) // Store GOP size in a separate variable
g.Streams[g.VideoH265Index].GopSize = gopSize
log.Log.Debug(fmt.Sprintf("capture.golibrtsp.Start(%s): Keyframe interval=%d packets, Avg=%.1f, GOP=%.1fs, GOPSize=%d",
@@ -1179,10 +1284,11 @@ func WriteMPEG4Audio(forma *format.MPEG4Audio, aus [][]byte) ([]byte, error) {
// Initialize FPS calculation buffers
func (g *Golibrtsp) initFPSCalculation() {
g.frameBufferSize = 30 // Store last 30 frame intervals
g.frameTimeBuffer = make([]time.Duration, g.frameBufferSize)
g.frameBufferIndex = 0
g.lastFrameTime = time.Time{}
// Ensure the per-stream FPS trackers map exists. Individual trackers
// can be created lazily when a given stream index is first used.
if g.fpsTrackers == nil {
g.fpsTrackers = make(map[int8]*fpsTracker)
}
// Initialize I-frame interval tracking
g.keyframeBufferSize = 10 // Store last 10 keyframe intervals
@@ -1192,50 +1298,11 @@ func (g *Golibrtsp) initFPSCalculation() {
g.lastKeyframePacketCount = 0
}
// Calculate FPS from frame timestamps
func (g *Golibrtsp) calculateFPSFromTimestamps() float64 {
g.fpsMutex.Lock()
defer g.fpsMutex.Unlock()
if g.lastFrameTime.IsZero() {
g.lastFrameTime = time.Now()
return 0
}
now := time.Now()
interval := now.Sub(g.lastFrameTime)
g.lastFrameTime = now
// Store the interval
g.frameTimeBuffer[g.frameBufferIndex] = interval
g.frameBufferIndex = (g.frameBufferIndex + 1) % g.frameBufferSize
// Calculate average FPS from stored intervals
var totalInterval time.Duration
validSamples := 0
for _, interval := range g.frameTimeBuffer {
if interval > 0 {
totalInterval += interval
validSamples++
}
}
if validSamples == 0 {
return 0
}
avgInterval := totalInterval / time.Duration(validSamples)
if avgInterval == 0 {
return 0
}
return float64(time.Second) / float64(avgInterval)
}
// Get enhanced FPS information from SPS with fallback
// Get enhanced FPS information from SPS with fallback to PTS-based calculation.
// The PTS-based FPS is computed per completed frame via fpsTracker.update(),
// so by the time this is called we already have a good estimate.
func (g *Golibrtsp) getEnhancedFPS(sps *h264.SPS, streamIndex int8) float64 {
// First try to get FPS from SPS
// First try to get FPS from SPS VUI parameters
spsFPS := sps.FPS()
// Check if SPS FPS is reasonable (between 1 and 120 fps)
@@ -1244,11 +1311,13 @@ func (g *Golibrtsp) getEnhancedFPS(sps *h264.SPS, streamIndex int8) float64 {
return spsFPS
}
// Fallback to timestamp-based calculation
timestampFPS := g.calculateFPSFromTimestamps()
if timestampFPS > 0 && timestampFPS <= 120 {
log.Log.Debug(fmt.Sprintf("capture.golibrtsp.getEnhancedFPS(): Timestamp FPS: %.2f", timestampFPS))
return timestampFPS
// Fallback to PTS-based FPS (already calculated per-frame)
if ft := g.fpsTrackers[streamIndex]; ft != nil {
ptsFPS := ft.fps()
if ptsFPS > 0 && ptsFPS <= 120 {
log.Log.Debug(fmt.Sprintf("capture.golibrtsp.getEnhancedFPS(): PTS FPS: %.2f", ptsFPS))
return ptsFPS
}
}
// Return SPS FPS even if it seems unreasonable, or default

View File

@@ -25,7 +25,10 @@ import (
"github.com/nfnt/resize"
)
const VERSION = "3.5.0"
// VERSION is the agent version. It defaults to "0.0.0" for local dev builds
// and is overridden at build time via:
// go build -ldflags "-X github.com/kerberos-io/agent/machinery/src/utils.VERSION=v1.2.3"
var VERSION = "0.0.0"
const letterBytes = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

View File

@@ -22,6 +22,10 @@ import (
var LastPTS uint64 = 0 // Last PTS for the current segment
// MacEpochOffset is the number of seconds between Mac HFS epoch (1904-01-01)
// and Unix epoch (1970-01-01). QuickTime requires timestamps in Mac HFS format.
const MacEpochOffset uint64 = 2082844800
// FragmentDurationMs is the target duration for each fragment in milliseconds.
// Fragments will be flushed at the first keyframe after this duration has elapsed,
// resulting in ~3 second fragments (assuming a typical GOP interval).
@@ -29,42 +33,46 @@ const FragmentDurationMs = 3000
type MP4 struct {
// FileName is the name of the file
FileName string
width int
height int
Segments []*mp4ff.MediaSegment // List of media segments
Segment *mp4ff.MediaSegment
MultiTrackFragment *mp4ff.Fragment
TrackIDs []uint32
FileWriter *os.File
Writer *bufio.Writer
SegmentCount int
SampleCount int
StartPTS uint64
VideoTotalDuration uint64
AudioTotalDuration uint64
AudioPTS uint64
Start bool
SPSNALUs [][]byte // SPS NALUs for H264
PPSNALUs [][]byte // PPS NALUs for H264
VPSNALUs [][]byte // VPS NALUs for H265
FreeBoxSize int64
FragmentStartRawPTS uint64 // Raw PTS for timing when to flush fragments
FragmentStartDTS uint64 // Accumulated VideoTotalDuration at fragment start (matches tfdt)
MoofBoxes int64 // Number of moof boxes in the file
MoofBoxSizes []int64 // Sizes of each moof box
SegmentDurations []uint64 // Duration of each segment in timescale units
SegmentBaseDecTimes []uint64 // Base decode time of each segment
StartTime uint64 // Start time of the MP4 file
VideoTrackName string // Name of the video track
VideoTrack int // Track ID for the video track
AudioTrackName string // Name of the audio track
AudioTrack int // Track ID for the audio track
VideoFullSample *mp4ff.FullSample // Full sample for video track
AudioFullSample *mp4ff.FullSample // Full sample for audio track
LastAudioSampleDTS uint64 // Last PTS for audio sample
LastVideoSampleDTS uint64 // Last PTS for video sample
SampleType string // Type of the sample (e.g., "video", "audio", "subtitle")
FileName string
width int
height int
Segments []*mp4ff.MediaSegment // List of media segments
Segment *mp4ff.MediaSegment
MultiTrackFragment *mp4ff.Fragment
TrackIDs []uint32
FileWriter *os.File
Writer *bufio.Writer
SegmentCount int
SampleCount int
StartPTS uint64
VideoTotalDuration uint64
AudioTotalDuration uint64
AudioPTS uint64
Start bool
SPSNALUs [][]byte // SPS NALUs for H264
PPSNALUs [][]byte // PPS NALUs for H264
VPSNALUs [][]byte // VPS NALUs for H265
FreeBoxSize int64
FragmentStartRawPTS uint64 // Raw PTS for timing when to flush fragments
FragmentStartDTS uint64 // Accumulated VideoTotalDuration at fragment start (matches tfdt)
MoofBoxes int64 // Number of moof boxes in the file
MoofBoxSizes []int64 // Sizes of each moof box
SegmentDurations []uint64 // Duration of each segment in timescale units
SegmentBaseDecTimes []uint64 // Base decode time of each segment
StartTime uint64 // Start time of the MP4 file
VideoTrackName string // Name of the video track
VideoTrack int // Track ID for the video track
AudioTrackName string // Name of the audio track
AudioTrack int // Track ID for the audio track
VideoFullSample *mp4ff.FullSample // Full sample for video track
AudioFullSample *mp4ff.FullSample // Full sample for audio track
LastAudioSampleDTS uint64 // Last PTS for audio sample
LastVideoSampleDTS uint64 // Last PTS for video sample
SampleType string // Type of the sample (e.g., "video", "audio", "subtitle")
TotalKeyframesReceived int // Total keyframes received by AddSampleToTrack
TotalKeyframesWritten int // Total keyframes written to trun boxes
FragmentKeyframeCount int // Keyframes in the current fragment
PendingSampleIsKeyframe bool // Whether the pending video sample is a keyframe
}
// NewMP4 creates a new MP4 object.
@@ -178,17 +186,35 @@ func (mp4 *MP4) flushPendingVideoSample(nextPTS uint64) bool {
mp4.VideoFullSample.DecodeTime = mp4.VideoTotalDuration - duration
mp4.VideoFullSample.Sample.Dur = uint32(duration)
isKF := mp4.PendingSampleIsKeyframe
err := mp4.MultiTrackFragment.AddFullSampleToTrack(*mp4.VideoFullSample, uint32(mp4.VideoTrack))
if err != nil {
log.Log.Error("mp4.flushPendingVideoSample(): error adding sample: " + err.Error())
}
if isKF {
mp4.TotalKeyframesWritten++
mp4.FragmentKeyframeCount++
log.Log.Debug(fmt.Sprintf("mp4.flushPendingVideoSample(): KEYFRAME WRITTEN to trun - totalWritten=%d, fragmentKF=%d, flags=0x%08x, dur=%d, DTS=%d",
mp4.TotalKeyframesWritten, mp4.FragmentKeyframeCount, mp4.VideoFullSample.Sample.Flags, duration, mp4.VideoFullSample.DecodeTime))
}
mp4.VideoFullSample = nil
mp4.PendingSampleIsKeyframe = false
return true
}
func (mp4 *MP4) AddSampleToTrack(trackID uint32, isKeyframe bool, data []byte, pts uint64) error {
if isKeyframe && trackID == uint32(mp4.VideoTrack) {
mp4.TotalKeyframesReceived++
elapsedDbg := uint64(0)
if mp4.Start {
elapsedDbg = pts - mp4.FragmentStartRawPTS
}
log.Log.Debug(fmt.Sprintf("mp4.AddSampleToTrack(): KEYFRAME #%d received - PTS=%d, size=%d, elapsed=%dms, started=%t, segment=%d, fragKF=%d",
mp4.TotalKeyframesReceived, pts, len(data), elapsedDbg, mp4.Start, mp4.SegmentCount, mp4.FragmentKeyframeCount))
}
if isKeyframe {
// Determine whether to start a new fragment.
@@ -210,6 +236,8 @@ func (mp4 *MP4) AddSampleToTrack(trackID uint32, isKeyframe bool, data []byte, p
mp4.flushPendingVideoSample(pts)
}
log.Log.Debug(fmt.Sprintf("mp4.AddSampleToTrack(): FLUSHING segment #%d - keyframes_in_fragment=%d, totalKF_received=%d, totalKF_written=%d",
mp4.SegmentCount, mp4.FragmentKeyframeCount, mp4.TotalKeyframesReceived, mp4.TotalKeyframesWritten))
mp4.MoofBoxes = mp4.MoofBoxes + 1
mp4.MoofBoxSizes = append(mp4.MoofBoxSizes, int64(mp4.Segment.Size()))
// Track the segment's duration and base decode time for sidx.
@@ -248,6 +276,7 @@ func (mp4 *MP4) AddSampleToTrack(trackID uint32, isKeyframe bool, data []byte, p
mp4.StartPTS = pts
mp4.FragmentStartRawPTS = pts
mp4.FragmentStartDTS = mp4.VideoTotalDuration
mp4.FragmentKeyframeCount = 0 // Reset keyframe counter for new fragment
}
}
@@ -285,6 +314,7 @@ func (mp4 *MP4) AddSampleToTrack(trackID uint32, isKeyframe bool, data []byte, p
CompositionTimeOffset: 0, // No composition time offset for video
}
mp4.VideoFullSample = &fullSample
mp4.PendingSampleIsKeyframe = isKeyframe
mp4.SampleType = "video"
}
} else if trackID == uint32(mp4.AudioTrack) {
@@ -334,6 +364,9 @@ func (mp4 *MP4) AddSampleToTrack(trackID uint32, isKeyframe bool, data []byte, p
func (mp4 *MP4) Close(config *models.Config) {
log.Log.Info(fmt.Sprintf("mp4.Close(): KEYFRAME SUMMARY - totalReceived=%d, totalWritten=%d, segments=%d, lastFragmentKF=%d",
mp4.TotalKeyframesReceived, mp4.TotalKeyframesWritten, mp4.SegmentCount, mp4.FragmentKeyframeCount))
if mp4.VideoTotalDuration == 0 && mp4.AudioTotalDuration == 0 {
log.Log.Error("mp4.Close(): no video or audio samples added, cannot create MP4 file")
}
@@ -407,22 +440,50 @@ func (mp4 *MP4) Close(config *models.Config) {
moov := mp4ff.NewMoovBox()
init.AddChild(moov)
// Set the creation time and modification time for the moov box
// Compute the actual video duration by summing segment durations.
// This must exactly match the sum of sample durations in the trun boxes
// that were written to the file, ensuring QuickTime (which strictly trusts
// header durations) displays the correct value.
var actualVideoDuration uint64
for _, d := range mp4.SegmentDurations {
actualVideoDuration += d
}
if actualVideoDuration != mp4.VideoTotalDuration {
log.Log.Warning(fmt.Sprintf("mp4.Close(): duration mismatch: accumulated VideoTotalDuration=%d, sum of segment durations=%d (diff=%d ms)",
mp4.VideoTotalDuration, actualVideoDuration, int64(mp4.VideoTotalDuration)-int64(actualVideoDuration)))
}
// Set the creation time and modification time for the moov box.
// QuickTime requires timestamps in Mac HFS format (seconds since 1904-01-01),
// so we convert from Unix epoch by adding MacEpochOffset.
videoTimescale := uint32(1000)
audioTimescale := uint32(1000)
macTime := mp4.StartTime + MacEpochOffset
nextTrackID := uint32(len(mp4.TrackIDs) + 1)
// mvhd.Duration must be the duration of the longest track.
// Start with video; if audio is longer, we update below.
movDuration := actualVideoDuration
if mp4.AudioTotalDuration > movDuration {
movDuration = mp4.AudioTotalDuration
}
mvhd := &mp4ff.MvhdBox{
Version: 0,
Flags: 0,
CreationTime: mp4.StartTime,
ModificationTime: mp4.StartTime,
CreationTime: macTime,
ModificationTime: macTime,
Timescale: videoTimescale,
Duration: mp4.VideoTotalDuration,
Duration: movDuration,
Rate: 0x00010000, // 1.0 playback speed (16.16 fixed point)
Volume: 0x0100, // 1.0 full volume (8.8 fixed point)
NextTrackID: nextTrackID,
}
init.Moov.AddChild(mvhd)
// Set the total duration in the moov box
mvex := mp4ff.NewMvexBox()
mvex.AddChild(&mp4ff.MehdBox{FragmentDuration: int64(mp4.VideoTotalDuration)})
mvex.AddChild(&mp4ff.MehdBox{FragmentDuration: int64(movDuration)})
init.Moov.AddChild(mvex)
// Add a track for the video
@@ -433,22 +494,34 @@ func (mp4 *MP4) Close(config *models.Config) {
err := init.Moov.Traks[0].SetAVCDescriptor("avc1", mp4.SPSNALUs, mp4.PPSNALUs, includePS)
if err != nil {
}
init.Moov.Traks[0].Tkhd.Duration = mp4.VideoTotalDuration
init.Moov.Traks[0].Tkhd.Duration = actualVideoDuration
init.Moov.Traks[0].Tkhd.Width = mp4ff.Fixed32(uint32(mp4.width) << 16)
init.Moov.Traks[0].Tkhd.Height = mp4ff.Fixed32(uint32(mp4.height) << 16)
init.Moov.Traks[0].Tkhd.CreationTime = macTime
init.Moov.Traks[0].Tkhd.ModificationTime = macTime
init.Moov.Traks[0].Mdia.Hdlr.Name = "agent " + utils.VERSION
init.Moov.Traks[0].Mdia.Mdhd.Duration = mp4.VideoTotalDuration
// mdhd.Duration MUST be 0 for fragmented MP4. QuickTime adds mdhd.Duration
// to the fragment durations (mehd/sidx), so setting it non-zero doubles the
// reported duration. Leave it at 0 so the player derives duration from fragments.
init.Moov.Traks[0].Mdia.Mdhd.Duration = 0
init.Moov.Traks[0].Mdia.Mdhd.CreationTime = macTime
init.Moov.Traks[0].Mdia.Mdhd.ModificationTime = macTime
case "H265", "HVC1":
init.AddEmptyTrack(videoTimescale, "video", "und")
includePS := true
err := init.Moov.Traks[0].SetHEVCDescriptor("hvc1", mp4.VPSNALUs, mp4.SPSNALUs, mp4.PPSNALUs, [][]byte{}, includePS)
if err != nil {
}
init.Moov.Traks[0].Tkhd.Duration = mp4.VideoTotalDuration
init.Moov.Traks[0].Tkhd.Duration = actualVideoDuration
init.Moov.Traks[0].Tkhd.Width = mp4ff.Fixed32(uint32(mp4.width) << 16)
init.Moov.Traks[0].Tkhd.Height = mp4ff.Fixed32(uint32(mp4.height) << 16)
init.Moov.Traks[0].Tkhd.CreationTime = macTime
init.Moov.Traks[0].Tkhd.ModificationTime = macTime
init.Moov.Traks[0].Mdia.Hdlr.Name = "agent " + utils.VERSION
init.Moov.Traks[0].Mdia.Mdhd.Duration = mp4.VideoTotalDuration
// mdhd.Duration MUST be 0 for fragmented MP4 (see H264 case above).
init.Moov.Traks[0].Mdia.Mdhd.Duration = 0
init.Moov.Traks[0].Mdia.Mdhd.CreationTime = macTime
init.Moov.Traks[0].Mdia.Mdhd.ModificationTime = macTime
}
// Try adding audio track if available
@@ -466,8 +539,13 @@ func (mp4 *MP4) Close(config *models.Config) {
if err != nil {
}
init.Moov.Traks[1].Tkhd.Duration = mp4.AudioTotalDuration
init.Moov.Traks[1].Tkhd.CreationTime = macTime
init.Moov.Traks[1].Tkhd.ModificationTime = macTime
init.Moov.Traks[1].Mdia.Hdlr.Name = "agent " + utils.VERSION
init.Moov.Traks[1].Mdia.Mdhd.Duration = mp4.AudioTotalDuration
// mdhd.Duration MUST be 0 for fragmented MP4 (see video track comment).
init.Moov.Traks[1].Mdia.Mdhd.Duration = 0
init.Moov.Traks[1].Mdia.Mdhd.CreationTime = macTime
init.Moov.Traks[1].Mdia.Mdhd.ModificationTime = macTime
}
// Try adding subtitle track if available
@@ -498,9 +576,11 @@ func (mp4 *MP4) Close(config *models.Config) {
// and encrypted with the public key.
fingerprint := fmt.Sprintf("%d", init.Moov.Mvhd.CreationTime) + "_" +
fmt.Sprintf("%d", init.Moov.Mvhd.Duration) + "_" +
init.Moov.Trak.Mdia.Hdlr.Name + "_" +
fmt.Sprintf("%d", mp4.MoofBoxes) + "_" // Number of moof boxes
fmt.Sprintf("%d", init.Moov.Mvhd.Duration) + "_"
if init.Moov.Trak != nil {
fingerprint += init.Moov.Trak.Mdia.Hdlr.Name + "_"
}
fingerprint += fmt.Sprintf("%d", mp4.MoofBoxes) + "_" // Number of moof boxes
for i, size := range mp4.MoofBoxSizes {
fingerprint += fmt.Sprintf("%d", size)
@@ -514,7 +594,10 @@ func (mp4 *MP4) Close(config *models.Config) {
}
// Load the private key from the configuration
privateKey := config.Signing.PrivateKey
var privateKey string
if config.Signing != nil {
privateKey = config.Signing.PrivateKey
}
r := strings.NewReader(privateKey)
pemBytes, _ := ioutil.ReadAll(r)
block, _ := pem.Decode(pemBytes)

View File

@@ -0,0 +1,176 @@
package video
import (
"fmt"
"os"
"testing"
mp4ff "github.com/Eyevinn/mp4ff/mp4"
"github.com/kerberos-io/agent/machinery/src/models"
)
// TestMP4Duration creates a fragmented MP4 file simulating a 6-second video
// recording (150 frames at 25fps, timescale 1000) and verifies that the
// duration values in the container headers (mvhd/tkhd/mdhd/mehd) are
// consistent with the sum of the per-sample durations in the trun boxes.
func TestMP4Duration(t *testing.T) {
	// t.TempDir() is removed automatically when the test finishes, so no
	// explicit cleanup is needed; filepath.Join keeps the path OS-correct.
	tmpFile := filepath.Join(t.TempDir(), "test_duration.mp4")

	// Minimal SPS/PPS for H.264 (baseline, 640x480). These are raw NAL unit
	// payloads (no Annex B start code prefix).
	sps := []byte{0x67, 0x42, 0xc0, 0x1e, 0xd9, 0x00, 0xa0, 0x47, 0xfe, 0xc8}
	pps := []byte{0x68, 0xce, 0x38, 0x80}

	mp4Video := NewMP4(tmpFile, [][]byte{sps}, [][]byte{pps}, nil, 10)
	mp4Video.SetWidth(640)
	mp4Video.SetHeight(480)
	videoTrack := mp4Video.AddVideoTrack("H264")

	// Simulate 6 seconds at 25fps: 150 frames, keyframe every 50 frames
	// (i.e. a 2-second GOP). PTS values are in milliseconds (timescale=1000).
	frameDuration := uint64(40) // 40ms per frame = 25fps
	numFrames := 150
	gopSize := 50
	expectedDuration := uint64(numFrames) * frameDuration // 6000ms (150 * 40)

	// makeFrame builds a fake Annex B NAL unit: an IDR slice (type 5) for
	// keyframes, a non-IDR slice (type 1) otherwise.
	makeFrame := func(isKey bool) []byte {
		nalType := byte(0x01) // non-IDR slice
		if isKey {
			nalType = 0x65 // IDR slice
		}
		// Start code (4 bytes) + NAL header + some padding data.
		frame := []byte{0x00, 0x00, 0x00, 0x01, nalType}
		for i := 0; i < 100; i++ {
			frame = append(frame, byte(i))
		}
		return frame
	}

	for i := 0; i < numFrames; i++ {
		pts := uint64(i) * frameDuration
		isKeyframe := i%gopSize == 0
		if err := mp4Video.AddSampleToTrack(videoTrack, isKeyframe, makeFrame(isKeyframe), pts); err != nil {
			t.Fatalf("AddSampleToTrack failed at frame %d: %v", i, err)
		}
	}

	// Close with a config that has a signing section to avoid nil panics
	// inside Close (it reads config.Signing.PrivateKey).
	config := &models.Config{
		Signing: &models.Signing{
			PrivateKey: "",
		},
	}
	mp4Video.Close(config)

	// Log what the muxer computed.
	t.Logf("VideoTotalDuration: %d ms", mp4Video.VideoTotalDuration)
	t.Logf("Expected duration: %d ms", expectedDuration)
	t.Logf("Segments: %d", len(mp4Video.SegmentDurations))
	var sumSegDur uint64
	for i, d := range mp4Video.SegmentDurations {
		t.Logf(" Segment %d: duration=%d ms", i, d)
		sumSegDur += d
	}
	t.Logf("Sum of segment durations: %d ms", sumSegDur)

	// Now read back the file and inspect the boxes.
	f, err := os.Open(tmpFile)
	if err != nil {
		t.Fatalf("Failed to open output file: %v", err)
	}
	defer f.Close()
	fi, err := f.Stat()
	if err != nil {
		t.Fatalf("Failed to stat output file: %v", err)
	}
	parsedFile, err := mp4ff.DecodeFile(f)
	if err != nil {
		t.Fatalf("Failed to decode MP4: %v", err)
	}
	t.Logf("File size: %d bytes", fi.Size())

	// Check moov box.
	if parsedFile.Moov == nil {
		t.Fatal("No moov box found")
	}

	// Check mvhd duration.
	mvhd := parsedFile.Moov.Mvhd
	t.Logf("mvhd.Duration: %d (timescale=%d) = %.2f seconds", mvhd.Duration, mvhd.Timescale, float64(mvhd.Duration)/float64(mvhd.Timescale))
	t.Logf("mvhd.Rate: 0x%08x", mvhd.Rate)
	t.Logf("mvhd.Volume: 0x%04x", mvhd.Volume)

	// Check each trak.
	for i, trak := range parsedFile.Moov.Traks {
		t.Logf("Track %d:", i)
		t.Logf("  tkhd.Duration: %d", trak.Tkhd.Duration)
		t.Logf("  mdhd.Duration: %d (timescale=%d) = %.2f seconds", trak.Mdia.Mdhd.Duration, trak.Mdia.Mdhd.Timescale, float64(trak.Mdia.Mdhd.Duration)/float64(trak.Mdia.Mdhd.Timescale))
	}

	// Check mvex/mehd.
	if parsedFile.Moov.Mvex != nil && parsedFile.Moov.Mvex.Mehd != nil {
		t.Logf("mehd.FragmentDuration: %d", parsedFile.Moov.Mvex.Mehd.FragmentDuration)
	}

	// Sum up actual sample durations from trun boxes in all segments.
	var actualTrunDuration uint64
	var sampleCount int
	for _, seg := range parsedFile.Segments {
		for _, frag := range seg.Fragments {
			for _, traf := range frag.Moof.Trafs {
				// Only count the video track (track 1).
				if traf.Tfhd.TrackID == 1 {
					for _, trun := range traf.Truns {
						for _, s := range trun.Samples {
							actualTrunDuration += uint64(s.Dur)
							sampleCount++
						}
					}
				}
			}
		}
	}
	t.Logf("Actual trun sample count: %d", sampleCount)
	t.Logf("Actual trun total duration: %d ms", actualTrunDuration)

	// Check sidx.
	if parsedFile.Sidx != nil {
		var sidxDuration uint64
		for _, ref := range parsedFile.Sidx.SidxRefs {
			sidxDuration += uint64(ref.SubSegmentDuration)
		}
		t.Logf("sidx total duration: %d ms", sidxDuration)
	}

	// VERIFY: All duration values should be consistent.
	// The expected duration for 150 frames at 40ms each is 6000ms.
	// NOTE(review): due to the pending-sample buffering pattern in the muxer,
	// the actual trun durations might differ slightly from the nominal total.
	fmt.Println()
	fmt.Println("=== DURATION CONSISTENCY CHECK ===")
	fmt.Printf("Expected (150 * 40ms): %d ms\n", expectedDuration)
	fmt.Printf("mvhd.Duration: %d ms\n", mvhd.Duration)
	fmt.Printf("tkhd.Duration: %d ms\n", parsedFile.Moov.Traks[0].Tkhd.Duration)
	fmt.Printf("mdhd.Duration: %d ms\n", parsedFile.Moov.Traks[0].Mdia.Mdhd.Duration)
	fmt.Printf("Actual trun durations sum: %d ms\n", actualTrunDuration)
	fmt.Printf("VideoTotalDuration: %d ms\n", mp4Video.VideoTotalDuration)
	fmt.Printf("Sum of SegmentDurations: %d ms\n", sumSegDur)
	fmt.Println()

	// The key assertion: the header duration must equal the trun sum.
	if mvhd.Duration != actualTrunDuration {
		t.Errorf("MISMATCH: mvhd.Duration (%d) != actual trun sum (%d), diff = %d ms",
			mvhd.Duration, actualTrunDuration, int64(mvhd.Duration)-int64(actualTrunDuration))
	}
	// For fragmented MP4 the media header duration must be 0 so players
	// derive the duration from the fragments instead of double-counting.
	if parsedFile.Moov.Traks[0].Mdia.Mdhd.Duration != 0 {
		t.Errorf("MISMATCH: mdhd.Duration should be 0 for fragmented MP4, got %d",
			parsedFile.Moov.Traks[0].Mdia.Mdhd.Duration)
	}
}

View File

@@ -49,6 +49,7 @@ type peerConnectionWrapper struct {
conn *pionWebRTC.PeerConnection
cancelCtx context.CancelFunc
done chan struct{}
closeOnce sync.Once
}
var globalConnectionManager = NewConnectionManager()
@@ -339,18 +340,20 @@ func InitializeWebRTCConnection(configuration *models.Configuration, communicati
switch connectionState {
case pionWebRTC.PeerConnectionStateDisconnected, pionWebRTC.PeerConnectionStateClosed:
count := globalConnectionManager.DecrementPeerCount()
log.Log.Info("webrtc.main.InitializeWebRTCConnection(): Peer disconnected. Active peers: " + string(rune(count)))
wrapper.closeOnce.Do(func() {
count := globalConnectionManager.DecrementPeerCount()
log.Log.Info("webrtc.main.InitializeWebRTCConnection(): Peer disconnected. Active peers: " + string(rune(count)))
// Clean up resources
globalConnectionManager.CloseCandidateChannel(sessionKey)
// Clean up resources
globalConnectionManager.CloseCandidateChannel(sessionKey)
if err := peerConnection.Close(); err != nil {
log.Log.Error("webrtc.main.InitializeWebRTCConnection(): error closing peer connection: " + err.Error())
}
if err := peerConnection.Close(); err != nil {
log.Log.Error("webrtc.main.InitializeWebRTCConnection(): error closing peer connection: " + err.Error())
}
globalConnectionManager.RemovePeerConnection(handshake.SessionID)
close(wrapper.done)
globalConnectionManager.RemovePeerConnection(handshake.SessionID)
close(wrapper.done)
})
case pionWebRTC.PeerConnectionStateConnected:
count := globalConnectionManager.IncrementPeerCount()