Compare commits

...

8 Commits

Author SHA1 Message Date
Cédric Verstraeten
2c02e0aeb1 Merge pull request #250 from kerberos-io/fix/add-avc-description-fallback
fix/add-avc-description-fallback
2026-02-27 11:48:34 +01:00
cedricve
d5464362bb Add AVC descriptor fallback for SPS parse errors
When setting the AVC descriptor fails in MP4.Close(), attempt a fallback that constructs an AvcC/avc1 sample entry from available SPS/PPS NALUs. Adds github.com/Eyevinn/mp4ff/avc import and two helpers: addAVCDescriptorFallback (builds a visual sample entry, sets tkhd width/height if available, and inserts it into stsd) and buildAVCDecConfRecFromSPS (creates an avc.DecConfRec from SPS/PPS bytes by extracting profile/compat/level and filling defaults). Logs errors and warns when the fallback is used. This provides resilience against SPS parsing errors when writing the MP4 track descriptor.
2026-02-27 11:35:22 +01:00
Cédric Verstraeten
5bcefd0015 Merge pull request #249 from kerberos-io/feature/enhance-avc-hevc-ssp-nalus
feature/enhance-avc-hevc-ssp-nalus
2026-02-27 11:12:03 +01:00
cedricve
5bb9def42d Normalize and debug H264/H265 parameter sets
Replace direct sanitizeParameterSets usage with normalizeH264ParameterSets and normalizeH265ParameterSets in mp4.Close. The new functions split Annex-B blobs, strip start codes, detect NALU types (SPS/PPS for AVC; VPS/SPS/PPS for HEVC), aggregate distinct parameter sets and fall back to sanitizeParameterSets if none are found. Added splitParamSetNALUs and formatNaluDebug helpers and debug logging to output concise parameter-set summaries before setting AVC/HEVC descriptors. These changes improve handling of concatenated Annex-B parameter set blobs and make debugging parameter extraction easier.
2026-02-27 11:09:28 +01:00
Cédric Verstraeten
ff38ccbadf Merge pull request #248 from kerberos-io/fix/sanitize-parameter-sets
fix/sanitize-parameter-sets
2026-02-26 20:43:53 +01:00
cedricve
f64e899de9 Populate/sanitize NALUs and avoid empty MP4
Fill missing SPS/PPS/VPS from camera config before closing recordings and warn when parameter sets are incomplete (for both continuous and motion-detection flows). Sanitize parameter sets (remove Annex-B start codes and drop empty NALUs) before writing AVC/HEVC descriptors. Prevent creation of empty MP4 files by flushing/closing and removing files when no audio/video samples were added, and only add an audio track when audio samples exist.
2026-02-26 20:37:10 +01:00
Cédric Verstraeten
b8a81d18af Merge pull request #247 from kerberos-io/fix/ensure-stsd
fix/ensure-stsd
2026-02-26 17:13:45 +01:00
cedricve
8c2e3e4cdd Recover video parameter sets from Annex B NALUs
Add updateVideoParameterSetsFromAnnexB to parse Annex B NALUs and populate missing SPS/PPS/VPS for H.264/H.265 streams. Call this helper when adding video samples so in-band parameter sets can be recovered early. Also add error logging in Close() when setting AVC/HEVC descriptors fails. These changes improve robustness for streams that carry SPS/PPS/VPS inline.
2026-02-26 17:05:09 +01:00
2 changed files with 277 additions and 5 deletions

View File

@@ -159,6 +159,19 @@ func HandleRecordStream(queue *packets.Queue, configDirectory string, configurat
}
// Close mp4
if len(mp4Video.SPSNALUs) == 0 && len(configuration.Config.Capture.IPCamera.SPSNALUs) > 0 {
mp4Video.SPSNALUs = configuration.Config.Capture.IPCamera.SPSNALUs
}
if len(mp4Video.PPSNALUs) == 0 && len(configuration.Config.Capture.IPCamera.PPSNALUs) > 0 {
mp4Video.PPSNALUs = configuration.Config.Capture.IPCamera.PPSNALUs
}
if len(mp4Video.VPSNALUs) == 0 && len(configuration.Config.Capture.IPCamera.VPSNALUs) > 0 {
mp4Video.VPSNALUs = configuration.Config.Capture.IPCamera.VPSNALUs
}
if (videoCodec == "H264" && (len(mp4Video.SPSNALUs) == 0 || len(mp4Video.PPSNALUs) == 0)) ||
(videoCodec == "H265" && (len(mp4Video.VPSNALUs) == 0 || len(mp4Video.SPSNALUs) == 0 || len(mp4Video.PPSNALUs) == 0)) {
log.Log.Warning("capture.main.HandleRecordStream(continuous): closing MP4 without full parameter sets, moov may be incomplete")
}
mp4Video.Close(&config)
log.Log.Info("capture.main.HandleRecordStream(continuous): recording finished: file save: " + name)
@@ -580,6 +593,19 @@ func HandleRecordStream(queue *packets.Queue, configDirectory string, configurat
lastRecordingTime = pkt.CurrentTime
// This will close the recording and write the last packet.
if len(mp4Video.SPSNALUs) == 0 && len(configuration.Config.Capture.IPCamera.SPSNALUs) > 0 {
mp4Video.SPSNALUs = configuration.Config.Capture.IPCamera.SPSNALUs
}
if len(mp4Video.PPSNALUs) == 0 && len(configuration.Config.Capture.IPCamera.PPSNALUs) > 0 {
mp4Video.PPSNALUs = configuration.Config.Capture.IPCamera.PPSNALUs
}
if len(mp4Video.VPSNALUs) == 0 && len(configuration.Config.Capture.IPCamera.VPSNALUs) > 0 {
mp4Video.VPSNALUs = configuration.Config.Capture.IPCamera.VPSNALUs
}
if (videoCodec == "H264" && (len(mp4Video.SPSNALUs) == 0 || len(mp4Video.PPSNALUs) == 0)) ||
(videoCodec == "H265" && (len(mp4Video.VPSNALUs) == 0 || len(mp4Video.SPSNALUs) == 0 || len(mp4Video.PPSNALUs) == 0)) {
log.Log.Warning("capture.main.HandleRecordStream(motiondetection): closing MP4 without full parameter sets, moov may be incomplete")
}
mp4Video.Close(&config)
log.Log.Info("capture.main.HandleRecordStream(motiondetection): file save: " + name)

View File

@@ -13,6 +13,7 @@ import (
"strings"
"time"
"github.com/Eyevinn/mp4ff/avc"
mp4ff "github.com/Eyevinn/mp4ff/mp4"
"github.com/kerberos-io/agent/machinery/src/encryption"
"github.com/kerberos-io/agent/machinery/src/log"
@@ -158,6 +159,68 @@ func (mp4 *MP4) AddAudioTrack(codec string) uint32 {
func (mp4 *MP4) AddMediaSegment(segNr int) {
}
// updateVideoParameterSetsFromAnnexB inspects Annex B data to fill missing SPS/PPS/VPS.
//
// Only parameter sets that are currently empty on mp4 are filled; existing
// ones are never overwritten. The first matching NALU of each kind wins.
//
// Which sets are considered "missing" depends on mp4.VideoTrackName:
//   - "H264"/"AVC1": SPS (type 7) and PPS (type 8). VPS is not looked for,
//     because the H.264 branch never assigns it; treating it as missing would
//     force a full NALU split on every sample for the whole recording.
//   - "H265"/"HVC1": VPS (type 32), SPS (type 33) and PPS (type 34).
//   - any other track name: nothing can be recovered, so data is not scanned.
//
// data is the raw sample payload as handed to AddSampleToTrack.
func (mp4 *MP4) updateVideoParameterSetsFromAnnexB(data []byte) {
	if len(data) == 0 {
		return
	}

	var isHEVC bool
	switch mp4.VideoTrackName {
	case "H264", "AVC1":
		isHEVC = false
	case "H265", "HVC1":
		isHEVC = true
	default:
		// Unknown codec: no NALU type would be recognised below.
		return
	}

	needSPS := len(mp4.SPSNALUs) == 0
	needPPS := len(mp4.PPSNALUs) == 0
	needVPS := isHEVC && len(mp4.VPSNALUs) == 0
	if !(needSPS || needPPS || needVPS) {
		return
	}

	for _, nalu := range splitNALUs(data) {
		nalu = removeAnnexBStartCode(nalu)
		if len(nalu) == 0 {
			continue
		}

		// Classify the NALU from its header byte according to the codec.
		var isVPS, isSPS, isPPS bool
		if isHEVC {
			nalType := (nalu[0] >> 1) & 0x3F
			isVPS, isSPS, isPPS = nalType == 32, nalType == 33, nalType == 34
		} else {
			nalType := nalu[0] & 0x1F
			isSPS, isPPS = nalType == 7, nalType == 8
		}

		// The NALU is copied before being stored: it may be a sub-slice of the
		// caller's sample buffer (splitNALUs is not visible here), and the
		// parameter sets must stay valid until Close() writes the descriptor.
		switch {
		case isVPS && needVPS:
			mp4.VPSNALUs = [][]byte{append([]byte(nil), nalu...)}
			needVPS = false
			log.Log.Warning("mp4.updateVideoParameterSetsFromAnnexB(): VPS recovered from in-band NALU")
		case isSPS && needSPS:
			mp4.SPSNALUs = [][]byte{append([]byte(nil), nalu...)}
			needSPS = false
			log.Log.Warning("mp4.updateVideoParameterSetsFromAnnexB(): SPS recovered from in-band NALU")
		case isPPS && needPPS:
			mp4.PPSNALUs = [][]byte{append([]byte(nil), nalu...)}
			needPPS = false
			log.Log.Warning("mp4.updateVideoParameterSetsFromAnnexB(): PPS recovered from in-band NALU")
		}

		// Everything we were looking for has been found; skip the remaining NALUs.
		if !(needSPS || needPPS || needVPS) {
			return
		}
	}
}
// flushPendingVideoSample writes the pending video sample to the current fragment.
// If nextPTS is provided (non-zero), it calculates duration from the PTS difference.
// If nextPTS is 0 (e.g., at Close time), it uses the last known duration.
@@ -283,6 +346,7 @@ func (mp4 *MP4) AddSampleToTrack(trackID uint32, isKeyframe bool, data []byte, p
if mp4.Start {
if trackID == uint32(mp4.VideoTrack) {
mp4.updateVideoParameterSetsFromAnnexB(data)
var lengthPrefixed []byte
var err error
@@ -368,7 +432,12 @@ func (mp4 *MP4) Close(config *models.Config) {
mp4.TotalKeyframesReceived, mp4.TotalKeyframesWritten, mp4.SegmentCount, mp4.FragmentKeyframeCount))
if mp4.VideoTotalDuration == 0 && mp4.AudioTotalDuration == 0 {
log.Log.Error("mp4.Close(): no video or audio samples added, cannot create MP4 file")
log.Log.Error("mp4.Close(): no video or audio samples added, removing empty MP4 file")
mp4.Writer.Flush()
_ = mp4.FileWriter.Sync()
_ = mp4.FileWriter.Close()
_ = os.Remove(mp4.FileName)
return
}
// Add final pending samples before closing
@@ -491,8 +560,16 @@ func (mp4 *MP4) Close(config *models.Config) {
case "H264", "AVC1":
init.AddEmptyTrack(videoTimescale, "video", "und")
includePS := true
err := init.Moov.Traks[0].SetAVCDescriptor("avc1", mp4.SPSNALUs, mp4.PPSNALUs, includePS)
spsNALUs, ppsNALUs := normalizeH264ParameterSets(mp4.SPSNALUs, mp4.PPSNALUs)
log.Log.Debug("mp4.Close(): AVC parameter sets: SPS=" + formatNaluDebug(spsNALUs) + ", PPS=" + formatNaluDebug(ppsNALUs))
err := init.Moov.Traks[0].SetAVCDescriptor("avc1", spsNALUs, ppsNALUs, includePS)
if err != nil {
log.Log.Error("mp4.Close(): error setting AVC descriptor: " + err.Error())
if fallbackErr := addAVCDescriptorFallback(init.Moov.Traks[0], spsNALUs, ppsNALUs, uint16(mp4.width), uint16(mp4.height)); fallbackErr != nil {
log.Log.Error("mp4.Close(): error setting AVC descriptor fallback: " + fallbackErr.Error())
} else {
log.Log.Warning("mp4.Close(): AVC descriptor fallback used due to SPS parse error")
}
}
init.Moov.Traks[0].Tkhd.Duration = actualVideoDuration
init.Moov.Traks[0].Tkhd.Width = mp4ff.Fixed32(uint32(mp4.width) << 16)
@@ -509,8 +586,11 @@ func (mp4 *MP4) Close(config *models.Config) {
case "H265", "HVC1":
init.AddEmptyTrack(videoTimescale, "video", "und")
includePS := true
err := init.Moov.Traks[0].SetHEVCDescriptor("hvc1", mp4.VPSNALUs, mp4.SPSNALUs, mp4.PPSNALUs, [][]byte{}, includePS)
vpsNALUs, spsNALUs, ppsNALUs := normalizeH265ParameterSets(mp4.VPSNALUs, mp4.SPSNALUs, mp4.PPSNALUs)
log.Log.Debug("mp4.Close(): HEVC parameter sets: VPS=" + formatNaluDebug(vpsNALUs) + ", SPS=" + formatNaluDebug(spsNALUs) + ", PPS=" + formatNaluDebug(ppsNALUs))
err := init.Moov.Traks[0].SetHEVCDescriptor("hvc1", vpsNALUs, spsNALUs, ppsNALUs, [][]byte{}, includePS)
if err != nil {
log.Log.Error("mp4.Close(): error setting HEVC descriptor: " + err.Error())
}
init.Moov.Traks[0].Tkhd.Duration = actualVideoDuration
init.Moov.Traks[0].Tkhd.Width = mp4ff.Fixed32(uint32(mp4.width) << 16)
@@ -524,8 +604,8 @@ func (mp4 *MP4) Close(config *models.Config) {
init.Moov.Traks[0].Mdia.Mdhd.ModificationTime = macTime
}
// Try adding audio track if available
if mp4.AudioTrackName == "AAC" || mp4.AudioTrackName == "MP4A" {
// Try adding audio track if available and samples were recorded.
if (mp4.AudioTrackName == "AAC" || mp4.AudioTrackName == "MP4A") && mp4.AudioTotalDuration > 0 {
// Add an audio track to the moov box
init.AddEmptyTrack(audioTimescale, "audio", "und")
@@ -763,6 +843,172 @@ func removeAnnexBStartCode(nalu []byte) []byte {
return nalu
}
// sanitizeParameterSets passes every NALU through removeAnnexBStartCode and
// keeps only the ones that are still non-empty afterwards. An empty input is
// handed back unchanged.
func sanitizeParameterSets(nalus [][]byte) [][]byte {
	count := len(nalus)
	if count == 0 {
		return nalus
	}

	kept := make([][]byte, 0, count)
	for i := 0; i < count; i++ {
		if stripped := removeAnnexBStartCode(nalus[i]); len(stripped) > 0 {
			kept = append(kept, stripped)
		}
	}
	return kept
}
// normalizeH264ParameterSets re-sorts the supplied H.264 parameter sets by
// their actual NALU type. Every input blob (SPS inputs first, then PPS inputs)
// is split with splitParamSetNALUs, stripped of its start code, and appended
// to the SPS result (type 7) or the PPS result (type 8); other types are
// ignored. If no NALU of a kind was recognised, that result falls back to
// sanitizeParameterSets applied to the corresponding input.
func normalizeH264ParameterSets(spsIn [][]byte, ppsIn [][]byte) ([][]byte, [][]byte) {
	const (
		h264TypeSPS = 7
		h264TypePPS = 8
	)

	var sps, pps [][]byte

	// classify sorts every NALU found in blobs into sps/pps by header type.
	classify := func(blobs [][]byte) {
		for _, blob := range blobs {
			for _, unit := range splitParamSetNALUs(blob) {
				unit = removeAnnexBStartCode(unit)
				if len(unit) == 0 {
					continue
				}
				switch unit[0] & 0x1F {
				case h264TypeSPS:
					sps = append(sps, unit)
				case h264TypePPS:
					pps = append(pps, unit)
				}
			}
		}
	}
	classify(spsIn)
	classify(ppsIn)

	if len(sps) == 0 {
		sps = sanitizeParameterSets(spsIn)
	}
	if len(pps) == 0 {
		pps = sanitizeParameterSets(ppsIn)
	}
	return sps, pps
}
// normalizeH265ParameterSets re-sorts the supplied H.265 parameter sets by
// their actual NALU type. Every input blob (VPS inputs first, then SPS, then
// PPS) is split with splitParamSetNALUs, stripped of its start code, and
// appended to the VPS (type 32), SPS (type 33) or PPS (type 34) result; other
// types are ignored. If no NALU of a kind was recognised, that result falls
// back to sanitizeParameterSets applied to the corresponding input.
func normalizeH265ParameterSets(vpsIn [][]byte, spsIn [][]byte, ppsIn [][]byte) ([][]byte, [][]byte, [][]byte) {
	const (
		h265TypeVPS = 32
		h265TypeSPS = 33
		h265TypePPS = 34
	)

	var vps, sps, pps [][]byte

	// classify sorts every NALU found in blobs into vps/sps/pps by header type.
	classify := func(blobs [][]byte) {
		for _, blob := range blobs {
			for _, unit := range splitParamSetNALUs(blob) {
				unit = removeAnnexBStartCode(unit)
				if len(unit) == 0 {
					continue
				}
				switch (unit[0] >> 1) & 0x3F {
				case h265TypeVPS:
					vps = append(vps, unit)
				case h265TypeSPS:
					sps = append(sps, unit)
				case h265TypePPS:
					pps = append(pps, unit)
				}
			}
		}
	}
	classify(vpsIn)
	classify(spsIn)
	classify(ppsIn)

	if len(vps) == 0 {
		vps = sanitizeParameterSets(vpsIn)
	}
	if len(sps) == 0 {
		sps = sanitizeParameterSets(spsIn)
	}
	if len(pps) == 0 {
		pps = sanitizeParameterSets(ppsIn)
	}
	return vps, sps, pps
}
// splitParamSetNALUs turns one parameter-set blob into a list of NALUs.
// An empty blob yields nil. When findStartCode reports a start code in the
// blob (non-negative result), the blob is split with splitNALUs; otherwise it
// is treated as a single raw NALU and returned as the only element.
func splitParamSetNALUs(blob []byte) [][]byte {
	switch {
	case len(blob) == 0:
		return nil
	case findStartCode(blob, 0) < 0:
		return [][]byte{blob}
	default:
		return splitNALUs(blob)
	}
}
// formatNaluDebug renders a short, log-friendly summary of a NALU list.
// It returns "none" for an empty list. Otherwise each NALU becomes
// "len=<n> head=<hex of the first 8 bytes at most>" (or just "len=0" for an
// empty NALU), and the entries are joined with "; ".
func formatNaluDebug(nalus [][]byte) string {
	if len(nalus) == 0 {
		return "none"
	}

	const headLen = 8

	var sb strings.Builder
	for i, nalu := range nalus {
		if i > 0 {
			sb.WriteString("; ")
		}
		if len(nalu) == 0 {
			sb.WriteString("len=0")
			continue
		}
		head := nalu
		if len(head) > headLen {
			head = head[:headLen]
		}
		fmt.Fprintf(&sb, "len=%d head=%x", len(nalu), head)
	}
	return sb.String()
}
// addAVCDescriptorFallback adds an "avc1" visual sample entry to trak's stsd
// box using a decoder configuration record built by buildAVCDecConfRecFromSPS
// from the given SPS/PPS NALUs.
//
// width and height are used for the sample entry; a zero value is replaced by
// the integer part of the matching tkhd field when tkhd is present. When both
// end up non-zero they are also written back to tkhd.
//
// It returns an error when the trak has no stsd box, when no SPS NALU is
// supplied, or when the decoder configuration record cannot be built.
func addAVCDescriptorFallback(trak *mp4ff.TrakBox, spsNALUs, ppsNALUs [][]byte, width, height uint16) error {
	if trak == nil || trak.Mdia == nil || trak.Mdia.Minf == nil || trak.Mdia.Minf.Stbl == nil {
		return fmt.Errorf("missing trak stsd")
	}
	stsd := trak.Mdia.Minf.Stbl.Stsd
	if stsd == nil {
		return fmt.Errorf("missing trak stsd")
	}
	if len(spsNALUs) == 0 {
		return fmt.Errorf("no SPS NALU available")
	}

	confRec, err := buildAVCDecConfRecFromSPS(spsNALUs, ppsNALUs)
	if err != nil {
		return err
	}

	if tkhd := trak.Tkhd; tkhd != nil {
		// tkhd dimensions are 16.16 fixed point; the upper 16 bits hold the pixel count.
		if width == 0 {
			width = uint16(uint32(tkhd.Width) >> 16)
		}
		if height == 0 {
			height = uint16(uint32(tkhd.Height) >> 16)
		}
		if width > 0 && height > 0 {
			tkhd.Width = mp4ff.Fixed32(uint32(width) << 16)
			tkhd.Height = mp4ff.Fixed32(uint32(height) << 16)
		}
	}

	entry := mp4ff.CreateVisualSampleEntryBox("avc1", width, height, &mp4ff.AvcCBox{DecConfRec: *confRec})
	stsd.AddChild(entry)
	return nil
}
// buildAVCDecConfRecFromSPS creates an AVC decoder configuration record
// without parsing the SPS payload: profile, compatibility and level are read
// from the three bytes following the NAL header of the first SPS, and all
// supplied SPS/PPS NALUs are attached as-is.
//
// It returns an error when spsNALUs is empty or the first SPS is shorter than
// four bytes.
func buildAVCDecConfRecFromSPS(spsNALUs, ppsNALUs [][]byte) (*avc.DecConfRec, error) {
	if len(spsNALUs) == 0 {
		return nil, fmt.Errorf("no SPS NALU available")
	}

	first := spsNALUs[0]
	if n := len(first); n < 4 {
		return nil, fmt.Errorf("SPS too short: len=%d", n)
	}

	// first[0] is the NAL header; the next three bytes are profile/compat/level.
	profile, compat, level := first[1], first[2], first[3]

	rec := new(avc.DecConfRec)
	rec.AVCProfileIndication = profile
	rec.ProfileCompatibility = compat
	rec.AVCLevelIndication = level
	rec.SPSnalus = spsNALUs
	rec.PPSnalus = ppsNALUs
	// Fixed defaults, since the SPS is not parsed here. The bit-depth fields
	// and NumSPSExt are written explicitly as zero to mirror the record layout.
	rec.ChromaFormat = 1
	rec.BitDepthLumaMinus1 = 0
	rec.BitDepthChromaMinus1 = 0
	rec.NumSPSExt = 0
	rec.NoTrailingInfo = true
	return rec, nil
}
// splitNALUs splits Annex B data into raw NAL units without start codes.
func splitNALUs(data []byte) [][]byte {
var nalus [][]byte