wip: add vui timing injection, receive the requested fps from camera in rn layer, normalize timestamp

This commit is contained in:
2025-12-24 00:52:50 -05:00
parent c43f4d3a80
commit eceab60d7c
2 changed files with 351 additions and 22 deletions

View File

@@ -20,7 +20,8 @@ import java.io.File
*/
class FragmentedRecordingManager(
private val encoder: MediaCodec,
private val muxer: HlsMuxer
private val muxer: HlsMuxer,
private val configuredFps: Int
) : MediaCodec.Callback(), ChunkedRecorderInterface {
companion object {
@@ -88,9 +89,9 @@ class FragmentedRecordingManager(
)
muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L)
Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees")
Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees, fps: $effectiveFps")
return FragmentedRecordingManager(codec, muxer)
return FragmentedRecordingManager(codec, muxer, effectiveFps)
}
}
@@ -170,7 +171,8 @@ class FragmentedRecordingManager(
synchronized(this) {
Log.i(TAG, "Output format changed: $format")
trackIndex = muxer.addTrack(format)
// Pass configured fps to muxer (not the encoder's output format fps which may differ)
trackIndex = muxer.addTrack(format, configuredFps)
muxer.start()
muxerStarted = true
}

View File

@@ -42,6 +42,7 @@ class HlsMuxer(
// Configuration
private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
private var timescale: Int = 30000 // Default, updated from format
private var configuredFps: Int = 30 // Configured fps from user, used for VUI timing
// State
private var state = State.UNINITIALIZED
@@ -54,6 +55,9 @@ class HlsMuxer(
private var segmentStartTimeUs = -1L
private var lastPresentationTimeUs = 0L
// Timestamp normalization - first timestamp becomes time 0
private var firstPresentationTimeUs = -1L
private enum class State {
UNINITIALIZED,
INITIALIZED,
@@ -69,6 +73,21 @@ class HlsMuxer(
val isKeyFrame: Boolean
)
// ==================== Timestamp Normalization ====================
/**
 * Rebases a presentation timestamp onto a timeline that begins at 0.
 *
 * The first value passed in is remembered in [firstPresentationTimeUs]; every
 * call (including that first one) returns the raw value minus that base.
 *
 * @param rawPresentationTimeUs timestamp in microseconds as received by the caller
 * @return the timestamp relative to the first one seen
 */
private fun normalizeTimestamp(rawPresentationTimeUs: Long): Long {
    // Base already captured: just subtract it.
    if (firstPresentationTimeUs >= 0) {
        return rawPresentationTimeUs - firstPresentationTimeUs
    }

    // A negative base means "not captured yet": latch this timestamp as the base.
    firstPresentationTimeUs = rawPresentationTimeUs
    Log.d(TAG, "First timestamp: ${rawPresentationTimeUs}us, normalizing to 0")
    return rawPresentationTimeUs - firstPresentationTimeUs
}
// ==================== Annex-B to AVCC Conversion ====================
/**
@@ -194,19 +213,14 @@ class HlsMuxer(
* Adds a track to the muxer.
*
* @param format The MediaFormat describing the track
* @param fps The configured frame rate (used for VUI timing, overrides format's fps)
* @return Track index (always 0 for now, single video track)
*/
fun addTrack(format: MediaFormat): Int {
fun addTrack(format: MediaFormat, fps: Int = 30): Int {
check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" }
trackFormat = format
// Extract timescale from frame rate
val fps = try {
format.getInteger(MediaFormat.KEY_FRAME_RATE)
} catch (e: Exception) {
30
}
configuredFps = fps
timescale = fps * 1000 // Use fps * 1000 for good precision
state = State.INITIALIZED
@@ -215,7 +229,7 @@ class HlsMuxer(
val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 }
Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
"encoder output: ${formatWidth}x${formatHeight}, " +
"timescale=$timescale, orientation=$orientationDegrees°")
"configuredFps=$configuredFps, timescale=$timescale, orientation=$orientationDegrees°")
return 0 // Single track, index 0
}
@@ -259,7 +273,7 @@ class HlsMuxer(
}
val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
val presentationTimeUs = bufferInfo.presentationTimeUs
val presentationTimeUs = normalizeTimestamp(bufferInfo.presentationTimeUs)
// Initialize segment start time
if (segmentStartTimeUs < 0) {
@@ -364,6 +378,303 @@ class HlsMuxer(
}
}
// ==================== SPS VUI Timing Injection ====================
/**
 * Injects VUI timing parameters into an H.264 SPS NAL unit.
 * This ensures proper frame rate detection by players/decoders.
 *
 * The SPS is parsed field by field up to `vui_parameters_present_flag`, that flag is
 * forced to 1, and the VUI (with timing info for [fps]) is written by
 * [writeVuiWithTiming], followed by the RBSP trailing bits.
 *
 * Emulation-prevention bytes (`0x03` after two zero bytes) are removed before parsing
 * and re-inserted into the rebuilt NAL unit. The injected 32-bit timing fields contain
 * long zero runs, so the rebuilt SPS would otherwise contain byte sequences that are
 * not allowed inside a NAL unit.
 *
 * @param sps the SPS NAL unit, starting with its one-byte NAL header
 * @param fps the frame rate to signal; must be positive
 * @return the rewritten SPS, or [sps] unchanged when [fps] is not positive, when the SPS
 *         carries a scaling matrix, or when parsing fails for any reason
 */
private fun injectVuiTiming(sps: ByteArray, fps: Int): ByteArray {
    // time_scale is derived from fps; a non-positive value cannot describe a frame rate.
    if (fps <= 0) {
        Log.w(TAG, "Invalid fps ($fps) for VUI timing injection, using original SPS")
        return sps
    }

    // Drops every 0x03 that follows two zero bytes (NAL payload -> raw RBSP bits).
    fun stripEmulationPrevention(nal: ByteArray): ByteArray {
        val out = ByteArrayOutputStream(nal.size)
        var zeroRun = 0
        for (b in nal) {
            val v = b.toInt() and 0xFF
            if (zeroRun >= 2 && v == 0x03) {
                zeroRun = 0
                continue
            }
            out.write(v)
            zeroRun = if (v == 0) zeroRun + 1 else 0
        }
        return out.toByteArray()
    }

    // Inserts 0x03 wherever two zero bytes would be followed by a byte <= 0x03.
    fun insertEmulationPrevention(rbsp: ByteArray): ByteArray {
        val out = ByteArrayOutputStream(rbsp.size + 8)
        var zeroRun = 0
        for (b in rbsp) {
            val v = b.toInt() and 0xFF
            if (zeroRun == 2 && v <= 0x03) {
                out.write(0x03)
                zeroRun = 0
            }
            out.write(v)
            zeroRun = if (v == 0) zeroRun + 1 else 0
        }
        return out.toByteArray()
    }

    // Profiles whose SPS carries chroma format / bit depth / scaling matrix fields.
    val profilesWithChromaInfo = setOf(100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135)

    try {
        val reader = BitReader(stripEmulationPrevention(sps))
        val writer = BitWriter()

        // Copies a fixed-width field verbatim and returns its value.
        fun copyBits(count: Int): Int {
            val value = reader.readBits(count)
            writer.writeBits(value, count)
            return value
        }

        // NAL header (1 byte: forbidden_zero_bit, nal_ref_idc, nal_unit_type)
        copyBits(8)
        // profile_idc (1 byte)
        val profileIdc = copyBits(8)
        // constraint_set flags (1 byte)
        copyBits(8)
        // level_idc (1 byte)
        copyBits(8)
        // seq_parameter_set_id (ue(v))
        copyExpGolomb(reader, writer)

        if (profileIdc in profilesWithChromaInfo) {
            // chroma_format_idc (ue(v))
            val chromaFormatIdc = copyExpGolombAndReturn(reader, writer)
            if (chromaFormatIdc == 3) {
                // separate_colour_plane_flag (1 bit)
                copyBits(1)
            }
            // bit_depth_luma_minus8 (ue(v))
            copyExpGolomb(reader, writer)
            // bit_depth_chroma_minus8 (ue(v))
            copyExpGolomb(reader, writer)
            // qpprime_y_zero_transform_bypass_flag (1 bit)
            copyBits(1)
            // seq_scaling_matrix_present_flag (1 bit)
            val scalingMatrixFlag = copyBits(1)
            if (scalingMatrixFlag == 1) {
                // Scaling lists are not parsed here; leave the SPS untouched.
                Log.w(TAG, "SPS has scaling matrix, skipping VUI injection")
                return sps
            }
        }

        // log2_max_frame_num_minus4 (ue(v))
        copyExpGolomb(reader, writer)
        // pic_order_cnt_type (ue(v))
        val picOrderCntType = copyExpGolombAndReturn(reader, writer)
        if (picOrderCntType == 0) {
            // log2_max_pic_order_cnt_lsb_minus4 (ue(v))
            copyExpGolomb(reader, writer)
        } else if (picOrderCntType == 1) {
            // delta_pic_order_always_zero_flag (1 bit)
            copyBits(1)
            // offset_for_non_ref_pic (se(v))
            copySignedExpGolomb(reader, writer)
            // offset_for_top_to_bottom_field (se(v))
            copySignedExpGolomb(reader, writer)
            // num_ref_frames_in_pic_order_cnt_cycle (ue(v))
            val numRefFrames = copyExpGolombAndReturn(reader, writer)
            for (i in 0 until numRefFrames) {
                // offset_for_ref_frame[i] (se(v))
                copySignedExpGolomb(reader, writer)
            }
        }

        // max_num_ref_frames (ue(v))
        copyExpGolomb(reader, writer)
        // gaps_in_frame_num_value_allowed_flag (1 bit)
        copyBits(1)
        // pic_width_in_mbs_minus1 (ue(v))
        copyExpGolomb(reader, writer)
        // pic_height_in_map_units_minus1 (ue(v))
        copyExpGolomb(reader, writer)
        // frame_mbs_only_flag (1 bit)
        val frameMbsOnlyFlag = copyBits(1)
        if (frameMbsOnlyFlag == 0) {
            // mb_adaptive_frame_field_flag (1 bit)
            copyBits(1)
        }
        // direct_8x8_inference_flag (1 bit)
        copyBits(1)
        // frame_cropping_flag (1 bit)
        val frameCroppingFlag = copyBits(1)
        if (frameCroppingFlag == 1) {
            // frame_crop_left_offset, right, top, bottom (ue(v) each)
            repeat(4) { copyExpGolomb(reader, writer) }
        }

        // vui_parameters_present_flag - read the original, always write 1
        val originalVuiFlag = reader.readBits(1)
        writer.writeBits(1, 1)

        // Write VUI parameters with timing info
        writeVuiWithTiming(writer, fps, originalVuiFlag == 1, reader)

        // Add RBSP trailing bits
        writer.writeRbspTrailingBits()

        // Re-escape so the rebuilt payload is a valid NAL unit again.
        val result = insertEmulationPrevention(writer.toByteArray())
        Log.d(TAG, "Injected VUI timing for ${fps}fps, SPS grew from ${sps.size} to ${result.size} bytes")
        return result
    } catch (e: Exception) {
        // Best effort: any parse failure falls back to the SPS as delivered.
        Log.e(TAG, "Failed to inject VUI timing: ${e.message}, using original SPS", e)
        return sps
    }
}
/**
 * Writes the SPS `vui_parameters()` structure with timing info for [fps].
 *
 * When the source SPS had no VUI ([hadVui] is false) a minimal VUI is written: every
 * optional section is flagged absent except timing info. Nothing is read from [reader].
 *
 * When the source SPS already had a VUI ([hadVui] is true) its fields are read from
 * [reader] and copied through unchanged (aspect ratio, overscan, video signal type,
 * chroma location, HRD parameters, pic_struct flag, bitstream restriction). Only the
 * timing info section is replaced, so encoder-provided VUI data is not lost.
 *
 * Timing written in both cases: `num_units_in_tick = 1`, `time_scale = fps * 2`
 * (each frame counts as two field ticks) and `fixed_frame_rate_flag = 1`.
 *
 * @param writer destination, positioned right after `vui_parameters_present_flag`
 * @param fps frame rate to signal
 * @param hadVui whether the source SPS had `vui_parameters_present_flag == 1`
 * @param reader source SPS, positioned right after `vui_parameters_present_flag`
 * @throws IllegalStateException if the existing VUI cannot be parsed (propagated from
 *         [reader], or raised for an out-of-range `cpb_cnt_minus1`)
 */
private fun writeVuiWithTiming(writer: BitWriter, fps: Int, hadVui: Boolean, reader: BitReader) {
    // Copies a fixed-width field verbatim and returns its value.
    fun copyBits(count: Int): Int {
        val value = reader.readBits(count)
        writer.writeBits(value, count)
        return value
    }

    // Copies a 1-bit flag and reports whether it was set.
    fun copyFlag(): Boolean = copyBits(1) == 1

    // Copies one ue(v) field and returns its value.
    fun copyUe(): Int {
        val value = reader.readExpGolomb()
        writer.writeExpGolomb(value)
        return value
    }

    // Copies one hrd_parameters() structure verbatim.
    fun copyHrdParameters() {
        val cpbCntMinus1 = copyUe()
        check(cpbCntMinus1 in 0..31) { "Invalid cpb_cnt_minus1: $cpbCntMinus1" }
        copyBits(4) // bit_rate_scale
        copyBits(4) // cpb_size_scale
        repeat(cpbCntMinus1 + 1) {
            copyUe() // bit_rate_value_minus1
            copyUe() // cpb_size_value_minus1
            copyBits(1) // cbr_flag
        }
        copyBits(5) // initial_cpb_removal_delay_length_minus1
        copyBits(5) // cpb_removal_delay_length_minus1
        copyBits(5) // dpb_output_delay_length_minus1
        copyBits(5) // time_offset_length
    }

    // Emits the timing info section, flagged present.
    fun writeTimingInfo() {
        // timing_info_present_flag = 1
        writer.writeBits(1, 1)
        // num_units_in_tick (32 bits) = 1
        writer.writeBits(1, 32)
        // time_scale (32 bits) = fps * 2 (because each frame = 2 field counts)
        writer.writeBits(fps * 2, 32)
        // fixed_frame_rate_flag = 1
        writer.writeBits(1, 1)
    }

    if (!hadVui) {
        // aspect_ratio_info_present_flag
        writer.writeBits(0, 1)
        // overscan_info_present_flag
        writer.writeBits(0, 1)
        // video_signal_type_present_flag
        writer.writeBits(0, 1)
        // chroma_loc_info_present_flag
        writer.writeBits(0, 1)
        writeTimingInfo()
        // nal_hrd_parameters_present_flag
        writer.writeBits(0, 1)
        // vcl_hrd_parameters_present_flag
        writer.writeBits(0, 1)
        // pic_struct_present_flag
        writer.writeBits(0, 1)
        // bitstream_restriction_flag
        writer.writeBits(0, 1)
        return
    }

    // aspect_ratio_info_present_flag
    if (copyFlag()) {
        // aspect_ratio_idc; 255 (Extended_SAR) is followed by explicit sar_width/sar_height
        val aspectRatioIdc = copyBits(8)
        if (aspectRatioIdc == 255) {
            copyBits(16) // sar_width
            copyBits(16) // sar_height
        }
    }
    // overscan_info_present_flag
    if (copyFlag()) {
        copyBits(1) // overscan_appropriate_flag
    }
    // video_signal_type_present_flag
    if (copyFlag()) {
        copyBits(3) // video_format
        copyBits(1) // video_full_range_flag
        // colour_description_present_flag
        if (copyFlag()) {
            copyBits(8) // colour_primaries
            copyBits(8) // transfer_characteristics
            copyBits(8) // matrix_coefficients
        }
    }
    // chroma_loc_info_present_flag
    if (copyFlag()) {
        copyUe() // chroma_sample_loc_type_top_field
        copyUe() // chroma_sample_loc_type_bottom_field
    }

    // timing_info_present_flag: consume any existing timing info, then write ours
    if (reader.readBits(1) == 1) {
        reader.readBits(32) // num_units_in_tick
        reader.readBits(32) // time_scale
        reader.readBits(1) // fixed_frame_rate_flag
    }
    writeTimingInfo()

    // nal_hrd_parameters_present_flag
    val nalHrdPresent = copyFlag()
    if (nalHrdPresent) copyHrdParameters()
    // vcl_hrd_parameters_present_flag
    val vclHrdPresent = copyFlag()
    if (vclHrdPresent) copyHrdParameters()
    if (nalHrdPresent || vclHrdPresent) {
        copyBits(1) // low_delay_hrd_flag
    }
    // pic_struct_present_flag
    copyBits(1)
    // bitstream_restriction_flag
    if (copyFlag()) {
        copyBits(1) // motion_vectors_over_pic_boundaries_flag
        copyUe() // max_bytes_per_pic_denom
        copyUe() // max_bits_per_mb_denom
        copyUe() // log2_max_mv_length_horizontal
        copyUe() // log2_max_mv_length_vertical
        copyUe() // max_num_reorder_frames
        copyUe() // max_dec_frame_buffering
    }
}
// ==================== Bit Manipulation Helpers ====================
/**
 * Sequential, most-significant-bit-first reader over a byte array, used for
 * parsing H.264 NAL units.
 *
 * Reading past the last byte throws [IllegalStateException].
 */
private class BitReader(private val data: ByteArray) {
    // Absolute position of the next unread bit, counted from the start of data.
    private var cursor = 0

    /** Reads [count] bits and returns them as an integer, first bit read most significant. */
    fun readBits(count: Int): Int {
        var accumulated = 0
        repeat(count) {
            val byteIndex = cursor ushr 3
            if (byteIndex >= data.size) throw IllegalStateException("End of data")
            val shift = 7 - (cursor and 7)
            accumulated = (accumulated shl 1) or ((data[byteIndex].toInt() shr shift) and 1)
            cursor++
        }
        return accumulated
    }

    /** Reads one unsigned exp-golomb (ue(v)) value; rejects prefixes longer than 31 zeros. */
    fun readExpGolomb(): Int {
        var zeroPrefix = 0
        while (readBits(1) == 0) {
            zeroPrefix++
            if (zeroPrefix > 31) throw IllegalStateException("Invalid exp-golomb")
        }
        return when (zeroPrefix) {
            0 -> 0
            else -> (1 shl zeroPrefix) - 1 + readBits(zeroPrefix)
        }
    }

    /** Reads one signed exp-golomb (se(v)) value: odd codes map to positive, even to non-positive. */
    fun readSignedExpGolomb(): Int {
        val code = readExpGolomb()
        val isEven = code % 2 == 0
        return if (isEven) -(code / 2) else (code + 1) / 2
    }
}
/**
 * Bit-level writer for constructing H.264 NAL units.
 *
 * Bits are appended most-significant-bit first. Completed bytes go into an internal
 * buffer; up to seven pending bits are held in [currentByte] until a byte is full.
 */
private class BitWriter {
    private val bytes = ByteArrayOutputStream()
    private var currentByte = 0
    private var bitPos = 0

    /** Appends the low [count] bits of [value], most significant of those bits first. */
    fun writeBits(value: Int, count: Int) {
        for (i in count - 1 downTo 0) {
            val bit = (value shr i) and 1
            currentByte = (currentByte shl 1) or bit
            bitPos++
            if (bitPos == 8) {
                bytes.write(currentByte)
                currentByte = 0
                bitPos = 0
            }
        }
    }

    /** Appends [value] as an unsigned exp-golomb (ue(v)) code. */
    fun writeExpGolomb(value: Int) {
        val code = value + 1
        val bits = 32 - Integer.numberOfLeadingZeros(code)
        // Write leading zeros
        for (i in 0 until bits - 1) {
            writeBits(0, 1)
        }
        // Write the code
        writeBits(code, bits)
    }

    /** Appends [value] as a signed exp-golomb (se(v)) code. */
    fun writeSignedExpGolomb(value: Int) {
        val code = if (value <= 0) -2 * value else 2 * value - 1
        writeExpGolomb(code)
    }

    /** Appends the RBSP stop bit followed by zero bits up to the next byte boundary. */
    fun writeRbspTrailingBits() {
        writeBits(1, 1) // rbsp_stop_one_bit
        while (bitPos != 0) {
            writeBits(0, 1) // rbsp_alignment_zero_bit
        }
    }

    /**
     * Returns everything written so far; pending bits are zero-padded to a full byte
     * in the returned copy only.
     *
     * The writer's state is not modified, so this may be called repeatedly and
     * writing may continue afterwards.
     */
    fun toByteArray(): ByteArray {
        val complete = bytes.toByteArray()
        if (bitPos == 0) return complete
        val padded = (currentByte shl (8 - bitPos)).toByte()
        return complete + padded
    }
}
/** Transfers one unsigned exp-golomb (ue(v)) field from [reader] to [writer]. */
private fun copyExpGolomb(reader: BitReader, writer: BitWriter) {
    writer.writeExpGolomb(reader.readExpGolomb())
}
/**
 * Transfers one unsigned exp-golomb (ue(v)) field from [reader] to [writer].
 *
 * @return the value that was read and re-written
 */
private fun copyExpGolombAndReturn(reader: BitReader, writer: BitWriter): Int =
    reader.readExpGolomb().also { decoded -> writer.writeExpGolomb(decoded) }
/** Transfers one signed exp-golomb (se(v)) field from [reader] to [writer]. */
private fun copySignedExpGolomb(reader: BitReader, writer: BitWriter) {
    writer.writeSignedExpGolomb(reader.readSignedExpGolomb())
}
// ==================== Init Segment Building ====================
/**
@@ -373,11 +684,19 @@ class HlsMuxer(
val width = format.getInteger(MediaFormat.KEY_WIDTH)
val height = format.getInteger(MediaFormat.KEY_HEIGHT)
val sps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) }
val rawSps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) }
?: throw IllegalArgumentException("Missing SPS (csd-0)")
val pps = format.getByteBuffer("csd-1")?.let { extractNalUnit(it) }
?: throw IllegalArgumentException("Missing PPS (csd-1)")
Log.d(TAG, "Original SPS size: ${rawSps.size} bytes, PPS size: ${pps.size} bytes")
Log.d(TAG, "Original SPS hex: ${rawSps.joinToString("") { "%02x".format(it) }}")
// Inject VUI timing info into SPS using configured fps (not encoder output format fps)
val sps = injectVuiTiming(rawSps, configuredFps)
Log.d(TAG, "Modified SPS size: ${sps.size} bytes")
Log.d(TAG, "Modified SPS hex: ${sps.joinToString("") { "%02x".format(it) }}")
val output = ByteArrayOutputStream()
// ftyp
@@ -660,9 +979,10 @@ class HlsMuxer(
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
val profileIdc = if (sps.isNotEmpty()) sps[0].toInt() and 0xFF else 0x42
val profileCompat = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x00
val levelIdc = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x1F
// SPS layout: [0]=NAL header (0x67), [1]=profile_idc, [2]=constraint_flags, [3]=level_idc
val profileIdc = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x42
val profileCompat = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x00
val levelIdc = if (sps.size > 3) sps[3].toInt() and 0xFF else 0x1F
dos.writeByte(1) // configuration version
dos.writeByte(profileIdc) // AVC profile
@@ -722,10 +1042,14 @@ class HlsMuxer(
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
// Default sample duration: timescale / fps
// Since timescale = fps * 1000, duration = 1000 for any fps
val defaultSampleDuration = 1000
dos.writeInt(0) // version & flags
dos.writeInt(1) // track ID
dos.writeInt(1) // default sample description index
dos.writeInt(0) // default sample duration
dos.writeInt(defaultSampleDuration) // default sample duration
dos.writeInt(0) // default sample size
dos.writeInt(0) // default sample flags
@@ -837,10 +1161,13 @@ class HlsMuxer(
dos.writeInt(samples.size)
dos.writeInt(dataOffset)
// Use constant duration based on configured fps for consistent frame rate
// This ensures ffprobe reports correct fps instead of calculating from variable timing
val constantDuration = timescale / configuredFps // e.g., 30000/30 = 1000 ticks
Log.d(TAG, "Writing ${samples.size} samples with constant duration=${constantDuration} ticks (${configuredFps}fps)")
for (sample in samples) {
// Convert duration to timescale units
val durationInTimescale = ((sample.durationUs * timescale) / 1_000_000).toInt()
dos.writeInt(durationInTimescale)
dos.writeInt(constantDuration)
dos.writeInt(sample.data.size)
dos.writeInt(buildSampleFlags(sample.isKeyFrame))
}