normalize timestamps and fix framerate metadata in init file
@@ -61,7 +61,12 @@ class FragmentedRecordingManager(
             MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface
         )

-        val effectiveFps = fps ?: 30
+        // Use 30fps as conservative default since many Android devices can't sustain
+        // higher frame rates at high resolutions. This affects:
+        // - Encoder: bitrate allocation and I-frame interval calculation
+        // - HlsMuxer: timescale for accurate sample durations
+        // The actual frame timing comes from camera timestamps regardless of this setting.
+        val effectiveFps = 30
         format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps)
         format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
         format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
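For context, a minimal sketch of how these three settings interact at the default 30 fps. The segment duration (2 s) and bitrate (8 Mbps) below are assumed illustration values, not taken from this repo:

    // Illustrative arithmetic only; segmentDurationSeconds and bitRate values are assumptions.
    fun describeEncoderConfig(fps: Int = 30, segmentDurationSeconds: Int = 2, bitRate: Int = 8_000_000) {
        // KEY_I_FRAME_INTERVAL is in seconds, so a keyframe lands on every segment boundary:
        val framesBetweenKeyframes = fps * segmentDurationSeconds   // 60 frames
        // Rough per-frame budget the encoder spreads the bitrate over:
        val bitsPerFrame = bitRate / fps                            // ~266_666 bits per frame
        println("keyframe every $framesBetweenKeyframes frames, ~$bitsPerFrame bits/frame")
    }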
@@ -70,7 +75,7 @@ class FragmentedRecordingManager(

         codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)

-        // Create muxer with callbacks and orientation
+        // Create muxer with callbacks, orientation, and fps
         val muxer = HlsMuxer(
             outputDirectory = outputDirectory,
             callback = object : HlsMuxer.Callback {
@@ -82,7 +87,8 @@ class FragmentedRecordingManager(
                     callbacks.onVideoChunkReady(file, index, durationUs)
                 }
             },
-            orientationDegrees = recordingOrientationDegrees
+            orientationDegrees = recordingOrientationDegrees,
+            fps = effectiveFps
         )
         muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L)

@@ -27,7 +27,8 @@ import java.nio.ByteBuffer
 class HlsMuxer(
     private val outputDirectory: File,
     private val callback: Callback,
-    private val orientationDegrees: Int = 0
+    private val orientationDegrees: Int = 0,
+    private val fps: Int = 30
 ) {
     companion object {
         private const val TAG = "HlsMuxer"
@@ -41,7 +42,7 @@ class HlsMuxer(

     // Configuration
     private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
-    private var timescale: Int = 30000 // Default, updated from format
+    private var timescale: Int = 30000 // Default, updated in addTrack() to fps * 1000

     // State
     private var state = State.UNINITIALIZED
@@ -54,6 +55,14 @@ class HlsMuxer(
     private var segmentStartTimeUs = -1L
     private var lastPresentationTimeUs = 0L

+    // Timestamp normalization - MediaCodec timestamps are device uptime, not starting from 0
+    private var firstPresentationTimeUs = -1L
+
+    // Actual fps detection from frame timestamps
+    private var detectedFps: Int? = null
+    private var fpsDetectionSamples = mutableListOf<Long>()
+    private val FPS_DETECTION_SAMPLE_COUNT = 30
+
     private enum class State {
         UNINITIALIZED,
         INITIALIZED,
@@ -69,6 +78,29 @@ class HlsMuxer(
         val isKeyFrame: Boolean
     )

+    // ==================== Timestamp Normalization ====================
+
+    /**
+     * Normalizes a presentation timestamp to start from 0.
+     * The first timestamp received becomes time 0, and all subsequent
+     * timestamps are relative to that.
+     *
+     * This is necessary because MediaCodec timestamps are based on device uptime,
+     * not starting from 0. HLS players expect timestamps to start at or near 0.
+     */
+    private fun normalizeTimestamp(rawPresentationTimeUs: Long): Long {
+        if (firstPresentationTimeUs < 0) {
+            firstPresentationTimeUs = rawPresentationTimeUs
+            Log.d(TAG, "First timestamp captured: ${rawPresentationTimeUs}us (${rawPresentationTimeUs / 1_000_000.0}s), normalizing to 0")
+        }
+        val normalized = rawPresentationTimeUs - firstPresentationTimeUs
+        // Log first few normalizations to debug
+        if (normalized < 1_000_000) { // First second
+            Log.d(TAG, "Timestamp: raw=${rawPresentationTimeUs}us -> normalized=${normalized}us")
+        }
+        return normalized
+    }
+
     // ==================== Annex-B to AVCC Conversion ====================

     /**
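To make the effect concrete, here is a standalone sketch (not part of the diff) that applies the same first-timestamp-becomes-zero rule to a few uptime-style values; the input numbers are made up:

    // Mirrors the normalizeTimestamp() logic above, for illustration only.
    fun main() {
        var firstUs = -1L
        fun normalize(rawUs: Long): Long {
            if (firstUs < 0) firstUs = rawUs
            return rawUs - firstUs
        }

        // MediaCodec presentation times are based on device uptime, e.g. hours after boot:
        val raw = listOf(7_200_000_123L, 7_200_033_456L, 7_200_066_789L)
        raw.forEach { println("raw=${it}us -> normalized=${normalize(it)}us") }
        // Prints 0, 33333, 66666 - a timeline an HLS player can start playing at t = 0.
    }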
@@ -201,13 +233,11 @@ class HlsMuxer(

         trackFormat = format

-        // Extract timescale from frame rate
-        val fps = try {
-            format.getInteger(MediaFormat.KEY_FRAME_RATE)
-        } catch (e: Exception) {
-            30
-        }
-        timescale = fps * 1000 // Use fps * 1000 for good precision
+        // Use fps * 1000 as timescale for good precision (1000 timescale units per frame).
+        // This ensures accurate sample durations without integer truncation issues.
+        // Note: ffprobe may report r_frame_rate based on timescale, so the backend
+        // should use the explicit framesPerSecond from the API mutation, not ffprobe.
+        timescale = fps * 1000

         state = State.INITIALIZED

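A quick worked example of the timescale choice described in those comments (fps = 30 is the configured default; the microsecond comparison is added here for illustration):

    fun main() {
        // With timescale = fps * 1000, one nominal frame is exactly 1000 units, so the
        // default sample duration is integral and a probe can derive timescale / delta = fps.
        val fps = 30
        val timescale = fps * 1000            // 30_000 units per second
        val sampleDelta = timescale / fps     // exactly 1000 units per frame
        println(timescale / sampleDelta)      // 30 - what ffprobe reports as r_frame_rate

        // A microsecond timescale, by contrast, cannot represent a 30 fps frame exactly:
        val usDelta = 1_000_000 / fps         // 33_333, truncated from 33_333.3...
        println(1_000_000.0 / usDelta)        // ~30.0003 - a slightly "off" frame rate
    }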
@@ -215,7 +245,7 @@ class HlsMuxer(
         val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 }
         Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
                 "encoder output: ${formatWidth}x${formatHeight}, " +
-                "timescale=$timescale, orientation=$orientationDegrees°")
+                "fps=$fps, timescale=$timescale, orientation=$orientationDegrees°")

         return 0 // Single track, index 0
     }
@@ -227,16 +257,30 @@ class HlsMuxer(
         check(state == State.INITIALIZED) { "Must call addTrack() before start()" }
         val format = trackFormat ?: throw IllegalStateException("No track format")

-        // Create output directory if needed
+        // Create output directory if needed, with proper error handling
         if (!outputDirectory.exists()) {
-            outputDirectory.mkdirs()
+            val created = outputDirectory.mkdirs()
+            if (!created && !outputDirectory.exists()) {
+                throw IllegalStateException(
+                    "Failed to create output directory: ${outputDirectory.absolutePath}. " +
+                        "Parent exists: ${outputDirectory.parentFile?.exists()}, " +
+                        "Parent path: ${outputDirectory.parentFile?.absolutePath}"
+                )
+            }
+            Log.d(TAG, "Created output directory: ${outputDirectory.absolutePath}")
         }

         // Write init segment
         val initBytes = buildInitSegment(format)
         val initFile = File(outputDirectory, "init.mp4")
         FileOutputStream(initFile).use { it.write(initBytes) }

+        // Log frame rate metadata for debugging
+        val defaultSampleDuration = timescale / fps
         Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)")
+        Log.d(TAG, "Frame rate metadata: timescale=$timescale, fps=$fps, " +
+                "default_sample_duration=$defaultSampleDuration (ffprobe should calculate ${timescale}/${defaultSampleDuration}=${fps}fps)")

         callback.onInitSegmentReady(initFile)

         state = State.STARTED
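As a side note on the directory handling above: if the project's minSdk allows java.nio.file (API 26+, an assumption here), Files.createDirectories gives the same guarantee with the failure reason reported for free. A rough sketch of that alternative:

    import java.io.File
    import java.nio.file.Files

    // Sketch only: java.nio.file needs API level 26+ on Android (an assumption about this project).
    fun ensureOutputDirectory(outputDirectory: File) {
        // No boolean to interpret: this is a no-op if the directory already exists and
        // throws IOException naming the failing path if it cannot be created.
        Files.createDirectories(outputDirectory.toPath())
    }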
@@ -259,13 +303,40 @@ class HlsMuxer(
         }

         val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
-        val presentationTimeUs = bufferInfo.presentationTimeUs
+        // Normalize timestamp to start from 0 (MediaCodec uses device uptime)
+        val presentationTimeUs = normalizeTimestamp(bufferInfo.presentationTimeUs)
+
+        // Detect actual fps from first N samples
+        if (detectedFps == null) {
+            fpsDetectionSamples.add(presentationTimeUs)
+            if (fpsDetectionSamples.size >= FPS_DETECTION_SAMPLE_COUNT) {
+                val elapsed = fpsDetectionSamples.last() - fpsDetectionSamples.first()
+                if (elapsed > 0) {
+                    val actualFps = ((FPS_DETECTION_SAMPLE_COUNT - 1) * 1_000_000.0 / elapsed).toInt()
+                    detectedFps = actualFps
+                    if (kotlin.math.abs(actualFps - fps) > 5) {
+                        Log.w(TAG, "Actual fps ($actualFps) differs significantly from configured fps ($fps)! " +
+                                "This may cause processing issues if backend uses configured fps.")
+                    } else {
+                        Log.d(TAG, "Detected actual fps: $actualFps (configured: $fps)")
+                    }
+                }
+                fpsDetectionSamples.clear() // Free memory
+            }
+        }

         // Initialize segment start time
         if (segmentStartTimeUs < 0) {
             segmentStartTimeUs = presentationTimeUs
         }

+        // Update duration of previous sample BEFORE finalization check
+        // This ensures the last sample has correct duration when segment is finalized
+        if (pendingSamples.isNotEmpty()) {
+            val lastSample = pendingSamples.last()
+            lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
+        }

         // Check if we should finalize current segment (at keyframe boundaries)
         if (isKeyFrame && pendingSamples.isNotEmpty()) {
             val segmentDurationUs = presentationTimeUs - segmentStartTimeUs
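The (FPS_DETECTION_SAMPLE_COUNT - 1) factor matters: 30 timestamps span only 29 frame intervals. A small sketch with ideal 30 fps timing (made-up timestamps):

    fun main() {
        // 30 ideal timestamps after normalization: one frame every 33_333 us.
        val samples = List(30) { it * 33_333L }
        val elapsed = samples.last() - samples.first()              // 29 intervals = 966_657 us
        val actualFps = ((30 - 1) * 1_000_000.0 / elapsed).toInt()
        println(actualFps)                                          // 30; using 30 instead of 29 would give 31
    }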
@@ -284,12 +355,6 @@ class HlsMuxer(
         // Convert Annex-B (start codes) to AVCC (length prefixes)
         val data = convertAnnexBToAvcc(rawData)

-        // Update duration of previous sample
-        if (pendingSamples.isNotEmpty()) {
-            val lastSample = pendingSamples.last()
-            lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
-        }
-
         // Estimate duration (will be corrected by next sample)
         val estimatedDurationUs = if (lastPresentationTimeUs > 0) {
             presentationTimeUs - lastPresentationTimeUs
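This block is removed because the same update now runs earlier in writeSampleData(), before the keyframe/finalization check; otherwise the last sample of a segment would be flushed with only its estimated duration. A toy illustration of the ordering (Sample here is hypothetical, not the real class):

    // Hypothetical miniature of the pending-sample bookkeeping.
    data class Sample(val ptsUs: Long, var durationUs: Long)

    fun main() {
        val pending = mutableListOf(Sample(ptsUs = 0, durationUs = 33_000 /* estimate */))
        val nextPtsUs = 33_333L   // next frame arrives and happens to be a keyframe

        // New order: correct the previous sample's duration first, then decide on finalization,
        // so a segment flushed at this keyframe carries 33_333us instead of the 33_000us estimate.
        pending.last().durationUs = nextPtsUs - pending.last().ptsUs
        println(pending.last().durationUs)   // 33333
    }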
@@ -351,6 +416,7 @@ class HlsMuxer(
         val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs

         Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " +
+                "baseDecodeTime=${baseDecodeTimeUs}us (${baseDecodeTimeUs / 1_000_000.0}s), " +
                 "duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes")

         callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs)
@@ -649,7 +715,7 @@ class HlsMuxer(
     private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
         val content = ByteArrayOutputStream()
         content.write(buildStsdBox(width, height, sps, pps))
-        content.write(buildEmptySttsBox())
+        content.write(buildSttsBox()) // Contains default timing for ffprobe frame rate detection
        content.write(buildEmptyStscBox())
         content.write(buildEmptyStszBox())
         content.write(buildEmptyStcoBox())
@@ -716,11 +782,21 @@ class HlsMuxer(
         return wrapBox("avcC", output.toByteArray())
     }

-    private fun buildEmptySttsBox(): ByteArray {
+    private fun buildSttsBox(): ByteArray {
         val output = ByteArrayOutputStream()
         val dos = DataOutputStream(output)

+        // For fragmented MP4, stts is normally empty as timing is in trun boxes.
+        // However, ffprobe uses stts to calculate r_frame_rate when present.
+        // We add a single entry with the default sample duration so ffprobe
+        // can derive: r_frame_rate = timescale / sample_delta = 30000/1000 = 30
+        val defaultSampleDuration = timescale / fps
+
         dos.writeInt(0) // version & flags
-        dos.writeInt(0) // entry count
+        dos.writeInt(1) // entry count (1 entry for default timing)
+        dos.writeInt(1) // sample_count (indicates this is the default duration)
+        dos.writeInt(defaultSampleDuration) // sample_delta in timescale units

         return wrapBox("stts", output.toByteArray())
     }

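For reference, a self-contained restatement of the stts payload written above (wrapBox, the repo's size/type header helper, is omitted). The first int packs version (1 byte) and flags (3 bytes); then entry_count followed by one (sample_count, sample_delta) pair:

    import java.io.ByteArrayOutputStream
    import java.io.DataOutputStream

    fun sttsPayload(fps: Int = 30): ByteArray {
        val timescale = fps * 1000
        val out = ByteArrayOutputStream()
        DataOutputStream(out).apply {
            writeInt(0)                // version & flags
            writeInt(1)                // entry_count
            writeInt(1)                // sample_count
            writeInt(timescale / fps)  // sample_delta = 1000 -> probes derive 30000/1000 = 30
        }
        return out.toByteArray()       // 16 bytes of payload
    }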
@@ -757,10 +833,15 @@ class HlsMuxer(
         val output = ByteArrayOutputStream()
         val dos = DataOutputStream(output)

+        // Calculate default sample duration so ffprobe can derive correct fps
+        // fps = timescale / default_sample_duration
+        // At 30fps with timescale=30000: duration=1000, ffprobe calculates 30000/1000=30
+        val defaultSampleDuration = timescale / fps
+
         dos.writeInt(0) // version & flags
         dos.writeInt(1) // track ID
         dos.writeInt(1) // default sample description index
-        dos.writeInt(0) // default sample duration
+        dos.writeInt(defaultSampleDuration) // default sample duration
         dos.writeInt(0) // default sample size
         dos.writeInt(0) // default sample flags

@@ -44,7 +44,14 @@ class RecordingSession(

     data class Video(val path: String, val durationMs: Long, val size: Size)

-    private val outputPath: File = File(filePath)
+    // Strip file:// prefix if present (expo-file-system returns URIs with this prefix)
+    private val outputPath: File = File(
+        if (filePath.startsWith("file://")) {
+            filePath.removePrefix("file://")
+        } else {
+            filePath
+        }
+    )

     private val bitRate = getBitRate()

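If the incoming string can also carry percent-encoded characters (not verified for this app), android.net.Uri handles both the scheme and the decoding; a hedged alternative to the prefix stripping above:

    import android.net.Uri
    import java.io.File

    // Sketch of an alternative: Uri.parse(...).path strips a file:// scheme, decodes
    // percent-escapes, and passes plain paths ("/storage/...") through unchanged.
    fun toOutputFile(filePath: String): File =
        File(Uri.parse(filePath).path ?: filePath)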