From a307f9b7e873ad8078efd06343a3be2ea3d40950 Mon Sep 17 00:00:00 2001
From: Loewy
Date: Mon, 29 Dec 2025 16:48:47 -0500
Subject: [PATCH] normalize timestamps and fix framerate metadata in init file

---
 .../camera/core/FragmentedRecordingManager.kt |  12 +-
 .../java/com/mrousavy/camera/core/HlsMuxer.kt | 127 ++++++++++++++----
 .../mrousavy/camera/core/RecordingSession.kt  |   9 +-
 3 files changed, 121 insertions(+), 27 deletions(-)

diff --git a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt
index 5899c08..64ac670 100644
--- a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt
@@ -61,7 +61,12 @@ class FragmentedRecordingManager(
       MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface
     )
 
-    val effectiveFps = fps ?: 30
+    // Use 30fps as a conservative default since many Android devices can't sustain
+    // higher frame rates at high resolutions. This affects:
+    // - Encoder: bitrate allocation and I-frame interval calculation
+    // - HlsMuxer: timescale for accurate sample durations
+    // The actual frame timing comes from camera timestamps regardless of this setting.
+    val effectiveFps = 30
     format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps)
     format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
     format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
@@ -70,7 +75,7 @@
 
     codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
 
-    // Create muxer with callbacks and orientation
+    // Create muxer with callbacks, orientation, and fps
    val muxer = HlsMuxer(
      outputDirectory = outputDirectory,
      callback = object : HlsMuxer.Callback {
@@ -82,7 +87,8 @@
          callbacks.onVideoChunkReady(file, index, durationUs)
        }
      },
-      orientationDegrees = recordingOrientationDegrees
+      orientationDegrees = recordingOrientationDegrees,
+      fps = effectiveFps
    )

    muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L)
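
The arithmetic behind the fixed 30fps choice above, written out as a minimal sketch. It assumes the timescale = fps * 1000 convention used by HlsMuxer below; the function name is illustrative only and not part of either file:

    // Illustrative sketch only: how a fixed fps propagates into the fMP4 timing metadata.
    fun timingMetadataFor(fps: Int): Triple<Int, Int, Int> {
        val timescale = fps * 1000                           // 30 * 1000 = 30000 ticks per second
        val defaultSampleDuration = timescale / fps          // 30000 / 30 = 1000 ticks per frame
        val derivedFps = timescale / defaultSampleDuration   // what ffprobe derives: 30
        return Triple(timescale, defaultSampleDuration, derivedFps)
    }

Because the encoder's KEY_FRAME_RATE, the muxer timescale, and the stts/trex defaults are all derived from the same constant, a player or backend probing the init segment should see a consistent 30fps regardless of the device's actual capture rate.
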
diff --git a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt
index a72cca7..7b9787e 100644
--- a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt
@@ -27,7 +27,8 @@ import java.nio.ByteBuffer
 class HlsMuxer(
   private val outputDirectory: File,
   private val callback: Callback,
-  private val orientationDegrees: Int = 0
+  private val orientationDegrees: Int = 0,
+  private val fps: Int = 30
 ) {
   companion object {
     private const val TAG = "HlsMuxer"
@@ -41,7 +42,7 @@
 
   // Configuration
   private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
-  private var timescale: Int = 30000 // Default, updated from format
+  private var timescale: Int = 30000 // Default, updated in addTrack() to fps * 1000
 
   // State
   private var state = State.UNINITIALIZED
@@ -54,6 +55,14 @@ class HlsMuxer(
   private var segmentStartTimeUs = -1L
   private var lastPresentationTimeUs = 0L
+  // Timestamp normalization - MediaCodec timestamps are device uptime, not starting from 0
+  private var firstPresentationTimeUs = -1L
+
+  // Actual fps detection from frame timestamps
+  private var detectedFps: Int? = null
+  private val fpsDetectionSamples = mutableListOf<Long>()
+  private val FPS_DETECTION_SAMPLE_COUNT = 30
+
   private enum class State {
     UNINITIALIZED,
     INITIALIZED,
@@ -69,6 +78,29 @@
     val isKeyFrame: Boolean
   )
 
+  // ==================== Timestamp Normalization ====================
+
+  /**
+   * Normalizes a presentation timestamp to start from 0.
+   * The first timestamp received becomes time 0, and all subsequent
+   * timestamps are relative to that.
+   *
+   * This is necessary because MediaCodec timestamps are based on device uptime,
+   * not starting from 0. HLS players expect timestamps to start at or near 0.
+   */
+  private fun normalizeTimestamp(rawPresentationTimeUs: Long): Long {
+    if (firstPresentationTimeUs < 0) {
+      firstPresentationTimeUs = rawPresentationTimeUs
+      Log.d(TAG, "First timestamp captured: ${rawPresentationTimeUs}us (${rawPresentationTimeUs / 1_000_000.0}s), normalizing to 0")
+    }
+    val normalized = rawPresentationTimeUs - firstPresentationTimeUs
+    // Log the first few normalizations for debugging
+    if (normalized < 1_000_000) { // First second
+      Log.d(TAG, "Timestamp: raw=${rawPresentationTimeUs}us -> normalized=${normalized}us")
+    }
+    return normalized
+  }
+
   // ==================== Annex-B to AVCC Conversion ====================
 
   /**
@@ -201,13 +233,11 @@
 
     trackFormat = format
 
-    // Extract timescale from frame rate
-    val fps = try {
-      format.getInteger(MediaFormat.KEY_FRAME_RATE)
-    } catch (e: Exception) {
-      30
-    }
-    timescale = fps * 1000 // Use fps * 1000 for good precision
+    // Use fps * 1000 as the timescale for good precision (1000 timescale units per frame).
+    // This ensures accurate sample durations without integer truncation issues.
+    // Note: ffprobe may report r_frame_rate based on this timescale, so the backend
+    // should use the explicit framesPerSecond from the API mutation, not ffprobe.
+    timescale = fps * 1000
 
     state = State.INITIALIZED
@@ -215,7 +245,7 @@
     val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 }
     Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
       "encoder output: ${formatWidth}x${formatHeight}, " +
-      "timescale=$timescale, orientation=$orientationDegrees°")
+      "fps=$fps, timescale=$timescale, orientation=$orientationDegrees°")
 
     return 0 // Single track, index 0
   }
@@ -227,16 +257,30 @@
     check(state == State.INITIALIZED) { "Must call addTrack() before start()" }
     val format = trackFormat ?: throw IllegalStateException("No track format")
 
-    // Create output directory if needed
+    // Create output directory if needed, with proper error handling
     if (!outputDirectory.exists()) {
-      outputDirectory.mkdirs()
+      val created = outputDirectory.mkdirs()
+      if (!created && !outputDirectory.exists()) {
+        throw IllegalStateException(
+          "Failed to create output directory: ${outputDirectory.absolutePath}. " +
+            "Parent exists: ${outputDirectory.parentFile?.exists()}, " +
+            "Parent path: ${outputDirectory.parentFile?.absolutePath}"
+        )
+      }
+      Log.d(TAG, "Created output directory: ${outputDirectory.absolutePath}")
     }
 
     // Write init segment
     val initBytes = buildInitSegment(format)
     val initFile = File(outputDirectory, "init.mp4")
     FileOutputStream(initFile).use { it.write(initBytes) }
+
+    // Log frame rate metadata for debugging
+    val defaultSampleDuration = timescale / fps
     Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)")
+    Log.d(TAG, "Frame rate metadata: timescale=$timescale, fps=$fps, " +
+      "default_sample_duration=$defaultSampleDuration (ffprobe should calculate ${timescale}/${defaultSampleDuration}=${fps}fps)")
+
     callback.onInitSegmentReady(initFile)
 
     state = State.STARTED
@@ -259,13 +303,40 @@
     }
 
     val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
-    val presentationTimeUs = bufferInfo.presentationTimeUs
+    // Normalize the timestamp to start from 0 (MediaCodec uses device uptime)
+    val presentationTimeUs = normalizeTimestamp(bufferInfo.presentationTimeUs)
+
+    // Detect the actual fps from the first N samples
+    if (detectedFps == null) {
+      fpsDetectionSamples.add(presentationTimeUs)
+      if (fpsDetectionSamples.size >= FPS_DETECTION_SAMPLE_COUNT) {
+        val elapsed = fpsDetectionSamples.last() - fpsDetectionSamples.first()
+        if (elapsed > 0) {
+          val actualFps = ((FPS_DETECTION_SAMPLE_COUNT - 1) * 1_000_000.0 / elapsed).toInt()
+          detectedFps = actualFps
+          if (kotlin.math.abs(actualFps - fps) > 5) {
+            Log.w(TAG, "Actual fps ($actualFps) differs significantly from configured fps ($fps)! " +
+              "This may cause processing issues if the backend uses the configured fps.")
+          } else {
+            Log.d(TAG, "Detected actual fps: $actualFps (configured: $fps)")
+          }
+        }
+        fpsDetectionSamples.clear() // Free memory
+      }
+    }
 
     // Initialize segment start time
     if (segmentStartTimeUs < 0) {
       segmentStartTimeUs = presentationTimeUs
     }
 
+    // Update the duration of the previous sample BEFORE the finalization check.
+    // This ensures the last sample has the correct duration when the segment is finalized.
+    if (pendingSamples.isNotEmpty()) {
+      val lastSample = pendingSamples.last()
+      lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
+    }
+
     // Check if we should finalize current segment (at keyframe boundaries)
     if (isKeyFrame && pendingSamples.isNotEmpty()) {
       val segmentDurationUs = presentationTimeUs - segmentStartTimeUs
@@ -284,12 +355,6 @@
     // Convert Annex-B (start codes) to AVCC (length prefixes)
     val data = convertAnnexBToAvcc(rawData)
 
-    // Update duration of previous sample
-    if (pendingSamples.isNotEmpty()) {
-      val lastSample = pendingSamples.last()
-      lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
-    }
-
     // Estimate duration (will be corrected by next sample)
     val estimatedDurationUs = if (lastPresentationTimeUs > 0) {
       presentationTimeUs - lastPresentationTimeUs
@@ -351,6 +416,7 @@
     val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs
 
     Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " +
+      "baseDecodeTime=${baseDecodeTimeUs}us (${baseDecodeTimeUs / 1_000_000.0}s), " +
       "duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes")
 
     callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs)
@@ -649,7 +715,7 @@
   private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
     val content = ByteArrayOutputStream()
     content.write(buildStsdBox(width, height, sps, pps))
-    content.write(buildEmptySttsBox())
+    content.write(buildSttsBox()) // Contains default timing for ffprobe frame rate detection
     content.write(buildEmptyStscBox())
     content.write(buildEmptyStszBox())
     content.write(buildEmptyStcoBox())
@@ -716,11 +782,21 @@
     return wrapBox("avcC", output.toByteArray())
   }
 
-  private fun buildEmptySttsBox(): ByteArray {
+  private fun buildSttsBox(): ByteArray {
     val output = ByteArrayOutputStream()
     val dos = DataOutputStream(output)
+
+    // For fragmented MP4, stts is normally empty because timing is carried in the trun boxes.
+    // However, ffprobe uses stts to calculate r_frame_rate when entries are present.
+    // We add a single entry with the default sample duration so ffprobe
+    // can derive: r_frame_rate = timescale / sample_delta = 30000 / 1000 = 30
+    val defaultSampleDuration = timescale / fps
+
     dos.writeInt(0) // version & flags
-    dos.writeInt(0) // entry count
+    dos.writeInt(1) // entry count (a single default-timing entry)
+    dos.writeInt(1) // sample_count (one nominal sample at the default duration)
+    dos.writeInt(defaultSampleDuration) // sample_delta in timescale units
+
     return wrapBox("stts", output.toByteArray())
   }
@@ -757,10 +833,15 @@
     val output = ByteArrayOutputStream()
     val dos = DataOutputStream(output)
 
+    // Calculate the default sample duration so ffprobe can derive the correct fps:
+    // fps = timescale / default_sample_duration
+    // At 30fps with timescale=30000: duration=1000, so ffprobe calculates 30000/1000=30
+    val defaultSampleDuration = timescale / fps
+
     dos.writeInt(0) // version & flags
     dos.writeInt(1) // track ID
     dos.writeInt(1) // default sample description index
-    dos.writeInt(0) // default sample duration
+    dos.writeInt(defaultSampleDuration) // default sample duration
     dos.writeInt(0) // default sample size
     dos.writeInt(0) // default sample flags
diff --git a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt
index fc2e2bb..a4585da 100644
--- a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt
@@ -44,7 +44,14 @@ class RecordingSession(
 
   data class Video(val path: String, val durationMs: Long, val size: Size)
 
-  private val outputPath: File = File(filePath)
+  // Strip file:// prefix if present (expo-file-system returns URIs with this prefix)
+  private val outputPath: File = File(
+    if (filePath.startsWith("file://")) {
+      filePath.removePrefix("file://")
+    } else {
+      filePath
+    }
+  )
 
   private val bitRate = getBitRate()
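
A minimal standalone sketch of the timestamp handling introduced in HlsMuxer, run against synthetic timestamps; the names and values here are illustrative and not part of the class:

    // Illustrative sketch only: mirrors normalizeTimestamp() and the fps estimation in writeSampleData().
    fun main() {
        val uptimeOffsetUs = 123_456_789_000L        // MediaCodec PTS values start near device uptime, not 0
        val frameIntervalUs = 33_333L                // ~30fps
        val rawPts = LongArray(30) { i -> uptimeOffsetUs + i * frameIntervalUs }

        val first = rawPts.first()
        val normalized = rawPts.map { it - first }   // the first sample becomes t = 0

        // N samples span (N - 1) frame intervals
        val elapsedUs = normalized.last() - normalized.first()
        val estimatedFps = ((normalized.size - 1) * 1_000_000.0 / elapsedUs).toInt()
        println("first=${normalized.first()}us last=${normalized.last()}us estimatedFps=$estimatedFps") // ~30
    }

On the RecordingSession change: Kotlin's removePrefix() already returns the string unchanged when the prefix is absent, so the startsWith() branch is defensive rather than required. A sketch of the same conversion, with hypothetical example paths:

    // "file:///data/user/0/<app>/cache/rec.mp4" -> "/data/user/0/<app>/cache/rec.mp4"
    // "/data/user/0/<app>/cache/rec.mp4"        -> unchanged
    fun toFilePath(uriOrPath: String): String = uriOrPath.removePrefix("file://")

If callers can pass percent-encoded URIs, parsing with android.net.Uri and using its path would be the safer route; the patch assumes plain file paths.
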