From a2d218580c1b8eead41b129ae06972bdecc0c21a Mon Sep 17 00:00:00 2001 From: Ivan Malison Date: Thu, 18 Dec 2025 12:29:03 -0800 Subject: [PATCH 1/4] feat: Add fragmented MP4 (fMP4) support for Android MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements HLS-compatible fragmented MP4 recording on Android using AndroidX Media3 FragmentedMp4Muxer, matching the iOS implementation. Changes: - Add FragmentedRecordingManager for fMP4 segment output - Add ChunkedRecorderInterface to abstract recorder implementations - Add onInitSegmentReady callback for init segment (init.mp4) - Update onVideoChunkReady to include segment duration - RecordingSession now uses FragmentedRecordingManager by default 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- package/android/build.gradle | 4 + .../com/mrousavy/camera/CameraView+Events.kt | 15 +- .../java/com/mrousavy/camera/CameraView.kt | 8 +- .../com/mrousavy/camera/CameraViewManager.kt | 1 + .../com/mrousavy/camera/core/CameraSession.kt | 3 +- .../mrousavy/camera/core/ChunkedRecorder.kt | 12 +- .../camera/core/ChunkedRecorderInterface.kt | 15 + .../camera/core/FragmentedRecordingManager.kt | 332 ++++++++++++++++++ .../mrousavy/camera/core/RecordingSession.kt | 45 ++- 9 files changed, 415 insertions(+), 20 deletions(-) create mode 100644 package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt create mode 100644 package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt diff --git a/package/android/build.gradle b/package/android/build.gradle index 8a009fc..1b3aa73 100644 --- a/package/android/build.gradle +++ b/package/android/build.gradle @@ -178,6 +178,10 @@ dependencies { implementation "com.facebook.react:react-android:+" implementation "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3" + // Media3 muxer for fragmented MP4 (HLS-compatible) recording + implementation 
"androidx.media3:media3-muxer:1.5.0" + implementation "androidx.media3:media3-common:1.5.0" + if (enableCodeScanner) { // User enabled code-scanner, so we bundle the 2.4 MB model in the app. implementation 'com.google.mlkit:barcode-scanning:17.2.0' diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt b/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt index 3384ef2..07f0b9c 100644 --- a/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt +++ b/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt @@ -40,15 +40,26 @@ fun CameraView.invokeOnStopped() { this.sendEvent(event) } -fun CameraView.invokeOnChunkReady(filepath: File, index: Int) { - Log.e(CameraView.TAG, "invokeOnError(...):") +fun CameraView.invokeOnChunkReady(filepath: File, index: Int, durationUs: Long?) { + Log.i(CameraView.TAG, "invokeOnChunkReady(...): index=$index, filepath=$filepath, durationUs=$durationUs") val event = Arguments.createMap() event.putInt("index", index) event.putString("filepath", filepath.toString()) + if (durationUs != null) { + event.putDouble("duration", durationUs / 1_000_000.0) // Convert microseconds to seconds + } val reactContext = context as ReactContext reactContext.getJSModule(RCTEventEmitter::class.java).receiveEvent(id, "onVideoChunkReady", event) } +fun CameraView.invokeOnInitReady(filepath: File) { + Log.i(CameraView.TAG, "invokeOnInitReady(...): filepath=$filepath") + val event = Arguments.createMap() + event.putString("filepath", filepath.toString()) + val reactContext = context as ReactContext + reactContext.getJSModule(RCTEventEmitter::class.java).receiveEvent(id, "onInitReady", event) +} + fun CameraView.invokeOnError(error: Throwable) { Log.e(CameraView.TAG, "invokeOnError(...):") error.printStackTrace() diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraView.kt b/package/android/src/main/java/com/mrousavy/camera/CameraView.kt index 3569cd3..af4f2c6 100644 
--- a/package/android/src/main/java/com/mrousavy/camera/CameraView.kt +++ b/package/android/src/main/java/com/mrousavy/camera/CameraView.kt @@ -271,8 +271,12 @@ class CameraView(context: Context) : invokeOnStopped() } - override fun onVideoChunkReady(filepath: File, index: Int) { - invokeOnChunkReady(filepath, index) + override fun onVideoChunkReady(filepath: File, index: Int, durationUs: Long?) { + invokeOnChunkReady(filepath, index, durationUs) + } + + override fun onInitSegmentReady(filepath: File) { + invokeOnInitReady(filepath) } override fun onCodeScanned(codes: List, scannerFrame: CodeScannerFrame) { diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt b/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt index af48c52..a6ac35e 100644 --- a/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt +++ b/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt @@ -32,6 +32,7 @@ class CameraViewManager : ViewGroupManager() { .put("cameraError", MapBuilder.of("registrationName", "onError")) .put("cameraCodeScanned", MapBuilder.of("registrationName", "onCodeScanned")) .put("onVideoChunkReady", MapBuilder.of("registrationName", "onVideoChunkReady")) + .put("onInitReady", MapBuilder.of("registrationName", "onInitReady")) .build()?.toMutableMap() override fun getName(): String = TAG diff --git a/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt index 7c9924c..770812f 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt @@ -513,7 +513,8 @@ class CameraSession(private val context: Context, private val cameraManager: Cam fun onInitialized() fun onStarted() fun onStopped() - fun onVideoChunkReady(filepath: File, index: Int) + fun onVideoChunkReady(filepath: File, index: Int, durationUs: 
Long?) + fun onInitSegmentReady(filepath: File) fun onCodeScanned(codes: List, scannerFrame: CodeScannerFrame) } } diff --git a/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt index 9444216..64c3389 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt @@ -14,7 +14,7 @@ import java.io.File import java.nio.ByteBuffer class ChunkedRecordingManager(private val encoder: MediaCodec, private val outputDirectory: File, private val orientationHint: Int, private val iFrameInterval: Int, private val callbacks: CameraSession.Callback) : - MediaCodec.Callback() { + MediaCodec.Callback(), ChunkedRecorderInterface { companion object { private const val TAG = "ChunkedRecorder" @@ -73,7 +73,7 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu private val targetDurationUs = iFrameInterval * 1000000 - val surface: Surface = encoder.createInputSurface() + override val surface: Surface = encoder.createInputSurface() init { if (!this.outputDirectory.exists()) { @@ -95,7 +95,9 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu fun finish() { muxer.stop() muxer.release() - callbacks.onVideoChunkReady(filepath, chunkIndex) + // Calculate duration from start time - this is approximate + // The new FragmentedRecordingManager provides accurate duration + callbacks.onVideoChunkReady(filepath, chunkIndex, null) } } @@ -133,12 +135,12 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu return bufferInfo.presentationTimeUs - context.startTimeUs } - fun start() { + override fun start() { encoder.start() recording = true } - fun finish() { + override fun finish() { synchronized(this) { muxerContext?.finish() recording = false diff --git 
a/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt new file mode 100644 index 0000000..3464dad --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt @@ -0,0 +1,15 @@ +package com.mrousavy.camera.core + +import android.view.Surface + +/** + * Common interface for chunked video recorders. + * Implemented by both ChunkedRecordingManager (regular MP4) and + * FragmentedRecordingManager (HLS-compatible fMP4). + */ +interface ChunkedRecorderInterface { + val surface: Surface + + fun start() + fun finish() +} diff --git a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt new file mode 100644 index 0000000..6157808 --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt @@ -0,0 +1,332 @@ +package com.mrousavy.camera.core + +import android.media.MediaCodec +import android.media.MediaCodec.BufferInfo +import android.media.MediaCodecInfo +import android.media.MediaFormat +import android.util.Log +import android.util.Size +import android.view.Surface +import androidx.media3.common.Format +import androidx.media3.common.MimeTypes +import androidx.media3.common.util.UnstableApi +import androidx.media3.muxer.FragmentedMp4Muxer +import androidx.media3.muxer.Muxer +import com.mrousavy.camera.types.Orientation +import com.mrousavy.camera.types.RecordVideoOptions +import java.io.File +import java.io.FileOutputStream +import java.nio.ByteBuffer + +/** + * A recording manager that produces HLS-compatible fragmented MP4 segments. + * + * This produces output similar to the iOS implementation: + * - An initialization segment (init.mp4) containing codec configuration + * - Numbered data segments (0.mp4, 1.mp4, ...) 
containing media data + * + * Uses AndroidX Media3's FragmentedMp4Muxer which produces proper fMP4 output. + */ +@UnstableApi +class FragmentedRecordingManager( + private val encoder: MediaCodec, + private val outputDirectory: File, + private val orientationDegrees: Int, + private val targetSegmentDurationUs: Long, + private val callbacks: CameraSession.Callback +) : MediaCodec.Callback(), ChunkedRecorderInterface { + + companion object { + private const val TAG = "FragmentedRecorder" + + fun fromParams( + callbacks: CameraSession.Callback, + size: Size, + enableAudio: Boolean, + fps: Int? = null, + cameraOrientation: Orientation, + bitRate: Int, + options: RecordVideoOptions, + outputDirectory: File, + segmentDurationSeconds: Int = 6 + ): FragmentedRecordingManager { + val mimeType = options.videoCodec.toMimeType() + val cameraOrientationDegrees = cameraOrientation.toDegrees() + val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees() + + val (width, height) = if (cameraOrientation.isLandscape()) { + size.height to size.width + } else { + size.width to size.height + } + + val format = MediaFormat.createVideoFormat(mimeType, width, height) + val codec = MediaCodec.createEncoderByType(mimeType) + + format.setInteger( + MediaFormat.KEY_COLOR_FORMAT, + MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface + ) + fps?.apply { + format.setInteger(MediaFormat.KEY_FRAME_RATE, this) + } + // I-frame interval affects segment boundaries + format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds) + format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate) + + Log.d(TAG, "Video Format: $format, camera orientation $cameraOrientationDegrees, recordingOrientation: $recordingOrientationDegrees") + + codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE) + + return FragmentedRecordingManager( + codec, + outputDirectory, + recordingOrientationDegrees, + segmentDurationSeconds * 1_000_000L, + callbacks + ) + } + } + + // 
State management + private var chunkIndex = 0 + private var encodedFormat: MediaFormat? = null + private var recording = false + + // Segment tracking + private var segmentContext: SegmentContext? = null + private var initSegmentEmitted = false + + override val surface: Surface = encoder.createInputSurface() + + init { + if (!outputDirectory.exists()) { + outputDirectory.mkdirs() + } + encoder.setCallback(this) + } + + /** + * Context for a single data segment being written. + * Init segments are created separately via createInitSegment(). + */ + private inner class SegmentContext( + private val format: MediaFormat, + private val segmentIndex: Int + ) { + private val filename = "$segmentIndex.mp4" + private val file = File(outputDirectory, filename) + private val outputStream = FileOutputStream(file) + private val muxer = FragmentedMp4Muxer.Builder(outputStream).build() + private lateinit var videoTrack: Muxer.TrackToken + private var startTimeUs: Long = -1L + private var lastTimeUs: Long = 0L + private var sampleCount = 0 + + init { + val media3Format = convertToMedia3Format(format) + videoTrack = muxer.addTrack(media3Format) + Log.d(TAG, "Created segment context: $filename") + } + + fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo): Boolean { + if (startTimeUs < 0) { + startTimeUs = bufferInfo.presentationTimeUs + } + lastTimeUs = bufferInfo.presentationTimeUs + + val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0 + + muxer.writeSampleData(videoTrack, buffer, bufferInfo) + sampleCount++ + + // Check if we should start a new segment at the next keyframe + if (isKeyFrame && sampleCount > 1) { + val segmentDurationUs = bufferInfo.presentationTimeUs - startTimeUs + if (segmentDurationUs >= targetSegmentDurationUs) { + return true // Signal to create new segment + } + } + + return false + } + + fun finish(): Long { + try { + muxer.close() + outputStream.close() + } catch (e: Exception) { + Log.e(TAG, "Error closing segment", e) + } 
+ + val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L + callbacks.onVideoChunkReady(file, segmentIndex, durationUs) + + Log.d(TAG, "Finished segment: $filename, samples=$sampleCount, duration=${durationUs/1000}ms") + return durationUs + } + } + + private fun createNewSegment() { + val format = encodedFormat + if (format == null) { + Log.e(TAG, "Cannot create segment: encodedFormat is null") + return + } + + // Close previous segment + segmentContext?.finish() + + // Create new data segment (init segments are created separately) + segmentContext = SegmentContext(format, chunkIndex) + chunkIndex++ + } + + override fun start() { + encoder.start() + recording = true + } + + override fun finish() { + synchronized(this) { + recording = false + segmentContext?.finish() + segmentContext = null + try { + encoder.stop() + encoder.release() + } catch (e: Exception) { + Log.e(TAG, "Error stopping encoder", e) + } + } + } + + // MediaCodec.Callback methods + override fun onInputBufferAvailable(codec: MediaCodec, index: Int) { + // Not used for Surface input + } + + override fun onOutputBufferAvailable(codec: MediaCodec, index: Int, bufferInfo: BufferInfo) { + synchronized(this) { + if (!recording) { + encoder.releaseOutputBuffer(index, false) + return + } + + val encodedData = encoder.getOutputBuffer(index) + if (encodedData == null) { + Log.e(TAG, "getOutputBuffer returned null") + encoder.releaseOutputBuffer(index, false) + return + } + + // Wait until init segment is emitted (happens in onOutputFormatChanged) + if (!initSegmentEmitted) { + encoder.releaseOutputBuffer(index, false) + return + } + + // Create first data segment if needed + if (segmentContext == null) { + createNewSegment() + } + + val context = segmentContext + if (context == null) { + encoder.releaseOutputBuffer(index, false) + return + } + + try { + val shouldStartNewSegment = context.writeSample(encodedData, bufferInfo) + if (shouldStartNewSegment) { + createNewSegment() + // 
Write this keyframe to the new segment as well + segmentContext?.writeSample(encodedData, bufferInfo) + } + } catch (e: Exception) { + Log.e(TAG, "Error writing sample", e) + } + + encoder.releaseOutputBuffer(index, false) + } + } + + override fun onError(codec: MediaCodec, e: MediaCodec.CodecException) { + Log.e(TAG, "Codec error: ${e.message}") + } + + override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) { + Log.i(TAG, "Output format changed: $format") + encodedFormat = format + + // Create the init segment immediately when we get the format + // This produces an fMP4 file with just ftyp + moov (no samples) + if (!initSegmentEmitted) { + createInitSegment(format) + initSegmentEmitted = true + } + } + + /** + * Creates an initialization segment containing only codec configuration (ftyp + moov). + * This is done by creating a muxer, adding the track, and immediately closing it + * without writing any samples. + */ + private fun createInitSegment(format: MediaFormat) { + val initFile = File(outputDirectory, "init.mp4") + try { + val outputStream = FileOutputStream(initFile) + val muxer = FragmentedMp4Muxer.Builder(outputStream).build() + + // Convert and add the track + val media3Format = convertToMedia3Format(format) + muxer.addTrack(media3Format) + + // Close immediately - this writes just the header (ftyp + moov) + muxer.close() + outputStream.close() + + Log.d(TAG, "Created init segment: ${initFile.absolutePath}") + callbacks.onInitSegmentReady(initFile) + } catch (e: Exception) { + Log.e(TAG, "Error creating init segment", e) + } + } + + private fun convertToMedia3Format(mediaFormat: MediaFormat): Format { + val mimeType = mediaFormat.getString(MediaFormat.KEY_MIME) ?: MimeTypes.VIDEO_H264 + val width = mediaFormat.getInteger(MediaFormat.KEY_WIDTH) + val height = mediaFormat.getInteger(MediaFormat.KEY_HEIGHT) + val bitRate = try { mediaFormat.getInteger(MediaFormat.KEY_BIT_RATE) } catch (e: Exception) { -1 } + val frameRate = try { 
mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE) } catch (e: Exception) { -1 } + + // Get CSD (Codec Specific Data) if available - required for init segment + val csd0 = mediaFormat.getByteBuffer("csd-0") + val csd1 = mediaFormat.getByteBuffer("csd-1") + + val initData = mutableListOf() + csd0?.let { + val bytes = ByteArray(it.remaining()) + it.duplicate().get(bytes) + initData.add(bytes) + } + csd1?.let { + val bytes = ByteArray(it.remaining()) + it.duplicate().get(bytes) + initData.add(bytes) + } + + return Format.Builder() + .setSampleMimeType(mimeType) + .setWidth(width) + .setHeight(height) + .setRotationDegrees(orientationDegrees) + .apply { + if (bitRate > 0) setAverageBitrate(bitRate) + if (frameRate > 0) setFrameRate(frameRate.toFloat()) + if (initData.isNotEmpty()) setInitializationData(initData) + } + .build() + } +} diff --git a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt index 1d61188..4f3331e 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt @@ -4,6 +4,7 @@ import android.content.Context import android.util.Log import android.util.Size import android.view.Surface +import androidx.media3.common.util.UnstableApi import com.facebook.common.statfs.StatFsHelper import com.mrousavy.camera.extensions.getRecommendedBitRate import com.mrousavy.camera.types.Orientation @@ -14,6 +15,8 @@ import android.os.Environment import java.text.SimpleDateFormat import java.util.Locale import java.util.Date + +@UnstableApi class RecordingSession( context: Context, val cameraId: String, @@ -27,6 +30,8 @@ class RecordingSession( private val callback: (video: Video) -> Unit, private val onError: (error: CameraError) -> Unit, private val allCallbacks: CameraSession.Callback, + // Use the new FragmentedMp4Muxer-based recorder for HLS-compatible output + 
private val useFragmentedMp4: Boolean = true ) { companion object { private const val TAG = "RecordingSession" @@ -34,6 +39,9 @@ class RecordingSession( private const val AUDIO_SAMPLING_RATE = 44_100 private const val AUDIO_BIT_RATE = 16 * AUDIO_SAMPLING_RATE private const val AUDIO_CHANNELS = 1 + + // Segment duration in seconds (matching iOS default of 6 seconds) + private const val SEGMENT_DURATION_SECONDS = 6 } data class Video(val path: String, val durationMs: Long, val size: Size) @@ -41,16 +49,33 @@ class RecordingSession( private val outputPath: File = File(filePath) private val bitRate = getBitRate() - private val recorder = ChunkedRecordingManager.fromParams( - allCallbacks, - size, - enableAudio, - fps, - cameraOrientation, - bitRate, - options, - outputPath - ) + + // Use FragmentedRecordingManager for HLS-compatible fMP4 output, + // or fall back to ChunkedRecordingManager for regular MP4 chunks + private val recorder: ChunkedRecorderInterface = if (useFragmentedMp4) { + FragmentedRecordingManager.fromParams( + allCallbacks, + size, + enableAudio, + fps, + cameraOrientation, + bitRate, + options, + outputPath, + SEGMENT_DURATION_SECONDS + ) + } else { + ChunkedRecordingManager.fromParams( + allCallbacks, + size, + enableAudio, + fps, + cameraOrientation, + bitRate, + options, + outputPath + ) + } private var startTime: Long? 
= null val surface: Surface get() { -- 2.49.1 From e60c1a4eb174dff6b338f961026966693096cbc7 Mon Sep 17 00:00:00 2001 From: Ivan Malison Date: Sun, 21 Dec 2025 16:45:04 -0800 Subject: [PATCH 2/4] Write our own muxer to make HLS upload actually work --- package/android/build.gradle | 4 - .../camera/core/FragmentedRecordingManager.kt | 274 ++---- .../java/com/mrousavy/camera/core/HlsMuxer.kt | 857 ++++++++++++++++++ .../mrousavy/camera/core/RecordingSession.kt | 4 +- 4 files changed, 916 insertions(+), 223 deletions(-) create mode 100644 package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt diff --git a/package/android/build.gradle b/package/android/build.gradle index 1b3aa73..8a009fc 100644 --- a/package/android/build.gradle +++ b/package/android/build.gradle @@ -178,10 +178,6 @@ dependencies { implementation "com.facebook.react:react-android:+" implementation "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3" - // Media3 muxer for fragmented MP4 (HLS-compatible) recording - implementation "androidx.media3:media3-muxer:1.5.0" - implementation "androidx.media3:media3-common:1.5.0" - if (enableCodeScanner) { // User enabled code-scanner, so we bundle the 2.4 MB model in the app. 
implementation 'com.google.mlkit:barcode-scanning:17.2.0' diff --git a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt index 6157808..8ad49d7 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt @@ -7,37 +7,25 @@ import android.media.MediaFormat import android.util.Log import android.util.Size import android.view.Surface -import androidx.media3.common.Format -import androidx.media3.common.MimeTypes -import androidx.media3.common.util.UnstableApi -import androidx.media3.muxer.FragmentedMp4Muxer -import androidx.media3.muxer.Muxer import com.mrousavy.camera.types.Orientation import com.mrousavy.camera.types.RecordVideoOptions import java.io.File -import java.io.FileOutputStream -import java.nio.ByteBuffer /** * A recording manager that produces HLS-compatible fragmented MP4 segments. * - * This produces output similar to the iOS implementation: - * - An initialization segment (init.mp4) containing codec configuration - * - Numbered data segments (0.mp4, 1.mp4, ...) containing media data - * - * Uses AndroidX Media3's FragmentedMp4Muxer which produces proper fMP4 output. 
+ * Uses HlsMuxer (following Android's MediaMuxer pattern) to produce: + * - init.mp4: Initialization segment (ftyp + moov with mvex) + * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat) */ -@UnstableApi class FragmentedRecordingManager( private val encoder: MediaCodec, - private val outputDirectory: File, - private val orientationDegrees: Int, - private val targetSegmentDurationUs: Long, - private val callbacks: CameraSession.Callback + private val muxer: HlsMuxer ) : MediaCodec.Callback(), ChunkedRecorderInterface { companion object { private const val TAG = "FragmentedRecorder" + private const val DEFAULT_SEGMENT_DURATION_SECONDS = 6 fun fromParams( callbacks: CameraSession.Callback, @@ -48,17 +36,20 @@ class FragmentedRecordingManager( bitRate: Int, options: RecordVideoOptions, outputDirectory: File, - segmentDurationSeconds: Int = 6 + segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS ): FragmentedRecordingManager { val mimeType = options.videoCodec.toMimeType() val cameraOrientationDegrees = cameraOrientation.toDegrees() val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees() - val (width, height) = if (cameraOrientation.isLandscape()) { - size.height to size.width - } else { - size.width to size.height - } + // Use size dimensions directly - the encoder output format will have the actual dimensions + // Don't swap based on orientation here; the camera pipeline handles that + val width = size.width + val height = size.height + + Log.d(TAG, "Input size: ${size.width}x${size.height}, " + + "cameraOrientation: $cameraOrientation ($cameraOrientationDegrees°), " + + "recordingOrientation: $recordingOrientationDegrees°") val format = MediaFormat.createVideoFormat(mimeType, width, height) val codec = MediaCodec.createEncoderByType(mimeType) @@ -67,121 +58,48 @@ class FragmentedRecordingManager( MediaFormat.KEY_COLOR_FORMAT, MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface ) - fps?.apply { - 
format.setInteger(MediaFormat.KEY_FRAME_RATE, this) - } - // I-frame interval affects segment boundaries + + val effectiveFps = fps ?: 30 + format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps) format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds) format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate) - Log.d(TAG, "Video Format: $format, camera orientation $cameraOrientationDegrees, recordingOrientation: $recordingOrientationDegrees") + Log.d(TAG, "Video Format: $format, orientation: $recordingOrientationDegrees") codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE) - return FragmentedRecordingManager( - codec, - outputDirectory, - recordingOrientationDegrees, - segmentDurationSeconds * 1_000_000L, - callbacks + // Create muxer with callbacks and orientation + val muxer = HlsMuxer( + outputDirectory = outputDirectory, + callback = object : HlsMuxer.Callback { + override fun onInitSegmentReady(file: File) { + callbacks.onInitSegmentReady(file) + } + + override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) { + callbacks.onVideoChunkReady(file, index, durationUs) + } + }, + orientationDegrees = recordingOrientationDegrees ) + muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L) + + Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees") + + return FragmentedRecordingManager(codec, muxer) } } - // State management - private var chunkIndex = 0 - private var encodedFormat: MediaFormat? = null private var recording = false - - // Segment tracking - private var segmentContext: SegmentContext? = null - private var initSegmentEmitted = false + private var muxerStarted = false + private var trackIndex = -1 override val surface: Surface = encoder.createInputSurface() init { - if (!outputDirectory.exists()) { - outputDirectory.mkdirs() - } encoder.setCallback(this) } - /** - * Context for a single data segment being written. 
- * Init segments are created separately via createInitSegment(). - */ - private inner class SegmentContext( - private val format: MediaFormat, - private val segmentIndex: Int - ) { - private val filename = "$segmentIndex.mp4" - private val file = File(outputDirectory, filename) - private val outputStream = FileOutputStream(file) - private val muxer = FragmentedMp4Muxer.Builder(outputStream).build() - private lateinit var videoTrack: Muxer.TrackToken - private var startTimeUs: Long = -1L - private var lastTimeUs: Long = 0L - private var sampleCount = 0 - - init { - val media3Format = convertToMedia3Format(format) - videoTrack = muxer.addTrack(media3Format) - Log.d(TAG, "Created segment context: $filename") - } - - fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo): Boolean { - if (startTimeUs < 0) { - startTimeUs = bufferInfo.presentationTimeUs - } - lastTimeUs = bufferInfo.presentationTimeUs - - val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0 - - muxer.writeSampleData(videoTrack, buffer, bufferInfo) - sampleCount++ - - // Check if we should start a new segment at the next keyframe - if (isKeyFrame && sampleCount > 1) { - val segmentDurationUs = bufferInfo.presentationTimeUs - startTimeUs - if (segmentDurationUs >= targetSegmentDurationUs) { - return true // Signal to create new segment - } - } - - return false - } - - fun finish(): Long { - try { - muxer.close() - outputStream.close() - } catch (e: Exception) { - Log.e(TAG, "Error closing segment", e) - } - - val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L - callbacks.onVideoChunkReady(file, segmentIndex, durationUs) - - Log.d(TAG, "Finished segment: $filename, samples=$sampleCount, duration=${durationUs/1000}ms") - return durationUs - } - } - - private fun createNewSegment() { - val format = encodedFormat - if (format == null) { - Log.e(TAG, "Cannot create segment: encodedFormat is null") - return - } - - // Close previous segment - 
segmentContext?.finish() - - // Create new data segment (init segments are created separately) - segmentContext = SegmentContext(format, chunkIndex) - chunkIndex++ - } - override fun start() { encoder.start() recording = true @@ -190,8 +108,12 @@ class FragmentedRecordingManager( override fun finish() { synchronized(this) { recording = false - segmentContext?.finish() - segmentContext = null + + if (muxerStarted) { + muxer.stop() + muxer.release() + } + try { encoder.stop() encoder.release() @@ -202,6 +124,7 @@ class FragmentedRecordingManager( } // MediaCodec.Callback methods + override fun onInputBufferAvailable(codec: MediaCodec, index: Int) { // Not used for Surface input } @@ -213,37 +136,20 @@ class FragmentedRecordingManager( return } - val encodedData = encoder.getOutputBuffer(index) - if (encodedData == null) { + if (!muxerStarted) { + encoder.releaseOutputBuffer(index, false) + return + } + + val buffer = encoder.getOutputBuffer(index) + if (buffer == null) { Log.e(TAG, "getOutputBuffer returned null") encoder.releaseOutputBuffer(index, false) return } - // Wait until init segment is emitted (happens in onOutputFormatChanged) - if (!initSegmentEmitted) { - encoder.releaseOutputBuffer(index, false) - return - } - - // Create first data segment if needed - if (segmentContext == null) { - createNewSegment() - } - - val context = segmentContext - if (context == null) { - encoder.releaseOutputBuffer(index, false) - return - } - try { - val shouldStartNewSegment = context.writeSample(encodedData, bufferInfo) - if (shouldStartNewSegment) { - createNewSegment() - // Write this keyframe to the new segment as well - segmentContext?.writeSample(encodedData, bufferInfo) - } + muxer.writeSampleData(trackIndex, buffer, bufferInfo) } catch (e: Exception) { Log.e(TAG, "Error writing sample", e) } @@ -257,76 +163,12 @@ class FragmentedRecordingManager( } override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) { - Log.i(TAG, "Output format changed: 
$format") - encodedFormat = format + synchronized(this) { + Log.i(TAG, "Output format changed: $format") - // Create the init segment immediately when we get the format - // This produces an fMP4 file with just ftyp + moov (no samples) - if (!initSegmentEmitted) { - createInitSegment(format) - initSegmentEmitted = true + trackIndex = muxer.addTrack(format) + muxer.start() + muxerStarted = true } } - - /** - * Creates an initialization segment containing only codec configuration (ftyp + moov). - * This is done by creating a muxer, adding the track, and immediately closing it - * without writing any samples. - */ - private fun createInitSegment(format: MediaFormat) { - val initFile = File(outputDirectory, "init.mp4") - try { - val outputStream = FileOutputStream(initFile) - val muxer = FragmentedMp4Muxer.Builder(outputStream).build() - - // Convert and add the track - val media3Format = convertToMedia3Format(format) - muxer.addTrack(media3Format) - - // Close immediately - this writes just the header (ftyp + moov) - muxer.close() - outputStream.close() - - Log.d(TAG, "Created init segment: ${initFile.absolutePath}") - callbacks.onInitSegmentReady(initFile) - } catch (e: Exception) { - Log.e(TAG, "Error creating init segment", e) - } - } - - private fun convertToMedia3Format(mediaFormat: MediaFormat): Format { - val mimeType = mediaFormat.getString(MediaFormat.KEY_MIME) ?: MimeTypes.VIDEO_H264 - val width = mediaFormat.getInteger(MediaFormat.KEY_WIDTH) - val height = mediaFormat.getInteger(MediaFormat.KEY_HEIGHT) - val bitRate = try { mediaFormat.getInteger(MediaFormat.KEY_BIT_RATE) } catch (e: Exception) { -1 } - val frameRate = try { mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE) } catch (e: Exception) { -1 } - - // Get CSD (Codec Specific Data) if available - required for init segment - val csd0 = mediaFormat.getByteBuffer("csd-0") - val csd1 = mediaFormat.getByteBuffer("csd-1") - - val initData = mutableListOf() - csd0?.let { - val bytes = 
ByteArray(it.remaining()) - it.duplicate().get(bytes) - initData.add(bytes) - } - csd1?.let { - val bytes = ByteArray(it.remaining()) - it.duplicate().get(bytes) - initData.add(bytes) - } - - return Format.Builder() - .setSampleMimeType(mimeType) - .setWidth(width) - .setHeight(height) - .setRotationDegrees(orientationDegrees) - .apply { - if (bitRate > 0) setAverageBitrate(bitRate) - if (frameRate > 0) setFrameRate(frameRate.toFloat()) - if (initData.isNotEmpty()) setInitializationData(initData) - } - .build() - } } diff --git a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt new file mode 100644 index 0000000..5c68668 --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt @@ -0,0 +1,857 @@ +package com.mrousavy.camera.core + +import android.media.MediaCodec +import android.media.MediaFormat +import android.util.Log +import java.io.ByteArrayOutputStream +import java.io.DataOutputStream +import java.io.File +import java.io.FileOutputStream +import java.nio.ByteBuffer + +/** + * A muxer for creating HLS-compatible fragmented MP4 output. + * + * Follows the same pattern as Android's MediaMuxer: + * 1. Create muxer with output directory + * 2. addTrack() with MediaFormat + * 3. start() - writes init.mp4 + * 4. writeSampleData() for each encoded sample + * 5. stop() - finalizes last segment + * 6. 
release() - cleanup
+ *
+ * Produces:
+ * - init.mp4: Initialization segment (ftyp + moov with mvex)
+ * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
+ */
+class HlsMuxer(
+  private val outputDirectory: File,
+  private val callback: Callback,
+  private val orientationDegrees: Int = 0
+) {
+  companion object {
+    private const val TAG = "HlsMuxer"
+    private const val DEFAULT_SEGMENT_DURATION_US = 6_000_000L // 6 seconds
+  }
+
+  interface Callback {
+    fun onInitSegmentReady(file: File)
+    fun onMediaSegmentReady(file: File, index: Int, durationUs: Long)
+  }
+
+  // Configuration
+  private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
+  private var timescale: Int = 30000 // Default, updated from format
+
+  // State
+  private var state = State.UNINITIALIZED
+  private var trackFormat: MediaFormat? = null
+  private var sequenceNumber = 1
+  private var segmentIndex = 0
+
+  // Current segment data
+  private val pendingSamples = mutableListOf<Sample>()
+  private var segmentStartTimeUs = -1L
+  private var lastPresentationTimeUs = 0L
+
+  private enum class State {
+    UNINITIALIZED,
+    INITIALIZED,
+    STARTED,
+    STOPPED,
+    RELEASED
+  }
+
+  private data class Sample(
+    val data: ByteArray,
+    val presentationTimeUs: Long,
+    var durationUs: Long,
+    val isKeyFrame: Boolean
+  )
+
+  // ==================== Annex-B to AVCC Conversion ====================
+
+  /**
+   * Converts H.264 data from Annex-B format to AVCC format.
+   *
+   * Annex-B uses start codes (00 00 00 01 or 00 00 01) to delimit NAL units.
+   * AVCC uses 4-byte big-endian length prefixes before each NAL unit.
+   *
+   * This conversion is required because:
+   * - MediaCodec outputs Annex-B format
+   * - fMP4/HLS requires AVCC format (as specified in avcC box with NAL length size = 4)
+   */
+  private fun convertAnnexBToAvcc(annexBData: ByteArray): ByteArray {
+    val nalUnits = parseAnnexBNalUnits(annexBData)
+    if (nalUnits.isEmpty()) {
+      Log.w(TAG, "No NAL units found in sample, returning original data")
+      return annexBData
+    }
+
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    for (nalUnit in nalUnits) {
+      // Write 4-byte big-endian length prefix
+      dos.writeInt(nalUnit.size)
+      // Write NAL unit data (without start code)
+      dos.write(nalUnit)
+    }
+
+    return output.toByteArray()
+  }
+
+  /**
+   * Parses Annex-B formatted data into individual NAL units.
+   * Returns list of NAL unit byte arrays (without start codes).
+   */
+  private fun parseAnnexBNalUnits(data: ByteArray): List<ByteArray> {
+    val nalUnits = mutableListOf<ByteArray>()
+    var i = 0
+
+    while (i < data.size) {
+      // Find start code
+      val startCodeLength = findStartCode(data, i)
+      if (startCodeLength == 0) {
+        // No start code found at current position
+        // This might happen if data doesn't start with a start code
+        if (nalUnits.isEmpty() && i == 0) {
+          // Data might already be in AVCC format or malformed
+          // Try to detect AVCC format (first 4 bytes would be a reasonable length)
+          if (data.size >= 4) {
+            val possibleLength = ((data[0].toInt() and 0xFF) shl 24) or
+              ((data[1].toInt() and 0xFF) shl 16) or
+              ((data[2].toInt() and 0xFF) shl 8) or
+              (data[3].toInt() and 0xFF)
+            if (possibleLength > 0 && possibleLength <= data.size - 4) {
+              // Looks like AVCC format already, return original
+              Log.d(TAG, "Data appears to already be in AVCC format")
+              return emptyList()
+            }
+          }
+        }
+        i++
+        continue
+      }
+
+      val nalStart = i + startCodeLength
+
+      // Find end of this NAL unit (start of next, or end of data)
+      var nalEnd = data.size
+      var j = nalStart
+      while (j < data.size - 2) {
+        val nextStartCode = findStartCode(data, j)
+ if (nextStartCode > 0) { + nalEnd = j + break + } + j++ + } + + if (nalEnd > nalStart) { + nalUnits.add(data.copyOfRange(nalStart, nalEnd)) + } + + i = nalEnd + } + + return nalUnits + } + + /** + * Checks for Annex-B start code at given position. + * Returns start code length (3 or 4) or 0 if no start code found. + */ + private fun findStartCode(data: ByteArray, offset: Int): Int { + if (offset + 4 <= data.size && + data[offset] == 0.toByte() && + data[offset + 1] == 0.toByte() && + data[offset + 2] == 0.toByte() && + data[offset + 3] == 1.toByte()) { + return 4 // 4-byte start code: 00 00 00 01 + } + if (offset + 3 <= data.size && + data[offset] == 0.toByte() && + data[offset + 1] == 0.toByte() && + data[offset + 2] == 1.toByte()) { + return 3 // 3-byte start code: 00 00 01 + } + return 0 + } + + /** + * Sets the target segment duration. + * Must be called before start(). + */ + fun setSegmentDuration(durationUs: Long) { + check(state == State.UNINITIALIZED || state == State.INITIALIZED) { + "Cannot set segment duration after start()" + } + targetSegmentDurationUs = durationUs + } + + /** + * Adds a track to the muxer. 
+ * + * @param format The MediaFormat describing the track + * @return Track index (always 0 for now, single video track) + */ + fun addTrack(format: MediaFormat): Int { + check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" } + + trackFormat = format + + // Extract timescale from frame rate + val fps = try { + format.getInteger(MediaFormat.KEY_FRAME_RATE) + } catch (e: Exception) { + 30 + } + timescale = fps * 1000 // Use fps * 1000 for good precision + + state = State.INITIALIZED + + val formatWidth = try { format.getInteger(MediaFormat.KEY_WIDTH) } catch (e: Exception) { -1 } + val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 } + Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " + + "encoder output: ${formatWidth}x${formatHeight}, " + + "timescale=$timescale, orientation=$orientationDegrees°") + + return 0 // Single track, index 0 + } + + /** + * Starts the muxer, writing the initialization segment. + */ + fun start() { + check(state == State.INITIALIZED) { "Must call addTrack() before start()" } + val format = trackFormat ?: throw IllegalStateException("No track format") + + // Create output directory if needed + if (!outputDirectory.exists()) { + outputDirectory.mkdirs() + } + + // Write init segment + val initBytes = buildInitSegment(format) + val initFile = File(outputDirectory, "init.mp4") + FileOutputStream(initFile).use { it.write(initBytes) } + Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)") + callback.onInitSegmentReady(initFile) + + state = State.STARTED + } + + /** + * Writes sample data to the muxer. 
+ * + * @param trackIndex Track index (must be 0) + * @param buffer The encoded sample data + * @param bufferInfo Sample metadata (size, presentation time, flags) + */ + fun writeSampleData(trackIndex: Int, buffer: ByteBuffer, bufferInfo: MediaCodec.BufferInfo) { + check(state == State.STARTED) { "Must call start() before writeSampleData()" } + check(trackIndex == 0) { "Invalid track index: $trackIndex" } + + // Skip codec config data (already in init segment) + if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) { + return + } + + val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0 + val presentationTimeUs = bufferInfo.presentationTimeUs + + // Initialize segment start time + if (segmentStartTimeUs < 0) { + segmentStartTimeUs = presentationTimeUs + } + + // Check if we should finalize current segment (at keyframe boundaries) + if (isKeyFrame && pendingSamples.isNotEmpty()) { + val segmentDurationUs = presentationTimeUs - segmentStartTimeUs + if (segmentDurationUs >= targetSegmentDurationUs) { + finalizeCurrentSegment() + segmentStartTimeUs = presentationTimeUs + } + } + + // Copy buffer data and convert from Annex-B to AVCC format + val rawData = ByteArray(bufferInfo.size) + buffer.position(bufferInfo.offset) + buffer.limit(bufferInfo.offset + bufferInfo.size) + buffer.get(rawData) + + // Convert Annex-B (start codes) to AVCC (length prefixes) + val data = convertAnnexBToAvcc(rawData) + + // Update duration of previous sample + if (pendingSamples.isNotEmpty()) { + val lastSample = pendingSamples.last() + lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs + } + + // Estimate duration (will be corrected by next sample) + val estimatedDurationUs = if (lastPresentationTimeUs > 0) { + presentationTimeUs - lastPresentationTimeUs + } else { + 1_000_000L / 30 // Assume 30fps + } + + pendingSamples.add(Sample( + data = data, + presentationTimeUs = presentationTimeUs, + durationUs = estimatedDurationUs, + 
isKeyFrame = isKeyFrame + )) + + lastPresentationTimeUs = presentationTimeUs + } + + /** + * Stops the muxer, finalizing any pending segment. + */ + fun stop() { + check(state == State.STARTED) { "Muxer not started" } + + if (pendingSamples.isNotEmpty()) { + finalizeCurrentSegment() + } + + state = State.STOPPED + Log.d(TAG, "Muxer stopped, wrote $segmentIndex segments") + } + + /** + * Releases resources. + */ + fun release() { + if (state == State.STARTED) { + stop() + } + pendingSamples.clear() + state = State.RELEASED + } + + /** + * Finalizes the current segment and writes it to disk. + */ + private fun finalizeCurrentSegment() { + if (pendingSamples.isEmpty()) return + + try { + val baseDecodeTimeUs = pendingSamples.first().presentationTimeUs + val fragmentBytes = buildMediaSegment(pendingSamples, sequenceNumber, baseDecodeTimeUs) + + val segmentFile = File(outputDirectory, "$segmentIndex.mp4") + FileOutputStream(segmentFile).use { it.write(fragmentBytes) } + + // Calculate duration + val firstPts = pendingSamples.first().presentationTimeUs + val lastSample = pendingSamples.last() + val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs + + Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " + + "duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes") + + callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs) + + segmentIndex++ + sequenceNumber++ + pendingSamples.clear() + + } catch (e: Exception) { + Log.e(TAG, "Error finalizing segment $segmentIndex", e) + } + } + + // ==================== Init Segment Building ==================== + + /** + * Builds the initialization segment (ftyp + moov). 
+ */ + private fun buildInitSegment(format: MediaFormat): ByteArray { + val width = format.getInteger(MediaFormat.KEY_WIDTH) + val height = format.getInteger(MediaFormat.KEY_HEIGHT) + + val sps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) } + ?: throw IllegalArgumentException("Missing SPS (csd-0)") + val pps = format.getByteBuffer("csd-1")?.let { extractNalUnit(it) } + ?: throw IllegalArgumentException("Missing PPS (csd-1)") + + val output = ByteArrayOutputStream() + + // ftyp + output.write(buildFtypBox()) + + // moov + output.write(buildMoovBox(width, height, sps, pps)) + + return output.toByteArray() + } + + private fun extractNalUnit(buffer: ByteBuffer): ByteArray { + val data = ByteArray(buffer.remaining()) + buffer.duplicate().get(data) + + // Strip start code prefix (0x00000001 or 0x000001) + return when { + data.size >= 4 && data[0] == 0.toByte() && data[1] == 0.toByte() && + data[2] == 0.toByte() && data[3] == 1.toByte() -> data.copyOfRange(4, data.size) + data.size >= 3 && data[0] == 0.toByte() && data[1] == 0.toByte() && + data[2] == 1.toByte() -> data.copyOfRange(3, data.size) + else -> data + } + } + + private fun buildFtypBox(): ByteArray { + val brands = listOf("isom", "iso5", "iso6", "avc1", "mp41", "dash") + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + val size = 8 + 4 + 4 + (brands.size * 4) + dos.writeInt(size) + dos.writeBytes("ftyp") + dos.writeBytes("isom") // major brand + dos.writeInt(0x200) // minor version + brands.forEach { dos.writeBytes(it) } + + return output.toByteArray() + } + + private fun buildMoovBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + + content.write(buildMvhdBox()) + content.write(buildTrakBox(width, height, sps, pps)) + content.write(buildMvexBox()) + + return wrapBox("moov", content.toByteArray()) + } + + private fun buildMvhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = 
DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(timescale) // timescale + dos.writeInt(0) // duration + dos.writeInt(0x00010000) // rate = 1.0 + dos.writeShort(0x0100) // volume = 1.0 + dos.writeShort(0) // reserved + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + + // Unity matrix + dos.writeInt(0x00010000); dos.writeInt(0); dos.writeInt(0) + dos.writeInt(0); dos.writeInt(0x00010000); dos.writeInt(0) + dos.writeInt(0); dos.writeInt(0); dos.writeInt(0x40000000) + + repeat(6) { dos.writeInt(0) } // pre-defined + dos.writeInt(2) // next track ID + + return wrapBox("mvhd", output.toByteArray()) + } + + private fun buildTrakBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildTkhdBox(width, height)) + content.write(buildMdiaBox(width, height, sps, pps)) + return wrapBox("trak", content.toByteArray()) + } + + private fun buildTkhdBox(width: Int, height: Int): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0x00000007) // version 0, flags (enabled, in movie, in preview) + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(1) // track ID + dos.writeInt(0) // reserved + dos.writeInt(0) // duration + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeShort(0) // layer + dos.writeShort(0) // alternate group + dos.writeShort(0) // volume (0 for video) + dos.writeShort(0) // reserved + + // Rotation matrix - use identity and rely on correct dimensions from encoder + // The encoder output format already has the correct dimensions for the content + writeRotationMatrix(dos) + + // Use dimensions as-is from encoder output format + dos.writeInt(width shl 16) // width (16.16 fixed point) + dos.writeInt(height shl 16) // height (16.16 fixed point) + + Log.d(TAG, "tkhd: 
${width}x${height}, rotation=$orientationDegrees") + + return wrapBox("tkhd", output.toByteArray()) + } + + /** + * Writes the 3x3 transformation matrix for video rotation. + * Uses simple rotation values - the encoder already outputs correctly oriented frames. + */ + private fun writeRotationMatrix(dos: DataOutputStream) { + // Fixed-point constants + val one = 0x00010000 // 1.0 in 16.16 + val w = 0x40000000 // 1.0 in 2.30 + + // Identity matrix - no transformation + // Most HLS players handle rotation via the dimensions themselves + // or we can add rotation metadata separately if needed + dos.writeInt(one) // a = 1 + dos.writeInt(0) // b = 0 + dos.writeInt(0) // u = 0 + dos.writeInt(0) // c = 0 + dos.writeInt(one) // d = 1 + dos.writeInt(0) // v = 0 + dos.writeInt(0) // x = 0 + dos.writeInt(0) // y = 0 + dos.writeInt(w) // w = 1 + } + + private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildMdhdBox()) + content.write(buildHdlrBox()) + content.write(buildMinfBox(width, height, sps, pps)) + return wrapBox("mdia", content.toByteArray()) + } + + private fun buildMdhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(timescale) // timescale + dos.writeInt(0) // duration + dos.writeShort(0x55C4) // language: "und" + dos.writeShort(0) // pre-defined + + return wrapBox("mdhd", output.toByteArray()) + } + + private fun buildHdlrBox(): ByteArray { + val name = "VideoHandler" + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // pre-defined + dos.writeBytes("vide") // handler type + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeBytes(name) + dos.writeByte(0) // null 
terminator + + return wrapBox("hdlr", output.toByteArray()) + } + + private fun buildMinfBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildVmhdBox()) + content.write(buildDinfBox()) + content.write(buildStblBox(width, height, sps, pps)) + return wrapBox("minf", content.toByteArray()) + } + + private fun buildVmhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(1) // version 0, flags = 1 + dos.writeShort(0) // graphics mode + dos.writeShort(0) // opcolor[0] + dos.writeShort(0) // opcolor[1] + dos.writeShort(0) // opcolor[2] + + return wrapBox("vmhd", output.toByteArray()) + } + + private fun buildDinfBox(): ByteArray { + val dref = buildDrefBox() + return wrapBox("dinf", dref) + } + + private fun buildDrefBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(1) // entry count + + // url box (self-contained) + dos.writeInt(12) + dos.writeBytes("url ") + dos.writeInt(1) // flags: self-contained + + return wrapBox("dref", output.toByteArray()) + } + + private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildStsdBox(width, height, sps, pps)) + content.write(buildEmptySttsBox()) + content.write(buildEmptyStscBox()) + content.write(buildEmptyStszBox()) + content.write(buildEmptyStcoBox()) + return wrapBox("stbl", content.toByteArray()) + } + + private fun buildStsdBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(1) // entry count + output.write(buildAvc1Box(width, height, sps, pps)) + + return wrapBox("stsd", output.toByteArray()) + } + + private fun buildAvc1Box(width: Int, height: 
Int, sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + repeat(6) { dos.writeByte(0) } // reserved + dos.writeShort(1) // data reference index + dos.writeShort(0) // pre-defined + dos.writeShort(0) // reserved + repeat(3) { dos.writeInt(0) } // pre-defined + dos.writeShort(width) // width + dos.writeShort(height) // height + dos.writeInt(0x00480000) // horiz resolution (72 dpi) + dos.writeInt(0x00480000) // vert resolution (72 dpi) + dos.writeInt(0) // reserved + dos.writeShort(1) // frame count + repeat(32) { dos.writeByte(0) } // compressor name + dos.writeShort(0x0018) // depth (24 bit) + dos.writeShort(-1) // pre-defined + + output.write(buildAvcCBox(sps, pps)) + + return wrapBox("avc1", output.toByteArray()) + } + + private fun buildAvcCBox(sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + val profileIdc = if (sps.isNotEmpty()) sps[0].toInt() and 0xFF else 0x42 + val profileCompat = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x00 + val levelIdc = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x1F + + dos.writeByte(1) // configuration version + dos.writeByte(profileIdc) // AVC profile + dos.writeByte(profileCompat)// profile compatibility + dos.writeByte(levelIdc) // AVC level + dos.writeByte(0xFF) // 6 bits reserved + 2 bits NAL length - 1 + + dos.writeByte(0xE1) // 3 bits reserved + 5 bits SPS count + dos.writeShort(sps.size) // SPS length + dos.write(sps) // SPS data + + dos.writeByte(1) // PPS count + dos.writeShort(pps.size) // PPS length + dos.write(pps) // PPS data + + return wrapBox("avcC", output.toByteArray()) + } + + private fun buildEmptySttsBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stts", output.toByteArray()) + } + + private fun buildEmptyStscBox(): 
ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stsc", output.toByteArray()) + } + + private fun buildEmptyStszBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // sample size (0 = variable) + dos.writeInt(0) // sample count + return wrapBox("stsz", output.toByteArray()) + } + + private fun buildEmptyStcoBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stco", output.toByteArray()) + } + + private fun buildMvexBox(): ByteArray { + return wrapBox("mvex", buildTrexBox()) + } + + private fun buildTrexBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(1) // track ID + dos.writeInt(1) // default sample description index + dos.writeInt(0) // default sample duration + dos.writeInt(0) // default sample size + dos.writeInt(0) // default sample flags + + return wrapBox("trex", output.toByteArray()) + } + + // ==================== Media Segment Building ==================== + + /** + * Builds a media segment (moof + mdat). 
+   */
+  private fun buildMediaSegment(
+    samples: List<Sample>,
+    sequenceNumber: Int,
+    baseDecodeTimeUs: Long
+  ): ByteArray {
+    val output = ByteArrayOutputStream()
+
+    // Build mdat content first to know sizes
+    val mdatContent = ByteArrayOutputStream()
+    for (sample in samples) {
+      mdatContent.write(sample.data)
+    }
+    val mdatPayload = mdatContent.toByteArray()
+
+    // Build moof
+    val moofBox = buildMoofBox(samples, sequenceNumber, baseDecodeTimeUs, mdatPayload.size)
+    output.write(moofBox)
+
+    // Build mdat
+    output.write(wrapBox("mdat", mdatPayload))
+
+    return output.toByteArray()
+  }
+
+  private fun buildMoofBox(
+    samples: List<Sample>,
+    sequenceNumber: Int,
+    baseDecodeTimeUs: Long,
+    mdatPayloadSize: Int
+  ): ByteArray {
+    // Calculate sizes to determine data offset
+    val mfhdBox = buildMfhdBox(sequenceNumber)
+    val tfhdSize = 8 + 8 // box header + content (version/flags + track_id)
+    val tfdtSize = 8 + 12 // box header + version 1 content
+    val trunSize = 8 + 12 + (samples.size * 12) // header + fixed + per-sample (no composition offset)
+    val trafSize = 8 + tfhdSize + tfdtSize + trunSize
+    val moofSize = 8 + mfhdBox.size + trafSize
+
+    val dataOffset = moofSize + 8 // moof size + mdat header
+
+    val content = ByteArrayOutputStream()
+    content.write(mfhdBox)
+    content.write(buildTrafBox(samples, baseDecodeTimeUs, dataOffset))
+
+    return wrapBox("moof", content.toByteArray())
+  }
+
+  private fun buildMfhdBox(sequenceNumber: Int): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    dos.writeInt(0) // version & flags
+    dos.writeInt(sequenceNumber)
+
+    return wrapBox("mfhd", output.toByteArray())
+  }
+
+  private fun buildTrafBox(samples: List<Sample>, baseDecodeTimeUs: Long, dataOffset: Int): ByteArray {
+    val content = ByteArrayOutputStream()
+    content.write(buildTfhdBox())
+    content.write(buildTfdtBox(baseDecodeTimeUs))
+    content.write(buildTrunBox(samples, dataOffset))
+    return wrapBox("traf", content.toByteArray())
+  }
+
+  private fun
buildTfhdBox(): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    // Flags: default-base-is-moof (0x020000)
+    dos.writeInt(0x00020000)
+    dos.writeInt(1) // track ID
+
+    return wrapBox("tfhd", output.toByteArray())
+  }
+
+  private fun buildTfdtBox(baseDecodeTimeUs: Long): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    // Convert to timescale units
+    val baseMediaDecodeTime = (baseDecodeTimeUs * timescale) / 1_000_000
+
+    // Version 1 for 64-bit time
+    dos.writeInt(0x01000000)
+    dos.writeLong(baseMediaDecodeTime)
+
+    return wrapBox("tfdt", output.toByteArray())
+  }
+
+  private fun buildTrunBox(samples: List<Sample>, dataOffset: Int): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    // Flags: data-offset + sample-duration + sample-size + sample-flags
+    val flags = 0x000001 or 0x000100 or 0x000200 or 0x000400
+    dos.writeInt(flags)
+    dos.writeInt(samples.size)
+    dos.writeInt(dataOffset)
+
+    for (sample in samples) {
+      // Convert duration to timescale units
+      val durationInTimescale = ((sample.durationUs * timescale) / 1_000_000).toInt()
+      dos.writeInt(durationInTimescale)
+      dos.writeInt(sample.data.size)
+      dos.writeInt(buildSampleFlags(sample.isKeyFrame))
+    }
+
+    return wrapBox("trun", output.toByteArray())
+  }
+
+  private fun buildSampleFlags(isKeyFrame: Boolean): Int {
+    return if (isKeyFrame) {
+      // sample_depends_on=2 (no dependencies), not a difference sample
+      0x02000000
+    } else {
+      // sample_depends_on=1 (depends on others), is a difference sample
+      0x01010000
+    }
+  }
+
+  // ==================== Utilities ====================
+
+  private fun wrapBox(type: String, content: ByteArray): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    dos.writeInt(8 + content.size)
+    dos.writeBytes(type)
+    dos.write(content)
+
+    return output.toByteArray()
+  }
+}
diff --git
a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt index 4f3331e..fc2e2bb 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt @@ -4,7 +4,6 @@ import android.content.Context import android.util.Log import android.util.Size import android.view.Surface -import androidx.media3.common.util.UnstableApi import com.facebook.common.statfs.StatFsHelper import com.mrousavy.camera.extensions.getRecommendedBitRate import com.mrousavy.camera.types.Orientation @@ -16,7 +15,6 @@ import java.text.SimpleDateFormat import java.util.Locale import java.util.Date -@UnstableApi class RecordingSession( context: Context, val cameraId: String, @@ -30,7 +28,7 @@ class RecordingSession( private val callback: (video: Video) -> Unit, private val onError: (error: CameraError) -> Unit, private val allCallbacks: CameraSession.Callback, - // Use the new FragmentedMp4Muxer-based recorder for HLS-compatible output + // Use FragmentedRecordingManager for HLS-compatible fMP4 output private val useFragmentedMp4: Boolean = true ) { companion object { -- 2.49.1 From c43f4d3a80f48b3d195270137c81b7b4f5b33622 Mon Sep 17 00:00:00 2001 From: Loewy Date: Tue, 23 Dec 2025 21:56:17 -0500 Subject: [PATCH 3/4] add orientation and aspect ratio handling for landscape recording --- .../com/mrousavy/camera/core/CameraSession.kt | 10 ++-- .../camera/core/FragmentedRecordingManager.kt | 20 ++++--- .../java/com/mrousavy/camera/core/HlsMuxer.kt | 56 ++++++++++++------- 3 files changed, 53 insertions(+), 33 deletions(-) diff --git a/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt index 770812f..ed8ea69 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt +++ 
b/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt @@ -429,15 +429,15 @@ class CameraSession(private val context: Context, private val cameraManager: Cam // Get actual device rotation from WindowManager since the React Native orientation hook // doesn't update when rotating between landscape-left and landscape-right on Android. // Map device rotation to the correct orientationHint for video recording: - // - Counter-clockwise (ROTATION_90) → 270° hint - // - Clockwise (ROTATION_270) → 90° hint + // - Counter-clockwise (ROTATION_90) → 90° hint + // - Clockwise (ROTATION_270) → 270° hint val windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager val deviceRotation = windowManager.defaultDisplay.rotation val recordingOrientation = when (deviceRotation) { Surface.ROTATION_0 -> Orientation.PORTRAIT - Surface.ROTATION_90 -> Orientation.LANDSCAPE_RIGHT + Surface.ROTATION_90 -> Orientation.LANDSCAPE_LEFT Surface.ROTATION_180 -> Orientation.PORTRAIT_UPSIDE_DOWN - Surface.ROTATION_270 -> Orientation.LANDSCAPE_LEFT + Surface.ROTATION_270 -> Orientation.LANDSCAPE_RIGHT else -> Orientation.PORTRAIT } @@ -448,7 +448,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam enableAudio, fps, videoOutput.enableHdr, - orientation, + recordingOrientation, options, filePath, callback, diff --git a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt index 8ad49d7..09ea131 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt @@ -39,17 +39,21 @@ class FragmentedRecordingManager( segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS ): FragmentedRecordingManager { val mimeType = options.videoCodec.toMimeType() - val cameraOrientationDegrees = 
cameraOrientation.toDegrees() - val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees() + // Use cameraOrientation from Android (computed from device rotation) + // instead of options.orientation from JS which may be stale + val recordingOrientationDegrees = cameraOrientation.toDegrees() - // Use size dimensions directly - the encoder output format will have the actual dimensions - // Don't swap based on orientation here; the camera pipeline handles that - val width = size.width - val height = size.height + // Swap dimensions based on orientation - same logic as ChunkedRecordingManager + // When camera is in landscape orientation, we need to swap width/height for the encoder + val (width, height) = if (cameraOrientation.isLandscape()) { + size.height to size.width + } else { + size.width to size.height + } Log.d(TAG, "Input size: ${size.width}x${size.height}, " + - "cameraOrientation: $cameraOrientation ($cameraOrientationDegrees°), " + - "recordingOrientation: $recordingOrientationDegrees°") + "encoder size: ${width}x${height}, " + + "orientation: $cameraOrientation ($recordingOrientationDegrees°)") val format = MediaFormat.createVideoFormat(mimeType, width, height) val codec = MediaCodec.createEncoderByType(mimeType) diff --git a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt index 5c68668..9c0c3da 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt @@ -478,40 +478,56 @@ class HlsMuxer( dos.writeShort(0) // volume (0 for video) dos.writeShort(0) // reserved - // Rotation matrix - use identity and rely on correct dimensions from encoder - // The encoder output format already has the correct dimensions for the content + // Rotation matrix writeRotationMatrix(dos) - // Use dimensions as-is from encoder output format - dos.writeInt(width shl 16) 
// width (16.16 fixed point) - dos.writeInt(height shl 16) // height (16.16 fixed point) + // Display dimensions should be post-rotation dimensions + // For 90° or 270° rotation, swap width and height + val (displayWidth, displayHeight) = when (orientationDegrees) { + 90, 270 -> height to width + else -> width to height + } + dos.writeInt(displayWidth shl 16) // width (16.16 fixed point) + dos.writeInt(displayHeight shl 16) // height (16.16 fixed point) - Log.d(TAG, "tkhd: ${width}x${height}, rotation=$orientationDegrees") + Log.d(TAG, "tkhd: encoded=${width}x${height}, display=${displayWidth}x${displayHeight}, rotation=$orientationDegrees") return wrapBox("tkhd", output.toByteArray()) } /** * Writes the 3x3 transformation matrix for video rotation. - * Uses simple rotation values - the encoder already outputs correctly oriented frames. */ private fun writeRotationMatrix(dos: DataOutputStream) { - // Fixed-point constants - val one = 0x00010000 // 1.0 in 16.16 - val w = 0x40000000 // 1.0 in 2.30 + val one = 0x00010000 // 1.0 in 16.16 + val negOne = 0xFFFF0000.toInt() // -1.0 in 16.16 + val w = 0x40000000 // 1.0 in 2.30 - // Identity matrix - no transformation - // Most HLS players handle rotation via the dimensions themselves - // or we can add rotation metadata separately if needed - dos.writeInt(one) // a = 1 - dos.writeInt(0) // b = 0 + // For 270° device orientation (landscape-right), apply 90° CW rotation + // For 90° device orientation (landscape-left), apply 270° CW rotation + val a: Int + val b: Int + val c: Int + val d: Int + + when (orientationDegrees) { + 90 -> { a = 0; b = negOne; c = one; d = 0 } + 180 -> { a = negOne; b = 0; c = 0; d = negOne } + 270 -> { a = 0; b = one; c = negOne; d = 0 } + else -> { a = one; b = 0; c = 0; d = one } + } + + dos.writeInt(a) + dos.writeInt(b) dos.writeInt(0) // u = 0 - dos.writeInt(0) // c = 0 - dos.writeInt(one) // d = 1 + dos.writeInt(c) + dos.writeInt(d) dos.writeInt(0) // v = 0 - dos.writeInt(0) // x = 0 - 
dos.writeInt(0) // y = 0 - dos.writeInt(w) // w = 1 + dos.writeInt(0) // tx = 0 + dos.writeInt(0) // ty = 0 + dos.writeInt(w) // w = 1.0 + + Log.d(TAG, "Rotation matrix for $orientationDegrees°") } private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { -- 2.49.1 From eceab60d7ca3d4418856714578bda61cb3d1d71b Mon Sep 17 00:00:00 2001 From: Loewy Date: Wed, 24 Dec 2025 00:52:50 -0500 Subject: [PATCH 4/4] wip: add vui timing injection, receive the requested fps from camera in rn layer, normalize timestamp --- .../camera/core/FragmentedRecordingManager.kt | 10 +- .../java/com/mrousavy/camera/core/HlsMuxer.kt | 363 +++++++++++++++++- 2 files changed, 351 insertions(+), 22 deletions(-) diff --git a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt index 09ea131..545a934 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt @@ -20,7 +20,8 @@ import java.io.File */ class FragmentedRecordingManager( private val encoder: MediaCodec, - private val muxer: HlsMuxer + private val muxer: HlsMuxer, + private val configuredFps: Int ) : MediaCodec.Callback(), ChunkedRecorderInterface { companion object { @@ -88,9 +89,9 @@ class FragmentedRecordingManager( ) muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L) - Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees") + Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees, fps: $effectiveFps") - return FragmentedRecordingManager(codec, muxer) + return FragmentedRecordingManager(codec, muxer, effectiveFps) } } @@ -170,7 +171,8 @@ class FragmentedRecordingManager( synchronized(this) { Log.i(TAG, "Output format changed: $format") - trackIndex = muxer.addTrack(format) + // Pass 
configured fps to muxer (not the encoder's output format fps which may differ) + trackIndex = muxer.addTrack(format, configuredFps) muxer.start() muxerStarted = true } diff --git a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt index 9c0c3da..28a29e5 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt @@ -42,6 +42,7 @@ class HlsMuxer( // Configuration private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US private var timescale: Int = 30000 // Default, updated from format + private var configuredFps: Int = 30 // Configured fps from user, used for VUI timing // State private var state = State.UNINITIALIZED @@ -54,6 +55,9 @@ class HlsMuxer( private var segmentStartTimeUs = -1L private var lastPresentationTimeUs = 0L + // Timestamp normalization - first timestamp becomes time 0 + private var firstPresentationTimeUs = -1L + private enum class State { UNINITIALIZED, INITIALIZED, @@ -69,6 +73,21 @@ class HlsMuxer( val isKeyFrame: Boolean ) + // ==================== Timestamp Normalization ==================== + + /** + * Normalizes a presentation timestamp to start from 0. + * The first timestamp received becomes time 0, and all subsequent + * timestamps are relative to that. + */ + private fun normalizeTimestamp(rawPresentationTimeUs: Long): Long { + if (firstPresentationTimeUs < 0) { + firstPresentationTimeUs = rawPresentationTimeUs + Log.d(TAG, "First timestamp: ${rawPresentationTimeUs}us, normalizing to 0") + } + return rawPresentationTimeUs - firstPresentationTimeUs + } + // ==================== Annex-B to AVCC Conversion ==================== /** @@ -194,19 +213,14 @@ class HlsMuxer( * Adds a track to the muxer. 
* * @param format The MediaFormat describing the track + * @param fps The configured frame rate (used for VUI timing, overrides format's fps) * @return Track index (always 0 for now, single video track) */ - fun addTrack(format: MediaFormat): Int { + fun addTrack(format: MediaFormat, fps: Int = 30): Int { check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" } trackFormat = format - - // Extract timescale from frame rate - val fps = try { - format.getInteger(MediaFormat.KEY_FRAME_RATE) - } catch (e: Exception) { - 30 - } + configuredFps = fps timescale = fps * 1000 // Use fps * 1000 for good precision state = State.INITIALIZED @@ -215,7 +229,7 @@ class HlsMuxer( val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 } Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " + "encoder output: ${formatWidth}x${formatHeight}, " + - "timescale=$timescale, orientation=$orientationDegrees°") + "configuredFps=$configuredFps, timescale=$timescale, orientation=$orientationDegrees°") return 0 // Single track, index 0 } @@ -259,7 +273,7 @@ class HlsMuxer( } val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0 - val presentationTimeUs = bufferInfo.presentationTimeUs + val presentationTimeUs = normalizeTimestamp(bufferInfo.presentationTimeUs) // Initialize segment start time if (segmentStartTimeUs < 0) { @@ -364,6 +378,303 @@ class HlsMuxer( } } + // ==================== SPS VUI Timing Injection ==================== + + /** + * Injects VUI timing parameters into an H.264 SPS NAL unit. + * This ensures proper frame rate detection by players/decoders. + * + * The SPS from MediaCodec lacks VUI timing info, causing tools like + * ffprobe to misinterpret the frame rate. 
+ */ + private fun injectVuiTiming(sps: ByteArray, fps: Int): ByteArray { + try { + val reader = BitReader(sps) + val writer = BitWriter() + + // NAL header (1 byte: forbidden_zero_bit, nal_ref_idc, nal_unit_type) + writer.writeBits(reader.readBits(8), 8) + + // profile_idc (1 byte) + val profileIdc = reader.readBits(8) + writer.writeBits(profileIdc, 8) + + // constraint_set flags (1 byte) + writer.writeBits(reader.readBits(8), 8) + + // level_idc (1 byte) + writer.writeBits(reader.readBits(8), 8) + + // seq_parameter_set_id (ue(v)) + copyExpGolomb(reader, writer) + + // Profile-specific fields for High profile (100) and others + if (profileIdc == 100 || profileIdc == 110 || profileIdc == 122 || + profileIdc == 244 || profileIdc == 44 || profileIdc == 83 || + profileIdc == 86 || profileIdc == 118 || profileIdc == 128 || + profileIdc == 138 || profileIdc == 139 || profileIdc == 134 || + profileIdc == 135) { + + // chroma_format_idc (ue(v)) + val chromaFormatIdc = copyExpGolombAndReturn(reader, writer) + + if (chromaFormatIdc == 3) { + // separate_colour_plane_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + } + + // bit_depth_luma_minus8 (ue(v)) + copyExpGolomb(reader, writer) + + // bit_depth_chroma_minus8 (ue(v)) + copyExpGolomb(reader, writer) + + // qpprime_y_zero_transform_bypass_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + + // seq_scaling_matrix_present_flag (1 bit) + val scalingMatrixFlag = reader.readBits(1) + writer.writeBits(scalingMatrixFlag, 1) + + if (scalingMatrixFlag == 1) { + // Skip scaling lists - this is complex, just copy remaining and give up + Log.w(TAG, "SPS has scaling matrix, skipping VUI injection") + return sps + } + } + + // log2_max_frame_num_minus4 (ue(v)) + copyExpGolomb(reader, writer) + + // pic_order_cnt_type (ue(v)) + val picOrderCntType = copyExpGolombAndReturn(reader, writer) + + if (picOrderCntType == 0) { + // log2_max_pic_order_cnt_lsb_minus4 (ue(v)) + copyExpGolomb(reader, writer) + } else if 
(picOrderCntType == 1) { + // delta_pic_order_always_zero_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + // offset_for_non_ref_pic (se(v)) + copySignedExpGolomb(reader, writer) + // offset_for_top_to_bottom_field (se(v)) + copySignedExpGolomb(reader, writer) + // num_ref_frames_in_pic_order_cnt_cycle (ue(v)) + val numRefFrames = copyExpGolombAndReturn(reader, writer) + for (i in 0 until numRefFrames) { + // offset_for_ref_frame[i] (se(v)) + copySignedExpGolomb(reader, writer) + } + } + + // max_num_ref_frames (ue(v)) + copyExpGolomb(reader, writer) + + // gaps_in_frame_num_value_allowed_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + + // pic_width_in_mbs_minus1 (ue(v)) + copyExpGolomb(reader, writer) + + // pic_height_in_map_units_minus1 (ue(v)) + copyExpGolomb(reader, writer) + + // frame_mbs_only_flag (1 bit) + val frameMbsOnlyFlag = reader.readBits(1) + writer.writeBits(frameMbsOnlyFlag, 1) + + if (frameMbsOnlyFlag == 0) { + // mb_adaptive_frame_field_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + } + + // direct_8x8_inference_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + + // frame_cropping_flag (1 bit) + val frameCroppingFlag = reader.readBits(1) + writer.writeBits(frameCroppingFlag, 1) + + if (frameCroppingFlag == 1) { + // frame_crop_left_offset, right, top, bottom (ue(v) each) + copyExpGolomb(reader, writer) + copyExpGolomb(reader, writer) + copyExpGolomb(reader, writer) + copyExpGolomb(reader, writer) + } + + // vui_parameters_present_flag - we'll set this to 1 and add our VUI + val originalVuiFlag = reader.readBits(1) + writer.writeBits(1, 1) // Set VUI present + + // Write VUI parameters with timing info + writeVuiWithTiming(writer, fps, originalVuiFlag == 1, reader) + + // Add RBSP trailing bits + writer.writeRbspTrailingBits() + + val result = writer.toByteArray() + Log.d(TAG, "Injected VUI timing for ${fps}fps, SPS grew from ${sps.size} to ${result.size} bytes") + return result + + } catch (e: Exception) { + 
Log.e(TAG, "Failed to inject VUI timing: ${e.message}, using original SPS") + return sps + } + } + + /** + * Writes VUI parameters with timing info. + */ + private fun writeVuiWithTiming(writer: BitWriter, fps: Int, hadVui: Boolean, reader: BitReader) { + // aspect_ratio_info_present_flag + writer.writeBits(0, 1) + + // overscan_info_present_flag + writer.writeBits(0, 1) + + // video_signal_type_present_flag + writer.writeBits(0, 1) + + // chroma_loc_info_present_flag + writer.writeBits(0, 1) + + // timing_info_present_flag = 1 + writer.writeBits(1, 1) + + // num_units_in_tick (32 bits) = 1 + writer.writeBits(1, 32) + + // time_scale (32 bits) = fps * 2 (because each frame = 2 field counts) + writer.writeBits(fps * 2, 32) + + // fixed_frame_rate_flag = 1 + writer.writeBits(1, 1) + + // nal_hrd_parameters_present_flag + writer.writeBits(0, 1) + + // vcl_hrd_parameters_present_flag + writer.writeBits(0, 1) + + // pic_struct_present_flag + writer.writeBits(0, 1) + + // bitstream_restriction_flag + writer.writeBits(0, 1) + } + + // ==================== Bit Manipulation Helpers ==================== + + /** + * Bit-level reader for parsing H.264 NAL units. 
+   */
+  private class BitReader(private val data: ByteArray) {
+    private var bytePos = 0
+    private var bitPos = 0
+
+    fun readBits(count: Int): Int {
+      var result = 0
+      for (i in 0 until count) {
+        if (bytePos >= data.size) throw IllegalStateException("End of data")
+        val bit = (data[bytePos].toInt() shr (7 - bitPos)) and 1
+        result = (result shl 1) or bit
+        bitPos++
+        if (bitPos == 8) {
+          bitPos = 0
+          bytePos++
+        }
+      }
+      return result
+    }
+
+    fun readExpGolomb(): Int {
+      var leadingZeros = 0
+      while (readBits(1) == 0) {
+        leadingZeros++
+        if (leadingZeros > 31) throw IllegalStateException("Invalid exp-golomb")
+      }
+      if (leadingZeros == 0) return 0
+      val suffix = readBits(leadingZeros)
+      return (1 shl leadingZeros) - 1 + suffix
+    }
+
+    fun readSignedExpGolomb(): Int {
+      val code = readExpGolomb()
+      return if (code % 2 == 0) -(code / 2) else (code + 1) / 2
+    }
+  }
+
+  /**
+   * Bit-level writer for constructing H.264 NAL units.
+   */
+  private class BitWriter {
+    private val bytes = mutableListOf<Byte>()
+    private var currentByte = 0
+    private var bitPos = 0
+
+    fun writeBits(value: Int, count: Int) {
+      for (i in count - 1 downTo 0) {
+        val bit = (value shr i) and 1
+        currentByte = (currentByte shl 1) or bit
+        bitPos++
+        if (bitPos == 8) {
+          bytes.add(currentByte.toByte())
+          currentByte = 0
+          bitPos = 0
+        }
+      }
+    }
+
+    fun writeExpGolomb(value: Int) {
+      val code = value + 1
+      val bits = 32 - Integer.numberOfLeadingZeros(code)
+      // Write leading zeros
+      for (i in 0 until bits - 1) {
+        writeBits(0, 1)
+      }
+      // Write the code
+      writeBits(code, bits)
+    }
+
+    fun writeSignedExpGolomb(value: Int) {
+      val code = if (value <= 0) -2 * value else 2 * value - 1
+      writeExpGolomb(code)
+    }
+
+    fun writeRbspTrailingBits() {
+      writeBits(1, 1) // rbsp_stop_one_bit
+      while (bitPos != 0) {
+        writeBits(0, 1) // rbsp_alignment_zero_bit
+      }
+    }
+
+    fun toByteArray(): ByteArray {
+      // Flush remaining bits
+      if (bitPos > 0) {
+        currentByte = currentByte shl (8 - bitPos)
bytes.add(currentByte.toByte()) + } + return bytes.toByteArray() + } + } + + private fun copyExpGolomb(reader: BitReader, writer: BitWriter) { + val value = reader.readExpGolomb() + writer.writeExpGolomb(value) + } + + private fun copyExpGolombAndReturn(reader: BitReader, writer: BitWriter): Int { + val value = reader.readExpGolomb() + writer.writeExpGolomb(value) + return value + } + + private fun copySignedExpGolomb(reader: BitReader, writer: BitWriter) { + val value = reader.readSignedExpGolomb() + writer.writeSignedExpGolomb(value) + } + // ==================== Init Segment Building ==================== /** @@ -373,11 +684,19 @@ class HlsMuxer( val width = format.getInteger(MediaFormat.KEY_WIDTH) val height = format.getInteger(MediaFormat.KEY_HEIGHT) - val sps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) } + val rawSps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) } ?: throw IllegalArgumentException("Missing SPS (csd-0)") val pps = format.getByteBuffer("csd-1")?.let { extractNalUnit(it) } ?: throw IllegalArgumentException("Missing PPS (csd-1)") + Log.d(TAG, "Original SPS size: ${rawSps.size} bytes, PPS size: ${pps.size} bytes") + Log.d(TAG, "Original SPS hex: ${rawSps.joinToString("") { "%02x".format(it) }}") + + // Inject VUI timing info into SPS using configured fps (not encoder output format fps) + val sps = injectVuiTiming(rawSps, configuredFps) + Log.d(TAG, "Modified SPS size: ${sps.size} bytes") + Log.d(TAG, "Modified SPS hex: ${sps.joinToString("") { "%02x".format(it) }}") + val output = ByteArrayOutputStream() // ftyp @@ -660,9 +979,10 @@ class HlsMuxer( val output = ByteArrayOutputStream() val dos = DataOutputStream(output) - val profileIdc = if (sps.isNotEmpty()) sps[0].toInt() and 0xFF else 0x42 - val profileCompat = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x00 - val levelIdc = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x1F + // SPS layout: [0]=NAL header (0x67), [1]=profile_idc, [2]=constraint_flags, 
[3]=level_idc + val profileIdc = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x42 + val profileCompat = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x00 + val levelIdc = if (sps.size > 3) sps[3].toInt() and 0xFF else 0x1F dos.writeByte(1) // configuration version dos.writeByte(profileIdc) // AVC profile @@ -722,10 +1042,14 @@ class HlsMuxer( val output = ByteArrayOutputStream() val dos = DataOutputStream(output) + // Default sample duration: timescale / fps + // Since timescale = fps * 1000, duration = 1000 for any fps + val defaultSampleDuration = 1000 + dos.writeInt(0) // version & flags dos.writeInt(1) // track ID dos.writeInt(1) // default sample description index - dos.writeInt(0) // default sample duration + dos.writeInt(defaultSampleDuration) // default sample duration dos.writeInt(0) // default sample size dos.writeInt(0) // default sample flags @@ -837,10 +1161,13 @@ class HlsMuxer( dos.writeInt(samples.size) dos.writeInt(dataOffset) + // Use constant duration based on configured fps for consistent frame rate + // This ensures ffprobe reports correct fps instead of calculating from variable timing + val constantDuration = timescale / configuredFps // e.g., 30000/30 = 1000 ticks + Log.d(TAG, "Writing ${samples.size} samples with constant duration=${constantDuration} ticks (${configuredFps}fps)") + for (sample in samples) { - // Convert duration to timescale units - val durationInTimescale = ((sample.durationUs * timescale) / 1_000_000).toInt() - dos.writeInt(durationInTimescale) + dos.writeInt(constantDuration) dos.writeInt(sample.data.size) dos.writeInt(buildSampleFlags(sample.isKeyFrame)) } -- 2.49.1