Write our own muxer to make HLS upload actually work

2025-12-21 16:45:04 -08:00
parent a2d218580c
commit e60c1a4eb1
4 changed files with 916 additions and 223 deletions
--- a/package/android/build.gradle
+++ b/package/android/build.gradle
@@ -178,10 +178,6 @@ dependencies {
  implementation "com.facebook.react:react-android:+"
  implementation "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3"

-  // Media3 muxer for fragmented MP4 (HLS-compatible) recording
-  implementation "androidx.media3:media3-muxer:1.5.0"
-  implementation "androidx.media3:media3-common:1.5.0"
-
  if (enableCodeScanner) {
    // User enabled code-scanner, so we bundle the 2.4 MB model in the app.
    implementation 'com.google.mlkit:barcode-scanning:17.2.0'
--- a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt
@@ -7,37 +7,25 @@ import android.media.MediaFormat
 import android.util.Log
 import android.util.Size
 import android.view.Surface
-import androidx.media3.common.Format
-import androidx.media3.common.MimeTypes
-import androidx.media3.common.util.UnstableApi
-import androidx.media3.muxer.FragmentedMp4Muxer
-import androidx.media3.muxer.Muxer
 import com.mrousavy.camera.types.Orientation
 import com.mrousavy.camera.types.RecordVideoOptions
 import java.io.File
-import java.io.FileOutputStream
-import java.nio.ByteBuffer

 /**
 * A recording manager that produces HLS-compatible fragmented MP4 segments.
 *
- * This produces output similar to the iOS implementation:
- * - An initialization segment (init.mp4) containing codec configuration
- * - Numbered data segments (0.mp4, 1.mp4, ...) containing media data
- *
- * Uses AndroidX Media3's FragmentedMp4Muxer which produces proper fMP4 output.
+ * Uses HlsMuxer (following Android's MediaMuxer pattern) to produce:
+ * - init.mp4: Initialization segment (ftyp + moov with mvex)
+ * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
 */
-@UnstableApi
 class FragmentedRecordingManager(
    private val encoder: MediaCodec,
-    private val outputDirectory: File,
-    private val orientationDegrees: Int,
-    private val targetSegmentDurationUs: Long,
-    private val callbacks: CameraSession.Callback
+    private val muxer: HlsMuxer
 ) : MediaCodec.Callback(), ChunkedRecorderInterface {

    companion object {
        private const val TAG = "FragmentedRecorder"
+        private const val DEFAULT_SEGMENT_DURATION_SECONDS = 6

        fun fromParams(
            callbacks: CameraSession.Callback,
@@ -48,17 +36,20 @@ class FragmentedRecordingManager(
            bitRate: Int,
            options: RecordVideoOptions,
            outputDirectory: File,
-            segmentDurationSeconds: Int = 6
+            segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS
        ): FragmentedRecordingManager {
            val mimeType = options.videoCodec.toMimeType()
            val cameraOrientationDegrees = cameraOrientation.toDegrees()
            val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees()

-            val (width, height) = if (cameraOrientation.isLandscape()) {
-                size.height to size.width
-            } else {
-                size.width to size.height
-            }
+            // Use size dimensions directly - the encoder output format will have the actual dimensions
+            // Don't swap based on orientation here; the camera pipeline handles that
+            val width = size.width
+            val height = size.height
+
+            Log.d(TAG, "Input size: ${size.width}x${size.height}, " +
+                    "cameraOrientation: $cameraOrientation ($cameraOrientationDegrees°), " +
+                    "recordingOrientation: $recordingOrientationDegrees°")

            val format = MediaFormat.createVideoFormat(mimeType, width, height)
            val codec = MediaCodec.createEncoderByType(mimeType)
@@ -67,121 +58,48 @@ class FragmentedRecordingManager(
                MediaFormat.KEY_COLOR_FORMAT,
                MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface
            )
-            fps?.apply {
-                format.setInteger(MediaFormat.KEY_FRAME_RATE, this)
-            }
-            // I-frame interval affects segment boundaries
+
+            val effectiveFps = fps ?: 30
+            format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps)
            format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
            format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)

-            Log.d(TAG, "Video Format: $format, camera orientation $cameraOrientationDegrees, recordingOrientation: $recordingOrientationDegrees")
+            Log.d(TAG, "Video Format: $format, orientation: $recordingOrientationDegrees")

            codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)

-            return FragmentedRecordingManager(
-                codec,
-                outputDirectory,
-                recordingOrientationDegrees,
-                segmentDurationSeconds * 1_000_000L,
-                callbacks
+            // Create muxer with callbacks and orientation
+            val muxer = HlsMuxer(
+                outputDirectory = outputDirectory,
+                callback = object : HlsMuxer.Callback {
+                    override fun onInitSegmentReady(file: File) {
+                        callbacks.onInitSegmentReady(file)
+                    }
+
+                    override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) {
+                        callbacks.onVideoChunkReady(file, index, durationUs)
+                    }
+                },
+                orientationDegrees = recordingOrientationDegrees
            )
+            muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L)
+
+            Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees")
+
+            return FragmentedRecordingManager(codec, muxer)
        }
    }

-    // State management
-    private var chunkIndex = 0
-    private var encodedFormat: MediaFormat? = null
    private var recording = false
-
-    // Segment tracking
-    private var segmentContext: SegmentContext? = null
-    private var initSegmentEmitted = false
+    private var muxerStarted = false
+    private var trackIndex = -1

    override val surface: Surface = encoder.createInputSurface()

    init {
-        if (!outputDirectory.exists()) {
-            outputDirectory.mkdirs()
-        }
        encoder.setCallback(this)
    }

-    /**
-     * Context for a single data segment being written.
-     * Init segments are created separately via createInitSegment().
-     */
-    private inner class SegmentContext(
-        private val format: MediaFormat,
-        private val segmentIndex: Int
-    ) {
-        private val filename = "$segmentIndex.mp4"
-        private val file = File(outputDirectory, filename)
-        private val outputStream = FileOutputStream(file)
-        private val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
-        private lateinit var videoTrack: Muxer.TrackToken
-        private var startTimeUs: Long = -1L
-        private var lastTimeUs: Long = 0L
-        private var sampleCount = 0
-
-        init {
-            val media3Format = convertToMedia3Format(format)
-            videoTrack = muxer.addTrack(media3Format)
-            Log.d(TAG, "Created segment context: $filename")
-        }
-
-        fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo): Boolean {
-            if (startTimeUs < 0) {
-                startTimeUs = bufferInfo.presentationTimeUs
-            }
-            lastTimeUs = bufferInfo.presentationTimeUs
-
-            val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
-
-            muxer.writeSampleData(videoTrack, buffer, bufferInfo)
-            sampleCount++
-
-            // Check if we should start a new segment at the next keyframe
-            if (isKeyFrame && sampleCount > 1) {
-                val segmentDurationUs = bufferInfo.presentationTimeUs - startTimeUs
-                if (segmentDurationUs >= targetSegmentDurationUs) {
-                    return true // Signal to create new segment
-                }
-            }
-
-            return false
-        }
-
-        fun finish(): Long {
-            try {
-                muxer.close()
-                outputStream.close()
-            } catch (e: Exception) {
-                Log.e(TAG, "Error closing segment", e)
-            }
-
-            val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L
-            callbacks.onVideoChunkReady(file, segmentIndex, durationUs)
-
-            Log.d(TAG, "Finished segment: $filename, samples=$sampleCount, duration=${durationUs/1000}ms")
-            return durationUs
-        }
-    }
-
-    private fun createNewSegment() {
-        val format = encodedFormat
-        if (format == null) {
-            Log.e(TAG, "Cannot create segment: encodedFormat is null")
-            return
-        }
-
-        // Close previous segment
-        segmentContext?.finish()
-
-        // Create new data segment (init segments are created separately)
-        segmentContext = SegmentContext(format, chunkIndex)
-        chunkIndex++
-    }
-
    override fun start() {
        encoder.start()
        recording = true
@@ -190,8 +108,12 @@ class FragmentedRecordingManager(
    override fun finish() {
        synchronized(this) {
            recording = false
-            segmentContext?.finish()
-            segmentContext = null
+
+            if (muxerStarted) {
+                muxer.stop()
+                muxer.release()
+            }
+
            try {
                encoder.stop()
                encoder.release()
@@ -202,6 +124,7 @@ class FragmentedRecordingManager(
    }

    // MediaCodec.Callback methods
+
    override fun onInputBufferAvailable(codec: MediaCodec, index: Int) {
        // Not used for Surface input
    }
@@ -213,37 +136,20 @@ class FragmentedRecordingManager(
                return
            }

-            val encodedData = encoder.getOutputBuffer(index)
-            if (encodedData == null) {
+            if (!muxerStarted) {
+                encoder.releaseOutputBuffer(index, false)
+                return
+            }
+
+            val buffer = encoder.getOutputBuffer(index)
+            if (buffer == null) {
                Log.e(TAG, "getOutputBuffer returned null")
                encoder.releaseOutputBuffer(index, false)
                return
            }

-            // Wait until init segment is emitted (happens in onOutputFormatChanged)
-            if (!initSegmentEmitted) {
-                encoder.releaseOutputBuffer(index, false)
-                return
-            }
-
-            // Create first data segment if needed
-            if (segmentContext == null) {
-                createNewSegment()
-            }
-
-            val context = segmentContext
-            if (context == null) {
-                encoder.releaseOutputBuffer(index, false)
-                return
-            }
-
            try {
-                val shouldStartNewSegment = context.writeSample(encodedData, bufferInfo)
-                if (shouldStartNewSegment) {
-                    createNewSegment()
-                    // Write this keyframe to the new segment as well
-                    segmentContext?.writeSample(encodedData, bufferInfo)
-                }
+                muxer.writeSampleData(trackIndex, buffer, bufferInfo)
            } catch (e: Exception) {
                Log.e(TAG, "Error writing sample", e)
            }
@@ -257,76 +163,12 @@ class FragmentedRecordingManager(
    }

    override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) {
-        Log.i(TAG, "Output format changed: $format")
-        encodedFormat = format
+        synchronized(this) {
+            Log.i(TAG, "Output format changed: $format")

-        // Create the init segment immediately when we get the format
-        // This produces an fMP4 file with just ftyp + moov (no samples)
-        if (!initSegmentEmitted) {
-            createInitSegment(format)
-            initSegmentEmitted = true
+            trackIndex = muxer.addTrack(format)
+            muxer.start()
+            muxerStarted = true
        }
    }
-
-    /**
-     * Creates an initialization segment containing only codec configuration (ftyp + moov).
-     * This is done by creating a muxer, adding the track, and immediately closing it
-     * without writing any samples.
-     */
-    private fun createInitSegment(format: MediaFormat) {
-        val initFile = File(outputDirectory, "init.mp4")
-        try {
-            val outputStream = FileOutputStream(initFile)
-            val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
-
-            // Convert and add the track
-            val media3Format = convertToMedia3Format(format)
-            muxer.addTrack(media3Format)
-
-            // Close immediately - this writes just the header (ftyp + moov)
-            muxer.close()
-            outputStream.close()
-
-            Log.d(TAG, "Created init segment: ${initFile.absolutePath}")
-            callbacks.onInitSegmentReady(initFile)
-        } catch (e: Exception) {
-            Log.e(TAG, "Error creating init segment", e)
-        }
-    }
-
-    private fun convertToMedia3Format(mediaFormat: MediaFormat): Format {
-        val mimeType = mediaFormat.getString(MediaFormat.KEY_MIME) ?: MimeTypes.VIDEO_H264
-        val width = mediaFormat.getInteger(MediaFormat.KEY_WIDTH)
-        val height = mediaFormat.getInteger(MediaFormat.KEY_HEIGHT)
-        val bitRate = try { mediaFormat.getInteger(MediaFormat.KEY_BIT_RATE) } catch (e: Exception) { -1 }
-        val frameRate = try { mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE) } catch (e: Exception) { -1 }
-
-        // Get CSD (Codec Specific Data) if available - required for init segment
-        val csd0 = mediaFormat.getByteBuffer("csd-0")
-        val csd1 = mediaFormat.getByteBuffer("csd-1")
-
-        val initData = mutableListOf<ByteArray>()
-        csd0?.let {
-            val bytes = ByteArray(it.remaining())
-            it.duplicate().get(bytes)
-            initData.add(bytes)
-        }
-        csd1?.let {
-            val bytes = ByteArray(it.remaining())
-            it.duplicate().get(bytes)
-            initData.add(bytes)
-        }
-
-        return Format.Builder()
-            .setSampleMimeType(mimeType)
-            .setWidth(width)
-            .setHeight(height)
-            .setRotationDegrees(orientationDegrees)
-            .apply {
-                if (bitRate > 0) setAverageBitrate(bitRate)
-                if (frameRate > 0) setFrameRate(frameRate.toFloat())
-                if (initData.isNotEmpty()) setInitializationData(initData)
-            }
-            .build()
-    }
 }
--- a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt
@@ -0,0 +1,857 @@
+package com.mrousavy.camera.core
+
+import android.media.MediaCodec
+import android.media.MediaFormat
+import android.util.Log
+import java.io.ByteArrayOutputStream
+import java.io.DataOutputStream
+import java.io.File
+import java.io.FileOutputStream
+import java.nio.ByteBuffer
+
+/**
+ * A muxer for creating HLS-compatible fragmented MP4 output.
+ *
+ * Follows the same pattern as Android's MediaMuxer:
+ * 1. Create muxer with output directory
+ * 2. addTrack() with MediaFormat
+ * 3. start() - writes init.mp4
+ * 4. writeSampleData() for each encoded sample
+ * 5. stop() - finalizes last segment
+ * 6. release() - cleanup
+ *
+ * Produces:
+ * - init.mp4: Initialization segment (ftyp + moov with mvex)
+ * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
+ */
+class HlsMuxer(
+    private val outputDirectory: File,
+    private val callback: Callback,
+    private val orientationDegrees: Int = 0
+) {
+    companion object {
+        private const val TAG = "HlsMuxer"
+        private const val DEFAULT_SEGMENT_DURATION_US = 6_000_000L  // 6 seconds
+    }
+
+    interface Callback {
+        fun onInitSegmentReady(file: File)
+        fun onMediaSegmentReady(file: File, index: Int, durationUs: Long)
+    }
+
+    // Configuration
+    private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
+    private var timescale: Int = 30000  // Default, updated from format
+
+    // State
+    private var state = State.UNINITIALIZED
+    private var trackFormat: MediaFormat? = null
+    private var sequenceNumber = 1
+    private var segmentIndex = 0
+
+    // Current segment data
+    private val pendingSamples = mutableListOf<Sample>()
+    private var segmentStartTimeUs = -1L
+    private var lastPresentationTimeUs = 0L
+
+    private enum class State {
+        UNINITIALIZED,
+        INITIALIZED,
+        STARTED,
+        STOPPED,
+        RELEASED
+    }
+
+    private data class Sample(
+        val data: ByteArray,
+        val presentationTimeUs: Long,
+        var durationUs: Long,
+        val isKeyFrame: Boolean
+    )
+
+    // ==================== Annex-B to AVCC Conversion ====================
+
+    /**
+     * Converts H.264 data from Annex-B format to AVCC format.
+     *
+     * Annex-B uses start codes (00 00 00 01 or 00 00 01) to delimit NAL units.
+     * AVCC uses 4-byte big-endian length prefixes before each NAL unit.
+     *
+     * This conversion is required because:
+     * - MediaCodec outputs Annex-B format
+     * - fMP4/HLS requires AVCC format (as specified in avcC box with NAL length size = 4)
+     */
+    private fun convertAnnexBToAvcc(annexBData: ByteArray): ByteArray {
+        val nalUnits = parseAnnexBNalUnits(annexBData)
+        if (nalUnits.isEmpty()) {
+            Log.w(TAG, "No NAL units found in sample, returning original data")
+            return annexBData
+        }
+
+        val output = ByteArrayOutputStream()
+        val dos = DataOutputStream(output)
+
+        for (nalUnit in nalUnits) {
+            // Write 4-byte big-endian length prefix
+            dos.writeInt(nalUnit.size)
+            // Write NAL unit data (without start code)
+            dos.write(nalUnit)
+        }
+
+        return output.toByteArray()
+    }
+
+    /**
+     * Splits Annex-B data into NAL unit payloads (start codes removed); bytes before the first start code are skipped.
+     * Returns an empty list when no NAL unit is found or when the data already looks length-prefixed (AVCC).
+     */
+    private fun parseAnnexBNalUnits(data: ByteArray): List<ByteArray> {
+        val nalUnits = mutableListOf<ByteArray>()
+        var i = 0
+
+        while (i < data.size) {
+            // Is there a 3- or 4-byte start code exactly at position i?
+            val startCodeLength = findStartCode(data, i)
+            if (startCodeLength == 0) {
+                // No start code at the current position: advance one byte and try again below.
+                // Only the very first byte position gets the extra AVCC check.
+                if (nalUnits.isEmpty() && i == 0) {
+                    // Data might already be in AVCC format or malformed
+                    // Read the first 4 bytes as a big-endian length and see whether it fits in the data
+                    if (data.size >= 4) {
+                        val possibleLength = ((data[0].toInt() and 0xFF) shl 24) or
+                                ((data[1].toInt() and 0xFF) shl 16) or
+                                ((data[2].toInt() and 0xFF) shl 8) or
+                                (data[3].toInt() and 0xFF)
+                        if (possibleLength > 0 && possibleLength <= data.size - 4) {
+                            // Looks like AVCC already: the empty result makes convertAnnexBToAvcc keep the original bytes
+                            Log.d(TAG, "Data appears to already be in AVCC format")
+                            return emptyList()
+                        }
+                    }
+                }
+                i++
+                continue
+            }
+
+            val nalStart = i + startCodeLength
+
+            // Find end of this NAL unit (start of next start code, or end of data)
+            var nalEnd = data.size
+            var j = nalStart
+            while (j < data.size - 2) {
+                val nextStartCode = findStartCode(data, j)
+                if (nextStartCode > 0) {
+                    nalEnd = j
+                    break
+                }
+                j++
+            }
+
+            if (nalEnd > nalStart) {
+                nalUnits.add(data.copyOfRange(nalStart, nalEnd))
+            }
+
+            i = nalEnd
+        }
+
+        return nalUnits
+    }
+
+    /**
+     * Checks for Annex-B start code at given position.
+     * Returns start code length (3 or 4) or 0 if no start code found.
+     */
+    private fun findStartCode(data: ByteArray, offset: Int): Int {
+        if (offset + 4 <= data.size &&
+            data[offset] == 0.toByte() &&
+            data[offset + 1] == 0.toByte() &&
+            data[offset + 2] == 0.toByte() &&
+            data[offset + 3] == 1.toByte()) {
+            return 4  // 4-byte start code: 00 00 00 01
+        }
+        if (offset + 3 <= data.size &&
+            data[offset] == 0.toByte() &&
+            data[offset + 1] == 0.toByte() &&
+            data[offset + 2] == 1.toByte()) {
+            return 3  // 3-byte start code: 00 00 01
+        }
+        return 0
+    }
+
+    /**
+     * Sets the target segment duration.
+     * Must be called before start().
+     */
+    fun setSegmentDuration(durationUs: Long) {
+        check(state == State.UNINITIALIZED || state == State.INITIALIZED) {
+            "Cannot set segment duration after start()"
+        }
+        targetSegmentDurationUs = durationUs
+    }
+
+    /**
+     * Registers the single video track and derives the MP4 timescale (fps * 1000) from its frame rate.
+     * Only allowed while the muxer is still uninitialized, so it can be called at most once.
+     * @param format The MediaFormat describing the track; an unreadable or non-positive frame rate means 30 fps
+     * @return Track index (always 0 for now, single video track)
+     */
+    fun addTrack(format: MediaFormat): Int {
+        check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" }
+
+        trackFormat = format
+
+        // Extract timescale from frame rate; a value <= 0 would give a zero timescale, so treat it as 30 fps
+        val fps = try {
+            format.getInteger(MediaFormat.KEY_FRAME_RATE).takeIf { it > 0 } ?: 30
+        } catch (e: Exception) {
+            30
+        }
+        timescale = fps * 1000  // Use fps * 1000 for good precision
+
+        state = State.INITIALIZED
+
+        val formatWidth = try { format.getInteger(MediaFormat.KEY_WIDTH) } catch (e: Exception) { -1 }
+        val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 }
+        Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
+                "encoder output: ${formatWidth}x${formatHeight}, " +
+                "timescale=$timescale, orientation=$orientationDegrees°")
+
+        return 0  // Single track, index 0
+    }
+
+    /**
+     * Starts the muxer, writing the initialization segment.
+     */
+    fun start() {
+        check(state == State.INITIALIZED) { "Must call addTrack() before start()" }
+        val format = trackFormat ?: throw IllegalStateException("No track format")
+
+        // Create output directory if needed
+        if (!outputDirectory.exists()) {
+            outputDirectory.mkdirs()
+        }
+
+        // Write init segment
+        val initBytes = buildInitSegment(format)
+        val initFile = File(outputDirectory, "init.mp4")
+        FileOutputStream(initFile).use { it.write(initBytes) }
+        Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)")
+        callback.onInitSegmentReady(initFile)
+
+        state = State.STARTED
+    }
+
+    /**
+     * Queues one encoded sample; a key frame arriving once the target duration has elapsed closes the segment first.
+     *
+     * @param trackIndex Track index (must be 0)
+     * @param buffer The encoded sample data; its position and limit are moved to the sample range
+     * @param bufferInfo Sample metadata (offset, size, presentation time, flags)
+     */
+    fun writeSampleData(trackIndex: Int, buffer: ByteBuffer, bufferInfo: MediaCodec.BufferInfo) {
+        check(state == State.STARTED) { "Must call start() before writeSampleData()" }
+        check(trackIndex == 0) { "Invalid track index: $trackIndex" }
+
+        // Skip codec config data (already in init segment)
+        if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
+            return
+        }
+
+        val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
+        val presentationTimeUs = bufferInfo.presentationTimeUs
+
+        // First sample ever: its timestamp becomes the start of the first segment
+        if (segmentStartTimeUs < 0) {
+            segmentStartTimeUs = presentationTimeUs
+        }
+
+        // Segments are only cut on a key frame, and only once the target duration has been reached
+        if (isKeyFrame && pendingSamples.isNotEmpty()) {
+            val segmentDurationUs = presentationTimeUs - segmentStartTimeUs
+            if (segmentDurationUs >= targetSegmentDurationUs) {
+                finalizeCurrentSegment()
+                segmentStartTimeUs = presentationTimeUs
+            }
+        }
+
+        // Copy the [offset, offset + size) range out of the buffer
+        val rawData = ByteArray(bufferInfo.size)
+        buffer.position(bufferInfo.offset)
+        buffer.limit(bufferInfo.offset + bufferInfo.size)
+        buffer.get(rawData)
+
+        // Convert Annex-B (start codes) to AVCC (length prefixes)
+        val data = convertAnnexBToAvcc(rawData)
+
+        // The previous queued sample's duration is the gap up to this sample's timestamp
+        if (pendingSamples.isNotEmpty()) {
+            val lastSample = pendingSamples.last()
+            lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
+        }
+
+        // Provisional duration for this sample; replaced when the next sample arrives in the same segment
+        val estimatedDurationUs = if (lastPresentationTimeUs > 0) {
+            presentationTimeUs - lastPresentationTimeUs
+        } else {
+            1_000_000L / 30  // Assume 30fps
+        }
+
+        pendingSamples.add(Sample(
+            data = data,
+            presentationTimeUs = presentationTimeUs,
+            durationUs = estimatedDurationUs,
+            isKeyFrame = isKeyFrame
+        ))
+
+        lastPresentationTimeUs = presentationTimeUs
+    }
+
+    /**
+     * Stops the muxer, finalizing any pending segment.
+     */
+    fun stop() {
+        check(state == State.STARTED) { "Muxer not started" }
+
+        if (pendingSamples.isNotEmpty()) {
+            finalizeCurrentSegment()
+        }
+
+        state = State.STOPPED
+        Log.d(TAG, "Muxer stopped, wrote $segmentIndex segments")
+    }
+
+    /**
+     * Releases resources.
+     */
+    fun release() {
+        if (state == State.STARTED) {
+            stop()
+        }
+        pendingSamples.clear()
+        state = State.RELEASED
+    }
+
+    /**
+     * Writes the pending samples as "<segmentIndex>.mp4", reports the file to the callback and resets the queue.
+     */
+    private fun finalizeCurrentSegment() {
+        if (pendingSamples.isEmpty()) return
+
+        try {
+            val baseDecodeTimeUs = pendingSamples.first().presentationTimeUs
+            val fragmentBytes = buildMediaSegment(pendingSamples, sequenceNumber, baseDecodeTimeUs)
+
+            val segmentFile = File(outputDirectory, "$segmentIndex.mp4")
+            FileOutputStream(segmentFile).use { it.write(fragmentBytes) }
+
+            // Duration = span from the first to the last timestamp, plus the last sample's own duration
+            val firstPts = pendingSamples.first().presentationTimeUs
+            val lastSample = pendingSamples.last()
+            val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs
+
+            Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " +
+                    "duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes")
+
+            callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs)
+
+            segmentIndex++
+            sequenceNumber++
+            pendingSamples.clear()
+            // NOTE(review): an exception above skips this bookkeeping, so the samples stay queued and the index is reused
+        } catch (e: Exception) {
+            Log.e(TAG, "Error finalizing segment $segmentIndex", e)
+        }
+    }
+
+    // ==================== Init Segment Building ====================
+
+    /**
+     * Builds the initialization segment (ftyp + moov).
+     */
+    private fun buildInitSegment(format: MediaFormat): ByteArray {
+        val width = format.getInteger(MediaFormat.KEY_WIDTH)
+        val height = format.getInteger(MediaFormat.KEY_HEIGHT)
+
+        val sps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) }
+            ?: throw IllegalArgumentException("Missing SPS (csd-0)")
+        val pps = format.getByteBuffer("csd-1")?.let { extractNalUnit(it) }
+            ?: throw IllegalArgumentException("Missing PPS (csd-1)")
+
+        val output = ByteArrayOutputStream()
+
+        // ftyp
+        output.write(buildFtypBox())
+
+        // moov
+        output.write(buildMoovBox(width, height, sps, pps))
+
+        return output.toByteArray()
+    }
+
+    private fun extractNalUnit(buffer: ByteBuffer): ByteArray {
+        val data = ByteArray(buffer.remaining())
+        buffer.duplicate().get(data)
+
+        // Strip start code prefix (0x00000001 or 0x000001)
+        return when {
+            data.size >= 4 && data[0] == 0.toByte() && data[1] == 0.toByte() &&
+                    data[2] == 0.toByte() && data[3] == 1.toByte() -> data.copyOfRange(4, data.size)
+            data.size >= 3 && data[0] == 0.toByte() && data[1] == 0.toByte() &&
+                    data[2] == 1.toByte() -> data.copyOfRange(3, data.size)
+            else -> data
+        }
+    }
+
+    private fun buildFtypBox(): ByteArray {
+        val brands = listOf("isom", "iso5", "iso6", "avc1", "mp41", "dash")
+        val output = ByteArrayOutputStream()
+        val dos = DataOutputStream(output)
+
+        val size = 8 + 4 + 4 + (brands.size * 4)
+        dos.writeInt(size)
+        dos.writeBytes("ftyp")
+        dos.writeBytes("isom")  // major brand
+        dos.writeInt(0x200)     // minor version
+        brands.forEach { dos.writeBytes(it) }
+
+        return output.toByteArray()
+    }
+
+    private fun buildMoovBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val content = ByteArrayOutputStream()
+
+        content.write(buildMvhdBox())
+        content.write(buildTrakBox(width, height, sps, pps))
+        content.write(buildMvexBox())
+
+        return wrapBox("moov", content.toByteArray())
+    }
+
+    private fun buildMvhdBox(): ByteArray {
+        val output = ByteArrayOutputStream()
+        val dos = DataOutputStream(output)
+
+        dos.writeInt(0)          // version & flags
+        dos.writeInt(0)          // creation time
+        dos.writeInt(0)          // modification time
+        dos.writeInt(timescale)  // timescale
+        dos.writeInt(0)          // duration
+        dos.writeInt(0x00010000) // rate = 1.0
+        dos.writeShort(0x0100)   // volume = 1.0
+        dos.writeShort(0)        // reserved
+        dos.writeInt(0)          // reserved
+        dos.writeInt(0)          // reserved
+
+        // Unity matrix
+        dos.writeInt(0x00010000); dos.writeInt(0); dos.writeInt(0)
+        dos.writeInt(0); dos.writeInt(0x00010000); dos.writeInt(0)
+        dos.writeInt(0); dos.writeInt(0); dos.writeInt(0x40000000)
+
+        repeat(6) { dos.writeInt(0) }  // pre-defined
+        dos.writeInt(2)  // next track ID
+
+        return wrapBox("mvhd", output.toByteArray())
+    }
+
+    private fun buildTrakBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val content = ByteArrayOutputStream()
+        content.write(buildTkhdBox(width, height))
+        content.write(buildMdiaBox(width, height, sps, pps))
+        return wrapBox("trak", content.toByteArray())
+    }
+
+    /**
+     * Serializes the version-0 `tkhd` (track header) box for track ID 1.
+     *
+     * The matrix is emitted by [writeRotationMatrix]; width and height are stored
+     * unchanged as 16.16 fixed-point values.
+     *
+     * @param width track width in pixels
+     * @param height track height in pixels
+     */
+    private fun buildTkhdBox(width: Int, height: Int): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        val writer = DataOutputStream(buffer)
+
+        writer.apply {
+            writeInt(0x00000007)         // version 0, flags (enabled, in movie, in preview)
+            repeat(2) { writeInt(0) }    // creation time, modification time
+            writeInt(1)                  // track ID
+            writeInt(0)                  // reserved
+            writeInt(0)                  // duration
+            repeat(2) { writeInt(0) }    // reserved
+            repeat(4) { writeShort(0) }  // layer, alternate group, volume (0 for video), reserved
+        }
+
+        // Matrix comes next in the box layout
+        writeRotationMatrix(writer)
+
+        // Dimensions exactly as handed in, shifted into 16.16 fixed point
+        writer.writeInt(width shl 16)
+        writer.writeInt(height shl 16)
+
+        Log.d(TAG, "tkhd: ${width}x${height}, rotation=$orientationDegrees")
+
+        return wrapBox("tkhd", buffer.toByteArray())
+    }
+
+    /**
+     * Writes the nine 32-bit entries of a 3x3 transformation matrix to [dos].
+     *
+     * Despite the name, the matrix written is always the identity
+     * (a = d = 1.0 in 16.16 fixed point, w = 1.0 in 2.30 fixed point, all other
+     * entries zero); no rotation is encoded here.
+     */
+    private fun writeRotationMatrix(dos: DataOutputStream) {
+        val unit16 = 0x00010000  // 1.0 in 16.16
+        val unit30 = 0x40000000  // 1.0 in 2.30
+
+        val rows = arrayOf(
+            intArrayOf(unit16, 0, 0),  // a, b, u
+            intArrayOf(0, unit16, 0),  // c, d, v
+            intArrayOf(0, 0, unit30)   // x, y, w
+        )
+        for (row in rows) {
+            for (entry in row) {
+                dos.writeInt(entry)
+            }
+        }
+    }
+
+    /**
+     * Builds the `mdia` box: `mdhd`, `hdlr`, then `minf`.
+     */
+    private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val mediaHeader = buildMdhdBox()
+        val handler = buildHdlrBox()
+        val mediaInfo = buildMinfBox(width, height, sps, pps)
+        return wrapBox("mdia", mediaHeader + handler + mediaInfo)
+    }
+
+    /**
+     * Serializes the version-0 `mdhd` (media header) box.
+     *
+     * Times and duration are zero, the media timescale is [timescale] and the
+     * language code is "und".
+     */
+    private fun buildMdhdBox(): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        with(DataOutputStream(buffer)) {
+            writeInt(0)                // version & flags
+            repeat(2) { writeInt(0) }  // creation time, modification time
+            writeInt(timescale)        // timescale
+            writeInt(0)                // duration
+            writeShort(0x55C4)         // language: "und"
+            writeShort(0)              // pre-defined
+        }
+        return wrapBox("mdhd", buffer.toByteArray())
+    }
+
+    /**
+     * Serializes the `hdlr` box with handler type "vide" and the
+     * null-terminated name "VideoHandler".
+     */
+    private fun buildHdlrBox(): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        DataOutputStream(buffer).apply {
+            writeInt(0)                 // version & flags
+            writeInt(0)                 // pre-defined
+            writeBytes("vide")          // handler type
+            repeat(3) { writeInt(0) }   // reserved
+            writeBytes("VideoHandler")  // handler name
+            writeByte(0)                // null terminator
+        }
+        return wrapBox("hdlr", buffer.toByteArray())
+    }
+
+    /**
+     * Builds the `minf` box: `vmhd`, `dinf`, then `stbl`.
+     */
+    private fun buildMinfBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val payload = ByteArrayOutputStream().apply {
+            write(buildVmhdBox())
+            write(buildDinfBox())
+            write(buildStblBox(width, height, sps, pps))
+        }
+        return wrapBox("minf", payload.toByteArray())
+    }
+
+    /**
+     * Serializes the `vmhd` (video media header) box: flags = 1, graphics mode 0
+     * and an all-zero opcolor.
+     */
+    private fun buildVmhdBox(): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        with(DataOutputStream(buffer)) {
+            writeInt(1)                  // version 0, flags = 1
+            writeShort(0)                // graphics mode
+            repeat(3) { writeShort(0) }  // opcolor[0..2]
+        }
+        return wrapBox("vmhd", buffer.toByteArray())
+    }
+
+    /** Builds the `dinf` box, whose only child is the `dref` box. */
+    private fun buildDinfBox(): ByteArray = wrapBox("dinf", buildDrefBox())
+
+    /**
+     * Serializes the `dref` box with a single self-contained `url ` entry.
+     */
+    private fun buildDrefBox(): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        DataOutputStream(buffer).run {
+            writeInt(0)  // version & flags
+            writeInt(1)  // entry count
+
+            // The one entry: a 12-byte "url " box written inline
+            writeInt(12)         // box size
+            writeBytes("url ")   // box type
+            writeInt(1)          // flags: self-contained
+        }
+        return wrapBox("dref", buffer.toByteArray())
+    }
+
+    /**
+     * Builds the `stbl` box: the `stsd` sample description followed by empty
+     * `stts`, `stsc`, `stsz` and `stco` tables.
+     */
+    private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val payload = ByteArrayOutputStream().apply {
+            write(buildStsdBox(width, height, sps, pps))
+            write(buildEmptySttsBox())
+            write(buildEmptyStscBox())
+            write(buildEmptyStszBox())
+            write(buildEmptyStcoBox())
+        }
+        return wrapBox("stbl", payload.toByteArray())
+    }
+
+    /**
+     * Serializes the `stsd` box containing exactly one sample entry, the `avc1` box.
+     */
+    private fun buildStsdBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val header = ByteArrayOutputStream()
+        DataOutputStream(header).apply {
+            writeInt(0)  // version & flags
+            writeInt(1)  // entry count
+        }
+        val sampleEntry = buildAvc1Box(width, height, sps, pps)
+        return wrapBox("stsd", header.toByteArray() + sampleEntry)
+    }
+
+    /**
+     * Serializes the `avc1` visual sample entry and appends the `avcC` box built
+     * from [sps] and [pps].
+     *
+     * @param width coded width, written as a 16-bit value
+     * @param height coded height, written as a 16-bit value
+     */
+    private fun buildAvc1Box(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        with(DataOutputStream(buffer)) {
+            write(ByteArray(6))        // reserved (six zero bytes)
+            writeShort(1)              // data reference index
+            writeShort(0)              // pre-defined
+            writeShort(0)              // reserved
+            repeat(3) { writeInt(0) }  // pre-defined
+            writeShort(width)          // width
+            writeShort(height)         // height
+            writeInt(0x00480000)       // horiz resolution (72 dpi)
+            writeInt(0x00480000)       // vert resolution (72 dpi)
+            writeInt(0)                // reserved
+            writeShort(1)              // frame count
+            write(ByteArray(32))       // compressor name (32 zero bytes)
+            writeShort(0x0018)         // depth (24 bit)
+            writeShort(0xFFFF)         // pre-defined (-1 as a 16-bit value)
+        }
+        val decoderConfig = buildAvcCBox(sps, pps)
+        return wrapBox("avc1", buffer.toByteArray() + decoderConfig)
+    }
+
+    /**
+     * Builds the `avcC` box (AVC decoder configuration record) carrying one SPS and one PPS.
+     *
+     * The profile, profile-compatibility and level bytes are copied out of [sps]. When [sps]
+     * starts with an SPS NAL header byte they are taken from bytes 1..3; otherwise from
+     * bytes 0..2. If [sps] is too short, the fallbacks 0x42 / 0x00 / 0x1F are used.
+     *
+     * NOTE(review): assumes callers pass the SPS/PPS with the Annex-B start code already
+     * stripped - confirm against the code that extracts them from the encoder format.
+     *
+     * @param sps sequence parameter set bytes, written verbatim into the record
+     * @param pps picture parameter set bytes, written verbatim into the record
+     * @return the serialized `avcC` box
+     */
+    private fun buildAvcCBox(sps: ByteArray, pps: ByteArray): ByteArray {
+        val output = ByteArrayOutputStream()
+        val dos = DataOutputStream(output)
+
+        // An SPS NAL unit begins with a one-byte header: forbidden_zero_bit = 0 (0x80 clear)
+        // and nal_unit_type = 7 in the low five bits (commonly 0x67). Reading the profile from
+        // that byte would advertise a bogus profile, so skip it when present. No defined
+        // profile_idc value matches this pattern, so a header-less SPS is left untouched.
+        val hasNalHeader = sps.isNotEmpty() && (sps[0].toInt() and 0x9F) == 0x07
+        val base = if (hasNalHeader) 1 else 0
+
+        val profileIdc = sps.getOrNull(base)?.let { it.toInt() and 0xFF } ?: 0x42
+        val profileCompat = sps.getOrNull(base + 1)?.let { it.toInt() and 0xFF } ?: 0x00
+        val levelIdc = sps.getOrNull(base + 2)?.let { it.toInt() and 0xFF } ?: 0x1F
+
+        dos.writeByte(1)            // configuration version
+        dos.writeByte(profileIdc)   // AVC profile
+        dos.writeByte(profileCompat)// profile compatibility
+        dos.writeByte(levelIdc)     // AVC level
+        dos.writeByte(0xFF)         // 6 bits reserved + 2 bits NAL length - 1
+
+        dos.writeByte(0xE1)         // 3 bits reserved + 5 bits SPS count
+        dos.writeShort(sps.size)    // SPS length
+        dos.write(sps)              // SPS data
+
+        dos.writeByte(1)            // PPS count
+        dos.writeShort(pps.size)    // PPS length
+        dos.write(pps)              // PPS data
+
+        return wrapBox("avcC", output.toByteArray())
+    }
+
+    /**
+     * Builds an empty `stts` box: zero version/flags and an entry count of zero,
+     * i.e. eight zero bytes of payload.
+     */
+    private fun buildEmptySttsBox(): ByteArray = wrapBox("stts", ByteArray(8))
+
+    /**
+     * Builds an empty `stsc` box: zero version/flags and an entry count of zero,
+     * i.e. eight zero bytes of payload.
+     */
+    private fun buildEmptyStscBox(): ByteArray = wrapBox("stsc", ByteArray(8))
+
+    /**
+     * Builds an empty `stsz` box: zero version/flags, sample size 0 (variable)
+     * and a sample count of zero, i.e. twelve zero bytes of payload.
+     */
+    private fun buildEmptyStszBox(): ByteArray = wrapBox("stsz", ByteArray(12))
+
+    /**
+     * Builds an empty `stco` box: zero version/flags and an entry count of zero,
+     * i.e. eight zero bytes of payload.
+     */
+    private fun buildEmptyStcoBox(): ByteArray = wrapBox("stco", ByteArray(8))
+
+    /** Builds the `mvex` box, whose only child is the `trex` box. */
+    private fun buildMvexBox(): ByteArray = wrapBox("mvex", buildTrexBox())
+
+    /**
+     * Serializes the `trex` box for track ID 1 with sample description index 1
+     * and zero default duration, size and flags.
+     */
+    private fun buildTrexBox(): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        with(DataOutputStream(buffer)) {
+            writeInt(0)                // version & flags
+            writeInt(1)                // track ID
+            writeInt(1)                // default sample description index
+            repeat(3) { writeInt(0) }  // default sample duration, size, flags
+        }
+        return wrapBox("trex", buffer.toByteArray())
+    }
+
+    // ==================== Media Segment Building ====================
+
+    /**
+     * Produces one media segment: a `moof` box immediately followed by an `mdat` box.
+     *
+     * @param samples samples whose `data` is concatenated, in order, into the `mdat` payload
+     * @param sequenceNumber forwarded to the `moof` builder
+     * @param baseDecodeTimeUs forwarded to the `moof` builder
+     * @return the serialized segment bytes
+     */
+    private fun buildMediaSegment(
+        samples: List<Sample>,
+        sequenceNumber: Int,
+        baseDecodeTimeUs: Long
+    ): ByteArray {
+        // The payload is assembled first because the moof builder is given its size
+        val payload = ByteArrayOutputStream().apply {
+            samples.forEach { entry -> write(entry.data) }
+        }.toByteArray()
+
+        val moof = buildMoofBox(samples, sequenceNumber, baseDecodeTimeUs, payload.size)
+        val mdat = wrapBox("mdat", payload)
+
+        return moof + mdat
+    }
+
+    /**
+     * Builds the `moof` box (`mfhd` + `traf`).
+     *
+     * The sizes of the boxes inside `moof` are computed up front so that the data
+     * offset handed to the `traf` builder equals the `moof` size plus the 8-byte
+     * `mdat` header.
+     *
+     * @param samples samples described by this fragment
+     * @param sequenceNumber value written into `mfhd`
+     * @param baseDecodeTimeUs forwarded to the `traf` builder
+     * @param mdatPayloadSize accepted from the caller but not read by this function
+     */
+    private fun buildMoofBox(
+        samples: List<Sample>,
+        sequenceNumber: Int,
+        baseDecodeTimeUs: Long,
+        mdatPayloadSize: Int
+    ): ByteArray {
+        val boxHeader = 8
+        val mfhd = buildMfhdBox(sequenceNumber)
+
+        // Sizes mirror what the tfhd / tfdt / trun builders emit
+        val tfhdBytes = boxHeader + 8                       // version/flags + track_id
+        val tfdtBytes = boxHeader + 12                      // version 1 content
+        val trunBytes = boxHeader + 12 + 12 * samples.size  // fixed part + 12 bytes per sample
+        val trafBytes = boxHeader + tfhdBytes + tfdtBytes + trunBytes
+        val moofBytes = boxHeader + mfhd.size + trafBytes
+
+        // Sample data starts right after the mdat header that follows moof
+        val traf = buildTrafBox(samples, baseDecodeTimeUs, moofBytes + boxHeader)
+
+        return wrapBox("moof", mfhd + traf)
+    }
+
+    /**
+     * Serializes the `mfhd` box holding the fragment's [sequenceNumber].
+     */
+    private fun buildMfhdBox(sequenceNumber: Int): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        DataOutputStream(buffer).apply {
+            writeInt(0)               // version & flags
+            writeInt(sequenceNumber)  // sequence number
+        }
+        return wrapBox("mfhd", buffer.toByteArray())
+    }
+
+    /**
+     * Builds the `traf` box: `tfhd`, `tfdt`, then `trun`.
+     */
+    private fun buildTrafBox(samples: List<Sample>, baseDecodeTimeUs: Long, dataOffset: Int): ByteArray {
+        val fragmentHeader = buildTfhdBox()
+        val decodeTime = buildTfdtBox(baseDecodeTimeUs)
+        val run = buildTrunBox(samples, dataOffset)
+        return wrapBox("traf", fragmentHeader + decodeTime + run)
+    }
+
+    /**
+     * Serializes the `tfhd` box for track ID 1 with only the
+     * default-base-is-moof flag set.
+     */
+    private fun buildTfhdBox(): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        with(DataOutputStream(buffer)) {
+            writeInt(0x00020000)  // version 0, flags: default-base-is-moof
+            writeInt(1)           // track ID
+        }
+        return wrapBox("tfhd", buffer.toByteArray())
+    }
+
+    /**
+     * Serializes a version-1 `tfdt` box.
+     *
+     * @param baseDecodeTimeUs base decode time in microseconds; converted to
+     *   [timescale] units with integer division before being written as 64 bits
+     */
+    private fun buildTfdtBox(baseDecodeTimeUs: Long): ByteArray {
+        // Microseconds -> timescale ticks
+        val decodeTicks = (baseDecodeTimeUs * timescale) / 1_000_000
+
+        val buffer = ByteArrayOutputStream()
+        DataOutputStream(buffer).run {
+            writeInt(0x01000000)    // version 1 (64-bit time), no flags
+            writeLong(decodeTicks)  // base media decode time
+        }
+        return wrapBox("tfdt", buffer.toByteArray())
+    }
+
+    /**
+     * Serializes the `trun` box: sample count, data offset, then duration, size
+     * and flags for every sample.
+     *
+     * @param samples samples to describe; each duration is converted from
+     *   microseconds to [timescale] units with integer division
+     * @param dataOffset value written into the data-offset field
+     */
+    private fun buildTrunBox(samples: List<Sample>, dataOffset: Int): ByteArray {
+        val buffer = ByteArrayOutputStream()
+        val writer = DataOutputStream(buffer)
+
+        // Version 0; each bit announces one optional field that follows
+        val dataOffsetPresent = 0x000001
+        val durationPresent = 0x000100
+        val sizePresent = 0x000200
+        val sampleFlagsPresent = 0x000400
+        writer.writeInt(dataOffsetPresent or durationPresent or sizePresent or sampleFlagsPresent)
+
+        writer.writeInt(samples.size)
+        writer.writeInt(dataOffset)
+
+        samples.forEach { entry ->
+            val ticks = ((entry.durationUs * timescale) / 1_000_000).toInt()
+            writer.writeInt(ticks)
+            writer.writeInt(entry.data.size)
+            writer.writeInt(buildSampleFlags(entry.isKeyFrame))
+        }
+
+        return wrapBox("trun", buffer.toByteArray())
+    }
+
+    /**
+     * Returns the 32-bit sample flags written into `trun` for one sample.
+     *
+     * @param isKeyFrame true for a key frame
+     * @return 0x02000000 for key frames (sample_depends_on = 2), otherwise 0x01010000
+     *   (sample_depends_on = 1 plus the difference-sample bit)
+     */
+    private fun buildSampleFlags(isKeyFrame: Boolean): Int = when (isKeyFrame) {
+        true -> 0x02000000
+        false -> 0x01010000
+    }
+
+    // ==================== Utilities ====================
+
+    /**
+     * Wraps [content] in a box: a 32-bit big-endian size (payload length plus the
+     * 8-byte header), the characters of [type], then the payload itself.
+     *
+     * @param type box type string written with one byte per character
+     * @param content box payload
+     * @return header and payload as one array
+     */
+    private fun wrapBox(type: String, content: ByteArray): ByteArray {
+        val boxed = ByteArrayOutputStream()
+        DataOutputStream(boxed).run {
+            writeInt(content.size + 8)
+            writeBytes(type)
+            write(content)
+        }
+        return boxed.toByteArray()
+    }
+}
--- a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt
@@ -4,7 +4,6 @@ import android.content.Context
 import android.util.Log
 import android.util.Size
 import android.view.Surface
-import androidx.media3.common.util.UnstableApi
 import com.facebook.common.statfs.StatFsHelper
 import com.mrousavy.camera.extensions.getRecommendedBitRate
 import com.mrousavy.camera.types.Orientation
@@ -16,7 +15,6 @@ import java.text.SimpleDateFormat
 import java.util.Locale
 import java.util.Date

-@UnstableApi
 class RecordingSession(
  context: Context,
  val cameraId: String,
@@ -30,7 +28,7 @@ class RecordingSession(
  private val callback: (video: Video) -> Unit,
  private val onError: (error: CameraError) -> Unit,
  private val allCallbacks: CameraSession.Callback,
-  // Use the new FragmentedMp4Muxer-based recorder for HLS-compatible output
+  // Use FragmentedRecordingManager for HLS-compatible fMP4 output
  private val useFragmentedMp4: Boolean = true
 ) {
  companion object {