Fix orientation issues

Write our own muxer to make HLS upload actually work
2025-12-28 01:14:44 -08:00 · 2025-12-21 16:45:04 -08:00
9 changed files with 978 additions and 704 deletions
--- a/package/android/build.gradle
+++ b/package/android/build.gradle
@@ -178,10 +178,6 @@ dependencies {
  implementation "com.facebook.react:react-android:+"
  implementation "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3"

-  // Media3 muxer for fragmented MP4 (HLS-compatible) recording
-  implementation "androidx.media3:media3-muxer:1.5.0"
-  implementation "androidx.media3:media3-common:1.5.0"
-
  if (enableCodeScanner) {
    // User enabled code-scanner, so we bundle the 2.4 MB model in the app.
    implementation 'com.google.mlkit:barcode-scanning:17.2.0'
--- a/package/android/src/main/cpp/OpenGLRenderer.cpp
+++ b/package/android/src/main/cpp/OpenGLRenderer.cpp
@@ -26,7 +26,6 @@ OpenGLRenderer::OpenGLRenderer(std::shared_ptr<OpenGLContext> context, ANativeWi
  _outputSurface = surface;
  _width = ANativeWindow_getWidth(surface);
  _height = ANativeWindow_getHeight(surface);
-  __android_log_print(ANDROID_LOG_INFO, TAG, "ROTATION_DEBUG OpenGLRenderer created with output surface dimensions: %dx%d", _width, _height);
 }

 OpenGLRenderer::~OpenGLRenderer() {
--- a/package/android/src/main/cpp/VideoPipeline.cpp
+++ b/package/android/src/main/cpp/VideoPipeline.cpp
@@ -56,11 +56,6 @@ void VideoPipeline::setRecordingSessionOutputSurface(jobject surface) {
  _recordingSessionOutput = OpenGLRenderer::CreateWithWindowSurface(_context, window);
 }

-void VideoPipeline::setRecordingOrientation(int orientation) {
-  _recordingOrientation = orientation;
-  __android_log_print(ANDROID_LOG_INFO, TAG, "Recording orientation set to: %d", orientation);
-}
-
 int VideoPipeline::getInputTextureId() {
  if (_inputTexture == std::nullopt) {
    _inputTexture = _context->createTexture(OpenGLTexture::Type::ExternalOES, _width, _height);
@@ -83,29 +78,8 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara
  OpenGLTexture& texture = _inputTexture.value();

  if (_recordingSessionOutput) {
-    __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession.. orientation=%d", _recordingOrientation);
-
-    // For recording, use a simple transform matrix instead of the display transform.
-    // The display transform includes rotations for preview which we don't want in recordings.
-    float recordingMatrix[16];
-
-    if (_recordingOrientation == 1) {
-      // LANDSCAPE_RIGHT (CW): Y-flip + 180° rotation = flip both X and Y
-      // This negates both X and Y, then translates by (1,1)
-      recordingMatrix[0] = -1.0f; recordingMatrix[1] =  0.0f; recordingMatrix[2] = 0.0f; recordingMatrix[3] = 0.0f;
-      recordingMatrix[4] =  0.0f; recordingMatrix[5] =  1.0f; recordingMatrix[6] = 0.0f; recordingMatrix[7] = 0.0f;
-      recordingMatrix[8] =  0.0f; recordingMatrix[9] =  0.0f; recordingMatrix[10] = 1.0f; recordingMatrix[11] = 0.0f;
-      recordingMatrix[12] = 1.0f; recordingMatrix[13] = 0.0f; recordingMatrix[14] = 0.0f; recordingMatrix[15] = 1.0f;
-    } else {
-      // LANDSCAPE_LEFT (CCW): Simple Y-flip
-      // OpenGL origin is bottom-left, video expects top-left
-      recordingMatrix[0] = 1.0f; recordingMatrix[1] =  0.0f; recordingMatrix[2] = 0.0f; recordingMatrix[3] = 0.0f;
-      recordingMatrix[4] = 0.0f; recordingMatrix[5] = -1.0f; recordingMatrix[6] = 0.0f; recordingMatrix[7] = 0.0f;
-      recordingMatrix[8] = 0.0f; recordingMatrix[9] =  0.0f; recordingMatrix[10] = 1.0f; recordingMatrix[11] = 0.0f;
-      recordingMatrix[12] = 0.0f; recordingMatrix[13] = 1.0f; recordingMatrix[14] = 0.0f; recordingMatrix[15] = 1.0f;
-    }
-
-    _recordingSessionOutput->renderTextureToSurface(texture, recordingMatrix);
+    __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession..");
+    _recordingSessionOutput->renderTextureToSurface(texture, transformMatrix);
  }
 }

@@ -114,7 +88,6 @@ void VideoPipeline::registerNatives() {
      makeNativeMethod("initHybrid", VideoPipeline::initHybrid),
      makeNativeMethod("setRecordingSessionOutputSurface", VideoPipeline::setRecordingSessionOutputSurface),
      makeNativeMethod("removeRecordingSessionOutputSurface", VideoPipeline::removeRecordingSessionOutputSurface),
-      makeNativeMethod("setRecordingOrientation", VideoPipeline::setRecordingOrientation),
      makeNativeMethod("getInputTextureId", VideoPipeline::getInputTextureId),
      makeNativeMethod("onBeforeFrame", VideoPipeline::onBeforeFrame),
      makeNativeMethod("onFrame", VideoPipeline::onFrame),
--- a/package/android/src/main/cpp/VideoPipeline.h
+++ b/package/android/src/main/cpp/VideoPipeline.h
@@ -33,7 +33,6 @@ public:
  // <- MediaRecorder output
  void setRecordingSessionOutputSurface(jobject surface);
  void removeRecordingSessionOutputSurface();
-  void setRecordingOrientation(int orientation);

  // Frame callbacks
  void onBeforeFrame();
@@ -48,7 +47,6 @@ private:
  std::optional<OpenGLTexture> _inputTexture = std::nullopt;
  int _width = 0;
  int _height = 0;
-  int _recordingOrientation = 0; // 0=LANDSCAPE_LEFT, 1=LANDSCAPE_RIGHT

  // Output Contexts
  std::shared_ptr<OpenGLContext> _context = nullptr;
--- a/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt
@@ -409,8 +409,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
  private fun updateVideoOutputs() {
    val videoOutput = videoOutput ?: return
    Log.i(TAG, "Updating Video Outputs...")
-    val orientation = recording?.cameraOrientation ?: Orientation.LANDSCAPE_LEFT
-    videoOutput.videoPipeline.setRecordingSessionOutput(recording, orientation)
+    videoOutput.videoPipeline.setRecordingSessionOutput(recording)
  }

  suspend fun startRecording(
@@ -429,16 +428,18 @@ class CameraSession(private val context: Context, private val cameraManager: Cam

      // Get actual device rotation from WindowManager since the React Native orientation hook
      // doesn't update when rotating between landscape-left and landscape-right on Android.
+      // Map device rotation to the correct orientationHint for video recording:
+      // - Counter-clockwise (ROTATION_90) → 270° hint
+      // - Clockwise (ROTATION_270) → 90° hint
      val windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager
      val deviceRotation = windowManager.defaultDisplay.rotation
      val recordingOrientation = when (deviceRotation) {
        Surface.ROTATION_0 -> Orientation.PORTRAIT
-        Surface.ROTATION_90 -> Orientation.LANDSCAPE_LEFT      // CCW rotation, top to left
+        Surface.ROTATION_90 -> Orientation.LANDSCAPE_RIGHT
        Surface.ROTATION_180 -> Orientation.PORTRAIT_UPSIDE_DOWN
-        Surface.ROTATION_270 -> Orientation.LANDSCAPE_RIGHT    // CW rotation, top to right
+        Surface.ROTATION_270 -> Orientation.LANDSCAPE_LEFT
        else -> Orientation.PORTRAIT
      }
-      Log.i(TAG, "ROTATION_DEBUG: deviceRotation=$deviceRotation, recordingOrientation=$recordingOrientation, options.orientation=${options.orientation}")

      val recording = RecordingSession(
        context,
@@ -447,7 +448,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
        enableAudio,
        fps,
        videoOutput.enableHdr,
-        recordingOrientation,
+        orientation,
        options,
        filePath,
        callback,
--- a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt
@@ -7,39 +7,25 @@ import android.media.MediaFormat
 import android.util.Log
 import android.util.Size
 import android.view.Surface
-import androidx.media3.common.Format
-import androidx.media3.common.MimeTypes
-import androidx.media3.common.util.UnstableApi
-import androidx.media3.muxer.FragmentedMp4Muxer
-import androidx.media3.muxer.Muxer
 import com.mrousavy.camera.types.Orientation
 import com.mrousavy.camera.types.RecordVideoOptions
 import java.io.File
-import java.io.FileOutputStream
-import java.io.RandomAccessFile
-import java.nio.ByteBuffer
-import java.nio.ByteOrder

 /**
 * A recording manager that produces HLS-compatible fragmented MP4 segments.
 *
- * This produces output similar to the iOS implementation:
- * - An initialization segment (init.mp4) containing codec configuration
- * - Numbered data segments (0.mp4, 1.mp4, ...) containing media data
- *
- * Uses AndroidX Media3's FragmentedMp4Muxer which produces proper fMP4 output.
+ * Uses HlsMuxer (following Android's MediaMuxer pattern) to produce:
+ * - init.mp4: Initialization segment (ftyp + moov with mvex)
+ * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
 */
-@UnstableApi
 class FragmentedRecordingManager(
    private val encoder: MediaCodec,
-    private val outputDirectory: File,
-    private val orientationDegrees: Int,
-    private val targetSegmentDurationUs: Long,
-    private val callbacks: CameraSession.Callback
+    private val muxer: HlsMuxer
 ) : MediaCodec.Callback(), ChunkedRecorderInterface {

    companion object {
        private const val TAG = "FragmentedRecorder"
+        private const val DEFAULT_SEGMENT_DURATION_SECONDS = 6

        fun fromParams(
            callbacks: CameraSession.Callback,
@@ -50,24 +36,22 @@ class FragmentedRecordingManager(
            bitRate: Int,
            options: RecordVideoOptions,
            outputDirectory: File,
-            segmentDurationSeconds: Int = 6
+            segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS
        ): FragmentedRecordingManager {
            val mimeType = options.videoCodec.toMimeType()
-            // For fragmented MP4: DON'T swap dimensions, use camera's native dimensions.
-            // The C++ VideoPipeline uses a custom transform matrix (not the display transform).
-            // This gives us raw sensor frames, and we rely on rotation metadata for playback.
-            val cameraOrientationDegrees = when (cameraOrientation) {
-                Orientation.LANDSCAPE_LEFT -> 0    // CCW landscape
-                Orientation.LANDSCAPE_RIGHT -> 0   // CW landscape
-                Orientation.PORTRAIT -> 90
-                Orientation.PORTRAIT_UPSIDE_DOWN -> 270
-            }
-            Log.i(TAG, "ROTATION_DEBUG FragmentedRecordingManager: cameraOrientation=$cameraOrientation, cameraOrientationDegrees=$cameraOrientationDegrees, inputSize=${size.width}x${size.height}")
+            val cameraOrientationDegrees = cameraOrientation.toDegrees()
+            val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees()

-            // Keep original dimensions - don't swap. Let rotation metadata handle orientation.
-            val width = size.width
-            val height = size.height
-            Log.i(TAG, "ROTATION_DEBUG FragmentedRecordingManager: outputDimensions=${width}x${height} (no swap)")
+            // Swap dimensions based on camera orientation, same as ChunkedRecordingManager
+            val (width, height) = if (cameraOrientation.isLandscape()) {
+                size.height to size.width
+            } else {
+                size.width to size.height
+            }
+
+            Log.d(TAG, "Input size: ${size.width}x${size.height}, encoder size: ${width}x${height}, " +
+                    "cameraOrientation: $cameraOrientation ($cameraOrientationDegrees°), " +
+                    "recordingOrientation: $recordingOrientationDegrees°")

            val format = MediaFormat.createVideoFormat(mimeType, width, height)
            val codec = MediaCodec.createEncoderByType(mimeType)
@@ -76,151 +60,48 @@ class FragmentedRecordingManager(
                MediaFormat.KEY_COLOR_FORMAT,
                MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface
            )
-            fps?.apply {
-                format.setInteger(MediaFormat.KEY_FRAME_RATE, this)
-            }
-            // I-frame interval affects segment boundaries
+
+            val effectiveFps = fps ?: 30
+            format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps)
            format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
            format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)

-            Log.d(TAG, "Video Format: $format, orientationDegrees: $cameraOrientationDegrees")
+            Log.d(TAG, "Video Format: $format, orientation: $recordingOrientationDegrees")

            codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)

-            return FragmentedRecordingManager(
-                codec,
-                outputDirectory,
-                cameraOrientationDegrees,
-                segmentDurationSeconds * 1_000_000L,
-                callbacks
+            // Create muxer with callbacks and orientation
+            val muxer = HlsMuxer(
+                outputDirectory = outputDirectory,
+                callback = object : HlsMuxer.Callback {
+                    override fun onInitSegmentReady(file: File) {
+                        callbacks.onInitSegmentReady(file)
+                    }
+
+                    override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) {
+                        callbacks.onVideoChunkReady(file, index, durationUs)
+                    }
+                },
+                orientationDegrees = recordingOrientationDegrees
            )
+            muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L)
+
+            Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees")
+
+            return FragmentedRecordingManager(codec, muxer)
        }
    }

-    // State management
-    private var chunkIndex = 0
-    private var encodedFormat: MediaFormat? = null
    private var recording = false
-
-    // Segment tracking
-    private var segmentContext: SegmentContext? = null
-    private var initSegmentEmitted = false
-
-    // Cumulative base time for HLS-compatible timestamps (in timescale units)
-    // Each segment's baseMediaDecodeTime should be the sum of all previous segment durations
-    private var cumulativeBaseTimeUs: Long = 0L
-
-    // Timescale used in the fMP4 (typically 1000000 for microseconds)
-    private val timescale: Long = 1_000_000L
+    private var muxerStarted = false
+    private var trackIndex = -1

    override val surface: Surface = encoder.createInputSurface()

    init {
-        if (!outputDirectory.exists()) {
-            outputDirectory.mkdirs()
-        }
        encoder.setCallback(this)
    }

-    /**
-     * Result from finishing a segment, used for tfdt patching.
-     */
-    private data class SegmentResult(
-        val file: File,
-        val segmentIndex: Int,
-        val durationUs: Long
-    )
-
-    /**
-     * Context for a single data segment being written.
-     * Init segments are created separately via createInitSegment().
-     */
-    private inner class SegmentContext(
-        private val format: MediaFormat,
-        val segmentIndex: Int,
-        private val baseTimeUs: Long  // The baseMediaDecodeTime for this segment
-    ) {
-        private val filename = "$segmentIndex.mp4"
-        val file = File(outputDirectory, filename)
-        private val outputStream = FileOutputStream(file)
-        private val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
-        private lateinit var videoTrack: Muxer.TrackToken
-        private var startTimeUs: Long = -1L
-        private var lastTimeUs: Long = 0L
-        private var sampleCount = 0
-
-        init {
-            val media3Format = convertToMedia3Format(format)
-            videoTrack = muxer.addTrack(media3Format)
-            Log.d(TAG, "Created segment context: $filename with baseTimeUs=$baseTimeUs")
-        }
-
-        fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo) {
-            if (startTimeUs < 0) {
-                startTimeUs = bufferInfo.presentationTimeUs
-                Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FIRST sample: absolutePTS=${bufferInfo.presentationTimeUs}us, baseTimeUs=$baseTimeUs")
-            }
-
-            // Log first 3 samples and every keyframe for debugging
-            val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
-            if (sampleCount < 3 || isKeyFrame) {
-                Log.i(TAG, "PTS_DEBUG Segment $segmentIndex sample $sampleCount: PTS=${bufferInfo.presentationTimeUs}us, keyframe=$isKeyFrame")
-            }
-
-            lastTimeUs = bufferInfo.presentationTimeUs
-
-            muxer.writeSampleData(videoTrack, buffer, bufferInfo)
-            sampleCount++
-        }
-
-        /**
-         * Check if we've accumulated enough duration to start a new segment.
-         * Should only be called when we have a keyframe available.
-         */
-        fun shouldStartNewSegmentOnKeyframe(): Boolean {
-            if (sampleCount == 0) return false // Need at least one sample first
-            val currentDurationUs = lastTimeUs - startTimeUs
-            return currentDurationUs >= targetSegmentDurationUs
-        }
-
-        fun finish(): SegmentResult {
-            try {
-                muxer.close()
-                outputStream.close()
-            } catch (e: Exception) {
-                Log.e(TAG, "Error closing segment", e)
-            }
-
-            val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L
-
-            Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FINISHED: startPTS=${startTimeUs}us, lastPTS=${lastTimeUs}us, duration=${durationUs/1000}ms, samples=$sampleCount, baseTimeUs=$baseTimeUs")
-            return SegmentResult(file, segmentIndex, durationUs)
-        }
-    }
-
-    private fun createNewSegment() {
-        val format = encodedFormat
-        if (format == null) {
-            Log.e(TAG, "Cannot create segment: encodedFormat is null")
-            return
-        }
-
-        // Close previous segment and process it for HLS
-        segmentContext?.let { ctx ->
-            val result = ctx.finish()
-            // Process the segment: extract init (if first), strip headers, inject tfdt
-            processSegmentForHLS(result.file, result.segmentIndex, cumulativeBaseTimeUs)
-            // Update cumulative time for next segment
-            cumulativeBaseTimeUs += result.durationUs
-            // Notify callback
-            callbacks.onVideoChunkReady(result.file, result.segmentIndex, result.durationUs)
-        }
-
-        // Create new data segment with current cumulative base time
-        segmentContext = SegmentContext(format, chunkIndex, cumulativeBaseTimeUs)
-        chunkIndex++
-    }
-
    override fun start() {
        encoder.start()
        recording = true
@@ -229,13 +110,12 @@ class FragmentedRecordingManager(
    override fun finish() {
        synchronized(this) {
            recording = false
-            // Close final segment and process it for HLS
-            segmentContext?.let { ctx ->
-                val result = ctx.finish()
-                processSegmentForHLS(result.file, result.segmentIndex, cumulativeBaseTimeUs)
-                callbacks.onVideoChunkReady(result.file, result.segmentIndex, result.durationUs)
+
+            if (muxerStarted) {
+                muxer.stop()
+                muxer.release()
            }
-            segmentContext = null
+
            try {
                encoder.stop()
                encoder.release()
@@ -246,6 +126,7 @@ class FragmentedRecordingManager(
    }

    // MediaCodec.Callback methods
+
    override fun onInputBufferAvailable(codec: MediaCodec, index: Int) {
        // Not used for Surface input
    }
@@ -257,46 +138,20 @@ class FragmentedRecordingManager(
                return
            }

-            // Skip codec config buffers - these contain SPS/PPS with annex-b start codes
-            // and should NOT be written as samples (they're already in the Format's initializationData)
-            if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
-                Log.d(TAG, "Skipping codec config buffer (size=${bufferInfo.size})")
+            if (!muxerStarted) {
                encoder.releaseOutputBuffer(index, false)
                return
            }

-            val encodedData = encoder.getOutputBuffer(index)
-            if (encodedData == null) {
+            val buffer = encoder.getOutputBuffer(index)
+            if (buffer == null) {
                Log.e(TAG, "getOutputBuffer returned null")
                encoder.releaseOutputBuffer(index, false)
                return
            }

-            // Create first data segment if needed
-            if (segmentContext == null) {
-                createNewSegment()
-            }
-
-            val context = segmentContext
-            if (context == null) {
-                encoder.releaseOutputBuffer(index, false)
-                return
-            }
-
            try {
-                // Check if this keyframe should start a new segment BEFORE writing
-                val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
-                val shouldStartNewSegment = isKeyFrame && context.shouldStartNewSegmentOnKeyframe()
-
-                if (shouldStartNewSegment) {
-                    // Finish old segment WITHOUT writing this keyframe to it
-                    createNewSegment()
-                    // Write keyframe to the NEW segment only
-                    segmentContext?.writeSample(encodedData, bufferInfo)
-                } else {
-                    // Write to current segment
-                    context.writeSample(encodedData, bufferInfo)
-                }
+                muxer.writeSampleData(trackIndex, buffer, bufferInfo)
            } catch (e: Exception) {
                Log.e(TAG, "Error writing sample", e)
            }
@@ -310,453 +165,12 @@ class FragmentedRecordingManager(
    }

    override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) {
-        Log.i(TAG, "Output format changed: $format")
-        encodedFormat = format
-        // Note: init segment is now extracted from the first segment's ftyp+moov
-        // rather than created separately (Media3's empty init was not working)
-    }
+        synchronized(this) {
+            Log.i(TAG, "Output format changed: $format")

-    private fun convertToMedia3Format(mediaFormat: MediaFormat): Format {
-        val mimeType = mediaFormat.getString(MediaFormat.KEY_MIME) ?: MimeTypes.VIDEO_H264
-        val width = mediaFormat.getInteger(MediaFormat.KEY_WIDTH)
-        val height = mediaFormat.getInteger(MediaFormat.KEY_HEIGHT)
-        val bitRate = try { mediaFormat.getInteger(MediaFormat.KEY_BIT_RATE) } catch (e: Exception) { -1 }
-        val frameRate = try { mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE) } catch (e: Exception) { -1 }
-
-        // Get CSD (Codec Specific Data) if available - required for init segment
-        // csd-0 contains SPS (Sequence Parameter Set)
-        // csd-1 contains PPS (Picture Parameter Set)
-        val csd0 = mediaFormat.getByteBuffer("csd-0")
-        val csd1 = mediaFormat.getByteBuffer("csd-1")
-
-        val initData = mutableListOf<ByteArray>()
-        csd0?.let {
-            val bytes = ByteArray(it.remaining())
-            it.duplicate().get(bytes)
-            initData.add(bytes)
-            Log.i(TAG, "CSD_DEBUG: csd-0 (SPS) size=${bytes.size} bytes, hex=${bytes.take(32).joinToString("") { "%02x".format(it) }}...")
-        }
-        csd1?.let {
-            val bytes = ByteArray(it.remaining())
-            it.duplicate().get(bytes)
-            initData.add(bytes)
-            Log.i(TAG, "CSD_DEBUG: csd-1 (PPS) size=${bytes.size} bytes, hex=${bytes.joinToString("") { "%02x".format(it) }}")
-        }
-
-        val totalCsdSize = initData.sumOf { it.size }
-        Log.i(TAG, "CSD_DEBUG: Total CSD size=$totalCsdSize bytes (csd-0=${csd0?.remaining() ?: 0}, csd-1=${csd1?.remaining() ?: 0})")
-
-        Log.i(TAG, "ROTATION_DEBUG convertToMedia3Format: orientationDegrees=$orientationDegrees, width=$width, height=$height")
-        return Format.Builder()
-            .setSampleMimeType(mimeType)
-            .setWidth(width)
-            .setHeight(height)
-            .setRotationDegrees(orientationDegrees)
-            .apply {
-                if (bitRate > 0) setAverageBitrate(bitRate)
-                if (frameRate > 0) setFrameRate(frameRate.toFloat())
-                if (initData.isNotEmpty()) setInitializationData(initData)
-            }
-            .build()
-    }
-
-    /**
-     * Processes a segment file for HLS compatibility:
-     * 1. For segment 0: extracts ftyp+moov header as init.mp4
-     * 2. Strips ftyp+moov from segment, keeping only moof+mdat (the fragment)
-     * 3. Injects tfdt box into moof for proper HLS timing
-     *
-     * Media3's FragmentedMp4Muxer creates self-contained MP4s, but HLS needs:
-     * - init.mp4: ftyp + moov (codec configuration)
-     * - segments: moof + mdat only (fragments referencing init)
-     */
-    private fun processSegmentForHLS(file: File, segmentIndex: Int, baseMediaDecodeTimeUs: Long) {
-        try {
-            val originalBytes = file.readBytes()
-            val buffer = ByteBuffer.wrap(originalBytes).order(ByteOrder.BIG_ENDIAN)
-
-            // Find where moof starts (everything before is header: ftyp + moov)
-            val moofStartPos = findMoofPosition(buffer)
-            if (moofStartPos < 0) {
-                Log.e(TAG, "HLS_PROCESS: Could not find moof in ${file.name}")
-                return
-            }
-
-            Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex - moof starts at $moofStartPos, total size=${originalBytes.size}")
-
-            // For segment 0, extract header as init.mp4
-            if (segmentIndex == 0 && !initSegmentEmitted) {
-                val headerBytes = originalBytes.copyOfRange(0, moofStartPos)
-                val initFile = File(outputDirectory, "init.mp4")
-                initFile.writeBytes(headerBytes)
-                Log.i(TAG, "HLS_PROCESS: Created init.mp4 with ${headerBytes.size} bytes (ftyp+moov)")
-                // Debug: dump the init.mp4 structure
-                dumpMp4BoxStructure(headerBytes, "INIT_STRUCTURE")
-                callbacks.onInitSegmentReady(initFile)
-                initSegmentEmitted = true
-            }
-
-            // Extract fragment (moof + mdat only)
-            val fragmentBytes = originalBytes.copyOfRange(moofStartPos, originalBytes.size)
-            Log.d(TAG, "HLS_PROCESS: Extracted fragment of ${fragmentBytes.size} bytes")
-
-            // Inject tfdt into the fragment
-            // Note: in the fragment, moof is at position 0
-            val processedFragment = injectTfdtIntoFragment(fragmentBytes, baseMediaDecodeTimeUs)
-
-            // Write back the processed fragment (stripped of header)
-            file.writeBytes(processedFragment)
-
-            Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex processed - header stripped, tfdt injected, final size=${processedFragment.size}")
-        } catch (e: Exception) {
-            Log.e(TAG, "Error processing segment ${file.name} for HLS", e)
-        }
-    }
-
-    /**
-     * Finds the position of the moof box in the file.
-     * Returns -1 if not found.
-     */
-    private fun findMoofPosition(buffer: ByteBuffer): Int {
-        var pos = 0
-        while (pos < buffer.limit() - 8) {
-            buffer.position(pos)
-            val size = buffer.int.toLong() and 0xFFFFFFFFL
-            val type = buffer.int
-
-            if (size < 8) break
-
-            // 'moof' = 0x6D6F6F66
-            if (type == 0x6D6F6F66) {
-                return pos
-            }
-
-            pos += size.toInt()
-        }
-        return -1
-    }
-
-    /**
-     * Injects a tfdt box into a fragment (moof+mdat).
-     * The fragment has moof at position 0 (header already stripped).
-     * Also fixes tfhd.base_data_offset since we stripped the original file header.
-     */
-    private fun injectTfdtIntoFragment(fragmentBytes: ByteArray, baseMediaDecodeTimeUs: Long): ByteArray {
-        val buffer = ByteBuffer.wrap(fragmentBytes).order(ByteOrder.BIG_ENDIAN)
-
-        // Find box positions within the fragment (moof is at position 0)
-        val positions = findBoxPositionsInFragment(buffer)
-        if (positions == null) {
-            Log.e(TAG, "TFDT_INJECT: Could not find required boxes in fragment")
-            return fragmentBytes
-        }
-
-        val (moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos) = positions
-        Log.d(TAG, "TFDT_INJECT: Fragment boxes - moof@$moofPos(size=$moofSize), traf@$trafPos, tfhd@$tfhdPos, trun@$trunPos")
-
-        // First, fix tfhd.base_data_offset - it was pointing to the original file position
-        // but now moof is at position 0, so base_data_offset should be 0
-        fixTfhdBaseDataOffset(buffer, tfhdPos.toInt())
-
-        // Create tfdt box (version 1, 64-bit baseMediaDecodeTime)
-        val tfdtSize = 20
-        val tfdtBytes = ByteBuffer.allocate(tfdtSize).order(ByteOrder.BIG_ENDIAN)
-        tfdtBytes.putInt(tfdtSize)                  // size
-        tfdtBytes.putInt(0x74666474)                // 'tfdt'
-        tfdtBytes.put(1.toByte())                   // version = 1
-        tfdtBytes.put(0.toByte())                   // flags[0]
-        tfdtBytes.put(0.toByte())                   // flags[1]
-        tfdtBytes.put(0.toByte())                   // flags[2]
-        tfdtBytes.putLong(baseMediaDecodeTimeUs)    // baseMediaDecodeTime
-
-        // Create new fragment with tfdt injected after tfhd
-        val newBytes = ByteArray(fragmentBytes.size + tfdtSize)
-        val insertPos = tfhdEnd.toInt()
-
-        // Copy bytes before insertion point
-        System.arraycopy(fragmentBytes, 0, newBytes, 0, insertPos)
-
-        // Insert tfdt
-        System.arraycopy(tfdtBytes.array(), 0, newBytes, insertPos, tfdtSize)
-
-        // Copy bytes after insertion point
-        System.arraycopy(fragmentBytes, insertPos, newBytes, insertPos + tfdtSize, fragmentBytes.size - insertPos)
-
-        // Update box sizes in the new buffer
-        val newBuffer = ByteBuffer.wrap(newBytes).order(ByteOrder.BIG_ENDIAN)
-
-        // Update moof size
-        val newMoofSize = moofSize + tfdtSize
-        newBuffer.putInt(moofPos.toInt(), newMoofSize.toInt())
-
-        // Update traf size
-        val newTrafSize = trafSize + tfdtSize
-        newBuffer.putInt(trafPos.toInt(), newTrafSize.toInt())
-
-        // Update trun data_offset if present
-        val newTrunPos = trunPos.toInt() + tfdtSize
-        updateTrunDataOffset(newBuffer, newTrunPos, tfdtSize)
-
-        Log.i(TAG, "TFDT_INJECT: Injected tfdt with baseMediaDecodeTime=$baseMediaDecodeTimeUs us")
-        return newBytes
-    }
-
-    /**
-     * Data class to hold box positions for tfdt injection.
-     */
-    private data class BoxPositions(
-        val moofPos: Long,
-        val moofSize: Long,
-        val trafPos: Long,
-        val trafSize: Long,
-        val tfhdPos: Long,  // Position of tfhd (need to fix base_data_offset)
-        val tfhdEnd: Long,  // Position right after tfhd where we'll insert tfdt
-        val trunPos: Long   // Position of trun (need to update its data_offset)
-    )
-
-    /**
-     * Finds the positions of moof, traf, tfhd, and trun boxes in a fragment.
-     * In a fragment, moof is expected to be at position 0.
-     */
-    private fun findBoxPositionsInFragment(buffer: ByteBuffer): BoxPositions? {
-        val fileSize = buffer.limit()
-        var pos = 0
-
-        while (pos < fileSize - 8) {
-            buffer.position(pos)
-            val size = buffer.int.toLong() and 0xFFFFFFFFL
-            val type = buffer.int
-
-            if (size < 8) break
-
-            // 'moof' = 0x6D6F6F66
-            if (type == 0x6D6F6F66) {
-                val moofPos = pos.toLong()
-                val moofSize = size
-                val moofEnd = pos + size.toInt()
-                var childPos = pos + 8
-
-                while (childPos < moofEnd - 8) {
-                    buffer.position(childPos)
-                    val childSize = buffer.int.toLong() and 0xFFFFFFFFL
-                    val childType = buffer.int
-
-                    if (childSize < 8) break
-
-                    // 'traf' = 0x74726166
-                    if (childType == 0x74726166) {
-                        val trafPos = childPos.toLong()
-                        val trafSize = childSize
-                        val trafEnd = childPos + childSize.toInt()
-                        var trafChildPos = childPos + 8
-
-                        var tfhdPos: Long = -1
-                        var tfhdEnd: Long = -1
-                        var trunPos: Long = -1
-
-                        while (trafChildPos < trafEnd - 8) {
-                            buffer.position(trafChildPos)
-                            val trafChildSize = buffer.int.toLong() and 0xFFFFFFFFL
-                            val trafChildType = buffer.int
-
-                            if (trafChildSize < 8) break
-
-                            // 'tfhd' = 0x74666864
-                            if (trafChildType == 0x74666864) {
-                                tfhdPos = trafChildPos.toLong()
-                                tfhdEnd = trafChildPos + trafChildSize
-                            }
-                            // 'trun' = 0x7472756E
-                            else if (trafChildType == 0x7472756E) {
-                                trunPos = trafChildPos.toLong()
-                            }
-
-                            trafChildPos += trafChildSize.toInt()
-                        }
-
-                        if (tfhdPos > 0 && tfhdEnd > 0 && trunPos > 0) {
-                            return BoxPositions(moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos)
-                        }
-                    }
-
-                    childPos += childSize.toInt()
-                }
-            }
-
-            pos += size.toInt()
-        }
-
-        return null
-    }
-
-    /**
-     * Updates the trun box's data_offset field if present.
-     * The data_offset points to sample data in mdat, and needs to be
-     * increased by the size of the injected tfdt box.
-     *
-     * trun structure:
-     * - 4 bytes: size
-     * - 4 bytes: type ('trun')
-     * - 1 byte: version
-     * - 3 bytes: flags
-     * - 4 bytes: sample_count
-     * - [optional] 4 bytes: data_offset (if flags & 0x000001)
-     */
-    private fun updateTrunDataOffset(buffer: ByteBuffer, trunPos: Int, offsetDelta: Int) {
-        buffer.position(trunPos + 8) // Skip size and type
-        val version = buffer.get().toInt() and 0xFF
-        val flags = ((buffer.get().toInt() and 0xFF) shl 16) or
-                    ((buffer.get().toInt() and 0xFF) shl 8) or
-                    (buffer.get().toInt() and 0xFF)
-
-        // Check if data_offset_present flag (0x000001) is set
-        if ((flags and 0x000001) != 0) {
-            val sampleCount = buffer.int
-            val dataOffsetPos = trunPos + 16 // size(4) + type(4) + version(1) + flags(3) + sample_count(4)
-            buffer.position(dataOffsetPos)
-            val originalOffset = buffer.int
-            val newOffset = originalOffset + offsetDelta
-            buffer.putInt(dataOffsetPos, newOffset)
-            Log.d(TAG, "TFDT_INJECT: Updated trun data_offset: $originalOffset -> $newOffset")
-        } else {
-            Log.d(TAG, "TFDT_INJECT: trun has no data_offset field (flags=0x${flags.toString(16)})")
-        }
-    }
-
-    /**
-     * Fixes the tfhd box's base_data_offset field after stripping the file header.
-     * When we strip ftyp+moov from the original segment, the base_data_offset
-     * (which pointed to a position in the original file) becomes incorrect.
-     * We set it to 0 since moof is now at the start of the fragment.
-     *
-     * tfhd structure:
-     * - 4 bytes: size
-     * - 4 bytes: type ('tfhd')
-     * - 1 byte: version
-     * - 3 bytes: flags
-     * - 4 bytes: track_id
-     * - [optional] 8 bytes: base_data_offset (if flags & 0x000001)
-     */
-    private fun fixTfhdBaseDataOffset(buffer: ByteBuffer, tfhdPos: Int) {
-        buffer.position(tfhdPos + 8) // Skip size and type
-        val version = buffer.get().toInt() and 0xFF
-        val flags = ((buffer.get().toInt() and 0xFF) shl 16) or
-                    ((buffer.get().toInt() and 0xFF) shl 8) or
-                    (buffer.get().toInt() and 0xFF)
-
-        // Check if base_data_offset_present flag (0x000001) is set
-        if ((flags and 0x000001) != 0) {
-            val trackId = buffer.int
-            val baseDataOffsetPos = tfhdPos + 16 // size(4) + type(4) + version(1) + flags(3) + track_id(4)
-            buffer.position(baseDataOffsetPos)
-            val originalOffset = buffer.long
-            // Set to 0 since moof is now at start of fragment
-            buffer.putLong(baseDataOffsetPos, 0L)
-            Log.i(TAG, "TFHD_FIX: Fixed base_data_offset: $originalOffset -> 0")
-        } else {
-            Log.d(TAG, "TFHD_FIX: tfhd has no base_data_offset field (flags=0x${flags.toString(16)})")
-        }
-    }
-
-    /**
-     * Debug function to dump MP4 box structure and find avcC/stsd info.
-     */
-    private fun dumpMp4BoxStructure(data: ByteArray, logPrefix: String) {
-        val buffer = ByteBuffer.wrap(data).order(ByteOrder.BIG_ENDIAN)
-        dumpBoxesRecursive(buffer, 0, data.size, 0, logPrefix)
-    }
-
-    private fun dumpBoxesRecursive(buffer: ByteBuffer, start: Int, end: Int, depth: Int, logPrefix: String) {
-        var pos = start
-        val indent = "  ".repeat(depth)
-
-        while (pos < end - 8) {
-            buffer.position(pos)
-            val size = buffer.int.toLong() and 0xFFFFFFFFL
-            val typeInt = buffer.int
-            val typeBytes = ByteArray(4)
-            typeBytes[0] = ((typeInt shr 24) and 0xFF).toByte()
-            typeBytes[1] = ((typeInt shr 16) and 0xFF).toByte()
-            typeBytes[2] = ((typeInt shr 8) and 0xFF).toByte()
-            typeBytes[3] = (typeInt and 0xFF).toByte()
-            val typeStr = String(typeBytes, Charsets.US_ASCII)
-
-            if (size < 8 || pos + size > end) break
-
-            Log.i(TAG, "$logPrefix: $indent[$typeStr] size=$size @ $pos")
-
-            // For ftyp, dump the brands
-            if (typeStr == "ftyp" && size >= 16) {
-                buffer.position(pos + 8)
-                val majorBrand = ByteArray(4)
-                buffer.get(majorBrand)
-                val minorVersion = buffer.int
-                Log.i(TAG, "$logPrefix: $indent  major_brand=${String(majorBrand)}, minor_version=$minorVersion")
-
-                val compatBrandsStart = pos + 16
-                val compatBrandsEnd = pos + size.toInt()
-                val brands = mutableListOf<String>()
-                var brandPos = compatBrandsStart
-                while (brandPos + 4 <= compatBrandsEnd) {
-                    buffer.position(brandPos)
-                    val brand = ByteArray(4)
-                    buffer.get(brand)
-                    brands.add(String(brand))
-                    brandPos += 4
-                }
-                Log.i(TAG, "$logPrefix: $indent  compatible_brands=${brands.joinToString(",")}")
-            }
-
-            // For avcC, dump the SPS/PPS info
-            if (typeStr == "avcC" && size >= 13) {
-                buffer.position(pos + 8)
-                val configVersion = buffer.get().toInt() and 0xFF
-                val profileIdc = buffer.get().toInt() and 0xFF
-                val profileCompat = buffer.get().toInt() and 0xFF
-                val levelIdc = buffer.get().toInt() and 0xFF
-                val lengthSizeMinusOne = buffer.get().toInt() and 0x03
-                val numSps = buffer.get().toInt() and 0x1F
-
-                Log.i(TAG, "$logPrefix: $indent  avcC: version=$configVersion, profile=$profileIdc, level=$levelIdc, numSPS=$numSps")
-
-                // Read SPS lengths
-                var spsTotal = 0
-                for (i in 0 until numSps) {
-                    val spsLen = buffer.short.toInt() and 0xFFFF
-                    spsTotal += spsLen
-                    Log.i(TAG, "$logPrefix: $indent  SPS[$i] length=$spsLen")
-                    buffer.position(buffer.position() + spsLen)  // Skip SPS data
-                }
-
-                // Read PPS count and lengths
-                if (buffer.position() < pos + size) {
-                    val numPps = buffer.get().toInt() and 0xFF
-                    var ppsTotal = 0
-                    for (i in 0 until numPps) {
-                        if (buffer.position() + 2 <= pos + size) {
-                            val ppsLen = buffer.short.toInt() and 0xFFFF
-                            ppsTotal += ppsLen
-                            Log.i(TAG, "$logPrefix: $indent  PPS[$i] length=$ppsLen")
-                            buffer.position(buffer.position() + ppsLen)  // Skip PPS data
-                        }
-                    }
-                    Log.i(TAG, "$logPrefix: $indent  avcC total: ${size} bytes, SPS=$spsTotal bytes, PPS=$ppsTotal bytes")
-                }
-            }
-
-            // Recurse into container boxes
-            val containerBoxes = setOf("moov", "trak", "mdia", "minf", "stbl", "stsd", "mvex", "edts")
-            if (typeStr in containerBoxes) {
-                // stsd has 8 extra bytes (version/flags + entry_count) before children
-                val childStart = if (typeStr == "stsd") pos + 16 else pos + 8
-                dumpBoxesRecursive(buffer, childStart, pos + size.toInt(), depth + 1, logPrefix)
-            }
-            // avc1 is a sample entry, structure: 8 byte header + 78 byte fixed fields + child boxes
-            if (typeStr == "avc1") {
-                dumpBoxesRecursive(buffer, pos + 86, pos + size.toInt(), depth + 1, logPrefix)
-            }
-
-            pos += size.toInt()
+            trackIndex = muxer.addTrack(format)
+            muxer.start()
+            muxerStarted = true
        }
    }
 }
--- a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt
@@ -0,0 +1,908 @@
+package com.mrousavy.camera.core
+
+import android.media.MediaCodec
+import android.media.MediaFormat
+import android.util.Log
+import java.io.ByteArrayOutputStream
+import java.io.DataOutputStream
+import java.io.File
+import java.io.FileOutputStream
+import java.nio.ByteBuffer
+
+/**
+ * A muxer for creating HLS-compatible fragmented MP4 output.
+ *
+ * Follows the same pattern as Android's MediaMuxer:
+ * 1. Create muxer with output directory
+ * 2. addTrack() with MediaFormat
+ * 3. start() - writes init.mp4
+ * 4. writeSampleData() for each encoded sample
+ * 5. stop() - finalizes last segment
+ * 6. release() - cleanup
+ *
+ * Produces:
+ * - init.mp4: Initialization segment (ftyp + moov with mvex)
+ * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
+ */
+class HlsMuxer(
+    private val outputDirectory: File,
+    private val callback: Callback,
+    private val orientationDegrees: Int = 0
+) {
+    companion object {
+        // Tag passed to the android.util.Log calls made by this class.
+        private const val TAG = "HlsMuxer"
+        // Initial value of targetSegmentDurationUs, in microseconds.
+        private const val DEFAULT_SEGMENT_DURATION_US = 6_000_000L  // 6 seconds
+    }
+
+    /**
+     * Receives the files produced by this muxer.
+     */
+    interface Callback {
+        /** Invoked from [start] after `init.mp4` has been written; [file] is that file. */
+        fun onInitSegmentReady(file: File)
+        /**
+         * Invoked after a media segment has been written to disk.
+         *
+         * @param file The segment file, named `<index>.mp4` inside the output directory
+         * @param index Segment index, counted from 0
+         * @param durationUs Segment duration in microseconds
+         */
+        fun onMediaSegmentReady(file: File, index: Int, durationUs: Long)
+    }
+
+    // Configuration
+    private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
+    private var timescale: Int = 30000  // Default, updated from format
+
+    // State
+    private var state = State.UNINITIALIZED
+    private var trackFormat: MediaFormat? = null
+    private var sequenceNumber = 1
+    private var segmentIndex = 0
+
+    // Current segment data
+    private val pendingSamples = mutableListOf<Sample>()
+    private var segmentStartTimeUs = -1L
+    private var lastPresentationTimeUs = 0L
+
+    /** Lifecycle states of the muxer; the public methods check and advance [state]. */
+    private enum class State {
+        UNINITIALIZED,  // initial value of state; required by addTrack()
+        INITIALIZED,    // set by addTrack(); required by start()
+        STARTED,        // set by start(); required by writeSampleData() and stop()
+        STOPPED,        // set by stop()
+        RELEASED        // set by release()
+    }
+
+    /**
+     * One encoded sample buffered in [pendingSamples] until its segment is written.
+     *
+     * [durationUs] is mutable because writeSampleData() stores an estimate first and
+     * overwrites it once the next sample's presentation time is known.
+     */
+    private data class Sample(
+        val data: ByteArray,           // payload as returned by convertAnnexBToAvcc()
+        val presentationTimeUs: Long,  // copied from MediaCodec.BufferInfo.presentationTimeUs
+        var durationUs: Long,          // microseconds; estimate until the next sample arrives
+        val isKeyFrame: Boolean        // BUFFER_FLAG_KEY_FRAME was set on the sample
+    )
+
+    // ==================== Annex-B to AVCC Conversion ====================
+
+    /**
+     * Re-frames an H.264 access unit from Annex-B to AVCC.
+     *
+     * Annex-B separates NAL units with start codes (00 00 00 01 or 00 00 01); AVCC
+     * prefixes every NAL unit with its length as a 4-byte big-endian integer, which
+     * is the framing fMP4/HLS expects.
+     *
+     * @param annexBData Sample bytes as delivered by the encoder
+     * @return The length-prefixed sample, or [annexBData] itself when
+     *         parseAnnexBNalUnits() yields no NAL units
+     */
+    private fun convertAnnexBToAvcc(annexBData: ByteArray): ByteArray {
+        val units = parseAnnexBNalUnits(annexBData)
+        if (units.isEmpty()) {
+            Log.w(TAG, "No NAL units found in sample, returning original data")
+            return annexBData
+        }
+
+        // Every unit contributes its payload plus a 4-byte length field.
+        val totalSize = units.fold(0) { total, unit -> total + 4 + unit.size }
+
+        // ByteBuffer is big-endian by default, so putInt() emits the AVCC length prefix.
+        val avcc = ByteBuffer.allocate(totalSize)
+        for (unit in units) {
+            avcc.putInt(unit.size)
+            avcc.put(unit)
+        }
+        return avcc.array()
+    }
+
+    /**
+     * Splits Annex-B formatted data into its NAL units.
+     *
+     * Bytes that precede the first start code are skipped. If the data does not begin
+     * with a start code and its first four bytes read as a plausible big-endian
+     * length, the data is taken to be AVCC already and an empty list is returned.
+     *
+     * @return The NAL unit payloads without their start codes (possibly empty)
+     */
+    private fun parseAnnexBNalUnits(data: ByteArray): List<ByteArray> {
+        val units = mutableListOf<ByteArray>()
+        var cursor = 0
+
+        while (cursor < data.size) {
+            val prefixLength = findStartCode(data, cursor)
+
+            if (prefixLength == 0) {
+                // The AVCC sniff test only applies to the very first byte position.
+                if (cursor == 0 && units.isEmpty() && data.size >= 4) {
+                    val declaredLength = ByteBuffer.wrap(data, 0, 4).int
+                    if (declaredLength > 0 && declaredLength <= data.size - 4) {
+                        Log.d(TAG, "Data appears to already be in AVCC format")
+                        return emptyList()
+                    }
+                }
+                cursor++
+                continue
+            }
+
+            // The payload runs up to the next start code, or to the end of the data.
+            val payloadStart = cursor + prefixLength
+            val payloadEnd = (payloadStart until data.size - 2)
+                .firstOrNull { findStartCode(data, it) > 0 }
+                ?: data.size
+
+            if (payloadEnd > payloadStart) {
+                units.add(data.copyOfRange(payloadStart, payloadEnd))
+            }
+            cursor = payloadEnd
+        }
+
+        return units
+    }
+
+    /**
+     * Tests whether an Annex-B start code begins exactly at [offset].
+     *
+     * @return 4 for `00 00 00 01`, 3 for `00 00 01`, or 0 when neither is present
+     */
+    private fun findStartCode(data: ByteArray, offset: Int): Int {
+        val zero: Byte = 0
+        val one: Byte = 1
+        val available = data.size - offset
+
+        // Both start-code forms open with two zero bytes and need at least three bytes.
+        if (available < 3 || data[offset] != zero || data[offset + 1] != zero) return 0
+
+        return when {
+            data[offset + 2] == one -> 3
+            available >= 4 && data[offset + 2] == zero && data[offset + 3] == one -> 4
+            else -> 0
+        }
+    }
+
+    /**
+     * Overrides the target duration of a media segment.
+     *
+     * @param durationUs Target duration in microseconds
+     * @throws IllegalStateException once start() has been called
+     */
+    fun setSegmentDuration(durationUs: Long) {
+        val notYetStarted = when (state) {
+            State.UNINITIALIZED, State.INITIALIZED -> true
+            else -> false
+        }
+        check(notYetStarted) { "Cannot set segment duration after start()" }
+        targetSegmentDurationUs = durationUs
+    }
+
+    /**
+     * Registers the single video track and derives the timescale from its frame rate.
+     *
+     * The timescale is `frameRate * 1000`. When [MediaFormat.KEY_FRAME_RATE] is
+     * missing, cannot be read as an integer, or is not positive, 30 fps is assumed so
+     * that the timescale later written into the init segment is never zero or negative.
+     *
+     * @param format The MediaFormat describing the track
+     * @return Track index (always 0 for now, single video track)
+     * @throws IllegalStateException if the muxer is no longer in its initial state
+     */
+    fun addTrack(format: MediaFormat): Int {
+        check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" }
+
+        // getInteger() throws when the key is absent or holds a non-integer value.
+        fun intOrDefault(key: String, default: Int): Int =
+            try {
+                format.getInteger(key)
+            } catch (e: Exception) {
+                default
+            }
+
+        trackFormat = format
+
+        // A frame rate of 0 or less would produce an unusable timescale; fall back to 30.
+        val fps = intOrDefault(MediaFormat.KEY_FRAME_RATE, 30).takeIf { it > 0 } ?: 30
+        timescale = fps * 1000  // Use fps * 1000 for good precision
+
+        state = State.INITIALIZED
+
+        val formatWidth = intOrDefault(MediaFormat.KEY_WIDTH, -1)
+        val formatHeight = intOrDefault(MediaFormat.KEY_HEIGHT, -1)
+        Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
+                "encoder output: ${formatWidth}x${formatHeight}, " +
+                "timescale=$timescale, orientation=$orientationDegrees°")
+
+        return 0  // Single track, index 0
+    }
+
+    /**
+     * Writes `init.mp4` into the output directory, reports it through
+     * [Callback.onInitSegmentReady] and moves the muxer into the started state.
+     *
+     * @throws IllegalStateException unless addTrack() has been called first
+     */
+    fun start() {
+        check(state == State.INITIALIZED) { "Must call addTrack() before start()" }
+        val format = checkNotNull(trackFormat) { "No track format" }
+
+        // Create output directory if needed
+        if (!outputDirectory.exists()) outputDirectory.mkdirs()
+
+        val initBytes = buildInitSegment(format)
+        val initFile = File(outputDirectory, "init.mp4")
+        initFile.writeBytes(initBytes)
+        Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)")
+        callback.onInitSegmentReady(initFile)
+
+        state = State.STARTED
+    }
+
+    /**
+     * Buffers one encoded sample and, at keyframe boundaries, flushes the buffered
+     * samples as a media segment once the target segment duration has been reached.
+     *
+     * Codec-config buffers are ignored because their content is already part of the
+     * init segment. The caller's [buffer] is read through a duplicate, so its
+     * position and limit are left untouched.
+     *
+     * @param trackIndex Track index (must be 0)
+     * @param buffer The encoded sample data
+     * @param bufferInfo Sample metadata (offset, size, presentation time, flags)
+     * @throws IllegalStateException if the muxer is not started or the track index is not 0
+     */
+    fun writeSampleData(trackIndex: Int, buffer: ByteBuffer, bufferInfo: MediaCodec.BufferInfo) {
+        check(state == State.STARTED) { "Must call start() before writeSampleData()" }
+        check(trackIndex == 0) { "Invalid track index: $trackIndex" }
+
+        // Skip codec config data (already in init segment)
+        if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
+            return
+        }
+
+        val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
+        val presentationTimeUs = bufferInfo.presentationTimeUs
+
+        // Initialize segment start time
+        if (segmentStartTimeUs < 0) {
+            segmentStartTimeUs = presentationTimeUs
+        }
+
+        // Correct the previous sample's duration BEFORE a possible flush. Otherwise the
+        // last sample of every segment would be written with its estimated duration and
+        // the segment would not end exactly where the next one begins.
+        pendingSamples.lastOrNull()?.let { previous ->
+            previous.durationUs = presentationTimeUs - previous.presentationTimeUs
+        }
+
+        // Segments are only cut at keyframes, once the target duration has elapsed.
+        if (isKeyFrame && pendingSamples.isNotEmpty()) {
+            val segmentDurationUs = presentationTimeUs - segmentStartTimeUs
+            if (segmentDurationUs >= targetSegmentDurationUs) {
+                finalizeCurrentSegment()
+                segmentStartTimeUs = presentationTimeUs
+            }
+        }
+
+        // Copy the sample bytes through a duplicate so the caller's buffer state is
+        // preserved. The limit is set first so the position is always within bounds.
+        val rawData = ByteArray(bufferInfo.size)
+        val window = buffer.duplicate()
+        window.limit(bufferInfo.offset + bufferInfo.size)
+        window.position(bufferInfo.offset)
+        window.get(rawData)
+
+        // Convert Annex-B (start codes) to AVCC (length prefixes)
+        val data = convertAnnexBToAvcc(rawData)
+
+        // Estimate duration (will be corrected by next sample)
+        val estimatedDurationUs = if (lastPresentationTimeUs > 0) {
+            presentationTimeUs - lastPresentationTimeUs
+        } else {
+            1_000_000L / 30  // Assume 30fps
+        }
+
+        pendingSamples.add(Sample(
+            data = data,
+            presentationTimeUs = presentationTimeUs,
+            durationUs = estimatedDurationUs,
+            isKeyFrame = isKeyFrame
+        ))
+
+        lastPresentationTimeUs = presentationTimeUs
+    }
+
+    /**
+     * Flushes any buffered samples as a final segment and marks the muxer stopped.
+     *
+     * @throws IllegalStateException if the muxer is not currently started
+     */
+    fun stop() {
+        check(state == State.STARTED) { "Muxer not started" }
+
+        val hasBufferedSamples = pendingSamples.isNotEmpty()
+        if (hasBufferedSamples) finalizeCurrentSegment()
+
+        state = State.STOPPED
+        Log.d(TAG, "Muxer stopped, wrote $segmentIndex segments")
+    }
+
+    /**
+     * Stops the muxer if it is still running, drops any buffered samples and marks
+     * the muxer released.
+     */
+    fun release() {
+        val stillRunning = state == State.STARTED
+        if (stillRunning) stop()
+
+        pendingSamples.clear()
+        state = State.RELEASED
+    }
+
+    /**
+     * Writes the buffered samples to `<segmentIndex>.mp4`, reports the file through
+     * [Callback.onMediaSegmentReady] and resets the buffer for the next segment.
+     *
+     * Does nothing when no samples are buffered. Any exception is logged and
+     * swallowed; in that case the counters and the buffer are left unchanged.
+     */
+    private fun finalizeCurrentSegment() {
+        val first = pendingSamples.firstOrNull() ?: return
+
+        try {
+            val segmentBytes = buildMediaSegment(pendingSamples, sequenceNumber, first.presentationTimeUs)
+
+            val segmentFile = File(outputDirectory, "$segmentIndex.mp4")
+            segmentFile.writeBytes(segmentBytes)
+
+            // Span from the first sample's start to the end of the last sample.
+            val last = pendingSamples.last()
+            val durationUs = (last.presentationTimeUs - first.presentationTimeUs) + last.durationUs
+
+            Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " +
+                    "duration=${durationUs / 1000}ms, size=${segmentBytes.size} bytes")
+
+            callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs)
+
+            segmentIndex += 1
+            sequenceNumber += 1
+            pendingSamples.clear()
+        } catch (e: Exception) {
+            Log.e(TAG, "Error finalizing segment $segmentIndex", e)
+        }
+    }
+
+    // ==================== Init Segment Building ====================
+
+    /**
+     * Assembles the initialization segment: an ftyp box followed by a moov box.
+     *
+     * @param format Track format; width, height, `csd-0` (SPS) and `csd-1` (PPS) are read
+     * @throws IllegalArgumentException if `csd-0` or `csd-1` is absent
+     */
+    private fun buildInitSegment(format: MediaFormat): ByteArray {
+        val width = format.getInteger(MediaFormat.KEY_WIDTH)
+        val height = format.getInteger(MediaFormat.KEY_HEIGHT)
+
+        val sps = extractNalUnit(
+            requireNotNull(format.getByteBuffer("csd-0")) { "Missing SPS (csd-0)" }
+        )
+        val pps = extractNalUnit(
+            requireNotNull(format.getByteBuffer("csd-1")) { "Missing PPS (csd-1)" }
+        )
+
+        // ftyp first, then moov
+        return buildFtypBox() + buildMoovBox(width, height, sps, pps)
+    }
+
+    /**
+     * Copies the remaining bytes of [buffer] and removes a leading Annex-B start code
+     * (`00 00 00 01` or `00 00 01`) if there is one. The buffer's own position is not
+     * moved because the bytes are read through a duplicate.
+     */
+    private fun extractNalUnit(buffer: ByteBuffer): ByteArray {
+        val bytes = ByteArray(buffer.remaining()).also { buffer.duplicate().get(it) }
+
+        // A start code is exactly two or three zero bytes followed by 0x01, so the
+        // first non-zero byte must sit at index 2 or 3 and must be 0x01.
+        val firstNonZero = bytes.indexOfFirst { it != 0.toByte() }
+        val hasStartCode = firstNonZero in 2..3 && bytes[firstNonZero] == 1.toByte()
+
+        return if (hasStartCode) bytes.copyOfRange(firstNonZero + 1, bytes.size) else bytes
+    }
+
+    /**
+     * Builds the file type ('ftyp') box: major brand `isom`, minor version 0x200 and
+     * the compatible brands isom, iso5, iso6, avc1, mp41 and dash.
+     */
+    private fun buildFtypBox(): ByteArray {
+        val compatibleBrands = listOf("isom", "iso5", "iso6", "avc1", "mp41", "dash")
+
+        // size(4) + type(4) + major brand(4) + minor version(4) + 4 bytes per brand
+        val boxSize = 16 + 4 * compatibleBrands.size
+
+        val box = ByteBuffer.allocate(boxSize)
+        box.putInt(boxSize)
+        box.put("ftyp".toByteArray(Charsets.US_ASCII))
+        box.put("isom".toByteArray(Charsets.US_ASCII))  // major brand
+        box.putInt(0x200)                               // minor version
+        for (brand in compatibleBrands) {
+            box.put(brand.toByteArray(Charsets.US_ASCII))
+        }
+        return box.array()
+    }
+
+    /** Builds the 'moov' box from its mvhd, trak and mvex children, in that order. */
+    private fun buildMoovBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val children = buildMvhdBox() + buildTrakBox(width, height, sps, pps) + buildMvexBox()
+        return wrapBox("moov", children)
+    }
+
+    /**
+     * Builds the movie header ('mvhd') box with version/flags 0.
+     *
+     * Creation time, modification time and duration are written as 0; rate and
+     * volume are 1.0, the matrix is the identity and the next track ID is 2.
+     */
+    private fun buildMvhdBox(): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(0)                // version & flags
+            writeInt(0)                // creation time
+            writeInt(0)                // modification time
+            writeInt(timescale)        // timescale
+            writeInt(0)                // duration
+            writeInt(0x00010000)       // rate = 1.0
+            writeShort(0x0100)         // volume = 1.0
+            writeShort(0)              // reserved
+            repeat(2) { writeInt(0) }  // reserved
+
+            // Identity matrix
+            intArrayOf(
+                0x00010000, 0, 0,
+                0, 0x00010000, 0,
+                0, 0, 0x40000000
+            ).forEach { writeInt(it) }
+
+            repeat(6) { writeInt(0) }  // pre-defined
+            writeInt(2)                // next track ID
+        }
+        return wrapBox("mvhd", payload.toByteArray())
+    }
+
+    /** Builds the 'trak' box from its tkhd and mdia children, in that order. */
+    private fun buildTrakBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val children = buildTkhdBox(width, height) + buildMdiaBox(width, height, sps, pps)
+        return wrapBox("trak", children)
+    }
+
+    /**
+     * Builds the track header ('tkhd') box for track ID 1.
+     *
+     * The matrix comes from writeRotationMatrix(). The width/height fields hold the
+     * display size, i.e. the encoder dimensions swapped when orientationDegrees is
+     * 90 or 270.
+     *
+     * @param width Encoder output width in pixels
+     * @param height Encoder output height in pixels
+     */
+    private fun buildTkhdBox(width: Int, height: Int): ByteArray {
+        val payload = ByteArrayOutputStream()
+        val dos = DataOutputStream(payload)
+
+        dos.writeInt(0x00000007)          // version 0, flags (enabled, in movie, in preview)
+        repeat(2) { dos.writeInt(0) }     // creation time, modification time
+        dos.writeInt(1)                   // track ID
+        repeat(4) { dos.writeInt(0) }     // reserved, duration, reserved, reserved
+        repeat(4) { dos.writeShort(0) }   // layer, alternate group, volume (0 for video), reserved
+
+        // Rotation matrix based on orientationDegrees
+        writeRotationMatrix(dos, width, height)
+
+        // A quarter turn swaps the displayed width and height.
+        val quarterTurn = orientationDegrees == 90 || orientationDegrees == 270
+        val displayWidth = if (quarterTurn) height else width
+        val displayHeight = if (quarterTurn) width else height
+        dos.writeInt(displayWidth shl 16)   // width (16.16 fixed point)
+        dos.writeInt(displayHeight shl 16)  // height (16.16 fixed point)
+
+        Log.d(TAG, "tkhd: encoder=${width}x${height}, display=${displayWidth}x${displayHeight}, rotation=$orientationDegrees")
+
+        return wrapBox("tkhd", payload.toByteArray())
+    }
+
+    /**
+     * Writes the nine 32-bit matrix entries of the tkhd box for orientationDegrees.
+     *
+     * Entries are emitted in the order a, b, u, c, d, v, x, y, w:
+     * | a  b  u |   a,b,c,d are 16.16 fixed-point
+     * | c  d  v |   u,v are written as 0
+     * | x  y  w |   x,y are 16.16 translations, w is 1.0 in 2.30
+     *
+     * Values written per orientation:
+     * - 90:  b = -1, c = 1, y = width
+     * - 180: a = -1, d = -1, x = width, y = height
+     * - 270: b = 1, c = -1, x = height
+     * - anything else: identity
+     *
+     * NOTE(review): for 90 and 270 the signs of b and c are the opposite of the
+     * a=cos(θ), b=sin(θ), c=-sin(θ), d=cos(θ) convention; confirm which rotation
+     * direction the caller means by orientationDegrees.
+     *
+     * @param dos Stream the matrix is written to
+     * @param width Encoder output width in pixels
+     * @param height Encoder output height in pixels
+     */
+    private fun writeRotationMatrix(dos: DataOutputStream, width: Int, height: Int) {
+        val one = 0x00010000        // 1.0 in 16.16
+        val minusOne = -0x00010000  // -1.0 in 16.16
+        val unitW = 0x40000000      // 1.0 in 2.30
+        val widthFixed = width shl 16
+        val heightFixed = height shl 16
+
+        // Order within each array: a, b, u, c, d, v, x, y, w
+        val matrix = when (orientationDegrees) {
+            90 -> intArrayOf(0, minusOne, 0, one, 0, 0, 0, widthFixed, unitW)
+            180 -> intArrayOf(minusOne, 0, 0, 0, minusOne, 0, widthFixed, heightFixed, unitW)
+            270 -> intArrayOf(0, one, 0, minusOne, 0, 0, heightFixed, 0, unitW)
+            else -> intArrayOf(one, 0, 0, 0, one, 0, 0, 0, unitW)  // 0° or unknown: identity
+        }
+
+        for (entry in matrix) {
+            dos.writeInt(entry)
+        }
+    }
+
+    /**
+     * Assembles the `mdia` box from its three children, written in this order:
+     * `mdhd`, `hdlr`, `minf`.
+     *
+     * @param width passed through to [buildMinfBox]
+     * @param height passed through to [buildMinfBox]
+     * @param sps passed through to [buildMinfBox]
+     * @param pps passed through to [buildMinfBox]
+     * @return the complete `mdia` box, header included
+     */
+    private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val children = ByteArrayOutputStream().apply {
+            write(buildMdhdBox())
+            write(buildHdlrBox())
+            write(buildMinfBox(width, height, sps, pps))
+        }
+        return wrapBox("mdia", children.toByteArray())
+    }
+
+    /**
+     * Builds the `mdhd` (media header) box.
+     *
+     * Creation time, modification time and duration are written as 0; only the
+     * timescale (taken from `timescale`) and the "und" language code carry information.
+     */
+    private fun buildMdhdBox(): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(0)            // version + flags
+            writeInt(0)            // creation time
+            writeInt(0)            // modification time
+            writeInt(timescale)    // media timescale
+            writeInt(0)            // duration
+            writeShort(0x55C4)     // packed language code: "und"
+            writeShort(0)          // pre-defined
+        }
+        return wrapBox("mdhd", payload.toByteArray())
+    }
+
+    /**
+     * Builds the `hdlr` (handler reference) box declaring a `vide` handler
+     * with the null-terminated name "VideoHandler".
+     */
+    private fun buildHdlrBox(): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(0)                   // version + flags
+            writeInt(0)                   // pre-defined
+            writeBytes("vide")            // handler type
+            repeat(3) { writeInt(0) }     // three reserved words
+            writeBytes("VideoHandler")    // handler name
+            writeByte(0)                  // terminating NUL of the name
+        }
+        return wrapBox("hdlr", payload.toByteArray())
+    }
+
+    /**
+     * Assembles the `minf` box from its children, written in this order:
+     * `vmhd`, `dinf`, `stbl`.
+     *
+     * @param width passed through to [buildStblBox]
+     * @param height passed through to [buildStblBox]
+     * @param sps passed through to [buildStblBox]
+     * @param pps passed through to [buildStblBox]
+     * @return the complete `minf` box, header included
+     */
+    private fun buildMinfBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val vmhd = buildVmhdBox()
+        val dinf = buildDinfBox()
+        val stbl = buildStblBox(width, height, sps, pps)
+
+        val children = ByteArrayOutputStream()
+        for (child in arrayOf(vmhd, dinf, stbl)) {
+            children.write(child)
+        }
+        return wrapBox("minf", children.toByteArray())
+    }
+
+    /**
+     * Builds the `vmhd` (video media header) box: version 0 with flags = 1,
+     * graphics mode 0 and an all-zero opcolor triple.
+     */
+    private fun buildVmhdBox(): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(1)                    // version 0, flags = 1
+            writeShort(0)                  // graphics mode
+            repeat(3) { writeShort(0) }    // opcolor[0..2]
+        }
+        return wrapBox("vmhd", payload.toByteArray())
+    }
+
+    /** Builds the `dinf` box, whose only child is the `dref` box from [buildDrefBox]. */
+    private fun buildDinfBox(): ByteArray = wrapBox("dinf", buildDrefBox())
+
+    /**
+     * Builds the `dref` (data reference) box with a single entry: a 12-byte
+     * `url ` box whose flags value of 1 marks the media as self-contained.
+     */
+    private fun buildDrefBox(): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(0)            // version + flags
+            writeInt(1)            // entry count
+
+            // The one entry is written inline: size, type, then flags.
+            writeInt(12)           // total size of the url box
+            writeBytes("url ")
+            writeInt(1)            // flags: self-contained
+        }
+        return wrapBox("dref", payload.toByteArray())
+    }
+
+    /**
+     * Assembles the `stbl` (sample table) box: the `stsd` sample description
+     * followed by empty `stts`, `stsc`, `stsz` and `stco` tables, in that order.
+     *
+     * @param width passed through to [buildStsdBox]
+     * @param height passed through to [buildStsdBox]
+     * @param sps passed through to [buildStsdBox]
+     * @param pps passed through to [buildStsdBox]
+     * @return the complete `stbl` box, header included
+     */
+    private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val children = ByteArrayOutputStream().apply {
+            write(buildStsdBox(width, height, sps, pps))
+            write(buildEmptySttsBox())
+            write(buildEmptyStscBox())
+            write(buildEmptyStszBox())
+            write(buildEmptyStcoBox())
+        }
+        return wrapBox("stbl", children.toByteArray())
+    }
+
+    /**
+     * Builds the `stsd` (sample description) box containing exactly one entry,
+     * the `avc1` box produced by [buildAvc1Box].
+     *
+     * @param width passed through to [buildAvc1Box]
+     * @param height passed through to [buildAvc1Box]
+     * @param sps passed through to [buildAvc1Box]
+     * @param pps passed through to [buildAvc1Box]
+     * @return the complete `stsd` box, header included
+     */
+    private fun buildStsdBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(0)    // version + flags
+            writeInt(1)    // entry count
+            write(buildAvc1Box(width, height, sps, pps))
+        }
+        return wrapBox("stsd", payload.toByteArray())
+    }
+
+    /**
+     * Builds the `avc1` visual sample entry and appends the `avcC` box from
+     * [buildAvcCBox] as its child.
+     *
+     * @param width written as a 16-bit value into the sample entry
+     * @param height written as a 16-bit value into the sample entry
+     * @param sps passed through to [buildAvcCBox]
+     * @param pps passed through to [buildAvcCBox]
+     * @return the complete `avc1` box, header included
+     */
+    private fun buildAvc1Box(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            write(ByteArray(6))        // reserved (6 zero bytes)
+            writeShort(1)              // data reference index
+            writeShort(0)              // pre-defined
+            writeShort(0)              // reserved
+            write(ByteArray(12))       // pre-defined (3 x 32-bit zero)
+            writeShort(width)          // width
+            writeShort(height)         // height
+            writeInt(0x00480000)       // horizontal resolution (72 dpi)
+            writeInt(0x00480000)       // vertical resolution (72 dpi)
+            writeInt(0)                // reserved
+            writeShort(1)              // frame count
+            write(ByteArray(32))       // compressor name (32 zero bytes)
+            writeShort(0x0018)         // depth (24 bit)
+            writeShort(-1)             // pre-defined
+
+            write(buildAvcCBox(sps, pps))
+        }
+        return wrapBox("avc1", payload.toByteArray())
+    }
+
+    /**
+     * Builds the `avcC` (AVC decoder configuration) box with one SPS and one PPS,
+     * declaring 4-byte NAL length prefixes.
+     *
+     * The profile, compatibility and level bytes are copied out of [sps]. An SPS NAL
+     * unit begins with a one-byte NAL header (forbidden_zero_bit = 0, nal_unit_type = 7),
+     * and those three bytes follow it. When that header is detected the values are read
+     * from offset 1; otherwise [sps] is treated as header-less and read from offset 0.
+     * No valid `profile_idc` matches the header pattern, so the detection cannot
+     * misfire on header-less input.
+     *
+     * If [sps] is too short, the fallbacks are profile 0x42, compatibility 0x00 and
+     * level 0x1F.
+     *
+     * @param sps the SPS bytes, written verbatim into the box
+     * @param pps the PPS bytes, written verbatim into the box
+     * @return the complete `avcC` box, header included
+     */
+    private fun buildAvcCBox(sps: ByteArray, pps: ByteArray): ByteArray {
+        val output = ByteArrayOutputStream()
+        val dos = DataOutputStream(output)
+
+        // Mask 0x9F keeps forbidden_zero_bit (0x80) and nal_unit_type (0x1F), ignoring nal_ref_idc.
+        val hasNalHeader = sps.isNotEmpty() && (sps[0].toInt() and 0x9F) == 0x07
+        val base = if (hasNalHeader) 1 else 0
+
+        val profileIdc = sps.getOrNull(base)?.let { it.toInt() and 0xFF } ?: 0x42
+        val profileCompat = sps.getOrNull(base + 1)?.let { it.toInt() and 0xFF } ?: 0x00
+        val levelIdc = sps.getOrNull(base + 2)?.let { it.toInt() and 0xFF } ?: 0x1F
+
+        dos.writeByte(1)             // configuration version
+        dos.writeByte(profileIdc)    // AVC profile
+        dos.writeByte(profileCompat) // profile compatibility
+        dos.writeByte(levelIdc)      // AVC level
+        dos.writeByte(0xFF)          // 6 bits reserved + 2 bits NAL length - 1
+
+        dos.writeByte(0xE1)          // 3 bits reserved + 5 bits SPS count
+        dos.writeShort(sps.size)     // SPS length
+        dos.write(sps)               // SPS data
+
+        dos.writeByte(1)             // PPS count
+        dos.writeShort(pps.size)     // PPS length
+        dos.write(pps)               // PPS data
+
+        return wrapBox("avcC", output.toByteArray())
+    }
+
+    /**
+     * Builds an empty `stts` box: 4 zero bytes of version/flags followed by a
+     * zero entry count, i.e. 8 zero bytes of payload.
+     */
+    private fun buildEmptySttsBox(): ByteArray = wrapBox("stts", ByteArray(8))
+
+    /**
+     * Builds an empty `stsc` box: 4 zero bytes of version/flags followed by a
+     * zero entry count, i.e. 8 zero bytes of payload.
+     */
+    private fun buildEmptyStscBox(): ByteArray = wrapBox("stsc", ByteArray(8))
+
+    /**
+     * Builds an empty `stsz` box: version/flags, sample size (0 = variable) and
+     * sample count are all zero, i.e. 12 zero bytes of payload.
+     */
+    private fun buildEmptyStszBox(): ByteArray = wrapBox("stsz", ByteArray(12))
+
+    /**
+     * Builds an empty `stco` box: 4 zero bytes of version/flags followed by a
+     * zero entry count, i.e. 8 zero bytes of payload.
+     */
+    private fun buildEmptyStcoBox(): ByteArray = wrapBox("stco", ByteArray(8))
+
+    /** Builds the `mvex` box, whose only child is the `trex` box from [buildTrexBox]. */
+    private fun buildMvexBox(): ByteArray = wrapBox("mvex", buildTrexBox())
+
+    /**
+     * Builds the `trex` (track extends) box for track ID 1, with sample
+     * description index 1 and zero default duration, size and flags.
+     */
+    private fun buildTrexBox(): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(0)                  // version + flags
+            writeInt(1)                  // track ID
+            writeInt(1)                  // default sample description index
+            repeat(3) { writeInt(0) }    // default sample duration, size, flags
+        }
+        return wrapBox("trex", payload.toByteArray())
+    }
+
+    // ==================== Media Segment Building ====================
+
+    /**
+     * Builds one media segment: a `moof` box immediately followed by an `mdat` box.
+     *
+     * The `mdat` payload is the data of every sample concatenated in list order.
+     *
+     * @param samples the samples carried by this segment
+     * @param sequenceNumber passed through to [buildMoofBox]
+     * @param baseDecodeTimeUs passed through to [buildMoofBox]
+     * @return the `moof` bytes followed by the `mdat` bytes
+     */
+    private fun buildMediaSegment(
+        samples: List<Sample>,
+        sequenceNumber: Int,
+        baseDecodeTimeUs: Long
+    ): ByteArray {
+        // The payload is gathered first because its size is handed to the moof builder.
+        val payloadStream = ByteArrayOutputStream()
+        samples.forEach { payloadStream.write(it.data) }
+        val payload = payloadStream.toByteArray()
+
+        val moof = buildMoofBox(samples, sequenceNumber, baseDecodeTimeUs, payload.size)
+        val mdat = wrapBox("mdat", payload)
+
+        return moof + mdat
+    }
+
+    /**
+     * Builds the `moof` box (`mfhd` followed by `traf`).
+     *
+     * The `trun` data offset must be known before `traf` is built, so the size of
+     * the whole `moof` is computed up front from the fixed layouts of `tfhd`, `tfdt`
+     * and `trun`. These constants must stay in sync with the corresponding builders.
+     *
+     * @param samples the samples described by this fragment
+     * @param sequenceNumber written into the `mfhd` box
+     * @param baseDecodeTimeUs passed through to [buildTrafBox]
+     * @param mdatPayloadSize not used by the size computation
+     * @return the complete `moof` box, header included
+     */
+    private fun buildMoofBox(
+        samples: List<Sample>,
+        sequenceNumber: Int,
+        baseDecodeTimeUs: Long,
+        mdatPayloadSize: Int
+    ): ByteArray {
+        val boxHeaderBytes = 8
+        val mfhd = buildMfhdBox(sequenceNumber)
+
+        val tfhdBytes = boxHeaderBytes + 8                          // version/flags + track ID
+        val tfdtBytes = boxHeaderBytes + 12                         // version/flags + 64-bit decode time
+        val trunBytes = boxHeaderBytes + 12 + 12 * samples.size     // fixed fields + 12 bytes per sample
+        val trafBytes = boxHeaderBytes + tfhdBytes + tfdtBytes + trunBytes
+        val moofBytes = boxHeaderBytes + mfhd.size + trafBytes
+
+        // Sample data begins right after the mdat header that follows the moof.
+        val traf = buildTrafBox(samples, baseDecodeTimeUs, moofBytes + boxHeaderBytes)
+
+        return wrapBox("moof", mfhd + traf)
+    }
+
+    /**
+     * Builds the `mfhd` (movie fragment header) box.
+     *
+     * @param sequenceNumber the fragment sequence number written into the box
+     * @return the complete `mfhd` box, header included
+     */
+    private fun buildMfhdBox(sequenceNumber: Int): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(0)                 // version + flags
+            writeInt(sequenceNumber)
+        }
+        return wrapBox("mfhd", payload.toByteArray())
+    }
+
+    /**
+     * Assembles the `traf` (track fragment) box from `tfhd`, `tfdt` and `trun`,
+     * written in that order.
+     *
+     * @param samples passed through to [buildTrunBox]
+     * @param baseDecodeTimeUs passed through to [buildTfdtBox]
+     * @param dataOffset passed through to [buildTrunBox]
+     * @return the complete `traf` box, header included
+     */
+    private fun buildTrafBox(samples: List<Sample>, baseDecodeTimeUs: Long, dataOffset: Int): ByteArray {
+        val tfhd = buildTfhdBox()
+        val tfdt = buildTfdtBox(baseDecodeTimeUs)
+        val trun = buildTrunBox(samples, dataOffset)
+        return wrapBox("traf", tfhd + tfdt + trun)
+    }
+
+    /**
+     * Builds the `tfhd` (track fragment header) box for track ID 1 with only the
+     * default-base-is-moof flag (0x020000) set.
+     */
+    private fun buildTfhdBox(): ByteArray {
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(0x00020000)    // version 0, flags: default-base-is-moof
+            writeInt(1)             // track ID
+        }
+        return wrapBox("tfhd", payload.toByteArray())
+    }
+
+    /**
+     * Builds a version-1 `tfdt` (track fragment decode time) box, which carries
+     * the base media decode time as a 64-bit value.
+     *
+     * @param baseDecodeTimeUs decode time in microseconds; converted to `timescale`
+     *   units with integer division before being written
+     * @return the complete `tfdt` box, header included
+     */
+    private fun buildTfdtBox(baseDecodeTimeUs: Long): ByteArray {
+        val decodeTimeTicks = (baseDecodeTimeUs * timescale) / 1_000_000
+
+        val payload = ByteArrayOutputStream()
+        with(DataOutputStream(payload)) {
+            writeInt(0x01000000)          // version 1 (64-bit time), flags 0
+            writeLong(decodeTimeTicks)
+        }
+        return wrapBox("tfdt", payload.toByteArray())
+    }
+
+    /**
+     * Builds the `trun` (track run) box with a data offset and, per sample,
+     * a duration, a size and a flags word.
+     *
+     * Durations are converted from microseconds to `timescale` units cumulatively:
+     * each sample's duration is the difference between the converted running total
+     * after and before it. Converting each duration on its own would drop the
+     * fractional tick of every sample, so the summed run duration would drift short
+     * of the real elapsed time. With the cumulative form the total error stays below
+     * one tick regardless of the number of samples.
+     *
+     * @param samples the samples described by this run, in decode order
+     * @param dataOffset offset written into the box header for locating the sample data
+     * @return the complete `trun` box, header included
+     */
+    private fun buildTrunBox(samples: List<Sample>, dataOffset: Int): ByteArray {
+        val output = ByteArrayOutputStream()
+        val dos = DataOutputStream(output)
+
+        // Flags: data-offset + sample-duration + sample-size + sample-flags
+        val flags = 0x000001 or 0x000100 or 0x000200 or 0x000400
+        dos.writeInt(flags)
+        dos.writeInt(samples.size)
+        dos.writeInt(dataOffset)
+
+        var elapsedUs = 0L       // running sum of sample durations, in microseconds
+        var elapsedTicks = 0L    // the same running sum, already converted to timescale units
+        for (sample in samples) {
+            elapsedUs += sample.durationUs
+            val ticksSoFar = (elapsedUs * timescale) / 1_000_000
+
+            dos.writeInt((ticksSoFar - elapsedTicks).toInt())
+            dos.writeInt(sample.data.size)
+            dos.writeInt(buildSampleFlags(sample.isKeyFrame))
+
+            elapsedTicks = ticksSoFar
+        }
+
+        return wrapBox("trun", output.toByteArray())
+    }
+
+    /**
+     * Returns the 32-bit sample flags word for a `trun` entry.
+     *
+     * @param isKeyFrame whether the sample is a key frame
+     * @return 0x02000000 for key frames (sample_depends_on = 2), or 0x01010000 for
+     *   all other frames (sample_depends_on = 1 plus the non-sync/difference bit)
+     */
+    private fun buildSampleFlags(isKeyFrame: Boolean): Int {
+        val dependsOnNothing = 2 shl 24    // sample_depends_on = 2
+        val dependsOnOthers = 1 shl 24     // sample_depends_on = 1
+        val differenceSample = 1 shl 16    // sample is a difference (non-sync) sample
+
+        return when (isKeyFrame) {
+            true -> dependsOnNothing
+            false -> dependsOnOthers or differenceSample
+        }
+    }
+
+    // ==================== Utilities ====================
+
+    /**
+     * Wraps [content] in a box header: a 32-bit big-endian size (8 + content length)
+     * followed by the characters of [type], then the content itself.
+     *
+     * @param type the box type; each character is written as a single byte
+     * @param content the box payload
+     * @return header and payload as one byte array
+     */
+    private fun wrapBox(type: String, content: ByteArray): ByteArray {
+        val boxSize = 8 + content.size
+        val buffer = ByteArrayOutputStream()
+        DataOutputStream(buffer).run {
+            writeInt(boxSize)
+            writeBytes(type)
+            write(content)
+        }
+        return buffer.toByteArray()
+    }
+}
--- a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt
@@ -4,7 +4,6 @@ import android.content.Context
 import android.util.Log
 import android.util.Size
 import android.view.Surface
-import androidx.media3.common.util.UnstableApi
 import com.facebook.common.statfs.StatFsHelper
 import com.mrousavy.camera.extensions.getRecommendedBitRate
 import com.mrousavy.camera.types.Orientation
@@ -16,7 +15,6 @@ import java.text.SimpleDateFormat
 import java.util.Locale
 import java.util.Date

-@UnstableApi
 class RecordingSession(
  context: Context,
  val cameraId: String,
@@ -24,13 +22,13 @@ class RecordingSession(
  private val enableAudio: Boolean,
  private val fps: Int? = null,
  private val hdr: Boolean = false,
-  val cameraOrientation: Orientation,
+  private val cameraOrientation: Orientation,
  private val options: RecordVideoOptions,
  private val filePath: String,
  private val callback: (video: Video) -> Unit,
  private val onError: (error: CameraError) -> Unit,
  private val allCallbacks: CameraSession.Callback,
-  // Use the new FragmentedMp4Muxer-based recorder for HLS-compatible output
+  // Use FragmentedRecordingManager for HLS-compatible fMP4 output
  private val useFragmentedMp4: Boolean = true
 ) {
  companion object {
--- a/package/android/src/main/java/com/mrousavy/camera/core/VideoPipeline.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/VideoPipeline.kt
@@ -162,14 +162,6 @@ class VideoPipeline(
      // 4. Get the transform matrix from the SurfaceTexture (rotations/scales applied by Camera)
      surfaceTexture.getTransformMatrix(transformMatrix)

-      // Log transform matrix for debugging rotation issues (only when recording)
-      if (recordingSession != null) {
-        Log.i(TAG, "ROTATION_DEBUG TransformMatrix: [${transformMatrix[0]}, ${transformMatrix[1]}, ${transformMatrix[2]}, ${transformMatrix[3]}], " +
-                   "[${transformMatrix[4]}, ${transformMatrix[5]}, ${transformMatrix[6]}, ${transformMatrix[7]}], " +
-                   "[${transformMatrix[8]}, ${transformMatrix[9]}, ${transformMatrix[10]}, ${transformMatrix[11]}], " +
-                   "[${transformMatrix[12]}, ${transformMatrix[13]}, ${transformMatrix[14]}, ${transformMatrix[15]}]")
-      }
-
      // 5. Draw it with applied rotation/mirroring
      onFrame(transformMatrix)

@@ -189,15 +181,11 @@ class VideoPipeline(
  /**
   * Configures the Pipeline to also write Frames to a Surface from a `MediaRecorder` (or null)
   */
-  fun setRecordingSessionOutput(recordingSession: RecordingSession?, orientation: Orientation = Orientation.LANDSCAPE_LEFT) {
+  fun setRecordingSessionOutput(recordingSession: RecordingSession?) {
    synchronized(this) {
      if (recordingSession != null) {
        // Configure OpenGL pipeline to stream Frames into the Recording Session's surface
-        Log.i(TAG, "Setting ${recordingSession.size} RecordingSession Output with orientation=$orientation...")
-        // Set the recording orientation for the native layer
-        // 0 = LANDSCAPE_LEFT (CCW), 1 = LANDSCAPE_RIGHT (CW)
-        val orientationValue = if (orientation == Orientation.LANDSCAPE_RIGHT) 1 else 0
-        setRecordingOrientation(orientationValue)
+        Log.i(TAG, "Setting ${recordingSession.size} RecordingSession Output...")
        setRecordingSessionOutputSurface(recordingSession.surface)
        this.recordingSession = recordingSession
      } else {
@@ -264,6 +252,5 @@ class VideoPipeline(
  private external fun onFrame(transformMatrix: FloatArray)
  private external fun setRecordingSessionOutputSurface(surface: Any)
  private external fun removeRecordingSessionOutputSurface()
-  private external fun setRecordingOrientation(orientation: Int)
  private external fun initHybrid(width: Int, height: Int): HybridData
 }
Author	SHA1	Message	Date
Ivan Malison	b79f876114	Fix orientation issues	2025-12-28 01:14:44 -08:00
Ivan Malison	e60c1a4eb1	Write our own muxer to make hls uupload actually work	2025-12-21 16:45:04 -08:00