Compare commits

...

3 Commits

7 changed files with 557 additions and 85 deletions

OpenGLRenderer.cpp

@@ -26,6 +26,7 @@ OpenGLRenderer::OpenGLRenderer(std::shared_ptr<OpenGLContext> context, ANativeWi
  _outputSurface = surface;
  _width = ANativeWindow_getWidth(surface);
  _height = ANativeWindow_getHeight(surface);
+ __android_log_print(ANDROID_LOG_INFO, TAG, "ROTATION_DEBUG OpenGLRenderer created with output surface dimensions: %dx%d", _width, _height);
}

OpenGLRenderer::~OpenGLRenderer() {

VideoPipeline.cpp

@@ -56,6 +56,11 @@ void VideoPipeline::setRecordingSessionOutputSurface(jobject surface) {
  _recordingSessionOutput = OpenGLRenderer::CreateWithWindowSurface(_context, window);
}
+
+void VideoPipeline::setRecordingOrientation(int orientation) {
+  _recordingOrientation = orientation;
+  __android_log_print(ANDROID_LOG_INFO, TAG, "Recording orientation set to: %d", orientation);
+}

int VideoPipeline::getInputTextureId() {
  if (_inputTexture == std::nullopt) {
    _inputTexture = _context->createTexture(OpenGLTexture::Type::ExternalOES, _width, _height);
@@ -78,8 +83,29 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara
  OpenGLTexture& texture = _inputTexture.value();
  if (_recordingSessionOutput) {
-   __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession..");
-   _recordingSessionOutput->renderTextureToSurface(texture, transformMatrix);
+   __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession.. orientation=%d", _recordingOrientation);
+   // For recording, use a simple transform matrix instead of the display transform.
+   // The display transform includes rotations for preview which we don't want in recordings.
+   float recordingMatrix[16];
+   if (_recordingOrientation == 1) {
+     // LANDSCAPE_RIGHT (CW): Y-flip composed with a 180° rotation.
+     // The Y terms cancel, so the net effect is an X-flip: negate X, then translate X by 1.
+     recordingMatrix[0] = -1.0f; recordingMatrix[1] = 0.0f;  recordingMatrix[2] = 0.0f;  recordingMatrix[3] = 0.0f;
+     recordingMatrix[4] = 0.0f;  recordingMatrix[5] = 1.0f;  recordingMatrix[6] = 0.0f;  recordingMatrix[7] = 0.0f;
+     recordingMatrix[8] = 0.0f;  recordingMatrix[9] = 0.0f;  recordingMatrix[10] = 1.0f; recordingMatrix[11] = 0.0f;
+     recordingMatrix[12] = 1.0f; recordingMatrix[13] = 0.0f; recordingMatrix[14] = 0.0f; recordingMatrix[15] = 1.0f;
+   } else {
+     // LANDSCAPE_LEFT (CCW): Simple Y-flip.
+     // OpenGL origin is bottom-left, video expects top-left.
+     recordingMatrix[0] = 1.0f;  recordingMatrix[1] = 0.0f;  recordingMatrix[2] = 0.0f;  recordingMatrix[3] = 0.0f;
+     recordingMatrix[4] = 0.0f;  recordingMatrix[5] = -1.0f; recordingMatrix[6] = 0.0f;  recordingMatrix[7] = 0.0f;
+     recordingMatrix[8] = 0.0f;  recordingMatrix[9] = 0.0f;  recordingMatrix[10] = 1.0f; recordingMatrix[11] = 0.0f;
+     recordingMatrix[12] = 0.0f; recordingMatrix[13] = 1.0f; recordingMatrix[14] = 0.0f; recordingMatrix[15] = 1.0f;
+   }
+   _recordingSessionOutput->renderTextureToSurface(texture, recordingMatrix);
  }
}
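Note: a quick way to sanity-check the two matrices above is to apply them to the quad's texture corners. The sketch below is illustrative only, not part of this diff; it assumes the standard OpenGL column-major layout, where element (row r, column c) lives at m[4*c + r]:

    // Sketch: verify LANDSCAPE_LEFT is a pure Y-flip and LANDSCAPE_RIGHT a pure X-flip.
    fun applyTexTransform(m: FloatArray, u: Float, v: Float): Pair<Float, Float> =
        // For the point (u, v, 0, 1): x' = m[0]*u + m[4]*v + m[12], y' = m[1]*u + m[5]*v + m[13]
        (m[0] * u + m[4] * v + m[12]) to (m[1] * u + m[5] * v + m[13])

    fun main() {
        val yFlip = floatArrayOf(1f, 0f, 0f, 0f, 0f, -1f, 0f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 1f)
        val xFlip = floatArrayOf(-1f, 0f, 0f, 0f, 0f, 1f, 0f, 0f, 0f, 0f, 1f, 0f, 1f, 0f, 0f, 1f)
        check(applyTexTransform(yFlip, 0f, 0f) == 0f to 1f) // bottom-left maps to top-left: Y-flip
        check(applyTexTransform(xFlip, 0f, 0f) == 1f to 0f) // bottom-left maps to bottom-right: X-flip
        check(applyTexTransform(xFlip, 1f, 1f) == 0f to 1f)
    }

This also shows why the LANDSCAPE_RIGHT case has no Y terms: the Y-flip and the 180° rotation cancel on that axis, leaving a pure X mirror.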
@@ -88,6 +114,7 @@ void VideoPipeline::registerNatives() {
    makeNativeMethod("initHybrid", VideoPipeline::initHybrid),
    makeNativeMethod("setRecordingSessionOutputSurface", VideoPipeline::setRecordingSessionOutputSurface),
    makeNativeMethod("removeRecordingSessionOutputSurface", VideoPipeline::removeRecordingSessionOutputSurface),
+   makeNativeMethod("setRecordingOrientation", VideoPipeline::setRecordingOrientation),
    makeNativeMethod("getInputTextureId", VideoPipeline::getInputTextureId),
    makeNativeMethod("onBeforeFrame", VideoPipeline::onBeforeFrame),
    makeNativeMethod("onFrame", VideoPipeline::onFrame),

VideoPipeline.h

@@ -33,6 +33,7 @@ public:
  // <- MediaRecorder output
  void setRecordingSessionOutputSurface(jobject surface);
  void removeRecordingSessionOutputSurface();
+ void setRecordingOrientation(int orientation);

  // Frame callbacks
  void onBeforeFrame();
@@ -47,6 +48,7 @@ private:
  std::optional<OpenGLTexture> _inputTexture = std::nullopt;
  int _width = 0;
  int _height = 0;
+ int _recordingOrientation = 0; // 0 = LANDSCAPE_LEFT, 1 = LANDSCAPE_RIGHT

  // Output Contexts
  std::shared_ptr<OpenGLContext> _context = nullptr;

CameraSession.kt

@@ -409,7 +409,8 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
  private fun updateVideoOutputs() {
    val videoOutput = videoOutput ?: return
    Log.i(TAG, "Updating Video Outputs...")
-   videoOutput.videoPipeline.setRecordingSessionOutput(recording)
+   val orientation = recording?.cameraOrientation ?: Orientation.LANDSCAPE_LEFT
+   videoOutput.videoPipeline.setRecordingSessionOutput(recording, orientation)
  }

  suspend fun startRecording(
@@ -428,18 +429,16 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
    // Get actual device rotation from WindowManager since the React Native orientation hook
    // doesn't update when rotating between landscape-left and landscape-right on Android.
-   // Map device rotation to the correct orientationHint for video recording:
-   // - Counter-clockwise (ROTATION_90) → 270° hint
-   // - Clockwise (ROTATION_270) → 90° hint
    val windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager
    val deviceRotation = windowManager.defaultDisplay.rotation
    val recordingOrientation = when (deviceRotation) {
      Surface.ROTATION_0 -> Orientation.PORTRAIT
-     Surface.ROTATION_90 -> Orientation.LANDSCAPE_RIGHT
+     Surface.ROTATION_90 -> Orientation.LANDSCAPE_LEFT // CCW rotation, top to left
      Surface.ROTATION_180 -> Orientation.PORTRAIT_UPSIDE_DOWN
-     Surface.ROTATION_270 -> Orientation.LANDSCAPE_LEFT
+     Surface.ROTATION_270 -> Orientation.LANDSCAPE_RIGHT // CW rotation, top to right
      else -> Orientation.PORTRAIT
    }
+   Log.i(TAG, "ROTATION_DEBUG: deviceRotation=$deviceRotation, recordingOrientation=$recordingOrientation, options.orientation=${options.orientation}")

    val recording = RecordingSession(
      context,
@@ -448,7 +447,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
      enableAudio,
      fps,
      videoOutput.enableHdr,
-     orientation,
+     recordingOrientation,
      options,
      filePath,
      callback,

FragmentedRecordingManager.kt

@@ -16,7 +16,9 @@ import com.mrousavy.camera.types.Orientation
import com.mrousavy.camera.types.RecordVideoOptions
import java.io.File
import java.io.FileOutputStream
+import java.io.RandomAccessFile
import java.nio.ByteBuffer
+import java.nio.ByteOrder

/**
 * A recording manager that produces HLS-compatible fragmented MP4 segments.
@@ -51,14 +53,21 @@ class FragmentedRecordingManager(
      segmentDurationSeconds: Int = 6
    ): FragmentedRecordingManager {
      val mimeType = options.videoCodec.toMimeType()
-     val cameraOrientationDegrees = cameraOrientation.toDegrees()
-     val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees()
-
-     val (width, height) = if (cameraOrientation.isLandscape()) {
-       size.height to size.width
-     } else {
-       size.width to size.height
-     }
+     // For fragmented MP4: DON'T swap dimensions, use camera's native dimensions.
+     // The C++ VideoPipeline uses a custom transform matrix (not the display transform).
+     // This gives us raw sensor frames, and we rely on rotation metadata for playback.
+     val cameraOrientationDegrees = when (cameraOrientation) {
+       Orientation.LANDSCAPE_LEFT -> 0 // CCW landscape
+       Orientation.LANDSCAPE_RIGHT -> 0 // CW landscape
+       Orientation.PORTRAIT -> 90
+       Orientation.PORTRAIT_UPSIDE_DOWN -> 270
+     }
+     Log.i(TAG, "ROTATION_DEBUG FragmentedRecordingManager: cameraOrientation=$cameraOrientation, cameraOrientationDegrees=$cameraOrientationDegrees, inputSize=${size.width}x${size.height}")
+
+     // Keep original dimensions - don't swap. Let rotation metadata handle orientation.
+     val width = size.width
+     val height = size.height
+     Log.i(TAG, "ROTATION_DEBUG FragmentedRecordingManager: outputDimensions=${width}x${height} (no swap)")

      val format = MediaFormat.createVideoFormat(mimeType, width, height)
      val codec = MediaCodec.createEncoderByType(mimeType)
@@ -74,14 +83,14 @@ class FragmentedRecordingManager(
      format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
      format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
-     Log.d(TAG, "Video Format: $format, camera orientation $cameraOrientationDegrees, recordingOrientation: $recordingOrientationDegrees")
+     Log.d(TAG, "Video Format: $format, orientationDegrees: $cameraOrientationDegrees")
      codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)

      return FragmentedRecordingManager(
        codec,
        outputDirectory,
-       recordingOrientationDegrees,
+       cameraOrientationDegrees,
        segmentDurationSeconds * 1_000_000L,
        callbacks
      )
@@ -97,6 +106,13 @@ class FragmentedRecordingManager(
  private var segmentContext: SegmentContext? = null
  private var initSegmentEmitted = false
+
+ // Cumulative base time for HLS-compatible timestamps (in timescale units).
+ // Each segment's baseMediaDecodeTime should be the sum of all previous segment durations.
+ private var cumulativeBaseTimeUs: Long = 0L
+
+ // Timescale used in the fMP4 (typically 1000000 for microseconds)
+ private val timescale: Long = 1_000_000L

  override val surface: Surface = encoder.createInputSurface()

  init {
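Note: the two fields above implement a simple running sum; with a 1 MHz timescale, the baseMediaDecodeTime of segment n is just the total microseconds of segments 0..n-1. A minimal worked example (illustrative durations):

    fun main() {
        val segmentDurationsUs = longArrayOf(6_000_000L, 6_000_000L, 4_200_000L)
        var cumulativeBaseTimeUs = 0L
        for ((index, durationUs) in segmentDurationsUs.withIndex()) {
            println("segment $index: baseMediaDecodeTime=$cumulativeBaseTimeUs") // 0, 6000000, 12000000
            cumulativeBaseTimeUs += durationUs
        }
    }

Each segment starts exactly where the previous one ended, which is what keeps an HLS player's timeline continuous across fragments.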
@@ -106,16 +122,26 @@ class FragmentedRecordingManager(
    encoder.setCallback(this)
  }

+ /**
+  * Result from finishing a segment, used for tfdt patching.
+  */
+ private data class SegmentResult(
+   val file: File,
+   val segmentIndex: Int,
+   val durationUs: Long
+ )
+
  /**
   * Context for a single data segment being written.
   * Init segments are created separately via createInitSegment().
   */
  private inner class SegmentContext(
    private val format: MediaFormat,
-   private val segmentIndex: Int
+   val segmentIndex: Int,
+   private val baseTimeUs: Long // The baseMediaDecodeTime for this segment
  ) {
    private val filename = "$segmentIndex.mp4"
-   private val file = File(outputDirectory, filename)
+   val file = File(outputDirectory, filename)
    private val outputStream = FileOutputStream(file)
    private val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
    private lateinit var videoTrack: Muxer.TrackToken
@@ -126,32 +152,38 @@ class FragmentedRecordingManager(
    init {
      val media3Format = convertToMedia3Format(format)
      videoTrack = muxer.addTrack(media3Format)
-     Log.d(TAG, "Created segment context: $filename")
+     Log.d(TAG, "Created segment context: $filename with baseTimeUs=$baseTimeUs")
    }

-   fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo): Boolean {
+   fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo) {
      if (startTimeUs < 0) {
        startTimeUs = bufferInfo.presentationTimeUs
+       Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FIRST sample: absolutePTS=${bufferInfo.presentationTimeUs}us, baseTimeUs=$baseTimeUs")
      }
-     lastTimeUs = bufferInfo.presentationTimeUs

+     // Log first 3 samples and every keyframe for debugging
      val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
+     if (sampleCount < 3 || isKeyFrame) {
+       Log.i(TAG, "PTS_DEBUG Segment $segmentIndex sample $sampleCount: PTS=${bufferInfo.presentationTimeUs}us, keyframe=$isKeyFrame")
+     }
+     lastTimeUs = bufferInfo.presentationTimeUs

      muxer.writeSampleData(videoTrack, buffer, bufferInfo)
      sampleCount++
-
-     // Check if we should start a new segment at the next keyframe
-     if (isKeyFrame && sampleCount > 1) {
-       val segmentDurationUs = bufferInfo.presentationTimeUs - startTimeUs
-       if (segmentDurationUs >= targetSegmentDurationUs) {
-         return true // Signal to create new segment
-       }
-     }
-     return false
    }
+
+   /**
+    * Check if we've accumulated enough duration to start a new segment.
+    * Should only be called when we have a keyframe available.
+    */
+   fun shouldStartNewSegmentOnKeyframe(): Boolean {
+     if (sampleCount == 0) return false // Need at least one sample first
+     val currentDurationUs = lastTimeUs - startTimeUs
+     return currentDurationUs >= targetSegmentDurationUs
+   }

-   fun finish(): Long {
+   fun finish(): SegmentResult {
      try {
        muxer.close()
        outputStream.close()
@@ -160,10 +192,9 @@ class FragmentedRecordingManager(
      }

      val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L
-     callbacks.onVideoChunkReady(file, segmentIndex, durationUs)
-     Log.d(TAG, "Finished segment: $filename, samples=$sampleCount, duration=${durationUs/1000}ms")
-     return durationUs
+     Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FINISHED: startPTS=${startTimeUs}us, lastPTS=${lastTimeUs}us, duration=${durationUs/1000}ms, samples=$sampleCount, baseTimeUs=$baseTimeUs")
+     return SegmentResult(file, segmentIndex, durationUs)
    }
  }
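Note: the writeSample/shouldStartNewSegmentOnKeyframe pair means a segment can only be cut at a keyframe, and only after it has accumulated at least targetSegmentDurationUs. A standalone simulation of that rule, with made-up frame timings (not code from this diff):

    fun main() {
        val targetSegmentDurationUs = 6_000_000L
        var segmentStartUs = 0L
        var segment = 0
        for (frame in 0 until 500) {
            val ptsUs = frame * 40_000L      // 25 fps
            val isKeyFrame = frame % 50 == 0 // keyframe every 2 seconds
            if (isKeyFrame && frame > 0 && ptsUs - segmentStartUs >= targetSegmentDurationUs) {
                segment++                    // the keyframe opens the new segment
                segmentStartUs = ptsUs
                println("segment $segment starts at ${ptsUs / 1_000_000} s") // 6, 12, 18, ...
            }
        }
    }

With KEY_I_FRAME_INTERVAL set to the segment duration (as in create() above), the encoder's GOP boundaries and the 6-second target line up, so segments come out close to the target length.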
@@ -174,11 +205,19 @@ class FragmentedRecordingManager(
      return
    }

-   // Close previous segment
-   segmentContext?.finish()
+   // Close previous segment and process it for HLS
+   segmentContext?.let { ctx ->
+     val result = ctx.finish()
+     // Process the segment: extract init (if first), strip headers, inject tfdt
+     processSegmentForHLS(result.file, result.segmentIndex, cumulativeBaseTimeUs)
+     // Update cumulative time for next segment
+     cumulativeBaseTimeUs += result.durationUs
+     // Notify callback
+     callbacks.onVideoChunkReady(result.file, result.segmentIndex, result.durationUs)
+   }

-   // Create new data segment (init segments are created separately)
-   segmentContext = SegmentContext(format, chunkIndex)
+   // Create new data segment with current cumulative base time
+   segmentContext = SegmentContext(format, chunkIndex, cumulativeBaseTimeUs)
    chunkIndex++
  }
@@ -190,7 +229,12 @@ class FragmentedRecordingManager(
  override fun finish() {
    synchronized(this) {
      recording = false
-     segmentContext?.finish()
+     // Close final segment and process it for HLS
+     segmentContext?.let { ctx ->
+       val result = ctx.finish()
+       processSegmentForHLS(result.file, result.segmentIndex, cumulativeBaseTimeUs)
+       callbacks.onVideoChunkReady(result.file, result.segmentIndex, result.durationUs)
+     }
      segmentContext = null
      try {
        encoder.stop()
@@ -213,15 +257,17 @@ class FragmentedRecordingManager(
      return
    }

+   // Skip codec config buffers - these contain SPS/PPS with annex-b start codes
+   // and should NOT be written as samples (they're already in the Format's initializationData)
+   if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
+     Log.d(TAG, "Skipping codec config buffer (size=${bufferInfo.size})")
+     encoder.releaseOutputBuffer(index, false)
+     return
+   }
+
    val encodedData = encoder.getOutputBuffer(index)
    if (encodedData == null) {
      Log.e(TAG, "getOutputBuffer returned null")
      encoder.releaseOutputBuffer(index, false)
      return
    }
-
-   // Wait until init segment is emitted (happens in onOutputFormatChanged)
-   if (!initSegmentEmitted) {
-     encoder.releaseOutputBuffer(index, false)
-     return
-   }
@@ -238,11 +284,18 @@ class FragmentedRecordingManager(
    }

    try {
-     val shouldStartNewSegment = context.writeSample(encodedData, bufferInfo)
+     // Check if this keyframe should start a new segment BEFORE writing
+     val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
+     val shouldStartNewSegment = isKeyFrame && context.shouldStartNewSegmentOnKeyframe()
      if (shouldStartNewSegment) {
+       // Finish old segment WITHOUT writing this keyframe to it
        createNewSegment()
-       // Write this keyframe to the new segment as well
+       // Write keyframe to the NEW segment only
        segmentContext?.writeSample(encodedData, bufferInfo)
+     } else {
+       // Write to current segment
+       context.writeSample(encodedData, bufferInfo)
      }
    } catch (e: Exception) {
      Log.e(TAG, "Error writing sample", e)
@@ -259,39 +312,8 @@ class FragmentedRecordingManager(
  override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) {
    Log.i(TAG, "Output format changed: $format")
    encodedFormat = format
-
-   // Create the init segment immediately when we get the format
-   // This produces an fMP4 file with just ftyp + moov (no samples)
-   if (!initSegmentEmitted) {
-     createInitSegment(format)
-     initSegmentEmitted = true
-   }
- }
-
- /**
-  * Creates an initialization segment containing only codec configuration (ftyp + moov).
-  * This is done by creating a muxer, adding the track, and immediately closing it
-  * without writing any samples.
-  */
- private fun createInitSegment(format: MediaFormat) {
-   val initFile = File(outputDirectory, "init.mp4")
-   try {
-     val outputStream = FileOutputStream(initFile)
-     val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
-
-     // Convert and add the track
-     val media3Format = convertToMedia3Format(format)
-     muxer.addTrack(media3Format)
-
-     // Close immediately - this writes just the header (ftyp + moov)
-     muxer.close()
-     outputStream.close()
-
-     Log.d(TAG, "Created init segment: ${initFile.absolutePath}")
-     callbacks.onInitSegmentReady(initFile)
-   } catch (e: Exception) {
-     Log.e(TAG, "Error creating init segment", e)
-   }
+   // Note: init segment is now extracted from the first segment's ftyp+moov
+   // rather than created separately (Media3's empty init was not working)
  }

  private fun convertToMedia3Format(mediaFormat: MediaFormat): Format {
@@ -302,6 +324,8 @@ class FragmentedRecordingManager(
    val frameRate = try { mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE) } catch (e: Exception) { -1 }

    // Get CSD (Codec Specific Data) if available - required for init segment
+   // csd-0 contains SPS (Sequence Parameter Set)
+   // csd-1 contains PPS (Picture Parameter Set)
    val csd0 = mediaFormat.getByteBuffer("csd-0")
    val csd1 = mediaFormat.getByteBuffer("csd-1")
@@ -310,13 +334,19 @@ class FragmentedRecordingManager(
      val bytes = ByteArray(it.remaining())
      it.duplicate().get(bytes)
      initData.add(bytes)
+     Log.i(TAG, "CSD_DEBUG: csd-0 (SPS) size=${bytes.size} bytes, hex=${bytes.take(32).joinToString("") { "%02x".format(it) }}...")
    }
    csd1?.let {
      val bytes = ByteArray(it.remaining())
      it.duplicate().get(bytes)
      initData.add(bytes)
+     Log.i(TAG, "CSD_DEBUG: csd-1 (PPS) size=${bytes.size} bytes, hex=${bytes.joinToString("") { "%02x".format(it) }}")
    }
+
+   val totalCsdSize = initData.sumOf { it.size }
+   Log.i(TAG, "CSD_DEBUG: Total CSD size=$totalCsdSize bytes (csd-0=${csd0?.remaining() ?: 0}, csd-1=${csd1?.remaining() ?: 0})")
+   Log.i(TAG, "ROTATION_DEBUG convertToMedia3Format: orientationDegrees=$orientationDegrees, width=$width, height=$height")

    return Format.Builder()
      .setSampleMimeType(mimeType)
      .setWidth(width)
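Note on the csd-0/csd-1 logging above: Android encoders usually emit these buffers in Annex-B form, i.e. each parameter set prefixed with a 00 00 00 01 start code, while the avcC box in the init segment stores bare, length-prefixed parameter sets (Media3 handles that conversion). A hypothetical helper, not part of this diff, that makes the hex dumps easier to interpret:

    // Assumption: a csd buffer is either Annex-B or already raw; report which.
    fun describeCsd(csd: java.nio.ByteBuffer): String {
        val head = ByteArray(minOf(4, csd.remaining()))
        csd.duplicate().get(head)
        val annexB = head.size == 4 && head[0] == 0.toByte() &&
            head[1] == 0.toByte() && head[2] == 0.toByte() && head[3] == 1.toByte()
        return if (annexB) "annex-b (start-code prefixed)" else "raw / length-prefixed"
    }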
@@ -329,4 +359,404 @@ class FragmentedRecordingManager(
      }
      .build()
  }
/**
* Processes a segment file for HLS compatibility:
* 1. For segment 0: extracts ftyp+moov header as init.mp4
* 2. Strips ftyp+moov from segment, keeping only moof+mdat (the fragment)
* 3. Injects tfdt box into moof for proper HLS timing
*
* Media3's FragmentedMp4Muxer creates self-contained MP4s, but HLS needs:
* - init.mp4: ftyp + moov (codec configuration)
* - segments: moof + mdat only (fragments referencing init)
*/
private fun processSegmentForHLS(file: File, segmentIndex: Int, baseMediaDecodeTimeUs: Long) {
try {
val originalBytes = file.readBytes()
val buffer = ByteBuffer.wrap(originalBytes).order(ByteOrder.BIG_ENDIAN)
// Find where moof starts (everything before is header: ftyp + moov)
val moofStartPos = findMoofPosition(buffer)
if (moofStartPos < 0) {
Log.e(TAG, "HLS_PROCESS: Could not find moof in ${file.name}")
return
}
Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex - moof starts at $moofStartPos, total size=${originalBytes.size}")
// For segment 0, extract header as init.mp4
if (segmentIndex == 0 && !initSegmentEmitted) {
val headerBytes = originalBytes.copyOfRange(0, moofStartPos)
val initFile = File(outputDirectory, "init.mp4")
initFile.writeBytes(headerBytes)
Log.i(TAG, "HLS_PROCESS: Created init.mp4 with ${headerBytes.size} bytes (ftyp+moov)")
// Debug: dump the init.mp4 structure
dumpMp4BoxStructure(headerBytes, "INIT_STRUCTURE")
callbacks.onInitSegmentReady(initFile)
initSegmentEmitted = true
}
// Extract fragment (moof + mdat only)
val fragmentBytes = originalBytes.copyOfRange(moofStartPos, originalBytes.size)
Log.d(TAG, "HLS_PROCESS: Extracted fragment of ${fragmentBytes.size} bytes")
// Inject tfdt into the fragment
// Note: in the fragment, moof is at position 0
val processedFragment = injectTfdtIntoFragment(fragmentBytes, baseMediaDecodeTimeUs)
// Write back the processed fragment (stripped of header)
file.writeBytes(processedFragment)
Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex processed - header stripped, tfdt injected, final size=${processedFragment.size}")
} catch (e: Exception) {
Log.e(TAG, "Error processing segment ${file.name} for HLS", e)
}
}
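Note: after this processing the output directory holds init.mp4 (ftyp+moov) plus numbered moof+mdat fragments, which is exactly the layout an fMP4 HLS playlist describes via EXT-X-MAP. A sketch of such a playlist builder follows; the tag usage is standard HLS (RFC 8216), but the helper and its names are illustrative, not part of this diff:

    import kotlin.math.ceil

    fun buildPlaylist(segmentDurationsSec: List<Double>): String = buildString {
        appendLine("#EXTM3U")
        appendLine("#EXT-X-VERSION:6")  // EXT-X-MAP requires protocol version >= 6
        appendLine("#EXT-X-TARGETDURATION:${ceil(segmentDurationsSec.maxOrNull() ?: 6.0).toInt()}")
        appendLine("#EXT-X-MAP:URI=\"init.mp4\"")  // the extracted ftyp+moov header
        segmentDurationsSec.forEachIndexed { i, d ->
            appendLine("#EXTINF:$d,")
            appendLine("$i.mp4")  // matches the "$segmentIndex.mp4" naming above
        }
        appendLine("#EXT-X-ENDLIST")
    }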
/**
* Finds the position of the moof box in the file.
* Returns -1 if not found.
*/
private fun findMoofPosition(buffer: ByteBuffer): Int {
var pos = 0
while (pos < buffer.limit() - 8) {
buffer.position(pos)
val size = buffer.int.toLong() and 0xFFFFFFFFL
val type = buffer.int
if (size < 8) break
// 'moof' = 0x6D6F6F66
if (type == 0x6D6F6F66) {
return pos
}
pos += size.toInt()
}
return -1
}
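Note: findMoofPosition assumes every top-level box carries a plain 32-bit size. ISO-BMFF also permits size == 1 (a 64-bit largesize follows the type) and size == 0 (box runs to end of file); Media3's muxer output should not produce either at the top level, but a defensive scanner would look like this sketch (illustrative, not part of this diff):

    fun findBox(data: ByteArray, fourcc: Int): Int {
        val buf = java.nio.ByteBuffer.wrap(data).order(java.nio.ByteOrder.BIG_ENDIAN)
        var pos = 0L
        while (pos <= data.size - 8L) {
            buf.position(pos.toInt())
            var size = buf.int.toLong() and 0xFFFFFFFFL
            val type = buf.int
            if (size == 1L) size = buf.long         // 64-bit largesize follows the type field
            if (size == 0L) size = data.size - pos  // box extends to end of file
            if (size < 8L) return -1                // corrupt box header
            if (type == fourcc) return pos.toInt()
            pos += size
        }
        return -1
    }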
/**
* Injects a tfdt box into a fragment (moof+mdat).
* The fragment has moof at position 0 (header already stripped).
* Also fixes tfhd.base_data_offset since we stripped the original file header.
*/
private fun injectTfdtIntoFragment(fragmentBytes: ByteArray, baseMediaDecodeTimeUs: Long): ByteArray {
val buffer = ByteBuffer.wrap(fragmentBytes).order(ByteOrder.BIG_ENDIAN)
// Find box positions within the fragment (moof is at position 0)
val positions = findBoxPositionsInFragment(buffer)
if (positions == null) {
Log.e(TAG, "TFDT_INJECT: Could not find required boxes in fragment")
return fragmentBytes
}
val (moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos) = positions
Log.d(TAG, "TFDT_INJECT: Fragment boxes - moof@$moofPos(size=$moofSize), traf@$trafPos, tfhd@$tfhdPos, trun@$trunPos")
// First, fix tfhd.base_data_offset - it was pointing to the original file position
// but now moof is at position 0, so base_data_offset should be 0
fixTfhdBaseDataOffset(buffer, tfhdPos.toInt())
// Create tfdt box (version 1, 64-bit baseMediaDecodeTime)
val tfdtSize = 20
val tfdtBytes = ByteBuffer.allocate(tfdtSize).order(ByteOrder.BIG_ENDIAN)
tfdtBytes.putInt(tfdtSize) // size
tfdtBytes.putInt(0x74666474) // 'tfdt'
tfdtBytes.put(1.toByte()) // version = 1
tfdtBytes.put(0.toByte()) // flags[0]
tfdtBytes.put(0.toByte()) // flags[1]
tfdtBytes.put(0.toByte()) // flags[2]
tfdtBytes.putLong(baseMediaDecodeTimeUs) // baseMediaDecodeTime
// Create new fragment with tfdt injected after tfhd
val newBytes = ByteArray(fragmentBytes.size + tfdtSize)
val insertPos = tfhdEnd.toInt()
// Copy bytes before insertion point
System.arraycopy(fragmentBytes, 0, newBytes, 0, insertPos)
// Insert tfdt
System.arraycopy(tfdtBytes.array(), 0, newBytes, insertPos, tfdtSize)
// Copy bytes after insertion point
System.arraycopy(fragmentBytes, insertPos, newBytes, insertPos + tfdtSize, fragmentBytes.size - insertPos)
// Update box sizes in the new buffer
val newBuffer = ByteBuffer.wrap(newBytes).order(ByteOrder.BIG_ENDIAN)
// Update moof size
val newMoofSize = moofSize + tfdtSize
newBuffer.putInt(moofPos.toInt(), newMoofSize.toInt())
// Update traf size
val newTrafSize = trafSize + tfdtSize
newBuffer.putInt(trafPos.toInt(), newTrafSize.toInt())
// Update trun data_offset if present
val newTrunPos = trunPos.toInt() + tfdtSize
updateTrunDataOffset(newBuffer, newTrunPos, tfdtSize)
Log.i(TAG, "TFDT_INJECT: Injected tfdt with baseMediaDecodeTime=$baseMediaDecodeTimeUs us")
return newBytes
}
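Note: the injected box is exactly 20 bytes: a 4-byte size (00 00 00 14), the fourcc 'tfdt' (74 66 64 74), version 01 with zero flags (01 00 00 00), and a 64-bit big-endian baseMediaDecodeTime, e.g. 00 00 00 00 00 5B 8D 80 for 6,000,000 µs. A hypothetical round-trip check, not in this diff, for asserting the patch landed where expected:

    fun readTfdtBaseTime(fragment: ByteArray, tfdtPos: Int): Long {
        val buf = java.nio.ByteBuffer.wrap(fragment).order(java.nio.ByteOrder.BIG_ENDIAN)
        buf.position(tfdtPos)
        require(buf.int == 20) { "expected the 20-byte version-1 tfdt" }
        require(buf.int == 0x74666474) { "not a tfdt box" }
        require(buf.int ushr 24 == 1) { "expected version 1 (64-bit time)" }  // version+flags word
        return buf.long  // baseMediaDecodeTime
    }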
/**
* Data class to hold box positions for tfdt injection.
*/
private data class BoxPositions(
val moofPos: Long,
val moofSize: Long,
val trafPos: Long,
val trafSize: Long,
val tfhdPos: Long, // Position of tfhd (need to fix base_data_offset)
val tfhdEnd: Long, // Position right after tfhd where we'll insert tfdt
val trunPos: Long // Position of trun (need to update its data_offset)
)
/**
* Finds the positions of moof, traf, tfhd, and trun boxes in a fragment.
* In a fragment, moof is expected to be at position 0.
*/
private fun findBoxPositionsInFragment(buffer: ByteBuffer): BoxPositions? {
val fileSize = buffer.limit()
var pos = 0
while (pos < fileSize - 8) {
buffer.position(pos)
val size = buffer.int.toLong() and 0xFFFFFFFFL
val type = buffer.int
if (size < 8) break
// 'moof' = 0x6D6F6F66
if (type == 0x6D6F6F66) {
val moofPos = pos.toLong()
val moofSize = size
val moofEnd = pos + size.toInt()
var childPos = pos + 8
while (childPos < moofEnd - 8) {
buffer.position(childPos)
val childSize = buffer.int.toLong() and 0xFFFFFFFFL
val childType = buffer.int
if (childSize < 8) break
// 'traf' = 0x74726166
if (childType == 0x74726166) {
val trafPos = childPos.toLong()
val trafSize = childSize
val trafEnd = childPos + childSize.toInt()
var trafChildPos = childPos + 8
var tfhdPos: Long = -1
var tfhdEnd: Long = -1
var trunPos: Long = -1
while (trafChildPos < trafEnd - 8) {
buffer.position(trafChildPos)
val trafChildSize = buffer.int.toLong() and 0xFFFFFFFFL
val trafChildType = buffer.int
if (trafChildSize < 8) break
// 'tfhd' = 0x74666864
if (trafChildType == 0x74666864) {
tfhdPos = trafChildPos.toLong()
tfhdEnd = trafChildPos + trafChildSize
}
// 'trun' = 0x7472756E
else if (trafChildType == 0x7472756E) {
trunPos = trafChildPos.toLong()
}
trafChildPos += trafChildSize.toInt()
}
if (tfhdPos > 0 && tfhdEnd > 0 && trunPos > 0) {
return BoxPositions(moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos)
}
}
childPos += childSize.toInt()
}
}
pos += size.toInt()
}
return null
}
/**
* Updates the trun box's data_offset field if present.
* The data_offset points to sample data in mdat, and needs to be
* increased by the size of the injected tfdt box.
*
* trun structure:
* - 4 bytes: size
* - 4 bytes: type ('trun')
* - 1 byte: version
* - 3 bytes: flags
* - 4 bytes: sample_count
* - [optional] 4 bytes: data_offset (if flags & 0x000001)
*/
private fun updateTrunDataOffset(buffer: ByteBuffer, trunPos: Int, offsetDelta: Int) {
buffer.position(trunPos + 8) // Skip size and type
val version = buffer.get().toInt() and 0xFF
val flags = ((buffer.get().toInt() and 0xFF) shl 16) or
((buffer.get().toInt() and 0xFF) shl 8) or
(buffer.get().toInt() and 0xFF)
// Check if data_offset_present flag (0x000001) is set
if ((flags and 0x000001) != 0) {
val sampleCount = buffer.int
val dataOffsetPos = trunPos + 16 // size(4) + type(4) + version(1) + flags(3) + sample_count(4)
buffer.position(dataOffsetPos)
val originalOffset = buffer.int
val newOffset = originalOffset + offsetDelta
buffer.putInt(dataOffsetPos, newOffset)
Log.d(TAG, "TFDT_INJECT: Updated trun data_offset: $originalOffset -> $newOffset")
} else {
Log.d(TAG, "TFDT_INJECT: trun has no data_offset field (flags=0x${flags.toString(16)})")
}
}
/**
* Fixes the tfhd box's base_data_offset field after stripping the file header.
* When we strip ftyp+moov from the original segment, the base_data_offset
* (which pointed to a position in the original file) becomes incorrect.
* We set it to 0 since moof is now at the start of the fragment.
*
* tfhd structure:
* - 4 bytes: size
* - 4 bytes: type ('tfhd')
* - 1 byte: version
* - 3 bytes: flags
* - 4 bytes: track_id
* - [optional] 8 bytes: base_data_offset (if flags & 0x000001)
*/
private fun fixTfhdBaseDataOffset(buffer: ByteBuffer, tfhdPos: Int) {
buffer.position(tfhdPos + 8) // Skip size and type
val version = buffer.get().toInt() and 0xFF
val flags = ((buffer.get().toInt() and 0xFF) shl 16) or
((buffer.get().toInt() and 0xFF) shl 8) or
(buffer.get().toInt() and 0xFF)
// Check if base_data_offset_present flag (0x000001) is set
if ((flags and 0x000001) != 0) {
val trackId = buffer.int
val baseDataOffsetPos = tfhdPos + 16 // size(4) + type(4) + version(1) + flags(3) + track_id(4)
buffer.position(baseDataOffsetPos)
val originalOffset = buffer.long
// Set to 0 since moof is now at start of fragment
buffer.putLong(baseDataOffsetPos, 0L)
Log.i(TAG, "TFHD_FIX: Fixed base_data_offset: $originalOffset -> 0")
} else {
Log.d(TAG, "TFHD_FIX: tfhd has no base_data_offset field (flags=0x${flags.toString(16)})")
}
}
/**
* Debug function to dump MP4 box structure and find avcC/stsd info.
*/
private fun dumpMp4BoxStructure(data: ByteArray, logPrefix: String) {
val buffer = ByteBuffer.wrap(data).order(ByteOrder.BIG_ENDIAN)
dumpBoxesRecursive(buffer, 0, data.size, 0, logPrefix)
}
private fun dumpBoxesRecursive(buffer: ByteBuffer, start: Int, end: Int, depth: Int, logPrefix: String) {
var pos = start
val indent = " ".repeat(depth)
while (pos < end - 8) {
buffer.position(pos)
val size = buffer.int.toLong() and 0xFFFFFFFFL
val typeInt = buffer.int
val typeBytes = ByteArray(4)
typeBytes[0] = ((typeInt shr 24) and 0xFF).toByte()
typeBytes[1] = ((typeInt shr 16) and 0xFF).toByte()
typeBytes[2] = ((typeInt shr 8) and 0xFF).toByte()
typeBytes[3] = (typeInt and 0xFF).toByte()
val typeStr = String(typeBytes, Charsets.US_ASCII)
if (size < 8 || pos + size > end) break
Log.i(TAG, "$logPrefix: $indent[$typeStr] size=$size @ $pos")
// For ftyp, dump the brands
if (typeStr == "ftyp" && size >= 16) {
buffer.position(pos + 8)
val majorBrand = ByteArray(4)
buffer.get(majorBrand)
val minorVersion = buffer.int
Log.i(TAG, "$logPrefix: $indent major_brand=${String(majorBrand)}, minor_version=$minorVersion")
val compatBrandsStart = pos + 16
val compatBrandsEnd = pos + size.toInt()
val brands = mutableListOf<String>()
var brandPos = compatBrandsStart
while (brandPos + 4 <= compatBrandsEnd) {
buffer.position(brandPos)
val brand = ByteArray(4)
buffer.get(brand)
brands.add(String(brand))
brandPos += 4
}
Log.i(TAG, "$logPrefix: $indent compatible_brands=${brands.joinToString(",")}")
}
// For avcC, dump the SPS/PPS info
if (typeStr == "avcC" && size >= 13) {
buffer.position(pos + 8)
val configVersion = buffer.get().toInt() and 0xFF
val profileIdc = buffer.get().toInt() and 0xFF
val profileCompat = buffer.get().toInt() and 0xFF
val levelIdc = buffer.get().toInt() and 0xFF
val lengthSizeMinusOne = buffer.get().toInt() and 0x03
val numSps = buffer.get().toInt() and 0x1F
Log.i(TAG, "$logPrefix: $indent avcC: version=$configVersion, profile=$profileIdc, level=$levelIdc, numSPS=$numSps")
// Read SPS lengths
var spsTotal = 0
for (i in 0 until numSps) {
val spsLen = buffer.short.toInt() and 0xFFFF
spsTotal += spsLen
Log.i(TAG, "$logPrefix: $indent SPS[$i] length=$spsLen")
buffer.position(buffer.position() + spsLen) // Skip SPS data
}
// Read PPS count and lengths
if (buffer.position() < pos + size) {
val numPps = buffer.get().toInt() and 0xFF
var ppsTotal = 0
for (i in 0 until numPps) {
if (buffer.position() + 2 <= pos + size) {
val ppsLen = buffer.short.toInt() and 0xFFFF
ppsTotal += ppsLen
Log.i(TAG, "$logPrefix: $indent PPS[$i] length=$ppsLen")
buffer.position(buffer.position() + ppsLen) // Skip PPS data
}
}
Log.i(TAG, "$logPrefix: $indent avcC total: ${size} bytes, SPS=$spsTotal bytes, PPS=$ppsTotal bytes")
}
}
// Recurse into container boxes
val containerBoxes = setOf("moov", "trak", "mdia", "minf", "stbl", "stsd", "mvex", "edts")
if (typeStr in containerBoxes) {
// stsd has 8 extra bytes (version/flags + entry_count) before children
val childStart = if (typeStr == "stsd") pos + 16 else pos + 8
dumpBoxesRecursive(buffer, childStart, pos + size.toInt(), depth + 1, logPrefix)
}
// avc1 is a sample entry, structure: 8 byte header + 78 byte fixed fields + child boxes
if (typeStr == "avc1") {
dumpBoxesRecursive(buffer, pos + 86, pos + size.toInt(), depth + 1, logPrefix)
}
pos += size.toInt()
}
}
}

RecordingSession.kt

@@ -24,7 +24,7 @@ class RecordingSession(
  private val enableAudio: Boolean,
  private val fps: Int? = null,
  private val hdr: Boolean = false,
- private val cameraOrientation: Orientation,
+ val cameraOrientation: Orientation,
  private val options: RecordVideoOptions,
  private val filePath: String,
  private val callback: (video: Video) -> Unit,

VideoPipeline.kt

@@ -162,6 +162,14 @@ class VideoPipeline(
    // 4. Get the transform matrix from the SurfaceTexture (rotations/scales applied by Camera)
    surfaceTexture.getTransformMatrix(transformMatrix)

+   // Log transform matrix for debugging rotation issues (only when recording)
+   if (recordingSession != null) {
+     Log.i(TAG, "ROTATION_DEBUG TransformMatrix: [${transformMatrix[0]}, ${transformMatrix[1]}, ${transformMatrix[2]}, ${transformMatrix[3]}], " +
+       "[${transformMatrix[4]}, ${transformMatrix[5]}, ${transformMatrix[6]}, ${transformMatrix[7]}], " +
+       "[${transformMatrix[8]}, ${transformMatrix[9]}, ${transformMatrix[10]}, ${transformMatrix[11]}], " +
+       "[${transformMatrix[12]}, ${transformMatrix[13]}, ${transformMatrix[14]}, ${transformMatrix[15]}]")
+   }
+
    // 5. Draw it with applied rotation/mirroring
    onFrame(transformMatrix)
@@ -181,11 +189,15 @@ class VideoPipeline(
  /**
   * Configures the Pipeline to also write Frames to a Surface from a `MediaRecorder` (or null)
   */
- fun setRecordingSessionOutput(recordingSession: RecordingSession?) {
+ fun setRecordingSessionOutput(recordingSession: RecordingSession?, orientation: Orientation = Orientation.LANDSCAPE_LEFT) {
    synchronized(this) {
      if (recordingSession != null) {
        // Configure OpenGL pipeline to stream Frames into the Recording Session's surface
-       Log.i(TAG, "Setting ${recordingSession.size} RecordingSession Output...")
+       Log.i(TAG, "Setting ${recordingSession.size} RecordingSession Output with orientation=$orientation...")
+       // Set the recording orientation for the native layer
+       // 0 = LANDSCAPE_LEFT (CCW), 1 = LANDSCAPE_RIGHT (CW)
+       val orientationValue = if (orientation == Orientation.LANDSCAPE_RIGHT) 1 else 0
+       setRecordingOrientation(orientationValue)
        setRecordingSessionOutputSurface(recordingSession.surface)
        this.recordingSession = recordingSession
      } else {
@@ -252,5 +264,6 @@ class VideoPipeline(
  private external fun onFrame(transformMatrix: FloatArray)
  private external fun setRecordingSessionOutputSurface(surface: Any)
  private external fun removeRecordingSessionOutputSurface()
+ private external fun setRecordingOrientation(orientation: Int)
  private external fun initHybrid(width: Int, height: Int): HybridData
}