diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt b/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt index 3384ef2..07f0b9c 100644 --- a/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt +++ b/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt @@ -40,15 +40,26 @@ fun CameraView.invokeOnStopped() { this.sendEvent(event) } -fun CameraView.invokeOnChunkReady(filepath: File, index: Int) { - Log.e(CameraView.TAG, "invokeOnError(...):") +fun CameraView.invokeOnChunkReady(filepath: File, index: Int, durationUs: Long?) { + Log.i(CameraView.TAG, "invokeOnChunkReady(...): index=$index, filepath=$filepath, durationUs=$durationUs") val event = Arguments.createMap() event.putInt("index", index) event.putString("filepath", filepath.toString()) + if (durationUs != null) { + event.putDouble("duration", durationUs / 1_000_000.0) // Convert microseconds to seconds + } val reactContext = context as ReactContext reactContext.getJSModule(RCTEventEmitter::class.java).receiveEvent(id, "onVideoChunkReady", event) } +fun CameraView.invokeOnInitReady(filepath: File) { + Log.i(CameraView.TAG, "invokeOnInitReady(...): filepath=$filepath") + val event = Arguments.createMap() + event.putString("filepath", filepath.toString()) + val reactContext = context as ReactContext + reactContext.getJSModule(RCTEventEmitter::class.java).receiveEvent(id, "onInitReady", event) +} + fun CameraView.invokeOnError(error: Throwable) { Log.e(CameraView.TAG, "invokeOnError(...):") error.printStackTrace() diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraView.kt b/package/android/src/main/java/com/mrousavy/camera/CameraView.kt index 3569cd3..af4f2c6 100644 --- a/package/android/src/main/java/com/mrousavy/camera/CameraView.kt +++ b/package/android/src/main/java/com/mrousavy/camera/CameraView.kt @@ -271,8 +271,12 @@ class CameraView(context: Context) : invokeOnStopped() } - override fun 
onVideoChunkReady(filepath: File, index: Int) {
-    invokeOnChunkReady(filepath, index)
+  override fun onVideoChunkReady(filepath: File, index: Int, durationUs: Long?) {
+    invokeOnChunkReady(filepath, index, durationUs)
+  }
+
+  override fun onInitSegmentReady(filepath: File) {
+    invokeOnInitReady(filepath)
+  }
 
   override fun onCodeScanned(codes: List<Barcode>, scannerFrame: CodeScannerFrame) {
diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt b/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt
index af48c52..a6ac35e 100644
--- a/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt
@@ -32,6 +32,7 @@ class CameraViewManager : ViewGroupManager<CameraView>() {
       .put("cameraError", MapBuilder.of("registrationName", "onError"))
       .put("cameraCodeScanned", MapBuilder.of("registrationName", "onCodeScanned"))
       .put("onVideoChunkReady", MapBuilder.of("registrationName", "onVideoChunkReady"))
+      .put("onInitReady", MapBuilder.of("registrationName", "onInitReady"))
       .build()?.toMutableMap()
 
   override fun getName(): String = TAG
diff --git a/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt
index 7c9924c..dae4521 100644
--- a/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt
@@ -428,19 +428,21 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
     // Get actual device rotation from WindowManager since the React Native orientation hook
     // doesn't update when rotating between landscape-left and landscape-right on Android.
- // Map device rotation to the correct orientationHint for video recording: - // - Counter-clockwise (ROTATION_90) → 270° hint - // - Clockwise (ROTATION_270) → 90° hint + // Map device rotation to the correct orientation for video recording. + // Surface.ROTATION_90 = device rotated 90° CCW = phone top on LEFT = LANDSCAPE_LEFT + // Surface.ROTATION_270 = device rotated 90° CW = phone top on RIGHT = LANDSCAPE_RIGHT val windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager val deviceRotation = windowManager.defaultDisplay.rotation val recordingOrientation = when (deviceRotation) { Surface.ROTATION_0 -> Orientation.PORTRAIT - Surface.ROTATION_90 -> Orientation.LANDSCAPE_RIGHT + Surface.ROTATION_90 -> Orientation.LANDSCAPE_LEFT Surface.ROTATION_180 -> Orientation.PORTRAIT_UPSIDE_DOWN - Surface.ROTATION_270 -> Orientation.LANDSCAPE_LEFT + Surface.ROTATION_270 -> Orientation.LANDSCAPE_RIGHT else -> Orientation.PORTRAIT } + Log.i(TAG, "startRecording: orientation=${recordingOrientation.toDegrees()}° (deviceRotation=$deviceRotation)") + val recording = RecordingSession( context, cameraId, @@ -448,7 +450,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam enableAudio, fps, videoOutput.enableHdr, - orientation, + recordingOrientation, options, filePath, callback, @@ -513,7 +515,8 @@ class CameraSession(private val context: Context, private val cameraManager: Cam fun onInitialized() fun onStarted() fun onStopped() - fun onVideoChunkReady(filepath: File, index: Int) + fun onVideoChunkReady(filepath: File, index: Int, durationUs: Long?) 
+    fun onInitSegmentReady(filepath: File)
     fun onCodeScanned(codes: List<Barcode>, scannerFrame: CodeScannerFrame)
   }
 }
diff --git a/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt
index 9444216..e88fbf6 100644
--- a/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt
@@ -14,7 +14,7 @@ import java.io.File
 import java.nio.ByteBuffer
 
 class ChunkedRecordingManager(private val encoder: MediaCodec, private val outputDirectory: File, private val orientationHint: Int, private val iFrameInterval: Int, private val callbacks: CameraSession.Callback) :
-  MediaCodec.Callback() {
+  MediaCodec.Callback(), ChunkedRecorderInterface {
   companion object {
     private const val TAG = "ChunkedRecorder"
 
@@ -30,8 +30,15 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu
       iFrameInterval: Int = 5
     ): ChunkedRecordingManager {
       val mimeType = options.videoCodec.toMimeType()
-      val cameraOrientationDegrees = cameraOrientation.toDegrees()
-      val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees();
+      // Use cameraOrientation (from WindowManager) for rotation metadata
+      // The options.orientation from JavaScript is unreliable on Android when rotating between landscape modes
+      // Note: MediaMuxer.setOrientationHint() uses opposite convention from HlsMuxer's rotation matrix
+      // We need to invert the rotation: 90 <-> 270, while 0 and 180 stay the same
+      val orientationDegrees = when (cameraOrientation.toDegrees()) {
+        90 -> 270
+        270 -> 90
+        else -> cameraOrientation.toDegrees()
+      }
       val (width, height) = if (cameraOrientation.isLandscape()) {
         size.height to size.width
       } else {
         size.width to size.height
       }
@@ -55,12 +62,12 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu
       format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, iFrameInterval)
format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate) - Log.d(TAG, "Video Format: $format, camera orientation $cameraOrientationDegrees, recordingOrientation: $recordingOrientationDegrees") + Log.d(TAG, "Video Format: $format, orientation: $orientationDegrees") // Create a MediaCodec encoder, and configure it with our format. Get a Surface // we can use for input and wrap it with a class that handles the EGL work. codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE) return ChunkedRecordingManager( - codec, outputDirectory, recordingOrientationDegrees, iFrameInterval, callbacks + codec, outputDirectory, orientationDegrees, iFrameInterval, callbacks ) } } @@ -73,7 +80,7 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu private val targetDurationUs = iFrameInterval * 1000000 - val surface: Surface = encoder.createInputSurface() + override val surface: Surface = encoder.createInputSurface() init { if (!this.outputDirectory.exists()) { @@ -91,11 +98,14 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu muxer.start() } + fun writeSample(buffer: java.nio.ByteBuffer, bufferInfo: BufferInfo) { + muxer.writeSampleData(videoTrack, buffer, bufferInfo) + } fun finish() { muxer.stop() muxer.release() - callbacks.onVideoChunkReady(filepath, chunkIndex) + callbacks.onVideoChunkReady(filepath, chunkIndex, null) } } @@ -133,12 +143,12 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu return bufferInfo.presentationTimeUs - context.startTimeUs } - fun start() { + override fun start() { encoder.start() recording = true } - fun finish() { + override fun finish() { synchronized(this) { muxerContext?.finish() recording = false @@ -168,7 +178,7 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu encoder.releaseOutputBuffer(index, false) return } - context.muxer.writeSampleData(context.videoTrack, encodedData, bufferInfo) + 
context.writeSample(encodedData, bufferInfo) encoder.releaseOutputBuffer(index, false) } } diff --git a/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt new file mode 100644 index 0000000..3464dad --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt @@ -0,0 +1,15 @@ +package com.mrousavy.camera.core + +import android.view.Surface + +/** + * Common interface for chunked video recorders. + * Implemented by both ChunkedRecordingManager (regular MP4) and + * FragmentedRecordingManager (HLS-compatible fMP4). + */ +interface ChunkedRecorderInterface { + val surface: Surface + + fun start() + fun finish() +} diff --git a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt new file mode 100644 index 0000000..fc44441 --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt @@ -0,0 +1,176 @@ +package com.mrousavy.camera.core + +import android.media.MediaCodec +import android.media.MediaCodec.BufferInfo +import android.media.MediaCodecInfo +import android.media.MediaFormat +import android.util.Log +import android.util.Size +import android.view.Surface +import com.mrousavy.camera.types.Orientation +import com.mrousavy.camera.types.RecordVideoOptions +import java.io.File + +/** + * A recording manager that produces HLS-compatible fragmented MP4 segments. 
+ * + * Uses HlsMuxer (following Android's MediaMuxer pattern) to produce: + * - init.mp4: Initialization segment (ftyp + moov with mvex) + * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat) + */ +class FragmentedRecordingManager( + private val encoder: MediaCodec, + private val muxer: HlsMuxer +) : MediaCodec.Callback(), ChunkedRecorderInterface { + + companion object { + private const val TAG = "FragmentedRecorder" + private const val DEFAULT_SEGMENT_DURATION_SECONDS = 6 + + fun fromParams( + callbacks: CameraSession.Callback, + size: Size, + enableAudio: Boolean, + fps: Int? = null, + cameraOrientation: Orientation, + bitRate: Int, + options: RecordVideoOptions, + outputDirectory: File, + segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS + ): FragmentedRecordingManager { + val mimeType = options.videoCodec.toMimeType() + // Use cameraOrientation (from WindowManager) for rotation metadata + // The options.orientation from JavaScript is unreliable on Android when rotating between landscape modes + val orientationDegrees = cameraOrientation.toDegrees() + + // Swap dimensions based on camera orientation, same as ChunkedRecordingManager + val (width, height) = if (cameraOrientation.isLandscape()) { + size.height to size.width + } else { + size.width to size.height + } + + Log.d(TAG, "Recording: ${width}x${height}, orientation=$orientationDegrees°") + + val format = MediaFormat.createVideoFormat(mimeType, width, height) + val codec = MediaCodec.createEncoderByType(mimeType) + + format.setInteger( + MediaFormat.KEY_COLOR_FORMAT, + MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface + ) + + // Use 30fps as conservative default since many Android devices can't sustain + // higher frame rates at high resolutions. This affects: + // - Encoder: bitrate allocation and I-frame interval calculation + // - HlsMuxer: timescale for accurate sample durations + // The actual frame timing comes from camera timestamps regardless of this setting. 
+ val effectiveFps = 30 + format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps) + format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds) + format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate) + + codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE) + + val muxer = HlsMuxer( + outputDirectory = outputDirectory, + callback = object : HlsMuxer.Callback { + override fun onInitSegmentReady(file: File) { + callbacks.onInitSegmentReady(file) + } + + override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) { + callbacks.onVideoChunkReady(file, index, durationUs) + } + }, + orientationDegrees = orientationDegrees, + fps = effectiveFps + ) + muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L) + + return FragmentedRecordingManager(codec, muxer) + } + } + + private var recording = false + private var muxerStarted = false + private var trackIndex = -1 + + override val surface: Surface = encoder.createInputSurface() + + init { + encoder.setCallback(this) + } + + override fun start() { + encoder.start() + recording = true + } + + override fun finish() { + synchronized(this) { + recording = false + + if (muxerStarted) { + muxer.stop() + muxer.release() + } + + try { + encoder.stop() + encoder.release() + } catch (e: Exception) { + Log.e(TAG, "Error stopping encoder", e) + } + } + } + + // MediaCodec.Callback methods + + override fun onInputBufferAvailable(codec: MediaCodec, index: Int) { + // Not used for Surface input + } + + override fun onOutputBufferAvailable(codec: MediaCodec, index: Int, bufferInfo: BufferInfo) { + synchronized(this) { + if (!recording) { + encoder.releaseOutputBuffer(index, false) + return + } + + if (!muxerStarted) { + encoder.releaseOutputBuffer(index, false) + return + } + + val buffer = encoder.getOutputBuffer(index) + if (buffer == null) { + Log.e(TAG, "getOutputBuffer returned null") + encoder.releaseOutputBuffer(index, false) + return + } + + try { + 
muxer.writeSampleData(trackIndex, buffer, bufferInfo) + } catch (e: Exception) { + Log.e(TAG, "Error writing sample", e) + } + + encoder.releaseOutputBuffer(index, false) + } + } + + override fun onError(codec: MediaCodec, e: MediaCodec.CodecException) { + Log.e(TAG, "Codec error: ${e.message}") + } + + override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) { + synchronized(this) { + Log.i(TAG, "Output format changed: $format") + + trackIndex = muxer.addTrack(format) + muxer.start() + muxerStarted = true + } + } +} diff --git a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt new file mode 100644 index 0000000..1e41589 --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt @@ -0,0 +1,1004 @@ +package com.mrousavy.camera.core + +import android.media.MediaCodec +import android.media.MediaFormat +import android.util.Log +import java.io.ByteArrayOutputStream +import java.io.DataOutputStream +import java.io.File +import java.io.FileOutputStream +import java.nio.ByteBuffer + +/** + * A muxer for creating HLS-compatible fragmented MP4 output. + * + * Follows the same pattern as Android's MediaMuxer: + * 1. Create muxer with output directory + * 2. addTrack() with MediaFormat + * 3. start() - writes init.mp4 + * 4. writeSampleData() for each encoded sample + * 5. stop() - finalizes last segment + * 6. 
release() - cleanup
+ *
+ * Produces:
+ * - init.mp4: Initialization segment (ftyp + moov with mvex)
+ * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
+ */
+class HlsMuxer(
+  private val outputDirectory: File,
+  private val callback: Callback,
+  private val orientationDegrees: Int = 0,
+  private val fps: Int = 30
+) {
+  companion object {
+    private const val TAG = "HlsMuxer"
+    private const val DEFAULT_SEGMENT_DURATION_US = 6_000_000L // 6 seconds
+  }
+
+  interface Callback {
+    fun onInitSegmentReady(file: File)
+    fun onMediaSegmentReady(file: File, index: Int, durationUs: Long)
+  }
+
+  // Configuration
+  private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
+  private var timescale: Int = 30000 // Default, updated in addTrack() to fps * 1000
+
+  // State
+  private var state = State.UNINITIALIZED
+  private var trackFormat: MediaFormat? = null
+  private var sequenceNumber = 1
+  private var segmentIndex = 0
+
+  // Current segment data
+  private val pendingSamples = mutableListOf<Sample>()
+  private var segmentStartTimeUs = -1L
+  private var lastPresentationTimeUs = 0L
+
+  // Timestamp normalization - MediaCodec timestamps are device uptime, not starting from 0
+  private var firstPresentationTimeUs = -1L
+
+  // Actual fps detection from frame timestamps
+  private var detectedFps: Int? = null
+  private var fpsDetectionSamples = mutableListOf<Long>()
+  private val FPS_DETECTION_SAMPLE_COUNT = 30
+
+  private enum class State {
+    UNINITIALIZED,
+    INITIALIZED,
+    STARTED,
+    STOPPED,
+    RELEASED
+  }
+
+  private data class Sample(
+    val data: ByteArray,
+    val presentationTimeUs: Long,
+    var durationUs: Long,
+    val isKeyFrame: Boolean
+  )
+
+  // ==================== Timestamp Normalization ====================
+
+  /**
+   * Normalizes a presentation timestamp to start from 0.
+   * The first timestamp received becomes time 0, and all subsequent
+   * timestamps are relative to that.
+ * + * This is necessary because MediaCodec timestamps are based on device uptime, + * not starting from 0. HLS players expect timestamps to start at or near 0. + */ + private fun normalizeTimestamp(rawPresentationTimeUs: Long): Long { + if (firstPresentationTimeUs < 0) { + firstPresentationTimeUs = rawPresentationTimeUs + Log.d(TAG, "First timestamp captured: ${rawPresentationTimeUs}us (${rawPresentationTimeUs / 1_000_000.0}s), normalizing to 0") + } + val normalized = rawPresentationTimeUs - firstPresentationTimeUs + // Log first few normalizations to debug + if (normalized < 1_000_000) { // First second + Log.d(TAG, "Timestamp: raw=${rawPresentationTimeUs}us -> normalized=${normalized}us") + } + return normalized + } + + // ==================== Annex-B to AVCC Conversion ==================== + + /** + * Converts H.264 data from Annex-B format to AVCC format. + * + * Annex-B uses start codes (00 00 00 01 or 00 00 01) to delimit NAL units. + * AVCC uses 4-byte big-endian length prefixes before each NAL unit. + * + * This conversion is required because: + * - MediaCodec outputs Annex-B format + * - fMP4/HLS requires AVCC format (as specified in avcC box with NAL length size = 4) + */ + private fun convertAnnexBToAvcc(annexBData: ByteArray): ByteArray { + val nalUnits = parseAnnexBNalUnits(annexBData) + if (nalUnits.isEmpty()) { + Log.w(TAG, "No NAL units found in sample, returning original data") + return annexBData + } + + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + for (nalUnit in nalUnits) { + // Write 4-byte big-endian length prefix + dos.writeInt(nalUnit.size) + // Write NAL unit data (without start code) + dos.write(nalUnit) + } + + return output.toByteArray() + } + + /** + * Parses Annex-B formatted data into individual NAL units. + * Returns list of NAL unit byte arrays (without start codes). 
+   */
+  private fun parseAnnexBNalUnits(data: ByteArray): List<ByteArray> {
+    val nalUnits = mutableListOf<ByteArray>()
+    var i = 0
+
+    while (i < data.size) {
+      // Find start code
+      val startCodeLength = findStartCode(data, i)
+      if (startCodeLength == 0) {
+        // No start code found at current position
+        // This might happen if data doesn't start with a start code
+        if (nalUnits.isEmpty() && i == 0) {
+          // Data might already be in AVCC format or malformed
+          // Try to detect AVCC format (first 4 bytes would be a reasonable length)
+          if (data.size >= 4) {
+            val possibleLength = ((data[0].toInt() and 0xFF) shl 24) or
+              ((data[1].toInt() and 0xFF) shl 16) or
+              ((data[2].toInt() and 0xFF) shl 8) or
+              (data[3].toInt() and 0xFF)
+            if (possibleLength > 0 && possibleLength <= data.size - 4) {
+              // Looks like AVCC format already, return original
+              Log.d(TAG, "Data appears to already be in AVCC format")
+              return emptyList()
+            }
+          }
+        }
+        i++
+        continue
+      }
+
+      val nalStart = i + startCodeLength
+
+      // Find end of this NAL unit (start of next, or end of data)
+      var nalEnd = data.size
+      var j = nalStart
+      while (j < data.size - 2) {
+        val nextStartCode = findStartCode(data, j)
+        if (nextStartCode > 0) {
+          nalEnd = j
+          break
+        }
+        j++
+      }
+
+      if (nalEnd > nalStart) {
+        nalUnits.add(data.copyOfRange(nalStart, nalEnd))
+      }
+
+      i = nalEnd
+    }
+
+    return nalUnits
+  }
+
+  /**
+   * Checks for Annex-B start code at given position.
+   * Returns start code length (3 or 4) or 0 if no start code found.
+ */ + private fun findStartCode(data: ByteArray, offset: Int): Int { + if (offset + 4 <= data.size && + data[offset] == 0.toByte() && + data[offset + 1] == 0.toByte() && + data[offset + 2] == 0.toByte() && + data[offset + 3] == 1.toByte()) { + return 4 // 4-byte start code: 00 00 00 01 + } + if (offset + 3 <= data.size && + data[offset] == 0.toByte() && + data[offset + 1] == 0.toByte() && + data[offset + 2] == 1.toByte()) { + return 3 // 3-byte start code: 00 00 01 + } + return 0 + } + + /** + * Sets the target segment duration. + * Must be called before start(). + */ + fun setSegmentDuration(durationUs: Long) { + check(state == State.UNINITIALIZED || state == State.INITIALIZED) { + "Cannot set segment duration after start()" + } + targetSegmentDurationUs = durationUs + } + + /** + * Adds a track to the muxer. + * + * @param format The MediaFormat describing the track + * @return Track index (always 0 for now, single video track) + */ + fun addTrack(format: MediaFormat): Int { + check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" } + + trackFormat = format + + // Use fps * 1000 as timescale for good precision (1000 timescale units per frame). + // This ensures accurate sample durations without integer truncation issues. + // Note: ffprobe may report r_frame_rate based on timescale, so the backend + // should use the explicit framesPerSecond from the API mutation, not ffprobe. + timescale = fps * 1000 + + state = State.INITIALIZED + + val formatWidth = try { format.getInteger(MediaFormat.KEY_WIDTH) } catch (e: Exception) { -1 } + val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 } + Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " + + "encoder output: ${formatWidth}x${formatHeight}, " + + "fps=$fps, timescale=$timescale, orientation=$orientationDegrees°") + + return 0 // Single track, index 0 + } + + /** + * Starts the muxer, writing the initialization segment. 
+ */ + fun start() { + check(state == State.INITIALIZED) { "Must call addTrack() before start()" } + val format = trackFormat ?: throw IllegalStateException("No track format") + + // Create output directory if needed, with proper error handling + if (!outputDirectory.exists()) { + val created = outputDirectory.mkdirs() + if (!created && !outputDirectory.exists()) { + throw IllegalStateException( + "Failed to create output directory: ${outputDirectory.absolutePath}. " + + "Parent exists: ${outputDirectory.parentFile?.exists()}, " + + "Parent path: ${outputDirectory.parentFile?.absolutePath}" + ) + } + Log.d(TAG, "Created output directory: ${outputDirectory.absolutePath}") + } + + // Write init segment + val initBytes = buildInitSegment(format) + val initFile = File(outputDirectory, "init.mp4") + FileOutputStream(initFile).use { it.write(initBytes) } + + // Log frame rate metadata for debugging + val defaultSampleDuration = timescale / fps + Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)") + Log.d(TAG, "Frame rate metadata: timescale=$timescale, fps=$fps, " + + "default_sample_duration=$defaultSampleDuration (ffprobe should calculate ${timescale}/${defaultSampleDuration}=${fps}fps)") + + callback.onInitSegmentReady(initFile) + + state = State.STARTED + } + + /** + * Writes sample data to the muxer. 
+ * + * @param trackIndex Track index (must be 0) + * @param buffer The encoded sample data + * @param bufferInfo Sample metadata (size, presentation time, flags) + */ + fun writeSampleData(trackIndex: Int, buffer: ByteBuffer, bufferInfo: MediaCodec.BufferInfo) { + check(state == State.STARTED) { "Must call start() before writeSampleData()" } + check(trackIndex == 0) { "Invalid track index: $trackIndex" } + + // Skip codec config data (already in init segment) + if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) { + return + } + + val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0 + // Normalize timestamp to start from 0 (MediaCodec uses device uptime) + val presentationTimeUs = normalizeTimestamp(bufferInfo.presentationTimeUs) + + // Detect actual fps from first N samples + if (detectedFps == null) { + fpsDetectionSamples.add(presentationTimeUs) + if (fpsDetectionSamples.size >= FPS_DETECTION_SAMPLE_COUNT) { + val elapsed = fpsDetectionSamples.last() - fpsDetectionSamples.first() + if (elapsed > 0) { + val actualFps = ((FPS_DETECTION_SAMPLE_COUNT - 1) * 1_000_000.0 / elapsed).toInt() + detectedFps = actualFps + if (kotlin.math.abs(actualFps - fps) > 5) { + Log.w(TAG, "Actual fps ($actualFps) differs significantly from configured fps ($fps)! 
" + + "This may cause processing issues if backend uses configured fps.") + } else { + Log.d(TAG, "Detected actual fps: $actualFps (configured: $fps)") + } + } + fpsDetectionSamples.clear() // Free memory + } + } + + // Initialize segment start time + if (segmentStartTimeUs < 0) { + segmentStartTimeUs = presentationTimeUs + } + + // Update duration of previous sample BEFORE finalization check + // This ensures the last sample has correct duration when segment is finalized + if (pendingSamples.isNotEmpty()) { + val lastSample = pendingSamples.last() + lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs + } + + // Check if we should finalize current segment (at keyframe boundaries) + if (isKeyFrame && pendingSamples.isNotEmpty()) { + val segmentDurationUs = presentationTimeUs - segmentStartTimeUs + if (segmentDurationUs >= targetSegmentDurationUs) { + finalizeCurrentSegment() + segmentStartTimeUs = presentationTimeUs + } + } + + // Copy buffer data and convert from Annex-B to AVCC format + val rawData = ByteArray(bufferInfo.size) + buffer.position(bufferInfo.offset) + buffer.limit(bufferInfo.offset + bufferInfo.size) + buffer.get(rawData) + + // Convert Annex-B (start codes) to AVCC (length prefixes) + val data = convertAnnexBToAvcc(rawData) + + // Estimate duration (will be corrected by next sample) + val estimatedDurationUs = if (lastPresentationTimeUs > 0) { + presentationTimeUs - lastPresentationTimeUs + } else { + 1_000_000L / 30 // Assume 30fps + } + + pendingSamples.add(Sample( + data = data, + presentationTimeUs = presentationTimeUs, + durationUs = estimatedDurationUs, + isKeyFrame = isKeyFrame + )) + + lastPresentationTimeUs = presentationTimeUs + } + + /** + * Stops the muxer, finalizing any pending segment. 
+ */ + fun stop() { + check(state == State.STARTED) { "Muxer not started" } + + if (pendingSamples.isNotEmpty()) { + finalizeCurrentSegment() + } + + state = State.STOPPED + Log.d(TAG, "Muxer stopped, wrote $segmentIndex segments") + } + + /** + * Releases resources. + */ + fun release() { + if (state == State.STARTED) { + stop() + } + pendingSamples.clear() + state = State.RELEASED + } + + /** + * Finalizes the current segment and writes it to disk. + */ + private fun finalizeCurrentSegment() { + if (pendingSamples.isEmpty()) return + + try { + val baseDecodeTimeUs = pendingSamples.first().presentationTimeUs + val fragmentBytes = buildMediaSegment(pendingSamples, sequenceNumber, baseDecodeTimeUs) + + val segmentFile = File(outputDirectory, "$segmentIndex.mp4") + FileOutputStream(segmentFile).use { it.write(fragmentBytes) } + + // Calculate duration + val firstPts = pendingSamples.first().presentationTimeUs + val lastSample = pendingSamples.last() + val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs + + Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " + + "baseDecodeTime=${baseDecodeTimeUs}us (${baseDecodeTimeUs / 1_000_000.0}s), " + + "duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes") + + callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs) + + segmentIndex++ + sequenceNumber++ + pendingSamples.clear() + + } catch (e: Exception) { + Log.e(TAG, "Error finalizing segment $segmentIndex", e) + } + } + + // ==================== Init Segment Building ==================== + + /** + * Builds the initialization segment (ftyp + moov). 
+ */ + private fun buildInitSegment(format: MediaFormat): ByteArray { + val width = format.getInteger(MediaFormat.KEY_WIDTH) + val height = format.getInteger(MediaFormat.KEY_HEIGHT) + + val sps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) } + ?: throw IllegalArgumentException("Missing SPS (csd-0)") + val pps = format.getByteBuffer("csd-1")?.let { extractNalUnit(it) } + ?: throw IllegalArgumentException("Missing PPS (csd-1)") + + val output = ByteArrayOutputStream() + + // ftyp + output.write(buildFtypBox()) + + // moov + output.write(buildMoovBox(width, height, sps, pps)) + + return output.toByteArray() + } + + private fun extractNalUnit(buffer: ByteBuffer): ByteArray { + val data = ByteArray(buffer.remaining()) + buffer.duplicate().get(data) + + // Strip start code prefix (0x00000001 or 0x000001) + return when { + data.size >= 4 && data[0] == 0.toByte() && data[1] == 0.toByte() && + data[2] == 0.toByte() && data[3] == 1.toByte() -> data.copyOfRange(4, data.size) + data.size >= 3 && data[0] == 0.toByte() && data[1] == 0.toByte() && + data[2] == 1.toByte() -> data.copyOfRange(3, data.size) + else -> data + } + } + + private fun buildFtypBox(): ByteArray { + val brands = listOf("isom", "iso5", "iso6", "avc1", "mp41", "dash") + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + val size = 8 + 4 + 4 + (brands.size * 4) + dos.writeInt(size) + dos.writeBytes("ftyp") + dos.writeBytes("isom") // major brand + dos.writeInt(0x200) // minor version + brands.forEach { dos.writeBytes(it) } + + return output.toByteArray() + } + + private fun buildMoovBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + + content.write(buildMvhdBox()) + content.write(buildTrakBox(width, height, sps, pps)) + content.write(buildMvexBox()) + + return wrapBox("moov", content.toByteArray()) + } + + private fun buildMvhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = 
DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(timescale) // timescale + dos.writeInt(0) // duration + dos.writeInt(0x00010000) // rate = 1.0 + dos.writeShort(0x0100) // volume = 1.0 + dos.writeShort(0) // reserved + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + + // Unity matrix + dos.writeInt(0x00010000); dos.writeInt(0); dos.writeInt(0) + dos.writeInt(0); dos.writeInt(0x00010000); dos.writeInt(0) + dos.writeInt(0); dos.writeInt(0); dos.writeInt(0x40000000) + + repeat(6) { dos.writeInt(0) } // pre-defined + dos.writeInt(2) // next track ID + + return wrapBox("mvhd", output.toByteArray()) + } + + private fun buildTrakBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildTkhdBox(width, height)) + content.write(buildMdiaBox(width, height, sps, pps)) + return wrapBox("trak", content.toByteArray()) + } + + private fun buildTkhdBox(width: Int, height: Int): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0x00000007) // version 0, flags (enabled, in movie, in preview) + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(1) // track ID + dos.writeInt(0) // reserved + dos.writeInt(0) // duration + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeShort(0) // layer + dos.writeShort(0) // alternate group + dos.writeShort(0) // volume (0 for video) + dos.writeShort(0) // reserved + + // Rotation matrix based on orientationDegrees + writeRotationMatrix(dos, width, height) + + // For 90° and 270° rotations, the display dimensions are swapped + // The tkhd width/height represent the final display size after rotation + val (displayWidth, displayHeight) = when (orientationDegrees) { + 90, 270 -> Pair(height, width) + else -> Pair(width, height) + } + 
dos.writeInt(displayWidth shl 16) // width (16.16 fixed point) + dos.writeInt(displayHeight shl 16) // height (16.16 fixed point) + + Log.d(TAG, "tkhd: encoder=${width}x${height}, display=${displayWidth}x${displayHeight}, rotation=$orientationDegrees") + + return wrapBox("tkhd", output.toByteArray()) + } + + /** + * Writes the 3x3 transformation matrix for video rotation. + * The matrix is applied to rotate the video content for correct display. + * + * Matrix format in tkhd box (all values in fixed-point): + * | a b u | where a,b,c,d are 16.16 fixed-point + * | c d v | and u,v are 2.30 fixed-point (always 0) + * | x y w | x,y are 16.16, w is 2.30 (always 1.0) + * + * For rotation by θ: a=cos(θ), b=sin(θ), c=-sin(θ), d=cos(θ) + * Translation (x,y) keeps the rotated video in the visible area. + */ + private fun writeRotationMatrix(dos: DataOutputStream, width: Int, height: Int) { + // Fixed-point constants + val one = 0x00010000 // 1.0 in 16.16 + val negOne = -0x00010000 // -1.0 in 16.16 (will be written as unsigned) + val w = 0x40000000 // 1.0 in 2.30 + + when (orientationDegrees) { + 90 -> { + // 90° rotation: x' = y, y' = -x + width + dos.writeInt(0) // a = 0 + dos.writeInt(negOne) // b = -1 + dos.writeInt(0) // u = 0 + dos.writeInt(one) // c = 1 + dos.writeInt(0) // d = 0 + dos.writeInt(0) // v = 0 + dos.writeInt(0) // x = 0 + dos.writeInt(width shl 16) // y = width (translation) + dos.writeInt(w) // w = 1 + } + 180 -> { + // 180° rotation + dos.writeInt(negOne) // a = -1 + dos.writeInt(0) // b = 0 + dos.writeInt(0) // u = 0 + dos.writeInt(0) // c = 0 + dos.writeInt(negOne) // d = -1 + dos.writeInt(0) // v = 0 + dos.writeInt(width shl 16) // x = width (translation) + dos.writeInt(height shl 16) // y = height (translation) + dos.writeInt(w) // w = 1 + } + 270 -> { + // 270° rotation: x' = -y + height, y' = x + dos.writeInt(0) // a = 0 + dos.writeInt(one) // b = 1 + dos.writeInt(0) // u = 0 + dos.writeInt(negOne) // c = -1 + dos.writeInt(0) // d = 0 + 
dos.writeInt(0) // v = 0 + dos.writeInt(height shl 16) // x = height (translation) + dos.writeInt(0) // y = 0 + dos.writeInt(w) // w = 1 + } + else -> { + // 0° or unknown: identity matrix + dos.writeInt(one) // a = 1 + dos.writeInt(0) // b = 0 + dos.writeInt(0) // u = 0 + dos.writeInt(0) // c = 0 + dos.writeInt(one) // d = 1 + dos.writeInt(0) // v = 0 + dos.writeInt(0) // x = 0 + dos.writeInt(0) // y = 0 + dos.writeInt(w) // w = 1 + } + } + } + + private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildMdhdBox()) + content.write(buildHdlrBox()) + content.write(buildMinfBox(width, height, sps, pps)) + return wrapBox("mdia", content.toByteArray()) + } + + private fun buildMdhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(timescale) // timescale + dos.writeInt(0) // duration + dos.writeShort(0x55C4) // language: "und" + dos.writeShort(0) // pre-defined + + return wrapBox("mdhd", output.toByteArray()) + } + + private fun buildHdlrBox(): ByteArray { + val name = "VideoHandler" + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // pre-defined + dos.writeBytes("vide") // handler type + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeBytes(name) + dos.writeByte(0) // null terminator + + return wrapBox("hdlr", output.toByteArray()) + } + + private fun buildMinfBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildVmhdBox()) + content.write(buildDinfBox()) + content.write(buildStblBox(width, height, sps, pps)) + return wrapBox("minf", content.toByteArray()) + } + + private fun buildVmhdBox(): 
ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(1) // version 0, flags = 1 + dos.writeShort(0) // graphics mode + dos.writeShort(0) // opcolor[0] + dos.writeShort(0) // opcolor[1] + dos.writeShort(0) // opcolor[2] + + return wrapBox("vmhd", output.toByteArray()) + } + + private fun buildDinfBox(): ByteArray { + val dref = buildDrefBox() + return wrapBox("dinf", dref) + } + + private fun buildDrefBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(1) // entry count + + // url box (self-contained) + dos.writeInt(12) + dos.writeBytes("url ") + dos.writeInt(1) // flags: self-contained + + return wrapBox("dref", output.toByteArray()) + } + + private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildStsdBox(width, height, sps, pps)) + content.write(buildSttsBox()) // Contains default timing for ffprobe frame rate detection + content.write(buildEmptyStscBox()) + content.write(buildEmptyStszBox()) + content.write(buildEmptyStcoBox()) + return wrapBox("stbl", content.toByteArray()) + } + + private fun buildStsdBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(1) // entry count + output.write(buildAvc1Box(width, height, sps, pps)) + + return wrapBox("stsd", output.toByteArray()) + } + + private fun buildAvc1Box(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + repeat(6) { dos.writeByte(0) } // reserved + dos.writeShort(1) // data reference index + dos.writeShort(0) // pre-defined + dos.writeShort(0) // reserved + repeat(3) { dos.writeInt(0) } // pre-defined + 
dos.writeShort(width) // width + dos.writeShort(height) // height + dos.writeInt(0x00480000) // horiz resolution (72 dpi) + dos.writeInt(0x00480000) // vert resolution (72 dpi) + dos.writeInt(0) // reserved + dos.writeShort(1) // frame count + repeat(32) { dos.writeByte(0) } // compressor name + dos.writeShort(0x0018) // depth (24 bit) + dos.writeShort(-1) // pre-defined + + output.write(buildAvcCBox(sps, pps)) + + return wrapBox("avc1", output.toByteArray()) + } + + private fun buildAvcCBox(sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + val profileIdc = if (sps.isNotEmpty()) sps[0].toInt() and 0xFF else 0x42 + val profileCompat = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x00 + val levelIdc = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x1F + + dos.writeByte(1) // configuration version + dos.writeByte(profileIdc) // AVC profile + dos.writeByte(profileCompat)// profile compatibility + dos.writeByte(levelIdc) // AVC level + dos.writeByte(0xFF) // 6 bits reserved + 2 bits NAL length - 1 + + dos.writeByte(0xE1) // 3 bits reserved + 5 bits SPS count + dos.writeShort(sps.size) // SPS length + dos.write(sps) // SPS data + + dos.writeByte(1) // PPS count + dos.writeShort(pps.size) // PPS length + dos.write(pps) // PPS data + + return wrapBox("avcC", output.toByteArray()) + } + + private fun buildSttsBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // For fragmented MP4, stts is normally empty as timing is in trun boxes. + // However, ffprobe uses stts to calculate r_frame_rate when present. 
+ // We add a single entry with the default sample duration so ffprobe + // can derive: r_frame_rate = timescale / sample_delta = 30000/1000 = 30 + val defaultSampleDuration = timescale / fps + + dos.writeInt(0) // version & flags + dos.writeInt(1) // entry count (1 entry for default timing) + dos.writeInt(1) // sample_count (indicates this is the default duration) + dos.writeInt(defaultSampleDuration) // sample_delta in timescale units + + return wrapBox("stts", output.toByteArray()) + } + + private fun buildEmptyStscBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stsc", output.toByteArray()) + } + + private fun buildEmptyStszBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // sample size (0 = variable) + dos.writeInt(0) // sample count + return wrapBox("stsz", output.toByteArray()) + } + + private fun buildEmptyStcoBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stco", output.toByteArray()) + } + + private fun buildMvexBox(): ByteArray { + return wrapBox("mvex", buildTrexBox()) + } + + private fun buildTrexBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // Calculate default sample duration so ffprobe can derive correct fps + // fps = timescale / default_sample_duration + // At 30fps with timescale=30000: duration=1000, ffprobe calculates 30000/1000=30 + val defaultSampleDuration = timescale / fps + + dos.writeInt(0) // version & flags + dos.writeInt(1) // track ID + dos.writeInt(1) // default sample description index + dos.writeInt(defaultSampleDuration) // default sample duration + dos.writeInt(0) // default sample size + dos.writeInt(0) // default 
sample flags + + return wrapBox("trex", output.toByteArray()) + } + + // ==================== Media Segment Building ==================== + + /** + * Builds a media segment (moof + mdat). + */ + private fun buildMediaSegment( + samples: List<Sample>, + sequenceNumber: Int, + baseDecodeTimeUs: Long + ): ByteArray { + val output = ByteArrayOutputStream() + + // Build mdat content first to know sizes + val mdatContent = ByteArrayOutputStream() + for (sample in samples) { + mdatContent.write(sample.data) + } + val mdatPayload = mdatContent.toByteArray() + + // Build moof + val moofBox = buildMoofBox(samples, sequenceNumber, baseDecodeTimeUs, mdatPayload.size) + output.write(moofBox) + + // Build mdat + output.write(wrapBox("mdat", mdatPayload)) + + return output.toByteArray() + } + + private fun buildMoofBox( + samples: List<Sample>, + sequenceNumber: Int, + baseDecodeTimeUs: Long, + mdatPayloadSize: Int + ): ByteArray { + // Calculate sizes to determine data offset + val mfhdBox = buildMfhdBox(sequenceNumber) + // tfhd: 8 header + 4 version/flags + 4 track_id + 4 duration + 4 size + 4 flags = 28 bytes + val tfhdSize = 8 + 20 + val tfdtSize = 8 + 12 // box header + version 1 content + // trun: 8 header + 12 fixed + per-sample (size + flags only, no duration) + val trunSize = 8 + 12 + (samples.size * 8) + val trafSize = 8 + tfhdSize + tfdtSize + trunSize + val moofSize = 8 + mfhdBox.size + trafSize + + val dataOffset = moofSize + 8 // moof size + mdat header + + val content = ByteArrayOutputStream() + content.write(mfhdBox) + content.write(buildTrafBox(samples, baseDecodeTimeUs, dataOffset)) + + return wrapBox("moof", content.toByteArray()) + } + + private fun buildMfhdBox(sequenceNumber: Int): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(sequenceNumber) + + return wrapBox("mfhd", output.toByteArray()) + } + + private fun buildTrafBox(samples: List<Sample>, baseDecodeTimeUs: Long, dataOffset: Int):
ByteArray { + val content = ByteArrayOutputStream() + content.write(buildTfhdBox()) + content.write(buildTfdtBox(baseDecodeTimeUs)) + content.write(buildTrunBox(samples, dataOffset)) + return wrapBox("traf", content.toByteArray()) + } + + private fun buildTfhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // Calculate default sample duration for this fragment + // This helps ffprobe calculate correct frame rate when reading via HLS + val defaultSampleDuration = timescale / fps // e.g., 30000/30 = 1000 + + // Match iOS AVFoundation's tfhd structure (28 bytes total) + // Flags: default-base-is-moof (0x020000) + default-sample-duration (0x000008) + // + default-sample-size (0x000010) + default-sample-flags (0x000020) + val flags = 0x00020000 or 0x000008 or 0x000010 or 0x000020 + dos.writeInt(flags) + dos.writeInt(1) // track ID + dos.writeInt(defaultSampleDuration) // default sample duration in timescale units + dos.writeInt(0) // default sample size (0 = variable, specified in trun) + dos.writeInt(0x01010000) // default sample flags (non-keyframe, depends on others) + + Log.d(TAG, "tfhd: default_sample_duration=$defaultSampleDuration (timescale=$timescale, fps=$fps)") + + return wrapBox("tfhd", output.toByteArray()) + } + + private fun buildTfdtBox(baseDecodeTimeUs: Long): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // Convert to timescale units + val baseMediaDecodeTime = (baseDecodeTimeUs * timescale) / 1_000_000 + + // Version 1 for 64-bit time + dos.writeInt(0x01000000) + dos.writeLong(baseMediaDecodeTime) + + return wrapBox("tfdt", output.toByteArray()) + } + + private fun buildTrunBox(samples: List<Sample>, dataOffset: Int): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // Flags: data-offset + sample-size + sample-flags + // NOTE: We intentionally OMIT sample-duration (0x000100) so ffprobe uses + // the
default_sample_duration from tfhd instead of per-sample durations. + // This ensures consistent frame rate calculation via HLS. + val flags = 0x000001 or 0x000200 or 0x000400 + dos.writeInt(flags) + dos.writeInt(samples.size) + dos.writeInt(dataOffset) + + for (sample in samples) { + // No duration - using default from tfhd + dos.writeInt(sample.data.size) + dos.writeInt(buildSampleFlags(sample.isKeyFrame)) + } + + return wrapBox("trun", output.toByteArray()) + } + + private fun buildSampleFlags(isKeyFrame: Boolean): Int { + return if (isKeyFrame) { + // sample_depends_on=2 (no dependencies), not a difference sample + 0x02000000 + } else { + // sample_depends_on=1 (depends on others), is a difference sample + 0x01010000 + } + } + + // ==================== Utilities ==================== + + private fun wrapBox(type: String, content: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(8 + content.size) + dos.writeBytes(type) + dos.write(content) + + return output.toByteArray() + } +} diff --git a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt index 719709d..4784904 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt @@ -8,12 +8,14 @@ import com.facebook.common.statfs.StatFsHelper import com.mrousavy.camera.extensions.getRecommendedBitRate import com.mrousavy.camera.types.Orientation import com.mrousavy.camera.types.RecordVideoOptions +import com.mrousavy.camera.types.StreamSegmentType import com.mrousavy.camera.utils.FileUtils import java.io.File import android.os.Environment import java.text.SimpleDateFormat import java.util.Locale import java.util.Date + class RecordingSession( context: Context, val cameraId: String, @@ -26,7 +28,7 @@ class RecordingSession( private val filePath: 
String, private val callback: (video: Video) -> Unit, private val onError: (error: CameraError) -> Unit, - private val allCallbacks: CameraSession.Callback, + private val allCallbacks: CameraSession.Callback ) { companion object { private const val TAG = "RecordingSession" @@ -34,6 +36,9 @@ class RecordingSession( private const val AUDIO_SAMPLING_RATE = 44_100 private const val AUDIO_BIT_RATE = 16 * AUDIO_SAMPLING_RATE private const val AUDIO_CHANNELS = 1 + + // Segment duration in seconds (matching iOS default of 6 seconds) + private const val SEGMENT_DURATION_SECONDS = 6 } data class Video(val path: String, val durationMs: Long, val size: Size) @@ -42,16 +47,33 @@ class RecordingSession( private val outputPath: File = File(filePath.removePrefix("file://")) private val bitRate = getBitRate() - private val recorder = ChunkedRecordingManager.fromParams( - allCallbacks, - size, - enableAudio, - fps, - cameraOrientation, - bitRate, - options, - outputPath - ) + + // Use FragmentedRecordingManager for HLS-compatible fMP4 output, + // or fall back to ChunkedRecordingManager for regular MP4 chunks + private val recorder: ChunkedRecorderInterface = if (options.streamSegmentType == StreamSegmentType.FRAGMENTED_MP4) { + FragmentedRecordingManager.fromParams( + allCallbacks, + size, + enableAudio, + fps, + cameraOrientation, + bitRate, + options, + outputPath, + SEGMENT_DURATION_SECONDS + ) + } else { + ChunkedRecordingManager.fromParams( + allCallbacks, + size, + enableAudio, + fps, + cameraOrientation, + bitRate, + options, + outputPath + ) + } private var startTime: Long? 
= null val surface: Surface get() { @@ -60,7 +82,7 @@ class RecordingSession( fun start() { synchronized(this) { - Log.i(TAG, "Starting RecordingSession..") + Log.i(TAG, "Starting RecordingSession with ${options.streamSegmentType} recorder..") startTime = System.currentTimeMillis() recorder.start() } diff --git a/package/android/src/main/java/com/mrousavy/camera/types/RecordVideoOptions.kt b/package/android/src/main/java/com/mrousavy/camera/types/RecordVideoOptions.kt index 78bfdd2..921f223 100644 --- a/package/android/src/main/java/com/mrousavy/camera/types/RecordVideoOptions.kt +++ b/package/android/src/main/java/com/mrousavy/camera/types/RecordVideoOptions.kt @@ -9,6 +9,7 @@ class RecordVideoOptions(map: ReadableMap) { var videoBitRateOverride: Double? = null var videoBitRateMultiplier: Double? = null var orientation: Orientation? = null + var streamSegmentType: StreamSegmentType = StreamSegmentType.FRAGMENTED_MP4 init { if (map.hasKey("fileType")) { @@ -29,5 +30,8 @@ class RecordVideoOptions(map: ReadableMap) { if (map.hasKey("orientation")) { orientation = Orientation.fromUnionValue(map.getString("orientation")) } + if (map.hasKey("streamSegmentType")) { + streamSegmentType = StreamSegmentType.fromUnionValue(map.getString("streamSegmentType")) + } } } diff --git a/package/android/src/main/java/com/mrousavy/camera/types/StreamSegmentType.kt b/package/android/src/main/java/com/mrousavy/camera/types/StreamSegmentType.kt new file mode 100644 index 0000000..a67dccf --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/types/StreamSegmentType.kt @@ -0,0 +1,15 @@ +package com.mrousavy.camera.types + +enum class StreamSegmentType(override val unionValue: String) : JSUnionValue { + FRAGMENTED_MP4("FRAGMENTED_MP4"), + RB_CHUNKED_MP4("RB_CHUNKED_MP4"); + + companion object : JSUnionValue.Companion<StreamSegmentType> { + override fun fromUnionValue(unionValue: String?): StreamSegmentType = + when (unionValue) { + "FRAGMENTED_MP4" -> FRAGMENTED_MP4 + "RB_CHUNKED_MP4" ->
RB_CHUNKED_MP4 + else -> FRAGMENTED_MP4 // Default to fMP4 + } + } +} diff --git a/package/src/VideoFile.ts b/package/src/VideoFile.ts index 65d3722..6266106 100644 --- a/package/src/VideoFile.ts +++ b/package/src/VideoFile.ts @@ -41,6 +41,17 @@ export interface RecordVideoOptions { * @default 'normal' */ videoBitRate?: 'extra-low' | 'low' | 'normal' | 'high' | 'extra-high' | number + /** + * The stream segment type for recording on Android. + * - `FRAGMENTED_MP4`: HLS-compatible segments (init.mp4 + numbered segments) + * - `RB_CHUNKED_MP4`: Legacy chunked MP4 format + * + * iOS always uses FRAGMENTED_MP4 regardless of this setting. + * + * @platform android + * @default 'FRAGMENTED_MP4' + */ + streamSegmentType?: 'FRAGMENTED_MP4' | 'RB_CHUNKED_MP4' } /**