diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt b/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt index 3384ef2..07f0b9c 100644 --- a/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt +++ b/package/android/src/main/java/com/mrousavy/camera/CameraView+Events.kt @@ -40,15 +40,26 @@ fun CameraView.invokeOnStopped() { this.sendEvent(event) } -fun CameraView.invokeOnChunkReady(filepath: File, index: Int) { - Log.e(CameraView.TAG, "invokeOnError(...):") +fun CameraView.invokeOnChunkReady(filepath: File, index: Int, durationUs: Long?) { + Log.i(CameraView.TAG, "invokeOnChunkReady(...): index=$index, filepath=$filepath, durationUs=$durationUs") val event = Arguments.createMap() event.putInt("index", index) event.putString("filepath", filepath.toString()) + if (durationUs != null) { + event.putDouble("duration", durationUs / 1_000_000.0) // Convert microseconds to seconds + } val reactContext = context as ReactContext reactContext.getJSModule(RCTEventEmitter::class.java).receiveEvent(id, "onVideoChunkReady", event) } +fun CameraView.invokeOnInitReady(filepath: File) { + Log.i(CameraView.TAG, "invokeOnInitReady(...): filepath=$filepath") + val event = Arguments.createMap() + event.putString("filepath", filepath.toString()) + val reactContext = context as ReactContext + reactContext.getJSModule(RCTEventEmitter::class.java).receiveEvent(id, "onInitReady", event) +} + fun CameraView.invokeOnError(error: Throwable) { Log.e(CameraView.TAG, "invokeOnError(...):") error.printStackTrace() diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraView.kt b/package/android/src/main/java/com/mrousavy/camera/CameraView.kt index 3569cd3..af4f2c6 100644 --- a/package/android/src/main/java/com/mrousavy/camera/CameraView.kt +++ b/package/android/src/main/java/com/mrousavy/camera/CameraView.kt @@ -271,8 +271,12 @@ class CameraView(context: Context) : invokeOnStopped() } - override fun 
onVideoChunkReady(filepath: File, index: Int) { - invokeOnChunkReady(filepath, index) + override fun onVideoChunkReady(filepath: File, index: Int, durationUs: Long?) { + invokeOnChunkReady(filepath, index, durationUs) + } + + override fun onInitSegmentReady(filepath: File) { + invokeOnInitReady(filepath) + } override fun onCodeScanned(codes: List<Barcode>, scannerFrame: CodeScannerFrame) { diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt b/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt index af48c52..a6ac35e 100644 --- a/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt +++ b/package/android/src/main/java/com/mrousavy/camera/CameraViewManager.kt @@ -32,6 +32,7 @@ class CameraViewManager : ViewGroupManager<CameraView>() { .put("cameraError", MapBuilder.of("registrationName", "onError")) .put("cameraCodeScanned", MapBuilder.of("registrationName", "onCodeScanned")) .put("onVideoChunkReady", MapBuilder.of("registrationName", "onVideoChunkReady")) + .put("onInitReady", MapBuilder.of("registrationName", "onInitReady")) .build()?.toMutableMap() override fun getName(): String = TAG diff --git a/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt index 7c9924c..ed8ea69 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/CameraSession.kt @@ -429,15 +429,15 @@ class CameraSession(private val context: Context, private val cameraManager: Cam // Get actual device rotation from WindowManager since the React Native orientation hook // doesn't update when rotating between landscape-left and landscape-right on Android. 
// Map device rotation to the correct orientationHint for video recording: - // - Counter-clockwise (ROTATION_90) → 270° hint - // - Clockwise (ROTATION_270) → 90° hint + // - Counter-clockwise (ROTATION_90) → 90° hint + // - Clockwise (ROTATION_270) → 270° hint val windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager val deviceRotation = windowManager.defaultDisplay.rotation val recordingOrientation = when (deviceRotation) { Surface.ROTATION_0 -> Orientation.PORTRAIT - Surface.ROTATION_90 -> Orientation.LANDSCAPE_RIGHT + Surface.ROTATION_90 -> Orientation.LANDSCAPE_LEFT Surface.ROTATION_180 -> Orientation.PORTRAIT_UPSIDE_DOWN - Surface.ROTATION_270 -> Orientation.LANDSCAPE_LEFT + Surface.ROTATION_270 -> Orientation.LANDSCAPE_RIGHT else -> Orientation.PORTRAIT } @@ -448,7 +448,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam enableAudio, fps, videoOutput.enableHdr, - orientation, + recordingOrientation, options, filePath, callback, @@ -513,7 +513,8 @@ class CameraSession(private val context: Context, private val cameraManager: Cam fun onInitialized() fun onStarted() fun onStopped() - fun onVideoChunkReady(filepath: File, index: Int) + fun onVideoChunkReady(filepath: File, index: Int, durationUs: Long?) 
+ fun onInitSegmentReady(filepath: File) fun onCodeScanned(codes: List<Barcode>, scannerFrame: CodeScannerFrame) } } diff --git a/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt index 9444216..64c3389 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorder.kt @@ -14,7 +14,7 @@ import java.io.File import java.nio.ByteBuffer class ChunkedRecordingManager(private val encoder: MediaCodec, private val outputDirectory: File, private val orientationHint: Int, private val iFrameInterval: Int, private val callbacks: CameraSession.Callback) : - MediaCodec.Callback() { + MediaCodec.Callback(), ChunkedRecorderInterface { companion object { private const val TAG = "ChunkedRecorder" @@ -73,7 +73,7 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu private val targetDurationUs = iFrameInterval * 1000000 - val surface: Surface = encoder.createInputSurface() + override val surface: Surface = encoder.createInputSurface() init { if (!this.outputDirectory.exists()) { @@ -95,7 +95,9 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu fun finish() { muxer.stop() muxer.release() - callbacks.onVideoChunkReady(filepath, chunkIndex) + // Calculate duration from start time - this is approximate + // The new FragmentedRecordingManager provides accurate duration + callbacks.onVideoChunkReady(filepath, chunkIndex, null) } } @@ -133,12 +135,12 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu return bufferInfo.presentationTimeUs - context.startTimeUs } - fun start() { + override fun start() { encoder.start() recording = true } - fun finish() { + override fun finish() { synchronized(this) { muxerContext?.finish() recording = false diff --git 
a/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt new file mode 100644 index 0000000..3464dad --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/core/ChunkedRecorderInterface.kt @@ -0,0 +1,15 @@ +package com.mrousavy.camera.core + +import android.view.Surface + +/** + * Common interface for chunked video recorders. + * Implemented by both ChunkedRecordingManager (regular MP4) and + * FragmentedRecordingManager (HLS-compatible fMP4). + */ +interface ChunkedRecorderInterface { + val surface: Surface + + fun start() + fun finish() +} diff --git a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt new file mode 100644 index 0000000..545a934 --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt @@ -0,0 +1,180 @@ +package com.mrousavy.camera.core + +import android.media.MediaCodec +import android.media.MediaCodec.BufferInfo +import android.media.MediaCodecInfo +import android.media.MediaFormat +import android.util.Log +import android.util.Size +import android.view.Surface +import com.mrousavy.camera.types.Orientation +import com.mrousavy.camera.types.RecordVideoOptions +import java.io.File + +/** + * A recording manager that produces HLS-compatible fragmented MP4 segments. 
+ * + * Uses HlsMuxer (following Android's MediaMuxer pattern) to produce: + * - init.mp4: Initialization segment (ftyp + moov with mvex) + * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat) + */ +class FragmentedRecordingManager( + private val encoder: MediaCodec, + private val muxer: HlsMuxer, + private val configuredFps: Int +) : MediaCodec.Callback(), ChunkedRecorderInterface { + + companion object { + private const val TAG = "FragmentedRecorder" + private const val DEFAULT_SEGMENT_DURATION_SECONDS = 6 + + fun fromParams( + callbacks: CameraSession.Callback, + size: Size, + enableAudio: Boolean, + fps: Int? = null, + cameraOrientation: Orientation, + bitRate: Int, + options: RecordVideoOptions, + outputDirectory: File, + segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS + ): FragmentedRecordingManager { + val mimeType = options.videoCodec.toMimeType() + // Use cameraOrientation from Android (computed from device rotation) + // instead of options.orientation from JS which may be stale + val recordingOrientationDegrees = cameraOrientation.toDegrees() + + // Swap dimensions based on orientation - same logic as ChunkedRecordingManager + // When camera is in landscape orientation, we need to swap width/height for the encoder + val (width, height) = if (cameraOrientation.isLandscape()) { + size.height to size.width + } else { + size.width to size.height + } + + Log.d(TAG, "Input size: ${size.width}x${size.height}, " + + "encoder size: ${width}x${height}, " + + "orientation: $cameraOrientation ($recordingOrientationDegrees°)") + + val format = MediaFormat.createVideoFormat(mimeType, width, height) + val codec = MediaCodec.createEncoderByType(mimeType) + + format.setInteger( + MediaFormat.KEY_COLOR_FORMAT, + MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface + ) + + val effectiveFps = fps ?: 30 + format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps) + format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds) + 
format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate) + + Log.d(TAG, "Video Format: $format, orientation: $recordingOrientationDegrees") + + codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE) + + // Create muxer with callbacks and orientation + val muxer = HlsMuxer( + outputDirectory = outputDirectory, + callback = object : HlsMuxer.Callback { + override fun onInitSegmentReady(file: File) { + callbacks.onInitSegmentReady(file) + } + + override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) { + callbacks.onVideoChunkReady(file, index, durationUs) + } + }, + orientationDegrees = recordingOrientationDegrees + ) + muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L) + + Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees, fps: $effectiveFps") + + return FragmentedRecordingManager(codec, muxer, effectiveFps) + } + } + + private var recording = false + private var muxerStarted = false + private var trackIndex = -1 + + override val surface: Surface = encoder.createInputSurface() + + init { + encoder.setCallback(this) + } + + override fun start() { + encoder.start() + recording = true + } + + override fun finish() { + synchronized(this) { + recording = false + + if (muxerStarted) { + muxer.stop() + muxer.release() + } + + try { + encoder.stop() + encoder.release() + } catch (e: Exception) { + Log.e(TAG, "Error stopping encoder", e) + } + } + } + + // MediaCodec.Callback methods + + override fun onInputBufferAvailable(codec: MediaCodec, index: Int) { + // Not used for Surface input + } + + override fun onOutputBufferAvailable(codec: MediaCodec, index: Int, bufferInfo: BufferInfo) { + synchronized(this) { + if (!recording) { + encoder.releaseOutputBuffer(index, false) + return + } + + if (!muxerStarted) { + encoder.releaseOutputBuffer(index, false) + return + } + + val buffer = encoder.getOutputBuffer(index) + if (buffer == null) { + Log.e(TAG, "getOutputBuffer returned null") + 
encoder.releaseOutputBuffer(index, false) + return + } + + try { + muxer.writeSampleData(trackIndex, buffer, bufferInfo) + } catch (e: Exception) { + Log.e(TAG, "Error writing sample", e) + } + + encoder.releaseOutputBuffer(index, false) + } + } + + override fun onError(codec: MediaCodec, e: MediaCodec.CodecException) { + Log.e(TAG, "Codec error: ${e.message}") + } + + override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) { + synchronized(this) { + Log.i(TAG, "Output format changed: $format") + + // Pass configured fps to muxer (not the encoder's output format fps which may differ) + trackIndex = muxer.addTrack(format, configuredFps) + muxer.start() + muxerStarted = true + } + } +} diff --git a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt new file mode 100644 index 0000000..28a29e5 --- /dev/null +++ b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt @@ -0,0 +1,1200 @@ +package com.mrousavy.camera.core + +import android.media.MediaCodec +import android.media.MediaFormat +import android.util.Log +import java.io.ByteArrayOutputStream +import java.io.DataOutputStream +import java.io.File +import java.io.FileOutputStream +import java.nio.ByteBuffer + +/** + * A muxer for creating HLS-compatible fragmented MP4 output. + * + * Follows the same pattern as Android's MediaMuxer: + * 1. Create muxer with output directory + * 2. addTrack() with MediaFormat + * 3. start() - writes init.mp4 + * 4. writeSampleData() for each encoded sample + * 5. stop() - finalizes last segment + * 6. 
release() - cleanup + * + * Produces: + * - init.mp4: Initialization segment (ftyp + moov with mvex) + * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat) + */ +class HlsMuxer( + private val outputDirectory: File, + private val callback: Callback, + private val orientationDegrees: Int = 0 +) { + companion object { + private const val TAG = "HlsMuxer" + private const val DEFAULT_SEGMENT_DURATION_US = 6_000_000L // 6 seconds + } + + interface Callback { + fun onInitSegmentReady(file: File) + fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) + } + + // Configuration + private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US + private var timescale: Int = 30000 // Default, updated from format + private var configuredFps: Int = 30 // Configured fps from user, used for VUI timing + + // State + private var state = State.UNINITIALIZED + private var trackFormat: MediaFormat? = null + private var sequenceNumber = 1 + private var segmentIndex = 0 + + // Current segment data + private val pendingSamples = mutableListOf<Sample>() + private var segmentStartTimeUs = -1L + private var lastPresentationTimeUs = 0L + + // Timestamp normalization - first timestamp becomes time 0 + private var firstPresentationTimeUs = -1L + + private enum class State { + UNINITIALIZED, + INITIALIZED, + STARTED, + STOPPED, + RELEASED + } + + private data class Sample( + val data: ByteArray, + val presentationTimeUs: Long, + var durationUs: Long, + val isKeyFrame: Boolean + ) + + // ==================== Timestamp Normalization ==================== + + /** + * Normalizes a presentation timestamp to start from 0. + * The first timestamp received becomes time 0, and all subsequent + * timestamps are relative to that. 
+ */ + private fun normalizeTimestamp(rawPresentationTimeUs: Long): Long { + if (firstPresentationTimeUs < 0) { + firstPresentationTimeUs = rawPresentationTimeUs + Log.d(TAG, "First timestamp: ${rawPresentationTimeUs}us, normalizing to 0") + } + return rawPresentationTimeUs - firstPresentationTimeUs + } + + // ==================== Annex-B to AVCC Conversion ==================== + + /** + * Converts H.264 data from Annex-B format to AVCC format. + * + * Annex-B uses start codes (00 00 00 01 or 00 00 01) to delimit NAL units. + * AVCC uses 4-byte big-endian length prefixes before each NAL unit. + * + * This conversion is required because: + * - MediaCodec outputs Annex-B format + * - fMP4/HLS requires AVCC format (as specified in avcC box with NAL length size = 4) + */ + private fun convertAnnexBToAvcc(annexBData: ByteArray): ByteArray { + val nalUnits = parseAnnexBNalUnits(annexBData) + if (nalUnits.isEmpty()) { + Log.w(TAG, "No NAL units found in sample, returning original data") + return annexBData + } + + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + for (nalUnit in nalUnits) { + // Write 4-byte big-endian length prefix + dos.writeInt(nalUnit.size) + // Write NAL unit data (without start code) + dos.write(nalUnit) + } + + return output.toByteArray() + } + + /** + * Parses Annex-B formatted data into individual NAL units. + * Returns list of NAL unit byte arrays (without start codes). 
+ */ + private fun parseAnnexBNalUnits(data: ByteArray): List<ByteArray> { + val nalUnits = mutableListOf<ByteArray>() + var i = 0 + + while (i < data.size) { + // Find start code + val startCodeLength = findStartCode(data, i) + if (startCodeLength == 0) { + // No start code found at current position + // This might happen if data doesn't start with a start code + if (nalUnits.isEmpty() && i == 0) { + // Data might already be in AVCC format or malformed + // Try to detect AVCC format (first 4 bytes would be a reasonable length) + if (data.size >= 4) { + val possibleLength = ((data[0].toInt() and 0xFF) shl 24) or + ((data[1].toInt() and 0xFF) shl 16) or + ((data[2].toInt() and 0xFF) shl 8) or + (data[3].toInt() and 0xFF) + if (possibleLength > 0 && possibleLength <= data.size - 4) { + // Looks like AVCC format already, return original + Log.d(TAG, "Data appears to already be in AVCC format") + return emptyList() + } + } + } + i++ + continue + } + + val nalStart = i + startCodeLength + + // Find end of this NAL unit (start of next, or end of data) + var nalEnd = data.size + var j = nalStart + while (j < data.size - 2) { + val nextStartCode = findStartCode(data, j) + if (nextStartCode > 0) { + nalEnd = j + break + } + j++ + } + + if (nalEnd > nalStart) { + nalUnits.add(data.copyOfRange(nalStart, nalEnd)) + } + + i = nalEnd + } + + return nalUnits + } + + /** + * Checks for Annex-B start code at given position. + * Returns start code length (3 or 4) or 0 if no start code found. 
+ */ + private fun findStartCode(data: ByteArray, offset: Int): Int { + if (offset + 4 <= data.size && + data[offset] == 0.toByte() && + data[offset + 1] == 0.toByte() && + data[offset + 2] == 0.toByte() && + data[offset + 3] == 1.toByte()) { + return 4 // 4-byte start code: 00 00 00 01 + } + if (offset + 3 <= data.size && + data[offset] == 0.toByte() && + data[offset + 1] == 0.toByte() && + data[offset + 2] == 1.toByte()) { + return 3 // 3-byte start code: 00 00 01 + } + return 0 + } + + /** + * Sets the target segment duration. + * Must be called before start(). + */ + fun setSegmentDuration(durationUs: Long) { + check(state == State.UNINITIALIZED || state == State.INITIALIZED) { + "Cannot set segment duration after start()" + } + targetSegmentDurationUs = durationUs + } + + /** + * Adds a track to the muxer. + * + * @param format The MediaFormat describing the track + * @param fps The configured frame rate (used for VUI timing, overrides format's fps) + * @return Track index (always 0 for now, single video track) + */ + fun addTrack(format: MediaFormat, fps: Int = 30): Int { + check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" } + + trackFormat = format + configuredFps = fps + timescale = fps * 1000 // Use fps * 1000 for good precision + + state = State.INITIALIZED + + val formatWidth = try { format.getInteger(MediaFormat.KEY_WIDTH) } catch (e: Exception) { -1 } + val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 } + Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " + + "encoder output: ${formatWidth}x${formatHeight}, " + + "configuredFps=$configuredFps, timescale=$timescale, orientation=$orientationDegrees°") + + return 0 // Single track, index 0 + } + + /** + * Starts the muxer, writing the initialization segment. 
+ */ + fun start() { + check(state == State.INITIALIZED) { "Must call addTrack() before start()" } + val format = trackFormat ?: throw IllegalStateException("No track format") + + // Create output directory if needed + if (!outputDirectory.exists()) { + outputDirectory.mkdirs() + } + + // Write init segment + val initBytes = buildInitSegment(format) + val initFile = File(outputDirectory, "init.mp4") + FileOutputStream(initFile).use { it.write(initBytes) } + Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)") + callback.onInitSegmentReady(initFile) + + state = State.STARTED + } + + /** + * Writes sample data to the muxer. + * + * @param trackIndex Track index (must be 0) + * @param buffer The encoded sample data + * @param bufferInfo Sample metadata (size, presentation time, flags) + */ + fun writeSampleData(trackIndex: Int, buffer: ByteBuffer, bufferInfo: MediaCodec.BufferInfo) { + check(state == State.STARTED) { "Must call start() before writeSampleData()" } + check(trackIndex == 0) { "Invalid track index: $trackIndex" } + + // Skip codec config data (already in init segment) + if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) { + return + } + + val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0 + val presentationTimeUs = normalizeTimestamp(bufferInfo.presentationTimeUs) + + // Initialize segment start time + if (segmentStartTimeUs < 0) { + segmentStartTimeUs = presentationTimeUs + } + + // Check if we should finalize current segment (at keyframe boundaries) + if (isKeyFrame && pendingSamples.isNotEmpty()) { + val segmentDurationUs = presentationTimeUs - segmentStartTimeUs + if (segmentDurationUs >= targetSegmentDurationUs) { + finalizeCurrentSegment() + segmentStartTimeUs = presentationTimeUs + } + } + + // Copy buffer data and convert from Annex-B to AVCC format + val rawData = ByteArray(bufferInfo.size) + buffer.position(bufferInfo.offset) + buffer.limit(bufferInfo.offset + 
bufferInfo.size) + buffer.get(rawData) + + // Convert Annex-B (start codes) to AVCC (length prefixes) + val data = convertAnnexBToAvcc(rawData) + + // Update duration of previous sample + if (pendingSamples.isNotEmpty()) { + val lastSample = pendingSamples.last() + lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs + } + + // Estimate duration (will be corrected by next sample) + val estimatedDurationUs = if (lastPresentationTimeUs > 0) { + presentationTimeUs - lastPresentationTimeUs + } else { + 1_000_000L / 30 // Assume 30fps + } + + pendingSamples.add(Sample( + data = data, + presentationTimeUs = presentationTimeUs, + durationUs = estimatedDurationUs, + isKeyFrame = isKeyFrame + )) + + lastPresentationTimeUs = presentationTimeUs + } + + /** + * Stops the muxer, finalizing any pending segment. + */ + fun stop() { + check(state == State.STARTED) { "Muxer not started" } + + if (pendingSamples.isNotEmpty()) { + finalizeCurrentSegment() + } + + state = State.STOPPED + Log.d(TAG, "Muxer stopped, wrote $segmentIndex segments") + } + + /** + * Releases resources. + */ + fun release() { + if (state == State.STARTED) { + stop() + } + pendingSamples.clear() + state = State.RELEASED + } + + /** + * Finalizes the current segment and writes it to disk. 
+ */ + private fun finalizeCurrentSegment() { + if (pendingSamples.isEmpty()) return + + try { + val baseDecodeTimeUs = pendingSamples.first().presentationTimeUs + val fragmentBytes = buildMediaSegment(pendingSamples, sequenceNumber, baseDecodeTimeUs) + + val segmentFile = File(outputDirectory, "$segmentIndex.mp4") + FileOutputStream(segmentFile).use { it.write(fragmentBytes) } + + // Calculate duration + val firstPts = pendingSamples.first().presentationTimeUs + val lastSample = pendingSamples.last() + val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs + + Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " + + "duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes") + + callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs) + + segmentIndex++ + sequenceNumber++ + pendingSamples.clear() + + } catch (e: Exception) { + Log.e(TAG, "Error finalizing segment $segmentIndex", e) + } + } + + // ==================== SPS VUI Timing Injection ==================== + + /** + * Injects VUI timing parameters into an H.264 SPS NAL unit. + * This ensures proper frame rate detection by players/decoders. + * + * The SPS from MediaCodec lacks VUI timing info, causing tools like + * ffprobe to misinterpret the frame rate. 
+ */ + private fun injectVuiTiming(sps: ByteArray, fps: Int): ByteArray { + try { + val reader = BitReader(sps) + val writer = BitWriter() + + // NAL header (1 byte: forbidden_zero_bit, nal_ref_idc, nal_unit_type) + writer.writeBits(reader.readBits(8), 8) + + // profile_idc (1 byte) + val profileIdc = reader.readBits(8) + writer.writeBits(profileIdc, 8) + + // constraint_set flags (1 byte) + writer.writeBits(reader.readBits(8), 8) + + // level_idc (1 byte) + writer.writeBits(reader.readBits(8), 8) + + // seq_parameter_set_id (ue(v)) + copyExpGolomb(reader, writer) + + // Profile-specific fields for High profile (100) and others + if (profileIdc == 100 || profileIdc == 110 || profileIdc == 122 || + profileIdc == 244 || profileIdc == 44 || profileIdc == 83 || + profileIdc == 86 || profileIdc == 118 || profileIdc == 128 || + profileIdc == 138 || profileIdc == 139 || profileIdc == 134 || + profileIdc == 135) { + + // chroma_format_idc (ue(v)) + val chromaFormatIdc = copyExpGolombAndReturn(reader, writer) + + if (chromaFormatIdc == 3) { + // separate_colour_plane_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + } + + // bit_depth_luma_minus8 (ue(v)) + copyExpGolomb(reader, writer) + + // bit_depth_chroma_minus8 (ue(v)) + copyExpGolomb(reader, writer) + + // qpprime_y_zero_transform_bypass_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + + // seq_scaling_matrix_present_flag (1 bit) + val scalingMatrixFlag = reader.readBits(1) + writer.writeBits(scalingMatrixFlag, 1) + + if (scalingMatrixFlag == 1) { + // Skip scaling lists - this is complex, just copy remaining and give up + Log.w(TAG, "SPS has scaling matrix, skipping VUI injection") + return sps + } + } + + // log2_max_frame_num_minus4 (ue(v)) + copyExpGolomb(reader, writer) + + // pic_order_cnt_type (ue(v)) + val picOrderCntType = copyExpGolombAndReturn(reader, writer) + + if (picOrderCntType == 0) { + // log2_max_pic_order_cnt_lsb_minus4 (ue(v)) + copyExpGolomb(reader, writer) + } else if 
(picOrderCntType == 1) { + // delta_pic_order_always_zero_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + // offset_for_non_ref_pic (se(v)) + copySignedExpGolomb(reader, writer) + // offset_for_top_to_bottom_field (se(v)) + copySignedExpGolomb(reader, writer) + // num_ref_frames_in_pic_order_cnt_cycle (ue(v)) + val numRefFrames = copyExpGolombAndReturn(reader, writer) + for (i in 0 until numRefFrames) { + // offset_for_ref_frame[i] (se(v)) + copySignedExpGolomb(reader, writer) + } + } + + // max_num_ref_frames (ue(v)) + copyExpGolomb(reader, writer) + + // gaps_in_frame_num_value_allowed_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + + // pic_width_in_mbs_minus1 (ue(v)) + copyExpGolomb(reader, writer) + + // pic_height_in_map_units_minus1 (ue(v)) + copyExpGolomb(reader, writer) + + // frame_mbs_only_flag (1 bit) + val frameMbsOnlyFlag = reader.readBits(1) + writer.writeBits(frameMbsOnlyFlag, 1) + + if (frameMbsOnlyFlag == 0) { + // mb_adaptive_frame_field_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + } + + // direct_8x8_inference_flag (1 bit) + writer.writeBits(reader.readBits(1), 1) + + // frame_cropping_flag (1 bit) + val frameCroppingFlag = reader.readBits(1) + writer.writeBits(frameCroppingFlag, 1) + + if (frameCroppingFlag == 1) { + // frame_crop_left_offset, right, top, bottom (ue(v) each) + copyExpGolomb(reader, writer) + copyExpGolomb(reader, writer) + copyExpGolomb(reader, writer) + copyExpGolomb(reader, writer) + } + + // vui_parameters_present_flag - we'll set this to 1 and add our VUI + val originalVuiFlag = reader.readBits(1) + writer.writeBits(1, 1) // Set VUI present + + // Write VUI parameters with timing info + writeVuiWithTiming(writer, fps, originalVuiFlag == 1, reader) + + // Add RBSP trailing bits + writer.writeRbspTrailingBits() + + val result = writer.toByteArray() + Log.d(TAG, "Injected VUI timing for ${fps}fps, SPS grew from ${sps.size} to ${result.size} bytes") + return result + + } catch (e: Exception) { + 
Log.e(TAG, "Failed to inject VUI timing: ${e.message}, using original SPS") + return sps + } + } + + /** + * Writes VUI parameters with timing info. + */ + private fun writeVuiWithTiming(writer: BitWriter, fps: Int, hadVui: Boolean, reader: BitReader) { + // aspect_ratio_info_present_flag + writer.writeBits(0, 1) + + // overscan_info_present_flag + writer.writeBits(0, 1) + + // video_signal_type_present_flag + writer.writeBits(0, 1) + + // chroma_loc_info_present_flag + writer.writeBits(0, 1) + + // timing_info_present_flag = 1 + writer.writeBits(1, 1) + + // num_units_in_tick (32 bits) = 1 + writer.writeBits(1, 32) + + // time_scale (32 bits) = fps * 2 (because each frame = 2 field counts) + writer.writeBits(fps * 2, 32) + + // fixed_frame_rate_flag = 1 + writer.writeBits(1, 1) + + // nal_hrd_parameters_present_flag + writer.writeBits(0, 1) + + // vcl_hrd_parameters_present_flag + writer.writeBits(0, 1) + + // pic_struct_present_flag + writer.writeBits(0, 1) + + // bitstream_restriction_flag + writer.writeBits(0, 1) + } + + // ==================== Bit Manipulation Helpers ==================== + + /** + * Bit-level reader for parsing H.264 NAL units. 
+ */ + private class BitReader(private val data: ByteArray) { + private var bytePos = 0 + private var bitPos = 0 + + fun readBits(count: Int): Int { + var result = 0 + for (i in 0 until count) { + if (bytePos >= data.size) throw IllegalStateException("End of data") + val bit = (data[bytePos].toInt() shr (7 - bitPos)) and 1 + result = (result shl 1) or bit + bitPos++ + if (bitPos == 8) { + bitPos = 0 + bytePos++ + } + } + return result + } + + fun readExpGolomb(): Int { + var leadingZeros = 0 + while (readBits(1) == 0) { + leadingZeros++ + if (leadingZeros > 31) throw IllegalStateException("Invalid exp-golomb") + } + if (leadingZeros == 0) return 0 + val suffix = readBits(leadingZeros) + return (1 shl leadingZeros) - 1 + suffix + } + + fun readSignedExpGolomb(): Int { + val code = readExpGolomb() + return if (code % 2 == 0) -(code / 2) else (code + 1) / 2 + } + } + + /** + * Bit-level writer for constructing H.264 NAL units. + */ + private class BitWriter { + private val bytes = mutableListOf<Byte>() + private var currentByte = 0 + private var bitPos = 0 + + fun writeBits(value: Int, count: Int) { + for (i in count - 1 downTo 0) { + val bit = (value shr i) and 1 + currentByte = (currentByte shl 1) or bit + bitPos++ + if (bitPos == 8) { + bytes.add(currentByte.toByte()) + currentByte = 0 + bitPos = 0 + } + } + } + + fun writeExpGolomb(value: Int) { + val code = value + 1 + val bits = 32 - Integer.numberOfLeadingZeros(code) + // Write leading zeros + for (i in 0 until bits - 1) { + writeBits(0, 1) + } + // Write the code + writeBits(code, bits) + } + + fun writeSignedExpGolomb(value: Int) { + val code = if (value <= 0) -2 * value else 2 * value - 1 + writeExpGolomb(code) + } + + fun writeRbspTrailingBits() { + writeBits(1, 1) // rbsp_stop_one_bit + while (bitPos != 0) { + writeBits(0, 1) // rbsp_alignment_zero_bit + } + } + + fun toByteArray(): ByteArray { + // Flush remaining bits + if (bitPos > 0) { + currentByte = currentByte shl (8 - bitPos) + 
bytes.add(currentByte.toByte()) + } + return bytes.toByteArray() + } + } + + private fun copyExpGolomb(reader: BitReader, writer: BitWriter) { + val value = reader.readExpGolomb() + writer.writeExpGolomb(value) + } + + private fun copyExpGolombAndReturn(reader: BitReader, writer: BitWriter): Int { + val value = reader.readExpGolomb() + writer.writeExpGolomb(value) + return value + } + + private fun copySignedExpGolomb(reader: BitReader, writer: BitWriter) { + val value = reader.readSignedExpGolomb() + writer.writeSignedExpGolomb(value) + } + + // ==================== Init Segment Building ==================== + + /** + * Builds the initialization segment (ftyp + moov). + */ + private fun buildInitSegment(format: MediaFormat): ByteArray { + val width = format.getInteger(MediaFormat.KEY_WIDTH) + val height = format.getInteger(MediaFormat.KEY_HEIGHT) + + val rawSps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) } + ?: throw IllegalArgumentException("Missing SPS (csd-0)") + val pps = format.getByteBuffer("csd-1")?.let { extractNalUnit(it) } + ?: throw IllegalArgumentException("Missing PPS (csd-1)") + + Log.d(TAG, "Original SPS size: ${rawSps.size} bytes, PPS size: ${pps.size} bytes") + Log.d(TAG, "Original SPS hex: ${rawSps.joinToString("") { "%02x".format(it) }}") + + // Inject VUI timing info into SPS using configured fps (not encoder output format fps) + val sps = injectVuiTiming(rawSps, configuredFps) + Log.d(TAG, "Modified SPS size: ${sps.size} bytes") + Log.d(TAG, "Modified SPS hex: ${sps.joinToString("") { "%02x".format(it) }}") + + val output = ByteArrayOutputStream() + + // ftyp + output.write(buildFtypBox()) + + // moov + output.write(buildMoovBox(width, height, sps, pps)) + + return output.toByteArray() + } + + private fun extractNalUnit(buffer: ByteBuffer): ByteArray { + val data = ByteArray(buffer.remaining()) + buffer.duplicate().get(data) + + // Strip start code prefix (0x00000001 or 0x000001) + return when { + data.size >= 4 && data[0] == 
0.toByte() && data[1] == 0.toByte() && + data[2] == 0.toByte() && data[3] == 1.toByte() -> data.copyOfRange(4, data.size) + data.size >= 3 && data[0] == 0.toByte() && data[1] == 0.toByte() && + data[2] == 1.toByte() -> data.copyOfRange(3, data.size) + else -> data + } + } + + private fun buildFtypBox(): ByteArray { + val brands = listOf("isom", "iso5", "iso6", "avc1", "mp41", "dash") + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + val size = 8 + 4 + 4 + (brands.size * 4) + dos.writeInt(size) + dos.writeBytes("ftyp") + dos.writeBytes("isom") // major brand + dos.writeInt(0x200) // minor version + brands.forEach { dos.writeBytes(it) } + + return output.toByteArray() + } + + private fun buildMoovBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + + content.write(buildMvhdBox()) + content.write(buildTrakBox(width, height, sps, pps)) + content.write(buildMvexBox()) + + return wrapBox("moov", content.toByteArray()) + } + + private fun buildMvhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(timescale) // timescale + dos.writeInt(0) // duration + dos.writeInt(0x00010000) // rate = 1.0 + dos.writeShort(0x0100) // volume = 1.0 + dos.writeShort(0) // reserved + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + + // Unity matrix + dos.writeInt(0x00010000); dos.writeInt(0); dos.writeInt(0) + dos.writeInt(0); dos.writeInt(0x00010000); dos.writeInt(0) + dos.writeInt(0); dos.writeInt(0); dos.writeInt(0x40000000) + + repeat(6) { dos.writeInt(0) } // pre-defined + dos.writeInt(2) // next track ID + + return wrapBox("mvhd", output.toByteArray()) + } + + private fun buildTrakBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + 
content.write(buildTkhdBox(width, height)) + content.write(buildMdiaBox(width, height, sps, pps)) + return wrapBox("trak", content.toByteArray()) + } + + private fun buildTkhdBox(width: Int, height: Int): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0x00000007) // version 0, flags (enabled, in movie, in preview) + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(1) // track ID + dos.writeInt(0) // reserved + dos.writeInt(0) // duration + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeShort(0) // layer + dos.writeShort(0) // alternate group + dos.writeShort(0) // volume (0 for video) + dos.writeShort(0) // reserved + + // Rotation matrix + writeRotationMatrix(dos) + + // Display dimensions should be post-rotation dimensions + // For 90° or 270° rotation, swap width and height + val (displayWidth, displayHeight) = when (orientationDegrees) { + 90, 270 -> height to width + else -> width to height + } + dos.writeInt(displayWidth shl 16) // width (16.16 fixed point) + dos.writeInt(displayHeight shl 16) // height (16.16 fixed point) + + Log.d(TAG, "tkhd: encoded=${width}x${height}, display=${displayWidth}x${displayHeight}, rotation=$orientationDegrees") + + return wrapBox("tkhd", output.toByteArray()) + } + + /** + * Writes the 3x3 transformation matrix for video rotation. 
+ */ + private fun writeRotationMatrix(dos: DataOutputStream) { + val one = 0x00010000 // 1.0 in 16.16 + val negOne = 0xFFFF0000.toInt() // -1.0 in 16.16 + val w = 0x40000000 // 1.0 in 2.30 + + // For 270° device orientation (landscape-right), apply 90° CW rotation + // For 90° device orientation (landscape-left), apply 270° CW rotation + val a: Int + val b: Int + val c: Int + val d: Int + + when (orientationDegrees) { + 90 -> { a = 0; b = negOne; c = one; d = 0 } + 180 -> { a = negOne; b = 0; c = 0; d = negOne } + 270 -> { a = 0; b = one; c = negOne; d = 0 } + else -> { a = one; b = 0; c = 0; d = one } + } + + dos.writeInt(a) + dos.writeInt(b) + dos.writeInt(0) // u = 0 + dos.writeInt(c) + dos.writeInt(d) + dos.writeInt(0) // v = 0 + dos.writeInt(0) // tx = 0 + dos.writeInt(0) // ty = 0 + dos.writeInt(w) // w = 1.0 + + Log.d(TAG, "Rotation matrix for $orientationDegrees°") + } + + private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildMdhdBox()) + content.write(buildHdlrBox()) + content.write(buildMinfBox(width, height, sps, pps)) + return wrapBox("mdia", content.toByteArray()) + } + + private fun buildMdhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(timescale) // timescale + dos.writeInt(0) // duration + dos.writeShort(0x55C4) // language: "und" + dos.writeShort(0) // pre-defined + + return wrapBox("mdhd", output.toByteArray()) + } + + private fun buildHdlrBox(): ByteArray { + val name = "VideoHandler" + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // pre-defined + dos.writeBytes("vide") // handler type + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + 
dos.writeBytes(name) + dos.writeByte(0) // null terminator + + return wrapBox("hdlr", output.toByteArray()) + } + + private fun buildMinfBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildVmhdBox()) + content.write(buildDinfBox()) + content.write(buildStblBox(width, height, sps, pps)) + return wrapBox("minf", content.toByteArray()) + } + + private fun buildVmhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(1) // version 0, flags = 1 + dos.writeShort(0) // graphics mode + dos.writeShort(0) // opcolor[0] + dos.writeShort(0) // opcolor[1] + dos.writeShort(0) // opcolor[2] + + return wrapBox("vmhd", output.toByteArray()) + } + + private fun buildDinfBox(): ByteArray { + val dref = buildDrefBox() + return wrapBox("dinf", dref) + } + + private fun buildDrefBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(1) // entry count + + // url box (self-contained) + dos.writeInt(12) + dos.writeBytes("url ") + dos.writeInt(1) // flags: self-contained + + return wrapBox("dref", output.toByteArray()) + } + + private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildStsdBox(width, height, sps, pps)) + content.write(buildEmptySttsBox()) + content.write(buildEmptyStscBox()) + content.write(buildEmptyStszBox()) + content.write(buildEmptyStcoBox()) + return wrapBox("stbl", content.toByteArray()) + } + + private fun buildStsdBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(1) // entry count + output.write(buildAvc1Box(width, height, sps, pps)) + + return wrapBox("stsd", output.toByteArray()) + } + 
+ private fun buildAvc1Box(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + repeat(6) { dos.writeByte(0) } // reserved + dos.writeShort(1) // data reference index + dos.writeShort(0) // pre-defined + dos.writeShort(0) // reserved + repeat(3) { dos.writeInt(0) } // pre-defined + dos.writeShort(width) // width + dos.writeShort(height) // height + dos.writeInt(0x00480000) // horiz resolution (72 dpi) + dos.writeInt(0x00480000) // vert resolution (72 dpi) + dos.writeInt(0) // reserved + dos.writeShort(1) // frame count + repeat(32) { dos.writeByte(0) } // compressor name + dos.writeShort(0x0018) // depth (24 bit) + dos.writeShort(-1) // pre-defined + + output.write(buildAvcCBox(sps, pps)) + + return wrapBox("avc1", output.toByteArray()) + } + + private fun buildAvcCBox(sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // SPS layout: [0]=NAL header (0x67), [1]=profile_idc, [2]=constraint_flags, [3]=level_idc + val profileIdc = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x42 + val profileCompat = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x00 + val levelIdc = if (sps.size > 3) sps[3].toInt() and 0xFF else 0x1F + + dos.writeByte(1) // configuration version + dos.writeByte(profileIdc) // AVC profile + dos.writeByte(profileCompat)// profile compatibility + dos.writeByte(levelIdc) // AVC level + dos.writeByte(0xFF) // 6 bits reserved + 2 bits NAL length - 1 + + dos.writeByte(0xE1) // 3 bits reserved + 5 bits SPS count + dos.writeShort(sps.size) // SPS length + dos.write(sps) // SPS data + + dos.writeByte(1) // PPS count + dos.writeShort(pps.size) // PPS length + dos.write(pps) // PPS data + + return wrapBox("avcC", output.toByteArray()) + } + + private fun buildEmptySttsBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & 
flags + dos.writeInt(0) // entry count + return wrapBox("stts", output.toByteArray()) + } + + private fun buildEmptyStscBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stsc", output.toByteArray()) + } + + private fun buildEmptyStszBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // sample size (0 = variable) + dos.writeInt(0) // sample count + return wrapBox("stsz", output.toByteArray()) + } + + private fun buildEmptyStcoBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stco", output.toByteArray()) + } + + private fun buildMvexBox(): ByteArray { + return wrapBox("mvex", buildTrexBox()) + } + + private fun buildTrexBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // Default sample duration: timescale / fps + // Since timescale = fps * 1000, duration = 1000 for any fps + val defaultSampleDuration = 1000 + + dos.writeInt(0) // version & flags + dos.writeInt(1) // track ID + dos.writeInt(1) // default sample description index + dos.writeInt(defaultSampleDuration) // default sample duration + dos.writeInt(0) // default sample size + dos.writeInt(0) // default sample flags + + return wrapBox("trex", output.toByteArray()) + } + + // ==================== Media Segment Building ==================== + + /** + * Builds a media segment (moof + mdat). 
+ */ + private fun buildMediaSegment( + samples: List, + sequenceNumber: Int, + baseDecodeTimeUs: Long + ): ByteArray { + val output = ByteArrayOutputStream() + + // Build mdat content first to know sizes + val mdatContent = ByteArrayOutputStream() + for (sample in samples) { + mdatContent.write(sample.data) + } + val mdatPayload = mdatContent.toByteArray() + + // Build moof + val moofBox = buildMoofBox(samples, sequenceNumber, baseDecodeTimeUs, mdatPayload.size) + output.write(moofBox) + + // Build mdat + output.write(wrapBox("mdat", mdatPayload)) + + return output.toByteArray() + } + + private fun buildMoofBox( + samples: List, + sequenceNumber: Int, + baseDecodeTimeUs: Long, + mdatPayloadSize: Int + ): ByteArray { + // Calculate sizes to determine data offset + val mfhdBox = buildMfhdBox(sequenceNumber) + val tfhdSize = 8 + 8 // box header + content (version/flags + track_id) + val tfdtSize = 8 + 12 // box header + version 1 content + val trunSize = 8 + 12 + (samples.size * 12) // header + fixed + per-sample (no composition offset) + val trafSize = 8 + tfhdSize + tfdtSize + trunSize + val moofSize = 8 + mfhdBox.size + trafSize + + val dataOffset = moofSize + 8 // moof size + mdat header + + val content = ByteArrayOutputStream() + content.write(mfhdBox) + content.write(buildTrafBox(samples, baseDecodeTimeUs, dataOffset)) + + return wrapBox("moof", content.toByteArray()) + } + + private fun buildMfhdBox(sequenceNumber: Int): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(sequenceNumber) + + return wrapBox("mfhd", output.toByteArray()) + } + + private fun buildTrafBox(samples: List, baseDecodeTimeUs: Long, dataOffset: Int): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildTfhdBox()) + content.write(buildTfdtBox(baseDecodeTimeUs)) + content.write(buildTrunBox(samples, dataOffset)) + return wrapBox("traf", content.toByteArray()) + } + + private fun 
buildTfhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // Flags: default-base-is-moof (0x020000) + dos.writeInt(0x00020000) + dos.writeInt(1) // track ID + + return wrapBox("tfhd", output.toByteArray()) + } + + private fun buildTfdtBox(baseDecodeTimeUs: Long): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // Convert to timescale units + val baseMediaDecodeTime = (baseDecodeTimeUs * timescale) / 1_000_000 + + // Version 1 for 64-bit time + dos.writeInt(0x01000000) + dos.writeLong(baseMediaDecodeTime) + + return wrapBox("tfdt", output.toByteArray()) + } + + private fun buildTrunBox(samples: List, dataOffset: Int): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + // Flags: data-offset + sample-duration + sample-size + sample-flags + val flags = 0x000001 or 0x000100 or 0x000200 or 0x000400 + dos.writeInt(flags) + dos.writeInt(samples.size) + dos.writeInt(dataOffset) + + // Use constant duration based on configured fps for consistent frame rate + // This ensures ffprobe reports correct fps instead of calculating from variable timing + val constantDuration = timescale / configuredFps // e.g., 30000/30 = 1000 ticks + Log.d(TAG, "Writing ${samples.size} samples with constant duration=${constantDuration} ticks (${configuredFps}fps)") + + for (sample in samples) { + dos.writeInt(constantDuration) + dos.writeInt(sample.data.size) + dos.writeInt(buildSampleFlags(sample.isKeyFrame)) + } + + return wrapBox("trun", output.toByteArray()) + } + + private fun buildSampleFlags(isKeyFrame: Boolean): Int { + return if (isKeyFrame) { + // sample_depends_on=2 (no dependencies), not a difference sample + 0x02000000 + } else { + // sample_depends_on=1 (depends on others), is a difference sample + 0x01010000 + } + } + + // ==================== Utilities ==================== + + private fun wrapBox(type: String, content: ByteArray): ByteArray { + val 
output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(8 + content.size) + dos.writeBytes(type) + dos.write(content) + + return output.toByteArray() + } +} diff --git a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt index 1d61188..fc2e2bb 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt @@ -14,6 +14,7 @@ import android.os.Environment import java.text.SimpleDateFormat import java.util.Locale import java.util.Date + class RecordingSession( context: Context, val cameraId: String, @@ -27,6 +28,8 @@ class RecordingSession( private val callback: (video: Video) -> Unit, private val onError: (error: CameraError) -> Unit, private val allCallbacks: CameraSession.Callback, + // Use FragmentedRecordingManager for HLS-compatible fMP4 output + private val useFragmentedMp4: Boolean = true ) { companion object { private const val TAG = "RecordingSession" @@ -34,6 +37,9 @@ class RecordingSession( private const val AUDIO_SAMPLING_RATE = 44_100 private const val AUDIO_BIT_RATE = 16 * AUDIO_SAMPLING_RATE private const val AUDIO_CHANNELS = 1 + + // Segment duration in seconds (matching iOS default of 6 seconds) + private const val SEGMENT_DURATION_SECONDS = 6 } data class Video(val path: String, val durationMs: Long, val size: Size) @@ -41,16 +47,33 @@ class RecordingSession( private val outputPath: File = File(filePath) private val bitRate = getBitRate() - private val recorder = ChunkedRecordingManager.fromParams( - allCallbacks, - size, - enableAudio, - fps, - cameraOrientation, - bitRate, - options, - outputPath - ) + + // Use FragmentedRecordingManager for HLS-compatible fMP4 output, + // or fall back to ChunkedRecordingManager for regular MP4 chunks + private val recorder: ChunkedRecorderInterface = if (useFragmentedMp4) { + 
FragmentedRecordingManager.fromParams( + allCallbacks, + size, + enableAudio, + fps, + cameraOrientation, + bitRate, + options, + outputPath, + SEGMENT_DURATION_SECONDS + ) + } else { + ChunkedRecordingManager.fromParams( + allCallbacks, + size, + enableAudio, + fps, + cameraOrientation, + bitRate, + options, + outputPath + ) + } private var startTime: Long? = null val surface: Surface get() {