diff --git a/package/android/build.gradle b/package/android/build.gradle index 1b3aa73..8a009fc 100644 --- a/package/android/build.gradle +++ b/package/android/build.gradle @@ -178,10 +178,6 @@ dependencies { implementation "com.facebook.react:react-android:+" implementation "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3" - // Media3 muxer for fragmented MP4 (HLS-compatible) recording - implementation "androidx.media3:media3-muxer:1.5.0" - implementation "androidx.media3:media3-common:1.5.0" - if (enableCodeScanner) { // User enabled code-scanner, so we bundle the 2.4 MB model in the app. implementation 'com.google.mlkit:barcode-scanning:17.2.0' diff --git a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt index 6157808..8ad49d7 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/FragmentedRecordingManager.kt @@ -7,37 +7,25 @@ import android.media.MediaFormat import android.util.Log import android.util.Size import android.view.Surface -import androidx.media3.common.Format -import androidx.media3.common.MimeTypes -import androidx.media3.common.util.UnstableApi -import androidx.media3.muxer.FragmentedMp4Muxer -import androidx.media3.muxer.Muxer import com.mrousavy.camera.types.Orientation import com.mrousavy.camera.types.RecordVideoOptions import java.io.File -import java.io.FileOutputStream -import java.nio.ByteBuffer /** * A recording manager that produces HLS-compatible fragmented MP4 segments. * - * This produces output similar to the iOS implementation: - * - An initialization segment (init.mp4) containing codec configuration - * - Numbered data segments (0.mp4, 1.mp4, ...) containing media data - * - * Uses AndroidX Media3's FragmentedMp4Muxer which produces proper fMP4 output. 
+ * Uses HlsMuxer (following Android's MediaMuxer pattern) to produce: + * - init.mp4: Initialization segment (ftyp + moov with mvex) + * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat) */ -@UnstableApi class FragmentedRecordingManager( private val encoder: MediaCodec, - private val outputDirectory: File, - private val orientationDegrees: Int, - private val targetSegmentDurationUs: Long, - private val callbacks: CameraSession.Callback + private val muxer: HlsMuxer ) : MediaCodec.Callback(), ChunkedRecorderInterface { companion object { private const val TAG = "FragmentedRecorder" + private const val DEFAULT_SEGMENT_DURATION_SECONDS = 6 fun fromParams( callbacks: CameraSession.Callback, @@ -48,17 +36,20 @@ class FragmentedRecordingManager( bitRate: Int, options: RecordVideoOptions, outputDirectory: File, - segmentDurationSeconds: Int = 6 + segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS ): FragmentedRecordingManager { val mimeType = options.videoCodec.toMimeType() val cameraOrientationDegrees = cameraOrientation.toDegrees() val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees() - val (width, height) = if (cameraOrientation.isLandscape()) { - size.height to size.width - } else { - size.width to size.height - } + // Use size dimensions directly - the encoder output format will have the actual dimensions + // Don't swap based on orientation here; the camera pipeline handles that + val width = size.width + val height = size.height + + Log.d(TAG, "Input size: ${size.width}x${size.height}, " + + "cameraOrientation: $cameraOrientation ($cameraOrientationDegrees°), " + + "recordingOrientation: $recordingOrientationDegrees°") val format = MediaFormat.createVideoFormat(mimeType, width, height) val codec = MediaCodec.createEncoderByType(mimeType) @@ -67,121 +58,48 @@ class FragmentedRecordingManager( MediaFormat.KEY_COLOR_FORMAT, MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface ) - fps?.apply { - format.setInteger(MediaFormat.KEY_FRAME_RATE, this) - } - // I-frame interval affects segment boundaries + + val effectiveFps = fps ?: 30 + format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps) format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds) format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate) - Log.d(TAG, "Video Format: $format, camera orientation $cameraOrientationDegrees, recordingOrientation: $recordingOrientationDegrees") + Log.d(TAG, "Video Format: $format, orientation: $recordingOrientationDegrees") codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE) - return FragmentedRecordingManager( - codec, - outputDirectory, - recordingOrientationDegrees, - segmentDurationSeconds * 1_000_000L, - callbacks + // Create muxer with callbacks and orientation + val muxer = HlsMuxer( + outputDirectory = outputDirectory, + callback = object : HlsMuxer.Callback { + override fun onInitSegmentReady(file: File) { + callbacks.onInitSegmentReady(file) + } + + override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) { + callbacks.onVideoChunkReady(file, index, durationUs) + } + }, + orientationDegrees = recordingOrientationDegrees ) + muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L) + + Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees") + + return FragmentedRecordingManager(codec, muxer) } } - // State management - private var chunkIndex = 0 - private var encodedFormat: MediaFormat? 
= null private var recording = false - - // Segment tracking - private var segmentContext: SegmentContext? = null - private var initSegmentEmitted = false + private var muxerStarted = false + private var trackIndex = -1 override val surface: Surface = encoder.createInputSurface() init { - if (!outputDirectory.exists()) { - outputDirectory.mkdirs() - } encoder.setCallback(this) } - /** - * Context for a single data segment being written. - * Init segments are created separately via createInitSegment(). - */ - private inner class SegmentContext( - private val format: MediaFormat, - private val segmentIndex: Int - ) { - private val filename = "$segmentIndex.mp4" - private val file = File(outputDirectory, filename) - private val outputStream = FileOutputStream(file) - private val muxer = FragmentedMp4Muxer.Builder(outputStream).build() - private lateinit var videoTrack: Muxer.TrackToken - private var startTimeUs: Long = -1L - private var lastTimeUs: Long = 0L - private var sampleCount = 0 - - init { - val media3Format = convertToMedia3Format(format) - videoTrack = muxer.addTrack(media3Format) - Log.d(TAG, "Created segment context: $filename") - } - - fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo): Boolean { - if (startTimeUs < 0) { - startTimeUs = bufferInfo.presentationTimeUs - } - lastTimeUs = bufferInfo.presentationTimeUs - - val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0 - - muxer.writeSampleData(videoTrack, buffer, bufferInfo) - sampleCount++ - - // Check if we should start a new segment at the next keyframe - if (isKeyFrame && sampleCount > 1) { - val segmentDurationUs = bufferInfo.presentationTimeUs - startTimeUs - if (segmentDurationUs >= targetSegmentDurationUs) { - return true // Signal to create new segment - } - } - - return false - } - - fun finish(): Long { - try { - muxer.close() - outputStream.close() - } catch (e: Exception) { - Log.e(TAG, "Error closing segment", e) - } - - val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L - callbacks.onVideoChunkReady(file, segmentIndex, durationUs) - - Log.d(TAG, "Finished segment: $filename, samples=$sampleCount, duration=${durationUs/1000}ms") - return durationUs - } - } - - private fun createNewSegment() { - val format = encodedFormat - if (format == null) { - Log.e(TAG, "Cannot create segment: encodedFormat is null") - return - } - - // Close previous segment - segmentContext?.finish() - - // Create new data segment (init segments are created separately) - segmentContext = SegmentContext(format, chunkIndex) - chunkIndex++ - } - override fun start() { encoder.start() recording = true @@ -190,8 +108,12 @@ class FragmentedRecordingManager( override fun finish() { synchronized(this) { recording = false - segmentContext?.finish() - segmentContext = null + + if (muxerStarted) { + muxer.stop() + muxer.release() + } + try { encoder.stop() encoder.release() @@ -202,6 +124,7 @@ class FragmentedRecordingManager( } // MediaCodec.Callback methods + override fun onInputBufferAvailable(codec: MediaCodec, index: Int) { // Not used for Surface input } @@ -213,37 +136,20 @@ class FragmentedRecordingManager( return } - val encodedData = encoder.getOutputBuffer(index) - if (encodedData == null) { + if (!muxerStarted) { + encoder.releaseOutputBuffer(index, false) + return + } + + val buffer = encoder.getOutputBuffer(index) + if (buffer == null) { Log.e(TAG, "getOutputBuffer returned null") encoder.releaseOutputBuffer(index, false) return } - // Wait until init segment is emitted 
(happens in onOutputFormatChanged) - if (!initSegmentEmitted) { - encoder.releaseOutputBuffer(index, false) - return - } - - // Create first data segment if needed - if (segmentContext == null) { - createNewSegment() - } - - val context = segmentContext - if (context == null) { - encoder.releaseOutputBuffer(index, false) - return - } - try { - val shouldStartNewSegment = context.writeSample(encodedData, bufferInfo) - if (shouldStartNewSegment) { - createNewSegment() - // Write this keyframe to the new segment as well - segmentContext?.writeSample(encodedData, bufferInfo) - } + muxer.writeSampleData(trackIndex, buffer, bufferInfo) } catch (e: Exception) { Log.e(TAG, "Error writing sample", e) } @@ -257,76 +163,12 @@ class FragmentedRecordingManager( } override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) { - Log.i(TAG, "Output format changed: $format") - encodedFormat = format + synchronized(this) { + Log.i(TAG, "Output format changed: $format") - // Create the init segment immediately when we get the format - // This produces an fMP4 file with just ftyp + moov (no samples) - if (!initSegmentEmitted) { - createInitSegment(format) - initSegmentEmitted = true + trackIndex = muxer.addTrack(format) + muxer.start() + muxerStarted = true } } - - /** - * Creates an initialization segment containing only codec configuration (ftyp + moov). - * This is done by creating a muxer, adding the track, and immediately closing it - * without writing any samples. - */ - private fun createInitSegment(format: MediaFormat) { - val initFile = File(outputDirectory, "init.mp4") - try { - val outputStream = FileOutputStream(initFile) - val muxer = FragmentedMp4Muxer.Builder(outputStream).build() - - // Convert and add the track - val media3Format = convertToMedia3Format(format) - muxer.addTrack(media3Format) - - // Close immediately - this writes just the header (ftyp + moov) - muxer.close() - outputStream.close() - - Log.d(TAG, "Created init segment: ${initFile.absolutePath}") - callbacks.onInitSegmentReady(initFile) - } catch (e: Exception) { - Log.e(TAG, "Error creating init segment", e) - } - } - - private fun convertToMedia3Format(mediaFormat: MediaFormat): Format { - val mimeType = mediaFormat.getString(MediaFormat.KEY_MIME) ?: MimeTypes.VIDEO_H264 - val width = mediaFormat.getInteger(MediaFormat.KEY_WIDTH) - val height = mediaFormat.getInteger(MediaFormat.KEY_HEIGHT) - val bitRate = try { mediaFormat.getInteger(MediaFormat.KEY_BIT_RATE) } catch (e: Exception) { -1 } - val frameRate = try { mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE) } catch (e: Exception) { -1 } - - // Get CSD (Codec Specific Data) if available - required for init segment - val csd0 = mediaFormat.getByteBuffer("csd-0") - val csd1 = mediaFormat.getByteBuffer("csd-1") - - val initData = mutableListOf() - csd0?.let { - val bytes = ByteArray(it.remaining()) - it.duplicate().get(bytes) - initData.add(bytes) - } - csd1?.let { - val bytes = ByteArray(it.remaining()) - it.duplicate().get(bytes) - initData.add(bytes) - } - - return Format.Builder() - .setSampleMimeType(mimeType) - .setWidth(width) - .setHeight(height) - .setRotationDegrees(orientationDegrees) - .apply { - if (bitRate > 0) setAverageBitrate(bitRate) - if (frameRate > 0) setFrameRate(frameRate.toFloat()) - if (initData.isNotEmpty()) setInitializationData(initData) - } - .build() - } } diff --git a/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt new file mode 100644 index 
0000000..5c68668
--- /dev/null
+++ b/package/android/src/main/java/com/mrousavy/camera/core/HlsMuxer.kt
@@ -0,0 +1,857 @@
+package com.mrousavy.camera.core
+
+import android.media.MediaCodec
+import android.media.MediaFormat
+import android.util.Log
+import java.io.ByteArrayOutputStream
+import java.io.DataOutputStream
+import java.io.File
+import java.io.FileOutputStream
+import java.nio.ByteBuffer
+
+/**
+ * A muxer for creating HLS-compatible fragmented MP4 output.
+ *
+ * Follows the same pattern as Android's MediaMuxer:
+ * 1. Create muxer with output directory
+ * 2. addTrack() with MediaFormat
+ * 3. start() - writes init.mp4
+ * 4. writeSampleData() for each encoded sample
+ * 5. stop() - finalizes last segment
+ * 6. release() - cleanup
+ *
+ * Produces:
+ * - init.mp4: Initialization segment (ftyp + moov with mvex)
+ * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
+ */
+class HlsMuxer(
+  private val outputDirectory: File,
+  private val callback: Callback,
+  private val orientationDegrees: Int = 0
+) {
+  companion object {
+    private const val TAG = "HlsMuxer"
+    private const val DEFAULT_SEGMENT_DURATION_US = 6_000_000L // 6 seconds
+  }
+
+  interface Callback {
+    fun onInitSegmentReady(file: File)
+    fun onMediaSegmentReady(file: File, index: Int, durationUs: Long)
+  }
+
+  // Configuration
+  private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
+  private var timescale: Int = 30000 // Default, updated from format
+
+  // State
+  private var state = State.UNINITIALIZED
+  private var trackFormat: MediaFormat? = null
+  private var sequenceNumber = 1
+  private var segmentIndex = 0
+
+  // Current segment data
+  private val pendingSamples = mutableListOf<Sample>()
+  private var segmentStartTimeUs = -1L
+  private var lastPresentationTimeUs = 0L
+
+  private enum class State {
+    UNINITIALIZED,
+    INITIALIZED,
+    STARTED,
+    STOPPED,
+    RELEASED
+  }
+
+  private data class Sample(
+    val data: ByteArray,
+    val presentationTimeUs: Long,
+    var durationUs: Long,
+    val isKeyFrame: Boolean
+  )
+
+  // ==================== Annex-B to AVCC Conversion ====================
+
+  /**
+   * Converts H.264 data from Annex-B format to AVCC format.
+   *
+   * Annex-B uses start codes (00 00 00 01 or 00 00 01) to delimit NAL units.
+   * AVCC uses 4-byte big-endian length prefixes before each NAL unit.
+   *
+   * This conversion is required because:
+   * - MediaCodec outputs Annex-B format
+   * - fMP4/HLS requires AVCC format (as specified in avcC box with NAL length size = 4)
+   */
+  private fun convertAnnexBToAvcc(annexBData: ByteArray): ByteArray {
+    val nalUnits = parseAnnexBNalUnits(annexBData)
+    if (nalUnits.isEmpty()) {
+      Log.w(TAG, "No NAL units found in sample, returning original data")
+      return annexBData
+    }
+
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    for (nalUnit in nalUnits) {
+      // Write 4-byte big-endian length prefix
+      dos.writeInt(nalUnit.size)
+      // Write NAL unit data (without start code)
+      dos.write(nalUnit)
+    }
+
+    return output.toByteArray()
+  }
+
+  /**
+   * Parses Annex-B formatted data into individual NAL units.
+   * Returns list of NAL unit byte arrays (without start codes).
+   */
+  private fun parseAnnexBNalUnits(data: ByteArray): List<ByteArray> {
+    val nalUnits = mutableListOf<ByteArray>()
+    var i = 0
+
+    while (i < data.size) {
+      // Find start code
+      val startCodeLength = findStartCode(data, i)
+      if (startCodeLength == 0) {
+        // No start code found at current position
+        // This might happen if data doesn't start with a start code
+        if (nalUnits.isEmpty() && i == 0) {
+          // Data might already be in AVCC format or malformed
+          // Try to detect AVCC format (first 4 bytes would be a reasonable length)
+          if (data.size >= 4) {
+            val possibleLength = ((data[0].toInt() and 0xFF) shl 24) or
+              ((data[1].toInt() and 0xFF) shl 16) or
+              ((data[2].toInt() and 0xFF) shl 8) or
+              (data[3].toInt() and 0xFF)
+            if (possibleLength > 0 && possibleLength <= data.size - 4) {
+              // Looks like AVCC format already, return original
+              Log.d(TAG, "Data appears to already be in AVCC format")
+              return emptyList()
+            }
+          }
+        }
+        i++
+        continue
+      }
+
+      val nalStart = i + startCodeLength
+
+      // Find end of this NAL unit (start of next, or end of data)
+      var nalEnd = data.size
+      var j = nalStart
+      while (j < data.size - 2) {
+        val nextStartCode = findStartCode(data, j)
+        if (nextStartCode > 0) {
+          nalEnd = j
+          break
+        }
+        j++
+      }
+
+      if (nalEnd > nalStart) {
+        nalUnits.add(data.copyOfRange(nalStart, nalEnd))
+      }
+
+      i = nalEnd
+    }
+
+    return nalUnits
+  }
+
+  /**
+   * Checks for Annex-B start code at given position.
+   * Returns start code length (3 or 4) or 0 if no start code found.
+   */
+  private fun findStartCode(data: ByteArray, offset: Int): Int {
+    if (offset + 4 <= data.size &&
+      data[offset] == 0.toByte() &&
+      data[offset + 1] == 0.toByte() &&
+      data[offset + 2] == 0.toByte() &&
+      data[offset + 3] == 1.toByte()) {
+      return 4 // 4-byte start code: 00 00 00 01
+    }
+    if (offset + 3 <= data.size &&
+      data[offset] == 0.toByte() &&
+      data[offset + 1] == 0.toByte() &&
+      data[offset + 2] == 1.toByte()) {
+      return 3 // 3-byte start code: 00 00 01
+    }
+    return 0
+  }
+
+  /**
+   * Sets the target segment duration.
+   * Must be called before start().
+   */
+  fun setSegmentDuration(durationUs: Long) {
+    check(state == State.UNINITIALIZED || state == State.INITIALIZED) {
+      "Cannot set segment duration after start()"
+    }
+    targetSegmentDurationUs = durationUs
+  }
+
+  /**
+   * Adds a track to the muxer.
+   *
+   * @param format The MediaFormat describing the track
+   * @return Track index (always 0 for now, single video track)
+   */
+  fun addTrack(format: MediaFormat): Int {
+    check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" }
+
+    trackFormat = format
+
+    // Extract timescale from frame rate
+    val fps = try {
+      format.getInteger(MediaFormat.KEY_FRAME_RATE)
+    } catch (e: Exception) {
+      30
+    }
+    timescale = fps * 1000 // Use fps * 1000 for good precision
+
+    state = State.INITIALIZED
+
+    val formatWidth = try { format.getInteger(MediaFormat.KEY_WIDTH) } catch (e: Exception) { -1 }
+    val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 }
+    Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
+      "encoder output: ${formatWidth}x${formatHeight}, " +
+      "timescale=$timescale, orientation=$orientationDegrees°")
+
+    return 0 // Single track, index 0
+  }
+
+  /**
+   * Starts the muxer, writing the initialization segment.
+ */ + fun start() { + check(state == State.INITIALIZED) { "Must call addTrack() before start()" } + val format = trackFormat ?: throw IllegalStateException("No track format") + + // Create output directory if needed + if (!outputDirectory.exists()) { + outputDirectory.mkdirs() + } + + // Write init segment + val initBytes = buildInitSegment(format) + val initFile = File(outputDirectory, "init.mp4") + FileOutputStream(initFile).use { it.write(initBytes) } + Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)") + callback.onInitSegmentReady(initFile) + + state = State.STARTED + } + + /** + * Writes sample data to the muxer. + * + * @param trackIndex Track index (must be 0) + * @param buffer The encoded sample data + * @param bufferInfo Sample metadata (size, presentation time, flags) + */ + fun writeSampleData(trackIndex: Int, buffer: ByteBuffer, bufferInfo: MediaCodec.BufferInfo) { + check(state == State.STARTED) { "Must call start() before writeSampleData()" } + check(trackIndex == 0) { "Invalid track index: $trackIndex" } + + // Skip codec config data (already in init segment) + if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) { + return + } + + val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0 + val presentationTimeUs = bufferInfo.presentationTimeUs + + // Initialize segment start time + if (segmentStartTimeUs < 0) { + segmentStartTimeUs = presentationTimeUs + } + + // Check if we should finalize current segment (at keyframe boundaries) + if (isKeyFrame && pendingSamples.isNotEmpty()) { + val segmentDurationUs = presentationTimeUs - segmentStartTimeUs + if (segmentDurationUs >= targetSegmentDurationUs) { + finalizeCurrentSegment() + segmentStartTimeUs = presentationTimeUs + } + } + + // Copy buffer data and convert from Annex-B to AVCC format + val rawData = ByteArray(bufferInfo.size) + buffer.position(bufferInfo.offset) + buffer.limit(bufferInfo.offset + bufferInfo.size) + buffer.get(rawData) + + // Convert Annex-B (start codes) to AVCC (length prefixes) + val data = convertAnnexBToAvcc(rawData) + + // Update duration of previous sample + if (pendingSamples.isNotEmpty()) { + val lastSample = pendingSamples.last() + lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs + } + + // Estimate duration (will be corrected by next sample) + val estimatedDurationUs = if (lastPresentationTimeUs > 0) { + presentationTimeUs - lastPresentationTimeUs + } else { + 1_000_000L / 30 // Assume 30fps + } + + pendingSamples.add(Sample( + data = data, + presentationTimeUs = presentationTimeUs, + durationUs = estimatedDurationUs, + isKeyFrame = isKeyFrame + )) + + lastPresentationTimeUs = presentationTimeUs + } + + /** + * Stops the muxer, finalizing any pending segment. + */ + fun stop() { + check(state == State.STARTED) { "Muxer not started" } + + if (pendingSamples.isNotEmpty()) { + finalizeCurrentSegment() + } + + state = State.STOPPED + Log.d(TAG, "Muxer stopped, wrote $segmentIndex segments") + } + + /** + * Releases resources. + */ + fun release() { + if (state == State.STARTED) { + stop() + } + pendingSamples.clear() + state = State.RELEASED + } + + /** + * Finalizes the current segment and writes it to disk. 
+ */ + private fun finalizeCurrentSegment() { + if (pendingSamples.isEmpty()) return + + try { + val baseDecodeTimeUs = pendingSamples.first().presentationTimeUs + val fragmentBytes = buildMediaSegment(pendingSamples, sequenceNumber, baseDecodeTimeUs) + + val segmentFile = File(outputDirectory, "$segmentIndex.mp4") + FileOutputStream(segmentFile).use { it.write(fragmentBytes) } + + // Calculate duration + val firstPts = pendingSamples.first().presentationTimeUs + val lastSample = pendingSamples.last() + val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs + + Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " + + "duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes") + + callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs) + + segmentIndex++ + sequenceNumber++ + pendingSamples.clear() + + } catch (e: Exception) { + Log.e(TAG, "Error finalizing segment $segmentIndex", e) + } + } + + // ==================== Init Segment Building ==================== + + /** + * Builds the initialization segment (ftyp + moov). + */ + private fun buildInitSegment(format: MediaFormat): ByteArray { + val width = format.getInteger(MediaFormat.KEY_WIDTH) + val height = format.getInteger(MediaFormat.KEY_HEIGHT) + + val sps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) } + ?: throw IllegalArgumentException("Missing SPS (csd-0)") + val pps = format.getByteBuffer("csd-1")?.let { extractNalUnit(it) } + ?: throw IllegalArgumentException("Missing PPS (csd-1)") + + val output = ByteArrayOutputStream() + + // ftyp + output.write(buildFtypBox()) + + // moov + output.write(buildMoovBox(width, height, sps, pps)) + + return output.toByteArray() + } + + private fun extractNalUnit(buffer: ByteBuffer): ByteArray { + val data = ByteArray(buffer.remaining()) + buffer.duplicate().get(data) + + // Strip start code prefix (0x00000001 or 0x000001) + return when { + data.size >= 4 && data[0] == 0.toByte() && data[1] == 0.toByte() && + data[2] == 0.toByte() && data[3] == 1.toByte() -> data.copyOfRange(4, data.size) + data.size >= 3 && data[0] == 0.toByte() && data[1] == 0.toByte() && + data[2] == 1.toByte() -> data.copyOfRange(3, data.size) + else -> data + } + } + + private fun buildFtypBox(): ByteArray { + val brands = listOf("isom", "iso5", "iso6", "avc1", "mp41", "dash") + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + val size = 8 + 4 + 4 + (brands.size * 4) + dos.writeInt(size) + dos.writeBytes("ftyp") + dos.writeBytes("isom") // major brand + dos.writeInt(0x200) // minor version + brands.forEach { dos.writeBytes(it) } + + return output.toByteArray() + } + + private fun buildMoovBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + + content.write(buildMvhdBox()) + content.write(buildTrakBox(width, height, sps, pps)) + content.write(buildMvexBox()) + + return wrapBox("moov", content.toByteArray()) + } + + private fun buildMvhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(timescale) // timescale + dos.writeInt(0) // duration + dos.writeInt(0x00010000) // rate = 1.0 + dos.writeShort(0x0100) // volume = 1.0 + dos.writeShort(0) // reserved + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + + // Unity matrix + dos.writeInt(0x00010000); dos.writeInt(0); 
dos.writeInt(0)
+    dos.writeInt(0); dos.writeInt(0x00010000); dos.writeInt(0)
+    dos.writeInt(0); dos.writeInt(0); dos.writeInt(0x40000000)
+
+    repeat(6) { dos.writeInt(0) } // pre-defined
+    dos.writeInt(2) // next track ID
+
+    return wrapBox("mvhd", output.toByteArray())
+  }
+
+  private fun buildTrakBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
+    val content = ByteArrayOutputStream()
+    content.write(buildTkhdBox(width, height))
+    content.write(buildMdiaBox(width, height, sps, pps))
+    return wrapBox("trak", content.toByteArray())
+  }
+
+  private fun buildTkhdBox(width: Int, height: Int): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    dos.writeInt(0x00000007) // version 0, flags (enabled, in movie, in preview)
+    dos.writeInt(0) // creation time
+    dos.writeInt(0) // modification time
+    dos.writeInt(1) // track ID
+    dos.writeInt(0) // reserved
+    dos.writeInt(0) // duration
+    dos.writeInt(0) // reserved
+    dos.writeInt(0) // reserved
+    dos.writeShort(0) // layer
+    dos.writeShort(0) // alternate group
+    dos.writeShort(0) // volume (0 for video)
+    dos.writeShort(0) // reserved
+
+    // Rotation matrix - use identity and rely on correct dimensions from encoder
+    // The encoder output format already has the correct dimensions for the content
+    writeRotationMatrix(dos)
+
+    // Use dimensions as-is from encoder output format
+    dos.writeInt(width shl 16) // width (16.16 fixed point)
+    dos.writeInt(height shl 16) // height (16.16 fixed point)
+
+    Log.d(TAG, "tkhd: ${width}x${height}, rotation=$orientationDegrees")
+
+    return wrapBox("tkhd", output.toByteArray())
+  }
+
+  /**
+   * Writes the 3x3 transformation matrix for the video track.
+   * Currently writes an identity matrix (no rotation) - the encoder already outputs correctly oriented frames.
+ */ + private fun writeRotationMatrix(dos: DataOutputStream) { + // Fixed-point constants + val one = 0x00010000 // 1.0 in 16.16 + val w = 0x40000000 // 1.0 in 2.30 + + // Identity matrix - no transformation + // Most HLS players handle rotation via the dimensions themselves + // or we can add rotation metadata separately if needed + dos.writeInt(one) // a = 1 + dos.writeInt(0) // b = 0 + dos.writeInt(0) // u = 0 + dos.writeInt(0) // c = 0 + dos.writeInt(one) // d = 1 + dos.writeInt(0) // v = 0 + dos.writeInt(0) // x = 0 + dos.writeInt(0) // y = 0 + dos.writeInt(w) // w = 1 + } + + private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildMdhdBox()) + content.write(buildHdlrBox()) + content.write(buildMinfBox(width, height, sps, pps)) + return wrapBox("mdia", content.toByteArray()) + } + + private fun buildMdhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // creation time + dos.writeInt(0) // modification time + dos.writeInt(timescale) // timescale + dos.writeInt(0) // duration + dos.writeShort(0x55C4) // language: "und" + dos.writeShort(0) // pre-defined + + return wrapBox("mdhd", output.toByteArray()) + } + + private fun buildHdlrBox(): ByteArray { + val name = "VideoHandler" + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(0) // pre-defined + dos.writeBytes("vide") // handler type + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeInt(0) // reserved + dos.writeBytes(name) + dos.writeByte(0) // null terminator + + return wrapBox("hdlr", output.toByteArray()) + } + + private fun buildMinfBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildVmhdBox()) + content.write(buildDinfBox()) + content.write(buildStblBox(width, height, sps, pps)) + return wrapBox("minf", content.toByteArray()) + } + + private fun buildVmhdBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(1) // version 0, flags = 1 + dos.writeShort(0) // graphics mode + dos.writeShort(0) // opcolor[0] + dos.writeShort(0) // opcolor[1] + dos.writeShort(0) // opcolor[2] + + return wrapBox("vmhd", output.toByteArray()) + } + + private fun buildDinfBox(): ByteArray { + val dref = buildDrefBox() + return wrapBox("dinf", dref) + } + + private fun buildDrefBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(1) // entry count + + // url box (self-contained) + dos.writeInt(12) + dos.writeBytes("url ") + dos.writeInt(1) // flags: self-contained + + return wrapBox("dref", output.toByteArray()) + } + + private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val content = ByteArrayOutputStream() + content.write(buildStsdBox(width, height, sps, pps)) + content.write(buildEmptySttsBox()) + content.write(buildEmptyStscBox()) + content.write(buildEmptyStszBox()) + content.write(buildEmptyStcoBox()) + return wrapBox("stbl", content.toByteArray()) + } + + private fun buildStsdBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + 
dos.writeInt(1) // entry count + output.write(buildAvc1Box(width, height, sps, pps)) + + return wrapBox("stsd", output.toByteArray()) + } + + private fun buildAvc1Box(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + repeat(6) { dos.writeByte(0) } // reserved + dos.writeShort(1) // data reference index + dos.writeShort(0) // pre-defined + dos.writeShort(0) // reserved + repeat(3) { dos.writeInt(0) } // pre-defined + dos.writeShort(width) // width + dos.writeShort(height) // height + dos.writeInt(0x00480000) // horiz resolution (72 dpi) + dos.writeInt(0x00480000) // vert resolution (72 dpi) + dos.writeInt(0) // reserved + dos.writeShort(1) // frame count + repeat(32) { dos.writeByte(0) } // compressor name + dos.writeShort(0x0018) // depth (24 bit) + dos.writeShort(-1) // pre-defined + + output.write(buildAvcCBox(sps, pps)) + + return wrapBox("avc1", output.toByteArray()) + } + + private fun buildAvcCBox(sps: ByteArray, pps: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + val profileIdc = if (sps.isNotEmpty()) sps[0].toInt() and 0xFF else 0x42 + val profileCompat = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x00 + val levelIdc = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x1F + + dos.writeByte(1) // configuration version + dos.writeByte(profileIdc) // AVC profile + dos.writeByte(profileCompat)// profile compatibility + dos.writeByte(levelIdc) // AVC level + dos.writeByte(0xFF) // 6 bits reserved + 2 bits NAL length - 1 + + dos.writeByte(0xE1) // 3 bits reserved + 5 bits SPS count + dos.writeShort(sps.size) // SPS length + dos.write(sps) // SPS data + + dos.writeByte(1) // PPS count + dos.writeShort(pps.size) // PPS length + dos.write(pps) // PPS data + + return wrapBox("avcC", output.toByteArray()) + } + + private fun buildEmptySttsBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stts", output.toByteArray()) + } + + private fun buildEmptyStscBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stsc", output.toByteArray()) + } + + private fun buildEmptyStszBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // sample size (0 = variable) + dos.writeInt(0) // sample count + return wrapBox("stsz", output.toByteArray()) + } + + private fun buildEmptyStcoBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + dos.writeInt(0) // version & flags + dos.writeInt(0) // entry count + return wrapBox("stco", output.toByteArray()) + } + + private fun buildMvexBox(): ByteArray { + return wrapBox("mvex", buildTrexBox()) + } + + private fun buildTrexBox(): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(0) // version & flags + dos.writeInt(1) // track ID + dos.writeInt(1) // default sample description index + dos.writeInt(0) // default sample duration + dos.writeInt(0) // default sample size + dos.writeInt(0) // default sample flags + + return wrapBox("trex", output.toByteArray()) + } + + // ==================== Media Segment Building ==================== + + /** + * Builds a media segment (moof 
+ mdat).
+   */
+  private fun buildMediaSegment(
+    samples: List<Sample>,
+    sequenceNumber: Int,
+    baseDecodeTimeUs: Long
+  ): ByteArray {
+    val output = ByteArrayOutputStream()
+
+    // Build mdat content first to know sizes
+    val mdatContent = ByteArrayOutputStream()
+    for (sample in samples) {
+      mdatContent.write(sample.data)
+    }
+    val mdatPayload = mdatContent.toByteArray()
+
+    // Build moof
+    val moofBox = buildMoofBox(samples, sequenceNumber, baseDecodeTimeUs, mdatPayload.size)
+    output.write(moofBox)
+
+    // Build mdat
+    output.write(wrapBox("mdat", mdatPayload))
+
+    return output.toByteArray()
+  }
+
+  private fun buildMoofBox(
+    samples: List<Sample>,
+    sequenceNumber: Int,
+    baseDecodeTimeUs: Long,
+    mdatPayloadSize: Int
+  ): ByteArray {
+    // Calculate sizes to determine data offset
+    val mfhdBox = buildMfhdBox(sequenceNumber)
+    val tfhdSize = 8 + 8 // box header + content (version/flags + track_id)
+    val tfdtSize = 8 + 12 // box header + version 1 content
+    val trunSize = 8 + 12 + (samples.size * 12) // header + fixed + per-sample (no composition offset)
+    val trafSize = 8 + tfhdSize + tfdtSize + trunSize
+    val moofSize = 8 + mfhdBox.size + trafSize
+
+    val dataOffset = moofSize + 8 // moof size + mdat header
+
+    val content = ByteArrayOutputStream()
+    content.write(mfhdBox)
+    content.write(buildTrafBox(samples, baseDecodeTimeUs, dataOffset))
+
+    return wrapBox("moof", content.toByteArray())
+  }
+
+  private fun buildMfhdBox(sequenceNumber: Int): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    dos.writeInt(0) // version & flags
+    dos.writeInt(sequenceNumber)
+
+    return wrapBox("mfhd", output.toByteArray())
+  }
+
+  private fun buildTrafBox(samples: List<Sample>, baseDecodeTimeUs: Long, dataOffset: Int): ByteArray {
+    val content = ByteArrayOutputStream()
+    content.write(buildTfhdBox())
+    content.write(buildTfdtBox(baseDecodeTimeUs))
+    content.write(buildTrunBox(samples, dataOffset))
+    return wrapBox("traf", content.toByteArray())
+  }
+
+  private fun buildTfhdBox(): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    // Flags: default-base-is-moof (0x020000)
+    dos.writeInt(0x00020000)
+    dos.writeInt(1) // track ID
+
+    return wrapBox("tfhd", output.toByteArray())
+  }
+
+  private fun buildTfdtBox(baseDecodeTimeUs: Long): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    // Convert to timescale units
+    val baseMediaDecodeTime = (baseDecodeTimeUs * timescale) / 1_000_000
+
+    // Version 1 for 64-bit time
+    dos.writeInt(0x01000000)
+    dos.writeLong(baseMediaDecodeTime)
+
+    return wrapBox("tfdt", output.toByteArray())
+  }
+
+  private fun buildTrunBox(samples: List<Sample>, dataOffset: Int): ByteArray {
+    val output = ByteArrayOutputStream()
+    val dos = DataOutputStream(output)
+
+    // Flags: data-offset + sample-duration + sample-size + sample-flags
+    val flags = 0x000001 or 0x000100 or 0x000200 or 0x000400
+    dos.writeInt(flags)
+    dos.writeInt(samples.size)
+    dos.writeInt(dataOffset)
+
+    for (sample in samples) {
+      // Convert duration to timescale units
+      val durationInTimescale = ((sample.durationUs * timescale) / 1_000_000).toInt()
+      dos.writeInt(durationInTimescale)
+      dos.writeInt(sample.data.size)
+      dos.writeInt(buildSampleFlags(sample.isKeyFrame))
+    }
+
+    return wrapBox("trun", output.toByteArray())
+  }
+
+  private fun buildSampleFlags(isKeyFrame: Boolean): Int {
+    return if (isKeyFrame) {
+      // sample_depends_on=2 (no dependencies), not a difference sample
+      0x02000000
+    } else {
+      //
sample_depends_on=1 (depends on others), is a difference sample + 0x01010000 + } + } + + // ==================== Utilities ==================== + + private fun wrapBox(type: String, content: ByteArray): ByteArray { + val output = ByteArrayOutputStream() + val dos = DataOutputStream(output) + + dos.writeInt(8 + content.size) + dos.writeBytes(type) + dos.write(content) + + return output.toByteArray() + } +} diff --git a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt index 4f3331e..fc2e2bb 100644 --- a/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt +++ b/package/android/src/main/java/com/mrousavy/camera/core/RecordingSession.kt @@ -4,7 +4,6 @@ import android.content.Context import android.util.Log import android.util.Size import android.view.Surface -import androidx.media3.common.util.UnstableApi import com.facebook.common.statfs.StatFsHelper import com.mrousavy.camera.extensions.getRecommendedBitRate import com.mrousavy.camera.types.Orientation @@ -16,7 +15,6 @@ import java.text.SimpleDateFormat import java.util.Locale import java.util.Date -@UnstableApi class RecordingSession( context: Context, val cameraId: String, @@ -30,7 +28,7 @@ class RecordingSession( private val callback: (video: Video) -> Unit, private val onError: (error: CameraError) -> Unit, private val allCallbacks: CameraSession.Callback, - // Use the new FragmentedMp4Muxer-based recorder for HLS-compatible output + // Use FragmentedRecordingManager for HLS-compatible fMP4 output private val useFragmentedMp4: Boolean = true ) { companion object {
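For reference, the call order a client is expected to follow with HlsMuxer mirrors the MediaMuxer-style lifecycle documented in its class comment (addTrack → start → writeSampleData → stop/release). The Kotlin sketch below illustrates that wiring; the wrapper class, its name, and the placeholder callback are illustrative only and not part of this diff:

import android.media.MediaCodec
import android.media.MediaFormat
import android.util.Log
import com.mrousavy.camera.core.HlsMuxer
import java.io.File
import java.nio.ByteBuffer

// Hypothetical wrapper (not in this PR) showing the intended HlsMuxer call sequence.
class HlsMuxerUsageSketch(segmentsDir: File, onSegment: (File, Int, Long) -> Unit) {
  private val muxer = HlsMuxer(
    outputDirectory = segmentsDir,
    callback = object : HlsMuxer.Callback {
      override fun onInitSegmentReady(file: File) {
        Log.d("Sketch", "init segment ready: ${file.name}")
      }
      override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) {
        onSegment(file, index, durationUs)
      }
    },
    orientationDegrees = 0
  )
  private var trackIndex = -1

  init {
    // Target duration must be set before start(); 6 s matches the default I-frame interval.
    muxer.setSegmentDuration(6_000_000L)
  }

  // Call from MediaCodec.Callback.onOutputFormatChanged(): the encoder *output* format
  // carries csd-0/csd-1, which the init segment's avcC box requires.
  fun onFormatChanged(format: MediaFormat) {
    trackIndex = muxer.addTrack(format)
    muxer.start() // writes init.mp4 and fires onInitSegmentReady
  }

  // Call from onOutputBufferAvailable() for every encoded buffer; codec-config buffers
  // are skipped internally, and segments roll over at keyframes past the target duration.
  fun onSample(buffer: ByteBuffer, info: MediaCodec.BufferInfo) {
    muxer.writeSampleData(trackIndex, buffer, info)
  }

  fun finish() {
    muxer.stop()    // flushes the final numbered segment
    muxer.release()
  }
}

As in FragmentedRecordingManager above, addTrack() deliberately waits for onOutputFormatChanged rather than using the configured input format, because the SPS/PPS needed for the avcC box only exist in the encoder's output format.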