Write our own muxer to make HLS upload actually work

This commit is contained in:
2025-12-21 16:45:04 -08:00
parent a2d218580c
commit e60c1a4eb1
4 changed files with 916 additions and 223 deletions

View File

@@ -178,10 +178,6 @@ dependencies {
implementation "com.facebook.react:react-android:+" implementation "com.facebook.react:react-android:+"
implementation "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3" implementation "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3"
// Media3 muxer for fragmented MP4 (HLS-compatible) recording
implementation "androidx.media3:media3-muxer:1.5.0"
implementation "androidx.media3:media3-common:1.5.0"
if (enableCodeScanner) { if (enableCodeScanner) {
// User enabled code-scanner, so we bundle the 2.4 MB model in the app. // User enabled code-scanner, so we bundle the 2.4 MB model in the app.
implementation 'com.google.mlkit:barcode-scanning:17.2.0' implementation 'com.google.mlkit:barcode-scanning:17.2.0'

View File

@@ -7,37 +7,25 @@ import android.media.MediaFormat
import android.util.Log import android.util.Log
import android.util.Size import android.util.Size
import android.view.Surface import android.view.Surface
import androidx.media3.common.Format
import androidx.media3.common.MimeTypes
import androidx.media3.common.util.UnstableApi
import androidx.media3.muxer.FragmentedMp4Muxer
import androidx.media3.muxer.Muxer
import com.mrousavy.camera.types.Orientation import com.mrousavy.camera.types.Orientation
import com.mrousavy.camera.types.RecordVideoOptions import com.mrousavy.camera.types.RecordVideoOptions
import java.io.File import java.io.File
import java.io.FileOutputStream
import java.nio.ByteBuffer
/** /**
* A recording manager that produces HLS-compatible fragmented MP4 segments. * A recording manager that produces HLS-compatible fragmented MP4 segments.
* *
* This produces output similar to the iOS implementation: * Uses HlsMuxer (following Android's MediaMuxer pattern) to produce:
* - An initialization segment (init.mp4) containing codec configuration * - init.mp4: Initialization segment (ftyp + moov with mvex)
* - Numbered data segments (0.mp4, 1.mp4, ...) containing media data * - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
*
* Uses AndroidX Media3's FragmentedMp4Muxer which produces proper fMP4 output.
*/ */
@UnstableApi
class FragmentedRecordingManager( class FragmentedRecordingManager(
private val encoder: MediaCodec, private val encoder: MediaCodec,
private val outputDirectory: File, private val muxer: HlsMuxer
private val orientationDegrees: Int,
private val targetSegmentDurationUs: Long,
private val callbacks: CameraSession.Callback
) : MediaCodec.Callback(), ChunkedRecorderInterface { ) : MediaCodec.Callback(), ChunkedRecorderInterface {
companion object { companion object {
private const val TAG = "FragmentedRecorder" private const val TAG = "FragmentedRecorder"
private const val DEFAULT_SEGMENT_DURATION_SECONDS = 6
fun fromParams( fun fromParams(
callbacks: CameraSession.Callback, callbacks: CameraSession.Callback,
@@ -48,17 +36,20 @@ class FragmentedRecordingManager(
bitRate: Int, bitRate: Int,
options: RecordVideoOptions, options: RecordVideoOptions,
outputDirectory: File, outputDirectory: File,
segmentDurationSeconds: Int = 6 segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS
): FragmentedRecordingManager { ): FragmentedRecordingManager {
val mimeType = options.videoCodec.toMimeType() val mimeType = options.videoCodec.toMimeType()
val cameraOrientationDegrees = cameraOrientation.toDegrees() val cameraOrientationDegrees = cameraOrientation.toDegrees()
val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees() val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees()
val (width, height) = if (cameraOrientation.isLandscape()) { // Use size dimensions directly - the encoder output format will have the actual dimensions
size.height to size.width // Don't swap based on orientation here; the camera pipeline handles that
} else { val width = size.width
size.width to size.height val height = size.height
}
Log.d(TAG, "Input size: ${size.width}x${size.height}, " +
"cameraOrientation: $cameraOrientation ($cameraOrientationDegrees°), " +
"recordingOrientation: $recordingOrientationDegrees°")
val format = MediaFormat.createVideoFormat(mimeType, width, height) val format = MediaFormat.createVideoFormat(mimeType, width, height)
val codec = MediaCodec.createEncoderByType(mimeType) val codec = MediaCodec.createEncoderByType(mimeType)
@@ -67,121 +58,48 @@ class FragmentedRecordingManager(
MediaFormat.KEY_COLOR_FORMAT, MediaFormat.KEY_COLOR_FORMAT,
MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface
) )
fps?.apply {
format.setInteger(MediaFormat.KEY_FRAME_RATE, this) val effectiveFps = fps ?: 30
} format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps)
// I-frame interval affects segment boundaries
format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds) format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate) format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
Log.d(TAG, "Video Format: $format, camera orientation $cameraOrientationDegrees, recordingOrientation: $recordingOrientationDegrees") Log.d(TAG, "Video Format: $format, orientation: $recordingOrientationDegrees")
codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE) codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
return FragmentedRecordingManager( // Create muxer with callbacks and orientation
codec, val muxer = HlsMuxer(
outputDirectory, outputDirectory = outputDirectory,
recordingOrientationDegrees, callback = object : HlsMuxer.Callback {
segmentDurationSeconds * 1_000_000L, override fun onInitSegmentReady(file: File) {
callbacks callbacks.onInitSegmentReady(file)
}
override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) {
callbacks.onVideoChunkReady(file, index, durationUs)
}
},
orientationDegrees = recordingOrientationDegrees
) )
muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L)
Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees")
return FragmentedRecordingManager(codec, muxer)
} }
} }
// State management
private var chunkIndex = 0
private var encodedFormat: MediaFormat? = null
private var recording = false private var recording = false
private var muxerStarted = false
// Segment tracking private var trackIndex = -1
private var segmentContext: SegmentContext? = null
private var initSegmentEmitted = false
override val surface: Surface = encoder.createInputSurface() override val surface: Surface = encoder.createInputSurface()
init { init {
if (!outputDirectory.exists()) {
outputDirectory.mkdirs()
}
encoder.setCallback(this) encoder.setCallback(this)
} }
/**
* Context for a single data segment being written.
* Init segments are created separately via createInitSegment().
*/
private inner class SegmentContext(
private val format: MediaFormat,
private val segmentIndex: Int
) {
private val filename = "$segmentIndex.mp4"
private val file = File(outputDirectory, filename)
private val outputStream = FileOutputStream(file)
private val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
private lateinit var videoTrack: Muxer.TrackToken
private var startTimeUs: Long = -1L
private var lastTimeUs: Long = 0L
private var sampleCount = 0
init {
val media3Format = convertToMedia3Format(format)
videoTrack = muxer.addTrack(media3Format)
Log.d(TAG, "Created segment context: $filename")
}
fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo): Boolean {
if (startTimeUs < 0) {
startTimeUs = bufferInfo.presentationTimeUs
}
lastTimeUs = bufferInfo.presentationTimeUs
val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
muxer.writeSampleData(videoTrack, buffer, bufferInfo)
sampleCount++
// Check if we should start a new segment at the next keyframe
if (isKeyFrame && sampleCount > 1) {
val segmentDurationUs = bufferInfo.presentationTimeUs - startTimeUs
if (segmentDurationUs >= targetSegmentDurationUs) {
return true // Signal to create new segment
}
}
return false
}
fun finish(): Long {
try {
muxer.close()
outputStream.close()
} catch (e: Exception) {
Log.e(TAG, "Error closing segment", e)
}
val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L
callbacks.onVideoChunkReady(file, segmentIndex, durationUs)
Log.d(TAG, "Finished segment: $filename, samples=$sampleCount, duration=${durationUs/1000}ms")
return durationUs
}
}
private fun createNewSegment() {
val format = encodedFormat
if (format == null) {
Log.e(TAG, "Cannot create segment: encodedFormat is null")
return
}
// Close previous segment
segmentContext?.finish()
// Create new data segment (init segments are created separately)
segmentContext = SegmentContext(format, chunkIndex)
chunkIndex++
}
override fun start() { override fun start() {
encoder.start() encoder.start()
recording = true recording = true
@@ -190,8 +108,12 @@ class FragmentedRecordingManager(
override fun finish() { override fun finish() {
synchronized(this) { synchronized(this) {
recording = false recording = false
segmentContext?.finish()
segmentContext = null if (muxerStarted) {
muxer.stop()
muxer.release()
}
try { try {
encoder.stop() encoder.stop()
encoder.release() encoder.release()
@@ -202,6 +124,7 @@ class FragmentedRecordingManager(
} }
// MediaCodec.Callback methods // MediaCodec.Callback methods
override fun onInputBufferAvailable(codec: MediaCodec, index: Int) { override fun onInputBufferAvailable(codec: MediaCodec, index: Int) {
// Not used for Surface input // Not used for Surface input
} }
@@ -213,37 +136,20 @@ class FragmentedRecordingManager(
return return
} }
val encodedData = encoder.getOutputBuffer(index) if (!muxerStarted) {
if (encodedData == null) { encoder.releaseOutputBuffer(index, false)
return
}
val buffer = encoder.getOutputBuffer(index)
if (buffer == null) {
Log.e(TAG, "getOutputBuffer returned null") Log.e(TAG, "getOutputBuffer returned null")
encoder.releaseOutputBuffer(index, false) encoder.releaseOutputBuffer(index, false)
return return
} }
// Wait until init segment is emitted (happens in onOutputFormatChanged)
if (!initSegmentEmitted) {
encoder.releaseOutputBuffer(index, false)
return
}
// Create first data segment if needed
if (segmentContext == null) {
createNewSegment()
}
val context = segmentContext
if (context == null) {
encoder.releaseOutputBuffer(index, false)
return
}
try { try {
val shouldStartNewSegment = context.writeSample(encodedData, bufferInfo) muxer.writeSampleData(trackIndex, buffer, bufferInfo)
if (shouldStartNewSegment) {
createNewSegment()
// Write this keyframe to the new segment as well
segmentContext?.writeSample(encodedData, bufferInfo)
}
} catch (e: Exception) { } catch (e: Exception) {
Log.e(TAG, "Error writing sample", e) Log.e(TAG, "Error writing sample", e)
} }
@@ -257,76 +163,12 @@ class FragmentedRecordingManager(
} }
override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) { override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) {
Log.i(TAG, "Output format changed: $format") synchronized(this) {
encodedFormat = format Log.i(TAG, "Output format changed: $format")
// Create the init segment immediately when we get the format trackIndex = muxer.addTrack(format)
// This produces an fMP4 file with just ftyp + moov (no samples) muxer.start()
if (!initSegmentEmitted) { muxerStarted = true
createInitSegment(format)
initSegmentEmitted = true
} }
} }
/**
* Creates an initialization segment containing only codec configuration (ftyp + moov).
* This is done by creating a muxer, adding the track, and immediately closing it
* without writing any samples.
*/
private fun createInitSegment(format: MediaFormat) {
val initFile = File(outputDirectory, "init.mp4")
try {
val outputStream = FileOutputStream(initFile)
val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
// Convert and add the track
val media3Format = convertToMedia3Format(format)
muxer.addTrack(media3Format)
// Close immediately - this writes just the header (ftyp + moov)
muxer.close()
outputStream.close()
Log.d(TAG, "Created init segment: ${initFile.absolutePath}")
callbacks.onInitSegmentReady(initFile)
} catch (e: Exception) {
Log.e(TAG, "Error creating init segment", e)
}
}
private fun convertToMedia3Format(mediaFormat: MediaFormat): Format {
val mimeType = mediaFormat.getString(MediaFormat.KEY_MIME) ?: MimeTypes.VIDEO_H264
val width = mediaFormat.getInteger(MediaFormat.KEY_WIDTH)
val height = mediaFormat.getInteger(MediaFormat.KEY_HEIGHT)
val bitRate = try { mediaFormat.getInteger(MediaFormat.KEY_BIT_RATE) } catch (e: Exception) { -1 }
val frameRate = try { mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE) } catch (e: Exception) { -1 }
// Get CSD (Codec Specific Data) if available - required for init segment
val csd0 = mediaFormat.getByteBuffer("csd-0")
val csd1 = mediaFormat.getByteBuffer("csd-1")
val initData = mutableListOf<ByteArray>()
csd0?.let {
val bytes = ByteArray(it.remaining())
it.duplicate().get(bytes)
initData.add(bytes)
}
csd1?.let {
val bytes = ByteArray(it.remaining())
it.duplicate().get(bytes)
initData.add(bytes)
}
return Format.Builder()
.setSampleMimeType(mimeType)
.setWidth(width)
.setHeight(height)
.setRotationDegrees(orientationDegrees)
.apply {
if (bitRate > 0) setAverageBitrate(bitRate)
if (frameRate > 0) setFrameRate(frameRate.toFloat())
if (initData.isNotEmpty()) setInitializationData(initData)
}
.build()
}
} }

View File

@@ -0,0 +1,857 @@
package com.mrousavy.camera.core
import android.media.MediaCodec
import android.media.MediaFormat
import android.util.Log
import java.io.ByteArrayOutputStream
import java.io.DataOutputStream
import java.io.File
import java.io.FileOutputStream
import java.nio.ByteBuffer
/**
* A muxer for creating HLS-compatible fragmented MP4 output.
*
* Follows the same pattern as Android's MediaMuxer:
* 1. Create muxer with output directory
* 2. addTrack() with MediaFormat
* 3. start() - writes init.mp4
* 4. writeSampleData() for each encoded sample
* 5. stop() - finalizes last segment
* 6. release() - cleanup
*
* Produces:
* - init.mp4: Initialization segment (ftyp + moov with mvex)
* - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
*/
class HlsMuxer(
private val outputDirectory: File,
private val callback: Callback,
private val orientationDegrees: Int = 0
) {
companion object {
// Tag passed to android.util.Log for every message emitted by this muxer.
private const val TAG = "HlsMuxer"
// Initial value of targetSegmentDurationUs, in microseconds; setSegmentDuration() can replace it.
private const val DEFAULT_SEGMENT_DURATION_US = 6_000_000L // 6 seconds
}
/**
 * Receives the files this muxer writes into its output directory.
 */
interface Callback {
/** Invoked by start() after "init.mp4" has been written; [file] is that initialization segment. */
fun onInitSegmentReady(file: File)
/**
 * Invoked each time a media segment has been written to disk.
 *
 * @param file The segment file, named "<index>.mp4".
 * @param index Zero-based index of the segment.
 * @param durationUs Segment duration in microseconds: the span from the first to the last
 *   sample's presentation time plus the last sample's own duration.
 */
fun onMediaSegmentReady(file: File, index: Int, durationUs: Long)
}
// Configuration
// Target segment length in microseconds; a segment is closed at the first keyframe at or beyond it.
private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
// Timescale written into the mvhd/mdhd boxes; addTrack() replaces this default with fps * 1000.
private var timescale: Int = 30000 // Default, updated from format
// State
// Lifecycle position, consulted by the public methods before they act.
private var state = State.UNINITIALIZED
// Format supplied to addTrack(); start() builds the init segment from it.
private var trackFormat: MediaFormat? = null
// Passed to buildMediaSegment() and incremented after each written segment; starts at 1.
private var sequenceNumber = 1
// Index used for the next segment's file name ("<index>.mp4") and callback; starts at 0.
private var segmentIndex = 0
// Current segment data
// Samples buffered for the segment that is currently being assembled.
private val pendingSamples = mutableListOf<Sample>()
// Presentation time of the current segment's first sample, or -1 before any sample arrived.
private var segmentStartTimeUs = -1L
// Presentation time of the most recently buffered sample (0 until the first one).
private var lastPresentationTimeUs = 0L
/** Lifecycle of the muxer; the public methods move it forward in declaration order. */
private enum class State {
// Initial state: no track has been added yet.
UNINITIALIZED,
// Set by addTrack(): the track format is known, start() may be called.
INITIALIZED,
// Set by start(): the init segment is written and samples are accepted.
STARTED,
// Set by stop(): the pending segment has been flushed.
STOPPED,
// Set by release(): buffered samples have been dropped.
RELEASED
}
/**
 * One encoded frame buffered for the segment being assembled.
 *
 * Note that [data] is a ByteArray, so the generated equals()/hashCode() compare it by reference.
 */
private data class Sample(
// Sample payload as produced by convertAnnexBToAvcc().
val data: ByteArray,
// Presentation timestamp in microseconds, taken from MediaCodec.BufferInfo.
val presentationTimeUs: Long,
// Duration in microseconds; starts as an estimate and is overwritten when the next sample arrives.
var durationUs: Long,
// True when the encoder flagged the buffer with BUFFER_FLAG_KEY_FRAME.
val isKeyFrame: Boolean
)
// ==================== Annex-B to AVCC Conversion ====================
/**
 * Re-frames an H.264 sample from Annex-B to AVCC.
 *
 * Annex-B separates NAL units with start codes (00 00 00 01 or 00 00 01), whereas AVCC
 * prefixes every NAL unit with its length as a 4-byte big-endian integer. MediaCodec emits
 * Annex-B, while the avcC box written by this muxer declares a NAL length size of 4, so
 * every sample has to be re-framed before it is stored.
 *
 * @param annexBData The raw sample bytes.
 * @return The length-prefixed sample, or [annexBData] itself when the parser yields no NAL units.
 */
private fun convertAnnexBToAvcc(annexBData: ByteArray): ByteArray {
  val units = parseAnnexBNalUnits(annexBData)
  if (units.isEmpty()) {
    Log.w(TAG, "No NAL units found in sample, returning original data")
    return annexBData
  }

  val sink = ByteArrayOutputStream()
  val writer = DataOutputStream(sink)
  units.forEach { unit ->
    writer.writeInt(unit.size) // 4-byte big-endian length prefix
    writer.write(unit)         // payload, start code already removed
  }
  return sink.toByteArray()
}
/**
 * Splits Annex-B data into its NAL units.
 *
 * If the data does not begin with a start code but its first four bytes form a plausible
 * big-endian length (greater than zero and no larger than the remaining bytes), the data is
 * assumed to be AVCC already and an empty list is returned.
 *
 * @param data The sample bytes to scan.
 * @return The NAL unit payloads without start codes, in stream order; empty NAL units are skipped.
 */
private fun parseAnnexBNalUnits(data: ByteArray): List<ByteArray> {
  // The AVCC heuristic only ever applies to the very first position of the sample.
  if (data.size >= 4 && findStartCode(data, 0) == 0) {
    val declaredLength = ByteBuffer.wrap(data, 0, 4).int
    if (declaredLength in 1..(data.size - 4)) {
      // Looks like AVCC format already, let the caller keep the original bytes
      Log.d(TAG, "Data appears to already be in AVCC format")
      return emptyList()
    }
  }

  val units = mutableListOf<ByteArray>()
  var cursor = 0
  while (cursor < data.size) {
    val prefixLength = findStartCode(data, cursor)
    if (prefixLength == 0) {
      // Not at a start code: skip ahead byte by byte until one is found
      cursor++
      continue
    }

    val payloadStart = cursor + prefixLength
    // Advance to the next start code; a start code needs at least three bytes
    var payloadEnd = payloadStart
    while (payloadEnd < data.size - 2 && findStartCode(data, payloadEnd) == 0) {
      payloadEnd++
    }
    if (payloadEnd >= data.size - 2) {
      // No further start code: the NAL unit runs to the end of the data
      payloadEnd = data.size
    }

    if (payloadEnd > payloadStart) {
      units += data.copyOfRange(payloadStart, payloadEnd)
    }
    cursor = payloadEnd
  }
  return units
}
/**
 * Tests whether an Annex-B start code begins exactly at [offset].
 *
 * @param data The bytes to inspect.
 * @param offset Position at which a start code is expected.
 * @return 4 for "00 00 00 01", 3 for "00 00 01", or 0 when neither starts at [offset].
 */
private fun findStartCode(data: ByteArray, offset: Int): Int {
  val zero: Byte = 0
  val one: Byte = 1
  val available = data.size - offset

  // Both start code forms need at least three bytes and begin with two zero bytes.
  if (available < 3 || data[offset] != zero || data[offset + 1] != zero) {
    return 0
  }
  return when {
    data[offset + 2] == one -> 3 // 00 00 01
    available >= 4 && data[offset + 2] == zero && data[offset + 3] == one -> 4 // 00 00 00 01
    else -> 0
  }
}
/**
 * Overrides the target duration of a media segment.
 *
 * @param durationUs Target duration in microseconds.
 * @throws IllegalStateException if the muxer has already been started.
 */
fun setSegmentDuration(durationUs: Long) {
  val notYetStarted = state == State.UNINITIALIZED || state == State.INITIALIZED
  check(notYetStarted) { "Cannot set segment duration after start()" }
  targetSegmentDurationUs = durationUs
}
/**
 * Adds the (single) video track to the muxer.
 *
 * The movie timescale is derived from the format's frame rate as fps * 1000. When the frame
 * rate cannot be read as an integer, or is not positive, 30 fps is assumed so that the
 * timescale written to the init segment is never zero or negative.
 *
 * @param format The MediaFormat describing the track.
 * @return Track index (always 0 for now, single video track).
 * @throws IllegalStateException if a track was already added or the muxer was started.
 */
fun addTrack(format: MediaFormat): Int {
  check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" }

  // getInteger() throws when the key is absent or stored with another type, hence the fallback.
  fun intOrDefault(key: String, fallback: Int): Int =
    try {
      format.getInteger(key)
    } catch (e: Exception) {
      fallback
    }

  trackFormat = format

  // A non-positive frame rate would produce an unusable timescale, so treat it as unknown.
  val fps = intOrDefault(MediaFormat.KEY_FRAME_RATE, 30).takeIf { it > 0 } ?: 30
  timescale = fps * 1000 // Use fps * 1000 for good precision

  state = State.INITIALIZED

  val formatWidth = intOrDefault(MediaFormat.KEY_WIDTH, -1)
  val formatHeight = intOrDefault(MediaFormat.KEY_HEIGHT, -1)
  Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
    "encoder output: ${formatWidth}x${formatHeight}, " +
    "timescale=$timescale, orientation=$orientationDegrees°")

  return 0 // Single track, index 0
}
/**
 * Starts the muxer: makes sure the output directory exists, writes "init.mp4" and reports
 * it through the callback.
 *
 * @throws IllegalStateException if addTrack() has not been called first.
 */
fun start() {
  check(state == State.INITIALIZED) { "Must call addTrack() before start()" }
  val format = checkNotNull(trackFormat) { "No track format" }

  // Create output directory if needed
  if (!outputDirectory.exists()) {
    outputDirectory.mkdirs()
  }

  // Build and persist the initialization segment
  val initBytes = buildInitSegment(format)
  val initFile = File(outputDirectory, "init.mp4")
  initFile.writeBytes(initBytes)
  Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)")

  callback.onInitSegmentReady(initFile)
  state = State.STARTED
}
/**
 * Buffers one encoded sample for the current segment.
 *
 * When a keyframe arrives and the current segment has already reached the target duration,
 * the buffered samples are written out as a segment first and the keyframe starts the next
 * one. Codec-config buffers and empty buffers are ignored.
 *
 * @param trackIndex Track index (must be 0)
 * @param buffer The encoded sample data; its position and limit are modified while reading
 * @param bufferInfo Sample metadata (size, presentation time, flags)
 * @throws IllegalStateException if the muxer is not started or [trackIndex] is not 0
 */
fun writeSampleData(trackIndex: Int, buffer: ByteBuffer, bufferInfo: MediaCodec.BufferInfo) {
  check(state == State.STARTED) { "Must call start() before writeSampleData()" }
  check(trackIndex == 0) { "Invalid track index: $trackIndex" }

  // Skip codec config data (already in init segment)
  if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
    return
  }

  // Empty buffers (such as a bare end-of-stream marker) carry no frame. Storing them would
  // add a zero-length sample and distort the duration of the previous one.
  if (bufferInfo.size <= 0) {
    return
  }

  val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
  val presentationTimeUs = bufferInfo.presentationTimeUs

  // Initialize segment start time
  if (segmentStartTimeUs < 0) {
    segmentStartTimeUs = presentationTimeUs
  }

  // Check if we should finalize current segment (at keyframe boundaries)
  if (isKeyFrame && pendingSamples.isNotEmpty()) {
    val segmentDurationUs = presentationTimeUs - segmentStartTimeUs
    if (segmentDurationUs >= targetSegmentDurationUs) {
      finalizeCurrentSegment()
      segmentStartTimeUs = presentationTimeUs
    }
  }

  // Copy buffer data. The limit is set before the position because position() throws when
  // the requested position lies beyond the buffer's current limit.
  val rawData = ByteArray(bufferInfo.size)
  buffer.limit(bufferInfo.offset + bufferInfo.size)
  buffer.position(bufferInfo.offset)
  buffer.get(rawData)

  // Convert Annex-B (start codes) to AVCC (length prefixes)
  val data = convertAnnexBToAvcc(rawData)

  // Update duration of previous sample
  if (pendingSamples.isNotEmpty()) {
    val lastSample = pendingSamples.last()
    lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
  }

  // Estimate duration (will be corrected by next sample)
  val estimatedDurationUs = if (lastPresentationTimeUs > 0) {
    presentationTimeUs - lastPresentationTimeUs
  } else {
    1_000_000L / 30 // Assume 30fps
  }

  pendingSamples.add(Sample(
    data = data,
    presentationTimeUs = presentationTimeUs,
    durationUs = estimatedDurationUs,
    isKeyFrame = isKeyFrame
  ))

  lastPresentationTimeUs = presentationTimeUs
}
/**
 * Stops the muxer and flushes the samples still buffered as a final segment.
 *
 * @throws IllegalStateException if the muxer is not in the started state.
 */
fun stop() {
  check(state == State.STARTED) { "Muxer not started" }

  val hasUnflushedSamples = pendingSamples.isNotEmpty()
  if (hasUnflushedSamples) {
    finalizeCurrentSegment()
  }

  state = State.STOPPED
  Log.d(TAG, "Muxer stopped, wrote $segmentIndex segments")
}
/**
 * Releases the muxer. A muxer that is still started is stopped first, then any buffered
 * samples are discarded.
 */
fun release() {
  val stillRunning = state == State.STARTED
  if (stillRunning) {
    stop()
  }
  pendingSamples.clear()
  state = State.RELEASED
}
/**
 * Writes the buffered samples to "<segmentIndex>.mp4" and reports the file through the callback.
 *
 * Does nothing when no samples are buffered. Any exception is logged and swallowed; in that
 * case the counters are not advanced and the buffered samples are kept.
 */
private fun finalizeCurrentSegment() {
  if (pendingSamples.isEmpty()) return

  try {
    val firstSample = pendingSamples.first()
    val segmentBytes = buildMediaSegment(pendingSamples, sequenceNumber, firstSample.presentationTimeUs)

    val segmentFile = File(outputDirectory, "$segmentIndex.mp4")
    segmentFile.writeBytes(segmentBytes)

    // Duration covers first-to-last presentation time plus the final sample's own duration
    val finalSample = pendingSamples.last()
    val durationUs = (finalSample.presentationTimeUs - firstSample.presentationTimeUs) + finalSample.durationUs

    Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " +
      "duration=${durationUs / 1000}ms, size=${segmentBytes.size} bytes")

    callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs)

    segmentIndex += 1
    sequenceNumber += 1
    pendingSamples.clear()
  } catch (e: Exception) {
    Log.e(TAG, "Error finalizing segment $segmentIndex", e)
  }
}
// ==================== Init Segment Building ====================
/**
 * Assembles the initialization segment: an ftyp box followed by a moov box.
 *
 * @param format Track format; must provide width, height, "csd-0" (SPS) and "csd-1" (PPS).
 * @return The bytes of the init segment.
 * @throws IllegalArgumentException if the SPS or PPS buffer is missing.
 */
private fun buildInitSegment(format: MediaFormat): ByteArray {
  val width = format.getInteger(MediaFormat.KEY_WIDTH)
  val height = format.getInteger(MediaFormat.KEY_HEIGHT)

  val spsBuffer = requireNotNull(format.getByteBuffer("csd-0")) { "Missing SPS (csd-0)" }
  val sps = extractNalUnit(spsBuffer)
  val ppsBuffer = requireNotNull(format.getByteBuffer("csd-1")) { "Missing PPS (csd-1)" }
  val pps = extractNalUnit(ppsBuffer)

  // ftyp first, then moov
  val ftyp = buildFtypBox()
  val moov = buildMoovBox(width, height, sps, pps)
  return ftyp + moov
}
/**
 * Copies the remaining bytes of [buffer] and removes a leading Annex-B start code, if any.
 *
 * The buffer is read through a duplicate, so its own position is left untouched.
 *
 * @param buffer Buffer holding one NAL unit, optionally prefixed by 00 00 00 01 or 00 00 01.
 * @return The NAL unit bytes without the start code prefix.
 */
private fun extractNalUnit(buffer: ByteBuffer): ByteArray {
  val view = buffer.duplicate()
  val bytes = ByteArray(view.remaining())
  view.get(bytes)

  val zero: Byte = 0
  val one: Byte = 1
  val leadingZeros = bytes.size >= 3 && bytes[0] == zero && bytes[1] == zero
  val prefixLength = when {
    leadingZeros && bytes.size >= 4 && bytes[2] == zero && bytes[3] == one -> 4
    leadingZeros && bytes[2] == one -> 3
    else -> 0
  }
  return if (prefixLength == 0) bytes else bytes.copyOfRange(prefixLength, bytes.size)
}
/**
 * Builds the ftyp box: major brand "isom", minor version 0x200 and six compatible brands.
 *
 * @return The complete box including its 8-byte header.
 */
private fun buildFtypBox(): ByteArray {
  val majorBrand = "isom"
  val compatibleBrands = listOf("isom", "iso5", "iso6", "avc1", "mp41", "dash")
  // header (size + type) + major brand + minor version + one 4-byte code per brand
  val boxSize = 8 + 4 + 4 + (compatibleBrands.size * 4)

  val sink = ByteArrayOutputStream(boxSize)
  DataOutputStream(sink).apply {
    writeInt(boxSize)
    writeBytes("ftyp")
    writeBytes(majorBrand)
    writeInt(0x200) // minor version
    for (brand in compatibleBrands) {
      writeBytes(brand)
    }
  }
  return sink.toByteArray()
}
/**
 * Builds the moov box from its three children, in order: mvhd, trak, mvex.
 */
private fun buildMoovBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
  val movieHeader = buildMvhdBox()
  val track = buildTrakBox(width, height, sps, pps)
  val movieExtends = buildMvexBox()
  return wrapBox("moov", movieHeader + track + movieExtends)
}
/**
 * Builds the version-0 mvhd (movie header) box using the muxer's timescale, a zero
 * duration, an identity matrix and 2 as the next track ID.
 */
private fun buildMvhdBox(): ByteArray {
  // Identity matrix: 16.16 fixed point except the last column, which is 2.30
  val identityMatrix = intArrayOf(
    0x00010000, 0, 0,
    0, 0x00010000, 0,
    0, 0, 0x40000000
  )

  val payload = ByteArrayOutputStream()
  with(DataOutputStream(payload)) {
    writeInt(0)          // version & flags
    writeInt(0)          // creation time
    writeInt(0)          // modification time
    writeInt(timescale)  // timescale
    writeInt(0)          // duration
    writeInt(0x00010000) // rate = 1.0
    writeShort(0x0100)   // volume = 1.0
    writeShort(0)        // reserved
    repeat(2) { writeInt(0) } // reserved
    identityMatrix.forEach { writeInt(it) }
    repeat(6) { writeInt(0) } // pre-defined
    writeInt(2)          // next track ID
  }
  return wrapBox("mvhd", payload.toByteArray())
}
/**
 * Builds the trak box: a tkhd box followed by an mdia box.
 */
private fun buildTrakBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
  val trackHeader = buildTkhdBox(width, height)
  val media = buildMdiaBox(width, height, sps, pps)
  return wrapBox("trak", trackHeader + media)
}
/**
 * Builds the version-0 tkhd (track header) box for track ID 1.
 *
 * The matrix comes from writeRotationMatrix(); width and height are stored as 16.16
 * fixed-point values exactly as passed in.
 *
 * @param width Track width in pixels.
 * @param height Track height in pixels.
 */
private fun buildTkhdBox(width: Int, height: Int): ByteArray {
  val payload = ByteArrayOutputStream()
  val writer = DataOutputStream(payload)

  with(writer) {
    writeInt(0x00000007)        // version 0, flags (enabled, in movie, in preview)
    repeat(2) { writeInt(0) }   // creation time, modification time
    writeInt(1)                 // track ID
    writeInt(0)                 // reserved
    writeInt(0)                 // duration
    repeat(2) { writeInt(0) }   // reserved
    repeat(4) { writeShort(0) } // layer, alternate group, volume (0 for video), reserved
  }

  writeRotationMatrix(writer)

  // Dimensions as delivered by the encoder output format, in 16.16 fixed point
  writer.writeInt(width shl 16)
  writer.writeInt(height shl 16)

  Log.d(TAG, "tkhd: ${width}x${height}, rotation=$orientationDegrees")

  return wrapBox("tkhd", payload.toByteArray())
}
/**
 * Writes the 3x3 track transformation matrix.
 *
 * The matrix written is always the identity (no rotation is encoded): nine 32-bit values in
 * the order a, b, u, c, d, v, x, y, w.
 *
 * @param dos Stream that receives the 36 matrix bytes.
 */
private fun writeRotationMatrix(dos: DataOutputStream) {
  val unity16 = 0x00010000 // 1.0 in 16.16 fixed point
  val unity30 = 0x40000000 // 1.0 in 2.30 fixed point

  val identity = intArrayOf(
    unity16, 0, 0, // a, b, u
    0, unity16, 0, // c, d, v
    0, 0, unity30  // x, y, w
  )
  for (entry in identity) {
    dos.writeInt(entry)
  }
}
/**
 * Builds the mdia box from its children, in order: mdhd, hdlr, minf.
 */
private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
  val mediaHeader = buildMdhdBox()
  val handler = buildHdlrBox()
  val mediaInfo = buildMinfBox(width, height, sps, pps)
  return wrapBox("mdia", mediaHeader + handler + mediaInfo)
}
/**
 * Builds the version-0 mdhd (media header) box with the muxer's timescale, zero duration
 * and the language code for "und".
 */
private fun buildMdhdBox(): ByteArray {
  val payload = ByteArrayOutputStream()
  with(DataOutputStream(payload)) {
    writeInt(0)         // version & flags
    writeInt(0)         // creation time
    writeInt(0)         // modification time
    writeInt(timescale) // timescale
    writeInt(0)         // duration
    writeShort(0x55C4)  // language: "und"
    writeShort(0)       // pre-defined
  }
  return wrapBox("mdhd", payload.toByteArray())
}
/**
 * Builds the hdlr box declaring a "vide" handler named "VideoHandler" (null-terminated).
 */
private fun buildHdlrBox(): ByteArray {
  val handlerName = "VideoHandler"
  val payload = ByteArrayOutputStream()
  with(DataOutputStream(payload)) {
    writeInt(0)               // version & flags
    writeInt(0)               // pre-defined
    writeBytes("vide")        // handler type
    repeat(3) { writeInt(0) } // reserved
    writeBytes(handlerName)
    writeByte(0)              // null terminator
  }
  return wrapBox("hdlr", payload.toByteArray())
}
/**
 * Builds the minf box from its children, in order: vmhd, dinf, stbl.
 */
private fun buildMinfBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
  val videoHeader = buildVmhdBox()
  val dataInfo = buildDinfBox()
  val sampleTable = buildStblBox(width, height, sps, pps)
  return wrapBox("minf", videoHeader + dataInfo + sampleTable)
}
/**
 * Builds the vmhd (video media header) box: flags = 1, graphics mode 0 and a zero opcolor.
 */
private fun buildVmhdBox(): ByteArray {
  val payload = ByteArrayOutputStream()
  with(DataOutputStream(payload)) {
    writeInt(1)                 // version 0, flags = 1
    writeShort(0)               // graphics mode
    repeat(3) { writeShort(0) } // opcolor[0..2]
  }
  return wrapBox("vmhd", payload.toByteArray())
}
/** Builds the dinf box, whose only child is the dref box. */
private fun buildDinfBox(): ByteArray = wrapBox("dinf", buildDrefBox())
/**
 * Builds the dref box holding a single 12-byte "url " entry flagged as self-contained.
 */
private fun buildDrefBox(): ByteArray {
  val payload = ByteArrayOutputStream()
  with(DataOutputStream(payload)) {
    writeInt(0) // version & flags
    writeInt(1) // entry count

    // url box (self-contained): size, type, flags
    writeInt(12)
    writeBytes("url ")
    writeInt(1) // flags: self-contained
  }
  return wrapBox("dref", payload.toByteArray())
}
/**
 * Builds the stbl box: the stsd sample description followed by empty stts, stsc, stsz and
 * stco tables, in that order.
 */
private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
  val children = listOf(
    buildStsdBox(width, height, sps, pps),
    buildEmptySttsBox(),
    buildEmptyStscBox(),
    buildEmptyStszBox(),
    buildEmptyStcoBox()
  )
  val payload = ByteArrayOutputStream()
  children.forEach { payload.write(it) }
  return wrapBox("stbl", payload.toByteArray())
}
/**
 * Builds the stsd box containing exactly one sample entry, the avc1 box.
 */
private fun buildStsdBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
  val payload = ByteArrayOutputStream()
  with(DataOutputStream(payload)) {
    writeInt(0) // version & flags
    writeInt(1) // entry count
    write(buildAvc1Box(width, height, sps, pps))
  }
  return wrapBox("stsd", payload.toByteArray())
}
/**
 * Builds the avc1 visual sample entry followed by its avcC child box.
 *
 * @param width Coded width, written as a 16-bit value.
 * @param height Coded height, written as a 16-bit value.
 * @param sps Sequence parameter set forwarded to the avcC box.
 * @param pps Picture parameter set forwarded to the avcC box.
 */
private fun buildAvc1Box(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
  val payload = ByteArrayOutputStream()
  with(DataOutputStream(payload)) {
    write(ByteArray(6))       // reserved
    writeShort(1)             // data reference index
    write(ByteArray(2 + 2))   // pre-defined (2 bytes) + reserved (2 bytes)
    write(ByteArray(3 * 4))   // pre-defined
    writeShort(width)         // width
    writeShort(height)        // height
    repeat(2) { writeInt(0x00480000) } // horiz + vert resolution (72 dpi)
    writeInt(0)               // reserved
    writeShort(1)             // frame count
    write(ByteArray(32))      // compressor name
    writeShort(0x0018)        // depth (24 bit)
    writeShort(-1)            // pre-defined

    write(buildAvcCBox(sps, pps))
  }
  return wrapBox("avc1", payload.toByteArray())
}
/**
 * Builds the `avcC` (AVC decoder configuration) box embedding one SPS and one PPS.
 *
 * The profile, profile-compatibility and level bytes are copied out of [sps]. When [sps]
 * starts with its one-byte NAL unit header (forbidden_zero_bit clear, nal_unit_type 7),
 * those three bytes sit at offsets 1..3. Reading offsets 0..2 in that case would write the
 * NAL header (e.g. 0x67) as the profile, so the header is detected and skipped. A headerless
 * SPS payload is still read from offset 0. None of the byte values that look like an SPS
 * NAL header (0x07, 0x27, 0x47, 0x67) is a defined H.264 profile_idc, so the detection
 * cannot misfire on a real profile byte.
 *
 * If [sps] is too short to supply a byte, the fallbacks 0x42 (profile), 0x00
 * (compatibility) and 0x1F (level) are used.
 *
 * @param sps sequence parameter set, written verbatim
 *   (assumed to carry no Annex-B start code — TODO confirm against the caller)
 * @param pps picture parameter set, written verbatim
 * @return the complete `avcC` box bytes
 */
private fun buildAvcCBox(sps: ByteArray, pps: ByteArray): ByteArray {
    // Mask 0x9F keeps forbidden_zero_bit (0x80) and nal_unit_type (0x1F); 0x07 == SPS.
    val hasNalHeader = sps.isNotEmpty() && (sps[0].toInt() and 0x9F) == 0x07
    val profileOffset = if (hasNalHeader) 1 else 0

    fun spsByteOr(index: Int, fallback: Int): Int =
        sps.getOrNull(profileOffset + index)?.let { it.toInt() and 0xFF } ?: fallback

    val profileIdc = spsByteOr(0, 0x42)
    val profileCompat = spsByteOr(1, 0x00)
    val levelIdc = spsByteOr(2, 0x1F)

    val output = ByteArrayOutputStream()
    DataOutputStream(output).apply {
        writeByte(1)             // configuration version
        writeByte(profileIdc)    // AVC profile
        writeByte(profileCompat) // profile compatibility
        writeByte(levelIdc)      // AVC level
        writeByte(0xFF)          // 6 bits reserved + 2 bits NAL length - 1
        writeByte(0xE1)          // 3 bits reserved + 5 bits SPS count
        writeShort(sps.size)     // SPS length
        write(sps)               // SPS data
        writeByte(1)             // PPS count
        writeShort(pps.size)     // PPS length
        write(pps)               // PPS data
    }
    return wrapBox("avcC", output.toByteArray())
}
/**
 * Builds an `stts` box with zero entries.
 *
 * The payload is eight zero bytes: version & flags (4) followed by entry count (4).
 *
 * @return the complete `stts` box bytes
 */
private fun buildEmptySttsBox(): ByteArray = wrapBox("stts", ByteArray(8))
/**
 * Builds an `stsc` box with zero entries.
 *
 * The payload is eight zero bytes: version & flags (4) followed by entry count (4).
 *
 * @return the complete `stsc` box bytes
 */
private fun buildEmptyStscBox(): ByteArray = wrapBox("stsc", ByteArray(8))
/**
 * Builds an `stsz` box with zero samples.
 *
 * The payload is twelve zero bytes: version & flags (4), sample size (4, 0 = variable)
 * and sample count (4).
 *
 * @return the complete `stsz` box bytes
 */
private fun buildEmptyStszBox(): ByteArray = wrapBox("stsz", ByteArray(12))
/**
 * Builds an `stco` box with zero entries.
 *
 * The payload is eight zero bytes: version & flags (4) followed by entry count (4).
 *
 * @return the complete `stco` box bytes
 */
private fun buildEmptyStcoBox(): ByteArray = wrapBox("stco", ByteArray(8))
/**
 * Builds the `mvex` box, whose only child is the `trex` box.
 *
 * @return the complete `mvex` box bytes
 */
private fun buildMvexBox(): ByteArray = wrapBox("mvex", buildTrexBox())
/**
 * Builds the `trex` box for track 1 with all per-sample defaults set to zero.
 *
 * @return the complete `trex` box bytes
 */
private fun buildTrexBox(): ByteArray {
    val payload = ByteArrayOutputStream()
    DataOutputStream(payload).apply {
        writeInt(0) // version & flags
        writeInt(1) // track ID
        writeInt(1) // default sample description index
        writeInt(0) // default sample duration
        writeInt(0) // default sample size
        writeInt(0) // default sample flags
    }
    return wrapBox("trex", payload.toByteArray())
}
// ==================== Media Segment Building ====================
/**
 * Builds one media segment: a `moof` box immediately followed by an `mdat` box.
 *
 * The `mdat` payload is the concatenation of every sample's data in list order.
 *
 * @param samples samples to place in this segment
 * @param sequenceNumber value forwarded to the `moof` builder
 * @param baseDecodeTimeUs base decode time in microseconds, forwarded to the `moof` builder
 * @return the `moof` bytes followed by the `mdat` bytes
 */
private fun buildMediaSegment(
    samples: List<Sample>,
    sequenceNumber: Int,
    baseDecodeTimeUs: Long
): ByteArray {
    // Assemble the mdat payload first so its size is known before moof is built.
    val mdatPayload = ByteArrayOutputStream().also { buffer ->
        samples.forEach { buffer.write(it.data) }
    }.toByteArray()

    val segment = ByteArrayOutputStream()
    segment.write(buildMoofBox(samples, sequenceNumber, baseDecodeTimeUs, mdatPayload.size))
    segment.write(wrapBox("mdat", mdatPayload))
    return segment.toByteArray()
}
/**
 * Builds the `moof` box (`mfhd` + `traf`) for one segment.
 *
 * The `trun` data offset is calculated up front from hand-computed box sizes, so the
 * constants here must stay in step with what the `tfhd`, `tfdt` and `trun` builders
 * actually write. The offset is the `moof` size plus the 8-byte `mdat` header, i.e. it
 * points from the start of `moof` to the first byte of sample data.
 *
 * @param samples samples described by this fragment
 * @param sequenceNumber value written into `mfhd`
 * @param baseDecodeTimeUs base decode time in microseconds, forwarded to `traf`
 * @param mdatPayloadSize size of the `mdat` payload; not used by this function
 * @return the complete `moof` box bytes
 */
private fun buildMoofBox(
    samples: List<Sample>,
    sequenceNumber: Int,
    baseDecodeTimeUs: Long,
    mdatPayloadSize: Int
): ByteArray {
    val boxHeaderBytes = 8
    val mfhd = buildMfhdBox(sequenceNumber)

    val tfhdBytes = boxHeaderBytes + 8                      // version/flags + track_id
    val tfdtBytes = boxHeaderBytes + 12                     // version 1 content
    val trunBytes = boxHeaderBytes + 12 + samples.size * 12 // fixed fields + per-sample (no composition offset)
    val trafBytes = boxHeaderBytes + tfhdBytes + tfdtBytes + trunBytes
    val moofBytes = boxHeaderBytes + mfhd.size + trafBytes
    val dataOffset = moofBytes + boxHeaderBytes             // skip past the mdat header

    val traf = buildTrafBox(samples, baseDecodeTimeUs, dataOffset)
    return wrapBox("moof", mfhd + traf)
}
/**
 * Builds the `mfhd` box carrying the fragment sequence number.
 *
 * @param sequenceNumber value written after the version & flags word
 * @return the complete `mfhd` box bytes
 */
private fun buildMfhdBox(sequenceNumber: Int): ByteArray {
    val payload = ByteArrayOutputStream()
    DataOutputStream(payload).apply {
        writeInt(0) // version & flags
        writeInt(sequenceNumber)
    }
    return wrapBox("mfhd", payload.toByteArray())
}
/**
 * Builds the `traf` box as `tfhd`, `tfdt` and `trun`, in that order.
 *
 * @param samples samples described by the `trun` box
 * @param baseDecodeTimeUs base decode time in microseconds, forwarded to `tfdt`
 * @param dataOffset data offset forwarded to `trun`
 * @return the complete `traf` box bytes
 */
private fun buildTrafBox(samples: List<Sample>, baseDecodeTimeUs: Long, dataOffset: Int): ByteArray {
    val tfhd = buildTfhdBox()
    val tfdt = buildTfdtBox(baseDecodeTimeUs)
    val trun = buildTrunBox(samples, dataOffset)
    return wrapBox("traf", tfhd + tfdt + trun)
}
/**
 * Builds the `tfhd` box for track 1 with only the default-base-is-moof flag set.
 *
 * @return the complete `tfhd` box bytes
 */
private fun buildTfhdBox(): ByteArray {
    val payload = ByteArrayOutputStream()
    DataOutputStream(payload).apply {
        writeInt(0x00020000) // version 0, flags: default-base-is-moof (0x020000)
        writeInt(1)          // track ID
    }
    return wrapBox("tfhd", payload.toByteArray())
}
/**
 * Builds a version-1 `tfdt` box holding the 64-bit base media decode time.
 *
 * @param baseDecodeTimeUs base decode time in microseconds; converted to `timescale` units
 *   with integer division before being written
 * @return the complete `tfdt` box bytes
 */
private fun buildTfdtBox(baseDecodeTimeUs: Long): ByteArray {
    // Microseconds -> timescale ticks.
    val decodeTimeTicks = (baseDecodeTimeUs * timescale) / 1_000_000

    val payload = ByteArrayOutputStream()
    DataOutputStream(payload).apply {
        writeInt(0x01000000)       // version 1 (64-bit time), no flags
        writeLong(decodeTimeTicks)
    }
    return wrapBox("tfdt", payload.toByteArray())
}
/**
 * Builds the `trun` box listing duration, size and flags for every sample.
 *
 * Each sample's duration is converted from microseconds to `timescale` units with
 * integer division and then narrowed to `Int`.
 *
 * @param samples samples to describe, in order
 * @param dataOffset value written into the data-offset field
 * @return the complete `trun` box bytes
 */
private fun buildTrunBox(samples: List<Sample>, dataOffset: Int): ByteArray {
    val payload = ByteArrayOutputStream()
    val writer = DataOutputStream(payload)

    // 0x000001 data-offset | 0x000100 sample-duration | 0x000200 sample-size | 0x000400 sample-flags
    writer.writeInt(0x000701)
    writer.writeInt(samples.size)
    writer.writeInt(dataOffset)

    samples.forEach { entry ->
        val durationTicks = ((entry.durationUs * timescale) / 1_000_000).toInt()
        writer.writeInt(durationTicks)
        writer.writeInt(entry.data.size)
        writer.writeInt(buildSampleFlags(entry.isKeyFrame))
    }
    return wrapBox("trun", payload.toByteArray())
}
/**
 * Returns the 32-bit sample flags word for a `trun` entry.
 *
 * @param isKeyFrame whether the sample is a key frame
 * @return `0x02000000` for key frames (sample_depends_on=2, no dependencies, not a
 *   difference sample); `0x01010000` otherwise (sample_depends_on=1, is a difference sample)
 */
private fun buildSampleFlags(isKeyFrame: Boolean): Int = when {
    isKeyFrame -> 0x02000000
    else -> 0x01010000
}
// ==================== Utilities ====================
/**
 * Wraps [content] in a box: a 32-bit big-endian size, the type code, then the payload.
 *
 * The size field is always `8 + content.size`, so [type] is expected to be exactly four
 * characters; only the low byte of each character is written.
 *
 * @param type box type code, e.g. `"moof"`
 * @param content box payload
 * @return the size field, type code and payload concatenated
 */
private fun wrapBox(type: String, content: ByteArray): ByteArray {
    val boxSize = 8 + content.size
    val buffer = ByteArrayOutputStream()
    DataOutputStream(buffer).apply {
        writeInt(boxSize)
        writeBytes(type)
        write(content)
    }
    return buffer.toByteArray()
}
}

View File

@@ -4,7 +4,6 @@ import android.content.Context
import android.util.Log import android.util.Log
import android.util.Size import android.util.Size
import android.view.Surface import android.view.Surface
import androidx.media3.common.util.UnstableApi
import com.facebook.common.statfs.StatFsHelper import com.facebook.common.statfs.StatFsHelper
import com.mrousavy.camera.extensions.getRecommendedBitRate import com.mrousavy.camera.extensions.getRecommendedBitRate
import com.mrousavy.camera.types.Orientation import com.mrousavy.camera.types.Orientation
@@ -16,7 +15,6 @@ import java.text.SimpleDateFormat
import java.util.Locale import java.util.Locale
import java.util.Date import java.util.Date
@UnstableApi
class RecordingSession( class RecordingSession(
context: Context, context: Context,
val cameraId: String, val cameraId: String,
@@ -30,7 +28,7 @@ class RecordingSession(
private val callback: (video: Video) -> Unit, private val callback: (video: Video) -> Unit,
private val onError: (error: CameraError) -> Unit, private val onError: (error: CameraError) -> Unit,
private val allCallbacks: CameraSession.Callback, private val allCallbacks: CameraSession.Callback,
// Use the new FragmentedMp4Muxer-based recorder for HLS-compatible output // Use FragmentedRecordingManager for HLS-compatible fMP4 output
private val useFragmentedMp4: Boolean = true private val useFragmentedMp4: Boolean = true
) { ) {
companion object { companion object {