Compare commits

...

2 Commits

Author SHA1 Message Date
e60c1a4eb1 Write our own muxer to make HLS upload actually work 2025-12-21 16:45:04 -08:00
a2d218580c feat: Add fragmented MP4 (fMP4) support for Android
Implements HLS-compatible fragmented MP4 recording on Android using
AndroidX Media3 FragmentedMp4Muxer, matching the iOS implementation.

Changes:
- Add FragmentedRecordingManager for fMP4 segment output
- Add ChunkedRecorderInterface to abstract recorder implementations
- Add onInitSegmentReady callback for init segment (init.mp4)
- Update onVideoChunkReady to include segment duration
- RecordingSession now uses FragmentedRecordingManager by default

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-18 12:29:03 -08:00
9 changed files with 1108 additions and 20 deletions

View File

@@ -40,15 +40,26 @@ fun CameraView.invokeOnStopped() {
   this.sendEvent(event)
 }

-fun CameraView.invokeOnChunkReady(filepath: File, index: Int) {
-  Log.e(CameraView.TAG, "invokeOnError(...):")
+fun CameraView.invokeOnChunkReady(filepath: File, index: Int, durationUs: Long?) {
+  Log.i(CameraView.TAG, "invokeOnChunkReady(...): index=$index, filepath=$filepath, durationUs=$durationUs")
   val event = Arguments.createMap()
   event.putInt("index", index)
   event.putString("filepath", filepath.toString())
+  if (durationUs != null) {
+    event.putDouble("duration", durationUs / 1_000_000.0) // Convert microseconds to seconds
+  }
   val reactContext = context as ReactContext
   reactContext.getJSModule(RCTEventEmitter::class.java).receiveEvent(id, "onVideoChunkReady", event)
 }

+fun CameraView.invokeOnInitReady(filepath: File) {
+  Log.i(CameraView.TAG, "invokeOnInitReady(...): filepath=$filepath")
+  val event = Arguments.createMap()
+  event.putString("filepath", filepath.toString())
+  val reactContext = context as ReactContext
+  reactContext.getJSModule(RCTEventEmitter::class.java).receiveEvent(id, "onInitReady", event)
+}
+
 fun CameraView.invokeOnError(error: Throwable) {
   Log.e(CameraView.TAG, "invokeOnError(...):")
   error.printStackTrace()

View File

@@ -271,8 +271,12 @@ class CameraView(context: Context) :
     invokeOnStopped()
   }

-  override fun onVideoChunkReady(filepath: File, index: Int) {
-    invokeOnChunkReady(filepath, index)
+  override fun onVideoChunkReady(filepath: File, index: Int, durationUs: Long?) {
+    invokeOnChunkReady(filepath, index, durationUs)
+  }
+
+  override fun onInitSegmentReady(filepath: File) {
+    invokeOnInitReady(filepath)
   }

   override fun onCodeScanned(codes: List<Barcode>, scannerFrame: CodeScannerFrame) {

View File

@@ -32,6 +32,7 @@ class CameraViewManager : ViewGroupManager<CameraView>() {
.put("cameraError", MapBuilder.of("registrationName", "onError")) .put("cameraError", MapBuilder.of("registrationName", "onError"))
.put("cameraCodeScanned", MapBuilder.of("registrationName", "onCodeScanned")) .put("cameraCodeScanned", MapBuilder.of("registrationName", "onCodeScanned"))
.put("onVideoChunkReady", MapBuilder.of("registrationName", "onVideoChunkReady")) .put("onVideoChunkReady", MapBuilder.of("registrationName", "onVideoChunkReady"))
.put("onInitReady", MapBuilder.of("registrationName", "onInitReady"))
.build()?.toMutableMap() .build()?.toMutableMap()
override fun getName(): String = TAG override fun getName(): String = TAG

View File

@@ -513,7 +513,8 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
     fun onInitialized()
     fun onStarted()
     fun onStopped()
-    fun onVideoChunkReady(filepath: File, index: Int)
+    fun onVideoChunkReady(filepath: File, index: Int, durationUs: Long?)
+    fun onInitSegmentReady(filepath: File)
     fun onCodeScanned(codes: List<Barcode>, scannerFrame: CodeScannerFrame)
   }
 }

View File

@@ -14,7 +14,7 @@ import java.io.File
 import java.nio.ByteBuffer

 class ChunkedRecordingManager(private val encoder: MediaCodec, private val outputDirectory: File, private val orientationHint: Int, private val iFrameInterval: Int, private val callbacks: CameraSession.Callback) :
-  MediaCodec.Callback() {
+  MediaCodec.Callback(), ChunkedRecorderInterface {
   companion object {
     private const val TAG = "ChunkedRecorder"
@@ -73,7 +73,7 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu
   private val targetDurationUs = iFrameInterval * 1000000

-  val surface: Surface = encoder.createInputSurface()
+  override val surface: Surface = encoder.createInputSurface()

   init {
     if (!this.outputDirectory.exists()) {
@@ -95,7 +95,9 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu
     fun finish() {
       muxer.stop()
       muxer.release()
-      callbacks.onVideoChunkReady(filepath, chunkIndex)
+      // Calculate duration from start time - this is approximate
+      // The new FragmentedRecordingManager provides accurate duration
+      callbacks.onVideoChunkReady(filepath, chunkIndex, null)
     }
   }
@@ -133,12 +135,12 @@ class ChunkedRecordingManager(private val encoder: MediaCodec, private val outpu
     return bufferInfo.presentationTimeUs - context.startTimeUs
   }

-  fun start() {
+  override fun start() {
     encoder.start()
     recording = true
   }

-  fun finish() {
+  override fun finish() {
     synchronized(this) {
       muxerContext?.finish()
       recording = false

View File

@@ -0,0 +1,15 @@
package com.mrousavy.camera.core
import android.view.Surface
/**
* Common interface for chunked video recorders.
* Implemented by both ChunkedRecordingManager (regular MP4) and
* FragmentedRecordingManager (HLS-compatible fMP4).
*/
interface ChunkedRecorderInterface {
val surface: Surface
fun start()
fun finish()
}
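
Both concrete recorders below are driven through this interface. A minimal sketch, not part of the diff, of what that decoupling buys; `wireToCamera` is a hypothetical helper that attaches the encoder's input surface to the capture session:

import android.view.Surface

// Works identically for ChunkedRecordingManager and FragmentedRecordingManager.
fun record(recorder: ChunkedRecorderInterface, wireToCamera: (Surface) -> Unit) {
  wireToCamera(recorder.surface) // camera frames reach the encoder through this surface
  recorder.start()               // begin encoding
  // ... recording runs ...
  recorder.finish()              // flush and finalize the last chunk/segment
}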

View File

@@ -0,0 +1,174 @@
package com.mrousavy.camera.core
import android.media.MediaCodec
import android.media.MediaCodec.BufferInfo
import android.media.MediaCodecInfo
import android.media.MediaFormat
import android.util.Log
import android.util.Size
import android.view.Surface
import com.mrousavy.camera.types.Orientation
import com.mrousavy.camera.types.RecordVideoOptions
import java.io.File
/**
* A recording manager that produces HLS-compatible fragmented MP4 segments.
*
* Uses HlsMuxer (following Android's MediaMuxer pattern) to produce:
* - init.mp4: Initialization segment (ftyp + moov with mvex)
* - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
*/
class FragmentedRecordingManager(
private val encoder: MediaCodec,
private val muxer: HlsMuxer
) : MediaCodec.Callback(), ChunkedRecorderInterface {
companion object {
private const val TAG = "FragmentedRecorder"
private const val DEFAULT_SEGMENT_DURATION_SECONDS = 6
fun fromParams(
callbacks: CameraSession.Callback,
size: Size,
enableAudio: Boolean,
fps: Int? = null,
cameraOrientation: Orientation,
bitRate: Int,
options: RecordVideoOptions,
outputDirectory: File,
segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS
): FragmentedRecordingManager {
val mimeType = options.videoCodec.toMimeType()
val cameraOrientationDegrees = cameraOrientation.toDegrees()
val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees()
// Use size dimensions directly - the encoder output format will have the actual dimensions
// Don't swap based on orientation here; the camera pipeline handles that
val width = size.width
val height = size.height
Log.d(TAG, "Input size: ${size.width}x${size.height}, " +
"cameraOrientation: $cameraOrientation ($cameraOrientationDegrees°), " +
"recordingOrientation: $recordingOrientationDegrees°")
val format = MediaFormat.createVideoFormat(mimeType, width, height)
val codec = MediaCodec.createEncoderByType(mimeType)
format.setInteger(
MediaFormat.KEY_COLOR_FORMAT,
MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface
)
val effectiveFps = fps ?: 30
format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps)
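// Keyframe interval is set equal to the segment duration so that
// HlsMuxer.writeSampleData can cut segments exactly at keyframe boundaries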
format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
Log.d(TAG, "Video Format: $format, orientation: $recordingOrientationDegrees")
codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
// Create muxer with callbacks and orientation
val muxer = HlsMuxer(
outputDirectory = outputDirectory,
callback = object : HlsMuxer.Callback {
override fun onInitSegmentReady(file: File) {
callbacks.onInitSegmentReady(file)
}
override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) {
callbacks.onVideoChunkReady(file, index, durationUs)
}
},
orientationDegrees = recordingOrientationDegrees
)
muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L)
Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees")
return FragmentedRecordingManager(codec, muxer)
}
}
private var recording = false
private var muxerStarted = false
private var trackIndex = -1
override val surface: Surface = encoder.createInputSurface()
init {
encoder.setCallback(this)
}
override fun start() {
encoder.start()
recording = true
}
override fun finish() {
synchronized(this) {
recording = false
if (muxerStarted) {
muxer.stop()
muxer.release()
}
try {
encoder.stop()
encoder.release()
} catch (e: Exception) {
Log.e(TAG, "Error stopping encoder", e)
}
}
}
// MediaCodec.Callback methods
override fun onInputBufferAvailable(codec: MediaCodec, index: Int) {
// Not used for Surface input
}
override fun onOutputBufferAvailable(codec: MediaCodec, index: Int, bufferInfo: BufferInfo) {
synchronized(this) {
if (!recording) {
encoder.releaseOutputBuffer(index, false)
return
}
if (!muxerStarted) {
encoder.releaseOutputBuffer(index, false)
return
}
val buffer = encoder.getOutputBuffer(index)
if (buffer == null) {
Log.e(TAG, "getOutputBuffer returned null")
encoder.releaseOutputBuffer(index, false)
return
}
try {
muxer.writeSampleData(trackIndex, buffer, bufferInfo)
} catch (e: Exception) {
Log.e(TAG, "Error writing sample", e)
}
encoder.releaseOutputBuffer(index, false)
}
}
override fun onError(codec: MediaCodec, e: MediaCodec.CodecException) {
Log.e(TAG, "Codec error: ${e.message}")
}
override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) {
synchronized(this) {
Log.i(TAG, "Output format changed: $format")
trackIndex = muxer.addTrack(format)
muxer.start()
muxerStarted = true
}
}
}
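
For orientation, a hedged sketch of the callback sequence a consumer might observe for a roughly 15-second recording with 6-second segments (file names follow HlsMuxer's scheme; durations are illustrative):

// onInitSegmentReady(<outputDirectory>/init.mp4)
// onVideoChunkReady(<outputDirectory>/0.mp4, index = 0, durationUs ≈ 6_000_000)
// onVideoChunkReady(<outputDirectory>/1.mp4, index = 1, durationUs ≈ 6_000_000)
// onVideoChunkReady(<outputDirectory>/2.mp4, index = 2, durationUs ≈ 3_000_000)  // flushed by finish()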

View File

@@ -0,0 +1,857 @@
package com.mrousavy.camera.core
import android.media.MediaCodec
import android.media.MediaFormat
import android.util.Log
import java.io.ByteArrayOutputStream
import java.io.DataOutputStream
import java.io.File
import java.io.FileOutputStream
import java.nio.ByteBuffer
/**
* A muxer for creating HLS-compatible fragmented MP4 output.
*
* Follows the same pattern as Android's MediaMuxer:
* 1. Create muxer with output directory
* 2. addTrack() with MediaFormat
* 3. start() - writes init.mp4
* 4. writeSampleData() for each encoded sample
* 5. stop() - finalizes last segment
* 6. release() - cleanup
*
* Produces:
* - init.mp4: Initialization segment (ftyp + moov with mvex)
* - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
*/
class HlsMuxer(
private val outputDirectory: File,
private val callback: Callback,
private val orientationDegrees: Int = 0
) {
companion object {
private const val TAG = "HlsMuxer"
private const val DEFAULT_SEGMENT_DURATION_US = 6_000_000L // 6 seconds
}
interface Callback {
fun onInitSegmentReady(file: File)
fun onMediaSegmentReady(file: File, index: Int, durationUs: Long)
}
// Configuration
private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
private var timescale: Int = 30000 // Default, updated from format
// State
private var state = State.UNINITIALIZED
private var trackFormat: MediaFormat? = null
private var sequenceNumber = 1
private var segmentIndex = 0
// Current segment data
private val pendingSamples = mutableListOf<Sample>()
private var segmentStartTimeUs = -1L
private var lastPresentationTimeUs = 0L
private enum class State {
UNINITIALIZED,
INITIALIZED,
STARTED,
STOPPED,
RELEASED
}
private data class Sample(
val data: ByteArray,
val presentationTimeUs: Long,
var durationUs: Long,
val isKeyFrame: Boolean
)
// ==================== Annex-B to AVCC Conversion ====================
/**
* Converts H.264 data from Annex-B format to AVCC format.
*
* Annex-B uses start codes (00 00 00 01 or 00 00 01) to delimit NAL units.
* AVCC uses 4-byte big-endian length prefixes before each NAL unit.
*
* This conversion is required because:
* - MediaCodec outputs Annex-B format
* - fMP4/HLS requires AVCC format (as specified in avcC box with NAL length size = 4)
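*
* For example (hypothetical bytes), the Annex-B input
*   00 00 00 01 41 9A 00 11  (start code + one 4-byte NAL unit)
* becomes the AVCC output
*   00 00 00 04 41 9A 00 11  (4-byte big-endian length prefix + NAL unit)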
*/
private fun convertAnnexBToAvcc(annexBData: ByteArray): ByteArray {
val nalUnits = parseAnnexBNalUnits(annexBData)
if (nalUnits.isEmpty()) {
Log.w(TAG, "No NAL units found in sample, returning original data")
return annexBData
}
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
for (nalUnit in nalUnits) {
// Write 4-byte big-endian length prefix
dos.writeInt(nalUnit.size)
// Write NAL unit data (without start code)
dos.write(nalUnit)
}
return output.toByteArray()
}
/**
* Parses Annex-B formatted data into individual NAL units.
* Returns list of NAL unit byte arrays (without start codes).
*/
private fun parseAnnexBNalUnits(data: ByteArray): List<ByteArray> {
val nalUnits = mutableListOf<ByteArray>()
var i = 0
while (i < data.size) {
// Find start code
val startCodeLength = findStartCode(data, i)
if (startCodeLength == 0) {
// No start code found at current position
// This might happen if data doesn't start with a start code
if (nalUnits.isEmpty() && i == 0) {
// Data might already be in AVCC format or malformed
// Try to detect AVCC format (first 4 bytes would be a reasonable length)
if (data.size >= 4) {
val possibleLength = ((data[0].toInt() and 0xFF) shl 24) or
((data[1].toInt() and 0xFF) shl 16) or
((data[2].toInt() and 0xFF) shl 8) or
(data[3].toInt() and 0xFF)
if (possibleLength > 0 && possibleLength <= data.size - 4) {
// Looks like AVCC format already, return original
Log.d(TAG, "Data appears to already be in AVCC format")
return emptyList()
}
}
}
i++
continue
}
val nalStart = i + startCodeLength
// Find end of this NAL unit (start of next, or end of data)
var nalEnd = data.size
var j = nalStart
while (j < data.size - 2) {
val nextStartCode = findStartCode(data, j)
if (nextStartCode > 0) {
nalEnd = j
break
}
j++
}
if (nalEnd > nalStart) {
nalUnits.add(data.copyOfRange(nalStart, nalEnd))
}
i = nalEnd
}
return nalUnits
}
/**
* Checks for Annex-B start code at given position.
* Returns start code length (3 or 4) or 0 if no start code found.
*/
private fun findStartCode(data: ByteArray, offset: Int): Int {
if (offset + 4 <= data.size &&
data[offset] == 0.toByte() &&
data[offset + 1] == 0.toByte() &&
data[offset + 2] == 0.toByte() &&
data[offset + 3] == 1.toByte()) {
return 4 // 4-byte start code: 00 00 00 01
}
if (offset + 3 <= data.size &&
data[offset] == 0.toByte() &&
data[offset + 1] == 0.toByte() &&
data[offset + 2] == 1.toByte()) {
return 3 // 3-byte start code: 00 00 01
}
return 0
}
/**
* Sets the target segment duration.
* Must be called before start().
*/
fun setSegmentDuration(durationUs: Long) {
check(state == State.UNINITIALIZED || state == State.INITIALIZED) {
"Cannot set segment duration after start()"
}
targetSegmentDurationUs = durationUs
}
/**
* Adds a track to the muxer.
*
* @param format The MediaFormat describing the track
* @return Track index (always 0 for now, single video track)
*/
fun addTrack(format: MediaFormat): Int {
check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" }
trackFormat = format
// Extract timescale from frame rate
val fps = try {
format.getInteger(MediaFormat.KEY_FRAME_RATE)
} catch (e: Exception) {
30
}
timescale = fps * 1000 // Use fps * 1000 for good precision
state = State.INITIALIZED
val formatWidth = try { format.getInteger(MediaFormat.KEY_WIDTH) } catch (e: Exception) { -1 }
val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 }
Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
"encoder output: ${formatWidth}x${formatHeight}, " +
"timescale=$timescale, orientation=$orientationDegrees°")
return 0 // Single track, index 0
}
/**
* Starts the muxer, writing the initialization segment.
*/
fun start() {
check(state == State.INITIALIZED) { "Must call addTrack() before start()" }
val format = trackFormat ?: throw IllegalStateException("No track format")
// Create output directory if needed
if (!outputDirectory.exists()) {
outputDirectory.mkdirs()
}
// Write init segment
val initBytes = buildInitSegment(format)
val initFile = File(outputDirectory, "init.mp4")
FileOutputStream(initFile).use { it.write(initBytes) }
Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)")
callback.onInitSegmentReady(initFile)
state = State.STARTED
}
/**
* Writes sample data to the muxer.
*
* @param trackIndex Track index (must be 0)
* @param buffer The encoded sample data
* @param bufferInfo Sample metadata (size, presentation time, flags)
*/
fun writeSampleData(trackIndex: Int, buffer: ByteBuffer, bufferInfo: MediaCodec.BufferInfo) {
check(state == State.STARTED) { "Must call start() before writeSampleData()" }
check(trackIndex == 0) { "Invalid track index: $trackIndex" }
// Skip codec config data (already in init segment)
if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
return
}
val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
val presentationTimeUs = bufferInfo.presentationTimeUs
// Initialize segment start time
if (segmentStartTimeUs < 0) {
segmentStartTimeUs = presentationTimeUs
}
// Check if we should finalize current segment (at keyframe boundaries)
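// (every segment then starts with a keyframe, so HLS players can decode it independently)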
if (isKeyFrame && pendingSamples.isNotEmpty()) {
val segmentDurationUs = presentationTimeUs - segmentStartTimeUs
if (segmentDurationUs >= targetSegmentDurationUs) {
finalizeCurrentSegment()
segmentStartTimeUs = presentationTimeUs
}
}
// Copy buffer data and convert from Annex-B to AVCC format
val rawData = ByteArray(bufferInfo.size)
buffer.position(bufferInfo.offset)
buffer.limit(bufferInfo.offset + bufferInfo.size)
buffer.get(rawData)
// Convert Annex-B (start codes) to AVCC (length prefixes)
val data = convertAnnexBToAvcc(rawData)
// Update duration of previous sample
if (pendingSamples.isNotEmpty()) {
val lastSample = pendingSamples.last()
lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
}
// Estimate duration (will be corrected by next sample)
val estimatedDurationUs = if (lastPresentationTimeUs > 0) {
presentationTimeUs - lastPresentationTimeUs
} else {
1_000_000L / 30 // Assume 30fps
}
pendingSamples.add(Sample(
data = data,
presentationTimeUs = presentationTimeUs,
durationUs = estimatedDurationUs,
isKeyFrame = isKeyFrame
))
lastPresentationTimeUs = presentationTimeUs
}
/**
* Stops the muxer, finalizing any pending segment.
*/
fun stop() {
check(state == State.STARTED) { "Muxer not started" }
if (pendingSamples.isNotEmpty()) {
finalizeCurrentSegment()
}
state = State.STOPPED
Log.d(TAG, "Muxer stopped, wrote $segmentIndex segments")
}
/**
* Releases resources.
*/
fun release() {
if (state == State.STARTED) {
stop()
}
pendingSamples.clear()
state = State.RELEASED
}
/**
* Finalizes the current segment and writes it to disk.
*/
private fun finalizeCurrentSegment() {
if (pendingSamples.isEmpty()) return
try {
val baseDecodeTimeUs = pendingSamples.first().presentationTimeUs
val fragmentBytes = buildMediaSegment(pendingSamples, sequenceNumber, baseDecodeTimeUs)
val segmentFile = File(outputDirectory, "$segmentIndex.mp4")
FileOutputStream(segmentFile).use { it.write(fragmentBytes) }
// Calculate duration
val firstPts = pendingSamples.first().presentationTimeUs
val lastSample = pendingSamples.last()
val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs
Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " +
"duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes")
callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs)
segmentIndex++
sequenceNumber++
pendingSamples.clear()
} catch (e: Exception) {
Log.e(TAG, "Error finalizing segment $segmentIndex", e)
}
}
// ==================== Init Segment Building ====================
/**
* Builds the initialization segment (ftyp + moov).
*/
private fun buildInitSegment(format: MediaFormat): ByteArray {
val width = format.getInteger(MediaFormat.KEY_WIDTH)
val height = format.getInteger(MediaFormat.KEY_HEIGHT)
val sps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) }
?: throw IllegalArgumentException("Missing SPS (csd-0)")
val pps = format.getByteBuffer("csd-1")?.let { extractNalUnit(it) }
?: throw IllegalArgumentException("Missing PPS (csd-1)")
val output = ByteArrayOutputStream()
// ftyp
output.write(buildFtypBox())
// moov
output.write(buildMoovBox(width, height, sps, pps))
return output.toByteArray()
}
private fun extractNalUnit(buffer: ByteBuffer): ByteArray {
val data = ByteArray(buffer.remaining())
buffer.duplicate().get(data)
// Strip start code prefix (0x00000001 or 0x000001)
return when {
data.size >= 4 && data[0] == 0.toByte() && data[1] == 0.toByte() &&
data[2] == 0.toByte() && data[3] == 1.toByte() -> data.copyOfRange(4, data.size)
data.size >= 3 && data[0] == 0.toByte() && data[1] == 0.toByte() &&
data[2] == 1.toByte() -> data.copyOfRange(3, data.size)
else -> data
}
}
private fun buildFtypBox(): ByteArray {
val brands = listOf("isom", "iso5", "iso6", "avc1", "mp41", "dash")
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
val size = 8 + 4 + 4 + (brands.size * 4)
dos.writeInt(size)
dos.writeBytes("ftyp")
dos.writeBytes("isom") // major brand
dos.writeInt(0x200) // minor version
brands.forEach { dos.writeBytes(it) }
return output.toByteArray()
}
private fun buildMoovBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
val content = ByteArrayOutputStream()
content.write(buildMvhdBox())
content.write(buildTrakBox(width, height, sps, pps))
content.write(buildMvexBox())
return wrapBox("moov", content.toByteArray())
}
private fun buildMvhdBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(0) // creation time
dos.writeInt(0) // modification time
dos.writeInt(timescale) // timescale
dos.writeInt(0) // duration
dos.writeInt(0x00010000) // rate = 1.0
dos.writeShort(0x0100) // volume = 1.0
dos.writeShort(0) // reserved
dos.writeInt(0) // reserved
dos.writeInt(0) // reserved
// Unity matrix
dos.writeInt(0x00010000); dos.writeInt(0); dos.writeInt(0)
dos.writeInt(0); dos.writeInt(0x00010000); dos.writeInt(0)
dos.writeInt(0); dos.writeInt(0); dos.writeInt(0x40000000)
repeat(6) { dos.writeInt(0) } // pre-defined
dos.writeInt(2) // next track ID
return wrapBox("mvhd", output.toByteArray())
}
private fun buildTrakBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
val content = ByteArrayOutputStream()
content.write(buildTkhdBox(width, height))
content.write(buildMdiaBox(width, height, sps, pps))
return wrapBox("trak", content.toByteArray())
}
private fun buildTkhdBox(width: Int, height: Int): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0x00000007) // version 0, flags (enabled, in movie, in preview)
dos.writeInt(0) // creation time
dos.writeInt(0) // modification time
dos.writeInt(1) // track ID
dos.writeInt(0) // reserved
dos.writeInt(0) // duration
dos.writeInt(0) // reserved
dos.writeInt(0) // reserved
dos.writeShort(0) // layer
dos.writeShort(0) // alternate group
dos.writeShort(0) // volume (0 for video)
dos.writeShort(0) // reserved
// Rotation matrix - use identity and rely on correct dimensions from encoder
// The encoder output format already has the correct dimensions for the content
writeRotationMatrix(dos)
// Use dimensions as-is from encoder output format
dos.writeInt(width shl 16) // width (16.16 fixed point)
dos.writeInt(height shl 16) // height (16.16 fixed point)
Log.d(TAG, "tkhd: ${width}x${height}, rotation=$orientationDegrees")
return wrapBox("tkhd", output.toByteArray())
}
/**
* Writes the 3x3 transformation matrix for video rotation.
* Uses simple rotation values - the encoder already outputs correctly oriented frames.
*/
private fun writeRotationMatrix(dos: DataOutputStream) {
// Fixed-point constants
val one = 0x00010000 // 1.0 in 16.16
val w = 0x40000000 // 1.0 in 2.30
// Identity matrix - no transformation
// Most HLS players handle rotation via the dimensions themselves
// or we can add rotation metadata separately if needed
dos.writeInt(one) // a = 1
dos.writeInt(0) // b = 0
dos.writeInt(0) // u = 0
dos.writeInt(0) // c = 0
dos.writeInt(one) // d = 1
dos.writeInt(0) // v = 0
dos.writeInt(0) // x = 0
dos.writeInt(0) // y = 0
dos.writeInt(w) // w = 1
}
private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
val content = ByteArrayOutputStream()
content.write(buildMdhdBox())
content.write(buildHdlrBox())
content.write(buildMinfBox(width, height, sps, pps))
return wrapBox("mdia", content.toByteArray())
}
private fun buildMdhdBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(0) // creation time
dos.writeInt(0) // modification time
dos.writeInt(timescale) // timescale
dos.writeInt(0) // duration
dos.writeShort(0x55C4) // language: "und"
dos.writeShort(0) // pre-defined
return wrapBox("mdhd", output.toByteArray())
}
private fun buildHdlrBox(): ByteArray {
val name = "VideoHandler"
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(0) // pre-defined
dos.writeBytes("vide") // handler type
dos.writeInt(0) // reserved
dos.writeInt(0) // reserved
dos.writeInt(0) // reserved
dos.writeBytes(name)
dos.writeByte(0) // null terminator
return wrapBox("hdlr", output.toByteArray())
}
private fun buildMinfBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
val content = ByteArrayOutputStream()
content.write(buildVmhdBox())
content.write(buildDinfBox())
content.write(buildStblBox(width, height, sps, pps))
return wrapBox("minf", content.toByteArray())
}
private fun buildVmhdBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(1) // version 0, flags = 1
dos.writeShort(0) // graphics mode
dos.writeShort(0) // opcolor[0]
dos.writeShort(0) // opcolor[1]
dos.writeShort(0) // opcolor[2]
return wrapBox("vmhd", output.toByteArray())
}
private fun buildDinfBox(): ByteArray {
val dref = buildDrefBox()
return wrapBox("dinf", dref)
}
private fun buildDrefBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(1) // entry count
// url box (self-contained)
dos.writeInt(12)
dos.writeBytes("url ")
dos.writeInt(1) // flags: self-contained
return wrapBox("dref", output.toByteArray())
}
private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
val content = ByteArrayOutputStream()
content.write(buildStsdBox(width, height, sps, pps))
content.write(buildEmptySttsBox())
content.write(buildEmptyStscBox())
content.write(buildEmptyStszBox())
content.write(buildEmptyStcoBox())
return wrapBox("stbl", content.toByteArray())
}
private fun buildStsdBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(1) // entry count
output.write(buildAvc1Box(width, height, sps, pps))
return wrapBox("stsd", output.toByteArray())
}
private fun buildAvc1Box(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
repeat(6) { dos.writeByte(0) } // reserved
dos.writeShort(1) // data reference index
dos.writeShort(0) // pre-defined
dos.writeShort(0) // reserved
repeat(3) { dos.writeInt(0) } // pre-defined
dos.writeShort(width) // width
dos.writeShort(height) // height
dos.writeInt(0x00480000) // horiz resolution (72 dpi)
dos.writeInt(0x00480000) // vert resolution (72 dpi)
dos.writeInt(0) // reserved
dos.writeShort(1) // frame count
repeat(32) { dos.writeByte(0) } // compressor name
dos.writeShort(0x0018) // depth (24 bit)
dos.writeShort(-1) // pre-defined
output.write(buildAvcCBox(sps, pps))
return wrapBox("avc1", output.toByteArray())
}
private fun buildAvcCBox(sps: ByteArray, pps: ByteArray): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
val profileIdc = if (sps.isNotEmpty()) sps[0].toInt() and 0xFF else 0x42
val profileCompat = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x00
val levelIdc = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x1F
dos.writeByte(1) // configuration version
dos.writeByte(profileIdc) // AVC profile
dos.writeByte(profileCompat)// profile compatibility
dos.writeByte(levelIdc) // AVC level
dos.writeByte(0xFF) // 6 bits reserved + 2 bits NAL length - 1
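// lengthSizeMinusOne = 3, i.e. 4-byte NAL length prefixes, matching convertAnnexBToAvcc()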
dos.writeByte(0xE1) // 3 bits reserved + 5 bits SPS count
dos.writeShort(sps.size) // SPS length
dos.write(sps) // SPS data
dos.writeByte(1) // PPS count
dos.writeShort(pps.size) // PPS length
dos.write(pps) // PPS data
return wrapBox("avcC", output.toByteArray())
}
private fun buildEmptySttsBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(0) // entry count
return wrapBox("stts", output.toByteArray())
}
private fun buildEmptyStscBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(0) // entry count
return wrapBox("stsc", output.toByteArray())
}
private fun buildEmptyStszBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(0) // sample size (0 = variable)
dos.writeInt(0) // sample count
return wrapBox("stsz", output.toByteArray())
}
private fun buildEmptyStcoBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(0) // entry count
return wrapBox("stco", output.toByteArray())
}
private fun buildMvexBox(): ByteArray {
return wrapBox("mvex", buildTrexBox())
}
private fun buildTrexBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(1) // track ID
dos.writeInt(1) // default sample description index
dos.writeInt(0) // default sample duration
dos.writeInt(0) // default sample size
dos.writeInt(0) // default sample flags
return wrapBox("trex", output.toByteArray())
}
// ==================== Media Segment Building ====================
/**
* Builds a media segment (moof + mdat).
*/
private fun buildMediaSegment(
samples: List<Sample>,
sequenceNumber: Int,
baseDecodeTimeUs: Long
): ByteArray {
val output = ByteArrayOutputStream()
// Build mdat content first to know sizes
val mdatContent = ByteArrayOutputStream()
for (sample in samples) {
mdatContent.write(sample.data)
}
val mdatPayload = mdatContent.toByteArray()
// Build moof
val moofBox = buildMoofBox(samples, sequenceNumber, baseDecodeTimeUs, mdatPayload.size)
output.write(moofBox)
// Build mdat
output.write(wrapBox("mdat", mdatPayload))
return output.toByteArray()
}
private fun buildMoofBox(
samples: List<Sample>,
sequenceNumber: Int,
baseDecodeTimeUs: Long,
mdatPayloadSize: Int
): ByteArray {
// Calculate sizes to determine data offset
val mfhdBox = buildMfhdBox(sequenceNumber)
val tfhdSize = 8 + 8 // box header + content (version/flags + track_id)
val tfdtSize = 8 + 12 // box header + version 1 content
val trunSize = 8 + 12 + (samples.size * 12) // header + fixed + per-sample (no composition offset)
val trafSize = 8 + tfhdSize + tfdtSize + trunSize
val moofSize = 8 + mfhdBox.size + trafSize
val dataOffset = moofSize + 8 // moof size + mdat header
val content = ByteArrayOutputStream()
content.write(mfhdBox)
content.write(buildTrafBox(samples, baseDecodeTimeUs, dataOffset))
return wrapBox("moof", content.toByteArray())
}
private fun buildMfhdBox(sequenceNumber: Int): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(0) // version & flags
dos.writeInt(sequenceNumber)
return wrapBox("mfhd", output.toByteArray())
}
private fun buildTrafBox(samples: List<Sample>, baseDecodeTimeUs: Long, dataOffset: Int): ByteArray {
val content = ByteArrayOutputStream()
content.write(buildTfhdBox())
content.write(buildTfdtBox(baseDecodeTimeUs))
content.write(buildTrunBox(samples, dataOffset))
return wrapBox("traf", content.toByteArray())
}
private fun buildTfhdBox(): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
// Flags: default-base-is-moof (0x020000)
dos.writeInt(0x00020000)
dos.writeInt(1) // track ID
return wrapBox("tfhd", output.toByteArray())
}
private fun buildTfdtBox(baseDecodeTimeUs: Long): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
// Convert to timescale units
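// e.g. baseDecodeTimeUs = 6_000_000 at timescale 30_000 -> 180_000 units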
val baseMediaDecodeTime = (baseDecodeTimeUs * timescale) / 1_000_000
// Version 1 for 64-bit time
dos.writeInt(0x01000000)
dos.writeLong(baseMediaDecodeTime)
return wrapBox("tfdt", output.toByteArray())
}
private fun buildTrunBox(samples: List<Sample>, dataOffset: Int): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
// Flags: data-offset + sample-duration + sample-size + sample-flags
val flags = 0x000001 or 0x000100 or 0x000200 or 0x000400
dos.writeInt(flags)
dos.writeInt(samples.size)
dos.writeInt(dataOffset)
for (sample in samples) {
// Convert duration to timescale units
val durationInTimescale = ((sample.durationUs * timescale) / 1_000_000).toInt()
dos.writeInt(durationInTimescale)
dos.writeInt(sample.data.size)
dos.writeInt(buildSampleFlags(sample.isKeyFrame))
}
return wrapBox("trun", output.toByteArray())
}
private fun buildSampleFlags(isKeyFrame: Boolean): Int {
return if (isKeyFrame) {
// sample_depends_on=2 (no dependencies), not a difference sample
0x02000000
} else {
// sample_depends_on=1 (depends on others), is a difference sample
0x01010000
}
}
// ==================== Utilities ====================
private fun wrapBox(type: String, content: ByteArray): ByteArray {
val output = ByteArrayOutputStream()
val dos = DataOutputStream(output)
dos.writeInt(8 + content.size)
dos.writeBytes(type)
dos.write(content)
return output.toByteArray()
}
}
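
A minimal usage sketch of HlsMuxer's MediaMuxer-style lifecycle, not part of the diff. Assumptions: `format` comes from MediaCodec.Callback.onOutputFormatChanged, `buffer`/`info` from onOutputBufferAvailable (as in FragmentedRecordingManager above), and `context.filesDir` is a hypothetical output location:

val muxer = HlsMuxer(
  outputDirectory = File(context.filesDir, "hls"), // hypothetical location
  callback = object : HlsMuxer.Callback {
    override fun onInitSegmentReady(file: File) { /* e.g. upload init.mp4 */ }
    override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) { /* e.g. upload segment */ }
  }
)
muxer.setSegmentDuration(6_000_000L)        // 6 s target; segments are cut at keyframes
val track = muxer.addTrack(format)          // must precede start(); returns 0
muxer.start()                               // writes init.mp4
muxer.writeSampleData(track, buffer, info)  // called once per encoded sample
muxer.stop()                                // flushes the final segment
muxer.release()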

View File

@@ -14,6 +14,7 @@ import android.os.Environment
 import java.text.SimpleDateFormat
 import java.util.Locale
 import java.util.Date
+
 class RecordingSession(
   context: Context,
   val cameraId: String,
@@ -27,6 +28,8 @@ class RecordingSession(
   private val callback: (video: Video) -> Unit,
   private val onError: (error: CameraError) -> Unit,
   private val allCallbacks: CameraSession.Callback,
+  // Use FragmentedRecordingManager for HLS-compatible fMP4 output
+  private val useFragmentedMp4: Boolean = true
 ) {
   companion object {
     private const val TAG = "RecordingSession"
@@ -34,6 +37,9 @@ class RecordingSession(
     private const val AUDIO_SAMPLING_RATE = 44_100
     private const val AUDIO_BIT_RATE = 16 * AUDIO_SAMPLING_RATE
     private const val AUDIO_CHANNELS = 1
+
+    // Segment duration in seconds (matching iOS default of 6 seconds)
+    private const val SEGMENT_DURATION_SECONDS = 6
   }

   data class Video(val path: String, val durationMs: Long, val size: Size)
@@ -41,16 +47,33 @@ class RecordingSession(
   private val outputPath: File = File(filePath)
   private val bitRate = getBitRate()

-  private val recorder = ChunkedRecordingManager.fromParams(
-    allCallbacks,
-    size,
-    enableAudio,
-    fps,
-    cameraOrientation,
-    bitRate,
-    options,
-    outputPath
-  )
+  // Use FragmentedRecordingManager for HLS-compatible fMP4 output,
+  // or fall back to ChunkedRecordingManager for regular MP4 chunks
+  private val recorder: ChunkedRecorderInterface = if (useFragmentedMp4) {
+    FragmentedRecordingManager.fromParams(
+      allCallbacks,
+      size,
+      enableAudio,
+      fps,
+      cameraOrientation,
+      bitRate,
+      options,
+      outputPath,
+      SEGMENT_DURATION_SECONDS
+    )
+  } else {
+    ChunkedRecordingManager.fromParams(
+      allCallbacks,
+      size,
+      enableAudio,
+      fps,
+      cameraOrientation,
+      bitRate,
+      options,
+      outputPath
+    )
+  }

   private var startTime: Long? = null

   val surface: Surface
     get() {