Compare commits

..

1 Commits

Author SHA1 Message Date
e60c1a4eb1 Write our own muxer to make HLS upload actually work 2025-12-21 16:45:04 -08:00
9 changed files with 923 additions and 702 deletions

View File

@@ -178,10 +178,6 @@ dependencies {
implementation "com.facebook.react:react-android:+"
implementation "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3"
// Media3 muxer for fragmented MP4 (HLS-compatible) recording
implementation "androidx.media3:media3-muxer:1.5.0"
implementation "androidx.media3:media3-common:1.5.0"
if (enableCodeScanner) {
// User enabled code-scanner, so we bundle the 2.4 MB model in the app.
implementation 'com.google.mlkit:barcode-scanning:17.2.0'

View File

@@ -26,7 +26,6 @@ OpenGLRenderer::OpenGLRenderer(std::shared_ptr<OpenGLContext> context, ANativeWi
_outputSurface = surface;
_width = ANativeWindow_getWidth(surface);
_height = ANativeWindow_getHeight(surface);
__android_log_print(ANDROID_LOG_INFO, TAG, "ROTATION_DEBUG OpenGLRenderer created with output surface dimensions: %dx%d", _width, _height);
}
OpenGLRenderer::~OpenGLRenderer() {

View File

@@ -56,11 +56,6 @@ void VideoPipeline::setRecordingSessionOutputSurface(jobject surface) {
_recordingSessionOutput = OpenGLRenderer::CreateWithWindowSurface(_context, window);
}
// Stores the orientation value that onFrame() reads when it builds the transform
// matrix for the recording output, then logs the new value.
// onFrame() treats 1 as LANDSCAPE_RIGHT and every other value as LANDSCAPE_LEFT.
void VideoPipeline::setRecordingOrientation(int orientation) {
_recordingOrientation = orientation;
__android_log_print(ANDROID_LOG_INFO, TAG, "Recording orientation set to: %d", orientation);
}
int VideoPipeline::getInputTextureId() {
if (_inputTexture == std::nullopt) {
_inputTexture = _context->createTexture(OpenGLTexture::Type::ExternalOES, _width, _height);
@@ -83,29 +78,8 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara
OpenGLTexture& texture = _inputTexture.value();
if (_recordingSessionOutput) {
__android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession.. orientation=%d", _recordingOrientation);
// For recording, use a simple transform matrix instead of the display transform.
// The display transform includes rotations for preview which we don't want in recordings.
float recordingMatrix[16];
if (_recordingOrientation == 1) {
// LANDSCAPE_RIGHT (CW): Y-flip + 180° rotation = flip both X and Y
// This negates both X and Y, then translates by (1,1)
recordingMatrix[0] = -1.0f; recordingMatrix[1] = 0.0f; recordingMatrix[2] = 0.0f; recordingMatrix[3] = 0.0f;
recordingMatrix[4] = 0.0f; recordingMatrix[5] = 1.0f; recordingMatrix[6] = 0.0f; recordingMatrix[7] = 0.0f;
recordingMatrix[8] = 0.0f; recordingMatrix[9] = 0.0f; recordingMatrix[10] = 1.0f; recordingMatrix[11] = 0.0f;
recordingMatrix[12] = 1.0f; recordingMatrix[13] = 0.0f; recordingMatrix[14] = 0.0f; recordingMatrix[15] = 1.0f;
} else {
// LANDSCAPE_LEFT (CCW): Simple Y-flip
// OpenGL origin is bottom-left, video expects top-left
recordingMatrix[0] = 1.0f; recordingMatrix[1] = 0.0f; recordingMatrix[2] = 0.0f; recordingMatrix[3] = 0.0f;
recordingMatrix[4] = 0.0f; recordingMatrix[5] = -1.0f; recordingMatrix[6] = 0.0f; recordingMatrix[7] = 0.0f;
recordingMatrix[8] = 0.0f; recordingMatrix[9] = 0.0f; recordingMatrix[10] = 1.0f; recordingMatrix[11] = 0.0f;
recordingMatrix[12] = 0.0f; recordingMatrix[13] = 1.0f; recordingMatrix[14] = 0.0f; recordingMatrix[15] = 1.0f;
}
_recordingSessionOutput->renderTextureToSurface(texture, recordingMatrix);
__android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession..");
_recordingSessionOutput->renderTextureToSurface(texture, transformMatrix);
}
}
@@ -114,7 +88,6 @@ void VideoPipeline::registerNatives() {
makeNativeMethod("initHybrid", VideoPipeline::initHybrid),
makeNativeMethod("setRecordingSessionOutputSurface", VideoPipeline::setRecordingSessionOutputSurface),
makeNativeMethod("removeRecordingSessionOutputSurface", VideoPipeline::removeRecordingSessionOutputSurface),
makeNativeMethod("setRecordingOrientation", VideoPipeline::setRecordingOrientation),
makeNativeMethod("getInputTextureId", VideoPipeline::getInputTextureId),
makeNativeMethod("onBeforeFrame", VideoPipeline::onBeforeFrame),
makeNativeMethod("onFrame", VideoPipeline::onFrame),

View File

@@ -33,7 +33,6 @@ public:
// <- MediaRecorder output
void setRecordingSessionOutputSurface(jobject surface);
void removeRecordingSessionOutputSurface();
void setRecordingOrientation(int orientation);
// Frame callbacks
void onBeforeFrame();
@@ -48,7 +47,6 @@ private:
std::optional<OpenGLTexture> _inputTexture = std::nullopt;
int _width = 0;
int _height = 0;
int _recordingOrientation = 0; // 0=LANDSCAPE_LEFT, 1=LANDSCAPE_RIGHT
// Output Contexts
std::shared_ptr<OpenGLContext> _context = nullptr;

View File

@@ -409,8 +409,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
private fun updateVideoOutputs() {
val videoOutput = videoOutput ?: return
Log.i(TAG, "Updating Video Outputs...")
val orientation = recording?.cameraOrientation ?: Orientation.LANDSCAPE_LEFT
videoOutput.videoPipeline.setRecordingSessionOutput(recording, orientation)
videoOutput.videoPipeline.setRecordingSessionOutput(recording)
}
suspend fun startRecording(
@@ -429,16 +428,18 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
// Get actual device rotation from WindowManager since the React Native orientation hook
// doesn't update when rotating between landscape-left and landscape-right on Android.
// Map device rotation to the correct orientationHint for video recording:
// - Counter-clockwise (ROTATION_90) → 270° hint
// - Clockwise (ROTATION_270) → 90° hint
val windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager
val deviceRotation = windowManager.defaultDisplay.rotation
val recordingOrientation = when (deviceRotation) {
Surface.ROTATION_0 -> Orientation.PORTRAIT
Surface.ROTATION_90 -> Orientation.LANDSCAPE_LEFT // CCW rotation, top to left
Surface.ROTATION_90 -> Orientation.LANDSCAPE_RIGHT
Surface.ROTATION_180 -> Orientation.PORTRAIT_UPSIDE_DOWN
Surface.ROTATION_270 -> Orientation.LANDSCAPE_RIGHT // CW rotation, top to right
Surface.ROTATION_270 -> Orientation.LANDSCAPE_LEFT
else -> Orientation.PORTRAIT
}
Log.i(TAG, "ROTATION_DEBUG: deviceRotation=$deviceRotation, recordingOrientation=$recordingOrientation, options.orientation=${options.orientation}")
val recording = RecordingSession(
context,
@@ -447,7 +448,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
enableAudio,
fps,
videoOutput.enableHdr,
recordingOrientation,
orientation,
options,
filePath,
callback,

View File

@@ -7,39 +7,25 @@ import android.media.MediaFormat
import android.util.Log
import android.util.Size
import android.view.Surface
import androidx.media3.common.Format
import androidx.media3.common.MimeTypes
import androidx.media3.common.util.UnstableApi
import androidx.media3.muxer.FragmentedMp4Muxer
import androidx.media3.muxer.Muxer
import com.mrousavy.camera.types.Orientation
import com.mrousavy.camera.types.RecordVideoOptions
import java.io.File
import java.io.FileOutputStream
import java.io.RandomAccessFile
import java.nio.ByteBuffer
import java.nio.ByteOrder
/**
* A recording manager that produces HLS-compatible fragmented MP4 segments.
*
* This produces output similar to the iOS implementation:
* - An initialization segment (init.mp4) containing codec configuration
* - Numbered data segments (0.mp4, 1.mp4, ...) containing media data
*
* Uses AndroidX Media3's FragmentedMp4Muxer which produces proper fMP4 output.
* Uses HlsMuxer (following Android's MediaMuxer pattern) to produce:
* - init.mp4: Initialization segment (ftyp + moov with mvex)
* - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
*/
@UnstableApi
class FragmentedRecordingManager(
private val encoder: MediaCodec,
private val outputDirectory: File,
private val orientationDegrees: Int,
private val targetSegmentDurationUs: Long,
private val callbacks: CameraSession.Callback
private val muxer: HlsMuxer
) : MediaCodec.Callback(), ChunkedRecorderInterface {
companion object {
private const val TAG = "FragmentedRecorder"
private const val DEFAULT_SEGMENT_DURATION_SECONDS = 6
fun fromParams(
callbacks: CameraSession.Callback,
@@ -50,24 +36,20 @@ class FragmentedRecordingManager(
bitRate: Int,
options: RecordVideoOptions,
outputDirectory: File,
segmentDurationSeconds: Int = 6
segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS
): FragmentedRecordingManager {
val mimeType = options.videoCodec.toMimeType()
// For fragmented MP4: DON'T swap dimensions, use camera's native dimensions.
// The C++ VideoPipeline uses a custom transform matrix (not the display transform).
// This gives us raw sensor frames, and we rely on rotation metadata for playback.
val cameraOrientationDegrees = when (cameraOrientation) {
Orientation.LANDSCAPE_LEFT -> 0 // CCW landscape
Orientation.LANDSCAPE_RIGHT -> 0 // CW landscape
Orientation.PORTRAIT -> 90
Orientation.PORTRAIT_UPSIDE_DOWN -> 270
}
Log.i(TAG, "ROTATION_DEBUG FragmentedRecordingManager: cameraOrientation=$cameraOrientation, cameraOrientationDegrees=$cameraOrientationDegrees, inputSize=${size.width}x${size.height}")
val cameraOrientationDegrees = cameraOrientation.toDegrees()
val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees()
// Keep original dimensions - don't swap. Let rotation metadata handle orientation.
// Use size dimensions directly - the encoder output format will have the actual dimensions
// Don't swap based on orientation here; the camera pipeline handles that
val width = size.width
val height = size.height
Log.i(TAG, "ROTATION_DEBUG FragmentedRecordingManager: outputDimensions=${width}x${height} (no swap)")
Log.d(TAG, "Input size: ${size.width}x${size.height}, " +
"cameraOrientation: $cameraOrientation ($cameraOrientationDegrees°), " +
"recordingOrientation: $recordingOrientationDegrees°")
val format = MediaFormat.createVideoFormat(mimeType, width, height)
val codec = MediaCodec.createEncoderByType(mimeType)
@@ -76,151 +58,48 @@ class FragmentedRecordingManager(
MediaFormat.KEY_COLOR_FORMAT,
MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface
)
fps?.apply {
format.setInteger(MediaFormat.KEY_FRAME_RATE, this)
}
// I-frame interval affects segment boundaries
val effectiveFps = fps ?: 30
format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps)
format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
Log.d(TAG, "Video Format: $format, orientationDegrees: $cameraOrientationDegrees")
Log.d(TAG, "Video Format: $format, orientation: $recordingOrientationDegrees")
codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
return FragmentedRecordingManager(
codec,
outputDirectory,
cameraOrientationDegrees,
segmentDurationSeconds * 1_000_000L,
callbacks
// Create muxer with callbacks and orientation
val muxer = HlsMuxer(
outputDirectory = outputDirectory,
callback = object : HlsMuxer.Callback {
override fun onInitSegmentReady(file: File) {
callbacks.onInitSegmentReady(file)
}
override fun onMediaSegmentReady(file: File, index: Int, durationUs: Long) {
callbacks.onVideoChunkReady(file, index, durationUs)
}
},
orientationDegrees = recordingOrientationDegrees
)
muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L)
Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees")
return FragmentedRecordingManager(codec, muxer)
}
}
// State management
private var chunkIndex = 0
private var encodedFormat: MediaFormat? = null
private var recording = false
// Segment tracking
private var segmentContext: SegmentContext? = null
private var initSegmentEmitted = false
// Cumulative base time for HLS-compatible timestamps (in timescale units)
// Each segment's baseMediaDecodeTime should be the sum of all previous segment durations
private var cumulativeBaseTimeUs: Long = 0L
// Timescale used in the fMP4 (typically 1000000 for microseconds)
private val timescale: Long = 1_000_000L
private var muxerStarted = false
private var trackIndex = -1
override val surface: Surface = encoder.createInputSurface()
init {
if (!outputDirectory.exists()) {
outputDirectory.mkdirs()
}
encoder.setCallback(this)
}
/**
 * Result from finishing a segment, used for tfdt patching.
 *
 * @property file the segment file that was just closed.
 * @property segmentIndex index of the segment; the file is named "<segmentIndex>.mp4".
 * @property durationUs last sample PTS minus first sample PTS in microseconds, or 0 when the
 * last PTS is not greater than the first.
 */
private data class SegmentResult(
val file: File,
val segmentIndex: Int,
val durationUs: Long
)
/**
 * Writer for a single numbered data segment ("<segmentIndex>.mp4") in the output directory.
 *
 * Construction opens the file, builds a FragmentedMp4Muxer on top of it and registers one video
 * track converted from [format]. The context remembers the first and last presentation timestamps
 * it has seen so that [finish] can report the segment duration.
 *
 * @param format encoder output format used to build the muxer's video track.
 * @property segmentIndex index of this segment; also the file name stem.
 * @param baseTimeUs cumulative start time of this segment; inside this class it is only logged.
 */
private inner class SegmentContext(
    private val format: MediaFormat,
    val segmentIndex: Int,
    private val baseTimeUs: Long // The baseMediaDecodeTime for this segment
) {
    private val filename = "$segmentIndex.mp4"
    val file = File(outputDirectory, filename)
    private val outputStream = FileOutputStream(file)
    private val muxer = FragmentedMp4Muxer.Builder(outputStream).build()

    // Registered once at construction; a plain val avoids the lateinit escape hatch.
    private val videoTrack: Muxer.TrackToken = muxer.addTrack(convertToMedia3Format(format))

    private var startTimeUs: Long = -1L
    private var lastTimeUs: Long = 0L
    private var sampleCount = 0

    init {
        Log.d(TAG, "Created segment context: $filename with baseTimeUs=$baseTimeUs")
    }

    /**
     * Writes one encoded sample to the muxer and updates the first/last PTS bookkeeping.
     */
    fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo) {
        if (startTimeUs < 0) {
            startTimeUs = bufferInfo.presentationTimeUs
            Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FIRST sample: absolutePTS=${bufferInfo.presentationTimeUs}us, baseTimeUs=$baseTimeUs")
        }

        // Log first 3 samples and every keyframe for debugging
        val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
        if (sampleCount < 3 || isKeyFrame) {
            Log.i(TAG, "PTS_DEBUG Segment $segmentIndex sample $sampleCount: PTS=${bufferInfo.presentationTimeUs}us, keyframe=$isKeyFrame")
        }

        lastTimeUs = bufferInfo.presentationTimeUs
        muxer.writeSampleData(videoTrack, buffer, bufferInfo)
        sampleCount++
    }

    /**
     * Check if we've accumulated enough duration to start a new segment.
     * Should only be called when we have a keyframe available.
     */
    fun shouldStartNewSegmentOnKeyframe(): Boolean {
        if (sampleCount == 0) return false // Need at least one sample first
        val currentDurationUs = lastTimeUs - startTimeUs
        return currentDurationUs >= targetSegmentDurationUs
    }

    /**
     * Closes the muxer and the underlying file stream and reports what was written.
     *
     * Close failures are logged, not thrown. The stream is closed in a `finally` block so that
     * a failing muxer close cannot leak the file descriptor.
     */
    fun finish(): SegmentResult {
        try {
            muxer.close()
        } catch (e: Exception) {
            Log.e(TAG, "Error closing segment", e)
        } finally {
            try {
                outputStream.close()
            } catch (e: Exception) {
                Log.e(TAG, "Error closing segment", e)
            }
        }

        val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L
        Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FINISHED: startPTS=${startTimeUs}us, lastPTS=${lastTimeUs}us, duration=${durationUs/1000}ms, samples=$sampleCount, baseTimeUs=$baseTimeUs")
        return SegmentResult(file, segmentIndex, durationUs)
    }
}
/**
 * Rolls over to a fresh data segment.
 *
 * If a segment is currently open it is finished, post-processed for HLS, added to the running
 * base time and reported through the chunk-ready callback. A new segment context is then opened
 * at the current chunk index. Does nothing except log when the encoder format is not yet known.
 */
private fun createNewSegment() {
    val format = encodedFormat ?: run {
        Log.e(TAG, "Cannot create segment: encodedFormat is null")
        return
    }

    val previous = segmentContext
    if (previous != null) {
        val finished = previous.finish()
        // Extract init (if first), strip headers, inject tfdt.
        processSegmentForHLS(finished.file, finished.segmentIndex, cumulativeBaseTimeUs)
        // The next segment starts where this one ended.
        cumulativeBaseTimeUs += finished.durationUs
        callbacks.onVideoChunkReady(finished.file, finished.segmentIndex, finished.durationUs)
    }

    segmentContext = SegmentContext(format, chunkIndex, cumulativeBaseTimeUs)
    chunkIndex++
}
override fun start() {
encoder.start()
recording = true
@@ -229,13 +108,12 @@ class FragmentedRecordingManager(
override fun finish() {
synchronized(this) {
recording = false
// Close final segment and process it for HLS
segmentContext?.let { ctx ->
val result = ctx.finish()
processSegmentForHLS(result.file, result.segmentIndex, cumulativeBaseTimeUs)
callbacks.onVideoChunkReady(result.file, result.segmentIndex, result.durationUs)
if (muxerStarted) {
muxer.stop()
muxer.release()
}
segmentContext = null
try {
encoder.stop()
encoder.release()
@@ -246,6 +124,7 @@ class FragmentedRecordingManager(
}
// MediaCodec.Callback methods
override fun onInputBufferAvailable(codec: MediaCodec, index: Int) {
// Not used for Surface input
}
@@ -257,46 +136,20 @@ class FragmentedRecordingManager(
return
}
// Skip codec config buffers - these contain SPS/PPS with annex-b start codes
// and should NOT be written as samples (they're already in the Format's initializationData)
if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
Log.d(TAG, "Skipping codec config buffer (size=${bufferInfo.size})")
if (!muxerStarted) {
encoder.releaseOutputBuffer(index, false)
return
}
val encodedData = encoder.getOutputBuffer(index)
if (encodedData == null) {
val buffer = encoder.getOutputBuffer(index)
if (buffer == null) {
Log.e(TAG, "getOutputBuffer returned null")
encoder.releaseOutputBuffer(index, false)
return
}
// Create first data segment if needed
if (segmentContext == null) {
createNewSegment()
}
val context = segmentContext
if (context == null) {
encoder.releaseOutputBuffer(index, false)
return
}
try {
// Check if this keyframe should start a new segment BEFORE writing
val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
val shouldStartNewSegment = isKeyFrame && context.shouldStartNewSegmentOnKeyframe()
if (shouldStartNewSegment) {
// Finish old segment WITHOUT writing this keyframe to it
createNewSegment()
// Write keyframe to the NEW segment only
segmentContext?.writeSample(encodedData, bufferInfo)
} else {
// Write to current segment
context.writeSample(encodedData, bufferInfo)
}
muxer.writeSampleData(trackIndex, buffer, bufferInfo)
} catch (e: Exception) {
Log.e(TAG, "Error writing sample", e)
}
@@ -310,453 +163,12 @@ class FragmentedRecordingManager(
}
override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) {
Log.i(TAG, "Output format changed: $format")
encodedFormat = format
// Note: init segment is now extracted from the first segment's ftyp+moov
// rather than created separately (Media3's empty init was not working)
}
synchronized(this) {
Log.i(TAG, "Output format changed: $format")
/**
 * Builds a Media3 Format from the encoder's MediaFormat.
 *
 * Copies MIME type (falling back to H.264), width, height, the configured rotation, and the
 * optional bit rate, frame rate and codec-specific data ("csd-0", "csd-1") when present.
 */
private fun convertToMedia3Format(mediaFormat: MediaFormat): Format {
    // Optional integer keys throw when absent; -1 marks "not available".
    fun optionalInt(key: String): Int =
        try {
            mediaFormat.getInteger(key)
        } catch (e: Exception) {
            -1
        }

    // Copies the readable bytes without disturbing the source buffer's position.
    fun snapshot(source: ByteBuffer): ByteArray {
        val copy = ByteArray(source.remaining())
        source.duplicate().get(copy)
        return copy
    }

    val mimeType = mediaFormat.getString(MediaFormat.KEY_MIME) ?: MimeTypes.VIDEO_H264
    val width = mediaFormat.getInteger(MediaFormat.KEY_WIDTH)
    val height = mediaFormat.getInteger(MediaFormat.KEY_HEIGHT)
    val bitRate = optionalInt(MediaFormat.KEY_BIT_RATE)
    val frameRate = optionalInt(MediaFormat.KEY_FRAME_RATE)

    // Codec Specific Data, required for the init segment: csd-0 holds the SPS, csd-1 the PPS.
    val csd0 = mediaFormat.getByteBuffer("csd-0")
    val csd1 = mediaFormat.getByteBuffer("csd-1")

    val initData = mutableListOf<ByteArray>()
    if (csd0 != null) {
        val sps = snapshot(csd0)
        initData.add(sps)
        val spsHex = sps.take(32).joinToString("") { b -> "%02x".format(b) }
        Log.i(TAG, "CSD_DEBUG: csd-0 (SPS) size=${sps.size} bytes, hex=$spsHex...")
    }
    if (csd1 != null) {
        val pps = snapshot(csd1)
        initData.add(pps)
        val ppsHex = pps.joinToString("") { b -> "%02x".format(b) }
        Log.i(TAG, "CSD_DEBUG: csd-1 (PPS) size=${pps.size} bytes, hex=$ppsHex")
    }

    val totalCsdSize = initData.sumOf { it.size }
    Log.i(TAG, "CSD_DEBUG: Total CSD size=$totalCsdSize bytes (csd-0=${csd0?.remaining() ?: 0}, csd-1=${csd1?.remaining() ?: 0})")
    Log.i(TAG, "ROTATION_DEBUG convertToMedia3Format: orientationDegrees=$orientationDegrees, width=$width, height=$height")

    val builder = Format.Builder()
        .setSampleMimeType(mimeType)
        .setWidth(width)
        .setHeight(height)
        .setRotationDegrees(orientationDegrees)
    if (bitRate > 0) builder.setAverageBitrate(bitRate)
    if (frameRate > 0) builder.setFrameRate(frameRate.toFloat())
    if (initData.isNotEmpty()) builder.setInitializationData(initData)
    return builder.build()
}
/**
 * Rewrites a finished segment file in place so it can be served as an HLS media segment.
 *
 * Steps:
 * 1. Locate the first moof box; everything before it is the ftyp+moov header.
 * 2. For segment 0 (once only), write that header to init.mp4 and report it via the callback.
 * 3. Keep only the bytes from moof onwards, inject a tfdt box, and overwrite the file.
 *
 * Any exception is logged and swallowed; if no moof is found the file is left untouched.
 *
 * @param file segment file to rewrite.
 * @param segmentIndex index of the segment; index 0 triggers init.mp4 extraction.
 * @param baseMediaDecodeTimeUs value handed to the tfdt injection for this segment.
 */
private fun processSegmentForHLS(file: File, segmentIndex: Int, baseMediaDecodeTimeUs: Long) {
    try {
        val segmentBytes = file.readBytes()
        val scanBuffer = ByteBuffer.wrap(segmentBytes).order(ByteOrder.BIG_ENDIAN)

        // Everything before the moof box is header (ftyp + moov).
        val fragmentStart = findMoofPosition(scanBuffer)
        if (fragmentStart < 0) {
            Log.e(TAG, "HLS_PROCESS: Could not find moof in ${file.name}")
            return
        }
        Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex - moof starts at $fragmentStart, total size=${segmentBytes.size}")

        val isFirstSegment = segmentIndex == 0
        if (isFirstSegment && !initSegmentEmitted) {
            val header = segmentBytes.copyOfRange(0, fragmentStart)
            val initFile = File(outputDirectory, "init.mp4")
            initFile.writeBytes(header)
            Log.i(TAG, "HLS_PROCESS: Created init.mp4 with ${header.size} bytes (ftyp+moov)")

            // Debug aid: log the box layout of the init segment.
            dumpMp4BoxStructure(header, "INIT_STRUCTURE")

            callbacks.onInitSegmentReady(initFile)
            initSegmentEmitted = true
        }

        // Fragment = moof + mdat; inside it moof sits at offset 0.
        val fragment = segmentBytes.copyOfRange(fragmentStart, segmentBytes.size)
        Log.d(TAG, "HLS_PROCESS: Extracted fragment of ${fragment.size} bytes")

        val patched = injectTfdtIntoFragment(fragment, baseMediaDecodeTimeUs)

        // Overwrite the original file with the header-less, patched fragment.
        file.writeBytes(patched)
        Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex processed - header stripped, tfdt injected, final size=${patched.size}")
    } catch (e: Exception) {
        Log.e(TAG, "Error processing segment ${file.name} for HLS", e)
    }
}
/**
 * Finds the byte offset of the first top-level `moof` box in [buffer].
 *
 * Walks the top-level boxes from offset 0 up to the buffer's limit, using each box's 32-bit
 * size field to hop to the next one. Scanning stops when:
 * - a box reports a size below 8 (this includes the special values 0 and 1, which are not
 *   supported here), or
 * - a non-moof box claims to extend past the end of the buffer. Without this guard a corrupt
 *   size of 2^31 or more would wrap to a negative step and either throw or loop forever.
 *
 * The buffer's position is moved as a side effect of reading.
 *
 * @param buffer MP4 data; must be big-endian for the box sizes to be read correctly.
 * @return offset of the `moof` box header, or -1 if none was found.
 */
private fun findMoofPosition(buffer: ByteBuffer): Int {
    val limit = buffer.limit()
    var pos = 0
    // A box header needs 8 bytes (size + type); `limit - pos` cannot overflow because pos <= limit.
    while (limit - pos >= 8) {
        buffer.position(pos)
        val size = buffer.int.toLong() and 0xFFFFFFFFL
        val type = buffer.int
        if (size < 8) break
        // 'moof' = 0x6D6F6F66
        if (type == 0x6D6F6F66) {
            return pos
        }
        // A box that runs past the end of the data is corrupt; stop before stepping out of bounds.
        if (size > limit - pos) break
        pos += size.toInt()
    }
    return -1
}
/**
 * Injects a tfdt box into a fragment (moof+mdat).
 * The fragment has moof at position 0 (header already stripped).
 * Also fixes tfhd.base_data_offset since we stripped the original file header.
 *
 * Note: the tfhd fix is applied through a ByteBuffer that wraps [fragmentBytes], so the input
 * array itself is modified when a base_data_offset field is present.
 *
 * @param fragmentBytes the fragment bytes, starting at the moof box.
 * @param baseMediaDecodeTimeUs value written verbatim as the 64-bit baseMediaDecodeTime.
 * @return a new array 20 bytes longer with the tfdt box inserted right after tfhd, or
 * [fragmentBytes] itself when the required boxes could not be located.
 */
private fun injectTfdtIntoFragment(fragmentBytes: ByteArray, baseMediaDecodeTimeUs: Long): ByteArray {
val buffer = ByteBuffer.wrap(fragmentBytes).order(ByteOrder.BIG_ENDIAN)
// Find box positions within the fragment (moof is at position 0)
val positions = findBoxPositionsInFragment(buffer)
if (positions == null) {
Log.e(TAG, "TFDT_INJECT: Could not find required boxes in fragment")
return fragmentBytes
}
val (moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos) = positions
Log.d(TAG, "TFDT_INJECT: Fragment boxes - moof@$moofPos(size=$moofSize), traf@$trafPos, tfhd@$tfhdPos, trun@$trunPos")
// First, fix tfhd.base_data_offset - it was pointing to the original file position
// but now moof is at position 0, so base_data_offset should be 0
fixTfhdBaseDataOffset(buffer, tfhdPos.toInt())
// Create tfdt box (version 1, 64-bit baseMediaDecodeTime)
// Layout: size(4) + type(4) + version(1) + flags(3) + baseMediaDecodeTime(8) = 20 bytes
// NOTE(review): the value is written in microseconds as-is; this is only correct if the
// track's timescale is 1_000_000 - confirm against the muxer that produced the init segment.
val tfdtSize = 20
val tfdtBytes = ByteBuffer.allocate(tfdtSize).order(ByteOrder.BIG_ENDIAN)
tfdtBytes.putInt(tfdtSize) // size
tfdtBytes.putInt(0x74666474) // 'tfdt'
tfdtBytes.put(1.toByte()) // version = 1
tfdtBytes.put(0.toByte()) // flags[0]
tfdtBytes.put(0.toByte()) // flags[1]
tfdtBytes.put(0.toByte()) // flags[2]
tfdtBytes.putLong(baseMediaDecodeTimeUs) // baseMediaDecodeTime
// Create new fragment with tfdt injected after tfhd
val newBytes = ByteArray(fragmentBytes.size + tfdtSize)
val insertPos = tfhdEnd.toInt()
// Copy bytes before insertion point
System.arraycopy(fragmentBytes, 0, newBytes, 0, insertPos)
// Insert tfdt
System.arraycopy(tfdtBytes.array(), 0, newBytes, insertPos, tfdtSize)
// Copy bytes after insertion point
System.arraycopy(fragmentBytes, insertPos, newBytes, insertPos + tfdtSize, fragmentBytes.size - insertPos)
// Update box sizes in the new buffer
val newBuffer = ByteBuffer.wrap(newBytes).order(ByteOrder.BIG_ENDIAN)
// Update moof size
val newMoofSize = moofSize + tfdtSize
newBuffer.putInt(moofPos.toInt(), newMoofSize.toInt())
// Update traf size
val newTrafSize = trafSize + tfdtSize
newBuffer.putInt(trafPos.toInt(), newTrafSize.toInt())
// Update trun data_offset if present
// trunPos was measured in the old array; adding tfdtSize assumes trun comes after tfhd.
val newTrunPos = trunPos.toInt() + tfdtSize
updateTrunDataOffset(newBuffer, newTrunPos, tfdtSize)
Log.i(TAG, "TFDT_INJECT: Injected tfdt with baseMediaDecodeTime=$baseMediaDecodeTimeUs us")
return newBytes
}
/**
 * Data class to hold box positions for tfdt injection.
 * Positions are byte offsets from the start of the scanned buffer; sizes are the boxes'
 * declared 32-bit sizes (header included), widened to Long.
 */
private data class BoxPositions(
val moofPos: Long,
val moofSize: Long,
val trafPos: Long,
val trafSize: Long,
val tfhdPos: Long, // Position of tfhd (need to fix base_data_offset)
val tfhdEnd: Long, // Position right after tfhd where we'll insert tfdt
val trunPos: Long // Position of trun (need to update its data_offset)
)
/**
 * Finds the positions of moof, traf, tfhd, and trun boxes in a fragment.
 * In a fragment, moof is expected to be at position 0.
 *
 * Scans top-level boxes for moof, then its children for traf, then that traf's children for
 * tfhd and trun. Returns on the first traf that contains both a tfhd and a trun.
 * If a traf holds several trun boxes, only the last one's position is kept.
 * The buffer's position is moved as a side effect of reading.
 *
 * NOTE(review): box sizes are converted with toInt(); a declared size of 2^31 or more would
 * wrap negative, and a child size larger than its parent is not rejected - confirm inputs are
 * always well-formed.
 *
 * @param buffer fragment bytes; must be big-endian for box sizes to be read correctly.
 * @return the located positions, or null when no traf with both tfhd and trun was found.
 */
private fun findBoxPositionsInFragment(buffer: ByteBuffer): BoxPositions? {
val fileSize = buffer.limit()
var pos = 0
while (pos < fileSize - 8) {
buffer.position(pos)
// Box header: 32-bit unsigned size followed by the 4-byte type code.
val size = buffer.int.toLong() and 0xFFFFFFFFL
val type = buffer.int
// Sizes below 8 cannot hold a header; stop scanning.
if (size < 8) break
// 'moof' = 0x6D6F6F66
if (type == 0x6D6F6F66) {
val moofPos = pos.toLong()
val moofSize = size
val moofEnd = pos + size.toInt()
// Children start right after the 8-byte moof header.
var childPos = pos + 8
while (childPos < moofEnd - 8) {
buffer.position(childPos)
val childSize = buffer.int.toLong() and 0xFFFFFFFFL
val childType = buffer.int
if (childSize < 8) break
// 'traf' = 0x74726166
if (childType == 0x74726166) {
val trafPos = childPos.toLong()
val trafSize = childSize
val trafEnd = childPos + childSize.toInt()
var trafChildPos = childPos + 8
// -1 marks "not seen yet" for each box we are looking for.
var tfhdPos: Long = -1
var tfhdEnd: Long = -1
var trunPos: Long = -1
while (trafChildPos < trafEnd - 8) {
buffer.position(trafChildPos)
val trafChildSize = buffer.int.toLong() and 0xFFFFFFFFL
val trafChildType = buffer.int
if (trafChildSize < 8) break
// 'tfhd' = 0x74666864
if (trafChildType == 0x74666864) {
tfhdPos = trafChildPos.toLong()
tfhdEnd = trafChildPos + trafChildSize
}
// 'trun' = 0x7472756E
else if (trafChildType == 0x7472756E) {
trunPos = trafChildPos.toLong()
}
trafChildPos += trafChildSize.toInt()
}
// Nested boxes always sit past offset 0, so "> 0" is equivalent to "was found".
if (tfhdPos > 0 && tfhdEnd > 0 && trunPos > 0) {
return BoxPositions(moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos)
}
}
childPos += childSize.toInt()
}
}
pos += size.toInt()
}
return null
}
/**
 * Shifts the trun box's data_offset by [offsetDelta] when the field is present.
 *
 * trun layout as read here:
 * - 4 bytes: size
 * - 4 bytes: type ('trun')
 * - 1 byte: version
 * - 3 bytes: flags
 * - 4 bytes: sample_count
 * - [optional] 4 bytes: data_offset (present when flags & 0x000001)
 *
 * The buffer's position is moved as a side effect of reading.
 *
 * @param buffer buffer containing the trun box.
 * @param trunPos offset of the trun box header within [buffer].
 * @param offsetDelta amount added to the existing data_offset.
 */
private fun updateTrunDataOffset(buffer: ByteBuffer, trunPos: Int, offsetDelta: Int) {
    // Jump past size and type to the version/flags word.
    buffer.position(trunPos + 8)
    buffer.get() // version (value not needed)

    // Assemble the 24-bit big-endian flags field one byte at a time.
    var flagBits = 0
    repeat(3) {
        flagBits = (flagBits shl 8) or (buffer.get().toInt() and 0xFF)
    }

    val hasDataOffset = (flagBits and 0x000001) != 0
    if (!hasDataOffset) {
        Log.d(TAG, "TFDT_INJECT: trun has no data_offset field (flags=0x${flagBits.toString(16)})")
        return
    }

    buffer.getInt() // sample_count (value not needed)

    // size(4) + type(4) + version(1) + flags(3) + sample_count(4)
    val fieldPos = trunPos + 16
    buffer.position(fieldPos)
    val previous = buffer.getInt()
    val shifted = previous + offsetDelta
    buffer.putInt(fieldPos, shifted)
    Log.d(TAG, "TFDT_INJECT: Updated trun data_offset: $previous -> $shifted")
}
/**
 * Resets the tfhd box's base_data_offset to 0 when the field is present.
 *
 * After ftyp+moov are stripped from a segment, an absolute offset into the original file no
 * longer applies; moof now sits at the start of the fragment, so 0 is written instead.
 *
 * tfhd layout as read here:
 * - 4 bytes: size
 * - 4 bytes: type ('tfhd')
 * - 1 byte: version
 * - 3 bytes: flags
 * - 4 bytes: track_id
 * - [optional] 8 bytes: base_data_offset (present when flags & 0x000001)
 *
 * The buffer's position is moved as a side effect of reading.
 *
 * @param buffer buffer containing the tfhd box; modified in place.
 * @param tfhdPos offset of the tfhd box header within [buffer].
 */
private fun fixTfhdBaseDataOffset(buffer: ByteBuffer, tfhdPos: Int) {
    // Jump past size and type to the version/flags word.
    buffer.position(tfhdPos + 8)
    buffer.get() // version (value not needed)

    // Assemble the 24-bit big-endian flags field one byte at a time.
    var flagBits = 0
    repeat(3) {
        flagBits = (flagBits shl 8) or (buffer.get().toInt() and 0xFF)
    }

    val hasBaseDataOffset = (flagBits and 0x000001) != 0
    if (!hasBaseDataOffset) {
        Log.d(TAG, "TFHD_FIX: tfhd has no base_data_offset field (flags=0x${flagBits.toString(16)})")
        return
    }

    buffer.getInt() // track_id (value not needed)

    // size(4) + type(4) + version(1) + flags(3) + track_id(4)
    val fieldPos = tfhdPos + 16
    buffer.position(fieldPos)
    val staleOffset = buffer.getLong()
    // Set to 0 since moof is now at start of fragment
    buffer.putLong(fieldPos, 0L)
    Log.i(TAG, "TFHD_FIX: Fixed base_data_offset: $staleOffset -> 0")
}
/**
 * Debug helper: logs the box tree of an in-memory MP4 blob.
 *
 * @param data raw MP4 bytes to inspect.
 * @param logPrefix prefix prepended to every log line.
 */
private fun dumpMp4BoxStructure(data: ByteArray, logPrefix: String) {
    val bigEndianView = ByteBuffer.wrap(data).order(ByteOrder.BIG_ENDIAN)
    dumpBoxesRecursive(
        buffer = bigEndianView,
        start = 0,
        end = data.size,
        depth = 0,
        logPrefix = logPrefix
    )
}
private fun dumpBoxesRecursive(buffer: ByteBuffer, start: Int, end: Int, depth: Int, logPrefix: String) {
var pos = start
val indent = " ".repeat(depth)
while (pos < end - 8) {
buffer.position(pos)
val size = buffer.int.toLong() and 0xFFFFFFFFL
val typeInt = buffer.int
val typeBytes = ByteArray(4)
typeBytes[0] = ((typeInt shr 24) and 0xFF).toByte()
typeBytes[1] = ((typeInt shr 16) and 0xFF).toByte()
typeBytes[2] = ((typeInt shr 8) and 0xFF).toByte()
typeBytes[3] = (typeInt and 0xFF).toByte()
val typeStr = String(typeBytes, Charsets.US_ASCII)
if (size < 8 || pos + size > end) break
Log.i(TAG, "$logPrefix: $indent[$typeStr] size=$size @ $pos")
// For ftyp, dump the brands
if (typeStr == "ftyp" && size >= 16) {
buffer.position(pos + 8)
val majorBrand = ByteArray(4)
buffer.get(majorBrand)
val minorVersion = buffer.int
Log.i(TAG, "$logPrefix: $indent major_brand=${String(majorBrand)}, minor_version=$minorVersion")
val compatBrandsStart = pos + 16
val compatBrandsEnd = pos + size.toInt()
val brands = mutableListOf<String>()
var brandPos = compatBrandsStart
while (brandPos + 4 <= compatBrandsEnd) {
buffer.position(brandPos)
val brand = ByteArray(4)
buffer.get(brand)
brands.add(String(brand))
brandPos += 4
}
Log.i(TAG, "$logPrefix: $indent compatible_brands=${brands.joinToString(",")}")
}
// For avcC, dump the SPS/PPS info
if (typeStr == "avcC" && size >= 13) {
buffer.position(pos + 8)
val configVersion = buffer.get().toInt() and 0xFF
val profileIdc = buffer.get().toInt() and 0xFF
val profileCompat = buffer.get().toInt() and 0xFF
val levelIdc = buffer.get().toInt() and 0xFF
val lengthSizeMinusOne = buffer.get().toInt() and 0x03
val numSps = buffer.get().toInt() and 0x1F
Log.i(TAG, "$logPrefix: $indent avcC: version=$configVersion, profile=$profileIdc, level=$levelIdc, numSPS=$numSps")
// Read SPS lengths
var spsTotal = 0
for (i in 0 until numSps) {
val spsLen = buffer.short.toInt() and 0xFFFF
spsTotal += spsLen
Log.i(TAG, "$logPrefix: $indent SPS[$i] length=$spsLen")
buffer.position(buffer.position() + spsLen) // Skip SPS data
}
// Read PPS count and lengths
if (buffer.position() < pos + size) {
val numPps = buffer.get().toInt() and 0xFF
var ppsTotal = 0
for (i in 0 until numPps) {
if (buffer.position() + 2 <= pos + size) {
val ppsLen = buffer.short.toInt() and 0xFFFF
ppsTotal += ppsLen
Log.i(TAG, "$logPrefix: $indent PPS[$i] length=$ppsLen")
buffer.position(buffer.position() + ppsLen) // Skip PPS data
}
}
Log.i(TAG, "$logPrefix: $indent avcC total: ${size} bytes, SPS=$spsTotal bytes, PPS=$ppsTotal bytes")
}
}
// Recurse into container boxes
val containerBoxes = setOf("moov", "trak", "mdia", "minf", "stbl", "stsd", "mvex", "edts")
if (typeStr in containerBoxes) {
// stsd has 8 extra bytes (version/flags + entry_count) before children
val childStart = if (typeStr == "stsd") pos + 16 else pos + 8
dumpBoxesRecursive(buffer, childStart, pos + size.toInt(), depth + 1, logPrefix)
}
// avc1 is a sample entry, structure: 8 byte header + 78 byte fixed fields + child boxes
if (typeStr == "avc1") {
dumpBoxesRecursive(buffer, pos + 86, pos + size.toInt(), depth + 1, logPrefix)
}
pos += size.toInt()
trackIndex = muxer.addTrack(format)
muxer.start()
muxerStarted = true
}
}
}

View File

@@ -0,0 +1,857 @@
package com.mrousavy.camera.core
import android.media.MediaCodec
import android.media.MediaFormat
import android.util.Log
import java.io.ByteArrayOutputStream
import java.io.DataOutputStream
import java.io.File
import java.io.FileOutputStream
import java.nio.ByteBuffer
/**
* A muxer for creating HLS-compatible fragmented MP4 output.
*
* Follows the same pattern as Android's MediaMuxer:
* 1. Create muxer with output directory
* 2. addTrack() with MediaFormat
* 3. start() - writes init.mp4
* 4. writeSampleData() for each encoded sample
* 5. stop() - finalizes last segment
* 6. release() - cleanup
*
* Produces:
* - init.mp4: Initialization segment (ftyp + moov with mvex)
* - 0.mp4, 1.mp4, ...: Media segments (moof + mdat)
*/
class HlsMuxer(
private val outputDirectory: File,
private val callback: Callback,
private val orientationDegrees: Int = 0
) {
companion object {
private const val TAG = "HlsMuxer"
private const val DEFAULT_SEGMENT_DURATION_US = 6_000_000L // 6 seconds
}
interface Callback {
fun onInitSegmentReady(file: File)
fun onMediaSegmentReady(file: File, index: Int, durationUs: Long)
}
// Configuration
private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
private var timescale: Int = 30000 // Default, updated from format
// State
private var state = State.UNINITIALIZED
private var trackFormat: MediaFormat? = null
private var sequenceNumber = 1
private var segmentIndex = 0
// Current segment data
private val pendingSamples = mutableListOf<Sample>()
private var segmentStartTimeUs = -1L
private var lastPresentationTimeUs = 0L
private enum class State {
UNINITIALIZED,
INITIALIZED,
STARTED,
STOPPED,
RELEASED
}
private data class Sample(
val data: ByteArray,
val presentationTimeUs: Long,
var durationUs: Long,
val isKeyFrame: Boolean
)
// ==================== Annex-B to AVCC Conversion ====================
/**
 * Re-frames an H.264 sample from Annex-B to AVCC.
 *
 * Annex-B separates NAL units with start codes (00 00 00 01 or 00 00 01);
 * AVCC instead prefixes every NAL unit with its length as a 4-byte
 * big-endian integer, matching the 4-byte NAL length size declared in the
 * avcC box written by this muxer.
 *
 * @param annexBData one encoded sample as delivered by the encoder
 * @return the length-prefixed sample, or [annexBData] itself (same array)
 *   when the parser reports no NAL units
 */
private fun convertAnnexBToAvcc(annexBData: ByteArray): ByteArray {
    val parsedUnits = parseAnnexBNalUnits(annexBData)
    if (parsedUnits.isEmpty()) {
        Log.w(TAG, "No NAL units found in sample, returning original data")
        return annexBData
    }
    val avcc = ByteArrayOutputStream()
    DataOutputStream(avcc).apply {
        parsedUnits.forEach { nal ->
            writeInt(nal.size) // 4-byte big-endian length prefix
            write(nal)         // NAL payload, start code already removed
        }
    }
    return avcc.toByteArray()
}
/**
 * Splits an Annex-B byte stream into its NAL units.
 *
 * Bytes preceding the first start code are skipped. If the very first
 * position holds no start code and the leading 4 bytes read as a plausible
 * big-endian length (positive and not larger than the remaining data), the
 * input is assumed to be AVCC already and an empty list is returned.
 *
 * @return the NAL unit payloads without their start codes, in stream order
 */
private fun parseAnnexBNalUnits(data: ByteArray): List<ByteArray> {
    val units = mutableListOf<ByteArray>()
    var cursor = 0
    while (cursor < data.size) {
        val prefixLength = findStartCode(data, cursor)
        if (prefixLength > 0) {
            val payloadStart = cursor + prefixLength
            // The payload runs up to the next start code, or to the end of the data.
            var payloadEnd = data.size
            for (probe in payloadStart until data.size - 2) {
                if (findStartCode(data, probe) > 0) {
                    payloadEnd = probe
                    break
                }
            }
            // Back-to-back start codes yield an empty payload, which is dropped.
            if (payloadEnd > payloadStart) {
                units.add(data.copyOfRange(payloadStart, payloadEnd))
            }
            cursor = payloadEnd
            continue
        }
        // No start code here. Only at the very beginning do we test whether the
        // data might already be length-prefixed (AVCC) rather than Annex-B.
        if (cursor == 0 && units.isEmpty() && data.size >= 4) {
            val possibleLength = ByteBuffer.wrap(data, 0, 4).int
            if (possibleLength > 0 && possibleLength <= data.size - 4) {
                Log.d(TAG, "Data appears to already be in AVCC format")
                return emptyList()
            }
        }
        cursor++
    }
    return units
}
/**
 * Tests whether an Annex-B start code begins exactly at [offset].
 *
 * @return 4 for `00 00 00 01`, 3 for `00 00 01`, or 0 when neither pattern
 *   starts at [offset] (including when too few bytes remain)
 */
private fun findStartCode(data: ByteArray, offset: Int): Int {
    val zero: Byte = 0
    val one: Byte = 1
    val remaining = data.size - offset
    // Both patterns need at least three bytes and begin with two zero bytes.
    if (remaining < 3 || data[offset] != zero || data[offset + 1] != zero) {
        return 0
    }
    return when {
        data[offset + 2] == one -> 3
        remaining >= 4 && data[offset + 2] == zero && data[offset + 3] == one -> 4
        else -> 0
    }
}
/**
 * Overrides the target duration of a media segment.
 *
 * Only allowed while the muxer has not been started yet.
 *
 * @param durationUs target segment duration in microseconds
 * @throws IllegalStateException if the muxer was already started
 */
fun setSegmentDuration(durationUs: Long) {
    val alreadyStarted = state != State.UNINITIALIZED && state != State.INITIALIZED
    check(!alreadyStarted) {
        "Cannot set segment duration after start()"
    }
    targetSegmentDurationUs = durationUs
}
/**
 * Registers the (single) video track and derives the media timescale.
 *
 * The timescale is `fps * 1000`. When the format carries no usable frame
 * rate (key missing, wrong type, or a non-positive value) 30 fps is assumed;
 * a non-positive value would otherwise produce a timescale of 0, which is
 * not a valid MP4 timescale and would collapse every duration to 0.
 *
 * @param format The MediaFormat describing the track
 * @return Track index (always 0 for now, single video track)
 * @throws IllegalStateException if a track was already added or the muxer
 *   has progressed past the uninitialized state
 */
fun addTrack(format: MediaFormat): Int {
    check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" }
    trackFormat = format
    val fallbackFps = 30
    val declaredFps = try {
        format.getInteger(MediaFormat.KEY_FRAME_RATE)
    } catch (e: Exception) {
        fallbackFps
    }
    // Guard against encoders reporting 0 (or garbage) as the frame rate.
    val fps = if (declaredFps > 0) declaredFps else fallbackFps
    timescale = fps * 1000 // Use fps * 1000 for good precision
    state = State.INITIALIZED
    val formatWidth = try { format.getInteger(MediaFormat.KEY_WIDTH) } catch (e: Exception) { -1 }
    val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 }
    Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
        "encoder output: ${formatWidth}x${formatHeight}, " +
        "timescale=$timescale, orientation=$orientationDegrees°")
    return 0 // Single track, index 0
}
/**
 * Starts the muxer: ensures the output directory exists, writes `init.mp4`
 * (the initialization segment) into it and reports the file through
 * [Callback.onInitSegmentReady].
 *
 * @throws IllegalStateException if [addTrack] has not been called yet
 */
fun start() {
    check(state == State.INITIALIZED) { "Must call addTrack() before start()" }
    val format = checkNotNull(trackFormat) { "No track format" }
    if (!outputDirectory.exists()) {
        outputDirectory.mkdirs()
    }
    val initBytes = buildInitSegment(format)
    val initFile = File(outputDirectory, "init.mp4")
    initFile.writeBytes(initBytes)
    Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)")
    callback.onInitSegmentReady(initFile)
    // Only flip the state once the init segment was written and announced.
    state = State.STARTED
}
/**
 * Queues one encoded sample and cuts a new segment at keyframe boundaries.
 *
 * A segment is finalized when a keyframe arrives and at least the target
 * segment duration has elapsed since the segment's first sample.
 *
 * The duration of the previously queued sample is set to the PTS delta to
 * this sample *before* a segment is finalized, so the last sample of a
 * segment is written with its real duration rather than an estimate.
 *
 * The sample bytes are read through a duplicate of [buffer]; the caller's
 * position and limit are left untouched.
 *
 * @param trackIndex Track index (must be 0)
 * @param buffer The encoded sample data
 * @param bufferInfo Sample metadata (size, presentation time, flags)
 * @throws IllegalStateException if the muxer is not started or the track
 *   index is not 0
 */
fun writeSampleData(trackIndex: Int, buffer: ByteBuffer, bufferInfo: MediaCodec.BufferInfo) {
    check(state == State.STARTED) { "Must call start() before writeSampleData()" }
    check(trackIndex == 0) { "Invalid track index: $trackIndex" }
    // Skip codec config data (already in init segment)
    if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
        return
    }
    val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
    val presentationTimeUs = bufferInfo.presentationTimeUs
    // Initialize segment start time
    if (segmentStartTimeUs < 0) {
        segmentStartTimeUs = presentationTimeUs
    }
    // Correct the previous sample's duration now that the next PTS is known.
    // This must happen before the segment is possibly finalized below.
    // Clamped at 0 so a backwards PTS cannot turn into a huge unsigned duration.
    val previousSample = pendingSamples.lastOrNull()
    if (previousSample != null) {
        previousSample.durationUs =
            (presentationTimeUs - previousSample.presentationTimeUs).coerceAtLeast(0L)
    }
    // Check if we should finalize current segment (at keyframe boundaries)
    if (isKeyFrame && pendingSamples.isNotEmpty()) {
        val segmentDurationUs = presentationTimeUs - segmentStartTimeUs
        if (segmentDurationUs >= targetSegmentDurationUs) {
            finalizeCurrentSegment()
            segmentStartTimeUs = presentationTimeUs
        }
    }
    // Copy the sample bytes via a duplicate so the caller's buffer state is
    // not modified. The limit is set before the position because position()
    // rejects values beyond the current limit.
    val rawData = ByteArray(bufferInfo.size)
    val view = buffer.duplicate()
    view.limit(bufferInfo.offset + bufferInfo.size)
    view.position(bufferInfo.offset)
    view.get(rawData)
    // Convert Annex-B (start codes) to AVCC (length prefixes)
    val data = convertAnnexBToAvcc(rawData)
    // Provisional duration for this sample (replaced when the next sample
    // arrives): reuse the previous frame interval, or assume 30fps for the
    // very first sample.
    val estimatedDurationUs = previousSample?.durationUs?.takeIf { it > 0 }
        ?: (1_000_000L / 30)
    pendingSamples.add(Sample(
        data = data,
        presentationTimeUs = presentationTimeUs,
        durationUs = estimatedDurationUs,
        isKeyFrame = isKeyFrame
    ))
    lastPresentationTimeUs = presentationTimeUs
}
/**
 * Stops the muxer. Samples still queued are flushed as a final segment
 * before the state changes to stopped.
 *
 * @throws IllegalStateException if the muxer is not currently started
 */
fun stop() {
    check(state == State.STARTED) { "Muxer not started" }
    val hasQueuedSamples = !pendingSamples.isEmpty()
    if (hasQueuedSamples) {
        finalizeCurrentSegment()
    }
    state = State.STOPPED
    Log.d(TAG, "Muxer stopped, wrote $segmentIndex segments")
}
/**
 * Releases the muxer. A muxer that is still started is stopped first (which
 * flushes queued samples); afterwards any remaining queued samples are
 * discarded and the state becomes released.
 */
fun release() {
    when (state) {
        State.STARTED -> stop()
        else -> Unit
    }
    pendingSamples.clear()
    state = State.RELEASED
}
/**
 * Writes all queued samples as `<segmentIndex>.mp4`, reports the file via
 * [Callback.onMediaSegmentReady] and resets the queue.
 *
 * Does nothing when no samples are queued. Any exception raised while
 * building, writing or reporting the segment is logged and swallowed; in
 * that case the counters are not advanced and the samples stay queued.
 */
private fun finalizeCurrentSegment() {
    val firstSample = pendingSamples.firstOrNull() ?: return
    try {
        val segmentStartUs = firstSample.presentationTimeUs
        val segmentBytes = buildMediaSegment(pendingSamples, sequenceNumber, segmentStartUs)
        val target = File(outputDirectory, "$segmentIndex.mp4")
        target.writeBytes(segmentBytes)
        // Segment duration = span between first and last PTS plus the last sample's own duration.
        val finalSample = pendingSamples.last()
        val durationUs = (finalSample.presentationTimeUs - segmentStartUs) + finalSample.durationUs
        Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " +
            "duration=${durationUs / 1000}ms, size=${segmentBytes.size} bytes")
        callback.onMediaSegmentReady(target, segmentIndex, durationUs)
        segmentIndex += 1
        sequenceNumber += 1
        pendingSamples.clear()
    } catch (e: Exception) {
        Log.e(TAG, "Error finalizing segment $segmentIndex", e)
    }
}
// ==================== Init Segment Building ====================
/**
 * Assembles the initialization segment: an `ftyp` box followed by a `moov`
 * box describing the track.
 *
 * @param format track format; must provide width, height, `csd-0` (SPS) and
 *   `csd-1` (PPS)
 * @throws IllegalArgumentException if `csd-0` or `csd-1` is missing
 */
private fun buildInitSegment(format: MediaFormat): ByteArray {
    val width = format.getInteger(MediaFormat.KEY_WIDTH)
    val height = format.getInteger(MediaFormat.KEY_HEIGHT)
    val spsBuffer = requireNotNull(format.getByteBuffer("csd-0")) { "Missing SPS (csd-0)" }
    val sps = extractNalUnit(spsBuffer)
    val ppsBuffer = requireNotNull(format.getByteBuffer("csd-1")) { "Missing PPS (csd-1)" }
    val pps = extractNalUnit(ppsBuffer)
    val ftyp = buildFtypBox()
    val moov = buildMoovBox(width, height, sps, pps)
    return ftyp + moov
}
/**
 * Copies the remaining bytes of [buffer] and removes one leading Annex-B
 * start code (`00 00 00 01` or `00 00 01`) if present.
 *
 * The buffer is read through a duplicate, so its position is not changed.
 *
 * @return the bytes after the start code, or all copied bytes when the data
 *   does not begin with a start code
 */
private fun extractNalUnit(buffer: ByteBuffer): ByteArray {
    val bytes = ByteArray(buffer.remaining()).also { buffer.duplicate().get(it) }
    val zero: Byte = 0
    val one: Byte = 1
    val startsWithTwoZeros = bytes.size >= 3 && bytes[0] == zero && bytes[1] == zero
    val prefixLength = when {
        startsWithTwoZeros && bytes.size >= 4 && bytes[2] == zero && bytes[3] == one -> 4
        startsWithTwoZeros && bytes[2] == one -> 3
        else -> 0
    }
    return if (prefixLength == 0) bytes else bytes.copyOfRange(prefixLength, bytes.size)
}
/**
 * Builds the `ftyp` box: major brand `isom`, minor version 0x200 and the
 * compatible brands isom, iso5, iso6, avc1, mp41 and dash.
 */
private fun buildFtypBox(): ByteArray {
    val compatibleBrands = arrayOf("isom", "iso5", "iso6", "avc1", "mp41", "dash")
    // box header (8) + major brand (4) + minor version (4) + 4 bytes per brand
    val boxSize = 16 + 4 * compatibleBrands.size
    val bytes = ByteArrayOutputStream(boxSize)
    with(DataOutputStream(bytes)) {
        writeInt(boxSize)
        writeBytes("ftyp")
        writeBytes("isom") // major brand
        writeInt(0x200)    // minor version
        for (brand in compatibleBrands) {
            writeBytes(brand)
        }
    }
    return bytes.toByteArray()
}
/**
 * Builds the `moov` box from its three children, in order: `mvhd`, `trak`
 * and `mvex`.
 */
private fun buildMoovBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
    val movieHeader = buildMvhdBox()
    val track = buildTrakBox(width, height, sps, pps)
    val movieExtends = buildMvexBox()
    return wrapBox("moov", movieHeader + track + movieExtends)
}
/**
 * Builds the version-0 `mvhd` (movie header) box using the muxer's
 * timescale, a duration of 0, rate 1.0, volume 1.0, the unity matrix and
 * 2 as the next track ID.
 */
private fun buildMvhdBox(): ByteArray {
    val payload = ByteArrayOutputStream()
    DataOutputStream(payload).apply {
        writeInt(0)          // version & flags
        writeInt(0)          // creation time
        writeInt(0)          // modification time
        writeInt(timescale)  // timescale
        writeInt(0)          // duration
        writeInt(0x00010000) // rate = 1.0
        writeShort(0x0100)   // volume = 1.0
        writeShort(0)        // reserved
        repeat(2) { writeInt(0) } // reserved
        // Unity matrix, row by row
        val unityMatrix = intArrayOf(
            0x00010000, 0, 0,
            0, 0x00010000, 0,
            0, 0, 0x40000000
        )
        for (entry in unityMatrix) {
            writeInt(entry)
        }
        repeat(6) { writeInt(0) } // pre-defined
        writeInt(2)               // next track ID
    }
    return wrapBox("mvhd", payload.toByteArray())
}
/** Builds the `trak` box: a `tkhd` box followed by an `mdia` box. */
private fun buildTrakBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
    val trackHeader = buildTkhdBox(width, height)
    val media = buildMdiaBox(width, height, sps, pps)
    return wrapBox("trak", trackHeader + media)
}
/**
 * Builds the version-0 `tkhd` (track header) box for track ID 1.
 *
 * The matrix comes from [writeRotationMatrix]; width and height are stored
 * unchanged as 16.16 fixed-point values.
 */
private fun buildTkhdBox(width: Int, height: Int): ByteArray {
    val payload = ByteArrayOutputStream()
    val writer = DataOutputStream(payload)
    with(writer) {
        writeInt(0x00000007)      // version 0, flags (enabled, in movie, in preview)
        writeInt(0)               // creation time
        writeInt(0)               // modification time
        writeInt(1)               // track ID
        writeInt(0)               // reserved
        writeInt(0)               // duration
        repeat(2) { writeInt(0) } // reserved
        writeShort(0)             // layer
        writeShort(0)             // alternate group
        writeShort(0)             // volume (0 for video)
        writeShort(0)             // reserved
    }
    writeRotationMatrix(writer)
    // Dimensions are taken as-is from the encoder output format.
    writer.writeInt(width shl 16)  // width (16.16 fixed point)
    writer.writeInt(height shl 16) // height (16.16 fixed point)
    Log.d(TAG, "tkhd: ${width}x${height}, rotation=$orientationDegrees")
    return wrapBox("tkhd", payload.toByteArray())
}
/**
 * Writes the 3x3 track transformation matrix to [dos].
 *
 * The matrix written is always the identity; no rotation is encoded here,
 * whatever the configured orientation is.
 */
private fun writeRotationMatrix(dos: DataOutputStream) {
    val fixedOne = 0x00010000 // 1.0 in 16.16 fixed point
    val fixedW = 0x40000000   // 1.0 in 2.30 fixed point
    // Row-major order: a, b, u / c, d, v / x, y, w
    val identity = intArrayOf(
        fixedOne, 0, 0,
        0, fixedOne, 0,
        0, 0, fixedW
    )
    for (entry in identity) {
        dos.writeInt(entry)
    }
}
/** Builds the `mdia` box from its children, in order: `mdhd`, `hdlr`, `minf`. */
private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
    val children = ByteArrayOutputStream().apply {
        write(buildMdhdBox())
        write(buildHdlrBox())
        write(buildMinfBox(width, height, sps, pps))
    }
    return wrapBox("mdia", children.toByteArray())
}
/**
 * Builds the version-0 `mdhd` (media header) box with the muxer's timescale,
 * a duration of 0 and the language code for "und".
 */
private fun buildMdhdBox(): ByteArray {
    val payload = ByteArrayOutputStream()
    with(DataOutputStream(payload)) {
        writeInt(0)         // version & flags
        writeInt(0)         // creation time
        writeInt(0)         // modification time
        writeInt(timescale) // timescale
        writeInt(0)         // duration
        writeShort(0x55C4)  // language: "und"
        writeShort(0)       // pre-defined
    }
    return wrapBox("mdhd", payload.toByteArray())
}
/**
 * Builds the `hdlr` box declaring a `vide` handler with the
 * null-terminated name "VideoHandler".
 */
private fun buildHdlrBox(): ByteArray {
    val handlerName = "VideoHandler"
    val payload = ByteArrayOutputStream()
    DataOutputStream(payload).run {
        writeInt(0)               // version & flags
        writeInt(0)               // pre-defined
        writeBytes("vide")        // handler type
        repeat(3) { writeInt(0) } // reserved
        writeBytes(handlerName)
        writeByte(0)              // null terminator
    }
    return wrapBox("hdlr", payload.toByteArray())
}
/** Builds the `minf` box from its children, in order: `vmhd`, `dinf`, `stbl`. */
private fun buildMinfBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
    val children = ByteArrayOutputStream().apply {
        write(buildVmhdBox())
        write(buildDinfBox())
        write(buildStblBox(width, height, sps, pps))
    }
    return wrapBox("minf", children.toByteArray())
}
/**
 * Builds the `vmhd` (video media header) box: flags = 1, graphics mode 0
 * and an all-zero opcolor.
 */
private fun buildVmhdBox(): ByteArray {
    val payload = ByteArrayOutputStream()
    with(DataOutputStream(payload)) {
        writeInt(1)                 // version 0, flags = 1
        writeShort(0)               // graphics mode
        repeat(3) { writeShort(0) } // opcolor[0..2]
    }
    return wrapBox("vmhd", payload.toByteArray())
}
/** Builds the `dinf` box, whose only child is the `dref` box. */
private fun buildDinfBox(): ByteArray = wrapBox("dinf", buildDrefBox())
/**
 * Builds the `dref` box with a single `url ` entry whose flags value is 1
 * (media data is in the same file).
 */
private fun buildDrefBox(): ByteArray {
    // 12-byte url box: header + version/flags word with the self-contained flag set
    val urlEntry = wrapBox("url ", byteArrayOf(0, 0, 0, 1))
    val payload = ByteArrayOutputStream()
    DataOutputStream(payload).apply {
        writeInt(0) // version & flags
        writeInt(1) // entry count
        write(urlEntry)
    }
    return wrapBox("dref", payload.toByteArray())
}
/**
 * Builds the `stbl` box: the `stsd` sample description followed by empty
 * `stts`, `stsc`, `stsz` and `stco` tables.
 */
private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
    val children = ByteArrayOutputStream().apply {
        write(buildStsdBox(width, height, sps, pps))
        write(buildEmptySttsBox())
        write(buildEmptyStscBox())
        write(buildEmptyStszBox())
        write(buildEmptyStcoBox())
    }
    return wrapBox("stbl", children.toByteArray())
}
/** Builds the `stsd` box holding exactly one sample entry, the `avc1` box. */
private fun buildStsdBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
    val payload = ByteArrayOutputStream()
    DataOutputStream(payload).apply {
        writeInt(0) // version & flags
        writeInt(1) // entry count
        write(buildAvc1Box(width, height, sps, pps))
    }
    return wrapBox("stsd", payload.toByteArray())
}
/**
 * Builds the `avc1` visual sample entry: the 78 bytes of fixed fields
 * followed by the `avcC` configuration box.
 */
private fun buildAvc1Box(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
    val entry = ByteArrayOutputStream()
    DataOutputStream(entry).apply {
        write(ByteArray(6))  // reserved
        writeShort(1)        // data reference index
        writeShort(0)        // pre-defined
        writeShort(0)        // reserved
        write(ByteArray(12)) // pre-defined (3 x 32 bit)
        writeShort(width)    // width
        writeShort(height)   // height
        writeInt(0x00480000) // horiz resolution (72 dpi)
        writeInt(0x00480000) // vert resolution (72 dpi)
        writeInt(0)          // reserved
        writeShort(1)        // frame count
        write(ByteArray(32)) // compressor name (empty)
        writeShort(0x0018)   // depth (24 bit)
        writeShort(-1)       // pre-defined
        write(buildAvcCBox(sps, pps))
    }
    return wrapBox("avc1", entry.toByteArray())
}
/**
 * Builds the `avcC` box (AVC decoder configuration record) with one SPS and
 * one PPS and a 4-byte NAL length size.
 *
 * @param sps the SPS NAL unit without start code. Its first byte is the NAL
 *   unit header, so profile_idc, the constraint/compatibility byte and
 *   level_idc are read from bytes 1, 2 and 3 respectively.
 * @param pps the PPS NAL unit without start code
 */
private fun buildAvcCBox(sps: ByteArray, pps: ByteArray): ByteArray {
    val output = ByteArrayOutputStream()
    val dos = DataOutputStream(output)
    // sps[0] is the NAL header (nal_ref_idc + nal_unit_type), not profile_idc.
    // Fall back to Baseline (0x42) / level 3.1 (0x1F) if the SPS is truncated.
    val profileIdc = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x42
    val profileCompat = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x00
    val levelIdc = if (sps.size > 3) sps[3].toInt() and 0xFF else 0x1F
    dos.writeByte(1)             // configuration version
    dos.writeByte(profileIdc)    // AVC profile
    dos.writeByte(profileCompat) // profile compatibility
    dos.writeByte(levelIdc)      // AVC level
    dos.writeByte(0xFF)          // 6 bits reserved + 2 bits NAL length - 1
    dos.writeByte(0xE1)          // 3 bits reserved + 5 bits SPS count
    dos.writeShort(sps.size)     // SPS length
    dos.write(sps)               // SPS data
    dos.writeByte(1)             // PPS count
    dos.writeShort(pps.size)     // PPS length
    dos.write(pps)               // PPS data
    return wrapBox("avcC", output.toByteArray())
}
/** Builds an `stts` box with zero entries (version/flags = 0, entry count = 0). */
private fun buildEmptySttsBox(): ByteArray {
    // 4 bytes version & flags + 4 bytes entry count, all zero
    val zeroedPayload = ByteArray(8)
    return wrapBox("stts", zeroedPayload)
}
/** Builds an `stsc` box with zero entries (version/flags = 0, entry count = 0). */
private fun buildEmptyStscBox(): ByteArray {
    // 4 bytes version & flags + 4 bytes entry count, all zero
    val zeroedPayload = ByteArray(8)
    return wrapBox("stsc", zeroedPayload)
}
/**
 * Builds an `stsz` box with no samples (version/flags = 0, sample size = 0
 * meaning variable, sample count = 0).
 */
private fun buildEmptyStszBox(): ByteArray {
    // version & flags + sample size + sample count, 4 bytes each, all zero
    val zeroedPayload = ByteArray(12)
    return wrapBox("stsz", zeroedPayload)
}
/** Builds an `stco` box with zero entries (version/flags = 0, entry count = 0). */
private fun buildEmptyStcoBox(): ByteArray {
    // 4 bytes version & flags + 4 bytes entry count, all zero
    val zeroedPayload = ByteArray(8)
    return wrapBox("stco", zeroedPayload)
}
/** Builds the `mvex` box, whose only child is the `trex` box. */
private fun buildMvexBox(): ByteArray = wrapBox("mvex", buildTrexBox())
/**
 * Builds the `trex` box for track ID 1 with sample description index 1 and
 * all other sample defaults (duration, size, flags) set to 0.
 */
private fun buildTrexBox(): ByteArray {
    val payload = ByteBuffer.allocate(24)
        .putInt(0) // version & flags
        .putInt(1) // track ID
        .putInt(1) // default sample description index
        .putInt(0) // default sample duration
        .putInt(0) // default sample size
        .putInt(0) // default sample flags
        .array()
    return wrapBox("trex", payload)
}
// ==================== Media Segment Building ====================
/**
 * Builds one media segment: a `moof` box followed by an `mdat` box that
 * contains the data of all [samples] back to back.
 */
private fun buildMediaSegment(
    samples: List<Sample>,
    sequenceNumber: Int,
    baseDecodeTimeUs: Long
): ByteArray {
    // The payload is assembled first because its size is handed to the moof builder.
    val mdatPayload = ByteArrayOutputStream()
        .also { sink -> samples.forEach { sink.write(it.data) } }
        .toByteArray()
    val moof = buildMoofBox(samples, sequenceNumber, baseDecodeTimeUs, mdatPayload.size)
    val mdat = wrapBox("mdat", mdatPayload)
    return moof + mdat
}
/**
 * Builds the `moof` box (`mfhd` + `traf`).
 *
 * The trun data offset is computed up front from the known box sizes: it is
 * the size of the whole `moof` plus the 8-byte `mdat` header, i.e. the
 * distance from the start of `moof` to the first sample byte.
 *
 * [mdatPayloadSize] is accepted but not used by this function.
 */
private fun buildMoofBox(
    samples: List<Sample>,
    sequenceNumber: Int,
    baseDecodeTimeUs: Long,
    mdatPayloadSize: Int
): ByteArray {
    val boxHeaderBytes = 8
    val mfhd = buildMfhdBox(sequenceNumber)
    val tfhdBytes = boxHeaderBytes + 8  // version/flags + track_id
    val tfdtBytes = boxHeaderBytes + 12 // version/flags + 64-bit decode time
    val trunBytes = boxHeaderBytes + 12 + 12 * samples.size // fixed fields + 12 bytes per sample
    val trafBytes = boxHeaderBytes + tfhdBytes + tfdtBytes + trunBytes
    val moofBytes = boxHeaderBytes + mfhd.size + trafBytes
    val firstSampleOffset = moofBytes + boxHeaderBytes // skip the mdat header
    val traf = buildTrafBox(samples, baseDecodeTimeUs, firstSampleOffset)
    return wrapBox("moof", mfhd + traf)
}
/** Builds the `mfhd` box carrying the fragment's [sequenceNumber]. */
private fun buildMfhdBox(sequenceNumber: Int): ByteArray {
    val payload = ByteBuffer.allocate(8)
        .putInt(0) // version & flags
        .putInt(sequenceNumber)
        .array()
    return wrapBox("mfhd", payload)
}
/** Builds the `traf` box from its children, in order: `tfhd`, `tfdt`, `trun`. */
private fun buildTrafBox(samples: List<Sample>, baseDecodeTimeUs: Long, dataOffset: Int): ByteArray {
    val children = ByteArrayOutputStream().apply {
        write(buildTfhdBox())
        write(buildTfdtBox(baseDecodeTimeUs))
        write(buildTrunBox(samples, dataOffset))
    }
    return wrapBox("traf", children.toByteArray())
}
/**
 * Builds the `tfhd` box for track ID 1 with only the
 * default-base-is-moof flag (0x020000) set.
 */
private fun buildTfhdBox(): ByteArray {
    val payload = ByteBuffer.allocate(8)
        .putInt(0x00020000) // version 0, flags: default-base-is-moof
        .putInt(1)          // track ID
        .array()
    return wrapBox("tfhd", payload)
}
/**
 * Builds a version-1 `tfdt` box whose 64-bit base media decode time is
 * [baseDecodeTimeUs] converted from microseconds to timescale units.
 */
private fun buildTfdtBox(baseDecodeTimeUs: Long): ByteArray {
    // Microseconds -> timescale ticks (integer division truncates)
    val baseMediaDecodeTime = (baseDecodeTimeUs * timescale) / 1_000_000
    val payload = ByteBuffer.allocate(12)
        .putInt(0x01000000) // version 1 (64-bit time), no flags
        .putLong(baseMediaDecodeTime)
        .array()
    return wrapBox("tfdt", payload)
}
/**
 * Builds the `trun` box listing duration, size and flags for every sample.
 *
 * Each duration is the difference between the sample's end and start time
 * after both were converted to timescale units. Converting every duration
 * on its own would truncate each one separately and let the sum drift below
 * the timeline used for `tfdt` (e.g. 33333 us at timescale 30000 becomes 999
 * ticks instead of 1000); differencing converted timestamps keeps the
 * accumulated durations aligned with the converted presentation times.
 *
 * @param samples the samples of this fragment, in order
 * @param dataOffset offset from the start of the enclosing `moof` to the
 *   first sample byte
 */
private fun buildTrunBox(samples: List<Sample>, dataOffset: Int): ByteArray {
    val output = ByteArrayOutputStream()
    val dos = DataOutputStream(output)
    // Flags: data-offset + sample-duration + sample-size + sample-flags
    val flags = 0x000001 or 0x000100 or 0x000200 or 0x000400
    dos.writeInt(flags)
    dos.writeInt(samples.size)
    dos.writeInt(dataOffset)
    for (sample in samples) {
        // Same us -> ticks conversion as used for the tfdt base decode time.
        val startTicks = (sample.presentationTimeUs * timescale) / 1_000_000
        val endTicks = ((sample.presentationTimeUs + sample.durationUs) * timescale) / 1_000_000
        // The field is unsigned; never let a negative duration wrap around.
        val durationInTimescale = (endTicks - startTicks).coerceAtLeast(0L).toInt()
        dos.writeInt(durationInTimescale)
        dos.writeInt(sample.data.size)
        dos.writeInt(buildSampleFlags(sample.isKeyFrame))
    }
    return wrapBox("trun", output.toByteArray())
}
/**
 * Returns the trun sample flags for a sample.
 *
 * Keyframes get `sample_depends_on = 2` (0x02000000); all other samples get
 * `sample_depends_on = 1` plus the non-sync (difference) sample bit
 * (0x01010000).
 */
private fun buildSampleFlags(isKeyFrame: Boolean): Int = when (isKeyFrame) {
    true -> 0x02000000  // no dependencies, sync sample
    false -> 0x01010000 // depends on others, difference sample
}
// ==================== Utilities ====================
/**
 * Wraps [content] in an MP4 box: a 32-bit big-endian size of
 * `8 + content.size`, the [type] characters written as single bytes, then
 * the content itself.
 *
 * @param type box type; callers in this class pass four-character codes
 */
private fun wrapBox(type: String, content: ByteArray): ByteArray {
    val boxSize = 8 + content.size
    val box = ByteArrayOutputStream(boxSize)
    DataOutputStream(box).run {
        writeInt(boxSize)
        writeBytes(type)
        write(content)
    }
    return box.toByteArray()
}
}

View File

@@ -4,7 +4,6 @@ import android.content.Context
import android.util.Log
import android.util.Size
import android.view.Surface
import androidx.media3.common.util.UnstableApi
import com.facebook.common.statfs.StatFsHelper
import com.mrousavy.camera.extensions.getRecommendedBitRate
import com.mrousavy.camera.types.Orientation
@@ -16,7 +15,6 @@ import java.text.SimpleDateFormat
import java.util.Locale
import java.util.Date
@UnstableApi
class RecordingSession(
context: Context,
val cameraId: String,
@@ -24,13 +22,13 @@ class RecordingSession(
private val enableAudio: Boolean,
private val fps: Int? = null,
private val hdr: Boolean = false,
val cameraOrientation: Orientation,
private val cameraOrientation: Orientation,
private val options: RecordVideoOptions,
private val filePath: String,
private val callback: (video: Video) -> Unit,
private val onError: (error: CameraError) -> Unit,
private val allCallbacks: CameraSession.Callback,
// Use the new FragmentedMp4Muxer-based recorder for HLS-compatible output
// Use FragmentedRecordingManager for HLS-compatible fMP4 output
private val useFragmentedMp4: Boolean = true
) {
companion object {

View File

@@ -162,14 +162,6 @@ class VideoPipeline(
// 4. Get the transform matrix from the SurfaceTexture (rotations/scales applied by Camera)
surfaceTexture.getTransformMatrix(transformMatrix)
// Log transform matrix for debugging rotation issues (only when recording)
if (recordingSession != null) {
Log.i(TAG, "ROTATION_DEBUG TransformMatrix: [${transformMatrix[0]}, ${transformMatrix[1]}, ${transformMatrix[2]}, ${transformMatrix[3]}], " +
"[${transformMatrix[4]}, ${transformMatrix[5]}, ${transformMatrix[6]}, ${transformMatrix[7]}], " +
"[${transformMatrix[8]}, ${transformMatrix[9]}, ${transformMatrix[10]}, ${transformMatrix[11]}], " +
"[${transformMatrix[12]}, ${transformMatrix[13]}, ${transformMatrix[14]}, ${transformMatrix[15]}]")
}
// 5. Draw it with applied rotation/mirroring
onFrame(transformMatrix)
@@ -189,15 +181,11 @@ class VideoPipeline(
/**
* Configures the Pipeline to also write Frames to a Surface from a `MediaRecorder` (or null)
*/
fun setRecordingSessionOutput(recordingSession: RecordingSession?, orientation: Orientation = Orientation.LANDSCAPE_LEFT) {
fun setRecordingSessionOutput(recordingSession: RecordingSession?) {
synchronized(this) {
if (recordingSession != null) {
// Configure OpenGL pipeline to stream Frames into the Recording Session's surface
Log.i(TAG, "Setting ${recordingSession.size} RecordingSession Output with orientation=$orientation...")
// Set the recording orientation for the native layer
// 0 = LANDSCAPE_LEFT (CCW), 1 = LANDSCAPE_RIGHT (CW)
val orientationValue = if (orientation == Orientation.LANDSCAPE_RIGHT) 1 else 0
setRecordingOrientation(orientationValue)
Log.i(TAG, "Setting ${recordingSession.size} RecordingSession Output...")
setRecordingSessionOutputSurface(recordingSession.surface)
this.recordingSession = recordingSession
} else {
@@ -264,6 +252,5 @@ class VideoPipeline(
private external fun onFrame(transformMatrix: FloatArray)
private external fun setRecordingSessionOutputSurface(surface: Any)
private external fun removeRecordingSessionOutputSurface()
private external fun setRecordingOrientation(orientation: Int)
private external fun initHybrid(width: Int, height: Int): HybridData
}