Compare commits

...

3 Commits

7 changed files with 557 additions and 85 deletions

OpenGLRenderer.cpp

@@ -26,6 +26,7 @@ OpenGLRenderer::OpenGLRenderer(std::shared_ptr<OpenGLContext> context, ANativeWi
  _outputSurface = surface;
  _width = ANativeWindow_getWidth(surface);
  _height = ANativeWindow_getHeight(surface);
+ __android_log_print(ANDROID_LOG_INFO, TAG, "ROTATION_DEBUG OpenGLRenderer created with output surface dimensions: %dx%d", _width, _height);
}

OpenGLRenderer::~OpenGLRenderer() {

VideoPipeline.cpp

@@ -56,6 +56,11 @@ void VideoPipeline::setRecordingSessionOutputSurface(jobject surface) {
  _recordingSessionOutput = OpenGLRenderer::CreateWithWindowSurface(_context, window);
}
+
+void VideoPipeline::setRecordingOrientation(int orientation) {
+  _recordingOrientation = orientation;
+  __android_log_print(ANDROID_LOG_INFO, TAG, "Recording orientation set to: %d", orientation);
+}

int VideoPipeline::getInputTextureId() {
  if (_inputTexture == std::nullopt) {
    _inputTexture = _context->createTexture(OpenGLTexture::Type::ExternalOES, _width, _height);
@@ -78,8 +83,29 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara
  OpenGLTexture& texture = _inputTexture.value();
  if (_recordingSessionOutput) {
-   __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession..");
-   _recordingSessionOutput->renderTextureToSurface(texture, transformMatrix);
+   __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession.. orientation=%d", _recordingOrientation);
+   // For recording, use a simple transform matrix instead of the display transform.
+   // The display transform includes rotations for preview which we don't want in recordings.
+   float recordingMatrix[16];
+   if (_recordingOrientation == 1) {
+     // LANDSCAPE_RIGHT (CW): Y-flip composed with a 180° rotation.
+     // The Y terms cancel, so the net effect is an X-flip: negate X, then translate X by 1.
+     recordingMatrix[0] = -1.0f; recordingMatrix[1] = 0.0f;  recordingMatrix[2] = 0.0f;  recordingMatrix[3] = 0.0f;
+     recordingMatrix[4] = 0.0f;  recordingMatrix[5] = 1.0f;  recordingMatrix[6] = 0.0f;  recordingMatrix[7] = 0.0f;
+     recordingMatrix[8] = 0.0f;  recordingMatrix[9] = 0.0f;  recordingMatrix[10] = 1.0f; recordingMatrix[11] = 0.0f;
+     recordingMatrix[12] = 1.0f; recordingMatrix[13] = 0.0f; recordingMatrix[14] = 0.0f; recordingMatrix[15] = 1.0f;
+   } else {
+     // LANDSCAPE_LEFT (CCW): Simple Y-flip.
+     // OpenGL origin is bottom-left, video expects top-left.
+     recordingMatrix[0] = 1.0f;  recordingMatrix[1] = 0.0f;  recordingMatrix[2] = 0.0f;  recordingMatrix[3] = 0.0f;
+     recordingMatrix[4] = 0.0f;  recordingMatrix[5] = -1.0f; recordingMatrix[6] = 0.0f;  recordingMatrix[7] = 0.0f;
+     recordingMatrix[8] = 0.0f;  recordingMatrix[9] = 0.0f;  recordingMatrix[10] = 1.0f; recordingMatrix[11] = 0.0f;
+     recordingMatrix[12] = 0.0f; recordingMatrix[13] = 1.0f; recordingMatrix[14] = 0.0f; recordingMatrix[15] = 1.0f;
+   }
+   _recordingSessionOutput->renderTextureToSurface(texture, recordingMatrix);
  }
}
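Note: a quick way to sanity-check the two matrices above is to apply them to the quad's texture corners. The sketch below is illustrative only, not part of this diff; it assumes the standard OpenGL column-major layout, where element (row r, column c) lives at m[4*c + r]:

    // Sketch: verify LANDSCAPE_LEFT is a pure Y-flip and LANDSCAPE_RIGHT a pure X-flip.
    fun applyTexTransform(m: FloatArray, u: Float, v: Float): Pair<Float, Float> =
        // For the point (u, v, 0, 1): x' = m[0]*u + m[4]*v + m[12], y' = m[1]*u + m[5]*v + m[13]
        (m[0] * u + m[4] * v + m[12]) to (m[1] * u + m[5] * v + m[13])

    fun main() {
        val yFlip = floatArrayOf(1f, 0f, 0f, 0f, 0f, -1f, 0f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 1f)
        val xFlip = floatArrayOf(-1f, 0f, 0f, 0f, 0f, 1f, 0f, 0f, 0f, 0f, 1f, 0f, 1f, 0f, 0f, 1f)
        check(applyTexTransform(yFlip, 0f, 0f) == 0f to 1f) // bottom-left maps to top-left: Y-flip
        check(applyTexTransform(xFlip, 0f, 0f) == 1f to 0f) // bottom-left maps to bottom-right: X-flip
        check(applyTexTransform(xFlip, 1f, 1f) == 0f to 1f)
    }

This also shows why the LANDSCAPE_RIGHT case has no Y terms: the Y-flip and the 180° rotation cancel on that axis, leaving a pure X mirror.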
@@ -88,6 +114,7 @@ void VideoPipeline::registerNatives() {
    makeNativeMethod("initHybrid", VideoPipeline::initHybrid),
    makeNativeMethod("setRecordingSessionOutputSurface", VideoPipeline::setRecordingSessionOutputSurface),
    makeNativeMethod("removeRecordingSessionOutputSurface", VideoPipeline::removeRecordingSessionOutputSurface),
+   makeNativeMethod("setRecordingOrientation", VideoPipeline::setRecordingOrientation),
    makeNativeMethod("getInputTextureId", VideoPipeline::getInputTextureId),
    makeNativeMethod("onBeforeFrame", VideoPipeline::onBeforeFrame),
    makeNativeMethod("onFrame", VideoPipeline::onFrame),

VideoPipeline.h

@@ -33,6 +33,7 @@ public:
  // <- MediaRecorder output
  void setRecordingSessionOutputSurface(jobject surface);
  void removeRecordingSessionOutputSurface();
+ void setRecordingOrientation(int orientation);

  // Frame callbacks
  void onBeforeFrame();
@@ -47,6 +48,7 @@ private:
  std::optional<OpenGLTexture> _inputTexture = std::nullopt;
  int _width = 0;
  int _height = 0;
+ int _recordingOrientation = 0; // 0 = LANDSCAPE_LEFT, 1 = LANDSCAPE_RIGHT

  // Output Contexts
  std::shared_ptr<OpenGLContext> _context = nullptr;

CameraSession.kt

@@ -409,7 +409,8 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
  private fun updateVideoOutputs() {
    val videoOutput = videoOutput ?: return
    Log.i(TAG, "Updating Video Outputs...")
-   videoOutput.videoPipeline.setRecordingSessionOutput(recording)
+   val orientation = recording?.cameraOrientation ?: Orientation.LANDSCAPE_LEFT
+   videoOutput.videoPipeline.setRecordingSessionOutput(recording, orientation)
  }

  suspend fun startRecording(
@@ -428,18 +429,16 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
    // Get actual device rotation from WindowManager since the React Native orientation hook
    // doesn't update when rotating between landscape-left and landscape-right on Android.
-   // Map device rotation to the correct orientationHint for video recording:
-   // - Counter-clockwise (ROTATION_90) → 270° hint
-   // - Clockwise (ROTATION_270) → 90° hint
    val windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager
    val deviceRotation = windowManager.defaultDisplay.rotation
    val recordingOrientation = when (deviceRotation) {
      Surface.ROTATION_0 -> Orientation.PORTRAIT
-     Surface.ROTATION_90 -> Orientation.LANDSCAPE_RIGHT
+     Surface.ROTATION_90 -> Orientation.LANDSCAPE_LEFT // CCW rotation, top to left
      Surface.ROTATION_180 -> Orientation.PORTRAIT_UPSIDE_DOWN
-     Surface.ROTATION_270 -> Orientation.LANDSCAPE_LEFT
+     Surface.ROTATION_270 -> Orientation.LANDSCAPE_RIGHT // CW rotation, top to right
      else -> Orientation.PORTRAIT
    }
+   Log.i(TAG, "ROTATION_DEBUG: deviceRotation=$deviceRotation, recordingOrientation=$recordingOrientation, options.orientation=${options.orientation}")

    val recording = RecordingSession(
      context,
@@ -448,7 +447,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
      enableAudio,
      fps,
      videoOutput.enableHdr,
-     orientation,
+     recordingOrientation,
      options,
      filePath,
      callback,

FragmentedRecordingManager.kt

@@ -16,7 +16,9 @@ import com.mrousavy.camera.types.Orientation
import com.mrousavy.camera.types.RecordVideoOptions
import java.io.File
import java.io.FileOutputStream
+import java.io.RandomAccessFile
import java.nio.ByteBuffer
+import java.nio.ByteOrder

/**
 * A recording manager that produces HLS-compatible fragmented MP4 segments.
@@ -51,14 +53,21 @@ class FragmentedRecordingManager(
      segmentDurationSeconds: Int = 6
    ): FragmentedRecordingManager {
      val mimeType = options.videoCodec.toMimeType()
-     val cameraOrientationDegrees = cameraOrientation.toDegrees()
-     val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees()
-
-     val (width, height) = if (cameraOrientation.isLandscape()) {
-       size.height to size.width
-     } else {
-       size.width to size.height
-     }
+     // For fragmented MP4: DON'T swap dimensions, use camera's native dimensions.
+     // The C++ VideoPipeline uses a custom transform matrix (not the display transform).
+     // This gives us raw sensor frames, and we rely on rotation metadata for playback.
+     val cameraOrientationDegrees = when (cameraOrientation) {
+       Orientation.LANDSCAPE_LEFT -> 0 // CCW landscape
+       Orientation.LANDSCAPE_RIGHT -> 0 // CW landscape
+       Orientation.PORTRAIT -> 90
+       Orientation.PORTRAIT_UPSIDE_DOWN -> 270
+     }
+     Log.i(TAG, "ROTATION_DEBUG FragmentedRecordingManager: cameraOrientation=$cameraOrientation, cameraOrientationDegrees=$cameraOrientationDegrees, inputSize=${size.width}x${size.height}")
+
+     // Keep original dimensions - don't swap. Let rotation metadata handle orientation.
+     val width = size.width
+     val height = size.height
+     Log.i(TAG, "ROTATION_DEBUG FragmentedRecordingManager: outputDimensions=${width}x${height} (no swap)")

      val format = MediaFormat.createVideoFormat(mimeType, width, height)
      val codec = MediaCodec.createEncoderByType(mimeType)
@@ -74,14 +83,14 @@ class FragmentedRecordingManager(
      format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
      format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
-     Log.d(TAG, "Video Format: $format, camera orientation $cameraOrientationDegrees, recordingOrientation: $recordingOrientationDegrees")
+     Log.d(TAG, "Video Format: $format, orientationDegrees: $cameraOrientationDegrees")
      codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)

      return FragmentedRecordingManager(
        codec,
        outputDirectory,
-       recordingOrientationDegrees,
+       cameraOrientationDegrees,
        segmentDurationSeconds * 1_000_000L,
        callbacks
      )
@@ -97,6 +106,13 @@ class FragmentedRecordingManager(
  private var segmentContext: SegmentContext? = null
  private var initSegmentEmitted = false
+
+ // Cumulative base time for HLS-compatible timestamps (in timescale units).
+ // Each segment's baseMediaDecodeTime should be the sum of all previous segment durations.
+ private var cumulativeBaseTimeUs: Long = 0L
+
+ // Timescale used in the fMP4 (typically 1000000 for microseconds)
+ private val timescale: Long = 1_000_000L

  override val surface: Surface = encoder.createInputSurface()

  init {
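Note: the two fields above implement a simple running sum; with a 1 MHz timescale, the baseMediaDecodeTime of segment n is just the total microseconds of segments 0..n-1. A minimal worked example (illustrative durations):

    fun main() {
        val segmentDurationsUs = longArrayOf(6_000_000L, 6_000_000L, 4_200_000L)
        var cumulativeBaseTimeUs = 0L
        for ((index, durationUs) in segmentDurationsUs.withIndex()) {
            println("segment $index: baseMediaDecodeTime=$cumulativeBaseTimeUs") // 0, 6000000, 12000000
            cumulativeBaseTimeUs += durationUs
        }
    }

Each segment starts exactly where the previous one ended, which is what keeps an HLS player's timeline continuous across fragments.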
@@ -106,16 +122,26 @@ class FragmentedRecordingManager(
    encoder.setCallback(this)
  }

+ /**
+  * Result from finishing a segment, used for tfdt patching.
+  */
+ private data class SegmentResult(
+   val file: File,
+   val segmentIndex: Int,
+   val durationUs: Long
+ )
+
  /**
   * Context for a single data segment being written.
   * Init segments are created separately via createInitSegment().
   */
  private inner class SegmentContext(
    private val format: MediaFormat,
-   private val segmentIndex: Int
+   val segmentIndex: Int,
+   private val baseTimeUs: Long // The baseMediaDecodeTime for this segment
  ) {
    private val filename = "$segmentIndex.mp4"
-   private val file = File(outputDirectory, filename)
+   val file = File(outputDirectory, filename)
    private val outputStream = FileOutputStream(file)
    private val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
    private lateinit var videoTrack: Muxer.TrackToken
@@ -126,32 +152,38 @@ class FragmentedRecordingManager(
    init {
      val media3Format = convertToMedia3Format(format)
      videoTrack = muxer.addTrack(media3Format)
-     Log.d(TAG, "Created segment context: $filename")
+     Log.d(TAG, "Created segment context: $filename with baseTimeUs=$baseTimeUs")
    }

-   fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo): Boolean {
+   fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo) {
      if (startTimeUs < 0) {
        startTimeUs = bufferInfo.presentationTimeUs
+       Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FIRST sample: absolutePTS=${bufferInfo.presentationTimeUs}us, baseTimeUs=$baseTimeUs")
      }
-     lastTimeUs = bufferInfo.presentationTimeUs

+     // Log first 3 samples and every keyframe for debugging
      val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
+     if (sampleCount < 3 || isKeyFrame) {
+       Log.i(TAG, "PTS_DEBUG Segment $segmentIndex sample $sampleCount: PTS=${bufferInfo.presentationTimeUs}us, keyframe=$isKeyFrame")
+     }
+     lastTimeUs = bufferInfo.presentationTimeUs

      muxer.writeSampleData(videoTrack, buffer, bufferInfo)
      sampleCount++
-
-     // Check if we should start a new segment at the next keyframe
-     if (isKeyFrame && sampleCount > 1) {
-       val segmentDurationUs = bufferInfo.presentationTimeUs - startTimeUs
-       if (segmentDurationUs >= targetSegmentDurationUs) {
-         return true // Signal to create new segment
-       }
-     }
-     return false
    }
+
+   /**
+    * Check if we've accumulated enough duration to start a new segment.
+    * Should only be called when we have a keyframe available.
+    */
+   fun shouldStartNewSegmentOnKeyframe(): Boolean {
+     if (sampleCount == 0) return false // Need at least one sample first
+     val currentDurationUs = lastTimeUs - startTimeUs
+     return currentDurationUs >= targetSegmentDurationUs
+   }

-   fun finish(): Long {
+   fun finish(): SegmentResult {
      try {
        muxer.close()
        outputStream.close()
@@ -160,10 +192,9 @@ class FragmentedRecordingManager(
      }

      val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L
-     callbacks.onVideoChunkReady(file, segmentIndex, durationUs)
-     Log.d(TAG, "Finished segment: $filename, samples=$sampleCount, duration=${durationUs/1000}ms")
-     return durationUs
+     Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FINISHED: startPTS=${startTimeUs}us, lastPTS=${lastTimeUs}us, duration=${durationUs/1000}ms, samples=$sampleCount, baseTimeUs=$baseTimeUs")
+     return SegmentResult(file, segmentIndex, durationUs)
    }
  }
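Note: the writeSample/shouldStartNewSegmentOnKeyframe pair means a segment can only be cut at a keyframe, and only after it has accumulated at least targetSegmentDurationUs. A standalone simulation of that rule, with made-up frame timings (not code from this diff):

    fun main() {
        val targetSegmentDurationUs = 6_000_000L
        var segmentStartUs = 0L
        var segment = 0
        for (frame in 0 until 500) {
            val ptsUs = frame * 40_000L      // 25 fps
            val isKeyFrame = frame % 50 == 0 // keyframe every 2 seconds
            if (isKeyFrame && frame > 0 && ptsUs - segmentStartUs >= targetSegmentDurationUs) {
                segment++                    // the keyframe opens the new segment
                segmentStartUs = ptsUs
                println("segment $segment starts at ${ptsUs / 1_000_000} s") // 6, 12, 18, ...
            }
        }
    }

With KEY_I_FRAME_INTERVAL set to the segment duration (as in create() above), the encoder's GOP boundaries and the 6-second target line up, so segments come out close to the target length.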
@@ -174,11 +205,19 @@ class FragmentedRecordingManager(
      return
    }

-   // Close previous segment
-   segmentContext?.finish()
+   // Close previous segment and process it for HLS
+   segmentContext?.let { ctx ->
+     val result = ctx.finish()
+     // Process the segment: extract init (if first), strip headers, inject tfdt
+     processSegmentForHLS(result.file, result.segmentIndex, cumulativeBaseTimeUs)
+     // Update cumulative time for next segment
+     cumulativeBaseTimeUs += result.durationUs
+     // Notify callback
+     callbacks.onVideoChunkReady(result.file, result.segmentIndex, result.durationUs)
+   }

-   // Create new data segment (init segments are created separately)
-   segmentContext = SegmentContext(format, chunkIndex)
+   // Create new data segment with current cumulative base time
+   segmentContext = SegmentContext(format, chunkIndex, cumulativeBaseTimeUs)
    chunkIndex++
  }
@@ -190,7 +229,12 @@ class FragmentedRecordingManager(
  override fun finish() {
    synchronized(this) {
      recording = false
-     segmentContext?.finish()
+     // Close final segment and process it for HLS
+     segmentContext?.let { ctx ->
+       val result = ctx.finish()
+       processSegmentForHLS(result.file, result.segmentIndex, cumulativeBaseTimeUs)
+       callbacks.onVideoChunkReady(result.file, result.segmentIndex, result.durationUs)
+     }
      segmentContext = null
      try {
        encoder.stop()
@@ -213,15 +257,17 @@ class FragmentedRecordingManager(
      return
    }

+   // Skip codec config buffers - these contain SPS/PPS with annex-b start codes
+   // and should NOT be written as samples (they're already in the Format's initializationData)
+   if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
+     Log.d(TAG, "Skipping codec config buffer (size=${bufferInfo.size})")
+     encoder.releaseOutputBuffer(index, false)
+     return
+   }
+
    val encodedData = encoder.getOutputBuffer(index)
    if (encodedData == null) {
      Log.e(TAG, "getOutputBuffer returned null")
      encoder.releaseOutputBuffer(index, false)
      return
    }
-
-   // Wait until init segment is emitted (happens in onOutputFormatChanged)
-   if (!initSegmentEmitted) {
-     encoder.releaseOutputBuffer(index, false)
-     return
-   }
@@ -238,11 +284,18 @@ class FragmentedRecordingManager(
    }

    try {
-     val shouldStartNewSegment = context.writeSample(encodedData, bufferInfo)
+     // Check if this keyframe should start a new segment BEFORE writing
+     val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
+     val shouldStartNewSegment = isKeyFrame && context.shouldStartNewSegmentOnKeyframe()
      if (shouldStartNewSegment) {
+       // Finish old segment WITHOUT writing this keyframe to it
        createNewSegment()
-       // Write this keyframe to the new segment as well
+       // Write keyframe to the NEW segment only
        segmentContext?.writeSample(encodedData, bufferInfo)
+     } else {
+       // Write to current segment
+       context.writeSample(encodedData, bufferInfo)
      }
    } catch (e: Exception) {
      Log.e(TAG, "Error writing sample", e)
@@ -259,39 +312,8 @@ class FragmentedRecordingManager(
  override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) {
    Log.i(TAG, "Output format changed: $format")
    encodedFormat = format
-
-   // Create the init segment immediately when we get the format
-   // This produces an fMP4 file with just ftyp + moov (no samples)
-   if (!initSegmentEmitted) {
-     createInitSegment(format)
-     initSegmentEmitted = true
-   }
- }
-
- /**
-  * Creates an initialization segment containing only codec configuration (ftyp + moov).
-  * This is done by creating a muxer, adding the track, and immediately closing it
-  * without writing any samples.
-  */
- private fun createInitSegment(format: MediaFormat) {
-   val initFile = File(outputDirectory, "init.mp4")
-   try {
-     val outputStream = FileOutputStream(initFile)
-     val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
-
-     // Convert and add the track
-     val media3Format = convertToMedia3Format(format)
-     muxer.addTrack(media3Format)
-
-     // Close immediately - this writes just the header (ftyp + moov)
-     muxer.close()
-     outputStream.close()
-
-     Log.d(TAG, "Created init segment: ${initFile.absolutePath}")
-     callbacks.onInitSegmentReady(initFile)
-   } catch (e: Exception) {
-     Log.e(TAG, "Error creating init segment", e)
-   }
+   // Note: init segment is now extracted from the first segment's ftyp+moov
+   // rather than created separately (Media3's empty init was not working)
  }

  private fun convertToMedia3Format(mediaFormat: MediaFormat): Format {
@@ -302,6 +324,8 @@ class FragmentedRecordingManager(
    val frameRate = try { mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE) } catch (e: Exception) { -1 }

    // Get CSD (Codec Specific Data) if available - required for init segment
+   // csd-0 contains SPS (Sequence Parameter Set)
+   // csd-1 contains PPS (Picture Parameter Set)
    val csd0 = mediaFormat.getByteBuffer("csd-0")
    val csd1 = mediaFormat.getByteBuffer("csd-1")
@@ -310,13 +334,19 @@ class FragmentedRecordingManager(
      val bytes = ByteArray(it.remaining())
      it.duplicate().get(bytes)
      initData.add(bytes)
+     Log.i(TAG, "CSD_DEBUG: csd-0 (SPS) size=${bytes.size} bytes, hex=${bytes.take(32).joinToString("") { "%02x".format(it) }}...")
    }
    csd1?.let {
      val bytes = ByteArray(it.remaining())
      it.duplicate().get(bytes)
      initData.add(bytes)
+     Log.i(TAG, "CSD_DEBUG: csd-1 (PPS) size=${bytes.size} bytes, hex=${bytes.joinToString("") { "%02x".format(it) }}")
    }
+
+   val totalCsdSize = initData.sumOf { it.size }
+   Log.i(TAG, "CSD_DEBUG: Total CSD size=$totalCsdSize bytes (csd-0=${csd0?.remaining() ?: 0}, csd-1=${csd1?.remaining() ?: 0})")
+   Log.i(TAG, "ROTATION_DEBUG convertToMedia3Format: orientationDegrees=$orientationDegrees, width=$width, height=$height")

    return Format.Builder()
      .setSampleMimeType(mimeType)
      .setWidth(width)
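Note on the csd-0/csd-1 logging above: Android encoders usually emit these buffers in Annex-B form, i.e. each parameter set prefixed with a 00 00 00 01 start code, while the avcC box in the init segment stores bare, length-prefixed parameter sets (Media3 handles that conversion). A hypothetical helper, not part of this diff, that makes the hex dumps easier to interpret:

    // Assumption: a csd buffer is either Annex-B or already raw; report which.
    fun describeCsd(csd: java.nio.ByteBuffer): String {
        val head = ByteArray(minOf(4, csd.remaining()))
        csd.duplicate().get(head)
        val annexB = head.size == 4 && head[0] == 0.toByte() &&
            head[1] == 0.toByte() && head[2] == 0.toByte() && head[3] == 1.toByte()
        return if (annexB) "annex-b (start-code prefixed)" else "raw / length-prefixed"
    }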
@@ -329,4 +359,404 @@ class FragmentedRecordingManager(
      }
      .build()
  }
/**
* Processes a segment file for HLS compatibility:
* 1. For segment 0: extracts ftyp+moov header as init.mp4
* 2. Strips ftyp+moov from segment, keeping only moof+mdat (the fragment)
* 3. Injects tfdt box into moof for proper HLS timing
*
* Media3's FragmentedMp4Muxer creates self-contained MP4s, but HLS needs:
* - init.mp4: ftyp + moov (codec configuration)
* - segments: moof + mdat only (fragments referencing init)
*/
private fun processSegmentForHLS(file: File, segmentIndex: Int, baseMediaDecodeTimeUs: Long) {
try {
val originalBytes = file.readBytes()
val buffer = ByteBuffer.wrap(originalBytes).order(ByteOrder.BIG_ENDIAN)
// Find where moof starts (everything before is header: ftyp + moov)
val moofStartPos = findMoofPosition(buffer)
if (moofStartPos < 0) {
Log.e(TAG, "HLS_PROCESS: Could not find moof in ${file.name}")
return
}
Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex - moof starts at $moofStartPos, total size=${originalBytes.size}")
// For segment 0, extract header as init.mp4
if (segmentIndex == 0 && !initSegmentEmitted) {
val headerBytes = originalBytes.copyOfRange(0, moofStartPos)
val initFile = File(outputDirectory, "init.mp4")
initFile.writeBytes(headerBytes)
Log.i(TAG, "HLS_PROCESS: Created init.mp4 with ${headerBytes.size} bytes (ftyp+moov)")
// Debug: dump the init.mp4 structure
dumpMp4BoxStructure(headerBytes, "INIT_STRUCTURE")
callbacks.onInitSegmentReady(initFile)
initSegmentEmitted = true
}
// Extract fragment (moof + mdat only)
val fragmentBytes = originalBytes.copyOfRange(moofStartPos, originalBytes.size)
Log.d(TAG, "HLS_PROCESS: Extracted fragment of ${fragmentBytes.size} bytes")
// Inject tfdt into the fragment
// Note: in the fragment, moof is at position 0
val processedFragment = injectTfdtIntoFragment(fragmentBytes, baseMediaDecodeTimeUs)
// Write back the processed fragment (stripped of header)
file.writeBytes(processedFragment)
Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex processed - header stripped, tfdt injected, final size=${processedFragment.size}")
} catch (e: Exception) {
Log.e(TAG, "Error processing segment ${file.name} for HLS", e)
}
}
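Note: after this processing the output directory holds init.mp4 (ftyp+moov) plus numbered moof+mdat fragments, which is exactly the layout an fMP4 HLS playlist describes via EXT-X-MAP. A sketch of such a playlist builder follows; the tag usage is standard HLS (RFC 8216), but the helper and its names are illustrative, not part of this diff:

    import kotlin.math.ceil

    fun buildPlaylist(segmentDurationsSec: List<Double>): String = buildString {
        appendLine("#EXTM3U")
        appendLine("#EXT-X-VERSION:6")  // EXT-X-MAP requires protocol version >= 6
        appendLine("#EXT-X-TARGETDURATION:${ceil(segmentDurationsSec.maxOrNull() ?: 6.0).toInt()}")
        appendLine("#EXT-X-MAP:URI=\"init.mp4\"")  // the extracted ftyp+moov header
        segmentDurationsSec.forEachIndexed { i, d ->
            appendLine("#EXTINF:$d,")
            appendLine("$i.mp4")  // matches the "$segmentIndex.mp4" naming above
        }
        appendLine("#EXT-X-ENDLIST")
    }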
/**
* Finds the position of the moof box in the file.
* Returns -1 if not found.
*/
private fun findMoofPosition(buffer: ByteBuffer): Int {
var pos = 0
while (pos < buffer.limit() - 8) {
buffer.position(pos)
val size = buffer.int.toLong() and 0xFFFFFFFFL
val type = buffer.int
if (size < 8) break
// 'moof' = 0x6D6F6F66
if (type == 0x6D6F6F66) {
return pos
}
pos += size.toInt()
}
return -1
}
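Note: findMoofPosition assumes every top-level box carries a plain 32-bit size. ISO-BMFF also permits size == 1 (a 64-bit largesize follows the type) and size == 0 (box runs to end of file); Media3's muxer output should not produce either at the top level, but a defensive scanner would look like this sketch (illustrative, not part of this diff):

    fun findBox(data: ByteArray, fourcc: Int): Int {
        val buf = java.nio.ByteBuffer.wrap(data).order(java.nio.ByteOrder.BIG_ENDIAN)
        var pos = 0L
        while (pos <= data.size - 8L) {
            buf.position(pos.toInt())
            var size = buf.int.toLong() and 0xFFFFFFFFL
            val type = buf.int
            if (size == 1L) size = buf.long         // 64-bit largesize follows the type field
            if (size == 0L) size = data.size - pos  // box extends to end of file
            if (size < 8L) return -1                // corrupt box header
            if (type == fourcc) return pos.toInt()
            pos += size
        }
        return -1
    }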
/**
* Injects a tfdt box into a fragment (moof+mdat).
* The fragment has moof at position 0 (header already stripped).
* Also fixes tfhd.base_data_offset since we stripped the original file header.
*/
private fun injectTfdtIntoFragment(fragmentBytes: ByteArray, baseMediaDecodeTimeUs: Long): ByteArray {
val buffer = ByteBuffer.wrap(fragmentBytes).order(ByteOrder.BIG_ENDIAN)
// Find box positions within the fragment (moof is at position 0)
val positions = findBoxPositionsInFragment(buffer)
if (positions == null) {
Log.e(TAG, "TFDT_INJECT: Could not find required boxes in fragment")
return fragmentBytes
}
val (moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos) = positions
Log.d(TAG, "TFDT_INJECT: Fragment boxes - moof@$moofPos(size=$moofSize), traf@$trafPos, tfhd@$tfhdPos, trun@$trunPos")
// First, fix tfhd.base_data_offset - it was pointing to the original file position
// but now moof is at position 0, so base_data_offset should be 0
fixTfhdBaseDataOffset(buffer, tfhdPos.toInt())
// Create tfdt box (version 1, 64-bit baseMediaDecodeTime)
val tfdtSize = 20
val tfdtBytes = ByteBuffer.allocate(tfdtSize).order(ByteOrder.BIG_ENDIAN)
tfdtBytes.putInt(tfdtSize) // size
tfdtBytes.putInt(0x74666474) // 'tfdt'
tfdtBytes.put(1.toByte()) // version = 1
tfdtBytes.put(0.toByte()) // flags[0]
tfdtBytes.put(0.toByte()) // flags[1]
tfdtBytes.put(0.toByte()) // flags[2]
tfdtBytes.putLong(baseMediaDecodeTimeUs) // baseMediaDecodeTime
// Create new fragment with tfdt injected after tfhd
val newBytes = ByteArray(fragmentBytes.size + tfdtSize)
val insertPos = tfhdEnd.toInt()
// Copy bytes before insertion point
System.arraycopy(fragmentBytes, 0, newBytes, 0, insertPos)
// Insert tfdt
System.arraycopy(tfdtBytes.array(), 0, newBytes, insertPos, tfdtSize)
// Copy bytes after insertion point
System.arraycopy(fragmentBytes, insertPos, newBytes, insertPos + tfdtSize, fragmentBytes.size - insertPos)
// Update box sizes in the new buffer
val newBuffer = ByteBuffer.wrap(newBytes).order(ByteOrder.BIG_ENDIAN)
// Update moof size
val newMoofSize = moofSize + tfdtSize
newBuffer.putInt(moofPos.toInt(), newMoofSize.toInt())
// Update traf size
val newTrafSize = trafSize + tfdtSize
newBuffer.putInt(trafPos.toInt(), newTrafSize.toInt())
// Update trun data_offset if present
val newTrunPos = trunPos.toInt() + tfdtSize
updateTrunDataOffset(newBuffer, newTrunPos, tfdtSize)
Log.i(TAG, "TFDT_INJECT: Injected tfdt with baseMediaDecodeTime=$baseMediaDecodeTimeUs us")
return newBytes
}
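Note: the injected box is exactly 20 bytes: a 4-byte size (00 00 00 14), the fourcc 'tfdt' (74 66 64 74), version 01 with zero flags (01 00 00 00), and a 64-bit big-endian baseMediaDecodeTime, e.g. 00 00 00 00 00 5B 8D 80 for 6,000,000 µs. A hypothetical round-trip check, not in this diff, for asserting the patch landed where expected:

    fun readTfdtBaseTime(fragment: ByteArray, tfdtPos: Int): Long {
        val buf = java.nio.ByteBuffer.wrap(fragment).order(java.nio.ByteOrder.BIG_ENDIAN)
        buf.position(tfdtPos)
        require(buf.int == 20) { "expected the 20-byte version-1 tfdt" }
        require(buf.int == 0x74666474) { "not a tfdt box" }
        require(buf.int ushr 24 == 1) { "expected version 1 (64-bit time)" }  // version+flags word
        return buf.long  // baseMediaDecodeTime
    }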
/**
* Data class to hold box positions for tfdt injection.
*/
private data class BoxPositions(
val moofPos: Long,
val moofSize: Long,
val trafPos: Long,
val trafSize: Long,
val tfhdPos: Long, // Position of tfhd (need to fix base_data_offset)
val tfhdEnd: Long, // Position right after tfhd where we'll insert tfdt
val trunPos: Long // Position of trun (need to update its data_offset)
)
/**
* Finds the positions of moof, traf, tfhd, and trun boxes in a fragment.
* In a fragment, moof is expected to be at position 0.
*/
private fun findBoxPositionsInFragment(buffer: ByteBuffer): BoxPositions? {
val fileSize = buffer.limit()
var pos = 0
while (pos < fileSize - 8) {
buffer.position(pos)
val size = buffer.int.toLong() and 0xFFFFFFFFL
val type = buffer.int
if (size < 8) break
// 'moof' = 0x6D6F6F66
if (type == 0x6D6F6F66) {
val moofPos = pos.toLong()
val moofSize = size
val moofEnd = pos + size.toInt()
var childPos = pos + 8
while (childPos < moofEnd - 8) {
buffer.position(childPos)
val childSize = buffer.int.toLong() and 0xFFFFFFFFL
val childType = buffer.int
if (childSize < 8) break
// 'traf' = 0x74726166
if (childType == 0x74726166) {
val trafPos = childPos.toLong()
val trafSize = childSize
val trafEnd = childPos + childSize.toInt()
var trafChildPos = childPos + 8
var tfhdPos: Long = -1
var tfhdEnd: Long = -1
var trunPos: Long = -1
while (trafChildPos < trafEnd - 8) {
buffer.position(trafChildPos)
val trafChildSize = buffer.int.toLong() and 0xFFFFFFFFL
val trafChildType = buffer.int
if (trafChildSize < 8) break
// 'tfhd' = 0x74666864
if (trafChildType == 0x74666864) {
tfhdPos = trafChildPos.toLong()
tfhdEnd = trafChildPos + trafChildSize
}
// 'trun' = 0x7472756E
else if (trafChildType == 0x7472756E) {
trunPos = trafChildPos.toLong()
}
trafChildPos += trafChildSize.toInt()
}
if (tfhdPos > 0 && tfhdEnd > 0 && trunPos > 0) {
return BoxPositions(moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos)
}
}
childPos += childSize.toInt()
}
}
pos += size.toInt()
}
return null
}
/**
* Updates the trun box's data_offset field if present.
* The data_offset points to sample data in mdat, and needs to be
* increased by the size of the injected tfdt box.
*
* trun structure:
* - 4 bytes: size
* - 4 bytes: type ('trun')
* - 1 byte: version
* - 3 bytes: flags
* - 4 bytes: sample_count
* - [optional] 4 bytes: data_offset (if flags & 0x000001)
*/
private fun updateTrunDataOffset(buffer: ByteBuffer, trunPos: Int, offsetDelta: Int) {
buffer.position(trunPos + 8) // Skip size and type
val version = buffer.get().toInt() and 0xFF
val flags = ((buffer.get().toInt() and 0xFF) shl 16) or
((buffer.get().toInt() and 0xFF) shl 8) or
(buffer.get().toInt() and 0xFF)
// Check if data_offset_present flag (0x000001) is set
if ((flags and 0x000001) != 0) {
val sampleCount = buffer.int
val dataOffsetPos = trunPos + 16 // size(4) + type(4) + version(1) + flags(3) + sample_count(4)
buffer.position(dataOffsetPos)
val originalOffset = buffer.int
val newOffset = originalOffset + offsetDelta
buffer.putInt(dataOffsetPos, newOffset)
Log.d(TAG, "TFDT_INJECT: Updated trun data_offset: $originalOffset -> $newOffset")
} else {
Log.d(TAG, "TFDT_INJECT: trun has no data_offset field (flags=0x${flags.toString(16)})")
}
}
/**
* Fixes the tfhd box's base_data_offset field after stripping the file header.
* When we strip ftyp+moov from the original segment, the base_data_offset
* (which pointed to a position in the original file) becomes incorrect.
* We set it to 0 since moof is now at the start of the fragment.
*
* tfhd structure:
* - 4 bytes: size
* - 4 bytes: type ('tfhd')
* - 1 byte: version
* - 3 bytes: flags
* - 4 bytes: track_id
* - [optional] 8 bytes: base_data_offset (if flags & 0x000001)
*/
private fun fixTfhdBaseDataOffset(buffer: ByteBuffer, tfhdPos: Int) {
buffer.position(tfhdPos + 8) // Skip size and type
val version = buffer.get().toInt() and 0xFF
val flags = ((buffer.get().toInt() and 0xFF) shl 16) or
((buffer.get().toInt() and 0xFF) shl 8) or
(buffer.get().toInt() and 0xFF)
// Check if base_data_offset_present flag (0x000001) is set
if ((flags and 0x000001) != 0) {
val trackId = buffer.int
val baseDataOffsetPos = tfhdPos + 16 // size(4) + type(4) + version(1) + flags(3) + track_id(4)
buffer.position(baseDataOffsetPos)
val originalOffset = buffer.long
// Set to 0 since moof is now at start of fragment
buffer.putLong(baseDataOffsetPos, 0L)
Log.i(TAG, "TFHD_FIX: Fixed base_data_offset: $originalOffset -> 0")
} else {
Log.d(TAG, "TFHD_FIX: tfhd has no base_data_offset field (flags=0x${flags.toString(16)})")
}
}
/**
* Debug function to dump MP4 box structure and find avcC/stsd info.
*/
private fun dumpMp4BoxStructure(data: ByteArray, logPrefix: String) {
val buffer = ByteBuffer.wrap(data).order(ByteOrder.BIG_ENDIAN)
dumpBoxesRecursive(buffer, 0, data.size, 0, logPrefix)
}
private fun dumpBoxesRecursive(buffer: ByteBuffer, start: Int, end: Int, depth: Int, logPrefix: String) {
var pos = start
val indent = " ".repeat(depth)
while (pos < end - 8) {
buffer.position(pos)
val size = buffer.int.toLong() and 0xFFFFFFFFL
val typeInt = buffer.int
val typeBytes = ByteArray(4)
typeBytes[0] = ((typeInt shr 24) and 0xFF).toByte()
typeBytes[1] = ((typeInt shr 16) and 0xFF).toByte()
typeBytes[2] = ((typeInt shr 8) and 0xFF).toByte()
typeBytes[3] = (typeInt and 0xFF).toByte()
val typeStr = String(typeBytes, Charsets.US_ASCII)
if (size < 8 || pos + size > end) break
Log.i(TAG, "$logPrefix: $indent[$typeStr] size=$size @ $pos")
// For ftyp, dump the brands
if (typeStr == "ftyp" && size >= 16) {
buffer.position(pos + 8)
val majorBrand = ByteArray(4)
buffer.get(majorBrand)
val minorVersion = buffer.int
Log.i(TAG, "$logPrefix: $indent major_brand=${String(majorBrand)}, minor_version=$minorVersion")
val compatBrandsStart = pos + 16
val compatBrandsEnd = pos + size.toInt()
val brands = mutableListOf<String>()
var brandPos = compatBrandsStart
while (brandPos + 4 <= compatBrandsEnd) {
buffer.position(brandPos)
val brand = ByteArray(4)
buffer.get(brand)
brands.add(String(brand))
brandPos += 4
}
Log.i(TAG, "$logPrefix: $indent compatible_brands=${brands.joinToString(",")}")
}
// For avcC, dump the SPS/PPS info
if (typeStr == "avcC" && size >= 13) {
buffer.position(pos + 8)
val configVersion = buffer.get().toInt() and 0xFF
val profileIdc = buffer.get().toInt() and 0xFF
val profileCompat = buffer.get().toInt() and 0xFF
val levelIdc = buffer.get().toInt() and 0xFF
val lengthSizeMinusOne = buffer.get().toInt() and 0x03
val numSps = buffer.get().toInt() and 0x1F
Log.i(TAG, "$logPrefix: $indent avcC: version=$configVersion, profile=$profileIdc, level=$levelIdc, numSPS=$numSps")
// Read SPS lengths
var spsTotal = 0
for (i in 0 until numSps) {
val spsLen = buffer.short.toInt() and 0xFFFF
spsTotal += spsLen
Log.i(TAG, "$logPrefix: $indent SPS[$i] length=$spsLen")
buffer.position(buffer.position() + spsLen) // Skip SPS data
}
// Read PPS count and lengths
if (buffer.position() < pos + size) {
val numPps = buffer.get().toInt() and 0xFF
var ppsTotal = 0
for (i in 0 until numPps) {
if (buffer.position() + 2 <= pos + size) {
val ppsLen = buffer.short.toInt() and 0xFFFF
ppsTotal += ppsLen
Log.i(TAG, "$logPrefix: $indent PPS[$i] length=$ppsLen")
buffer.position(buffer.position() + ppsLen) // Skip PPS data
}
}
Log.i(TAG, "$logPrefix: $indent avcC total: ${size} bytes, SPS=$spsTotal bytes, PPS=$ppsTotal bytes")
}
}
// Recurse into container boxes
val containerBoxes = setOf("moov", "trak", "mdia", "minf", "stbl", "stsd", "mvex", "edts")
if (typeStr in containerBoxes) {
// stsd has 8 extra bytes (version/flags + entry_count) before children
val childStart = if (typeStr == "stsd") pos + 16 else pos + 8
dumpBoxesRecursive(buffer, childStart, pos + size.toInt(), depth + 1, logPrefix)
}
// avc1 is a sample entry, structure: 8 byte header + 78 byte fixed fields + child boxes
if (typeStr == "avc1") {
dumpBoxesRecursive(buffer, pos + 86, pos + size.toInt(), depth + 1, logPrefix)
}
pos += size.toInt()
}
}
}

RecordingSession.kt

@@ -24,7 +24,7 @@ class RecordingSession(
  private val enableAudio: Boolean,
  private val fps: Int? = null,
  private val hdr: Boolean = false,
- private val cameraOrientation: Orientation,
+ val cameraOrientation: Orientation,
  private val options: RecordVideoOptions,
  private val filePath: String,
  private val callback: (video: Video) -> Unit,

VideoPipeline.kt

@@ -162,6 +162,14 @@ class VideoPipeline(
    // 4. Get the transform matrix from the SurfaceTexture (rotations/scales applied by Camera)
    surfaceTexture.getTransformMatrix(transformMatrix)

+   // Log transform matrix for debugging rotation issues (only when recording)
+   if (recordingSession != null) {
+     Log.i(TAG, "ROTATION_DEBUG TransformMatrix: [${transformMatrix[0]}, ${transformMatrix[1]}, ${transformMatrix[2]}, ${transformMatrix[3]}], " +
+       "[${transformMatrix[4]}, ${transformMatrix[5]}, ${transformMatrix[6]}, ${transformMatrix[7]}], " +
+       "[${transformMatrix[8]}, ${transformMatrix[9]}, ${transformMatrix[10]}, ${transformMatrix[11]}], " +
+       "[${transformMatrix[12]}, ${transformMatrix[13]}, ${transformMatrix[14]}, ${transformMatrix[15]}]")
+   }
+
    // 5. Draw it with applied rotation/mirroring
    onFrame(transformMatrix)
@@ -181,11 +189,15 @@ class VideoPipeline(
  /**
   * Configures the Pipeline to also write Frames to a Surface from a `MediaRecorder` (or null)
   */
- fun setRecordingSessionOutput(recordingSession: RecordingSession?) {
+ fun setRecordingSessionOutput(recordingSession: RecordingSession?, orientation: Orientation = Orientation.LANDSCAPE_LEFT) {
    synchronized(this) {
      if (recordingSession != null) {
        // Configure OpenGL pipeline to stream Frames into the Recording Session's surface
-       Log.i(TAG, "Setting ${recordingSession.size} RecordingSession Output...")
+       Log.i(TAG, "Setting ${recordingSession.size} RecordingSession Output with orientation=$orientation...")
+       // Set the recording orientation for the native layer
+       // 0 = LANDSCAPE_LEFT (CCW), 1 = LANDSCAPE_RIGHT (CW)
+       val orientationValue = if (orientation == Orientation.LANDSCAPE_RIGHT) 1 else 0
+       setRecordingOrientation(orientationValue)
        setRecordingSessionOutputSurface(recordingSession.surface)
        this.recordingSession = recordingSession
      } else {
@@ -252,5 +264,6 @@ class VideoPipeline(
  private external fun onFrame(transformMatrix: FloatArray)
  private external fun setRecordingSessionOutputSurface(surface: Any)
  private external fun removeRecordingSessionOutputSurface()
+ private external fun setRecordingOrientation(orientation: Int)
  private external fun initHybrid(width: Int, height: Int): HybridData
}