Compare commits


2 Commits

4 changed files with 398 additions and 196 deletions

CameraSession.kt

@@ -429,15 +429,15 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
       // Get actual device rotation from WindowManager since the React Native orientation hook
       // doesn't update when rotating between landscape-left and landscape-right on Android.
       // Map device rotation to the correct orientationHint for video recording:
-      // - Counter-clockwise (ROTATION_90) → 270° hint
-      // - Clockwise (ROTATION_270) → 90° hint
+      // - Counter-clockwise (ROTATION_90) → 90° hint
+      // - Clockwise (ROTATION_270) → 270° hint
       val windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager
       val deviceRotation = windowManager.defaultDisplay.rotation
       val recordingOrientation = when (deviceRotation) {
         Surface.ROTATION_0 -> Orientation.PORTRAIT
-        Surface.ROTATION_90 -> Orientation.LANDSCAPE_RIGHT
+        Surface.ROTATION_90 -> Orientation.LANDSCAPE_LEFT
         Surface.ROTATION_180 -> Orientation.PORTRAIT_UPSIDE_DOWN
-        Surface.ROTATION_270 -> Orientation.LANDSCAPE_LEFT
+        Surface.ROTATION_270 -> Orientation.LANDSCAPE_RIGHT
         else -> Orientation.PORTRAIT
       }
@@ -448,7 +448,7 @@ class CameraSession(private val context: Context, private val cameraManager: Cam
         enableAudio,
         fps,
         videoOutput.enableHdr,
-        orientation,
+        recordingOrientation,
         options,
         filePath,
         callback,
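
Read as plain numbers, the mapping the rotation comments above describe looks like this (illustrative helper only, not code from the change; the degree values are the ones stated in the comment, with 180° implied for the upside-down case):

    fun orientationHintFor(deviceRotation: Int): Int = when (deviceRotation) {
      Surface.ROTATION_90 -> 90    // counter-clockwise landscape
      Surface.ROTATION_270 -> 270  // clockwise landscape
      Surface.ROTATION_180 -> 180  // portrait upside down
      else -> 0                    // ROTATION_0 / unknown: portrait
    }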

FragmentedRecordingManager.kt

@@ -20,7 +20,8 @@ import java.io.File
  */
 class FragmentedRecordingManager(
   private val encoder: MediaCodec,
-  private val muxer: HlsMuxer
+  private val muxer: HlsMuxer,
+  private val configuredFps: Int
 ) : MediaCodec.Callback(), ChunkedRecorderInterface {

   companion object {
@@ -39,19 +40,21 @@ class FragmentedRecordingManager(
       segmentDurationSeconds: Int = DEFAULT_SEGMENT_DURATION_SECONDS
     ): FragmentedRecordingManager {
       val mimeType = options.videoCodec.toMimeType()
-      val cameraOrientationDegrees = cameraOrientation.toDegrees()
-      val recordingOrientationDegrees = (options.orientation ?: Orientation.PORTRAIT).toDegrees()
+      // Use cameraOrientation from Android (computed from device rotation)
+      // instead of options.orientation from JS which may be stale
+      val recordingOrientationDegrees = cameraOrientation.toDegrees()

-      // Swap dimensions based on camera orientation, same as ChunkedRecordingManager
+      // Swap dimensions based on orientation - same logic as ChunkedRecordingManager
+      // When camera is in landscape orientation, we need to swap width/height for the encoder
       val (width, height) = if (cameraOrientation.isLandscape()) {
         size.height to size.width
       } else {
         size.width to size.height
       }

-      Log.d(TAG, "Input size: ${size.width}x${size.height}, encoder size: ${width}x${height}, " +
-        "cameraOrientation: $cameraOrientation ($cameraOrientationDegrees°), " +
-        "recordingOrientation: $recordingOrientationDegrees°")
+      Log.d(TAG, "Input size: ${size.width}x${size.height}, " +
+        "encoder size: ${width}x${height}, " +
+        "orientation: $cameraOrientation ($recordingOrientationDegrees°)")

       val format = MediaFormat.createVideoFormat(mimeType, width, height)
       val codec = MediaCodec.createEncoderByType(mimeType)
@@ -61,12 +64,7 @@ class FragmentedRecordingManager(
         MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface
       )

-      // Use 30fps as conservative default since many Android devices can't sustain
-      // higher frame rates at high resolutions. This affects:
-      // - Encoder: bitrate allocation and I-frame interval calculation
-      // - HlsMuxer: timescale for accurate sample durations
-      // The actual frame timing comes from camera timestamps regardless of this setting.
-      val effectiveFps = 30
+      val effectiveFps = fps ?: 30
       format.setInteger(MediaFormat.KEY_FRAME_RATE, effectiveFps)
       format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
       format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
@@ -75,7 +73,7 @@ class FragmentedRecordingManager(
       codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)

-      // Create muxer with callbacks, orientation, and fps
+      // Create muxer with callbacks and orientation
       val muxer = HlsMuxer(
         outputDirectory = outputDirectory,
         callback = object : HlsMuxer.Callback {
@@ -87,14 +85,13 @@ class FragmentedRecordingManager(
             callbacks.onVideoChunkReady(file, index, durationUs)
           }
         },
-        orientationDegrees = recordingOrientationDegrees,
-        fps = effectiveFps
+        orientationDegrees = recordingOrientationDegrees
       )
       muxer.setSegmentDuration(segmentDurationSeconds * 1_000_000L)

-      Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees")
+      Log.d(TAG, "Created HlsMuxer with orientation: $recordingOrientationDegrees degrees, fps: $effectiveFps")

-      return FragmentedRecordingManager(codec, muxer)
+      return FragmentedRecordingManager(codec, muxer, effectiveFps)
     }
   }
@@ -174,7 +171,8 @@ class FragmentedRecordingManager(
     synchronized(this) {
       Log.i(TAG, "Output format changed: $format")
-      trackIndex = muxer.addTrack(format)
+      // Pass configured fps to muxer (not the encoder's output format fps which may differ)
+      trackIndex = muxer.addTrack(format, configuredFps)
       muxer.start()
       muxerStarted = true
     }
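
The comment in the last hunk is the crux: the encoder's output MediaFormat is not a reliable place to read the frame rate back from, so the configured value is threaded through explicitly. A minimal sketch of what reading it back would look like (illustration only; fpsFromOutputFormat is a hypothetical helper, not part of this change):

    fun fpsFromOutputFormat(format: MediaFormat, fallback: Int): Int =
      if (format.containsKey(MediaFormat.KEY_FRAME_RATE)) {
        format.getInteger(MediaFormat.KEY_FRAME_RATE)
      } else {
        fallback // e.g. the configuredFps passed through the constructor above
      }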

HlsMuxer.kt

@@ -27,8 +27,7 @@ import java.nio.ByteBuffer
 class HlsMuxer(
   private val outputDirectory: File,
   private val callback: Callback,
-  private val orientationDegrees: Int = 0,
-  private val fps: Int = 30
+  private val orientationDegrees: Int = 0
 ) {
   companion object {
     private const val TAG = "HlsMuxer"
@@ -42,7 +41,8 @@ class HlsMuxer(
   // Configuration
   private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
-  private var timescale: Int = 30000 // Default, updated in addTrack() to fps * 1000
+  private var timescale: Int = 30000 // Default, updated from format
+  private var configuredFps: Int = 30 // Configured fps from user, used for VUI timing

   // State
   private var state = State.UNINITIALIZED
@@ -55,14 +55,9 @@ class HlsMuxer(
   private var segmentStartTimeUs = -1L
   private var lastPresentationTimeUs = 0L

-  // Timestamp normalization - MediaCodec timestamps are device uptime, not starting from 0
+  // Timestamp normalization - first timestamp becomes time 0
   private var firstPresentationTimeUs = -1L

-  // Actual fps detection from frame timestamps
-  private var detectedFps: Int? = null
-  private var fpsDetectionSamples = mutableListOf<Long>()
-  private val FPS_DETECTION_SAMPLE_COUNT = 30

   private enum class State {
     UNINITIALIZED,
     INITIALIZED,
@@ -84,21 +79,13 @@ class HlsMuxer(
    * Normalizes a presentation timestamp to start from 0.
    * The first timestamp received becomes time 0, and all subsequent
    * timestamps are relative to that.
-   *
-   * This is necessary because MediaCodec timestamps are based on device uptime,
-   * not starting from 0. HLS players expect timestamps to start at or near 0.
    */
   private fun normalizeTimestamp(rawPresentationTimeUs: Long): Long {
     if (firstPresentationTimeUs < 0) {
       firstPresentationTimeUs = rawPresentationTimeUs
-      Log.d(TAG, "First timestamp captured: ${rawPresentationTimeUs}us (${rawPresentationTimeUs / 1_000_000.0}s), normalizing to 0")
+      Log.d(TAG, "First timestamp: ${rawPresentationTimeUs}us, normalizing to 0")
     }
-    val normalized = rawPresentationTimeUs - firstPresentationTimeUs
-    // Log first few normalizations to debug
-    if (normalized < 1_000_000) { // First second
-      Log.d(TAG, "Timestamp: raw=${rawPresentationTimeUs}us -> normalized=${normalized}us")
-    }
-    return normalized
+    return rawPresentationTimeUs - firstPresentationTimeUs
   }

   // ==================== Annex-B to AVCC Conversion ====================
@@ -226,18 +213,15 @@ class HlsMuxer(
    * Adds a track to the muxer.
    *
    * @param format The MediaFormat describing the track
+   * @param fps The configured frame rate (used for VUI timing, overrides format's fps)
    * @return Track index (always 0 for now, single video track)
    */
-  fun addTrack(format: MediaFormat): Int {
+  fun addTrack(format: MediaFormat, fps: Int = 30): Int {
     check(state == State.UNINITIALIZED) { "addTrack() must be called before start()" }

     trackFormat = format
-    // Use fps * 1000 as timescale for good precision (1000 timescale units per frame).
-    // This ensures accurate sample durations without integer truncation issues.
-    // Note: ffprobe may report r_frame_rate based on timescale, so the backend
-    // should use the explicit framesPerSecond from the API mutation, not ffprobe.
-    timescale = fps * 1000
+    configuredFps = fps
+    timescale = fps * 1000 // Use fps * 1000 for good precision

     state = State.INITIALIZED
@@ -245,7 +229,7 @@ class HlsMuxer(
     val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 }
     Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
       "encoder output: ${formatWidth}x${formatHeight}, " +
-      "fps=$fps, timescale=$timescale, orientation=$orientationDegrees°")
+      "configuredFps=$configuredFps, timescale=$timescale, orientation=$orientationDegrees°")

     return 0 // Single track, index 0
   }
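
Worked numbers for the timescale choice above, for reference: with fps = 30 the muxer uses timescale = 30 × 1000 = 30000, so every frame lasts exactly 30000 / 30 = 1000 ticks, and a tool that derives a rate from timescale / sample_delta reports 30000 / 1000 = 30 fps. The same holds for any integer fps, because the per-frame delta is always 1000 ticks.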
@@ -257,30 +241,16 @@ class HlsMuxer(
     check(state == State.INITIALIZED) { "Must call addTrack() before start()" }
     val format = trackFormat ?: throw IllegalStateException("No track format")

-    // Create output directory if needed, with proper error handling
+    // Create output directory if needed
     if (!outputDirectory.exists()) {
-      val created = outputDirectory.mkdirs()
-      if (!created && !outputDirectory.exists()) {
-        throw IllegalStateException(
-          "Failed to create output directory: ${outputDirectory.absolutePath}. " +
-            "Parent exists: ${outputDirectory.parentFile?.exists()}, " +
-            "Parent path: ${outputDirectory.parentFile?.absolutePath}"
-        )
-      }
-      Log.d(TAG, "Created output directory: ${outputDirectory.absolutePath}")
+      outputDirectory.mkdirs()
     }

     // Write init segment
     val initBytes = buildInitSegment(format)
     val initFile = File(outputDirectory, "init.mp4")
     FileOutputStream(initFile).use { it.write(initBytes) }

-    // Log frame rate metadata for debugging
-    val defaultSampleDuration = timescale / fps
     Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)")
-    Log.d(TAG, "Frame rate metadata: timescale=$timescale, fps=$fps, " +
-      "default_sample_duration=$defaultSampleDuration (ffprobe should calculate ${timescale}/${defaultSampleDuration}=${fps}fps)")

     callback.onInitSegmentReady(initFile)
     state = State.STARTED
@@ -303,40 +273,13 @@ class HlsMuxer(
     }

     val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
-    // Normalize timestamp to start from 0 (MediaCodec uses device uptime)
     val presentationTimeUs = normalizeTimestamp(bufferInfo.presentationTimeUs)

-    // Detect actual fps from first N samples
-    if (detectedFps == null) {
-      fpsDetectionSamples.add(presentationTimeUs)
-      if (fpsDetectionSamples.size >= FPS_DETECTION_SAMPLE_COUNT) {
-        val elapsed = fpsDetectionSamples.last() - fpsDetectionSamples.first()
-        if (elapsed > 0) {
-          val actualFps = ((FPS_DETECTION_SAMPLE_COUNT - 1) * 1_000_000.0 / elapsed).toInt()
-          detectedFps = actualFps
-          if (kotlin.math.abs(actualFps - fps) > 5) {
-            Log.w(TAG, "Actual fps ($actualFps) differs significantly from configured fps ($fps)! " +
-              "This may cause processing issues if backend uses configured fps.")
-          } else {
-            Log.d(TAG, "Detected actual fps: $actualFps (configured: $fps)")
-          }
-        }
-        fpsDetectionSamples.clear() // Free memory
-      }
-    }
-
     // Initialize segment start time
     if (segmentStartTimeUs < 0) {
       segmentStartTimeUs = presentationTimeUs
     }

-    // Update duration of previous sample BEFORE finalization check
-    // This ensures the last sample has correct duration when segment is finalized
-    if (pendingSamples.isNotEmpty()) {
-      val lastSample = pendingSamples.last()
-      lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
-    }
-
     // Check if we should finalize current segment (at keyframe boundaries)
     if (isKeyFrame && pendingSamples.isNotEmpty()) {
       val segmentDurationUs = presentationTimeUs - segmentStartTimeUs
@@ -355,6 +298,12 @@ class HlsMuxer(
     // Convert Annex-B (start codes) to AVCC (length prefixes)
     val data = convertAnnexBToAvcc(rawData)

+    // Update duration of previous sample
+    if (pendingSamples.isNotEmpty()) {
+      val lastSample = pendingSamples.last()
+      lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
+    }
+
     // Estimate duration (will be corrected by next sample)
     val estimatedDurationUs = if (lastPresentationTimeUs > 0) {
       presentationTimeUs - lastPresentationTimeUs
@@ -416,7 +365,6 @@ class HlsMuxer(
     val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs
     Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " +
-      "baseDecodeTime=${baseDecodeTimeUs}us (${baseDecodeTimeUs / 1_000_000.0}s), " +
       "duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes")

     callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs)
@@ -430,6 +378,303 @@ class HlsMuxer(
     }
   }
// ==================== SPS VUI Timing Injection ====================
/**
* Injects VUI timing parameters into an H.264 SPS NAL unit.
* This ensures proper frame rate detection by players/decoders.
*
* The SPS from MediaCodec lacks VUI timing info, causing tools like
* ffprobe to misinterpret the frame rate.
*/
private fun injectVuiTiming(sps: ByteArray, fps: Int): ByteArray {
try {
val reader = BitReader(sps)
val writer = BitWriter()
// NAL header (1 byte: forbidden_zero_bit, nal_ref_idc, nal_unit_type)
writer.writeBits(reader.readBits(8), 8)
// profile_idc (1 byte)
val profileIdc = reader.readBits(8)
writer.writeBits(profileIdc, 8)
// constraint_set flags (1 byte)
writer.writeBits(reader.readBits(8), 8)
// level_idc (1 byte)
writer.writeBits(reader.readBits(8), 8)
// seq_parameter_set_id (ue(v))
copyExpGolomb(reader, writer)
// Profile-specific fields for High profile (100) and others
if (profileIdc == 100 || profileIdc == 110 || profileIdc == 122 ||
profileIdc == 244 || profileIdc == 44 || profileIdc == 83 ||
profileIdc == 86 || profileIdc == 118 || profileIdc == 128 ||
profileIdc == 138 || profileIdc == 139 || profileIdc == 134 ||
profileIdc == 135) {
// chroma_format_idc (ue(v))
val chromaFormatIdc = copyExpGolombAndReturn(reader, writer)
if (chromaFormatIdc == 3) {
// separate_colour_plane_flag (1 bit)
writer.writeBits(reader.readBits(1), 1)
}
// bit_depth_luma_minus8 (ue(v))
copyExpGolomb(reader, writer)
// bit_depth_chroma_minus8 (ue(v))
copyExpGolomb(reader, writer)
// qpprime_y_zero_transform_bypass_flag (1 bit)
writer.writeBits(reader.readBits(1), 1)
// seq_scaling_matrix_present_flag (1 bit)
val scalingMatrixFlag = reader.readBits(1)
writer.writeBits(scalingMatrixFlag, 1)
if (scalingMatrixFlag == 1) {
// Skip scaling lists - this is complex, just copy remaining and give up
Log.w(TAG, "SPS has scaling matrix, skipping VUI injection")
return sps
}
}
// log2_max_frame_num_minus4 (ue(v))
copyExpGolomb(reader, writer)
// pic_order_cnt_type (ue(v))
val picOrderCntType = copyExpGolombAndReturn(reader, writer)
if (picOrderCntType == 0) {
// log2_max_pic_order_cnt_lsb_minus4 (ue(v))
copyExpGolomb(reader, writer)
} else if (picOrderCntType == 1) {
// delta_pic_order_always_zero_flag (1 bit)
writer.writeBits(reader.readBits(1), 1)
// offset_for_non_ref_pic (se(v))
copySignedExpGolomb(reader, writer)
// offset_for_top_to_bottom_field (se(v))
copySignedExpGolomb(reader, writer)
// num_ref_frames_in_pic_order_cnt_cycle (ue(v))
val numRefFrames = copyExpGolombAndReturn(reader, writer)
for (i in 0 until numRefFrames) {
// offset_for_ref_frame[i] (se(v))
copySignedExpGolomb(reader, writer)
}
}
// max_num_ref_frames (ue(v))
copyExpGolomb(reader, writer)
// gaps_in_frame_num_value_allowed_flag (1 bit)
writer.writeBits(reader.readBits(1), 1)
// pic_width_in_mbs_minus1 (ue(v))
copyExpGolomb(reader, writer)
// pic_height_in_map_units_minus1 (ue(v))
copyExpGolomb(reader, writer)
// frame_mbs_only_flag (1 bit)
val frameMbsOnlyFlag = reader.readBits(1)
writer.writeBits(frameMbsOnlyFlag, 1)
if (frameMbsOnlyFlag == 0) {
// mb_adaptive_frame_field_flag (1 bit)
writer.writeBits(reader.readBits(1), 1)
}
// direct_8x8_inference_flag (1 bit)
writer.writeBits(reader.readBits(1), 1)
// frame_cropping_flag (1 bit)
val frameCroppingFlag = reader.readBits(1)
writer.writeBits(frameCroppingFlag, 1)
if (frameCroppingFlag == 1) {
// frame_crop_left_offset, right, top, bottom (ue(v) each)
copyExpGolomb(reader, writer)
copyExpGolomb(reader, writer)
copyExpGolomb(reader, writer)
copyExpGolomb(reader, writer)
}
// vui_parameters_present_flag - we'll set this to 1 and add our VUI
val originalVuiFlag = reader.readBits(1)
writer.writeBits(1, 1) // Set VUI present
// Write VUI parameters with timing info
writeVuiWithTiming(writer, fps, originalVuiFlag == 1, reader)
// Add RBSP trailing bits
writer.writeRbspTrailingBits()
val result = writer.toByteArray()
Log.d(TAG, "Injected VUI timing for ${fps}fps, SPS grew from ${sps.size} to ${result.size} bytes")
return result
} catch (e: Exception) {
Log.e(TAG, "Failed to inject VUI timing: ${e.message}, using original SPS")
return sps
}
}
/**
* Writes VUI parameters with timing info.
*/
private fun writeVuiWithTiming(writer: BitWriter, fps: Int, hadVui: Boolean, reader: BitReader) {
// aspect_ratio_info_present_flag
writer.writeBits(0, 1)
// overscan_info_present_flag
writer.writeBits(0, 1)
// video_signal_type_present_flag
writer.writeBits(0, 1)
// chroma_loc_info_present_flag
writer.writeBits(0, 1)
// timing_info_present_flag = 1
writer.writeBits(1, 1)
// num_units_in_tick (32 bits) = 1
writer.writeBits(1, 32)
// time_scale (32 bits) = fps * 2 (because each frame = 2 field counts)
writer.writeBits(fps * 2, 32)
// fixed_frame_rate_flag = 1
writer.writeBits(1, 1)
// nal_hrd_parameters_present_flag
writer.writeBits(0, 1)
// vcl_hrd_parameters_present_flag
writer.writeBits(0, 1)
// pic_struct_present_flag
writer.writeBits(0, 1)
// bitstream_restriction_flag
writer.writeBits(0, 1)
}
// ==================== Bit Manipulation Helpers ====================
/**
* Bit-level reader for parsing H.264 NAL units.
*/
private class BitReader(private val data: ByteArray) {
private var bytePos = 0
private var bitPos = 0
fun readBits(count: Int): Int {
var result = 0
for (i in 0 until count) {
if (bytePos >= data.size) throw IllegalStateException("End of data")
val bit = (data[bytePos].toInt() shr (7 - bitPos)) and 1
result = (result shl 1) or bit
bitPos++
if (bitPos == 8) {
bitPos = 0
bytePos++
}
}
return result
}
fun readExpGolomb(): Int {
var leadingZeros = 0
while (readBits(1) == 0) {
leadingZeros++
if (leadingZeros > 31) throw IllegalStateException("Invalid exp-golomb")
}
if (leadingZeros == 0) return 0
val suffix = readBits(leadingZeros)
return (1 shl leadingZeros) - 1 + suffix
}
fun readSignedExpGolomb(): Int {
val code = readExpGolomb()
return if (code % 2 == 0) -(code / 2) else (code + 1) / 2
}
}
/**
* Bit-level writer for constructing H.264 NAL units.
*/
private class BitWriter {
private val bytes = mutableListOf<Byte>()
private var currentByte = 0
private var bitPos = 0
fun writeBits(value: Int, count: Int) {
for (i in count - 1 downTo 0) {
val bit = (value shr i) and 1
currentByte = (currentByte shl 1) or bit
bitPos++
if (bitPos == 8) {
bytes.add(currentByte.toByte())
currentByte = 0
bitPos = 0
}
}
}
fun writeExpGolomb(value: Int) {
val code = value + 1
val bits = 32 - Integer.numberOfLeadingZeros(code)
// Write leading zeros
for (i in 0 until bits - 1) {
writeBits(0, 1)
}
// Write the code
writeBits(code, bits)
}
fun writeSignedExpGolomb(value: Int) {
val code = if (value <= 0) -2 * value else 2 * value - 1
writeExpGolomb(code)
}
fun writeRbspTrailingBits() {
writeBits(1, 1) // rbsp_stop_one_bit
while (bitPos != 0) {
writeBits(0, 1) // rbsp_alignment_zero_bit
}
}
fun toByteArray(): ByteArray {
// Flush remaining bits
if (bitPos > 0) {
currentByte = currentByte shl (8 - bitPos)
bytes.add(currentByte.toByte())
}
return bytes.toByteArray()
}
}
private fun copyExpGolomb(reader: BitReader, writer: BitWriter) {
val value = reader.readExpGolomb()
writer.writeExpGolomb(value)
}
private fun copyExpGolombAndReturn(reader: BitReader, writer: BitWriter): Int {
val value = reader.readExpGolomb()
writer.writeExpGolomb(value)
return value
}
private fun copySignedExpGolomb(reader: BitReader, writer: BitWriter) {
val value = reader.readSignedExpGolomb()
writer.writeSignedExpGolomb(value)
}
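// Worked example for the Exp-Golomb helpers above (illustration only, not part of the change):
// ue(v) writes codeNum = v + 1 using (bitLength - 1) leading zeros, so v = 3 -> codeNum = 4
// ("100") -> bits 00100; reading back finds 2 leading zeros, suffix 00, and returns
// (1 shl 2) - 1 + 0 = 3. The signed form maps 0, 1, -1, 2, -2, ... to codeNum 0, 1, 2, 3, 4, ...,
// which is exactly the pairing readSignedExpGolomb() inverts.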
  // ==================== Init Segment Building ====================

  /**
@@ -439,11 +684,19 @@ class HlsMuxer(
     val width = format.getInteger(MediaFormat.KEY_WIDTH)
     val height = format.getInteger(MediaFormat.KEY_HEIGHT)

-    val sps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) }
+    val rawSps = format.getByteBuffer("csd-0")?.let { extractNalUnit(it) }
       ?: throw IllegalArgumentException("Missing SPS (csd-0)")
     val pps = format.getByteBuffer("csd-1")?.let { extractNalUnit(it) }
       ?: throw IllegalArgumentException("Missing PPS (csd-1)")

+    Log.d(TAG, "Original SPS size: ${rawSps.size} bytes, PPS size: ${pps.size} bytes")
+    Log.d(TAG, "Original SPS hex: ${rawSps.joinToString("") { "%02x".format(it) }}")
+
+    // Inject VUI timing info into SPS using configured fps (not the encoder's output format fps)
+    val sps = injectVuiTiming(rawSps, configuredFps)
+    Log.d(TAG, "Modified SPS size: ${sps.size} bytes")
+    Log.d(TAG, "Modified SPS hex: ${sps.joinToString("") { "%02x".format(it) }}")
+
     val output = ByteArrayOutputStream()

     // ftyp
@@ -544,91 +797,56 @@ class HlsMuxer(
     dos.writeShort(0) // volume (0 for video)
     dos.writeShort(0) // reserved

-    // Rotation matrix based on orientationDegrees
-    writeRotationMatrix(dos, width, height)
+    // Rotation matrix
+    writeRotationMatrix(dos)

-    // For 90° and 270° rotations, the display dimensions are swapped
-    // The tkhd width/height represent the final display size after rotation
+    // Display dimensions should be post-rotation dimensions
+    // For 90° or 270° rotation, swap width and height
     val (displayWidth, displayHeight) = when (orientationDegrees) {
-      90, 270 -> Pair(height, width)
-      else -> Pair(width, height)
+      90, 270 -> height to width
+      else -> width to height
     }
     dos.writeInt(displayWidth shl 16) // width (16.16 fixed point)
     dos.writeInt(displayHeight shl 16) // height (16.16 fixed point)

-    Log.d(TAG, "tkhd: encoder=${width}x${height}, display=${displayWidth}x${displayHeight}, rotation=$orientationDegrees")
+    Log.d(TAG, "tkhd: encoded=${width}x${height}, display=${displayWidth}x${displayHeight}, rotation=$orientationDegrees")

     return wrapBox("tkhd", output.toByteArray())
   }

   /**
    * Writes the 3x3 transformation matrix for video rotation.
-   * The matrix is applied to rotate the video content for correct display.
-   *
-   * Matrix format in tkhd box (all values in fixed-point):
-   * | a b u |  where a,b,c,d are 16.16 fixed-point
-   * | c d v |  and u,v are 2.30 fixed-point (always 0)
-   * | x y w |  x,y are 16.16, w is 2.30 (always 1.0)
-   *
-   * For rotation by θ: a=cos(θ), b=sin(θ), c=-sin(θ), d=cos(θ)
-   * Translation (x,y) keeps the rotated video in the visible area.
    */
-  private fun writeRotationMatrix(dos: DataOutputStream, width: Int, height: Int) {
+  private fun writeRotationMatrix(dos: DataOutputStream) {
-    // Fixed-point constants
     val one = 0x00010000 // 1.0 in 16.16
-    val negOne = -0x00010000 // -1.0 in 16.16 (will be written as unsigned)
+    val negOne = 0xFFFF0000.toInt() // -1.0 in 16.16
     val w = 0x40000000 // 1.0 in 2.30

+    // For 270° device orientation (landscape-right), apply 90° CW rotation
+    // For 90° device orientation (landscape-left), apply 270° CW rotation
+    val a: Int
+    val b: Int
+    val c: Int
+    val d: Int
     when (orientationDegrees) {
-      90 -> {
-        // 90° rotation: x' = y, y' = -x + width
-        dos.writeInt(0) // a = 0
-        dos.writeInt(negOne) // b = -1
-        dos.writeInt(0) // u = 0
-        dos.writeInt(one) // c = 1
-        dos.writeInt(0) // d = 0
-        dos.writeInt(0) // v = 0
-        dos.writeInt(0) // x = 0
-        dos.writeInt(width shl 16) // y = width (translation)
-        dos.writeInt(w) // w = 1
-      }
-      180 -> {
-        // 180° rotation
-        dos.writeInt(negOne) // a = -1
-        dos.writeInt(0) // b = 0
-        dos.writeInt(0) // u = 0
-        dos.writeInt(0) // c = 0
-        dos.writeInt(negOne) // d = -1
-        dos.writeInt(0) // v = 0
-        dos.writeInt(width shl 16) // x = width (translation)
-        dos.writeInt(height shl 16) // y = height (translation)
-        dos.writeInt(w) // w = 1
-      }
-      270 -> {
-        // 270° rotation: x' = -y + height, y' = x
-        dos.writeInt(0) // a = 0
-        dos.writeInt(one) // b = 1
-        dos.writeInt(0) // u = 0
-        dos.writeInt(negOne) // c = -1
-        dos.writeInt(0) // d = 0
-        dos.writeInt(0) // v = 0
-        dos.writeInt(height shl 16) // x = height (translation)
-        dos.writeInt(0) // y = 0
-        dos.writeInt(w) // w = 1
-      }
-      else -> {
-        // 0° or unknown: identity matrix
-        dos.writeInt(one) // a = 1
-        dos.writeInt(0) // b = 0
-        dos.writeInt(0) // u = 0
-        dos.writeInt(0) // c = 0
-        dos.writeInt(one) // d = 1
-        dos.writeInt(0) // v = 0
-        dos.writeInt(0) // x = 0
-        dos.writeInt(0) // y = 0
-        dos.writeInt(w) // w = 1
-      }
+      90 -> { a = 0; b = negOne; c = one; d = 0 }
+      180 -> { a = negOne; b = 0; c = 0; d = negOne }
+      270 -> { a = 0; b = one; c = negOne; d = 0 }
+      else -> { a = one; b = 0; c = 0; d = one }
     }
+
+    dos.writeInt(a)
+    dos.writeInt(b)
+    dos.writeInt(0) // u = 0
+    dos.writeInt(c)
+    dos.writeInt(d)
+    dos.writeInt(0) // v = 0
+    dos.writeInt(0) // tx = 0
+    dos.writeInt(0) // ty = 0
+    dos.writeInt(w) // w = 1.0
+
+    Log.d(TAG, "Rotation matrix for $orientationDegrees°")
   }

   private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
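
For reference, the fixed-point constants above decode as: 0x00010000 = 1.0 and 0xFFFF0000 = -1.0 in 16.16 fixed point, and 0x40000000 = 1.0 in 2.30. For orientationDegrees = 90 the new code therefore writes a = 0, b = -1.0, c = 1.0, d = 0 with zero translation, i.e. a pure rotation matrix, while the display size is carried separately by the swapped tkhd width/height written earlier in the hunk.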
@@ -715,7 +933,7 @@ class HlsMuxer(
   private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
     val content = ByteArrayOutputStream()
     content.write(buildStsdBox(width, height, sps, pps))
-    content.write(buildSttsBox()) // Contains default timing for ffprobe frame rate detection
+    content.write(buildEmptySttsBox())
     content.write(buildEmptyStscBox())
     content.write(buildEmptyStszBox())
     content.write(buildEmptyStcoBox())
@@ -761,9 +979,10 @@ class HlsMuxer(
     val output = ByteArrayOutputStream()
     val dos = DataOutputStream(output)

-    val profileIdc = if (sps.isNotEmpty()) sps[0].toInt() and 0xFF else 0x42
-    val profileCompat = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x00
-    val levelIdc = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x1F
+    // SPS layout: [0]=NAL header (0x67), [1]=profile_idc, [2]=constraint_flags, [3]=level_idc
+    val profileIdc = if (sps.size > 1) sps[1].toInt() and 0xFF else 0x42
+    val profileCompat = if (sps.size > 2) sps[2].toInt() and 0xFF else 0x00
+    val levelIdc = if (sps.size > 3) sps[3].toInt() and 0xFF else 0x1F

     dos.writeByte(1) // configuration version
     dos.writeByte(profileIdc) // AVC profile
@@ -782,21 +1001,11 @@ class HlsMuxer(
     return wrapBox("avcC", output.toByteArray())
   }

-  private fun buildSttsBox(): ByteArray {
+  private fun buildEmptySttsBox(): ByteArray {
     val output = ByteArrayOutputStream()
     val dos = DataOutputStream(output)

-    // For fragmented MP4, stts is normally empty as timing is in trun boxes.
-    // However, ffprobe uses stts to calculate r_frame_rate when present.
-    // We add a single entry with the default sample duration so ffprobe
-    // can derive: r_frame_rate = timescale / sample_delta = 30000/1000 = 30
-    val defaultSampleDuration = timescale / fps
-
     dos.writeInt(0) // version & flags
-    dos.writeInt(1) // entry count (1 entry for default timing)
-    dos.writeInt(1) // sample_count (indicates this is the default duration)
-    dos.writeInt(defaultSampleDuration) // sample_delta in timescale units
+    dos.writeInt(0) // entry count

     return wrapBox("stts", output.toByteArray())
   }
@@ -833,10 +1042,9 @@ class HlsMuxer(
     val output = ByteArrayOutputStream()
     val dos = DataOutputStream(output)

-    // Calculate default sample duration so ffprobe can derive correct fps
-    // fps = timescale / default_sample_duration
-    // At 30fps with timescale=30000: duration=1000, ffprobe calculates 30000/1000=30
-    val defaultSampleDuration = timescale / fps
+    // Default sample duration: timescale / fps
+    // Since timescale = fps * 1000, duration = 1000 for any fps
+    val defaultSampleDuration = 1000

     dos.writeInt(0) // version & flags
     dos.writeInt(1) // track ID
@@ -953,10 +1161,13 @@ class HlsMuxer(
     dos.writeInt(samples.size)
     dos.writeInt(dataOffset)

+    // Use constant duration based on configured fps for consistent frame rate
+    // This ensures ffprobe reports correct fps instead of calculating from variable timing
+    val constantDuration = timescale / configuredFps // e.g., 30000/30 = 1000 ticks
+    Log.d(TAG, "Writing ${samples.size} samples with constant duration=${constantDuration} ticks (${configuredFps}fps)")
+
     for (sample in samples) {
-      // Convert duration to timescale units
-      val durationInTimescale = ((sample.durationUs * timescale) / 1_000_000).toInt()
-      dos.writeInt(durationInTimescale)
+      dos.writeInt(constantDuration)
       dos.writeInt(sample.data.size)
       dos.writeInt(buildSampleFlags(sample.isKeyFrame))
     }

RecordingSession.kt

@@ -44,14 +44,7 @@ class RecordingSession(
   data class Video(val path: String, val durationMs: Long, val size: Size)

-  // Strip file:// prefix if present (expo-file-system returns URIs with this prefix)
-  private val outputPath: File = File(
-    if (filePath.startsWith("file://")) {
-      filePath.removePrefix("file://")
-    } else {
-      filePath
-    }
-  )
+  private val outputPath: File = File(filePath)

   private val bitRate = getBitRate()