attempt to fix segment corruption w/ tfhd base data offset & tfdt injection, moof size updates -- very messy code, WIP
@@ -16,7 +16,9 @@ import com.mrousavy.camera.types.Orientation
 import com.mrousavy.camera.types.RecordVideoOptions
 import java.io.File
 import java.io.FileOutputStream
+import java.io.RandomAccessFile
 import java.nio.ByteBuffer
+import java.nio.ByteOrder
 
 /**
  * A recording manager that produces HLS-compatible fragmented MP4 segments.
@@ -52,12 +54,12 @@ class FragmentedRecordingManager(
     ): FragmentedRecordingManager {
       val mimeType = options.videoCodec.toMimeType()
       // For fragmented MP4: DON'T swap dimensions, use camera's native dimensions.
-      // The C++ VideoPipeline now uses a simple Y-flip matrix (not the display transform).
+      // The C++ VideoPipeline uses a custom transform matrix (not the display transform).
       // This gives us raw sensor frames, and we rely on rotation metadata for playback.
       val cameraOrientationDegrees = when (cameraOrientation) {
-        Orientation.LANDSCAPE_LEFT -> 0 // CCW landscape - works!
-        Orientation.LANDSCAPE_RIGHT -> 0 // CW landscape - same as CCW (Y-flip handles it)
-        Orientation.PORTRAIT -> 90 // Portrait typically needs 90° on Android
+        Orientation.LANDSCAPE_LEFT -> 0 // CCW landscape
+        Orientation.LANDSCAPE_RIGHT -> 0 // CW landscape
+        Orientation.PORTRAIT -> 90
         Orientation.PORTRAIT_UPSIDE_DOWN -> 270
       }
       Log.i(TAG, "ROTATION_DEBUG FragmentedRecordingManager: cameraOrientation=$cameraOrientation, cameraOrientationDegrees=$cameraOrientationDegrees, inputSize=${size.width}x${size.height}")
@@ -81,7 +83,7 @@ class FragmentedRecordingManager(
       format.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, segmentDurationSeconds)
       format.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
 
-      Log.d(TAG, "Video Format: $format, camera orientation $cameraOrientationDegrees")
+      Log.d(TAG, "Video Format: $format, orientationDegrees: $cameraOrientationDegrees")
 
       codec.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
 
@@ -104,6 +106,13 @@ class FragmentedRecordingManager(
   private var segmentContext: SegmentContext? = null
   private var initSegmentEmitted = false
 
+  // Cumulative base time for HLS-compatible timestamps (in timescale units)
+  // Each segment's baseMediaDecodeTime should be the sum of all previous segment durations
+  private var cumulativeBaseTimeUs: Long = 0L
+
+  // Timescale used in the fMP4 (typically 1000000 for microseconds)
+  private val timescale: Long = 1_000_000L
+
   override val surface: Surface = encoder.createInputSurface()
 
   init {
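Annotation (not part of the patch): baseMediaDecodeTime in a tfdt box is counted in the track's timescale, not in microseconds, so the cumulative microsecond counter above only maps 1:1 onto the injected tfdt value if the muxer's track timescale really is 1,000,000 as the `timescale` field assumes. A minimal conversion sketch for any other timescale (usToTimescale is an illustrative name, not code from this commit):

    fun usToTimescale(timeUs: Long, timescale: Long): Long =
      // round-to-nearest conversion from microseconds to timescale ticks
      (timeUs * timescale + 500_000L) / 1_000_000L

    // usToTimescale(6_000_000, 90_000) == 540_000 ticks for a 90 kHz track;
    // usToTimescale(6_000_000, 1_000_000) == 6_000_000, the 1:1 case assumed here.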
@@ -113,16 +122,26 @@ class FragmentedRecordingManager(
     encoder.setCallback(this)
   }
 
+  /**
+   * Result from finishing a segment, used for tfdt patching.
+   */
+  private data class SegmentResult(
+    val file: File,
+    val segmentIndex: Int,
+    val durationUs: Long
+  )
+
   /**
    * Context for a single data segment being written.
    * Init segments are created separately via createInitSegment().
    */
   private inner class SegmentContext(
     private val format: MediaFormat,
-    private val segmentIndex: Int
+    val segmentIndex: Int,
+    private val baseTimeUs: Long // The baseMediaDecodeTime for this segment
   ) {
     private val filename = "$segmentIndex.mp4"
-    private val file = File(outputDirectory, filename)
+    val file = File(outputDirectory, filename)
     private val outputStream = FileOutputStream(file)
     private val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
     private lateinit var videoTrack: Muxer.TrackToken
@@ -133,32 +152,38 @@ class FragmentedRecordingManager(
     init {
       val media3Format = convertToMedia3Format(format)
       videoTrack = muxer.addTrack(media3Format)
-      Log.d(TAG, "Created segment context: $filename")
+      Log.d(TAG, "Created segment context: $filename with baseTimeUs=$baseTimeUs")
     }
 
-    fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo): Boolean {
+    fun writeSample(buffer: ByteBuffer, bufferInfo: BufferInfo) {
       if (startTimeUs < 0) {
         startTimeUs = bufferInfo.presentationTimeUs
+        Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FIRST sample: absolutePTS=${bufferInfo.presentationTimeUs}us, baseTimeUs=$baseTimeUs")
       }
-      lastTimeUs = bufferInfo.presentationTimeUs
 
+      // Log first 3 samples and every keyframe for debugging
       val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
+      if (sampleCount < 3 || isKeyFrame) {
+        Log.i(TAG, "PTS_DEBUG Segment $segmentIndex sample $sampleCount: PTS=${bufferInfo.presentationTimeUs}us, keyframe=$isKeyFrame")
+      }
+
+      lastTimeUs = bufferInfo.presentationTimeUs
+
       muxer.writeSampleData(videoTrack, buffer, bufferInfo)
       sampleCount++
-
-      // Check if we should start a new segment at the next keyframe
-      if (isKeyFrame && sampleCount > 1) {
-        val segmentDurationUs = bufferInfo.presentationTimeUs - startTimeUs
-        if (segmentDurationUs >= targetSegmentDurationUs) {
-          return true // Signal to create new segment
-        }
-      }
-
-      return false
     }
 
-    fun finish(): Long {
+    /**
+     * Check if we've accumulated enough duration to start a new segment.
+     * Should only be called when we have a keyframe available.
+     */
+    fun shouldStartNewSegmentOnKeyframe(): Boolean {
+      if (sampleCount == 0) return false // Need at least one sample first
+      val currentDurationUs = lastTimeUs - startTimeUs
+      return currentDurationUs >= targetSegmentDurationUs
+    }
+
+    fun finish(): SegmentResult {
       try {
         muxer.close()
         outputStream.close()
@@ -167,10 +192,9 @@ class FragmentedRecordingManager(
       }
 
       val durationUs = if (lastTimeUs > startTimeUs) lastTimeUs - startTimeUs else 0L
-      callbacks.onVideoChunkReady(file, segmentIndex, durationUs)
 
-      Log.d(TAG, "Finished segment: $filename, samples=$sampleCount, duration=${durationUs/1000}ms")
-      return durationUs
+      Log.i(TAG, "PTS_DEBUG Segment $segmentIndex FINISHED: startPTS=${startTimeUs}us, lastPTS=${lastTimeUs}us, duration=${durationUs/1000}ms, samples=$sampleCount, baseTimeUs=$baseTimeUs")
+      return SegmentResult(file, segmentIndex, durationUs)
     }
   }
 
@@ -181,11 +205,19 @@ class FragmentedRecordingManager(
       return
     }
 
-    // Close previous segment
-    segmentContext?.finish()
+    // Close previous segment and process it for HLS
+    segmentContext?.let { ctx ->
+      val result = ctx.finish()
+      // Process the segment: extract init (if first), strip headers, inject tfdt
+      processSegmentForHLS(result.file, result.segmentIndex, cumulativeBaseTimeUs)
+      // Update cumulative time for next segment
+      cumulativeBaseTimeUs += result.durationUs
+      // Notify callback
+      callbacks.onVideoChunkReady(result.file, result.segmentIndex, result.durationUs)
+    }
 
-    // Create new data segment (init segments are created separately)
-    segmentContext = SegmentContext(format, chunkIndex)
+    // Create new data segment with current cumulative base time
+    segmentContext = SegmentContext(format, chunkIndex, cumulativeBaseTimeUs)
     chunkIndex++
   }
 
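Annotation (not part of the patch): the artifacts produced here - init.mp4 via onInitSegmentReady and the numbered moof+mdat fragments via onVideoChunkReady - are exactly what an HLS media playlist stitches back together. A hypothetical consumer-side sketch of that playlist (buildMediaPlaylist and the VOD-style layout are illustrative, not code from this repo):

    fun buildMediaPlaylist(segments: List<Pair<String, Long>>): String = buildString {
      // segments: (file name, duration in microseconds), e.g. ("0.mp4", 6_000_000)
      val targetDurationSec = segments.maxOfOrNull { (it.second + 999_999) / 1_000_000 } ?: 1L
      appendLine("#EXTM3U")
      appendLine("#EXT-X-VERSION:7") // fMP4 segments need a recent protocol version
      appendLine("#EXT-X-TARGETDURATION:$targetDurationSec")
      appendLine("#EXT-X-MEDIA-SEQUENCE:0")
      appendLine("#EXT-X-MAP:URI=\"init.mp4\"") // the extracted ftyp+moov header
      for ((name, durationUs) in segments) {
        appendLine("#EXTINF:${durationUs / 1_000_000.0},")
        appendLine(name)
      }
      appendLine("#EXT-X-ENDLIST")
    }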
@@ -197,7 +229,12 @@ class FragmentedRecordingManager(
   override fun finish() {
     synchronized(this) {
       recording = false
-      segmentContext?.finish()
+      // Close final segment and process it for HLS
+      segmentContext?.let { ctx ->
+        val result = ctx.finish()
+        processSegmentForHLS(result.file, result.segmentIndex, cumulativeBaseTimeUs)
+        callbacks.onVideoChunkReady(result.file, result.segmentIndex, result.durationUs)
+      }
       segmentContext = null
       try {
         encoder.stop()
@@ -220,15 +257,17 @@ class FragmentedRecordingManager(
       return
     }
 
-    val encodedData = encoder.getOutputBuffer(index)
-    if (encodedData == null) {
-      Log.e(TAG, "getOutputBuffer returned null")
+    // Skip codec config buffers - these contain SPS/PPS with annex-b start codes
+    // and should NOT be written as samples (they're already in the Format's initializationData)
+    if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
+      Log.d(TAG, "Skipping codec config buffer (size=${bufferInfo.size})")
       encoder.releaseOutputBuffer(index, false)
       return
     }
 
-    // Wait until init segment is emitted (happens in onOutputFormatChanged)
-    if (!initSegmentEmitted) {
+    val encodedData = encoder.getOutputBuffer(index)
+    if (encodedData == null) {
+      Log.e(TAG, "getOutputBuffer returned null")
       encoder.releaseOutputBuffer(index, false)
       return
     }
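Annotation (not part of the patch): MediaCodec delivers H.264 SPS/PPS as a BUFFER_FLAG_CODEC_CONFIG buffer in Annex-B framing (prefixed with 00 00 00 01 or 00 00 01 start codes), which is why it is dropped here instead of being muxed as an fMP4 sample. A minimal sanity-check sketch (looksLikeAnnexBConfig is an illustrative helper, not in the patch):

    fun looksLikeAnnexBConfig(buffer: java.nio.ByteBuffer): Boolean {
      val b = buffer.duplicate()
      if (b.remaining() < 4) return false
      val b0 = b.get().toInt() and 0xFF
      val b1 = b.get().toInt() and 0xFF
      val b2 = b.get().toInt() and 0xFF
      val b3 = b.get().toInt() and 0xFF
      // 3- or 4-byte Annex-B start code
      return (b0 == 0 && b1 == 0 && b2 == 1) || (b0 == 0 && b1 == 0 && b2 == 0 && b3 == 1)
    }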
@@ -245,11 +284,18 @@ class FragmentedRecordingManager(
     }
 
     try {
-      val shouldStartNewSegment = context.writeSample(encodedData, bufferInfo)
+      // Check if this keyframe should start a new segment BEFORE writing
+      val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
+      val shouldStartNewSegment = isKeyFrame && context.shouldStartNewSegmentOnKeyframe()
+
       if (shouldStartNewSegment) {
+        // Finish old segment WITHOUT writing this keyframe to it
         createNewSegment()
-        // Write this keyframe to the new segment as well
+        // Write keyframe to the NEW segment only
        segmentContext?.writeSample(encodedData, bufferInfo)
+      } else {
+        // Write to current segment
+        context.writeSample(encodedData, bufferInfo)
       }
     } catch (e: Exception) {
       Log.e(TAG, "Error writing sample", e)
@@ -266,39 +312,8 @@ class FragmentedRecordingManager(
   override fun onOutputFormatChanged(codec: MediaCodec, format: MediaFormat) {
     Log.i(TAG, "Output format changed: $format")
     encodedFormat = format
-    // Create the init segment immediately when we get the format
-    // This produces an fMP4 file with just ftyp + moov (no samples)
-    if (!initSegmentEmitted) {
-      createInitSegment(format)
-      initSegmentEmitted = true
-    }
-  }
-
-  /**
-   * Creates an initialization segment containing only codec configuration (ftyp + moov).
-   * This is done by creating a muxer, adding the track, and immediately closing it
-   * without writing any samples.
-   */
-  private fun createInitSegment(format: MediaFormat) {
-    val initFile = File(outputDirectory, "init.mp4")
-    try {
-      val outputStream = FileOutputStream(initFile)
-      val muxer = FragmentedMp4Muxer.Builder(outputStream).build()
-
-      // Convert and add the track
-      val media3Format = convertToMedia3Format(format)
-      muxer.addTrack(media3Format)
-
-      // Close immediately - this writes just the header (ftyp + moov)
-      muxer.close()
-      outputStream.close()
-
-      Log.d(TAG, "Created init segment: ${initFile.absolutePath}")
-      callbacks.onInitSegmentReady(initFile)
-    } catch (e: Exception) {
-      Log.e(TAG, "Error creating init segment", e)
-    }
+    // Note: init segment is now extracted from the first segment's ftyp+moov
+    // rather than created separately (Media3's empty init was not working)
   }
 
   private fun convertToMedia3Format(mediaFormat: MediaFormat): Format {
@@ -309,6 +324,8 @@ class FragmentedRecordingManager(
     val frameRate = try { mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE) } catch (e: Exception) { -1 }
 
     // Get CSD (Codec Specific Data) if available - required for init segment
+    // csd-0 contains SPS (Sequence Parameter Set)
+    // csd-1 contains PPS (Picture Parameter Set)
     val csd0 = mediaFormat.getByteBuffer("csd-0")
     val csd1 = mediaFormat.getByteBuffer("csd-1")
 
@@ -317,13 +334,18 @@ class FragmentedRecordingManager(
       val bytes = ByteArray(it.remaining())
       it.duplicate().get(bytes)
       initData.add(bytes)
+      Log.i(TAG, "CSD_DEBUG: csd-0 (SPS) size=${bytes.size} bytes, hex=${bytes.take(32).joinToString("") { "%02x".format(it) }}...")
     }
     csd1?.let {
       val bytes = ByteArray(it.remaining())
       it.duplicate().get(bytes)
       initData.add(bytes)
+      Log.i(TAG, "CSD_DEBUG: csd-1 (PPS) size=${bytes.size} bytes, hex=${bytes.joinToString("") { "%02x".format(it) }}")
     }
 
+    val totalCsdSize = initData.sumOf { it.size }
+    Log.i(TAG, "CSD_DEBUG: Total CSD size=$totalCsdSize bytes (csd-0=${csd0?.remaining() ?: 0}, csd-1=${csd1?.remaining() ?: 0})")
+
     Log.i(TAG, "ROTATION_DEBUG convertToMedia3Format: orientationDegrees=$orientationDegrees, width=$width, height=$height")
     return Format.Builder()
       .setSampleMimeType(mimeType)
@@ -337,4 +359,404 @@ class FragmentedRecordingManager(
       }
       .build()
   }
+
+  /**
+   * Processes a segment file for HLS compatibility:
+   * 1. For segment 0: extracts ftyp+moov header as init.mp4
+   * 2. Strips ftyp+moov from segment, keeping only moof+mdat (the fragment)
+   * 3. Injects tfdt box into moof for proper HLS timing
+   *
+   * Media3's FragmentedMp4Muxer creates self-contained MP4s, but HLS needs:
+   * - init.mp4: ftyp + moov (codec configuration)
+   * - segments: moof + mdat only (fragments referencing init)
+   */
+  private fun processSegmentForHLS(file: File, segmentIndex: Int, baseMediaDecodeTimeUs: Long) {
+    try {
+      val originalBytes = file.readBytes()
+      val buffer = ByteBuffer.wrap(originalBytes).order(ByteOrder.BIG_ENDIAN)
+
+      // Find where moof starts (everything before is header: ftyp + moov)
+      val moofStartPos = findMoofPosition(buffer)
+      if (moofStartPos < 0) {
+        Log.e(TAG, "HLS_PROCESS: Could not find moof in ${file.name}")
+        return
+      }
+
+      Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex - moof starts at $moofStartPos, total size=${originalBytes.size}")
+
+      // For segment 0, extract header as init.mp4
+      if (segmentIndex == 0 && !initSegmentEmitted) {
+        val headerBytes = originalBytes.copyOfRange(0, moofStartPos)
+        val initFile = File(outputDirectory, "init.mp4")
+        initFile.writeBytes(headerBytes)
+        Log.i(TAG, "HLS_PROCESS: Created init.mp4 with ${headerBytes.size} bytes (ftyp+moov)")
+        // Debug: dump the init.mp4 structure
+        dumpMp4BoxStructure(headerBytes, "INIT_STRUCTURE")
+        callbacks.onInitSegmentReady(initFile)
+        initSegmentEmitted = true
+      }
+
+      // Extract fragment (moof + mdat only)
+      val fragmentBytes = originalBytes.copyOfRange(moofStartPos, originalBytes.size)
+      Log.d(TAG, "HLS_PROCESS: Extracted fragment of ${fragmentBytes.size} bytes")
+
+      // Inject tfdt into the fragment
+      // Note: in the fragment, moof is at position 0
+      val processedFragment = injectTfdtIntoFragment(fragmentBytes, baseMediaDecodeTimeUs)
+
+      // Write back the processed fragment (stripped of header)
+      file.writeBytes(processedFragment)
+
+      Log.i(TAG, "HLS_PROCESS: Segment $segmentIndex processed - header stripped, tfdt injected, final size=${processedFragment.size}")
+    } catch (e: Exception) {
+      Log.e(TAG, "Error processing segment ${file.name} for HLS", e)
+    }
+  }
+
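Annotation (not part of the patch): a cheap way to check that the rewrite above did what it intended is to confirm the processed segment now begins directly with a 'moof' box (a leftover 'ftyp' would mean the header strip failed). A debugging sketch under that assumption (startsWithMoof is illustrative, not in the patch):

    fun startsWithMoof(file: java.io.File): Boolean {
      val head = ByteArray(8)
      file.inputStream().use { if (it.read(head) < 8) return false }
      // bytes 4..7 of the first box header hold the fourCC
      val type = java.nio.ByteBuffer.wrap(head, 4, 4).order(java.nio.ByteOrder.BIG_ENDIAN).int
      return type == 0x6D6F6F66 // 'moof'
    }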
+  /**
+   * Finds the position of the moof box in the file.
+   * Returns -1 if not found.
+   */
+  private fun findMoofPosition(buffer: ByteBuffer): Int {
+    var pos = 0
+    while (pos < buffer.limit() - 8) {
+      buffer.position(pos)
+      val size = buffer.int.toLong() and 0xFFFFFFFFL
+      val type = buffer.int
+
+      if (size < 8) break
+
+      // 'moof' = 0x6D6F6F66
+      if (type == 0x6D6F6F66) {
+        return pos
+      }
+
+      pos += size.toInt()
+    }
+    return -1
+  }
+
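Annotation (not part of the patch): the hex constants used for box types are just the four ASCII bytes of the box name packed big-endian into an Int ('m' 0x6D, 'o' 0x6F, 'f' 0x66 gives 0x6D6F6F66). An illustrative helper that makes the derivation explicit:

    fun fourCc(name: String): Int {
      require(name.length == 4) { "box type must be exactly 4 characters" }
      return name.fold(0) { acc, c -> (acc shl 8) or (c.code and 0xFF) }
    }

    // fourCc("moof") == 0x6D6F6F66, fourCc("tfdt") == 0x74666474,
    // fourCc("tfhd") == 0x74666864, fourCc("trun") == 0x7472756E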
+  /**
+   * Injects a tfdt box into a fragment (moof+mdat).
+   * The fragment has moof at position 0 (header already stripped).
+   * Also fixes tfhd.base_data_offset since we stripped the original file header.
+   */
+  private fun injectTfdtIntoFragment(fragmentBytes: ByteArray, baseMediaDecodeTimeUs: Long): ByteArray {
+    val buffer = ByteBuffer.wrap(fragmentBytes).order(ByteOrder.BIG_ENDIAN)
+
+    // Find box positions within the fragment (moof is at position 0)
+    val positions = findBoxPositionsInFragment(buffer)
+    if (positions == null) {
+      Log.e(TAG, "TFDT_INJECT: Could not find required boxes in fragment")
+      return fragmentBytes
+    }
+
+    val (moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos) = positions
+    Log.d(TAG, "TFDT_INJECT: Fragment boxes - moof@$moofPos(size=$moofSize), traf@$trafPos, tfhd@$tfhdPos, trun@$trunPos")
+
+    // First, fix tfhd.base_data_offset - it was pointing to the original file position
+    // but now moof is at position 0, so base_data_offset should be 0
+    fixTfhdBaseDataOffset(buffer, tfhdPos.toInt())
+
+    // Create tfdt box (version 1, 64-bit baseMediaDecodeTime)
+    val tfdtSize = 20
+    val tfdtBytes = ByteBuffer.allocate(tfdtSize).order(ByteOrder.BIG_ENDIAN)
+    tfdtBytes.putInt(tfdtSize) // size
+    tfdtBytes.putInt(0x74666474) // 'tfdt'
+    tfdtBytes.put(1.toByte()) // version = 1
+    tfdtBytes.put(0.toByte()) // flags[0]
+    tfdtBytes.put(0.toByte()) // flags[1]
+    tfdtBytes.put(0.toByte()) // flags[2]
+    tfdtBytes.putLong(baseMediaDecodeTimeUs) // baseMediaDecodeTime
+
+    // Create new fragment with tfdt injected after tfhd
+    val newBytes = ByteArray(fragmentBytes.size + tfdtSize)
+    val insertPos = tfhdEnd.toInt()
+
+    // Copy bytes before insertion point
+    System.arraycopy(fragmentBytes, 0, newBytes, 0, insertPos)
+
+    // Insert tfdt
+    System.arraycopy(tfdtBytes.array(), 0, newBytes, insertPos, tfdtSize)
+
+    // Copy bytes after insertion point
+    System.arraycopy(fragmentBytes, insertPos, newBytes, insertPos + tfdtSize, fragmentBytes.size - insertPos)
+
+    // Update box sizes in the new buffer
+    val newBuffer = ByteBuffer.wrap(newBytes).order(ByteOrder.BIG_ENDIAN)
+
+    // Update moof size
+    val newMoofSize = moofSize + tfdtSize
+    newBuffer.putInt(moofPos.toInt(), newMoofSize.toInt())
+
+    // Update traf size
+    val newTrafSize = trafSize + tfdtSize
+    newBuffer.putInt(trafPos.toInt(), newTrafSize.toInt())
+
+    // Update trun data_offset if present
+    val newTrunPos = trunPos.toInt() + tfdtSize
+    updateTrunDataOffset(newBuffer, newTrunPos, tfdtSize)
+
+    Log.i(TAG, "TFDT_INJECT: Injected tfdt with baseMediaDecodeTime=$baseMediaDecodeTimeUs us")
+    return newBytes
+  }
+
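Worked example (annotation, not part of the patch): with the layout above, the second segment in a recording whose first segment lasted exactly 6 s (cumulativeBaseTimeUs = 6_000_000) would get this 20-byte tfdt injected right after its tfhd:

    00 00 00 14  74 66 64 74  01 00 00 00  00 00 00 00 00 5B 8D 80
    size = 20    'tfdt'       v1, flags=0  baseMediaDecodeTime = 6,000,000

The moof and traf sizes then each grow by those same 20 bytes, which is what the putInt calls above account for.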
+  /**
+   * Data class to hold box positions for tfdt injection.
+   */
+  private data class BoxPositions(
+    val moofPos: Long,
+    val moofSize: Long,
+    val trafPos: Long,
+    val trafSize: Long,
+    val tfhdPos: Long, // Position of tfhd (need to fix base_data_offset)
+    val tfhdEnd: Long, // Position right after tfhd where we'll insert tfdt
+    val trunPos: Long // Position of trun (need to update its data_offset)
+  )
+
+  /**
+   * Finds the positions of moof, traf, tfhd, and trun boxes in a fragment.
+   * In a fragment, moof is expected to be at position 0.
+   */
+  private fun findBoxPositionsInFragment(buffer: ByteBuffer): BoxPositions? {
+    val fileSize = buffer.limit()
+    var pos = 0
+
+    while (pos < fileSize - 8) {
+      buffer.position(pos)
+      val size = buffer.int.toLong() and 0xFFFFFFFFL
+      val type = buffer.int
+
+      if (size < 8) break
+
+      // 'moof' = 0x6D6F6F66
+      if (type == 0x6D6F6F66) {
+        val moofPos = pos.toLong()
+        val moofSize = size
+        val moofEnd = pos + size.toInt()
+        var childPos = pos + 8
+
+        while (childPos < moofEnd - 8) {
+          buffer.position(childPos)
+          val childSize = buffer.int.toLong() and 0xFFFFFFFFL
+          val childType = buffer.int
+
+          if (childSize < 8) break
+
+          // 'traf' = 0x74726166
+          if (childType == 0x74726166) {
+            val trafPos = childPos.toLong()
+            val trafSize = childSize
+            val trafEnd = childPos + childSize.toInt()
+            var trafChildPos = childPos + 8
+
+            var tfhdPos: Long = -1
+            var tfhdEnd: Long = -1
+            var trunPos: Long = -1
+
+            while (trafChildPos < trafEnd - 8) {
+              buffer.position(trafChildPos)
+              val trafChildSize = buffer.int.toLong() and 0xFFFFFFFFL
+              val trafChildType = buffer.int
+
+              if (trafChildSize < 8) break
+
+              // 'tfhd' = 0x74666864
+              if (trafChildType == 0x74666864) {
+                tfhdPos = trafChildPos.toLong()
+                tfhdEnd = trafChildPos + trafChildSize
+              }
+              // 'trun' = 0x7472756E
+              else if (trafChildType == 0x7472756E) {
+                trunPos = trafChildPos.toLong()
+              }
+
+              trafChildPos += trafChildSize.toInt()
+            }
+
+            if (tfhdPos > 0 && tfhdEnd > 0 && trunPos > 0) {
+              return BoxPositions(moofPos, moofSize, trafPos, trafSize, tfhdPos, tfhdEnd, trunPos)
+            }
+          }
+
+          childPos += childSize.toInt()
+        }
+      }
+
+      pos += size.toInt()
+    }
+
+    return null
+  }
+
+  /**
+   * Updates the trun box's data_offset field if present.
+   * The data_offset points to sample data in mdat, and needs to be
+   * increased by the size of the injected tfdt box.
+   *
+   * trun structure:
+   * - 4 bytes: size
+   * - 4 bytes: type ('trun')
+   * - 1 byte: version
+   * - 3 bytes: flags
+   * - 4 bytes: sample_count
+   * - [optional] 4 bytes: data_offset (if flags & 0x000001)
+   */
+  private fun updateTrunDataOffset(buffer: ByteBuffer, trunPos: Int, offsetDelta: Int) {
+    buffer.position(trunPos + 8) // Skip size and type
+    val version = buffer.get().toInt() and 0xFF
+    val flags = ((buffer.get().toInt() and 0xFF) shl 16) or
+      ((buffer.get().toInt() and 0xFF) shl 8) or
+      (buffer.get().toInt() and 0xFF)
+
+    // Check if data_offset_present flag (0x000001) is set
+    if ((flags and 0x000001) != 0) {
+      val sampleCount = buffer.int
+      val dataOffsetPos = trunPos + 16 // size(4) + type(4) + version(1) + flags(3) + sample_count(4)
+      buffer.position(dataOffsetPos)
+      val originalOffset = buffer.int
+      val newOffset = originalOffset + offsetDelta
+      buffer.putInt(dataOffsetPos, newOffset)
+      Log.d(TAG, "TFDT_INJECT: Updated trun data_offset: $originalOffset -> $newOffset")
+    } else {
+      Log.d(TAG, "TFDT_INJECT: trun has no data_offset field (flags=0x${flags.toString(16)})")
+    }
+  }
+
+  /**
+   * Fixes the tfhd box's base_data_offset field after stripping the file header.
+   * When we strip ftyp+moov from the original segment, the base_data_offset
+   * (which pointed to a position in the original file) becomes incorrect.
+   * We set it to 0 since moof is now at the start of the fragment.
+   *
+   * tfhd structure:
+   * - 4 bytes: size
+   * - 4 bytes: type ('tfhd')
+   * - 1 byte: version
+   * - 3 bytes: flags
+   * - 4 bytes: track_id
+   * - [optional] 8 bytes: base_data_offset (if flags & 0x000001)
+   */
+  private fun fixTfhdBaseDataOffset(buffer: ByteBuffer, tfhdPos: Int) {
+    buffer.position(tfhdPos + 8) // Skip size and type
+    val version = buffer.get().toInt() and 0xFF
+    val flags = ((buffer.get().toInt() and 0xFF) shl 16) or
+      ((buffer.get().toInt() and 0xFF) shl 8) or
+      (buffer.get().toInt() and 0xFF)
+
+    // Check if base_data_offset_present flag (0x000001) is set
+    if ((flags and 0x000001) != 0) {
+      val trackId = buffer.int
+      val baseDataOffsetPos = tfhdPos + 16 // size(4) + type(4) + version(1) + flags(3) + track_id(4)
+      buffer.position(baseDataOffsetPos)
+      val originalOffset = buffer.long
+      // Set to 0 since moof is now at start of fragment
+      buffer.putLong(baseDataOffsetPos, 0L)
+      Log.i(TAG, "TFHD_FIX: Fixed base_data_offset: $originalOffset -> 0")
+    } else {
+      Log.d(TAG, "TFHD_FIX: tfhd has no base_data_offset field (flags=0x${flags.toString(16)})")
+    }
+  }
+
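Annotation (not part of the patch): only the 0x000001 bit is inspected here, but tfhd defines several optional-field flags in ISO/IEC 14496-12. A small reference sketch for logging them while debugging (describeTfhdFlags is illustrative, not in the patch):

    fun describeTfhdFlags(flags: Int): String = buildList {
      if ((flags and 0x000001) != 0) add("base-data-offset-present")
      if ((flags and 0x000002) != 0) add("sample-description-index-present")
      if ((flags and 0x000008) != 0) add("default-sample-duration-present")
      if ((flags and 0x000010) != 0) add("default-sample-size-present")
      if ((flags and 0x000020) != 0) add("default-sample-flags-present")
      if ((flags and 0x010000) != 0) add("duration-is-empty")
      if ((flags and 0x020000) != 0) add("default-base-is-moof")
    }.joinToString(", ").ifEmpty { "none" }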
+  /**
+   * Debug function to dump MP4 box structure and find avcC/stsd info.
+   */
+  private fun dumpMp4BoxStructure(data: ByteArray, logPrefix: String) {
+    val buffer = ByteBuffer.wrap(data).order(ByteOrder.BIG_ENDIAN)
+    dumpBoxesRecursive(buffer, 0, data.size, 0, logPrefix)
+  }
+
+  private fun dumpBoxesRecursive(buffer: ByteBuffer, start: Int, end: Int, depth: Int, logPrefix: String) {
+    var pos = start
+    val indent = "  ".repeat(depth)
+
+    while (pos < end - 8) {
+      buffer.position(pos)
+      val size = buffer.int.toLong() and 0xFFFFFFFFL
+      val typeInt = buffer.int
+      val typeBytes = ByteArray(4)
+      typeBytes[0] = ((typeInt shr 24) and 0xFF).toByte()
+      typeBytes[1] = ((typeInt shr 16) and 0xFF).toByte()
+      typeBytes[2] = ((typeInt shr 8) and 0xFF).toByte()
+      typeBytes[3] = (typeInt and 0xFF).toByte()
+      val typeStr = String(typeBytes, Charsets.US_ASCII)
+
+      if (size < 8 || pos + size > end) break
+
+      Log.i(TAG, "$logPrefix: $indent[$typeStr] size=$size @ $pos")
+
+      // For ftyp, dump the brands
+      if (typeStr == "ftyp" && size >= 16) {
+        buffer.position(pos + 8)
+        val majorBrand = ByteArray(4)
+        buffer.get(majorBrand)
+        val minorVersion = buffer.int
+        Log.i(TAG, "$logPrefix: $indent major_brand=${String(majorBrand)}, minor_version=$minorVersion")
+
+        val compatBrandsStart = pos + 16
+        val compatBrandsEnd = pos + size.toInt()
+        val brands = mutableListOf<String>()
+        var brandPos = compatBrandsStart
+        while (brandPos + 4 <= compatBrandsEnd) {
+          buffer.position(brandPos)
+          val brand = ByteArray(4)
+          buffer.get(brand)
+          brands.add(String(brand))
+          brandPos += 4
+        }
+        Log.i(TAG, "$logPrefix: $indent compatible_brands=${brands.joinToString(",")}")
+      }
+
+      // For avcC, dump the SPS/PPS info
+      if (typeStr == "avcC" && size >= 13) {
+        buffer.position(pos + 8)
+        val configVersion = buffer.get().toInt() and 0xFF
+        val profileIdc = buffer.get().toInt() and 0xFF
+        val profileCompat = buffer.get().toInt() and 0xFF
+        val levelIdc = buffer.get().toInt() and 0xFF
+        val lengthSizeMinusOne = buffer.get().toInt() and 0x03
+        val numSps = buffer.get().toInt() and 0x1F
+
+        Log.i(TAG, "$logPrefix: $indent avcC: version=$configVersion, profile=$profileIdc, level=$levelIdc, numSPS=$numSps")
+
+        // Read SPS lengths
+        var spsTotal = 0
+        for (i in 0 until numSps) {
+          val spsLen = buffer.short.toInt() and 0xFFFF
+          spsTotal += spsLen
+          Log.i(TAG, "$logPrefix: $indent SPS[$i] length=$spsLen")
+          buffer.position(buffer.position() + spsLen) // Skip SPS data
+        }
+
+        // Read PPS count and lengths
+        if (buffer.position() < pos + size) {
+          val numPps = buffer.get().toInt() and 0xFF
+          var ppsTotal = 0
+          for (i in 0 until numPps) {
+            if (buffer.position() + 2 <= pos + size) {
+              val ppsLen = buffer.short.toInt() and 0xFFFF
+              ppsTotal += ppsLen
+              Log.i(TAG, "$logPrefix: $indent PPS[$i] length=$ppsLen")
+              buffer.position(buffer.position() + ppsLen) // Skip PPS data
+            }
+          }
+          Log.i(TAG, "$logPrefix: $indent avcC total: ${size} bytes, SPS=$spsTotal bytes, PPS=$ppsTotal bytes")
+        }
+      }
+
+      // Recurse into container boxes
+      val containerBoxes = setOf("moov", "trak", "mdia", "minf", "stbl", "stsd", "mvex", "edts")
+      if (typeStr in containerBoxes) {
+        // stsd has 8 extra bytes (version/flags + entry_count) before children
+        val childStart = if (typeStr == "stsd") pos + 16 else pos + 8
+        dumpBoxesRecursive(buffer, childStart, pos + size.toInt(), depth + 1, logPrefix)
+      }
+      // avc1 is a sample entry, structure: 8 byte header + 78 byte fixed fields + child boxes
+      if (typeStr == "avc1") {
+        dumpBoxesRecursive(buffer, pos + 86, pos + size.toInt(), depth + 1, logPrefix)
+      }
+
+      pos += size.toInt()
+    }
+  }
 }