|
|
|
|
@@ -27,7 +27,8 @@ import java.nio.ByteBuffer
|
|
|
|
|
class HlsMuxer(
|
|
|
|
|
private val outputDirectory: File,
|
|
|
|
|
private val callback: Callback,
|
|
|
|
|
private val orientationDegrees: Int = 0
|
|
|
|
|
private val orientationDegrees: Int = 0,
|
|
|
|
|
private val fps: Int = 30
|
|
|
|
|
) {
|
|
|
|
|
companion object {
|
|
|
|
|
private const val TAG = "HlsMuxer"
|
|
|
|
|
@@ -41,7 +42,7 @@ class HlsMuxer(
|
|
|
|
|
|
|
|
|
|
// Configuration
|
|
|
|
|
private var targetSegmentDurationUs: Long = DEFAULT_SEGMENT_DURATION_US
|
|
|
|
|
private var timescale: Int = 30000 // Default, updated from format
|
|
|
|
|
private var timescale: Int = 30000 // Default, updated in addTrack() to fps * 1000
|
|
|
|
|
|
|
|
|
|
// State
|
|
|
|
|
private var state = State.UNINITIALIZED
|
|
|
|
|
@@ -54,6 +55,14 @@ class HlsMuxer(
|
|
|
|
|
private var segmentStartTimeUs = -1L
|
|
|
|
|
private var lastPresentationTimeUs = 0L
|
|
|
|
|
|
|
|
|
|
// Timestamp normalization - MediaCodec timestamps are device uptime, not starting from 0
|
|
|
|
|
private var firstPresentationTimeUs = -1L
|
|
|
|
|
|
|
|
|
|
// Actual fps detection from frame timestamps
|
|
|
|
|
private var detectedFps: Int? = null
|
|
|
|
|
private var fpsDetectionSamples = mutableListOf<Long>()
|
|
|
|
|
private val FPS_DETECTION_SAMPLE_COUNT = 30
|
|
|
|
|
|
|
|
|
|
private enum class State {
|
|
|
|
|
UNINITIALIZED,
|
|
|
|
|
INITIALIZED,
|
|
|
|
|
@@ -69,6 +78,29 @@ class HlsMuxer(
|
|
|
|
|
val isKeyFrame: Boolean
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// ==================== Timestamp Normalization ====================
|
|
|
|
|
|
|
|
|
|
/**
 * Shifts a raw MediaCodec presentation timestamp onto a zero-based timeline.
 *
 * MediaCodec stamps buffers with device-uptime-based microseconds, so the
 * very first sample arrives with a large arbitrary value. HLS players expect
 * the media timeline to begin at (or near) 0, so the first timestamp seen is
 * latched as the zero reference and every later timestamp is expressed
 * relative to it.
 *
 * @param rawPresentationTimeUs uptime-based timestamp from [MediaCodec.BufferInfo]
 * @return the timestamp re-based so the first sample maps to 0
 */
private fun normalizeTimestamp(rawPresentationTimeUs: Long): Long {
    // Latch the first timestamp ever seen as the zero reference point.
    if (firstPresentationTimeUs < 0) {
        firstPresentationTimeUs = rawPresentationTimeUs
        Log.d(TAG, "First timestamp captured: ${rawPresentationTimeUs}us (${rawPresentationTimeUs / 1_000_000.0}s), normalizing to 0")
    }
    return (rawPresentationTimeUs - firstPresentationTimeUs).also { normalized ->
        // Trace only samples inside the first second to keep logs bounded.
        if (normalized < 1_000_000) {
            Log.d(TAG, "Timestamp: raw=${rawPresentationTimeUs}us -> normalized=${normalized}us")
        }
    }
}
|
|
|
|
|
|
|
|
|
|
// ==================== Annex-B to AVCC Conversion ====================
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
@@ -201,13 +233,11 @@ class HlsMuxer(
|
|
|
|
|
|
|
|
|
|
trackFormat = format
|
|
|
|
|
|
|
|
|
|
// Extract timescale from frame rate
|
|
|
|
|
val fps = try {
|
|
|
|
|
format.getInteger(MediaFormat.KEY_FRAME_RATE)
|
|
|
|
|
} catch (e: Exception) {
|
|
|
|
|
30
|
|
|
|
|
}
|
|
|
|
|
timescale = fps * 1000 // Use fps * 1000 for good precision
|
|
|
|
|
// Use fps * 1000 as timescale for good precision (1000 timescale units per frame).
|
|
|
|
|
// This ensures accurate sample durations without integer truncation issues.
|
|
|
|
|
// Note: ffprobe may report r_frame_rate based on timescale, so the backend
|
|
|
|
|
// should use the explicit framesPerSecond from the API mutation, not ffprobe.
|
|
|
|
|
timescale = fps * 1000
|
|
|
|
|
|
|
|
|
|
state = State.INITIALIZED
|
|
|
|
|
|
|
|
|
|
@@ -215,7 +245,7 @@ class HlsMuxer(
|
|
|
|
|
val formatHeight = try { format.getInteger(MediaFormat.KEY_HEIGHT) } catch (e: Exception) { -1 }
|
|
|
|
|
Log.d(TAG, "Added track: ${format.getString(MediaFormat.KEY_MIME)}, " +
|
|
|
|
|
"encoder output: ${formatWidth}x${formatHeight}, " +
|
|
|
|
|
"timescale=$timescale, orientation=$orientationDegrees°")
|
|
|
|
|
"fps=$fps, timescale=$timescale, orientation=$orientationDegrees°")
|
|
|
|
|
|
|
|
|
|
return 0 // Single track, index 0
|
|
|
|
|
}
|
|
|
|
|
@@ -227,16 +257,30 @@ class HlsMuxer(
|
|
|
|
|
check(state == State.INITIALIZED) { "Must call addTrack() before start()" }
|
|
|
|
|
val format = trackFormat ?: throw IllegalStateException("No track format")
|
|
|
|
|
|
|
|
|
|
// Create output directory if needed
|
|
|
|
|
// Create output directory if needed, with proper error handling
|
|
|
|
|
if (!outputDirectory.exists()) {
|
|
|
|
|
outputDirectory.mkdirs()
|
|
|
|
|
val created = outputDirectory.mkdirs()
|
|
|
|
|
if (!created && !outputDirectory.exists()) {
|
|
|
|
|
throw IllegalStateException(
|
|
|
|
|
"Failed to create output directory: ${outputDirectory.absolutePath}. " +
|
|
|
|
|
"Parent exists: ${outputDirectory.parentFile?.exists()}, " +
|
|
|
|
|
"Parent path: ${outputDirectory.parentFile?.absolutePath}"
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
Log.d(TAG, "Created output directory: ${outputDirectory.absolutePath}")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Write init segment
|
|
|
|
|
val initBytes = buildInitSegment(format)
|
|
|
|
|
val initFile = File(outputDirectory, "init.mp4")
|
|
|
|
|
FileOutputStream(initFile).use { it.write(initBytes) }
|
|
|
|
|
|
|
|
|
|
// Log frame rate metadata for debugging
|
|
|
|
|
val defaultSampleDuration = timescale / fps
|
|
|
|
|
Log.d(TAG, "Created init segment: ${initFile.absolutePath} (${initBytes.size} bytes)")
|
|
|
|
|
Log.d(TAG, "Frame rate metadata: timescale=$timescale, fps=$fps, " +
|
|
|
|
|
"default_sample_duration=$defaultSampleDuration (ffprobe should calculate ${timescale}/${defaultSampleDuration}=${fps}fps)")
|
|
|
|
|
|
|
|
|
|
callback.onInitSegmentReady(initFile)
|
|
|
|
|
|
|
|
|
|
state = State.STARTED
|
|
|
|
|
@@ -259,13 +303,40 @@ class HlsMuxer(
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
val isKeyFrame = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
|
|
|
|
|
val presentationTimeUs = bufferInfo.presentationTimeUs
|
|
|
|
|
// Normalize timestamp to start from 0 (MediaCodec uses device uptime)
|
|
|
|
|
val presentationTimeUs = normalizeTimestamp(bufferInfo.presentationTimeUs)
|
|
|
|
|
|
|
|
|
|
// Detect actual fps from first N samples
|
|
|
|
|
if (detectedFps == null) {
|
|
|
|
|
fpsDetectionSamples.add(presentationTimeUs)
|
|
|
|
|
if (fpsDetectionSamples.size >= FPS_DETECTION_SAMPLE_COUNT) {
|
|
|
|
|
val elapsed = fpsDetectionSamples.last() - fpsDetectionSamples.first()
|
|
|
|
|
if (elapsed > 0) {
|
|
|
|
|
val actualFps = ((FPS_DETECTION_SAMPLE_COUNT - 1) * 1_000_000.0 / elapsed).toInt()
|
|
|
|
|
detectedFps = actualFps
|
|
|
|
|
if (kotlin.math.abs(actualFps - fps) > 5) {
|
|
|
|
|
Log.w(TAG, "Actual fps ($actualFps) differs significantly from configured fps ($fps)! " +
|
|
|
|
|
"This may cause processing issues if backend uses configured fps.")
|
|
|
|
|
} else {
|
|
|
|
|
Log.d(TAG, "Detected actual fps: $actualFps (configured: $fps)")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fpsDetectionSamples.clear() // Free memory
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Initialize segment start time
|
|
|
|
|
if (segmentStartTimeUs < 0) {
|
|
|
|
|
segmentStartTimeUs = presentationTimeUs
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Update duration of previous sample BEFORE finalization check
|
|
|
|
|
// This ensures the last sample has correct duration when segment is finalized
|
|
|
|
|
if (pendingSamples.isNotEmpty()) {
|
|
|
|
|
val lastSample = pendingSamples.last()
|
|
|
|
|
lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check if we should finalize current segment (at keyframe boundaries)
|
|
|
|
|
if (isKeyFrame && pendingSamples.isNotEmpty()) {
|
|
|
|
|
val segmentDurationUs = presentationTimeUs - segmentStartTimeUs
|
|
|
|
|
@@ -284,12 +355,6 @@ class HlsMuxer(
|
|
|
|
|
// Convert Annex-B (start codes) to AVCC (length prefixes)
|
|
|
|
|
val data = convertAnnexBToAvcc(rawData)
|
|
|
|
|
|
|
|
|
|
// Update duration of previous sample
|
|
|
|
|
if (pendingSamples.isNotEmpty()) {
|
|
|
|
|
val lastSample = pendingSamples.last()
|
|
|
|
|
lastSample.durationUs = presentationTimeUs - lastSample.presentationTimeUs
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Estimate duration (will be corrected by next sample)
|
|
|
|
|
val estimatedDurationUs = if (lastPresentationTimeUs > 0) {
|
|
|
|
|
presentationTimeUs - lastPresentationTimeUs
|
|
|
|
|
@@ -351,6 +416,7 @@ class HlsMuxer(
|
|
|
|
|
val durationUs = (lastSample.presentationTimeUs - firstPts) + lastSample.durationUs
|
|
|
|
|
|
|
|
|
|
Log.d(TAG, "Created segment $segmentIndex: samples=${pendingSamples.size}, " +
|
|
|
|
|
"baseDecodeTime=${baseDecodeTimeUs}us (${baseDecodeTimeUs / 1_000_000.0}s), " +
|
|
|
|
|
"duration=${durationUs / 1000}ms, size=${fragmentBytes.size} bytes")
|
|
|
|
|
|
|
|
|
|
callback.onMediaSegmentReady(segmentFile, segmentIndex, durationUs)
|
|
|
|
|
@@ -478,40 +544,91 @@ class HlsMuxer(
|
|
|
|
|
dos.writeShort(0) // volume (0 for video)
|
|
|
|
|
dos.writeShort(0) // reserved
|
|
|
|
|
|
|
|
|
|
// Rotation matrix - use identity and rely on correct dimensions from encoder
|
|
|
|
|
// The encoder output format already has the correct dimensions for the content
|
|
|
|
|
writeRotationMatrix(dos)
|
|
|
|
|
// Rotation matrix based on orientationDegrees
|
|
|
|
|
writeRotationMatrix(dos, width, height)
|
|
|
|
|
|
|
|
|
|
// Use dimensions as-is from encoder output format
|
|
|
|
|
dos.writeInt(width shl 16) // width (16.16 fixed point)
|
|
|
|
|
dos.writeInt(height shl 16) // height (16.16 fixed point)
|
|
|
|
|
// For 90° and 270° rotations, the display dimensions are swapped
|
|
|
|
|
// The tkhd width/height represent the final display size after rotation
|
|
|
|
|
val (displayWidth, displayHeight) = when (orientationDegrees) {
|
|
|
|
|
90, 270 -> Pair(height, width)
|
|
|
|
|
else -> Pair(width, height)
|
|
|
|
|
}
|
|
|
|
|
dos.writeInt(displayWidth shl 16) // width (16.16 fixed point)
|
|
|
|
|
dos.writeInt(displayHeight shl 16) // height (16.16 fixed point)
|
|
|
|
|
|
|
|
|
|
Log.d(TAG, "tkhd: ${width}x${height}, rotation=$orientationDegrees")
|
|
|
|
|
Log.d(TAG, "tkhd: encoder=${width}x${height}, display=${displayWidth}x${displayHeight}, rotation=$orientationDegrees")
|
|
|
|
|
|
|
|
|
|
return wrapBox("tkhd", output.toByteArray())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Writes the 3x3 transformation matrix of the `tkhd` box, encoding the
 * rotation given by [orientationDegrees] (0, 90, 180 or 270).
 *
 * Matrix layout as stored in the box (row-major, 9 big-endian ints):
 *
 *     | a b u |   a, b, c, d, x, y are 16.16 fixed-point
 *     | c d v |   u, v are 2.30 fixed-point (always 0)
 *     | x y w |   w is 2.30 fixed-point (always 1.0 = 0x40000000)
 *
 * For a rotation by θ: a = cos θ, b = sin θ, c = -sin θ, d = cos θ.
 * The translation (x, y) shifts the rotated frame back into the visible
 * area, so [width]/[height] of the encoder output are needed for 90/180/270.
 *
 * NOTE(review): this region of the pasted diff contained both the old
 * identity-only body and the new rotation-aware body; this is the resolved
 * post-patch implementation.
 *
 * @param dos    destination stream, positioned at the matrix field of tkhd
 * @param width  encoder output width in pixels (pre-rotation)
 * @param height encoder output height in pixels (pre-rotation)
 */
private fun writeRotationMatrix(dos: DataOutputStream, width: Int, height: Int) {
    // Fixed-point constants
    val one = 0x00010000    // 1.0 in 16.16
    val negOne = -0x00010000 // -1.0 in 16.16 (will be written as unsigned)
    val w = 0x40000000      // 1.0 in 2.30

    when (orientationDegrees) {
        90 -> {
            // 90° rotation: x' = y, y' = -x + width
            dos.writeInt(0)             // a = 0
            dos.writeInt(negOne)        // b = -1
            dos.writeInt(0)             // u = 0
            dos.writeInt(one)           // c = 1
            dos.writeInt(0)             // d = 0
            dos.writeInt(0)             // v = 0
            dos.writeInt(0)             // x = 0
            dos.writeInt(width shl 16)  // y = width (translation)
            dos.writeInt(w)             // w = 1
        }
        180 -> {
            // 180° rotation
            dos.writeInt(negOne)        // a = -1
            dos.writeInt(0)             // b = 0
            dos.writeInt(0)             // u = 0
            dos.writeInt(0)             // c = 0
            dos.writeInt(negOne)        // d = -1
            dos.writeInt(0)             // v = 0
            dos.writeInt(width shl 16)  // x = width (translation)
            dos.writeInt(height shl 16) // y = height (translation)
            dos.writeInt(w)             // w = 1
        }
        270 -> {
            // 270° rotation: x' = -y + height, y' = x
            dos.writeInt(0)             // a = 0
            dos.writeInt(one)           // b = 1
            dos.writeInt(0)             // u = 0
            dos.writeInt(negOne)        // c = -1
            dos.writeInt(0)             // d = 0
            dos.writeInt(0)             // v = 0
            dos.writeInt(height shl 16) // x = height (translation)
            dos.writeInt(0)             // y = 0
            dos.writeInt(w)             // w = 1
        }
        else -> {
            // 0° or unknown: identity matrix
            dos.writeInt(one)           // a = 1
            dos.writeInt(0)             // b = 0
            dos.writeInt(0)             // u = 0
            dos.writeInt(0)             // c = 0
            dos.writeInt(one)           // d = 1
            dos.writeInt(0)             // v = 0
            dos.writeInt(0)             // x = 0
            dos.writeInt(0)             // y = 0
            dos.writeInt(w)             // w = 1
        }
    }
}
|
|
|
|
|
|
|
|
|
|
private fun buildMdiaBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
|
|
|
|
|
@@ -598,7 +715,7 @@ class HlsMuxer(
|
|
|
|
|
private fun buildStblBox(width: Int, height: Int, sps: ByteArray, pps: ByteArray): ByteArray {
|
|
|
|
|
val content = ByteArrayOutputStream()
|
|
|
|
|
content.write(buildStsdBox(width, height, sps, pps))
|
|
|
|
|
content.write(buildEmptySttsBox())
|
|
|
|
|
content.write(buildSttsBox()) // Contains default timing for ffprobe frame rate detection
|
|
|
|
|
content.write(buildEmptyStscBox())
|
|
|
|
|
content.write(buildEmptyStszBox())
|
|
|
|
|
content.write(buildEmptyStcoBox())
|
|
|
|
|
@@ -665,11 +782,21 @@ class HlsMuxer(
|
|
|
|
|
return wrapBox("avcC", output.toByteArray())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the `stts` (decoding time-to-sample) box for the init segment.
 *
 * For fragmented MP4, stts is normally empty because per-sample timing lives
 * in the `trun` boxes of each fragment. However, ffprobe uses stts to derive
 * `r_frame_rate` when entries are present, so a single entry with the default
 * sample duration is written: r_frame_rate = timescale / sample_delta
 * (e.g. 30000 / 1000 = 30).
 *
 * NOTE(review): this region of the pasted diff contained both the old
 * `buildEmptySttsBox` (entry count 0) and the new single-entry body; this is
 * the resolved post-patch implementation.
 *
 * @return the serialized stts box, including its size/type header
 */
private fun buildSttsBox(): ByteArray {
    val output = ByteArrayOutputStream()
    val dos = DataOutputStream(output)

    // 1000 timescale units per frame at timescale = fps * 1000.
    val defaultSampleDuration = timescale / fps

    dos.writeInt(0)                     // version & flags
    dos.writeInt(1)                     // entry count (1 entry for default timing)
    dos.writeInt(1)                     // sample_count (indicates this is the default duration)
    dos.writeInt(defaultSampleDuration) // sample_delta in timescale units

    return wrapBox("stts", output.toByteArray())
}
|
|
|
|
|
|
|
|
|
|
@@ -706,10 +833,15 @@ class HlsMuxer(
|
|
|
|
|
val output = ByteArrayOutputStream()
|
|
|
|
|
val dos = DataOutputStream(output)
|
|
|
|
|
|
|
|
|
|
// Calculate default sample duration so ffprobe can derive correct fps
|
|
|
|
|
// fps = timescale / default_sample_duration
|
|
|
|
|
// At 30fps with timescale=30000: duration=1000, ffprobe calculates 30000/1000=30
|
|
|
|
|
val defaultSampleDuration = timescale / fps
|
|
|
|
|
|
|
|
|
|
dos.writeInt(0) // version & flags
|
|
|
|
|
dos.writeInt(1) // track ID
|
|
|
|
|
dos.writeInt(1) // default sample description index
|
|
|
|
|
dos.writeInt(0) // default sample duration
|
|
|
|
|
dos.writeInt(defaultSampleDuration) // default sample duration
|
|
|
|
|
dos.writeInt(0) // default sample size
|
|
|
|
|
dos.writeInt(0) // default sample flags
|
|
|
|
|
|
|
|
|
|
|