feat: Route images through ImageWriter into OpenGL pipeline (#1874)

* feat: Route images through `ImageWriter` into OpenGL pipeline

* fix: Use RGB format

* fix: Every device supports YUV, RGB and NATIVE

* Update VideoPipeline.kt

* log format

* Plug ImageReader between OpenGL pipeline

* Call Frame Processor

* Format

* Remove logs
Author: Marc Rousavy · 2023-09-29 21:52:19 +02:00 · committed by GitHub
parent a1fbba8d66 · commit 954b44810b
8 changed files with 77 additions and 110 deletions
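
At a glance (simplified from the diffs below): with a Frame Processor attached, frames now take the route

Camera -> ImageReader.surface -> onImageAvailable: FrameProcessor.call(frame) -> ImageWriter.queueInputImage(image) -> OpenGL SurfaceTexture -> RecordingSession

and without one, the Camera renders straight into the OpenGL SurfaceTexture as before.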

VideoPipeline.cpp

@@ -31,7 +31,6 @@ VideoPipeline::VideoPipeline(jni::alias_ref<jhybridobject> jThis, int width, int
 VideoPipeline::~VideoPipeline() {
   // 1. Remove output surfaces
-  removeFrameProcessorOutputSurface();
   removeRecordingSessionOutputSurface();
   // 2. Delete the input textures
   if (_inputTexture != std::nullopt) {
@@ -42,21 +41,6 @@ VideoPipeline::~VideoPipeline() {
   _context = nullptr;
 }

-void VideoPipeline::removeFrameProcessorOutputSurface() {
-  if (_frameProcessorOutput)
-    _frameProcessorOutput->destroy();
-  _frameProcessorOutput = nullptr;
-}
-
-void VideoPipeline::setFrameProcessorOutputSurface(jobject surface) {
-  // 1. Delete existing output surface
-  removeFrameProcessorOutputSurface();
-  // 2. Set new output surface if it is not null
-  ANativeWindow* window = ANativeWindow_fromSurface(jni::Environment::current(), surface);
-  _frameProcessorOutput = OpenGLRenderer::CreateWithWindowSurface(_context, window);
-}
-
 void VideoPipeline::removeRecordingSessionOutputSurface() {
   if (_recordingSessionOutput)
     _recordingSessionOutput->destroy();
@@ -93,10 +77,6 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara
   OpenGLTexture& texture = _inputTexture.value();

-  if (_frameProcessorOutput) {
-    __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to FrameProcessor..");
-    _frameProcessorOutput->renderTextureToSurface(texture, transformMatrix);
-  }
   if (_recordingSessionOutput) {
     __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession..");
     _recordingSessionOutput->renderTextureToSurface(texture, transformMatrix);
@@ -106,8 +86,6 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara
 void VideoPipeline::registerNatives() {
   registerHybrid({
     makeNativeMethod("initHybrid", VideoPipeline::initHybrid),
-    makeNativeMethod("setFrameProcessorOutputSurface", VideoPipeline::setFrameProcessorOutputSurface),
-    makeNativeMethod("removeFrameProcessorOutputSurface", VideoPipeline::removeFrameProcessorOutputSurface),
     makeNativeMethod("setRecordingSessionOutputSurface", VideoPipeline::setRecordingSessionOutputSurface),
     makeNativeMethod("removeRecordingSessionOutputSurface", VideoPipeline::removeRecordingSessionOutputSurface),
     makeNativeMethod("getInputTextureId", VideoPipeline::getInputTextureId),

VideoPipeline.h

@@ -30,10 +30,6 @@ public:
   // -> SurfaceTexture input
   int getInputTextureId();

-  // <- Frame Processor output
-  void setFrameProcessorOutputSurface(jobject surface);
-  void removeFrameProcessorOutputSurface();
-
   // <- MediaRecorder output
   void setRecordingSessionOutputSurface(jobject surface);
   void removeRecordingSessionOutputSurface();
@@ -54,7 +50,6 @@ private:
   // Output Contexts
   std::shared_ptr<OpenGLContext> _context = nullptr;
-  std::unique_ptr<OpenGLRenderer> _frameProcessorOutput = nullptr;
   std::unique_ptr<OpenGLRenderer> _recordingSessionOutput = nullptr;

 private:

CameraView.kt

@@ -210,7 +210,7 @@ class CameraView(context: Context) : FrameLayout(context) {
       null
     }
     val videoOutput = if (video == true || enableFrameProcessor) {
-      CameraOutputs.VideoOutput(targetVideoSize, video == true, enableFrameProcessor, pixelFormat.toImageFormat())
+      CameraOutputs.VideoOutput(targetVideoSize, video == true, enableFrameProcessor, pixelFormat)
     } else {
       null
     }

CameraError.kt

@@ -66,6 +66,8 @@ class CameraSessionCannotBeConfiguredError(cameraId: String, outputs: CameraOutp
   CameraError("session", "cannot-create-session", "Failed to create a Camera Session for Camera $cameraId! Outputs: $outputs")
 class CameraDisconnectedError(cameraId: String, error: CameraDeviceError) :
   CameraError("session", "camera-has-been-disconnected", "The given Camera device (id: $cameraId) has been disconnected! Error: $error")
+class FrameProcessorsUnavailableError(reason: String) :
+  CameraError("system", "frame-processors-unavailable", "Frame Processors are unavailable! Reason: $reason")
 class VideoNotEnabledError :
   CameraError("capture", "video-not-enabled", "Video capture is disabled! Pass `video={true}` to enable video recordings.")

CameraDeviceDetails.kt

@@ -157,16 +157,11 @@ class CameraDeviceDetails(private val cameraManager: CameraManager, private val
     return array
   }

-  private fun createPixelFormats(size: Size): ReadableArray {
-    val formats = cameraConfig.outputFormats
+  private fun createPixelFormats(): ReadableArray {
+    // Every output in Camera2 supports YUV and NATIVE
     val array = Arguments.createArray()
-    formats.forEach { format ->
-      val sizes = cameraConfig.getOutputSizes(format)
-      val hasSize = sizes.any { it.width == size.width && it.height == size.height }
-      if (hasSize) {
-        array.pushString(PixelFormat.fromImageFormat(format).unionValue)
-      }
-    }
+    array.pushString(PixelFormat.YUV.unionValue)
+    array.pushString(PixelFormat.NATIVE.unionValue)
     return array
   }
@@ -186,7 +181,7 @@ class CameraDeviceDetails(private val cameraManager: CameraManager, private val
   map.putBoolean("supportsDepthCapture", supportsDepthCapture)
   map.putString("autoFocusSystem", "contrast-detection") // TODO: Is this wrong?
   map.putArray("videoStabilizationModes", createStabilizationModes())
-  map.putArray("pixelFormats", createPixelFormats(videoSize))
+  map.putArray("pixelFormats", createPixelFormats())
   return map
 }
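
The rewritten createPixelFormats() drops the per-size format query and hardcodes yuv and native, since Camera2 guarantees ImageFormat.YUV_420_888 and ImageFormat.PRIVATE streams on every device. For comparison, a per-device query (a sketch only, not part of this commit; getReportedOutputFormats is a hypothetical helper) would go through the StreamConfigurationMap:

import android.hardware.camera2.CameraCharacteristics
import android.hardware.camera2.CameraManager

// Hypothetical helper: lists the output formats the device actually reports.
fun getReportedOutputFormats(cameraManager: CameraManager, cameraId: String): IntArray {
  val characteristics = cameraManager.getCameraCharacteristics(cameraId)
  val config = characteristics.get(CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP)!!
  // Returns ImageFormat/PixelFormat constants, e.g. YUV_420_888 (35) or JPEG (256).
  return config.outputFormats
}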

VideoPipeline.kt

@@ -2,12 +2,15 @@ package com.mrousavy.camera.core
 import android.graphics.ImageFormat
 import android.graphics.SurfaceTexture
+import android.hardware.HardwareBuffer
 import android.media.ImageReader
+import android.media.ImageWriter
+import android.os.Build
 import android.util.Log
 import android.view.Surface
 import com.facebook.jni.HybridData
 import com.mrousavy.camera.CameraQueues
-import com.mrousavy.camera.PixelFormatNotSupportedInVideoPipelineError
+import com.mrousavy.camera.FrameProcessorsUnavailableError
 import com.mrousavy.camera.frameprocessor.Frame
 import com.mrousavy.camera.frameprocessor.FrameProcessor
 import com.mrousavy.camera.parsers.Orientation
@@ -16,15 +19,20 @@ import java.io.Closeable
 /**
  * An OpenGL pipeline for streaming Camera Frames to one or more outputs.
- * Currently, [VideoPipeline] can stream to a [FrameProcessor] and a [MediaRecorder].
+ * Currently, [VideoPipeline] can stream to a [FrameProcessor] and a [RecordingSession].
  *
  * @param [width] The width of the Frames to stream (> 0)
  * @param [height] The height of the Frames to stream (> 0)
  * @param [format] The format of the Frames to stream. ([ImageFormat.PRIVATE], [ImageFormat.YUV_420_888] or [ImageFormat.JPEG])
  */
 @Suppress("KotlinJniMissingFunction")
-class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageFormat.PRIVATE, private val isMirrored: Boolean = false) :
-  SurfaceTexture.OnFrameAvailableListener,
+class VideoPipeline(
+  val width: Int,
+  val height: Int,
+  val format: PixelFormat = PixelFormat.NATIVE,
+  private val isMirrored: Boolean = false,
+  enableFrameProcessor: Boolean = false
+) : SurfaceTexture.OnFrameAvailableListener,
   Closeable {
   companion object {
     private const val MAX_IMAGES = 3
@@ -52,7 +60,6 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
   // Output 1
   private var frameProcessor: FrameProcessor? = null
-  private var imageReader: ImageReader? = null

   // Output 2
   private var recordingSession: RecordingSession? = null
@@ -61,36 +68,61 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
   private val surfaceTexture: SurfaceTexture
   val surface: Surface

+  // If Frame Processors are enabled, we go through ImageReader first before we go thru OpenGL
+  private var imageReader: ImageReader? = null
+  private var imageWriter: ImageWriter? = null
+
   init {
     Log.i(
       TAG,
-      "Initializing $width x $height Video Pipeline " +
-        "(format: ${PixelFormat.fromImageFormat(format)} #$format)"
+      "Initializing $width x $height Video Pipeline (format: $format)"
     )
-    // TODO: We currently use OpenGL for the Video Pipeline.
-    //  OpenGL only works in the RGB (RGBA_8888; 0x23) pixel-format, so we cannot
-    //  override the pixel-format to something like YUV or PRIVATE.
-    //  This absolutely sucks and I would prefer to replace the OpenGL pipeline with
-    //  something similar to how iOS works where we just pass GPU buffers around,
-    //  but android.media APIs are just not as advanced yet.
-    //  For example, ImageReader/ImageWriter is way too buggy and does not work with MediaRecorder.
-    //  See this issue ($4.000 bounty!) for more details:
-    //  https://github.com/mrousavy/react-native-vision-camera/issues/1837
-    if (format != ImageFormat.PRIVATE && format != 0x23) {
-      throw PixelFormatNotSupportedInVideoPipelineError(PixelFormat.fromImageFormat(format).unionValue)
-    }
     mHybridData = initHybrid(width, height)
     surfaceTexture = SurfaceTexture(false)
     surfaceTexture.setDefaultBufferSize(width, height)
     surfaceTexture.setOnFrameAvailableListener(this)
-    surface = Surface(surfaceTexture)
+    val glSurface = Surface(surfaceTexture)
+
+    if (enableFrameProcessor) {
+      // User has passed a Frame Processor, we need to route images through ImageReader so we can get
+      // CPU access to the Frames, then send them to the OpenGL pipeline later.
+      if (Build.VERSION.SDK_INT < Build.VERSION_CODES.Q) {
+        throw FrameProcessorsUnavailableError("Frame Processors require API 29 or higher. (Q)")
+      }
+      // GPU_SAMPLED because we redirect to OpenGL, CPU_READ because we read pixels before that.
+      val usage = HardwareBuffer.USAGE_GPU_SAMPLED_IMAGE or HardwareBuffer.USAGE_CPU_READ_OFTEN
+      val format = getImageReaderFormat()
+      Log.i(TAG, "Using ImageReader round-trip (format: #$format)")
+      imageWriter = ImageWriter.newInstance(glSurface, MAX_IMAGES, format)
+      imageReader = ImageReader.newInstance(width, height, format, MAX_IMAGES, usage)
+      imageReader!!.setOnImageAvailableListener({ reader ->
+        Log.i(TAG, "ImageReader::onImageAvailable!")
+        val image = reader.acquireNextImage() ?: return@setOnImageAvailableListener
+        Log.i(TAG, "Image Format: ${image.format}")
+        // TODO: Get correct orientation and isMirrored
+        val frame = Frame(image, image.timestamp, Orientation.PORTRAIT, isMirrored)
+        frame.incrementRefCount()
+        frameProcessor?.call(frame)
+        imageWriter!!.queueInputImage(image)
+        frame.decrementRefCount()
+      }, CameraQueues.videoQueue.handler)
+      surface = imageReader!!.surface
+    } else {
+      // No Frame Processor will be used, directly render into the OpenGL pipeline to avoid ImageReader roundtrip.
+      surface = glSurface
+    }
   }

   override fun close() {
     synchronized(this) {
       isActive = false
+      imageWriter?.close()
       imageReader?.close()
-      imageReader = null
       frameProcessor = null
       recordingSession = null
       surfaceTexture.release()
@@ -123,29 +155,13 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
     }
   }

-  private fun getImageReader(): ImageReader {
-    if (format != ImageFormat.PRIVATE) {
-      Log.w(
-        TAG,
-        "Warning: pixelFormat \"${PixelFormat.fromImageFormat(format).unionValue}\" might " +
-          "not be supported on this device because the C++ OpenGL GPU Video Pipeline operates in RGBA_8888. " +
-          "I wanted to use an ImageReader -> ImageWriter setup for this, but I couldn't get it to work. " +
-          "See this PR for more details: https://github.com/mrousavy/react-native-vision-camera/pull/1836"
-      )
-    }
-    val imageReader = ImageReader.newInstance(width, height, format, MAX_IMAGES)
-    imageReader.setOnImageAvailableListener({ reader ->
-      Log.i("VideoPipeline", "ImageReader::onImageAvailable!")
-      val image = reader.acquireNextImage() ?: return@setOnImageAvailableListener
-      // TODO: Get correct orientation and isMirrored
-      val frame = Frame(image, image.timestamp, Orientation.PORTRAIT, isMirrored)
-      frame.incrementRefCount()
-      frameProcessor?.call(frame)
-      frame.decrementRefCount()
-    }, CameraQueues.videoQueue.handler)
-    return imageReader
-  }
+  private fun getImageReaderFormat(): Int =
+    when (format) {
+      PixelFormat.NATIVE -> ImageFormat.PRIVATE
+      PixelFormat.RGB -> HardwareBuffer.RGBA_8888
+      PixelFormat.YUV -> ImageFormat.YUV_420_888
+      else -> ImageFormat.PRIVATE
+    }

   /**
    * Configures the Pipeline to also call the given [FrameProcessor] (or null).
@@ -154,28 +170,11 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
    */
   fun setFrameProcessor(frameProcessor: FrameProcessor?) {
     synchronized(this) {
       Log.i(TAG, "Setting $width x $height FrameProcessor Output...")
       this.frameProcessor = frameProcessor
-
-      if (frameProcessor != null) {
-        if (this.imageReader == null) {
-          // 1. Create new ImageReader that just calls the Frame Processor
-          this.imageReader = getImageReader()
-        }
-
-        // 2. Configure OpenGL pipeline to stream Frames into the ImageReader's surface
-        setFrameProcessorOutputSurface(imageReader!!.surface)
-      } else {
-        // 1. Configure OpenGL pipeline to stop streaming Frames into the ImageReader's surface
-        removeFrameProcessorOutputSurface()
-
-        // 2. Close the ImageReader
-        this.imageReader?.close()
-        this.imageReader = null
-      }
     }
   }

   /**
-   * Configures the Pipeline to also write Frames to a Surface from a [MediaRecorder] (or null)
+   * Configures the Pipeline to also write Frames to a Surface from a `MediaRecorder` (or null)
    */
   fun setRecordingSessionOutput(recordingSession: RecordingSession?) {
     synchronized(this) {
@@ -195,8 +194,6 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
   private external fun getInputTextureId(): Int
   private external fun onBeforeFrame()
   private external fun onFrame(transformMatrix: FloatArray)
-  private external fun setFrameProcessorOutputSurface(surface: Any)
-  private external fun removeFrameProcessorOutputSurface()
   private external fun setRecordingSessionOutputSurface(surface: Any)
   private external fun removeRecordingSessionOutputSurface()
   private external fun initHybrid(width: Int, height: Int): HybridData
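
The core of this change is the ImageReader -> ImageWriter round-trip in the new init block above: the Camera draws into an ImageReader, the Frame Processor gets CPU access to each Image, and the same buffer is then queued into an ImageWriter that feeds the OpenGL SurfaceTexture. A minimal standalone sketch of that pattern (API 29+; createRoundTrip and processFrame are illustrative names, not code from this commit):

import android.graphics.ImageFormat
import android.hardware.HardwareBuffer
import android.media.Image
import android.media.ImageReader
import android.media.ImageWriter
import android.os.Handler
import android.view.Surface

// Placeholder for the Frame Processor call in this commit.
fun processFrame(image: Image) { /* read pixels, run plugins, etc. */ }

fun createRoundTrip(glSurface: Surface, width: Int, height: Int, handler: Handler): Surface {
  val maxImages = 3
  // CPU_READ_OFTEN: the Frame Processor reads pixels on the CPU.
  // GPU_SAMPLED_IMAGE: OpenGL samples the same buffer after it is queued back.
  val usage = HardwareBuffer.USAGE_GPU_SAMPLED_IMAGE or HardwareBuffer.USAGE_CPU_READ_OFTEN
  val writer = ImageWriter.newInstance(glSurface, maxImages, ImageFormat.YUV_420_888)
  val reader = ImageReader.newInstance(width, height, ImageFormat.YUV_420_888, maxImages, usage)
  reader.setOnImageAvailableListener({ r ->
    val image = r.acquireNextImage() ?: return@setOnImageAvailableListener
    processFrame(image)
    // Hand the same buffer on to the OpenGL SurfaceTexture; the writer takes ownership.
    writer.queueInputImage(image)
  }, handler)
  // The camera draws into this surface instead of the OpenGL one.
  return reader.surface
}

Note the ordering the commit uses: queueInputImage(image) runs before frame.decrementRefCount(), so the buffer is handed off to the ImageWriter before the Frame's cleanup can close the underlying Image.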

CameraOutputs.kt

@@ -16,6 +16,7 @@ import com.mrousavy.camera.extensions.getPhotoSizes
 import com.mrousavy.camera.extensions.getPreviewTargetSize
 import com.mrousavy.camera.extensions.getVideoSizes
 import com.mrousavy.camera.extensions.smaller
+import com.mrousavy.camera.parsers.PixelFormat
 import java.io.Closeable

 class CameraOutputs(
@@ -38,7 +39,7 @@ class CameraOutputs(
     val targetSize: Size? = null,
     val enableRecording: Boolean = false,
     val enableFrameProcessor: Boolean? = false,
-    val format: Int = ImageFormat.PRIVATE
+    val format: PixelFormat = PixelFormat.NATIVE
   )

   interface Callback {
@@ -134,8 +135,11 @@ class CameraOutputs(
     // Video output: High resolution repeating images (startRecording() or useFrameProcessor())
     if (video != null) {
-      val size = characteristics.getVideoSizes(cameraId, video.format).closestToOrMax(video.targetSize)
-      val videoPipeline = VideoPipeline(size.width, size.height, video.format, isMirrored)
+      // TODO: Should this be dynamic?
+      val format = ImageFormat.YUV_420_888
+      val size = characteristics.getVideoSizes(cameraId, format).closestToOrMax(video.targetSize)
+      val enableFrameProcessor = video.enableFrameProcessor ?: false
+      val videoPipeline = VideoPipeline(size.width, size.height, video.format, isMirrored, enableFrameProcessor)
       Log.i(TAG, "Adding ${size.width}x${size.height} video output. (Format: ${video.format})")
       videoOutput = VideoPipelineOutput(videoPipeline, SurfaceOutput.OutputType.VIDEO)

PixelFormat.kt

@@ -7,15 +7,13 @@ import com.mrousavy.camera.PixelFormatNotSupportedError
 enum class PixelFormat(override val unionValue: String) : JSUnionValue {
   YUV("yuv"),
   RGB("rgb"),
-  DNG("dng"),
   NATIVE("native"),
   UNKNOWN("unknown");

   fun toImageFormat(): Int {
     val result = when (this) {
       YUV -> ImageFormat.YUV_420_888
-      RGB -> ImageFormat.JPEG
-      DNG -> ImageFormat.RAW_SENSOR
+      RGB -> android.graphics.PixelFormat.RGBA_8888
       NATIVE -> ImageFormat.PRIVATE
       UNKNOWN -> null
     }
@@ -29,8 +27,7 @@ enum class PixelFormat(override val unionValue: String) : JSUnionValue {
     fun fromImageFormat(imageFormat: Int): PixelFormat =
       when (imageFormat) {
         ImageFormat.YUV_420_888 -> YUV
-        ImageFormat.JPEG, ImageFormat.DEPTH_JPEG -> RGB
-        ImageFormat.RAW_SENSOR -> DNG
+        android.graphics.PixelFormat.RGBA_8888 -> RGB
         ImageFormat.PRIVATE -> NATIVE
         else -> UNKNOWN
       }
@@ -39,7 +36,6 @@ enum class PixelFormat(override val unionValue: String) : JSUnionValue {
       when (unionValue) {
         "yuv" -> YUV
         "rgb" -> RGB
-        "dng" -> DNG
         "native" -> NATIVE
         "unknown" -> UNKNOWN
         else -> null
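
A quick usage sketch of the updated enum (not from this commit; it assumes the companion helper shown in the last hunk is the repo's usual fromUnionValue(unionValue: String?)):

import com.mrousavy.camera.parsers.PixelFormat

fun main() {
  println(PixelFormat.fromUnionValue("rgb"))         // RGB ("dng" now falls through to null)
  val imageFormat = PixelFormat.RGB.toImageFormat()  // android.graphics.PixelFormat.RGBA_8888 == 1
  // HardwareBuffer.RGBA_8888 is also 1, so this agrees with VideoPipeline.getImageReaderFormat().
  println(PixelFormat.fromImageFormat(imageFormat))  // RGB — maps to a real RGBA format now, not JPEG
}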