feat: Route images through ImageWriter into OpenGL pipeline (#1874)
* feat: Route images through `ImageWriter` into OpenGL pipeline
* fix: Use RGB format
* fix: Every device supports YUV, RGB and NATIVE
* Update VideoPipeline.kt
* log format
* Plug ImageReader between OpenGL pipeline
* Call Frame Processor
* Format
* Remove logs
This commit is contained in:
parent: a1fbba8d66
commit: 954b44810b
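In short: when a Frame Processor is enabled, Camera frames now land in an ImageReader first (giving the Frame Processor CPU access to each Image), and every Image is then forwarded through an ImageWriter into the OpenGL pipeline's input Surface. A minimal standalone sketch of that routing, assuming API 29+; the helper name `routeThroughImageReader` and its parameters are illustrative, not part of this diff:

```kotlin
import android.graphics.ImageFormat
import android.hardware.HardwareBuffer
import android.media.ImageReader
import android.media.ImageWriter
import android.os.Handler
import android.view.Surface

// Returns the Surface the Camera should render into. Frames pass through the
// ImageReader (CPU-readable), then get queued into the OpenGL input Surface.
fun routeThroughImageReader(glSurface: Surface, width: Int, height: Int, handler: Handler): Surface {
  val maxImages = 3
  // GPU_SAMPLED because frames end up in OpenGL, CPU_READ because we read pixels first.
  val usage = HardwareBuffer.USAGE_GPU_SAMPLED_IMAGE or HardwareBuffer.USAGE_CPU_READ_OFTEN
  val writer = ImageWriter.newInstance(glSurface, maxImages, ImageFormat.YUV_420_888)
  val reader = ImageReader.newInstance(width, height, ImageFormat.YUV_420_888, maxImages, usage)
  reader.setOnImageAvailableListener({ r ->
    val image = r.acquireNextImage() ?: return@setOnImageAvailableListener
    // ...call the Frame Processor with `image` here...
    writer.queueInputImage(image) // transfers ownership; forwards into OpenGL
  }, handler)
  return reader.surface
}
```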
@@ -31,7 +31,6 @@ VideoPipeline::VideoPipeline(jni::alias_ref<jhybridobject> jThis, int width, int

 VideoPipeline::~VideoPipeline() {
   // 1. Remove output surfaces
-  removeFrameProcessorOutputSurface();
   removeRecordingSessionOutputSurface();
   // 2. Delete the input textures
   if (_inputTexture != std::nullopt) {
@@ -42,21 +41,6 @@ VideoPipeline::~VideoPipeline() {
   _context = nullptr;
 }

-void VideoPipeline::removeFrameProcessorOutputSurface() {
-  if (_frameProcessorOutput)
-    _frameProcessorOutput->destroy();
-  _frameProcessorOutput = nullptr;
-}
-
-void VideoPipeline::setFrameProcessorOutputSurface(jobject surface) {
-  // 1. Delete existing output surface
-  removeFrameProcessorOutputSurface();
-
-  // 2. Set new output surface if it is not null
-  ANativeWindow* window = ANativeWindow_fromSurface(jni::Environment::current(), surface);
-  _frameProcessorOutput = OpenGLRenderer::CreateWithWindowSurface(_context, window);
-}
-
 void VideoPipeline::removeRecordingSessionOutputSurface() {
   if (_recordingSessionOutput)
     _recordingSessionOutput->destroy();
@@ -93,10 +77,6 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara

   OpenGLTexture& texture = _inputTexture.value();

-  if (_frameProcessorOutput) {
-    __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to FrameProcessor..");
-    _frameProcessorOutput->renderTextureToSurface(texture, transformMatrix);
-  }
   if (_recordingSessionOutput) {
     __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession..");
     _recordingSessionOutput->renderTextureToSurface(texture, transformMatrix);
@@ -106,8 +86,6 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara
 void VideoPipeline::registerNatives() {
   registerHybrid({
     makeNativeMethod("initHybrid", VideoPipeline::initHybrid),
-    makeNativeMethod("setFrameProcessorOutputSurface", VideoPipeline::setFrameProcessorOutputSurface),
-    makeNativeMethod("removeFrameProcessorOutputSurface", VideoPipeline::removeFrameProcessorOutputSurface),
     makeNativeMethod("setRecordingSessionOutputSurface", VideoPipeline::setRecordingSessionOutputSurface),
     makeNativeMethod("removeRecordingSessionOutputSurface", VideoPipeline::removeRecordingSessionOutputSurface),
     makeNativeMethod("getInputTextureId", VideoPipeline::getInputTextureId),
@@ -30,10 +30,6 @@ public:
   // -> SurfaceTexture input
   int getInputTextureId();

-  // <- Frame Processor output
-  void setFrameProcessorOutputSurface(jobject surface);
-  void removeFrameProcessorOutputSurface();
-
   // <- MediaRecorder output
   void setRecordingSessionOutputSurface(jobject surface);
   void removeRecordingSessionOutputSurface();
@@ -54,7 +50,6 @@ private:

   // Output Contexts
   std::shared_ptr<OpenGLContext> _context = nullptr;
-  std::unique_ptr<OpenGLRenderer> _frameProcessorOutput = nullptr;
   std::unique_ptr<OpenGLRenderer> _recordingSessionOutput = nullptr;

 private:
@@ -210,7 +210,7 @@ class CameraView(context: Context) : FrameLayout(context) {
     null
   }
   val videoOutput = if (video == true || enableFrameProcessor) {
-    CameraOutputs.VideoOutput(targetVideoSize, video == true, enableFrameProcessor, pixelFormat.toImageFormat())
+    CameraOutputs.VideoOutput(targetVideoSize, video == true, enableFrameProcessor, pixelFormat)
   } else {
     null
   }
@@ -66,6 +66,8 @@ class CameraSessionCannotBeConfiguredError(cameraId: String, outputs: CameraOutp
   CameraError("session", "cannot-create-session", "Failed to create a Camera Session for Camera $cameraId! Outputs: $outputs")
 class CameraDisconnectedError(cameraId: String, error: CameraDeviceError) :
   CameraError("session", "camera-has-been-disconnected", "The given Camera device (id: $cameraId) has been disconnected! Error: $error")
+class FrameProcessorsUnavailableError(reason: String) :
+  CameraError("system", "frame-processors-unavailable", "Frame Processors are unavailable! Reason: $reason")

 class VideoNotEnabledError :
   CameraError("capture", "video-not-enabled", "Video capture is disabled! Pass `video={true}` to enable video recordings.")
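The new FrameProcessorsUnavailableError surfaces to JS with the code `system/frame-processors-unavailable`; it is thrown from VideoPipeline's init (further down in this diff) when a Frame Processor is requested below API 29:

```kotlin
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.Q) {
  throw FrameProcessorsUnavailableError("Frame Processors require API 29 or higher. (Q)")
}
```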
@@ -157,16 +157,11 @@ class CameraDeviceDetails(private val cameraManager: CameraManager, private val
     return array
   }

-  private fun createPixelFormats(size: Size): ReadableArray {
-    val formats = cameraConfig.outputFormats
+  private fun createPixelFormats(): ReadableArray {
+    // Every output in Camera2 supports YUV and NATIVE
     val array = Arguments.createArray()
-    formats.forEach { format ->
-      val sizes = cameraConfig.getOutputSizes(format)
-      val hasSize = sizes.any { it.width == size.width && it.height == size.height }
-      if (hasSize) {
-        array.pushString(PixelFormat.fromImageFormat(format).unionValue)
-      }
-    }
+    array.pushString(PixelFormat.YUV.unionValue)
+    array.pushString(PixelFormat.NATIVE.unionValue)
     return array
   }
@@ -186,7 +181,7 @@ class CameraDeviceDetails(private val cameraManager: CameraManager, private val
   map.putBoolean("supportsDepthCapture", supportsDepthCapture)
   map.putString("autoFocusSystem", "contrast-detection") // TODO: Is this wrong?
   map.putArray("videoStabilizationModes", createStabilizationModes())
-  map.putArray("pixelFormats", createPixelFormats(videoSize))
+  map.putArray("pixelFormats", createPixelFormats())
   return map
 }
@@ -2,12 +2,15 @@ package com.mrousavy.camera.core

 import android.graphics.ImageFormat
 import android.graphics.SurfaceTexture
+import android.hardware.HardwareBuffer
 import android.media.ImageReader
+import android.media.ImageWriter
+import android.os.Build
 import android.util.Log
 import android.view.Surface
 import com.facebook.jni.HybridData
 import com.mrousavy.camera.CameraQueues
-import com.mrousavy.camera.PixelFormatNotSupportedInVideoPipelineError
+import com.mrousavy.camera.FrameProcessorsUnavailableError
 import com.mrousavy.camera.frameprocessor.Frame
 import com.mrousavy.camera.frameprocessor.FrameProcessor
 import com.mrousavy.camera.parsers.Orientation
@@ -16,15 +19,20 @@ import java.io.Closeable

 /**
  * An OpenGL pipeline for streaming Camera Frames to one or more outputs.
- * Currently, [VideoPipeline] can stream to a [FrameProcessor] and a [MediaRecorder].
+ * Currently, [VideoPipeline] can stream to a [FrameProcessor] and a [RecordingSession].
  *
  * @param [width] The width of the Frames to stream (> 0)
  * @param [height] The height of the Frames to stream (> 0)
  * @param [format] The format of the Frames to stream. ([ImageFormat.PRIVATE], [ImageFormat.YUV_420_888] or [ImageFormat.JPEG])
  */
 @Suppress("KotlinJniMissingFunction")
-class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageFormat.PRIVATE, private val isMirrored: Boolean = false) :
-  SurfaceTexture.OnFrameAvailableListener,
+class VideoPipeline(
+  val width: Int,
+  val height: Int,
+  val format: PixelFormat = PixelFormat.NATIVE,
+  private val isMirrored: Boolean = false,
+  enableFrameProcessor: Boolean = false
+) : SurfaceTexture.OnFrameAvailableListener,
   Closeable {
   companion object {
     private const val MAX_IMAGES = 3
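A hypothetical call site for the new signature (argument values illustrative): `format` is now the public PixelFormat union rather than a raw ImageFormat Int, and `enableFrameProcessor` decides at construction time whether the ImageReader round-trip is set up:

```kotlin
val pipeline = VideoPipeline(
  width = 1920,
  height = 1080,
  format = PixelFormat.NATIVE,
  isMirrored = false,
  enableFrameProcessor = true // route frames through ImageReader before OpenGL
)
val cameraSurface = pipeline.surface // hand this to the Camera2 capture session
```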
@@ -52,7 +60,6 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm

   // Output 1
   private var frameProcessor: FrameProcessor? = null
-  private var imageReader: ImageReader? = null

   // Output 2
   private var recordingSession: RecordingSession? = null
@@ -61,36 +68,61 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
   private val surfaceTexture: SurfaceTexture
   val surface: Surface

+  // If Frame Processors are enabled, we go through ImageReader first before we go thru OpenGL
+  private var imageReader: ImageReader? = null
+  private var imageWriter: ImageWriter? = null
+
   init {
     Log.i(
       TAG,
-      "Initializing $width x $height Video Pipeline " +
-        "(format: ${PixelFormat.fromImageFormat(format)} #$format)"
+      "Initializing $width x $height Video Pipeline (format: $format)"
     )
-    // TODO: We currently use OpenGL for the Video Pipeline.
-    // OpenGL only works in the RGB (RGBA_8888; 0x23) pixel-format, so we cannot
-    // override the pixel-format to something like YUV or PRIVATE.
-    // This absolutely sucks and I would prefer to replace the OpenGL pipeline with
-    // something similar to how iOS works where we just pass GPU buffers around,
-    // but android.media APIs are just not as advanced yet.
-    // For example, ImageReader/ImageWriter is way too buggy and does not work with MediaRecorder.
-    // See this issue ($4.000 bounty!) for more details:
-    // https://github.com/mrousavy/react-native-vision-camera/issues/1837
-    if (format != ImageFormat.PRIVATE && format != 0x23) {
-      throw PixelFormatNotSupportedInVideoPipelineError(PixelFormat.fromImageFormat(format).unionValue)
-    }
     mHybridData = initHybrid(width, height)
     surfaceTexture = SurfaceTexture(false)
     surfaceTexture.setDefaultBufferSize(width, height)
     surfaceTexture.setOnFrameAvailableListener(this)
-    surface = Surface(surfaceTexture)
+    val glSurface = Surface(surfaceTexture)
+
+    if (enableFrameProcessor) {
+      // User has passed a Frame Processor, we need to route images through ImageReader so we can get
+      // CPU access to the Frames, then send them to the OpenGL pipeline later.
+      if (Build.VERSION.SDK_INT < Build.VERSION_CODES.Q) {
+        throw FrameProcessorsUnavailableError("Frame Processors require API 29 or higher. (Q)")
+      }
+      // GPU_SAMPLED because we redirect to OpenGL, CPU_READ because we read pixels before that.
+      val usage = HardwareBuffer.USAGE_GPU_SAMPLED_IMAGE or HardwareBuffer.USAGE_CPU_READ_OFTEN
+      val format = getImageReaderFormat()
+      Log.i(TAG, "Using ImageReader round-trip (format: #$format)")
+      imageWriter = ImageWriter.newInstance(glSurface, MAX_IMAGES, format)
+      imageReader = ImageReader.newInstance(width, height, format, MAX_IMAGES, usage)
+      imageReader!!.setOnImageAvailableListener({ reader ->
+        Log.i(TAG, "ImageReader::onImageAvailable!")
+        val image = reader.acquireNextImage() ?: return@setOnImageAvailableListener
+
+        Log.i(TAG, "Image Format: ${image.format}")
+
+        // TODO: Get correct orientation and isMirrored
+        val frame = Frame(image, image.timestamp, Orientation.PORTRAIT, isMirrored)
+        frame.incrementRefCount()
+        frameProcessor?.call(frame)
+
+        imageWriter!!.queueInputImage(image)
+
+        frame.decrementRefCount()
+      }, CameraQueues.videoQueue.handler)
+
+      surface = imageReader!!.surface
+    } else {
+      // No Frame Processor will be used, directly render into the OpenGL pipeline to avoid ImageReader roundtrip.
+      surface = glSurface
+    }
   }

   override fun close() {
     synchronized(this) {
       isActive = false
+      imageWriter?.close()
       imageReader?.close()
       imageReader = null
       frameProcessor = null
       recordingSession = null
       surfaceTexture.release()
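A note on the Image ownership at play in the listener above: an Image acquired from an ImageReader must be released exactly once, and ImageWriter.queueInputImage() takes ownership of it (after the call, the Image is invalid as if it had been closed). A minimal sketch of that contract, independent of this codebase:

```kotlin
import android.media.Image
import android.media.ImageReader
import android.media.ImageWriter

fun pumpOneImage(reader: ImageReader, writer: ImageWriter) {
  val image: Image = reader.acquireNextImage() ?: return
  // ...CPU work on `image` here (e.g. call a Frame Processor)...
  // queueInputImage() transfers ownership of `image` to the writer;
  // do not call image.close() after this line.
  writer.queueInputImage(image)
}
```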
@@ -123,29 +155,13 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
     }
   }

-  private fun getImageReader(): ImageReader {
-    if (format != ImageFormat.PRIVATE) {
-      Log.w(
-        TAG,
-        "Warning: pixelFormat \"${PixelFormat.fromImageFormat(format).unionValue}\" might " +
-          "not be supported on this device because the C++ OpenGL GPU Video Pipeline operates in RGBA_8888. " +
-          "I wanted to use an ImageReader -> ImageWriter setup for this, but I couldn't get it to work. " +
-          "See this PR for more details: https://github.com/mrousavy/react-native-vision-camera/pull/1836"
-      )
-    }
-    val imageReader = ImageReader.newInstance(width, height, format, MAX_IMAGES)
-    imageReader.setOnImageAvailableListener({ reader ->
-      Log.i("VideoPipeline", "ImageReader::onImageAvailable!")
-      val image = reader.acquireNextImage() ?: return@setOnImageAvailableListener
-
-      // TODO: Get correct orientation and isMirrored
-      val frame = Frame(image, image.timestamp, Orientation.PORTRAIT, isMirrored)
-      frame.incrementRefCount()
-      frameProcessor?.call(frame)
-      frame.decrementRefCount()
-    }, CameraQueues.videoQueue.handler)
-    return imageReader
-  }
+  private fun getImageReaderFormat(): Int =
+    when (format) {
+      PixelFormat.NATIVE -> ImageFormat.PRIVATE
+      PixelFormat.RGB -> HardwareBuffer.RGBA_8888
+      PixelFormat.YUV -> ImageFormat.YUV_420_888
+      else -> ImageFormat.PRIVATE
+    }

   /**
    * Configures the Pipeline to also call the given [FrameProcessor] (or null).
@@ -154,28 +170,11 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
     synchronized(this) {
       Log.i(TAG, "Setting $width x $height FrameProcessor Output...")
       this.frameProcessor = frameProcessor
-
-      if (frameProcessor != null) {
-        if (this.imageReader == null) {
-          // 1. Create new ImageReader that just calls the Frame Processor
-          this.imageReader = getImageReader()
-        }
-
-        // 2. Configure OpenGL pipeline to stream Frames into the ImageReader's surface
-        setFrameProcessorOutputSurface(imageReader!!.surface)
-      } else {
-        // 1. Configure OpenGL pipeline to stop streaming Frames into the ImageReader's surface
-        removeFrameProcessorOutputSurface()
-
-        // 2. Close the ImageReader
-        this.imageReader?.close()
-        this.imageReader = null
-      }
     }
   }

   /**
-   * Configures the Pipeline to also write Frames to a Surface from a [MediaRecorder] (or null)
+   * Configures the Pipeline to also write Frames to a Surface from a `MediaRecorder` (or null)
    */
   fun setRecordingSessionOutput(recordingSession: RecordingSession?) {
     synchronized(this) {
@@ -195,8 +194,6 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
   private external fun getInputTextureId(): Int
   private external fun onBeforeFrame()
   private external fun onFrame(transformMatrix: FloatArray)
-  private external fun setFrameProcessorOutputSurface(surface: Any)
-  private external fun removeFrameProcessorOutputSurface()
   private external fun setRecordingSessionOutputSurface(surface: Any)
   private external fun removeRecordingSessionOutputSurface()
   private external fun initHybrid(width: Int, height: Int): HybridData
@@ -16,6 +16,7 @@ import com.mrousavy.camera.extensions.getPhotoSizes
 import com.mrousavy.camera.extensions.getPreviewTargetSize
 import com.mrousavy.camera.extensions.getVideoSizes
 import com.mrousavy.camera.extensions.smaller
+import com.mrousavy.camera.parsers.PixelFormat
 import java.io.Closeable

 class CameraOutputs(
@@ -38,7 +39,7 @@ class CameraOutputs(
   val targetSize: Size? = null,
   val enableRecording: Boolean = false,
   val enableFrameProcessor: Boolean? = false,
-  val format: Int = ImageFormat.PRIVATE
+  val format: PixelFormat = PixelFormat.NATIVE
 )

 interface Callback {
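A hypothetical call site for the changed data class (values illustrative): callers now pass the PixelFormat union directly instead of pre-converting it to an ImageFormat Int, matching the CameraView.kt change above:

```kotlin
val videoOutput = CameraOutputs.VideoOutput(
  targetSize = Size(1920, 1080),
  enableRecording = true,
  enableFrameProcessor = true,
  format = PixelFormat.NATIVE
)
```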
@@ -134,8 +135,11 @@ class CameraOutputs(

   // Video output: High resolution repeating images (startRecording() or useFrameProcessor())
   if (video != null) {
-    val size = characteristics.getVideoSizes(cameraId, video.format).closestToOrMax(video.targetSize)
-    val videoPipeline = VideoPipeline(size.width, size.height, video.format, isMirrored)
+    // TODO: Should this be dynamic?
+    val format = ImageFormat.YUV_420_888
+    val size = characteristics.getVideoSizes(cameraId, format).closestToOrMax(video.targetSize)
+    val enableFrameProcessor = video.enableFrameProcessor ?: false
+    val videoPipeline = VideoPipeline(size.width, size.height, video.format, isMirrored, enableFrameProcessor)

     Log.i(TAG, "Adding ${size.width}x${size.height} video output. (Format: ${video.format})")
     videoOutput = VideoPipelineOutput(videoPipeline, SurfaceOutput.OutputType.VIDEO)
@@ -7,15 +7,13 @@ import com.mrousavy.camera.PixelFormatNotSupportedError
 enum class PixelFormat(override val unionValue: String) : JSUnionValue {
   YUV("yuv"),
   RGB("rgb"),
-  DNG("dng"),
   NATIVE("native"),
   UNKNOWN("unknown");

   fun toImageFormat(): Int {
     val result = when (this) {
       YUV -> ImageFormat.YUV_420_888
-      RGB -> ImageFormat.JPEG
-      DNG -> ImageFormat.RAW_SENSOR
+      RGB -> android.graphics.PixelFormat.RGBA_8888
       NATIVE -> ImageFormat.PRIVATE
       UNKNOWN -> null
     }
@@ -29,8 +27,7 @@ enum class PixelFormat(override val unionValue: String) : JSUnionValue {
   fun fromImageFormat(imageFormat: Int): PixelFormat =
     when (imageFormat) {
       ImageFormat.YUV_420_888 -> YUV
-      ImageFormat.JPEG, ImageFormat.DEPTH_JPEG -> RGB
-      ImageFormat.RAW_SENSOR -> DNG
+      android.graphics.PixelFormat.RGBA_8888 -> RGB
       ImageFormat.PRIVATE -> NATIVE
       else -> UNKNOWN
     }
@@ -39,7 +36,6 @@ enum class PixelFormat(override val unionValue: String) : JSUnionValue {
     when (unionValue) {
       "yuv" -> YUV
       "rgb" -> RGB
-      "dng" -> DNG
       "native" -> NATIVE
       "unknown" -> UNKNOWN
       else -> null
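With DNG removed and RGB remapped to RGBA_8888, the three conversions compose into a consistent round-trip; a hypothetical example using the values from this diff:

```kotlin
val fmt = PixelFormat.fromUnionValue("rgb")    // -> PixelFormat.RGB
val imageFormat = fmt?.toImageFormat()         // -> android.graphics.PixelFormat.RGBA_8888
val roundTripped = imageFormat?.let { PixelFormat.fromImageFormat(it) } // -> PixelFormat.RGB
```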