From 954b44810b08b06f3940fd25a703b52cc4897530 Mon Sep 17 00:00:00 2001
From: Marc Rousavy
Date: Fri, 29 Sep 2023 21:52:19 +0200
Subject: [PATCH] feat: Route images through `ImageWriter` into OpenGL pipeline
 (#1874)

* feat: Route images through `ImageWriter` into OpenGL pipeline
* fix: Use RGB format
* fix: Every device supports YUV, RGB and NATIVE
* Update VideoPipeline.kt
* log format
* Plug ImageReader into the OpenGL pipeline
* Call Frame Processor
* Format
* Remove logs
---
 .../android/src/main/cpp/VideoPipeline.cpp    |  22 ----
 package/android/src/main/cpp/VideoPipeline.h  |   5 -
 .../java/com/mrousavy/camera/CameraView.kt    |   2 +-
 .../main/java/com/mrousavy/camera/Errors.kt   |   2 +
 .../camera/core/CameraDeviceDetails.kt        |  15 +--
 .../com/mrousavy/camera/core/VideoPipeline.kt | 123 +++++++++---------
 .../camera/core/outputs/CameraOutputs.kt      |  10 +-
 .../mrousavy/camera/parsers/PixelFormat.kt    |   8 +-
 8 files changed, 77 insertions(+), 110 deletions(-)

diff --git a/package/android/src/main/cpp/VideoPipeline.cpp b/package/android/src/main/cpp/VideoPipeline.cpp
index 387e2fc..8160daa 100644
--- a/package/android/src/main/cpp/VideoPipeline.cpp
+++ b/package/android/src/main/cpp/VideoPipeline.cpp
@@ -31,7 +31,6 @@ VideoPipeline::VideoPipeline(jni::alias_ref<jhybridobject> jThis, int width, int
 VideoPipeline::~VideoPipeline() {
   // 1. Remove output surfaces
-  removeFrameProcessorOutputSurface();
   removeRecordingSessionOutputSurface();
   // 2. Delete the input textures
   if (_inputTexture != std::nullopt) {
@@ -42,21 +41,6 @@ VideoPipeline::~VideoPipeline() {
   _context = nullptr;
 }
 
-void VideoPipeline::removeFrameProcessorOutputSurface() {
-  if (_frameProcessorOutput)
-    _frameProcessorOutput->destroy();
-  _frameProcessorOutput = nullptr;
-}
-
-void VideoPipeline::setFrameProcessorOutputSurface(jobject surface) {
-  // 1. Delete existing output surface
-  removeFrameProcessorOutputSurface();
-
-  // 2. Set new output surface if it is not null
-  ANativeWindow* window = ANativeWindow_fromSurface(jni::Environment::current(), surface);
-  _frameProcessorOutput = OpenGLRenderer::CreateWithWindowSurface(_context, window);
-}
-
 void VideoPipeline::removeRecordingSessionOutputSurface() {
   if (_recordingSessionOutput)
     _recordingSessionOutput->destroy();
@@ -93,10 +77,6 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara
 
   OpenGLTexture& texture = _inputTexture.value();
 
-  if (_frameProcessorOutput) {
-    __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to FrameProcessor..");
-    _frameProcessorOutput->renderTextureToSurface(texture, transformMatrix);
-  }
   if (_recordingSessionOutput) {
     __android_log_print(ANDROID_LOG_INFO, TAG, "Rendering to RecordingSession..");
     _recordingSessionOutput->renderTextureToSurface(texture, transformMatrix);
@@ -106,8 +86,6 @@ void VideoPipeline::onFrame(jni::alias_ref<jni::JArrayFloat> transformMatrixPara
 void VideoPipeline::registerNatives() {
   registerHybrid({
       makeNativeMethod("initHybrid", VideoPipeline::initHybrid),
-      makeNativeMethod("setFrameProcessorOutputSurface", VideoPipeline::setFrameProcessorOutputSurface),
-      makeNativeMethod("removeFrameProcessorOutputSurface", VideoPipeline::removeFrameProcessorOutputSurface),
       makeNativeMethod("setRecordingSessionOutputSurface", VideoPipeline::setRecordingSessionOutputSurface),
       makeNativeMethod("removeRecordingSessionOutputSurface", VideoPipeline::removeRecordingSessionOutputSurface),
       makeNativeMethod("getInputTextureId", VideoPipeline::getInputTextureId),
diff --git a/package/android/src/main/cpp/VideoPipeline.h b/package/android/src/main/cpp/VideoPipeline.h
index ca2551f..67f0725 100644
--- a/package/android/src/main/cpp/VideoPipeline.h
+++ b/package/android/src/main/cpp/VideoPipeline.h
@@ -30,10 +30,6 @@ public:
   // -> SurfaceTexture input
   int getInputTextureId();
 
-  // <- Frame Processor output
-  void setFrameProcessorOutputSurface(jobject surface);
-  void removeFrameProcessorOutputSurface();
-
  // <- MediaRecorder output
  void setRecordingSessionOutputSurface(jobject surface);
  void removeRecordingSessionOutputSurface();
@@ -54,7 +50,6 @@ private:
   // Output Contexts
   std::shared_ptr<OpenGLContext> _context = nullptr;
-  std::unique_ptr<OpenGLRenderer> _frameProcessorOutput = nullptr;
   std::unique_ptr<OpenGLRenderer> _recordingSessionOutput = nullptr;
 
 private:
diff --git a/package/android/src/main/java/com/mrousavy/camera/CameraView.kt b/package/android/src/main/java/com/mrousavy/camera/CameraView.kt
index 42bd26a..0fdf5b8 100644
--- a/package/android/src/main/java/com/mrousavy/camera/CameraView.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/CameraView.kt
@@ -210,7 +210,7 @@ class CameraView(context: Context) : FrameLayout(context) {
       null
     }
     val videoOutput = if (video == true || enableFrameProcessor) {
-      CameraOutputs.VideoOutput(targetVideoSize, video == true, enableFrameProcessor, pixelFormat.toImageFormat())
+      CameraOutputs.VideoOutput(targetVideoSize, video == true, enableFrameProcessor, pixelFormat)
     } else {
       null
     }
diff --git a/package/android/src/main/java/com/mrousavy/camera/Errors.kt b/package/android/src/main/java/com/mrousavy/camera/Errors.kt
index e87745b..0cdc043 100644
--- a/package/android/src/main/java/com/mrousavy/camera/Errors.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/Errors.kt
@@ -66,6 +66,8 @@ class CameraSessionCannotBeConfiguredError(cameraId: String, outputs: CameraOutp
   CameraError("session", "cannot-create-session", "Failed to create a Camera Session for Camera $cameraId! Outputs: $outputs")
 
 class CameraDisconnectedError(cameraId: String, error: CameraDeviceError) :
   CameraError("session", "camera-has-been-disconnected", "The given Camera device (id: $cameraId) has been disconnected! Error: $error")
+class FrameProcessorsUnavailableError(reason: String) :
+  CameraError("system", "frame-processors-unavailable", "Frame Processors are unavailable! Reason: $reason")
 
 class VideoNotEnabledError :
   CameraError("capture", "video-not-enabled", "Video capture is disabled! Pass `video={true}` to enable video recordings.")
diff --git a/package/android/src/main/java/com/mrousavy/camera/core/CameraDeviceDetails.kt b/package/android/src/main/java/com/mrousavy/camera/core/CameraDeviceDetails.kt
index 015f3c0..200d932 100644
--- a/package/android/src/main/java/com/mrousavy/camera/core/CameraDeviceDetails.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/CameraDeviceDetails.kt
@@ -157,16 +157,11 @@ class CameraDeviceDetails(private val cameraManager: CameraManager, private val
     return array
   }
 
-  private fun createPixelFormats(size: Size): ReadableArray {
-    val formats = cameraConfig.outputFormats
+  private fun createPixelFormats(): ReadableArray {
+    // Every output in Camera2 supports YUV and NATIVE
     val array = Arguments.createArray()
-    formats.forEach { format ->
-      val sizes = cameraConfig.getOutputSizes(format)
-      val hasSize = sizes.any { it.width == size.width && it.height == size.height }
-      if (hasSize) {
-        array.pushString(PixelFormat.fromImageFormat(format).unionValue)
-      }
-    }
+    array.pushString(PixelFormat.YUV.unionValue)
+    array.pushString(PixelFormat.NATIVE.unionValue)
     return array
   }
 
@@ -186,7 +181,7 @@ class CameraDeviceDetails(private val cameraManager: CameraManager, private val
     map.putBoolean("supportsDepthCapture", supportsDepthCapture)
     map.putString("autoFocusSystem", "contrast-detection") // TODO: Is this wrong?
     map.putArray("videoStabilizationModes", createStabilizationModes())
-    map.putArray("pixelFormats", createPixelFormats(videoSize))
+    map.putArray("pixelFormats", createPixelFormats())
     return map
   }
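The hard-coded list above relies on a Camera2 guarantee: every compliant device must support YUV_420_888 and PRIVATE output streams. A minimal sketch of how that guarantee could be verified at runtime (the function name and `characteristics` parameter are illustrative, not part of this PR):

```kotlin
import android.graphics.ImageFormat
import android.hardware.camera2.CameraCharacteristics

// Sketch only: both checks should always return true on a compliant device,
// which is why createPixelFormats() can push "yuv" and "native" unconditionally.
fun supportsGuaranteedFormats(characteristics: CameraCharacteristics): Boolean {
  val config = characteristics.get(CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP)
    ?: return false
  return config.isOutputSupportedFor(ImageFormat.YUV_420_888) &&
    config.isOutputSupportedFor(ImageFormat.PRIVATE)
}
```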
diff --git a/package/android/src/main/java/com/mrousavy/camera/core/VideoPipeline.kt b/package/android/src/main/java/com/mrousavy/camera/core/VideoPipeline.kt
index 16cca60..c335a17 100644
--- a/package/android/src/main/java/com/mrousavy/camera/core/VideoPipeline.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/VideoPipeline.kt
@@ -2,12 +2,15 @@ package com.mrousavy.camera.core
 
 import android.graphics.ImageFormat
 import android.graphics.SurfaceTexture
+import android.hardware.HardwareBuffer
 import android.media.ImageReader
+import android.media.ImageWriter
+import android.os.Build
 import android.util.Log
 import android.view.Surface
 import com.facebook.jni.HybridData
 import com.mrousavy.camera.CameraQueues
-import com.mrousavy.camera.PixelFormatNotSupportedInVideoPipelineError
+import com.mrousavy.camera.FrameProcessorsUnavailableError
 import com.mrousavy.camera.frameprocessor.Frame
 import com.mrousavy.camera.frameprocessor.FrameProcessor
 import com.mrousavy.camera.parsers.Orientation
@@ -16,15 +19,20 @@ import java.io.Closeable
 
 /**
  * An OpenGL pipeline for streaming Camera Frames to one or more outputs.
- * Currently, [VideoPipeline] can stream to a [FrameProcessor] and a [MediaRecorder].
+ * Currently, [VideoPipeline] can stream to a [FrameProcessor] and a [RecordingSession].
  *
  * @param [width] The width of the Frames to stream (> 0)
  * @param [height] The height of the Frames to stream (> 0)
  * @param [format] The format of the Frames to stream. ([ImageFormat.PRIVATE], [ImageFormat.YUV_420_888] or [ImageFormat.JPEG])
  */
 @Suppress("KotlinJniMissingFunction")
-class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageFormat.PRIVATE, private val isMirrored: Boolean = false) :
-  SurfaceTexture.OnFrameAvailableListener,
+class VideoPipeline(
+  val width: Int,
+  val height: Int,
+  val format: PixelFormat = PixelFormat.NATIVE,
+  private val isMirrored: Boolean = false,
+  enableFrameProcessor: Boolean = false
+) : SurfaceTexture.OnFrameAvailableListener,
   Closeable {
   companion object {
     private const val MAX_IMAGES = 3
@@ -52,7 +60,6 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
 
   // Output 1
   private var frameProcessor: FrameProcessor? = null
-  private var imageReader: ImageReader? = null
 
   // Output 2
   private var recordingSession: RecordingSession? = null
@@ -61,36 +68,61 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
   private val surfaceTexture: SurfaceTexture
   val surface: Surface
 
+  // If Frame Processors are enabled, we go through an ImageReader first before we go through OpenGL
+  private var imageReader: ImageReader? = null
+  private var imageWriter: ImageWriter? = null
+
   init {
     Log.i(
       TAG,
-      "Initializing $width x $height Video Pipeline " +
-        "(format: ${PixelFormat.fromImageFormat(format)} #$format)"
+      "Initializing $width x $height Video Pipeline (format: $format)"
     )
-    // TODO: We currently use OpenGL for the Video Pipeline.
-    //  OpenGL only works in the RGB (RGBA_8888; 0x23) pixel-format, so we cannot
-    //  override the pixel-format to something like YUV or PRIVATE.
-    //  This absolutely sucks and I would prefer to replace the OpenGL pipeline with
-    //  something similar to how iOS works where we just pass GPU buffers around,
-    //  but android.media APIs are just not as advanced yet.
-    //  For example, ImageReader/ImageWriter is way too buggy and does not work with MediaRecorder.
-    //  See this issue ($4.000 bounty!) for more details:
-    //  https://github.com/mrousavy/react-native-vision-camera/issues/1837
-    if (format != ImageFormat.PRIVATE && format != 0x23) {
-      throw PixelFormatNotSupportedInVideoPipelineError(PixelFormat.fromImageFormat(format).unionValue)
-    }
     mHybridData = initHybrid(width, height)
     surfaceTexture = SurfaceTexture(false)
     surfaceTexture.setDefaultBufferSize(width, height)
     surfaceTexture.setOnFrameAvailableListener(this)
-    surface = Surface(surfaceTexture)
+    val glSurface = Surface(surfaceTexture)
+
+    if (enableFrameProcessor) {
+      // User has passed a Frame Processor, we need to route images through ImageReader so we can get
+      // CPU access to the Frames, then send them to the OpenGL pipeline later.
+      if (Build.VERSION.SDK_INT < Build.VERSION_CODES.Q) {
+        throw FrameProcessorsUnavailableError("Frame Processors require API 29 or higher. (Q)")
+      }
+      // GPU_SAMPLED because we redirect to OpenGL, CPU_READ because we read pixels before that.
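+      // USAGE_GPU_SAMPLED_IMAGE: the buffers must be bindable as OpenGL textures once
+      // the ImageWriter hands them on to the GL input surface.
+      // USAGE_CPU_READ_OFTEN: the Frame Processor maps the same buffers for pixel access.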
+      val usage = HardwareBuffer.USAGE_GPU_SAMPLED_IMAGE or HardwareBuffer.USAGE_CPU_READ_OFTEN
+      val format = getImageReaderFormat()
+      Log.i(TAG, "Using ImageReader round-trip (format: #$format)")
+      imageWriter = ImageWriter.newInstance(glSurface, MAX_IMAGES, format)
+      imageReader = ImageReader.newInstance(width, height, format, MAX_IMAGES, usage)
+      imageReader!!.setOnImageAvailableListener({ reader ->
+        Log.i(TAG, "ImageReader::onImageAvailable!")
+        val image = reader.acquireNextImage() ?: return@setOnImageAvailableListener
+
+        Log.i(TAG, "Image Format: ${image.format}")
+
+        // TODO: Get correct orientation and isMirrored
+        val frame = Frame(image, image.timestamp, Orientation.PORTRAIT, isMirrored)
+        frame.incrementRefCount()
+        frameProcessor?.call(frame)
+
+        imageWriter!!.queueInputImage(image)
+
+        frame.decrementRefCount()
+      }, CameraQueues.videoQueue.handler)
+
+      surface = imageReader!!.surface
+    } else {
+      // No Frame Processor will be used, directly render into the OpenGL pipeline to avoid an ImageReader round-trip.
+      surface = glSurface
+    }
   }
 
   override fun close() {
     synchronized(this) {
       isActive = false
+      imageWriter?.close()
       imageReader?.close()
-      imageReader = null
       frameProcessor = null
       recordingSession = null
       surfaceTexture.release()
@@ -123,29 +155,13 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
     }
   }
 
-  private fun getImageReader(): ImageReader {
-    if (format != ImageFormat.PRIVATE) {
-      Log.w(
-        TAG,
-        "Warning: pixelFormat \"${PixelFormat.fromImageFormat(format).unionValue}\" might " +
-          "not be supported on this device because the C++ OpenGL GPU Video Pipeline operates in RGBA_8888. " +
-          "I wanted to use an ImageReader -> ImageWriter setup for this, but I couldn't get it to work. " +
-          "See this PR for more details: https://github.com/mrousavy/react-native-vision-camera/pull/1836"
-      )
+  private fun getImageReaderFormat(): Int =
+    when (format) {
+      PixelFormat.NATIVE -> ImageFormat.PRIVATE
+      PixelFormat.RGB -> HardwareBuffer.RGBA_8888
+      PixelFormat.YUV -> ImageFormat.YUV_420_888
+      else -> ImageFormat.PRIVATE
     }
-    val imageReader = ImageReader.newInstance(width, height, format, MAX_IMAGES)
-    imageReader.setOnImageAvailableListener({ reader ->
-      Log.i("VideoPipeline", "ImageReader::onImageAvailable!")
-      val image = reader.acquireNextImage() ?: return@setOnImageAvailableListener
-
-      // TODO: Get correct orientation and isMirrored
-      val frame = Frame(image, image.timestamp, Orientation.PORTRAIT, isMirrored)
-      frame.incrementRefCount()
-      frameProcessor?.call(frame)
-      frame.decrementRefCount()
-    }, CameraQueues.videoQueue.handler)
-    return imageReader
-  }
 
   /**
    * Configures the Pipeline to also call the given [FrameProcessor] (or null).
@@ -154,28 +170,11 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
    */
   fun setFrameProcessor(frameProcessor: FrameProcessor?) {
     synchronized(this) {
       Log.i(TAG, "Setting $width x $height FrameProcessor Output...")
       this.frameProcessor = frameProcessor
-
-      if (frameProcessor != null) {
-        if (this.imageReader == null) {
-          // 1. Create new ImageReader that just calls the Frame Processor
-          this.imageReader = getImageReader()
-        }
-
-        // 2. Configure OpenGL pipeline to stream Frames into the ImageReader's surface
-        setFrameProcessorOutputSurface(imageReader!!.surface)
-      } else {
-        // 1. Configure OpenGL pipeline to stop streaming Frames into the ImageReader's surface
-        removeFrameProcessorOutputSurface()
-
-        // 2. Close the ImageReader
-        this.imageReader?.close()
-        this.imageReader = null
-      }
     }
   }
 
   /**
-   * Configures the Pipeline to also write Frames to a Surface from a [MediaRecorder] (or null)
+   * Configures the Pipeline to also write Frames to a Surface from a `MediaRecorder` (or null)
    */
   fun setRecordingSessionOutput(recordingSession: RecordingSession?) {
     synchronized(this) {
@@ -195,8 +194,6 @@ class VideoPipeline(val width: Int, val height: Int, val format: Int = ImageForm
   private external fun getInputTextureId(): Int
   private external fun onBeforeFrame()
   private external fun onFrame(transformMatrix: FloatArray)
-  private external fun setFrameProcessorOutputSurface(surface: Any)
-  private external fun removeFrameProcessorOutputSurface()
   private external fun setRecordingSessionOutputSurface(surface: Any)
   private external fun removeRecordingSessionOutputSurface()
   private external fun initHybrid(width: Int, height: Int): HybridData
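The new Frame Processor path boils down to one pattern: Camera2 renders into an ImageReader, the Frame Processor reads the Image on the CPU, and an ImageWriter forwards the very same buffer into the OpenGL input Surface. A minimal, self-contained sketch of that round-trip (`glInputSurface` and `processFrame` are illustrative stand-ins for the pipeline's SurfaceTexture Surface and the Frame Processor call; they are not part of this PR):

```kotlin
import android.graphics.ImageFormat
import android.hardware.HardwareBuffer
import android.media.Image
import android.media.ImageReader
import android.media.ImageWriter
import android.os.Handler
import android.os.Looper
import android.view.Surface

// Requires API 29 (Q): both the usage-flag ImageReader factory and the
// format-aware ImageWriter factory were added in that release.
fun wireRoundTrip(width: Int, height: Int, glInputSurface: Surface, processFrame: (Image) -> Unit): ImageReader {
  // GPU_SAMPLED: buffers end up as OpenGL textures; CPU_READ_OFTEN: pixels are read on the CPU first.
  val usage = HardwareBuffer.USAGE_GPU_SAMPLED_IMAGE or HardwareBuffer.USAGE_CPU_READ_OFTEN
  val reader = ImageReader.newInstance(width, height, ImageFormat.YUV_420_888, 3, usage)
  val writer = ImageWriter.newInstance(glInputSurface, 3, ImageFormat.YUV_420_888)
  reader.setOnImageAvailableListener({ r ->
    val image = r.acquireNextImage() ?: return@setOnImageAvailableListener
    processFrame(image)           // CPU access happens here, before the GPU sees the buffer
    writer.queueInputImage(image) // hands the same buffer on to OpenGL and closes it
  }, Handler(Looper.getMainLooper()))
  return reader // reader.surface is what the Camera2 session renders into
}
```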
diff --git a/package/android/src/main/java/com/mrousavy/camera/core/outputs/CameraOutputs.kt b/package/android/src/main/java/com/mrousavy/camera/core/outputs/CameraOutputs.kt
index a485a43..866d8ae 100644
--- a/package/android/src/main/java/com/mrousavy/camera/core/outputs/CameraOutputs.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/core/outputs/CameraOutputs.kt
@@ -16,6 +16,7 @@ import com.mrousavy.camera.extensions.getPhotoSizes
 import com.mrousavy.camera.extensions.getPreviewTargetSize
 import com.mrousavy.camera.extensions.getVideoSizes
 import com.mrousavy.camera.extensions.smaller
+import com.mrousavy.camera.parsers.PixelFormat
 import java.io.Closeable
 
 class CameraOutputs(
@@ -38,7 +39,7 @@ class CameraOutputs(
     val targetSize: Size? = null,
     val enableRecording: Boolean = false,
     val enableFrameProcessor: Boolean? = false,
-    val format: Int = ImageFormat.PRIVATE
+    val format: PixelFormat = PixelFormat.NATIVE
   )
 
   interface Callback {
@@ -134,8 +135,11 @@ class CameraOutputs(
 
     // Video output: High resolution repeating images (startRecording() or useFrameProcessor())
     if (video != null) {
-      val size = characteristics.getVideoSizes(cameraId, video.format).closestToOrMax(video.targetSize)
-      val videoPipeline = VideoPipeline(size.width, size.height, video.format, isMirrored)
+      // TODO: Should this be dynamic?
+      val format = ImageFormat.YUV_420_888
+      val size = characteristics.getVideoSizes(cameraId, format).closestToOrMax(video.targetSize)
+      val enableFrameProcessor = video.enableFrameProcessor ?: false
+      val videoPipeline = VideoPipeline(size.width, size.height, video.format, isMirrored, enableFrameProcessor)
       Log.i(TAG, "Adding ${size.width}x${size.height} video output. (Format: ${video.format})")
       videoOutput = VideoPipelineOutput(videoPipeline, SurfaceOutput.OutputType.VIDEO)
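With `VideoOutput.format` now typed as `PixelFormat`, call sites pass the union value straight through instead of converting to an `ImageFormat` integer first. A hypothetical construction (all values invented for illustration):

```kotlin
import android.util.Size
import com.mrousavy.camera.core.outputs.CameraOutputs
import com.mrousavy.camera.parsers.PixelFormat

// Sketch: request a recording-capable video output with Frame Processors enabled.
val videoOutput = CameraOutputs.VideoOutput(
  targetSize = Size(1920, 1080),
  enableRecording = true,
  enableFrameProcessor = true,
  format = PixelFormat.YUV
)
```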
diff --git a/package/android/src/main/java/com/mrousavy/camera/parsers/PixelFormat.kt b/package/android/src/main/java/com/mrousavy/camera/parsers/PixelFormat.kt
index af617fd..9320dd2 100644
--- a/package/android/src/main/java/com/mrousavy/camera/parsers/PixelFormat.kt
+++ b/package/android/src/main/java/com/mrousavy/camera/parsers/PixelFormat.kt
@@ -7,15 +7,13 @@ import com.mrousavy.camera.PixelFormatNotSupportedError
 enum class PixelFormat(override val unionValue: String) : JSUnionValue {
   YUV("yuv"),
   RGB("rgb"),
-  DNG("dng"),
   NATIVE("native"),
   UNKNOWN("unknown");
 
   fun toImageFormat(): Int {
     val result = when (this) {
       YUV -> ImageFormat.YUV_420_888
-      RGB -> ImageFormat.JPEG
-      DNG -> ImageFormat.RAW_SENSOR
+      RGB -> android.graphics.PixelFormat.RGBA_8888
       NATIVE -> ImageFormat.PRIVATE
       UNKNOWN -> null
     }
@@ -29,8 +27,7 @@ enum class PixelFormat(override val unionValue: String) : JSUnionValue {
     fun fromImageFormat(imageFormat: Int): PixelFormat =
       when (imageFormat) {
         ImageFormat.YUV_420_888 -> YUV
-        ImageFormat.JPEG, ImageFormat.DEPTH_JPEG -> RGB
-        ImageFormat.RAW_SENSOR -> DNG
+        android.graphics.PixelFormat.RGBA_8888 -> RGB
         ImageFormat.PRIVATE -> NATIVE
         else -> UNKNOWN
       }
@@ -39,7 +36,6 @@ enum class PixelFormat(override val unionValue: String) : JSUnionValue {
       when (unionValue) {
         "yuv" -> YUV
         "rgb" -> RGB
-        "dng" -> DNG
         "native" -> NATIVE
         "unknown" -> UNKNOWN
         else -> null