Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions extension/apple/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ non_fbcode_target(_kind = fb_apple_library,
autoglob_mode = "EXPORT_UNLESS_INTERNAL",
extension_api_only = True,
frameworks = [
"CoreVideo",
"Foundation",
],
preprocessor_flags = [
Expand All @@ -29,11 +30,13 @@ non_fbcode_target(_kind = fb_apple_library,
visibility = EXECUTORCH_CLIENTS,
deps = select({
"ovr_config//os:macos": [
"//xplat/executorch/extension/image:image_processorAppleMac",
"//xplat/executorch/extension/module:moduleAppleMac",
"//xplat/executorch/extension/tensor:tensorAppleMac",
"//xplat/executorch/runtime/platform:platformAppleMac",
],
"DEFAULT": [
"//xplat/executorch/extension/image:image_processorApple",
"//xplat/executorch/extension/module:moduleApple",
"//xplat/executorch/extension/tensor:tensorApple",
"//xplat/executorch/runtime/platform:platformApple",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

import CoreVideo

public extension ImageNormalization {
/// Create a normalization with a custom scale factor and per-channel RGB mean
/// and standard deviation. `mean` and `standardDeviation` must each contain
/// exactly 3 elements (R, G, B); every `standardDeviation` entry must be
/// nonzero. Applied per channel as
/// `(pixel * scaleFactor - mean[c]) / standardDeviation[c]`.
convenience init(scaleFactor: Float, mean: [Float], standardDeviation: [Float]) {
precondition(mean.count == 3, "mean must have exactly 3 elements (R, G, B)")
precondition(
standardDeviation.count == 3,
"standardDeviation must have exactly 3 elements (R, G, B)")
self.init(
__scaleFactor: scaleFactor,
mean: mean.map { NSNumber(value: $0) },
standardDeviation: standardDeviation.map { NSNumber(value: $0) })
}
}

public extension ImageProcessorConfig {
/// Source pixel count (width * height) sentinels for `gpuMinInputPixels`.
static let alwaysGPU = 0
static let alwaysCPU = Int.max

/// Create an image processor config, specifying only the values that differ
/// from the defaults.
///
/// `gpuMinInputPixels` is the minimum source pixel count at which the GPU
/// path may be used; smaller inputs run on the CPU. Use `.alwaysGPU` (0) or
/// `.alwaysCPU` to force a path.
convenience init(
targetWidth: Int,
targetHeight: Int,
resizeMode: ImageResizeMode = .stretch,
letterboxAnchor: ImageLetterboxAnchor = .center,
padValue: Float = 0,
normalization: ImageNormalization = .zeroToOne(),
gpuMinInputPixels: Int = ImageProcessorConfig.defaultGpuMinInputPixels
) {
self.init(
__targetWidth: targetWidth,
targetHeight: targetHeight,
resizeMode: resizeMode,
letterboxAnchor: letterboxAnchor,
padValue: padValue,
normalization: normalization,
gpuMinInputPixels: gpuMinInputPixels)
}
}

public extension ImageProcessor {
/// Process a CVPixelBuffer into a normalized float tensor.
///
/// Auto-detects pixel format from the buffer. Supported formats: BGRA,
/// RGBA, 8-bit NV12, and 10-bit P010. Output is a `Tensor<Float>` with
/// shape `[1, 3, target_height, target_width]`.
///
/// The buffer is treated as already upright: orientation correction is not
/// applied and cannot be derived from a CVPixelBuffer, so the caller is
/// responsible for supplying an upright buffer.
func process(_ pixelBuffer: CVPixelBuffer) throws -> Tensor<Float> {
let anyTensor = try processPixelBuffer(pixelBuffer)
return Tensor<Float>(anyTensor)
}

/// Process a CVPixelBuffer into a caller-provided tensor, reusing its storage.
///
/// Avoids the per-call allocation of `process(_:)`, which matters for
/// sustained video. `tensor` must be a `Tensor<Float>` with shape
/// `[1, 3, target_height, target_width]`; its storage is overwritten and can
/// be reused across frames. The contents are valid until the next call that
/// writes into the same tensor.
///
/// The buffer is treated as already upright (see `process(_:)`).
func process(_ pixelBuffer: CVPixelBuffer, into tensor: Tensor<Float>) throws {
try processPixelBuffer(pixelBuffer, into: tensor.anyTensor)
}

/// Letterbox padding (per side, in pixels) applied for a source of the given
/// size: `x` is the left/right pad and `y` the top/bottom pad of the resized
/// content. Returns `(0, 0)` for the stretch resize mode or the top-left
/// anchor. Lets callers map the padded output back to the source region.
func computeLetterboxPadding(inputWidth: Int, inputHeight: Int) -> (x: Int, y: Int) {
let padding = __computeLetterboxPadding(forInputWidth: inputWidth, height: inputHeight)
return (padding.x, padding.y)
}
}
1 change: 1 addition & 0 deletions extension/apple/ExecuTorch/Exported/ExecuTorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#import "ExecuTorchBackendOption.h"
#import "ExecuTorchBackendOptionsMap.h"
#import "ExecuTorchError.h"
#import "ExecuTorchImageProcessor.h"
#import "ExecuTorchLog.h"
#import "ExecuTorchModule.h"
#import "ExecuTorchTensor.h"
Expand Down
147 changes: 147 additions & 0 deletions extension/apple/ExecuTorch/Exported/ExecuTorchImageProcessor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#import <CoreVideo/CoreVideo.h>
#import <Foundation/Foundation.h>

#import "ExecuTorchTensor.h"

NS_ASSUME_NONNULL_BEGIN

typedef NS_ENUM(uint8_t, ExecuTorchImageResizeMode) {
ExecuTorchImageResizeModeStretch,
ExecuTorchImageResizeModeLetterbox,
} NS_SWIFT_NAME(ImageResizeMode);

typedef NS_ENUM(uint8_t, ExecuTorchImageLetterboxAnchor) {
ExecuTorchImageLetterboxAnchorCenter,
ExecuTorchImageLetterboxAnchorTopLeft,
} NS_SWIFT_NAME(ImageLetterboxAnchor);

/// Per-side letterbox padding in pixels: `x` is the left/right pad and `y` the
/// top/bottom pad of the resized content.
typedef struct ExecuTorchImageLetterboxPadding {
NSInteger x;
NSInteger y;
} ExecuTorchImageLetterboxPadding NS_SWIFT_NAME(ImageLetterboxPadding);

NS_SWIFT_NAME(ImageNormalization)
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchImageNormalization : NSObject

+ (instancetype)zeroToOne;
+ (instancetype)imagenet;

/// Create a normalization with a custom scale factor and per-channel RGB mean
/// and standard deviation. `mean` and `standardDeviation` must each contain
/// exactly 3 elements (R, G, B). Normalization is applied per channel as
/// `(pixel * scaleFactor - mean[c]) / standardDeviation[c]`, so every
/// `standardDeviation` entry must be nonzero.
- (instancetype)initWithScaleFactor:(float)scaleFactor
mean:(NSArray<NSNumber *> *)mean
standardDeviation:(NSArray<NSNumber *> *)standardDeviation
NS_REFINED_FOR_SWIFT;

+ (instancetype)new NS_UNAVAILABLE;
- (instancetype)init NS_UNAVAILABLE;

@end

NS_SWIFT_NAME(ImageProcessorConfig)
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchImageProcessorConfig : NSObject

@property(nonatomic, readonly) NSInteger targetWidth;
@property(nonatomic, readonly) NSInteger targetHeight;
@property(nonatomic, readonly) ExecuTorchImageResizeMode resizeMode;
@property(nonatomic, readonly) ExecuTorchImageLetterboxAnchor letterboxAnchor;
@property(nonatomic, readonly) float padValue;
@property(nonatomic, readonly) ExecuTorchImageNormalization *normalization;
// Minimum source pixel count (width * height) at which the GPU path may be
// used; smaller inputs run on the CPU. 0 forces GPU, NSIntegerMax forces CPU.
@property(nonatomic, readonly) NSInteger gpuMinInputPixels;

// Default value for gpuMinInputPixels (mirrors the C++ config default).
@property(class, nonatomic, readonly) NSInteger defaultGpuMinInputPixels;

- (instancetype)initWithTargetWidth:(NSInteger)targetWidth
targetHeight:(NSInteger)targetHeight
resizeMode:(ExecuTorchImageResizeMode)resizeMode
letterboxAnchor:(ExecuTorchImageLetterboxAnchor)letterboxAnchor
padValue:(float)padValue
normalization:(ExecuTorchImageNormalization *)normalization
gpuMinInputPixels:(NSInteger)gpuMinInputPixels NS_REFINED_FOR_SWIFT;

+ (instancetype)new NS_UNAVAILABLE;
- (instancetype)init NS_UNAVAILABLE;

@end

/// Thread-safety: ExecuTorchImageProcessor is NOT thread-safe per instance.
/// Internal scratch buffers are mutated during processing. Use one instance
/// per concurrent caller. Different instances are safe to use concurrently.
NS_SWIFT_NAME(ImageProcessor)
__attribute__((objc_subclassing_restricted))
@interface ExecuTorchImageProcessor : NSObject

@property(nonatomic, readonly) ExecuTorchImageProcessorConfig *config;

- (instancetype)initWithConfig:(ExecuTorchImageProcessorConfig *)config;

/// Process a CVPixelBuffer into a normalized float tensor.
///
/// Auto-detects pixel format from the buffer's metadata. Supported
/// formats: BGRA, RGBA, 8-bit NV12, and 10-bit P010 (P010 is narrowed to NV12
/// internally). Other formats return an error.
///
/// The buffer is treated as already upright. Orientation correction is not
/// applied and cannot be derived from a CVPixelBuffer, so the caller is
/// responsible for supplying an upright buffer (e.g. by configuring the
/// capture connection's orientation).
///
/// @param pixelBuffer The input pixel buffer.
/// @param error On failure, set to an NSError describing what went wrong.
/// @return An ExecuTorchTensor with shape [1, 3, H, W] (CHW), or nil on failure.
- (nullable ExecuTorchTensor *)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
error:(NSError **)error;

/// Process a CVPixelBuffer into a caller-provided tensor, reusing its storage.
///
/// Avoids the per-call output allocation of processPixelBuffer:error:, which
/// matters for sustained video. `tensor` must be a Float tensor shaped
/// [1, 3, targetHeight, targetWidth]; its storage is overwritten and can be
/// reused across frames. The result aliases `tensor`, so the caller must
/// finish using the previous result before the next call.
///
/// @param pixelBuffer The input pixel buffer.
/// @param tensor The output tensor to fill.
/// @param error On failure, set to an NSError describing what went wrong.
/// @return YES on success, NO on failure.
- (BOOL)processPixelBuffer:(_Nullable CVPixelBufferRef)pixelBuffer
intoTensor:(ExecuTorchTensor *)tensor
error:(NSError **)error;

/// Letterbox padding (per side, in pixels) the processor applies for a source
/// of the given size: `x` is the left/right pad and `y` the top/bottom pad of
/// the resized content. Returns {0, 0} for the stretch resize mode or the
/// top-left anchor. Lets callers map the padded output back to the source
/// region without replicating the resize geometry.
///
/// @param inputWidth The source pixel width.
/// @param inputHeight The source pixel height.
/// @return The {x, y} padding in pixels.
- (ExecuTorchImageLetterboxPadding)computeLetterboxPaddingForInputWidth:(NSInteger)inputWidth
height:(NSInteger)inputHeight
NS_REFINED_FOR_SWIFT;

+ (instancetype)new NS_UNAVAILABLE;
- (instancetype)init NS_UNAVAILABLE;

@end

NS_ASSUME_NONNULL_END
Loading
Loading