Added AV Foundation capture

This commit is contained in:
Martin Pulec
2014-07-01 15:08:37 +02:00
parent 5e15562fdb
commit 09877cc827
5 changed files with 533 additions and 0 deletions

View File

@@ -190,6 +190,12 @@ $(REFLECTOR_TARGET): bindir $(OBJS) $(GENERATED_HEADERS) $(REFLECTOR_OBJS)
# Pattern rules for C++, Objective-C and Objective-C++ translation units.
%.o: %.cpp $(ALL_INCLUDES)
$(CXX) $(CXXFLAGS) $(INC) -c $< -o $@
# NOTE(review): the Objective-C (.m) rule invokes $(CXX) with $(CFLAGS);
# this looks like it should be $(CC) (or pass -x objective-c) -- verify.
%.o: %.m $(ALL_INCLUDES)
$(CXX) $(CFLAGS) $(INC) -c $< -o $@
# Objective-C++ language is forced explicitly; C++11 is required by the
# avfoundation capture (std::mutex, lambdas, std::unordered_map literals).
%.o: %.mm $(ALL_INCLUDES)
$(CXX) -x objective-c++ -std=c++11 $(CXXFLAGS) $(INC) -c $< -o $@
# Set suffix for CUDA files
.SUFFIXES: .cu

View File

@@ -1048,6 +1048,27 @@ AC_SUBST(QUICKTIME_INC)
AC_SUBST(QUICKTIME_LIB)
AC_SUBST(QUICKTIME_OBJ)
# -------------------------------------------------------------------------------------------------
# AV Foundation capture
# -------------------------------------------------------------------------------------------------
# NOTE(review): $avfoundation is only assigned on MacOSX below; unless it is
# initialized (e.g. avfoundation=no) earlier in this script, the configure
# summary prints an empty value on other systems -- verify.
if test $system = MacOSX
then
AVFOUNDATION_INC=""
AVFOUNDATION_LIB="-framework AVFoundation -framework CoreMedia -framework CoreVideo"
AC_DEFINE([HAVE_AVFOUNDATION], [1], [Build with AV Foundation support])
AVFOUNDATION_OBJ="src/video_capture/avfoundation.o"
#LIB_TARGETS not needed here since we are not building modules on Mac
LIB_OBJS="$LIB_OBJS $AVFOUNDATION_OBJ"
avfoundation=yes
fi
# NOTE(review): appending linker flags to LIB_MODULES is unusual (other
# sections typically append libraries to LIBS); confirm the intended variable.
# Harmless on non-Mac systems where AVFOUNDATION_LIB is empty/unset.
LIB_MODULES="$LIB_MODULES $AVFOUNDATION_LIB"
AC_SUBST(AVFOUNDATION_INC)
AC_SUBST(AVFOUNDATION_LIB)
AC_SUBST(AVFOUNDATION_OBJ)
# -------------------------------------------------------------------------------------------------
# SDL Stuff
SDL_OBJ=
@@ -2677,6 +2698,7 @@ RESULT=\
Library curl ................ $curl
Library live555 ............. $livemedia
AV Foundation ............... $avfoundation
Bluefish444 ................. $bluefish444 (audio: $blue_audio)
DeckLink .................... $decklink
DirectShow .................. $dshow

View File

@@ -66,6 +66,7 @@
#include "video_capture.h"
#include "video_capture/DirectShowGrabber.h"
#include "video_capture/aggregate.h"
#include "video_capture/avfoundation.h"
#include "video_capture/bluefish444.h"
#include "video_capture/decklink.h"
#include "video_capture/deltacast.h"
@@ -159,6 +160,17 @@ struct vidcap_device_api vidcap_device_table[] = {
MK_STATIC(vidcap_aggregate_grab),
NULL
},
#if defined HAVE_AVFOUNDATION
{
0,
"avfoundation",
MK_NAME(vidcap_avfoundation_probe),
MK_NAME(vidcap_avfoundation_init),
MK_NAME(vidcap_avfoundation_done),
MK_NAME(vidcap_avfoundation_grab),
NULL
},
#endif
{
0,
NULL,

View File

@@ -0,0 +1,52 @@
/**
* @file video_capture/avfoundation.h
* @author Martin Pulec <pulec@cesnet.cz>
*/
/*
* Copyright (c) 2014 CESNET, z. s. p. o.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of CESNET nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef __cplusplus
extern "C" {
#endif
struct audio_frame;
/// Probe entry point -- allocates and returns this capturer's description.
struct vidcap_type *vidcap_avfoundation_probe(void);
/// Creates the capture state from the configuration in @a params.
/// Returns NULL on error (a special non-NULL sentinel after printing help).
void *vidcap_avfoundation_init(const struct vidcap_params *params);
/// Destroys a state previously returned by vidcap_avfoundation_init().
void vidcap_avfoundation_done(void *state);
/// Returns the next captured video frame, or NULL if none is ready yet.
/// This capturer produces no audio.
struct video_frame *vidcap_avfoundation_grab(void *state, struct audio_frame **audio);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,441 @@
/**
* @file video_capture/avfoundation.mm
* @author Martin Pulec <pulec@cesnet.cz>
*/
/*
* Copyright (c) 2014 CESNET z.s.p.o.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of CESNET nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#include "config_unix.h"
#endif
#include "video.h"
#include "video_capture.h"
#include "video_capture/avfoundation.h"
#import <AVFoundation/AVFoundation.h>
#include <iostream>
#include <mutex>
#include <queue>
#include <unordered_map>
#define VIDCAP_AVFOUNDATION_ID 0x522B376F
namespace vidcap_avfoundation {
// Maps user-facing preset names (command-line "preset=" values) to the
// corresponding AVCaptureSession preset constants.
std::unordered_map<std::string, NSString *> preset_to_av = {
{ "high", AVCaptureSessionPresetHigh },
{ "medium", AVCaptureSessionPresetMedium },
{ "low", AVCaptureSessionPresetLow },
{ "VGA", AVCaptureSessionPreset640x480 },
{ "HD", AVCaptureSessionPreset1280x720 },
};
// Maps CoreVideo pixel formats / CoreMedia codec FourCCs to the project's
// codec_t identifiers; formats missing here cause frames to be rejected.
std::unordered_map<FourCharCode, codec_t> av_to_uv = {
{kCVPixelFormatType_422YpCbCr8_yuvs, UYVY},
{kCVPixelFormatType_422YpCbCr8FullRange, UYVY},
{kCVPixelFormatType_422YpCbCr8, UYVY},
{kCVPixelFormatType_32RGBA, RGBA},
{kCVPixelFormatType_24RGB, RGB},
{kCVPixelFormatType_422YpCbCr10, v210},
{kCMVideoCodecType_JPEG_OpenDML, MJPG},
{kCMVideoCodecType_JPEG, MJPG},
{kCMVideoCodecType_H264, H264},
};
// Maximum frames buffered between the capture callback and grab;
// newer frames are dropped once this limit is reached.
constexpr int MAX_CAPTURE_QUEUE_SIZE = 2;
} // namespace vidcap_avfoundation
using namespace std;
using namespace vidcap_avfoundation;
// Capture state object: holds the selected device, the running capture
// session and a short queue of frames already converted to video_frame.
@interface vidcap_avfoundation_state : NSObject
{
AVCaptureDevice *m_device;
AVCaptureSession *m_session;
// m_lock guards m_queue -- the AV Foundation delegate callback and the
// grab call run on different threads.
mutex m_lock;
queue<struct video_frame *> m_queue;
}
- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
fromConnection:(AVCaptureConnection *)connection;
- (struct video_frame *) grab;
@end
// Private conformance to the sample-buffer delegate protocol.
@interface vidcap_avfoundation_state () <AVCaptureVideoDataOutputSampleBufferDelegate>
@end
@implementation vidcap_avfoundation_state
// Prints command-line usage and enumerates the available capture devices
// with their formats; with verbose == true it also lists every supported
// frame-rate range (index: max-min), otherwise only the first range's
// maximum FPS is shown.
+ (void)usage: (BOOL) verbose
{
cout << "AV Foundation capture usage:" << "\n";
cout << "\t-t avfoundation[:device=<dev>][:preset=<preset>][:mode=<mode>[:framerate=<fr>]]" << "\n";
cout << "\n";
cout << "<preset> may be \"low\", \"medium\", \"high\", \"VGA\" or \"HD\"" << "\n";
cout << "\n";
cout << "All other parameters are represented by appropriate numeric index." << "\n\n";
cout << "Examples:" << "\n";
cout << "\t-t avfoundation" << "\n";
cout << "\t-t avfoundation:preset=high" << "\n";
cout << "\t-t avfoundation:device=0:preset=high" << "\n";
cout << "\t-t avfoundation:device=0:mode=24:framerate=4 (advanced)" << "\n";
cout << "\n";
cout << "Available AV foundation capture devices and modes:" << "\n";
cout << "(Type -t avfoundation:fullhelp to see available framerates)" << "\n\n";
int i = 0;
NSArray *devices = [AVCaptureDevice devicesWithMediaType:AVMediaTypeVideo];
for (AVCaptureDevice *device in devices) {
int j = 0;
// a leading '*' marks the system default capture device
if (device == [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]) {
cout << "*";
}
cout << i << ": " << [[device localizedName] UTF8String] << "\n";
for ( AVCaptureDeviceFormat *format in [device formats] ) {
CMVideoFormatDescriptionRef formatDesc = [format formatDescription];
FourCharCode fcc = CMFormatDescriptionGetMediaSubType(formatDesc);
CMVideoDimensions dim = CMVideoFormatDescriptionGetDimensions(formatDesc);
printf("\t%d: %.4s %dx%d", j, (const char *) &fcc, dim.width, dim.height);
if (verbose) {
cout << endl;
int k = 0;
for ( AVFrameRateRange *range in format.videoSupportedFrameRateRanges ) {
cout << "\t\t" << k++ << ": " << range.maxFrameRate << "-" << range.minFrameRate << endl;
}
} else {
// brief mode: only show the first range's maximum frame rate
for ( AVFrameRateRange *range in format.videoSupportedFrameRateRanges ) {
cout << " (max frame rate " << range.maxFrameRate << " FPS)";
break;
}
}
printf("\n");
j++;
}
i++;
}
}
/**
 * Designated initializer -- builds and starts an AVCaptureSession according
 * to the user options in @a params.
 *
 * Recognized keys: @c device (numeric index), @c preset (see preset_to_av),
 * @c mode (device format index) and @c framerate (frame-rate range index).
 * Raises an NSException on an invalid device index, missing device or
 * missing media input; vidcap_avfoundation_init() catches it and fails.
 */
- (id)initWithParams: (NSDictionary *) params
{
        self = [super init];
        if (self == nil) { // [super init] may legitimately return nil
                return nil;
        }
        bool use_preset = true;
        NSError *error = nil;

        m_session = [[AVCaptureSession alloc] init];

        // Find a suitable AVCaptureDevice: either the one selected by its
        // numeric index or the system default video input.
        if ([params valueForKey:@"device"]) {
                int device_idx = [[params valueForKey:@"device"] intValue];
                int i = -1;
                NSArray *devices = [AVCaptureDevice devicesWithMediaType:AVMediaTypeVideo];
                for (AVCaptureDevice *device in devices) {
                        i++;
                        if (i == device_idx) {
                                m_device = device;
                                break;
                        }
                }
                if (i != device_idx) {
                        [NSException raise:@"Invalid argument" format:@"Device index %d is invalid", device_idx];
                }
        } else {
                m_device = [AVCaptureDevice
                        defaultDeviceWithMediaType:AVMediaTypeVideo];
        }
        if (m_device == nil) {
                [NSException raise:@"No device" format:@"No capture device was found!"];
        }
        // NOTE(review): m_device is an autoreleased reference that is never
        // retained here (and not released in -dealloc); it is kept alive only
        // via the session's input. Consider retaining it to be safe.

        // Create a device input with the device and add it to the session.
        AVCaptureDeviceInput *input = [AVCaptureDeviceInput deviceInputWithDevice:m_device
                error:&error];
        if (!input) {
                [NSException raise:@"No media" format:@"No media input!"];
        }
        [m_session addInput:input];

        // Create a VideoDataOutput and add it to the session.
        AVCaptureVideoDataOutput *output = [[AVCaptureVideoDataOutput alloc] init];
        output.alwaysDiscardsLateVideoFrames = YES;
        [m_session addOutput:output];

        // Deliver sample buffers to self on a private serial queue.
        dispatch_queue_t queue = dispatch_queue_create("myQueue", NULL);
        [output setSampleBufferDelegate:self queue:queue];
        dispatch_release(queue);
#if 0
        // TODO: do not do this, AV foundation usually selects better codec than we
        // Specify the pixel format
        output.videoSettings =
                [NSDictionary dictionaryWithObject:
                        [NSNumber numberWithInt:kCVPixelFormatType_422YpCbCr8]
                        forKey:(id)kCVPixelBufferPixelFormatTypeKey];
#endif
        // The session retained the output in addOutput:; balance our +alloc
        // here (MRC) -- the original code leaked this reference.
        [output release];

        // An explicit mode (+ optional framerate) overrides the preset logic.
        if ([params valueForKey:@"mode"]) {
                use_preset = false;
                int mode = [[params valueForKey:@"mode"] intValue];
                int i = -1;
                AVCaptureDeviceFormat *format = nil;
                AVFrameRateRange *rate = nil;
                // select the mode-th format of the device
                for (format in [m_device formats] ) {
                        i++;
                        if (i == mode)
                                break;
                }
                if (i != mode) {
                        NSLog(@"Mode index out of bounds!");
                        format = nil;
                }
                if (format && [params valueForKey:@"framerate"]) {
                        int rate_idx = -1;
                        for ( rate in format.videoSupportedFrameRateRanges ) {
                                rate_idx++;
                                if (rate_idx == [[params valueForKey:@"framerate"] intValue])
                                        break;
                        }
                        if (rate_idx != [[params valueForKey:@"framerate"] intValue]) {
                                NSLog(@"Frame rate index out of bounds!");
                                rate = nil;
                        }
                }
                if ([m_device lockForConfiguration:&error]) {
                        if (format) {
                                [m_device setActiveFormat: format];
                        }
                        if (rate) {
                                // min == max duration pins the frame rate to
                                // the range's maximum frame rate
                                m_device.activeVideoMinFrameDuration = rate.minFrameDuration;
                                m_device.activeVideoMaxFrameDuration = rate.minFrameDuration;
                        }
                        [m_device unlockForConfiguration];
                } else {
                        NSLog(@"Unable to set mode!");
                }
        } else {
                // Preset path: default to medium quality unless overridden.
                NSString *preset = AVCaptureSessionPresetMedium;
                if ([params valueForKey:@"preset"]) {
                        auto it = preset_to_av.find([[params valueForKey:@"preset"] UTF8String]);
                        if (it != preset_to_av.end()) {
                                preset = it->second;
                        } else {
                                NSLog(@"Unknown preset %@!", [params valueForKey:@"preset"]);
                        }
                }
                m_session.sessionPreset = preset;
        }
        // You must also call lockForConfiguration: before calling the AVCaptureSession
        // method startRunning, or the session's preset will override the selected
        // active format on the capture device.
        // https://developer.apple.com/library/mac/documentation/AVFoundation/Reference/AVCaptureDevice_Class/Reference/Reference.html
        if (!use_preset) {
                [m_device lockForConfiguration:&error];
        }
        // Start the session running to start the flow of data
        [m_session startRunning];
        if (!use_preset) {
                [m_device unlockForConfiguration];
        }
        return self;
}
// Sample-buffer delegate callback: converts the incoming buffer into a
// video_frame and enqueues it for grab; when the consumer lags behind and
// the queue is full, the new frame is dropped instead.
- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
fromConnection:(AVCaptureConnection *)connection
{
        lock_guard<mutex> guard(m_lock);
        struct video_frame *converted = [self imageFromSampleBuffer: sampleBuffer];
        if (converted == NULL) {
                return; // unsupported pixel format -- nothing to enqueue
        }
        if (m_queue.size() >= MAX_CAPTURE_QUEUE_SIZE) {
                NSLog(@"Frame dropped!");
                VIDEO_FRAME_DISPOSE(converted);
                return;
        }
        m_queue.push(converted);
}
// Stops capture and tears down the session (manual reference counting).
// NOTE(review): m_device is intentionally not released here because init
// never retains it -- see the note in initWithParams:.
- (void)dealloc
{
[m_session stopRunning];
[m_session release];
[super dealloc];
}
// Returns the supported frame-rate range whose minimum frame duration
// matches the device's currently active one, or nil when none matches.
- (AVFrameRateRange *)frameRateRange
{
        for (AVFrameRateRange *candidate in [[m_device activeFormat] videoSupportedFrameRateRanges])
        {
                if (CMTIME_COMPARE_INLINE([candidate minFrameDuration], ==, [m_device activeVideoMinFrameDuration]))
                {
                        return candidate;
                }
        }
        return nil;
}
// Converts a captured CMSampleBuffer into a video_frame.
// The frame borrows the pixel buffer's locked base address as its data
// pointer; the CVImageBuffer is retained here and released (after
// unlocking) by the frame's dispose callback.
// Returns NULL for pixel formats that have no mapping in av_to_uv.
- (struct video_frame *) imageFromSampleBuffer:(CMSampleBufferRef) sampleBuffer
{
        // Get a CMSampleBuffer's Core Video image buffer for the media data
        CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
        // keep the pixel buffer alive until the frame is disposed (MRC)
        [(id) imageBuffer retain];

        OSType fcc = CVPixelBufferGetPixelFormatType(imageBuffer);
        auto codec_it = av_to_uv.find(fcc);
        if (codec_it == av_to_uv.end()) {
                // NSLog terminates the line itself -- no trailing \n needed
                NSLog(@"Unhandled codec: %.4s!", (const char *) &fcc);
                [(id) imageBuffer release];
                return NULL;
        }

        struct video_desc desc;
        desc.color_spec = codec_it->second;
        desc.width = CVPixelBufferGetWidth(imageBuffer);
        desc.height = CVPixelBufferGetHeight(imageBuffer);
        // use the currently configured frame rate as the nominal FPS
        desc.fps = [[self frameRateRange] maxFrameRate];
        desc.tile_count = 1;
        desc.interlacing = PROGRESSIVE;
        struct video_frame *ret = vf_alloc_desc(desc);

        // Lock the base address of the pixel buffer
        CVPixelBufferLockBaseAddress(imageBuffer, 0);
        ret->tiles[0].data = (char *) CVPixelBufferGetBaseAddress(imageBuffer);
        ret->dispose_udata = imageBuffer;
        ret->dispose = static_cast<void (*)(struct video_frame *)>([](struct video_frame *frame)
        {
                CVImageBufferRef imageBuffer = (CVImageBufferRef) frame->dispose_udata;
                // Unlock the pixel buffer
                CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
                [(id) imageBuffer release];
                vf_free(frame);
        });
        return ret;
}
// Dequeues and returns the oldest captured frame, or NULL when the queue
// is empty (non-blocking).
- (struct video_frame *) grab
{
        lock_guard<mutex> guard(m_lock);
        if (m_queue.empty()) {
                return NULL;
        }
        struct video_frame *frame = m_queue.front();
        m_queue.pop();
        return frame;
}
@end
// Module probe entry point: allocates and fills the capturer description.
// Returns NULL only when the allocation itself fails.
struct vidcap_type *vidcap_avfoundation_probe(void)
{
        struct vidcap_type *vt = (struct vidcap_type *) malloc(sizeof(struct vidcap_type));
        if (vt == NULL) {
                return NULL;
        }
        vt->id = VIDCAP_AVFOUNDATION_ID;
        vt->name = "avfoundation";
        vt->description = "AV Foundation capture device";
        return vt;
}
/**
 * Module init entry point -- parses the colon-separated "key=value"
 * configuration string and constructs the Objective-C capture state.
 *
 * @param params  video capture parameters (configuration string etc.)
 * @return capture state, &vidcap_init_noerr after printing (full)help,
 *         or NULL on failure
 */
void *vidcap_avfoundation_init(const struct vidcap_params *params)
{
        if (strcasecmp(vidcap_params_get_fmt(params), "help") == 0) {
                [vidcap_avfoundation_state usage: false];
                return &vidcap_init_noerr;
        } else if (strcasecmp(vidcap_params_get_fmt(params), "fullhelp") == 0) {
                [vidcap_avfoundation_state usage: true];
                return &vidcap_init_noerr;
        }

        NSMutableDictionary *init_params = [[NSMutableDictionary alloc] init];
        char *tmp = strdup(vidcap_params_get_fmt(params));
        char *item, *save_ptr, *cfg = tmp;
        while ((item = strtok_r(cfg, ":", &save_ptr))) {
                char *eq = strchr(item, '=');
                if (eq) {
                        char *key_cstr = item;
                        char *val_cstr = eq + 1;
                        *eq = '\0';
                        // +stringWithCString:encoding: returns autoreleased
                        // objects; they must NOT be released here (the
                        // original code over-released them, which can crash
                        // when the autorelease pool drains).
                        NSString *key = [NSString stringWithCString:key_cstr encoding:NSASCIIStringEncoding];
                        NSString *val = [NSString stringWithCString:val_cstr encoding:NSASCIIStringEncoding];
                        [init_params setObject:val forKey:key];
                }
                cfg = NULL;
        }

        void *ret = nullptr;
        @try {
                ret = (void *) [[vidcap_avfoundation_state alloc] initWithParams: init_params];
        }
        @catch ( NSException *e ) {
                // invalid configuration raises; report it and fail gracefully
                cerr << [[e reason] UTF8String] << "\n";
                ret = nullptr;
        }
        [init_params release];
        free(tmp);
        return ret;
}
// Releases the capture state created by vidcap_avfoundation_init();
// -dealloc then stops and releases the AVCaptureSession.
void vidcap_avfoundation_done(void *state)
{
[(vidcap_avfoundation_state *) state release];
}
/**
 * Returns the next captured video frame, or NULL when none is available.
 *
 * This capturer produces no audio, so *audio is explicitly set to NULL --
 * the original code left the caller's pointer uninitialized.
 */
struct video_frame *vidcap_avfoundation_grab(void *state, struct audio_frame **audio)
{
        *audio = NULL;
        return [(vidcap_avfoundation_state *) state grab];
}