libhb: add a Metal accelerated comb detect filter.
This commit is contained in:
parent
ad20fe40c7
commit
4e40332d2e
@ -4693,6 +4693,10 @@ hb_filter_object_t * hb_filter_get( int filter_id )
|
||||
filter = &hb_filter_prefilter_vt;
|
||||
break;
|
||||
|
||||
case HB_FILTER_COMB_DETECT_VT:
|
||||
filter = &hb_filter_comb_detect_vt;
|
||||
break;
|
||||
|
||||
case HB_FILTER_YADIF_VT:
|
||||
filter = &hb_filter_yadif_vt;
|
||||
break;
|
||||
|
@ -1489,6 +1489,7 @@ enum
|
||||
// First, filters that may change the framerate (drop or dup frames)
|
||||
HB_FILTER_DETELECINE,
|
||||
HB_FILTER_COMB_DETECT,
|
||||
HB_FILTER_COMB_DETECT_VT,
|
||||
HB_FILTER_DECOMB,
|
||||
HB_FILTER_YADIF,
|
||||
HB_FILTER_YADIF_VT,
|
||||
|
@ -496,6 +496,7 @@ extern hb_filter_object_t hb_filter_format;
|
||||
|
||||
#if defined(__APPLE__)
|
||||
extern hb_filter_object_t hb_filter_prefilter_vt;
|
||||
extern hb_filter_object_t hb_filter_comb_detect_vt;
|
||||
extern hb_filter_object_t hb_filter_yadif_vt;
|
||||
extern hb_filter_object_t hb_filter_bwdif_vt;
|
||||
extern hb_filter_object_t hb_filter_crop_scale_vt;
|
||||
|
524
libhb/platform/macosx/comb_detect_vt.m
Normal file
524
libhb/platform/macosx/comb_detect_vt.m
Normal file
@ -0,0 +1,524 @@
|
||||
/* comb_detect_vt.m
|
||||
|
||||
Copyright (c) 2003-2023 HandBrake Team
|
||||
This file is part of the HandBrake source code
|
||||
Homepage: <http://handbrake.fr/>.
|
||||
It may be used under the terms of the GNU General Public License v2.
|
||||
For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
|
||||
|
||||
*/
|
||||
|
||||
#include "handbrake/handbrake.h"
|
||||
#include "cv_utils.h"
|
||||
#include "metal_utils.h"
|
||||
#include "vt_common.h"
|
||||
|
||||
extern char hb_comb_detect_vt_metallib_data[];
|
||||
extern unsigned int hb_comb_detect_vt_metallib_len;
|
||||
|
||||
/*
 * Parameter block written into the Metal context's params buffer
 * (see comb_detect_vt_init) and bound at buffer index 1 by call_kernel.
 * NOTE(review): the field order and types appear to mirror `struct params`
 * in the comb detect shader; keep both in step when editing.
 */
struct mtl_comb_detect_params
{
    // Detection settings
    int   spatial_metric;

    float motion_threshold;
    float spatial_threshold;
    int   block_threshold;
    int   block_width, block_height;

    // Thresholds precomputed from the settings
    float gamma_motion_threshold;
    float gamma_spatial_threshold;
    float gamma_spatial_threshold6;
    float spatial_threshold_squared;
    float spatial_threshold6;
    float comb32detect_min, comb32detect_max;

    // Refreshed before every dispatch by call_kernel
    bool  force_exaustive_check;
};
|
||||
|
||||
// Bit flags combined in the "mode" setting
#define MODE_GAMMA          (1 << 0) // Scale gamma when decombing
#define MODE_FILTER         (1 << 1) // Filter combing mask
#define MODE_MASK           (1 << 2) // Output combing masks instead of pictures
#define MODE_COMPOSITE      (1 << 3) // Overlay combing mask onto picture

// Values of the "filter-mode" setting
#define FILTER_CLASSIC      (1)
#define FILTER_ERODE_DILATE (2)
|
||||
|
||||
/*
 * Per-instance state of the Metal comb detect filter.
 * Allocated zeroed in comb_detect_vt_init and freed in comb_detect_vt_close.
 */
struct hb_filter_private_s
{
    hb_metal_context_t       *mtl;   // Metal device, queue, pipelines and params buffer
    const AVPixFmtDescriptor *desc;  // Descriptor of the input pixel format

    // Comb detect settings (defaults are set in init, then overridden
    // from the filter settings dictionary)
    int   mode;
    int   filter_mode;
    int   spatial_metric;
    float motion_threshold;
    float spatial_threshold;
    int   block_threshold;
    int   block_width, block_height;

    // Thresholds derived from the settings
    float gamma_motion_threshold;
    float gamma_spatial_threshold;
    float gamma_spatial_threshold6;
    float spatial_threshold_squared;
    float spatial_threshold6;
    float comb32detect_min, comb32detect_max;

    // Set for the first frame and at EOF, cleared after each processed frame
    bool force_exaustive_check;

    // Combing mask and scratch textures, sized like the input picture
    id<MTLTexture> mask;
    id<MTLTexture> temp;

    // Single uint32_t written by the GPU with the HB_COMB_* result
    id<MTLBuffer> combed;

    // Sliding window of input frames, indexed by PREV / CURR / NEXT
    hb_buffer_t      *ref[3];
    hb_buffer_list_t  out_list;

    // Statistics logged when the filter is closed
    int comb_heavy;
    int comb_light;
    int comb_none;
    int frames;
};
|
||||
|
||||
// Filter entry points, defined further down and referenced by
// hb_filter_comb_detect_vt.
static int  comb_detect_vt_init(hb_filter_object_t *filter, hb_filter_init_t *init);
static int  comb_detect_vt_work(hb_filter_object_t *filter, hb_buffer_t **buf_in, hb_buffer_t **buf_out);
static void comb_detect_vt_close(hb_filter_object_t *filter);
|
||||
|
||||
// Settings template: one "key=^regex$" entry per line, ':' separated.
static const char comb_detect_vt_template[] =
    "mode=^"HB_INT_REG"$:"
    "spatial-metric=^([012])$:"
    "motion-thresh=^"HB_INT_REG"$:"
    "spatial-thresh=^"HB_INT_REG"$:"
    "filter-mode=^([012])$:"
    "block-thresh=^"HB_INT_REG"$:"
    "block-width=^"HB_INT_REG"$:"
    "block-height=^"HB_INT_REG"$:"
    "disable=^"HB_BOOL_REG"$";
|
||||
|
||||
// Filter object for the Metal comb detect filter.
hb_filter_object_t hb_filter_comb_detect_vt =
{
    .id                = HB_FILTER_COMB_DETECT_VT,
    .name              = "Comb Detect (VideoToolbox)",
    .enforce_order     = 1,
    .settings          = NULL,
    .settings_template = comb_detect_vt_template,
    .init              = comb_detect_vt_init,
    .work              = comb_detect_vt_work,
    .close             = comb_detect_vt_close,
};
|
||||
|
||||
// Slots of the three-frame reference window (hb_filter_private_s.ref)
#define PREV (0)
#define CURR (1)
#define NEXT (2)
|
||||
|
||||
/*
 * Push a new frame into the reference window.
 *
 * The frame in the PREV slot, if any, is closed; CURR and NEXT each move
 * one slot towards PREV and `in` (which may be NULL) becomes NEXT.
 */
static void store_buf(hb_filter_private_t *pv, hb_buffer_t *in)
{
    // Release the frame that falls out of the window
    if (pv->ref[PREV] != NULL)
    {
        hb_buffer_close(&pv->ref[PREV]);
    }

    // Slide the remaining frames one slot down
    for (int slot = PREV; slot < NEXT; slot++)
    {
        pv->ref[slot] = pv->ref[slot + 1];
    }

    pv->ref[NEXT] = in;
}
|
||||
|
||||
/*
 * Initialize the Metal comb detect filter.
 *
 * Allocates the private state, applies the default settings and any
 * overrides found in filter->settings, precomputes the thresholds, creates
 * the Metal context with its compute pipelines and allocates the mask
 * textures and the result buffer.
 *
 * Pipelines are added in this order, which call_kernel relies on:
 *   0 comb_detect[_gamma], 1 filter_*, 2 erode_mask, 3 dilate_mask,
 *   4 check_*combing_mask*, 5 apply_mask.
 *
 * Returns 0 on success, -1 on failure.
 */
static int comb_detect_vt_init(hb_filter_object_t *filter,
                               hb_filter_init_t   *init)
{
    filter->private_data = calloc(1, sizeof(struct hb_filter_private_s));
    if (filter->private_data == NULL)
    {
        hb_error("comb_detect_vt: calloc failed");
        return -1;
    }
    hb_filter_private_t *pv = filter->private_data;

    hb_buffer_list_clear(&pv->out_list);

    pv->desc = av_pix_fmt_desc_get(init->pix_fmt);

    pv->frames = 0;
    pv->force_exaustive_check = 1;
    pv->comb_heavy = 0;
    pv->comb_light = 0;
    pv->comb_none  = 0;

    // Defaults
    pv->mode              = MODE_GAMMA | MODE_FILTER;
    pv->filter_mode       = FILTER_ERODE_DILATE;
    pv->spatial_metric    = 2;
    pv->motion_threshold  = 3;
    pv->spatial_threshold = 3;
    pv->block_threshold   = 40;
    pv->block_width       = 16;
    pv->block_height      = 16;

    if (filter->settings)
    {
        // Seed the temporaries with the defaults: they are copied back
        // unconditionally below, so a key that is absent from the
        // dictionary must not leave them uninitialized.
        // NOTE(review): assumes hb_dict_extract_int leaves its destination
        // untouched when the key is missing; confirm.
        int motion_threshold  = pv->motion_threshold;
        int spatial_threshold = pv->spatial_threshold;
        int block_threshold   = pv->block_threshold;
        hb_value_t *dict = filter->settings;

        // Get comb detection settings
        hb_dict_extract_int(&pv->mode, dict, "mode");
        hb_dict_extract_int(&pv->spatial_metric, dict, "spatial-metric");
        hb_dict_extract_int(&motion_threshold, dict, "motion-thresh");
        hb_dict_extract_int(&spatial_threshold, dict, "spatial-thresh");
        hb_dict_extract_int(&pv->filter_mode, dict, "filter-mode");
        hb_dict_extract_int(&block_threshold, dict, "block-thresh");
        hb_dict_extract_int(&pv->block_width, dict, "block-width");
        hb_dict_extract_int(&pv->block_height, dict, "block-height");

        pv->motion_threshold  = motion_threshold;
        pv->spatial_threshold = spatial_threshold;
        pv->block_threshold   = block_threshold;
    }

    // The thresholds are given on a 0-255 scale; scale them down by 255
    pv->motion_threshold  /= 255.f;
    pv->spatial_threshold /= 255.f;

    // Compute thresholds
    pv->gamma_motion_threshold    = pv->motion_threshold;
    pv->gamma_spatial_threshold   = pv->spatial_threshold;
    pv->gamma_spatial_threshold6  = 6 * pv->gamma_spatial_threshold;
    pv->spatial_threshold_squared = pv->spatial_threshold * pv->spatial_threshold;
    pv->spatial_threshold6        = 6 * pv->spatial_threshold;
    pv->comb32detect_min          = 10 / 255.f;
    pv->comb32detect_max          = 15 / 255.f;

    // Clamp the block size to [8, 32] in both directions
    if (pv->block_width  > 32) { pv->block_width  = 32; }
    if (pv->block_height > 32) { pv->block_height = 32; }
    if (pv->block_width  < 8)  { pv->block_width  = 8;  }
    if (pv->block_height < 8)  { pv->block_height = 8;  }

    pv->mtl = hb_metal_context_init(hb_comb_detect_vt_metallib_data,
                                    hb_comb_detect_vt_metallib_len,
                                    pv->mode & MODE_GAMMA ? "comb_detect_gamma" : "comb_detect",
                                    sizeof(struct mtl_comb_detect_params),
                                    init->geometry.width, init->geometry.height,
                                    init->pix_fmt, init->color_range);
    if (pv->mtl == NULL)
    {
        hb_error("comb_detect_vt: failed to create Metal device");
        return -1;
    }

    struct mtl_comb_detect_params *params = (struct mtl_comb_detect_params *)pv->mtl->params_buffer.contents;
    *params = (struct mtl_comb_detect_params) {
        .spatial_metric            = pv->spatial_metric,
        .motion_threshold          = pv->motion_threshold,
        .spatial_threshold         = pv->spatial_threshold,
        .block_threshold           = pv->block_threshold,
        .block_width               = pv->block_width,
        .block_height              = pv->block_height,
        .gamma_motion_threshold    = pv->gamma_motion_threshold,
        .gamma_spatial_threshold   = pv->gamma_spatial_threshold,
        .gamma_spatial_threshold6  = pv->gamma_spatial_threshold6,
        .spatial_threshold_squared = pv->spatial_threshold_squared,
        .spatial_threshold6        = pv->spatial_threshold6,
        .comb32detect_min          = pv->comb32detect_min,
        .comb32detect_max          = pv->comb32detect_max,
        .force_exaustive_check     = pv->force_exaustive_check
    };

    if (hb_metal_add_pipeline(pv->mtl, pv->filter_mode == FILTER_ERODE_DILATE ? "filter_erode_dilate" : "filter_classic",
                              pv->mtl->pipelines_count))
    {
        return -1;
    }
    if (hb_metal_add_pipeline(pv->mtl, "erode_mask", pv->mtl->pipelines_count))
    {
        return -1;
    }
    if (hb_metal_add_pipeline(pv->mtl, "dilate_mask", pv->mtl->pipelines_count))
    {
        return -1;
    }

    // Pick the block reduction kernel; the plain variant is the fallback
    char *check_combing_name = pv->mode & MODE_FILTER ? "check_filtered_combing_mask" : "check_combing_mask";
    if (@available(macOS 13, *))
    {
        if ([pv->mtl->device supportsFamily:MTLGPUFamilyMetal3] &&
            ((pv->block_width == 16 && pv->block_height == 16) || (pv->block_width == 32 && pv->block_height == 32)))
        {
            // Use simd_sum() to speed up the final reduction pass
            check_combing_name = pv->mode & MODE_FILTER ? "check_filtered_combing_mask_simd" : "check_combing_mask_simd";
        }
        else if ([pv->mtl->device supportsFamily:MTLGPUFamilyCommon3] &&
                 (pv->block_width * pv->block_height) % 4 == 0)
        {
            // Use quad_sum() to speed up the final reduction pass;
            // the block must split evenly into groups of four threads
            check_combing_name = pv->mode & MODE_FILTER ? "check_filtered_combing_mask_quad" : "check_combing_mask_quad";
        }
    }
    if (hb_metal_add_pipeline(pv->mtl, check_combing_name, pv->mtl->pipelines_count))
    {
        return -1;
    }
    if (hb_metal_add_pipeline(pv->mtl, "apply_mask", pv->mtl->pipelines_count))
    {
        return -1;
    }

    // Allocate buffers to store the mask and the comb result
    MTLTextureDescriptor *descriptor = [[MTLTextureDescriptor alloc] init];
    descriptor.textureType = MTLTextureType2D;
    descriptor.pixelFormat = MTLPixelFormatR8Uint;
    descriptor.width       = init->geometry.width;
    descriptor.height      = init->geometry.height;
    descriptor.depth       = 1;
    descriptor.storageMode = MTLStorageModePrivate;

    pv->mask = [pv->mtl->device newTextureWithDescriptor:descriptor];
    pv->temp = [pv->mtl->device newTextureWithDescriptor:descriptor];

    [descriptor release];

    pv->combed = [pv->mtl->device newBufferWithLength:sizeof(uint32_t) options:MTLResourceStorageModeShared];

    return 0;
}
|
||||
|
||||
/*
 * Tear down the filter: log the combing statistics, release the Metal
 * resources and the buffered reference frames, then free the private state.
 * Does nothing when the filter has no private data.
 */
static void comb_detect_vt_close(hb_filter_object_t *filter)
{
    hb_filter_private_t *pv = filter->private_data;
    if (!pv)
    {
        return;
    }

    hb_log("comb detect: heavy %i | light %i | uncombed %i | total %i",
           pv->comb_heavy, pv->comb_light, pv->comb_none, pv->frames);

    // Objects created in init with new*/alloc are released manually
    [pv->combed release];
    [pv->temp release];
    [pv->mask release];

    hb_metal_context_close(&pv->mtl);

    // Drop whatever is still held in the reference window
    for (hb_buffer_t **ref = pv->ref; ref != pv->ref + 3; ref++)
    {
        hb_buffer_close(ref);
    }

    free(pv);
    filter->private_data = NULL;
}
|
||||
|
||||
/*
 * Encode and run the comb detection passes for one frame, blocking until
 * the GPU has finished.
 *
 * Texture slots: 0 prev, 1 cur, 2 next, 3 mask, 4 temp, 5 dest (only bound
 * when a mask/composite picture is requested).
 * Buffer slots:  0 comb result, 1 parameters.
 *
 * Pipeline indices follow the order in which comb_detect_vt_init adds them.
 */
static void call_kernel(hb_filter_private_t *pv,
                        id<MTLTexture> prev,
                        id<MTLTexture> cur,
                        id<MTLTexture> next,
                        id<MTLTexture> dest)
{
    id<MTLCommandBuffer>         buffer  = pv->mtl->queue.commandBuffer;
    id<MTLComputeCommandEncoder> encoder = buffer.computeCommandEncoder;

    int width = cur.width, height = cur.height;

    const int filtering    = (pv->mode & MODE_FILTER) != 0;
    const int wants_output = (pv->mode & (MODE_MASK | MODE_COMPOSITE)) != 0;

    // Only this flag changes between frames; the rest is written once in init
    struct mtl_comb_detect_params *params = (struct mtl_comb_detect_params *)pv->mtl->params_buffer.contents;
    params->force_exaustive_check = pv->force_exaustive_check;

    [encoder setTexture:prev     atIndex:0];
    [encoder setTexture:cur      atIndex:1];
    [encoder setTexture:next     atIndex:2];
    [encoder setTexture:pv->mask atIndex:3];
    [encoder setTexture:pv->temp atIndex:4];
    if (wants_output)
    {
        [encoder setTexture:dest atIndex:5];
    }

    [encoder setBuffer:pv->combed             offset:0 atIndex:0];
    [encoder setBuffer:pv->mtl->params_buffer offset:0 atIndex:1];

    // Pass 0: build the combing mask
    hb_metal_compute_encoder_dispatch(pv->mtl->device, pv->mtl->pipelines[0], encoder, width, height);

    if (filtering)
    {
        // Pass 1: filter the mask
        hb_metal_compute_encoder_dispatch(pv->mtl->device, pv->mtl->pipelines[1], encoder, width, height);

        if (pv->filter_mode == FILTER_ERODE_DILATE)
        {
            // erode, dilate, erode
            hb_metal_compute_encoder_dispatch(pv->mtl->device, pv->mtl->pipelines[2], encoder, width, height);
            hb_metal_compute_encoder_dispatch(pv->mtl->device, pv->mtl->pipelines[3], encoder, width, height);
            hb_metal_compute_encoder_dispatch(pv->mtl->device, pv->mtl->pipelines[2], encoder, width, height);
        }
        else if (pv->filter_mode == FILTER_CLASSIC)
        {
            // The following passes read the temp texture from slot 3
            [encoder setTexture:pv->temp atIndex:3];
        }
    }

    // Pass 4: per-block score, one threadgroup per block
    hb_metal_compute_encoder_dispatch_fixed_threadgroup_size(pv->mtl->device, pv->mtl->pipelines[4], encoder,
                                                             width, height, pv->block_width, pv->block_height);

    if (wants_output)
    {
        // Pass 5: write the mask into the destination picture
        hb_metal_compute_encoder_dispatch(pv->mtl->device, pv->mtl->pipelines[5], encoder, width, height);
    }

    [encoder endEncoding];

    [buffer commit];
    [buffer waitUntilCompleted];
}
|
||||
|
||||
/*
 * Run comb detection on the CURR reference frame.
 *
 * PREV and NEXT fall back to CURR when their slot is empty. When the mode
 * requests a mask or composite picture, a destination pixel buffer is taken
 * from the Metal context's pool and returned, wrapped in a new hb_buffer_t,
 * through *out; otherwise *out is left untouched.
 *
 * Returns the HB_COMB_* value written by the GPU, or -1 on failure. Every
 * texture created here, and the destination pixel buffer if it was not
 * handed over to *out, is released before returning.
 */
static int analyze_frame(hb_filter_private_t *pv, hb_buffer_t **out)
{
    int result = -1;
    const int wants_output = (pv->mode & (MODE_MASK | MODE_COMPOSITE)) != 0;

    CVPixelBufferRef  cv_dest  = NULL;
    CVMetalTextureRef dest     = NULL;
    CVMetalTextureRef prev     = NULL;
    CVMetalTextureRef cur      = NULL;
    CVMetalTextureRef next     = NULL;
    id<MTLTexture>    tex_dest = nil;
    uint32_t         *combed   = NULL;
    int               channels = 0;
    MTLPixelFormat    format   = MTLPixelFormatInvalid;

    CVPixelBufferRef cv_prev = pv->ref[PREV] ? hb_cv_get_pixel_buffer(pv->ref[PREV]) : hb_cv_get_pixel_buffer(pv->ref[CURR]);
    CVPixelBufferRef cv_cur  = hb_cv_get_pixel_buffer(pv->ref[CURR]);
    CVPixelBufferRef cv_next = pv->ref[NEXT] ? hb_cv_get_pixel_buffer(pv->ref[NEXT]) : hb_cv_get_pixel_buffer(pv->ref[CURR]);

    if (cv_prev == NULL || cv_cur == NULL || cv_next == NULL)
    {
        hb_log("comb_detect_vt: extract_buf failed");
        goto done;
    }

    // Only the first component of the pixel format is analyzed
    format = hb_metal_pix_fmt_from_component(&pv->desc->comp[0], &channels);
    if (format == MTLPixelFormatInvalid)
    {
        goto done;
    }

    if (wants_output)
    {
        CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pv->mtl->pool, &cv_dest);
        if (ret != kCVReturnSuccess)
        {
            hb_log("comb_detect_vt: CVPixelBufferPoolCreatePixelBuffer failed");
            goto done;
        }
        // NOTE(review): assumes hb_metal_create_texture_from_pixbuf returns
        // NULL on failure; confirm against metal_utils.
        dest = hb_metal_create_texture_from_pixbuf(pv->mtl->cache, cv_dest, 0, format);
        if (dest == NULL)
        {
            hb_log("comb_detect_vt: failed to create the destination texture");
            goto done;
        }
        tex_dest = CVMetalTextureGetTexture(dest);
    }

    prev = hb_metal_create_texture_from_pixbuf(pv->mtl->cache, cv_prev, 0, format);
    cur  = hb_metal_create_texture_from_pixbuf(pv->mtl->cache, cv_cur,  0, format);
    next = hb_metal_create_texture_from_pixbuf(pv->mtl->cache, cv_next, 0, format);
    if (prev == NULL || cur == NULL || next == NULL)
    {
        hb_log("comb_detect_vt: failed to create the source textures");
        goto done;
    }

    // The kernels only ever raise the value, so start from "not combed"
    combed  = pv->combed.contents;
    *combed = HB_COMB_NONE;

    call_kernel(pv,
                CVMetalTextureGetTexture(prev),
                CVMetalTextureGetTexture(cur),
                CVMetalTextureGetTexture(next),
                tex_dest);

    result = *combed;

    if (wants_output)
    {
        CVBufferPropagateAttachments(cv_cur, cv_dest);

        *out = hb_buffer_wrapper_init();
        (*out)->storage_type      = COREMEDIA;
        (*out)->storage           = cv_dest;
        (*out)->f.width           = pv->ref[CURR]->f.width;
        (*out)->f.height          = pv->ref[CURR]->f.height;
        (*out)->f.fmt             = pv->ref[CURR]->f.fmt;
        (*out)->f.color_prim      = pv->ref[CURR]->f.color_prim;
        (*out)->f.color_transfer  = pv->ref[CURR]->f.color_transfer;
        (*out)->f.color_matrix    = pv->ref[CURR]->f.color_matrix;
        (*out)->f.color_range     = pv->ref[CURR]->f.color_range;
        (*out)->f.chroma_location = pv->ref[CURR]->f.chroma_location;
        hb_buffer_copy_props(*out, pv->ref[CURR]);

        // The pixel buffer now belongs to *out
        cv_dest = NULL;
    }

done:
    // CFRelease must not be called with NULL
    if (prev) { CFRelease(prev); }
    if (cur)  { CFRelease(cur);  }
    if (next) { CFRelease(next); }
    if (dest) { CFRelease(dest); }
    if (cv_dest)
    {
        CVPixelBufferRelease(cv_dest);
    }

    return result;
}
|
||||
|
||||
/*
 * Analyze the CURR reference frame, update the statistics, tag the frame
 * with the detection result and queue the output buffer.
 *
 * The queued buffer is the picture produced by analyze_frame when there is
 * one, otherwise a duplicate of the CURR frame.
 */
static void process_frame(hb_filter_private_t *pv)
{
    int combed = 0;
    hb_buffer_t *out = NULL;

    @autoreleasepool
    {
        combed = analyze_frame(pv, &out);
    }

    switch (combed)
    {
        case HB_COMB_HEAVY:
            pv->comb_heavy++;
            break;

        case HB_COMB_LIGHT:
            pv->comb_light++;
            break;

        case HB_COMB_NONE:
        default:
            // A failed analysis (-1) is counted as uncombed; flag the frame
            // the same way instead of storing the error code in s.combed.
            combed = HB_COMB_NONE;
            pv->comb_none++;
            break;
    }
    pv->frames++;
    pv->ref[CURR]->s.combed = combed;

    if (out)
    {
        hb_buffer_list_append(&pv->out_list, out);
    }
    else
    {
        hb_buffer_list_append(&pv->out_list, hb_vt_buffer_dup(pv->ref[CURR]));
    }

    // The exhaustive check is only forced for the first frame and at EOF
    pv->force_exaustive_check = 0;
}
|
||||
|
||||
/*
 * Filter work function.
 *
 * Takes ownership of *buf_in and pushes it into the three-frame window.
 * Output lags the input by one frame: the first call returns
 * HB_FILTER_DELAY, later calls emit the analyzed CURR frame. On EOF the
 * last buffered frame is flushed (if there is one) followed by the EOF
 * buffer, and HB_FILTER_DONE is returned.
 */
static int comb_detect_vt_work(hb_filter_object_t *filter,
                               hb_buffer_t **buf_in,
                               hb_buffer_t **buf_out)
{
    hb_filter_private_t *pv = filter->private_data;
    hb_buffer_t         *in = *buf_in;

    *buf_in = NULL;
    if (in->s.flags & HB_BUF_FLAG_EOF)
    {
        // Shift the window so the last received frame becomes CURR
        store_buf(pv, NULL);

        // CURR is still empty when EOF arrives before any frame;
        // process_frame dereferences it, so there is nothing to flush.
        if (pv->ref[CURR] != NULL)
        {
            pv->force_exaustive_check = 1;
            process_frame(pv);
        }

        hb_buffer_list_append(&pv->out_list, in);
        *buf_out = hb_buffer_list_clear(&pv->out_list);
        return HB_FILTER_DONE;
    }

    store_buf(pv, in);

    if (pv->ref[CURR] == NULL)
    {
        // Wait for next buffer
        return HB_FILTER_DELAY;
    }

    process_frame(pv);
    *buf_out = hb_buffer_list_clear(&pv->out_list);

    return *buf_out == NULL ? HB_FILTER_FAILED : HB_FILTER_OK;
}
|
555
libhb/platform/macosx/shaders/comb_detect_vt.metal
Normal file
555
libhb/platform/macosx/shaders/comb_detect_vt.metal
Normal file
@ -0,0 +1,555 @@
|
||||
/* comb_detect_vt.metal
|
||||
|
||||
Copyright (c) 2003-2023 HandBrake Team
|
||||
|
||||
This file is part of the HandBrake source code
|
||||
Homepage: <http://handbrake.fr/>.
|
||||
It may be used under the terms of the GNU General Public License v2.
|
||||
For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
|
||||
*/
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <metal_integer>
|
||||
#include <metal_texture>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Combing levels stored in the result buffer, ordered by severity
#define HB_COMB_NONE  (0)
#define HB_COMB_LIGHT (1)
#define HB_COMB_HEAVY (2)
|
||||
|
||||
/*
|
||||
* Parameters
|
||||
*/
|
||||
|
||||
// Parameter block bound at buffer(1) by the kernels in this file.
// NOTE(review): the layout appears to mirror the host-side
// mtl_comb_detect_params struct; keep both in step when editing.
struct params {
    // Detection settings
    int   spatial_metric;

    float motion_threshold;
    float spatial_threshold;
    int   block_threshold;
    int   block_width, block_height;

    // Precomputed thresholds
    float gamma_motion_threshold;
    float gamma_spatial_threshold;
    float gamma_spatial_threshold6;
    float spatial_threshold_squared;
    float spatial_threshold6;
    float comb32detect_min, comb32detect_max;

    // When set, the spatial check runs even if no motion was measured
    bool  force_exaustive_check;
};
|
||||
|
||||
/*
|
||||
* Texture access helpers
|
||||
*/
|
||||
|
||||
// Sampler addressing textures with unnormalized pixel coordinates
constexpr sampler s(coord::pixel);

// Sample the first channel of a float texture at a signed position
template <typename T>
T tex2D(texture2d<float, access::sample> tex, short2 pos)
{
    const float2 where = float2(pos);
    return tex.sample(s, where).x;
}

// Sample the first channel of a float texture at an unsigned position
template <typename T>
T tex2D(texture2d<float, access::sample> tex, ushort2 pos)
{
    const float2 where = float2(pos);
    return tex.sample(s, where).x;
}

// Sample the first channel of an integer texture at a signed position
template <typename T>
T tex2D(texture2d<ushort, access::sample> tex, short2 pos)
{
    const float2 where = float2(pos);
    return tex.sample(s, where).x;
}

// Sample the first channel of an integer texture at an unsigned position
template <typename T>
T tex2D(texture2d<ushort, access::sample> tex, ushort2 pos)
{
    const float2 where = float2(pos);
    return tex.sample(s, where).x;
}

// Read the first channel of a float texture at (x, y)
template <typename T>
T tex2D(texture2d<float, access::read> tex, uint x, uint y)
{
    const uint2 where = uint2(x, y);
    return tex.read(where).x;
}

// Read the first channel of an integer texture at pos
template <typename T>
T tex2D(texture2d<ushort, access::read> tex, ushort2 pos)
{
    const auto texel = tex.read(pos);
    return texel.x;
}
|
||||
|
||||
// Pixel-coordinate sampler using the clamp_to_zero address mode
constexpr sampler szero(coord::pixel, address::clamp_to_zero);

// Sample the first channel of an integer texture at a signed position
template <typename T>
T tex2Dc(texture2d<ushort, access::sample> tex, short2 pos)
{
    const float2 where = float2(pos);
    return tex.sample(szero, where).x;
}

// Sample the first channel of an integer texture at an unsigned position
template <typename T>
T tex2Dc(texture2d<ushort, access::sample> tex, ushort2 pos)
{
    const float2 where = float2(pos);
    return tex.sample(szero, where).x;
}
|
||||
|
||||
/*
|
||||
* Comb detect helpers
|
||||
*/
|
||||
|
||||
// Fold the score of one block into the frame-wide result.
//
// A block scoring at least half the threshold marks the frame as lightly
// combed, a block scoring above the threshold as heavily combed. The result
// only ever moves up (NONE < LIGHT < HEAVY), so an atomic max is enough and,
// unlike a single weak compare-exchange, cannot fail spuriously.
//
// block_threshold is taken as int so that settings above 255 are not
// truncated.
void write_result(
    device atomic_int *combed,
    int block_threshold,
    ushort block_score)
{
    if (block_score < (block_threshold / 2)) {
        return;
    }

    const int level = block_score > block_threshold ? HB_COMB_HEAVY : HB_COMB_LIGHT;

    // Skip the read-modify-write when the result is already at least this high
    if (atomic_load_explicit(combed, memory_order_relaxed) >= level) {
        return;
    }

    atomic_fetch_max_explicit(combed, level, memory_order_relaxed);
}
|
||||
|
||||
// Raise a sample to the power 2.2
template <typename T>
T gamma(T value) {
    const T scaled = pow(value, 2.2f);
    return scaled;
}
|
||||
|
||||
// Gamma-scaled comb detection for a single pixel.
//
// Clears the mask at pos, then sets it to 1 when:
//   1. the pixel differs from both vertical neighbours in the same
//      direction by more than the spatial threshold,
//   2. enough motion is measured against the previous/next frame (or motion
//      checking is disabled, or the exhaustive check is forced), and
//   3. the bob+blur combing score exceeds six times the spatial threshold.
//
// The approach is a mix of comb detection tricks from neuron2's Decomb
// plugin for AviSynth and tritical's IsCombedT / IsCombedTIVTC plugins.
template <typename T>
void detect_gamma_combed_segment(
    texture2d<float, access::sample> prev,
    texture2d<float, access::sample> cur,
    texture2d<float, access::sample> next,
    texture2d<ushort, access::write> mask,
    constant params& p,
    ushort2 pos)
{
    const float motion_limit  = p.gamma_motion_threshold;
    const float spatial_limit = p.gamma_spatial_threshold;
    const float combing_limit = p.gamma_spatial_threshold6;

    // Vertical neighbours of the current pixel
    const short2 above_2 = short2(pos.x, pos.y - 2);
    const short2 above_1 = short2(pos.x, pos.y - 1);
    const short2 below_1 = short2(pos.x, pos.y + 1);
    const short2 below_2 = short2(pos.x, pos.y + 2);

    // Gamma-scaled samples of the current frame used by every check
    const T cur_here  = gamma(tex2D<T>(cur, pos));
    const T cur_above = gamma(tex2D<T>(cur, above_1));
    const T cur_below = gamma(tex2D<T>(cur, below_1));

    const T up_diff   = cur_here - cur_above;
    const T down_diff = cur_here - cur_below;

    mask.write(0, pos);

    // The pixels above and below must differ, and in the same "direction"
    const bool brighter = up_diff >  spatial_limit && down_diff >  spatial_limit;
    const bool darker   = up_diff < -spatial_limit && down_diff < -spatial_limit;
    if (!(brighter || darker)) {
        return;
    }

    // Without a motion threshold the user does not want a motion check,
    // so go straight to the spatial check
    bool motion = true;
    if (motion_limit > 0) {
        // Make sure there's sufficient motion between frame t-1 to frame t+1
        const bool first_pair =
            abs(gamma(tex2D<T>(prev, pos)) - cur_here) > motion_limit &&
            abs(cur_above - gamma(tex2D<T>(next, above_1))) > motion_limit &&
            abs(cur_below - gamma(tex2D<T>(next, below_1))) > motion_limit;
        const bool second_pair =
            abs(gamma(tex2D<T>(next, pos)) - cur_here) > motion_limit &&
            abs(gamma(tex2D<T>(prev, above_1)) - cur_above) > motion_limit &&
            abs(gamma(tex2D<T>(prev, below_1)) - cur_below) > motion_limit;
        motion = first_pair || second_pair;
    }

    // Continue on motion, or when motion can't be measured yet
    if (!(motion || p.force_exaustive_check)) {
        return;
    }

    // Tritical's noise-resistant combing scorer,
    // evaluated on a bob+blur convolution
    const T combing = abs(gamma(tex2D<T>(cur, above_2))
                          + (4 * cur_here)
                          + gamma(tex2D<T>(cur, below_2))
                          - (3 * (cur_above
                                  + cur_below)));

    // Sufficiently combed: mark the pixel in the mask
    if (combing > combing_limit) {
        mask.write(1, pos);
    }
}
|
||||
|
||||
// Comb detection for a single pixel, without gamma scaling.
//
// Clears the mask at pos, then sets it to 1 when the pixel differs from both
// vertical neighbours in the same direction, enough motion is measured (or
// the motion check is disabled / the exhaustive check is forced), and the
// spatial metric selected by p.spatial_metric reports combing:
//   0: simple 32detect style comparison,
//   1: the product test IsCombed uses (better, still noise sensitive),
//   2: tritical's noise-resistant bob+blur score.
//
// The approach is a mix of comb detection tricks from neuron2's Decomb
// plugin for AviSynth and tritical's IsCombedT / IsCombedTIVTC plugins.
template <typename T>
void detect_combed_segment(
    texture2d<float, access::sample> prev,
    texture2d<float, access::sample> cur,
    texture2d<float, access::sample> next,
    texture2d<ushort, access::write> mask,
    constant params& p,
    ushort2 pos)
{
    const float motion_limit   = p.motion_threshold;
    const float spatial_limit  = p.spatial_threshold;
    const float squared_limit  = p.spatial_threshold_squared;
    const float combing_limit  = p.spatial_threshold6;

    // Vertical neighbours of the current pixel
    const short2 above_2 = short2(pos.x, pos.y - 2);
    const short2 above_1 = short2(pos.x, pos.y - 1);
    const short2 below_1 = short2(pos.x, pos.y + 1);
    const short2 below_2 = short2(pos.x, pos.y + 2);

    // Samples of the current frame used by every check
    const T cur_here  = tex2D<T>(cur, pos);
    const T cur_above = tex2D<T>(cur, above_1);
    const T cur_below = tex2D<T>(cur, below_1);

    const float up_diff   = cur_here - cur_above;
    const float down_diff = cur_here - cur_below;

    mask.write(0, pos);

    // The pixels above and below must differ, and in the same "direction"
    const bool brighter = up_diff >  spatial_limit && down_diff >  spatial_limit;
    const bool darker   = up_diff < -spatial_limit && down_diff < -spatial_limit;
    if (!(brighter || darker)) {
        return;
    }

    // Without a motion threshold the user does not want a motion check,
    // so go straight to the spatial check
    bool motion = true;
    if (motion_limit > 0) {
        // Make sure there's sufficient motion between frame t-1 to frame t+1
        const bool first_pair =
            abs(tex2D<T>(prev, pos) - cur_here) > motion_limit &&
            abs(cur_above - tex2D<T>(next, above_1)) > motion_limit &&
            abs(cur_below - tex2D<T>(next, below_1)) > motion_limit;
        const bool second_pair =
            abs(tex2D<T>(next, pos) - cur_here) > motion_limit &&
            abs(tex2D<T>(prev, above_1) - cur_above) > motion_limit &&
            abs(tex2D<T>(prev, below_1) - cur_below) > motion_limit;
        motion = first_pair || second_pair;
    }

    // Continue on motion, or when motion can't be measured yet
    if (!(motion || p.force_exaustive_check)) {
        return;
    }

    switch (p.spatial_metric) {
        case 0: {
            // Simple 32detect style comb detection
            if ((abs(cur_here - tex2D<T>(cur, below_2)) < p.comb32detect_min) &&
                (abs(cur_here - cur_below) > p.comb32detect_max)) {
                mask.write(1, pos);
            }
            break;
        }
        case 1: {
            // What IsCombed uses: better, but still noise sensitive
            const T combing = (cur_above - cur_here) *
                              (cur_below - cur_here);

            if (combing > squared_limit) {
                mask.write(1, pos);
            }
            break;
        }
        case 2: {
            // Tritical's noise-resistant combing scorer,
            // evaluated on a bob+blur convolution
            const T combing = abs(tex2D<T>(cur, above_2)
                                  + (4 * cur_here)
                                  + tex2D<T>(cur, below_2)
                                  - (3 * (cur_above
                                          + cur_below)));

            // Sufficiently combed: mark the pixel in the mask
            if (combing > combing_limit) {
                mask.write(1, pos);
            }
            break;
        }
        default:
            break;
    }
}
|
||||
|
||||
/*
|
||||
* Kernel dispatch
|
||||
*/
|
||||
|
||||
// Write the combing mask into the destination picture:
// 1.0 where the mask is set, 0.0 elsewhere.
kernel void apply_mask(
    texture2d<ushort, access::read> mask [[texture(3)]],
    texture2d<half, access::write> dst [[texture(5)]],
    ushort2 pos [[thread_position_in_grid]])
{
    const bool is_combed = tex2D<ushort>(mask, pos) != 0;
    const half value = is_combed ? 1.h : 0.h;
    dst.write(value, pos);
}
|
||||
|
||||
// Block score of the filtered mask, reduced with simd_sum().
// Each SIMD-group stores its partial sum in threadgroup memory; the first
// SIMD-group then adds the partial sums and reports the block score.
kernel void check_filtered_combing_mask_simd(
    texture2d<ushort, access::sample> mask [[texture(3)]],
    device atomic_int *combed [[buffer(0)]],
    constant params& p [[buffer(1)]],
    ushort2 pos [[thread_position_in_grid]],
    ushort sid [[simdgroup_index_in_threadgroup]],
    ushort w [[simdgroups_per_threadgroup]])
{
    threadgroup ushort partial_score[32];

    const ushort pixel = tex2Dc<ushort>(mask, pos);
    partial_score[sid] = simd_sum(pixel);

    // Make every partial sum visible before it is read back
    threadgroup_barrier(mem_flags::mem_threadgroup);

    if (sid != 0) {
        return;
    }

    ushort block_score = 0;
    for (uchar group = 0; group < w; group++) {
        block_score += partial_score[group];
    }
    write_result(combed, p.block_threshold, block_score);
}
|
||||
|
||||
// Block score of the filtered mask, reduced with quad_sum().
// Each quad-group stores its partial sum in threadgroup memory; the first
// quad-group then adds the partial sums and reports the block score.
kernel void check_filtered_combing_mask_quad(
    texture2d<ushort, access::sample> mask [[texture(3)]],
    device atomic_int *combed [[buffer(0)]],
    constant params& p [[buffer(1)]],
    ushort2 pos [[thread_position_in_grid]],
    ushort qid [[quadgroup_index_in_threadgroup]],
    ushort w [[quadgroups_per_threadgroup]])
{
    threadgroup ushort partial_score[256];

    ushort value = tex2Dc<ushort>(mask, pos);
    partial_score[qid] = quad_sum(value);

    // Make every partial sum visible before it is read back
    threadgroup_barrier(mem_flags::mem_threadgroup);

    if (qid == 0) {
        ushort block_score = 0;
        // A 32x32 block has 256 quad-groups: the index must be wider than
        // uchar, which wraps at 255 and would never reach w.
        for (ushort i = 0; i < w; i++) {
            block_score += partial_score[i];
        }
        write_result(combed, p.block_threshold, block_score);
    }
}
|
||||
|
||||
// Block score of the filtered mask, plain variant.
// Only the thread at the top-left corner of each block does any work:
// it sums the mask over the whole block and reports the score.
kernel void check_filtered_combing_mask(
    texture2d<ushort, access::sample> mask [[texture(3)]],
    device atomic_int *combed [[buffer(0)]],
    constant params& p [[buffer(1)]],
    ushort2 pos [[thread_position_in_grid]])
{
    const bool block_origin = (pos.x % p.block_width == 0) && (pos.y % p.block_height == 0);
    if (!block_origin) {
        return;
    }

    ushort block_score = 0;

    for (uchar dx = 0; dx < p.block_width; dx++) {
        for (uchar dy = 0; dy < p.block_height; dy++) {
            const ushort2 sample_pos = ushort2(pos.x + dx, pos.y + dy);
            block_score += tex2Dc<ushort>(mask, sample_pos);
        }
    }

    write_result(combed, p.block_threshold, block_score);
}
|
||||
|
||||
// Block score of the unfiltered mask, reduced with simd_sum().
// A pixel only counts when it and both horizontal neighbours are set.
kernel void check_combing_mask_simd(
    texture2d<ushort, access::sample> mask [[texture(3)]],
    device atomic_int *combed [[buffer(0)]],
    constant params& p [[buffer(1)]],
    ushort2 pos [[thread_position_in_grid]],
    ushort sid [[simdgroup_index_in_threadgroup]],
    ushort w [[simdgroups_per_threadgroup]])
{
    threadgroup ushort partial_score[32];

    const short2 west = short2(pos.x - 1, pos.y);
    const short2 east = short2(pos.x + 1, pos.y);

    const ushort run_of_three = tex2Dc<ushort>(mask, west) & tex2Dc<ushort>(mask, pos) & tex2Dc<ushort>(mask, east);
    partial_score[sid] = simd_sum(run_of_three);

    // Make every partial sum visible before it is read back
    threadgroup_barrier(mem_flags::mem_threadgroup);

    if (sid != 0) {
        return;
    }

    ushort block_score = 0;
    for (uchar group = 0; group < w; group++) {
        block_score += partial_score[group];
    }
    write_result(combed, p.block_threshold, block_score);
}
|
||||
|
||||
// Scores the raw combing mask for one threadgroup, using quadgroup
// reductions.
//
// A thread contributes the bitwise AND of the mask values at its left
// neighbour, its own position and its right neighbour.
//
//   mask   - combing mask, read through tex2Dc
//   combed - atomic result, only touched through write_result()
//   p      - filter parameters; only block_threshold is read here
//   pos    - position of this thread in the grid
//   qid    - index of this thread's quadgroup inside the threadgroup
//   w      - number of quadgroups in the threadgroup
kernel void check_combing_mask_quad(
    texture2d<ushort, access::sample> mask [[texture(3)]],
    device atomic_int *combed [[buffer(0)]],
    constant params& p [[buffer(1)]],
    ushort2 pos [[thread_position_in_grid]],
    ushort qid [[quadgroup_index_in_threadgroup]],
    ushort w [[quadgroups_per_threadgroup]])
{
    // One slot per quadgroup of the threadgroup.
    threadgroup ushort partial_score[256];
    const short2 left  = short2(pos.x -1, pos.y);
    const short2 right = short2(pos.x +1, pos.y);

    ushort value = tex2Dc<ushort>(mask, left) & tex2Dc<ushort>(mask, pos) & tex2Dc<ushort>(mask, right);
    // Each thread of a quadgroup stores that quadgroup's sum in the same slot.
    partial_score[qid] = quad_sum(value);

    // Make every quadgroup's partial sum visible before it is read back.
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Every thread of quadgroup 0 takes this branch and reports the same score.
    if (qid == 0) {
        ushort block_score = 0;
        // The index has to be as wide as w: partial_score has room for 256
        // quadgroups, and an 8-bit index would wrap to 0 before reaching 256,
        // so the loop would never terminate for a full-size threadgroup.
        for (ushort i = 0; i < w; i++) {
            block_score += partial_score[i];
        }
        write_result(combed, p.block_threshold, block_score);
    }
}
|
||||
|
||||
// Scores the raw combing mask one block at a time, without any simdgroup
// or quadgroup reduction.
//
// Only the thread whose position is a multiple of block_width on x and of
// block_height on y does any work. For every position of the
// block_width x block_height block starting there, it adds the bitwise AND
// of the mask values at the left neighbour, the position itself and the
// right neighbour, then hands the total to write_result().
kernel void check_combing_mask(
    texture2d<ushort, access::sample> mask [[texture(3)]],
    device atomic_int *combed [[buffer(0)]],
    constant params& p [[buffer(1)]],
    ushort2 pos [[thread_position_in_grid]])
{
    const bool starts_block = (pos.x % p.block_width == 0) && (pos.y % p.block_height == 0);
    if (!starts_block) {
        return;
    }

    ushort score = 0;

    for (uchar dx = 0; dx < p.block_width; dx++) {
        for (uchar dy = 0; dy < p.block_height; dy++) {
            const ushort2 centre = ushort2(pos.x + dx, pos.y + dy);
            const short2  west   = short2(pos.x -1 +dx, pos.y +dy);
            const short2  east   = short2(pos.x +1 +dx, pos.y +dy);

            score += tex2Dc<ushort>(mask, west)
                   & tex2Dc<ushort>(mask, centre)
                   & tex2Dc<ushort>(mask, east);
        }
    }

    write_result(combed, p.block_threshold, score);
}
|
||||
|
||||
// Dilation pass over the combing mask: reads texture 3, writes texture 4.
//
// A position that is already set in src is written as 1. Otherwise the
// mask values of its eight neighbours are added up and the position is set
// when that count reaches the dilation threshold.
kernel void dilate_mask(
    texture2d<ushort, access::sample> src [[texture(3)]],
    texture2d<ushort, access::write> dst [[texture(4)]],
    ushort2 pos [[thread_position_in_grid]])
{
    if (tex2D<ushort>(src, pos) != 0) {
        dst.write(1, pos);
    } else {
        // The eight positions surrounding pos, row by row.
        const short2 nw = short2(pos.x -1, pos.y -1);
        const short2 n  = short2(pos.x,    pos.y -1);
        const short2 ne = short2(pos.x +1, pos.y -1);
        const short2 w  = short2(pos.x -1, pos.y);
        const short2 e  = short2(pos.x +1, pos.y);
        const short2 sw = short2(pos.x -1, pos.y +1);
        const short2 s  = short2(pos.x,    pos.y +1);
        const short2 se = short2(pos.x +1, pos.y +1);

        constexpr uchar min_neighbours = 4;
        const uchar neighbours =
            tex2D<ushort>(src, nw) + tex2D<ushort>(src, n) + tex2D<ushort>(src, ne) +
            tex2D<ushort>(src, w)  +                         tex2D<ushort>(src, e)  +
            tex2D<ushort>(src, sw) + tex2D<ushort>(src, s) + tex2D<ushort>(src, se);

        dst.write(neighbours >= min_neighbours, pos);
    }
}
|
||||
|
||||
// Erosion pass over the combing mask: reads texture 4, writes texture 3.
//
// A position that is clear in src is written as 0. Otherwise the mask
// values of its eight neighbours are added up and the position stays set
// only when that count reaches the erosion threshold.
kernel void erode_mask(
    texture2d<ushort, access::sample> src [[texture(4)]],
    texture2d<ushort, access::write> dst [[texture(3)]],
    ushort2 pos [[thread_position_in_grid]])
{
    if (tex2D<ushort>(src, pos) == 0) {
        dst.write(0, pos);
    } else {
        // The eight positions surrounding pos, row by row.
        const short2 nw = short2(pos.x -1, pos.y -1);
        const short2 n  = short2(pos.x,    pos.y -1);
        const short2 ne = short2(pos.x +1, pos.y -1);
        const short2 w  = short2(pos.x -1, pos.y);
        const short2 e  = short2(pos.x +1, pos.y);
        const short2 sw = short2(pos.x -1, pos.y +1);
        const short2 s  = short2(pos.x,    pos.y +1);
        const short2 se = short2(pos.x +1, pos.y +1);

        constexpr uchar min_neighbours = 2;
        const uchar neighbours =
            tex2D<ushort>(src, nw) + tex2D<ushort>(src, n) + tex2D<ushort>(src, ne) +
            tex2D<ushort>(src, w)  +                         tex2D<ushort>(src, e)  +
            tex2D<ushort>(src, sw) + tex2D<ushort>(src, s) + tex2D<ushort>(src, se);

        dst.write(neighbours >= min_neighbours, pos);
    }
}
|
||||
|
||||
// "Classic" mask filter: reads texture 3, writes texture 4.
//
// The output at pos is the bitwise AND of the mask values at the left
// neighbour, at pos itself and at the right neighbour.
kernel void filter_classic(
    texture2d<ushort, access::sample> src [[texture(3)]],
    texture2d<ushort, access::write> dst [[texture(4)]],
    ushort2 pos [[thread_position_in_grid]])
{
    const short2 west = short2(pos.x -1, pos.y);
    const short2 east = short2(pos.x +1, pos.y);

    const uchar horizontal = tex2D<ushort>(src, west)
                           & tex2D<ushort>(src, pos)
                           & tex2D<ushort>(src, east);

    dst.write(horizontal, pos);
}
|
||||
|
||||
// First mask filter of the erode/dilate path: reads texture 3, writes
// texture 4.
//
// The output at pos is the bitwise AND of two three-tap tests centred on
// pos: left/centre/right and up/centre/down.
kernel void filter_erode_dilate(
    texture2d<ushort, access::sample> src [[texture(3)]],
    texture2d<ushort, access::write> dst [[texture(4)]],
    ushort2 pos [[thread_position_in_grid]])
{
    const short2 west  = short2(pos.x -1, pos.y);
    const short2 east  = short2(pos.x +1, pos.y);
    const short2 north = short2(pos.x, pos.y -1);
    const short2 south = short2(pos.x, pos.y +1);

    const uchar horizontal = tex2D<ushort>(src, west)
                           & tex2D<ushort>(src, pos)
                           & tex2D<ushort>(src, east);
    const uchar vertical   = tex2D<ushort>(src, north)
                           & tex2D<ushort>(src, pos)
                           & tex2D<ushort>(src, south);

    dst.write(horizontal & vertical, pos);
}
|
||||
|
||||
// Kernel entry point for comb detection.
//
// Binds the previous, current and next frames (textures 0-2), the output
// mask (texture 3) and the filter parameters (buffer 1), then forwards
// everything to detect_combed_segment<float>() for this thread's position.
kernel void comb_detect(
    texture2d<float, access::sample> prev [[texture(0)]],
    texture2d<float, access::sample> cur  [[texture(1)]],
    texture2d<float, access::sample> next [[texture(2)]],
    texture2d<ushort, access::write> mask [[texture(3)]],
    constant params& p [[buffer(1)]],
    ushort2 pos [[thread_position_in_grid]])
{
    detect_combed_segment<float>(prev, cur, next, mask, p, pos);
}
|
||||
|
||||
// Kernel entry point for the gamma variant of comb detection.
//
// Takes the same bindings as comb_detect (frames in textures 0-2, output
// mask in texture 3, parameters in buffer 1) and forwards them to
// detect_gamma_combed_segment<float>() for this thread's position.
kernel void comb_detect_gamma(
    texture2d<float, access::sample> prev [[texture(0)]],
    texture2d<float, access::sample> cur  [[texture(1)]],
    texture2d<float, access::sample> next [[texture(2)]],
    texture2d<ushort, access::write> mask [[texture(3)]],
    constant params& p [[buffer(1)]],
    ushort2 pos [[thread_position_in_grid]])
{
    detect_gamma_combed_segment<float>(prev, cur, next, mask, p, pos);
}
|
||||
|
@ -403,6 +403,8 @@ int hb_vt_are_filters_supported(hb_list_t *filters)
|
||||
switch (filter->id)
|
||||
{
|
||||
case HB_FILTER_PRE_VT:
|
||||
case HB_FILTER_COMB_DETECT:
|
||||
case HB_FILTER_COMB_DETECT_VT:
|
||||
case HB_FILTER_YADIF:
|
||||
case HB_FILTER_YADIF_VT:
|
||||
case HB_FILTER_BWDIF:
|
||||
@ -498,6 +500,7 @@ void hb_vt_setup_hw_filters(hb_job_t *job)
|
||||
hb_filter_object_t *filter = hb_filter_init(HB_FILTER_PRE_VT);
|
||||
hb_add_filter(job, filter, NULL);
|
||||
|
||||
replace_filter(job, HB_FILTER_COMB_DETECT, HB_FILTER_COMB_DETECT_VT);
|
||||
replace_filter(job, HB_FILTER_YADIF, HB_FILTER_YADIF_VT);
|
||||
replace_filter(job, HB_FILTER_BWDIF, HB_FILTER_BWDIF_VT);
|
||||
replace_filter(job, HB_FILTER_CROP_SCALE, HB_FILTER_CROP_SCALE_VT);
|
||||
|
@ -16,6 +16,7 @@ namespace HandBrake.Interop.Interop.HbLib
|
||||
// First, filters that may change the framerate (drop or dup frames)
|
||||
HB_FILTER_DETELECINE,
|
||||
HB_FILTER_COMB_DETECT,
|
||||
HB_FILTER_COMB_DETECT_VT,
|
||||
HB_FILTER_DECOMB,
|
||||
HB_FILTER_YADIF,
|
||||
HB_FILTER_YADIF_VT,
|
||||
|
Loading…
x
Reference in New Issue
Block a user