libwebp: Update to 1.5.0
This commit is contained in:
parent
1f787b63a5
commit
01f88ff138
2
thirdparty/README.md
vendored
2
thirdparty/README.md
vendored
@ -582,7 +582,7 @@ Files extracted from upstream source:
|
||||
## libwebp
|
||||
|
||||
- Upstream: https://chromium.googlesource.com/webm/libwebp/
|
||||
- Version: 1.4.0 (845d5476a866141ba35ac133f856fa62f0b7445f, 2024)
|
||||
- Version: 1.5.0 (a4d7a715337ded4451fec90ff8ce79728e04126c, 2024)
|
||||
- License: BSD-3-Clause
|
||||
|
||||
Files extracted from upstream source:
|
||||
|
3
thirdparty/libwebp/AUTHORS
vendored
3
thirdparty/libwebp/AUTHORS
vendored
@ -11,11 +11,13 @@ Contributors:
|
||||
- Christopher Degawa (ccom at randomderp dot com)
|
||||
- Clement Courbet (courbet at google dot com)
|
||||
- Djordje Pesut (djordje dot pesut at imgtec dot com)
|
||||
- Frank (1433351828 at qq dot com)
|
||||
- Frank Barchard (fbarchard at google dot com)
|
||||
- Hui Su (huisu at google dot com)
|
||||
- H. Vetinari (h dot vetinari at gmx dot com)
|
||||
- Ilya Kurdyukov (jpegqs at gmail dot com)
|
||||
- Ingvar Stepanyan (rreverser at google dot com)
|
||||
- Istvan Stefan (Istvan dot Stefan at arm dot com)
|
||||
- James Zern (jzern at google dot com)
|
||||
- Jan Engelhardt (jengelh at medozas dot de)
|
||||
- Jehan (jehan at girinstud dot io)
|
||||
@ -62,6 +64,7 @@ Contributors:
|
||||
- Vincent Rabaud (vrabaud at google dot com)
|
||||
- Vlad Tsyrklevich (vtsyrklevich at chromium dot org)
|
||||
- Wan-Teh Chang (wtc at google dot com)
|
||||
- wrv (wrv at utexas dot edu)
|
||||
- Yang Zhang (yang dot zhang at arm dot com)
|
||||
- Yannis Guyon (yguyon at google dot com)
|
||||
- Zhi An Ng (zhin at chromium dot org)
|
||||
|
9
thirdparty/libwebp/sharpyuv/sharpyuv.c
vendored
9
thirdparty/libwebp/sharpyuv/sharpyuv.c
vendored
@ -565,10 +565,11 @@ int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr,
|
||||
scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
|
||||
scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
|
||||
|
||||
return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
|
||||
rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
|
||||
v_ptr, v_stride, yuv_bit_depth, width, height,
|
||||
&scaled_matrix, transfer_type);
|
||||
return DoSharpArgbToYuv(
|
||||
(const uint8_t*)r_ptr, (const uint8_t*)g_ptr, (const uint8_t*)b_ptr,
|
||||
rgb_step, rgb_stride, rgb_bit_depth, (uint8_t*)y_ptr, y_stride,
|
||||
(uint8_t*)u_ptr, u_stride, (uint8_t*)v_ptr, v_stride, yuv_bit_depth,
|
||||
width, height, &scaled_matrix, transfer_type);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
19
thirdparty/libwebp/sharpyuv/sharpyuv.h
vendored
19
thirdparty/libwebp/sharpyuv/sharpyuv.h
vendored
@ -52,7 +52,7 @@ extern "C" {
|
||||
// SharpYUV API version following the convention from semver.org
|
||||
#define SHARPYUV_VERSION_MAJOR 0
|
||||
#define SHARPYUV_VERSION_MINOR 4
|
||||
#define SHARPYUV_VERSION_PATCH 0
|
||||
#define SHARPYUV_VERSION_PATCH 1
|
||||
// Version as a uint32_t. The major number is the high 8 bits.
|
||||
// The minor number is the middle 8 bits. The patch number is the low 16 bits.
|
||||
#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
|
||||
@ -66,10 +66,17 @@ extern "C" {
|
||||
SHARPYUV_EXTERN int SharpYuvGetVersion(void);
|
||||
|
||||
// RGB to YUV conversion matrix, in 16 bit fixed point.
|
||||
// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
|
||||
// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
|
||||
// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
|
||||
// Then y, u and v values are divided by 1<<16 and rounded.
|
||||
// y_ = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
|
||||
// u_ = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
|
||||
// v_ = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
|
||||
// Then the values are divided by 1<<16 and rounded.
|
||||
// y = (y_ + (1 << 15)) >> 16
|
||||
// u = (u_ + (1 << 15)) >> 16
|
||||
// v = (v_ + (1 << 15)) >> 16
|
||||
//
|
||||
// Typically, the offset values rgb_to_y[3], rgb_to_u[3] and rgb_to_v[3] depend
|
||||
// on the input's bit depth, e.g., rgb_to_u[3] = 1 << (rgb_bit_depth - 1 + 16).
|
||||
// See also sharpyuv_csp.h to get a predefined matrix or generate a matrix.
|
||||
typedef struct {
|
||||
int rgb_to_y[4];
|
||||
int rgb_to_u[4];
|
||||
@ -127,6 +134,8 @@ typedef enum SharpYuvTransferFunctionType {
|
||||
// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
|
||||
// should be multiples of 2.
|
||||
// width, height: width and height of the image in pixels
|
||||
// yuv_matrix: RGB to YUV conversion matrix. The matrix values typically
|
||||
// depend on the input's rgb_bit_depth.
|
||||
// This function calls SharpYuvConvertWithOptions with a default transfer
|
||||
// function of kSharpYuvTransferFunctionSrgb.
|
||||
SHARPYUV_EXTERN int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
|
||||
|
22
thirdparty/libwebp/sharpyuv/sharpyuv_csp.c
vendored
22
thirdparty/libwebp/sharpyuv/sharpyuv_csp.c
vendored
@ -22,16 +22,16 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
const float kr = yuv_color_space->kr;
|
||||
const float kb = yuv_color_space->kb;
|
||||
const float kg = 1.0f - kr - kb;
|
||||
const float cr = 0.5f / (1.0f - kb);
|
||||
const float cb = 0.5f / (1.0f - kr);
|
||||
const float cb = 0.5f / (1.0f - kb);
|
||||
const float cr = 0.5f / (1.0f - kr);
|
||||
|
||||
const int shift = yuv_color_space->bit_depth - 8;
|
||||
|
||||
const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
|
||||
float scale_y = 1.0f;
|
||||
float add_y = 0.0f;
|
||||
float scale_u = cr;
|
||||
float scale_v = cb;
|
||||
float scale_u = cb;
|
||||
float scale_v = cr;
|
||||
float add_uv = (float)(128 << shift);
|
||||
assert(yuv_color_space->bit_depth >= 8);
|
||||
|
||||
@ -59,31 +59,35 @@ void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
}
|
||||
|
||||
// Matrices are in YUV_FIX fixed point precision.
|
||||
// WebP's matrix, similar but not identical to kRec601LimitedMatrix.
|
||||
// WebP's matrix, similar but not identical to kRec601LimitedMatrix
|
||||
// Derived using the following formulas:
|
||||
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
|
||||
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
|
||||
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
|
||||
static const SharpYuvConversionMatrix kWebpMatrix = {
|
||||
{16839, 33059, 6420, 16 << 16},
|
||||
{-9719, -19081, 28800, 128 << 16},
|
||||
{28800, -24116, -4684, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
|
||||
{16829, 33039, 6416, 16 << 16},
|
||||
{-9714, -19071, 28784, 128 << 16},
|
||||
{28784, -24103, -4681, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec601FullMatrix = {
|
||||
{19595, 38470, 7471, 0},
|
||||
{-11058, -21710, 32768, 128 << 16},
|
||||
{32768, -27439, -5329, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
|
||||
{11966, 40254, 4064, 16 << 16},
|
||||
{-6596, -22189, 28784, 128 << 16},
|
||||
{28784, -26145, -2639, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec709FullMatrix = {
|
||||
{13933, 46871, 4732, 0},
|
||||
{-7509, -25259, 32768, 128 << 16},
|
||||
|
5
thirdparty/libwebp/sharpyuv/sharpyuv_csp.h
vendored
5
thirdparty/libwebp/sharpyuv/sharpyuv_csp.h
vendored
@ -41,10 +41,15 @@ SHARPYUV_EXTERN void SharpYuvComputeConversionMatrix(
|
||||
|
||||
// Enums for precomputed conversion matrices.
|
||||
typedef enum {
|
||||
// WebP's matrix, similar but not identical to kSharpYuvMatrixRec601Limited
|
||||
kSharpYuvMatrixWebp = 0,
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
kSharpYuvMatrixRec601Limited,
|
||||
// Kr=0.2990f Kb=0.1140f bit_depth=8 range=kSharpYuvRangeFull
|
||||
kSharpYuvMatrixRec601Full,
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeLimited
|
||||
kSharpYuvMatrixRec709Limited,
|
||||
// Kr=0.2126f Kb=0.0722f bit_depth=8 range=kSharpYuvRangeFull
|
||||
kSharpYuvMatrixRec709Full,
|
||||
kSharpYuvMatrixNum
|
||||
} SharpYuvMatrixType;
|
||||
|
3
thirdparty/libwebp/src/dec/tree_dec.c
vendored
3
thirdparty/libwebp/src/dec/tree_dec.c
vendored
@ -16,7 +16,8 @@
|
||||
#include "src/utils/bit_reader_inl_utils.h"
|
||||
|
||||
#if !defined(USE_GENERIC_TREE)
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 && \
|
||||
!defined(__wasm__)
|
||||
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
|
||||
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
|
||||
#else
|
||||
|
2
thirdparty/libwebp/src/dec/vp8i_dec.h
vendored
2
thirdparty/libwebp/src/dec/vp8i_dec.h
vendored
@ -32,7 +32,7 @@ extern "C" {
|
||||
|
||||
// version numbers
|
||||
#define DEC_MAJ_VERSION 1
|
||||
#define DEC_MIN_VERSION 4
|
||||
#define DEC_MIN_VERSION 5
|
||||
#define DEC_REV_VERSION 0
|
||||
|
||||
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
|
||||
|
13
thirdparty/libwebp/src/dec/vp8l_dec.c
vendored
13
thirdparty/libwebp/src/dec/vp8l_dec.c
vendored
@ -20,10 +20,9 @@
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
#include "src/utils/endian_inl_utils.h"
|
||||
#include "src/utils/huffman_utils.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
|
||||
#define NUM_ARGB_CACHE_ROWS 16
|
||||
|
||||
@ -381,7 +380,8 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
|
||||
|
||||
if (allow_recursion && VP8LReadBits(br, 1)) {
|
||||
// use meta Huffman codes.
|
||||
const int huffman_precision = VP8LReadBits(br, 3) + 2;
|
||||
const int huffman_precision =
|
||||
MIN_HUFFMAN_BITS + VP8LReadBits(br, NUM_HUFFMAN_BITS);
|
||||
const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision);
|
||||
const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision);
|
||||
const int huffman_pixs = huffman_xsize * huffman_ysize;
|
||||
@ -1351,7 +1351,8 @@ static int ReadTransform(int* const xsize, int const* ysize,
|
||||
switch (type) {
|
||||
case PREDICTOR_TRANSFORM:
|
||||
case CROSS_COLOR_TRANSFORM:
|
||||
transform->bits_ = VP8LReadBits(br, 3) + 2;
|
||||
transform->bits_ =
|
||||
MIN_TRANSFORM_BITS + VP8LReadBits(br, NUM_TRANSFORM_BITS);
|
||||
ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize_,
|
||||
transform->bits_),
|
||||
VP8LSubSampleSize(transform->ysize_,
|
||||
@ -1416,7 +1417,9 @@ VP8LDecoder* VP8LNew(void) {
|
||||
return dec;
|
||||
}
|
||||
|
||||
void VP8LClear(VP8LDecoder* const dec) {
|
||||
// Resets the decoder in its initial state, reclaiming memory.
|
||||
// Preserves the dec->status_ value.
|
||||
static void VP8LClear(VP8LDecoder* const dec) {
|
||||
int i;
|
||||
if (dec == NULL) return;
|
||||
ClearMetadata(&dec->hdr_);
|
||||
|
4
thirdparty/libwebp/src/dec/vp8li_dec.h
vendored
4
thirdparty/libwebp/src/dec/vp8li_dec.h
vendored
@ -121,10 +121,6 @@ WEBP_NODISCARD int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);
|
||||
// this function. Returns false in case of error, with updated dec->status_.
|
||||
WEBP_NODISCARD int VP8LDecodeImage(VP8LDecoder* const dec);
|
||||
|
||||
// Resets the decoder in its initial state, reclaiming memory.
|
||||
// Preserves the dec->status_ value.
|
||||
void VP8LClear(VP8LDecoder* const dec);
|
||||
|
||||
// Clears and deallocate a lossless decoder instance.
|
||||
void VP8LDelete(VP8LDecoder* const dec);
|
||||
|
||||
|
2
thirdparty/libwebp/src/demux/demux.c
vendored
2
thirdparty/libwebp/src/demux/demux.c
vendored
@ -24,7 +24,7 @@
|
||||
#include "src/webp/format_constants.h"
|
||||
|
||||
#define DMUX_MAJ_VERSION 1
|
||||
#define DMUX_MIN_VERSION 4
|
||||
#define DMUX_MIN_VERSION 5
|
||||
#define DMUX_REV_VERSION 0
|
||||
|
||||
typedef struct {
|
||||
|
4
thirdparty/libwebp/src/dsp/cost.c
vendored
4
thirdparty/libwebp/src/dsp/cost.c
vendored
@ -354,8 +354,8 @@ static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_C(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_C(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
int n;
|
||||
res->last = -1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
|
4
thirdparty/libwebp/src/dsp/cost_mips32.c
vendored
4
thirdparty/libwebp/src/dsp/cost_mips32.c
vendored
@ -96,8 +96,8 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
const int16_t* p_coeffs = (int16_t*)coeffs;
|
||||
int temp0, temp1, temp2, n, n1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
|
4
thirdparty/libwebp/src/dsp/cost_neon.c
vendored
4
thirdparty/libwebp/src/dsp/cost_neon.c
vendored
@ -19,8 +19,8 @@
|
||||
static const uint8_t position[16] = { 1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 11, 12, 13, 14, 15, 16 };
|
||||
|
||||
static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_NEON(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
const int16x8_t minus_one = vdupq_n_s16(-1);
|
||||
const int16x8_t coeffs_0 = vld1q_s16(coeffs);
|
||||
const int16x8_t coeffs_1 = vld1q_s16(coeffs + 8);
|
||||
|
4
thirdparty/libwebp/src/dsp/cost_sse2.c
vendored
4
thirdparty/libwebp/src/dsp/cost_sse2.c
vendored
@ -22,8 +22,8 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
|
||||
const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
|
||||
// Use SSE2 to compare 16 values with a single instruction.
|
||||
|
47
thirdparty/libwebp/src/dsp/dec.c
vendored
47
thirdparty/libwebp/src/dsp/dec.c
vendored
@ -38,7 +38,8 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
} while (0)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@ -82,7 +83,8 @@ static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
|
||||
// Simplified transform when only in[0], in[1] and in[4] are non-zero
|
||||
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -95,7 +97,8 @@ static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
#undef STORE2
|
||||
|
||||
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne_C(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne_C(in + 16, dst + 4);
|
||||
@ -103,13 +106,15 @@ static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformUV_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformUV_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
VP8Transform(in + 0 * 16, dst, 1);
|
||||
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int DC = in[0] + 4;
|
||||
int i, j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
@ -120,7 +125,8 @@ static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDCUV_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
|
||||
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
|
||||
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
|
||||
@ -133,7 +139,8 @@ static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
// Paragraph 14.3
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int tmp[16];
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -161,7 +168,7 @@ static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
VP8WHT VP8TransformWHT;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Intra predictions
|
||||
@ -661,32 +668,32 @@ static void HFilter16i_C(uint8_t* p, int stride,
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -694,8 +701,8 @@ static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
static void DitherCombine8x8_C(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
|
||||
int i, j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -730,8 +737,8 @@ VP8SimpleFilterFunc VP8SimpleHFilter16;
|
||||
VP8SimpleFilterFunc VP8SimpleVFilter16i;
|
||||
VP8SimpleFilterFunc VP8SimpleHFilter16i;
|
||||
|
||||
void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride);
|
||||
void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride);
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8DspInitSSE2(void);
|
||||
|
22
thirdparty/libwebp/src/dsp/dec_mips32.c
vendored
22
thirdparty/libwebp/src/dsp/dec_mips32.c
vendored
@ -133,26 +133,26 @@ static void HFilter16(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -215,7 +215,8 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14;
|
||||
@ -532,7 +533,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
|
28
thirdparty/libwebp/src/dsp/dec_mips_dsp_r2.c
vendored
28
thirdparty/libwebp/src/dsp/dec_mips_dsp_r2.c
vendored
@ -21,7 +21,8 @@
|
||||
static const int kC1 = WEBP_TRANSFORM_AC3_C1;
|
||||
static const int kC2 = WEBP_TRANSFORM_AC3_C2;
|
||||
|
||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
|
||||
|
||||
__asm__ volatile (
|
||||
@ -45,7 +46,8 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int a = in[0] + 4;
|
||||
int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -81,7 +83,8 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
|
||||
|
||||
@ -148,7 +151,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
@ -434,14 +438,14 @@ static void HFilter16(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -465,14 +469,14 @@ static void HFilter16i(uint8_t* p, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
29
thirdparty/libwebp/src/dsp/dec_msa.c
vendored
29
thirdparty/libwebp/src/dsp/dec_msa.c
vendored
@ -38,7 +38,8 @@
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
@ -65,14 +66,16 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
|
||||
}
|
||||
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
static void TransformWHT(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
v8i16 input0, input1;
|
||||
const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
|
||||
const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
|
||||
@ -114,13 +117,15 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
out[240] = __msa_copy_s_h(out1, 7);
|
||||
}
|
||||
|
||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int DC = (in[0] + 4) >> 3;
|
||||
const v8i16 tmp0 = __msa_fill_h(DC);
|
||||
ADDBLK_ST4x4_UB(tmp0, tmp0, tmp0, tmp0, dst, BPS);
|
||||
}
|
||||
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -475,8 +480,8 @@ static void HFilter16i(uint8_t* src_y, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variants, for chroma filtering
|
||||
static void VFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
|
||||
int stride, int b_limit_in, int limit_in, int thresh_in) {
|
||||
uint8_t* ptmp_src_u = src_u - 4 * stride;
|
||||
uint8_t* ptmp_src_v = src_v - 4 * stride;
|
||||
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
|
||||
@ -520,8 +525,8 @@ static void VFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
SD(q2_d, ptmp_src_v);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
|
||||
int stride, int b_limit_in, int limit_in, int thresh_in) {
|
||||
uint8_t* ptmp_src_u = src_u - 4;
|
||||
uint8_t* ptmp_src_v = src_v - 4;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
|
||||
@ -556,7 +561,8 @@ static void HFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
ST6x4_UB(tmp7, 0, tmp5, 4, ptmp_src_v, stride);
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
|
||||
uint8_t* WEBP_RESTRICT src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
uint64_t p1_d, p0_d, q0_d, q1_d;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
|
||||
@ -587,7 +593,8 @@ static void VFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
SD4(q1_d, q0_d, p0_d, p1_d, src_v, -stride);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
|
||||
uint8_t* WEBP_RESTRICT src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
|
80
thirdparty/libwebp/src/dsp/dec_neon.c
vendored
80
thirdparty/libwebp/src/dsp/dec_neon.c
vendored
@ -916,8 +916,8 @@ static void HFilter16i_NEON(uint8_t* p, int stride,
|
||||
#endif // !WORK_AROUND_GCC
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
|
||||
{
|
||||
@ -932,7 +932,8 @@ static void VFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
Store8x2x2_NEON(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
|
||||
}
|
||||
}
|
||||
static void VFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
static void VFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
u += 4 * stride;
|
||||
@ -949,8 +950,8 @@ static void VFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
}
|
||||
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
static void HFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
|
||||
{
|
||||
@ -964,7 +965,8 @@ static void HFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void HFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
static void HFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
u += 4;
|
||||
@ -1041,7 +1043,8 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
Transpose8x2_NEON(E0, E1, rows);
|
||||
}
|
||||
|
||||
static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int16x8x2_t rows;
|
||||
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
|
||||
TransformPass_NEON(&rows);
|
||||
@ -1051,7 +1054,8 @@ static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
|
||||
#else
|
||||
|
||||
static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int kBPS = BPS;
|
||||
// kC1, kC2. Padded because vld1.16 loads 8 bytes
|
||||
const int16_t constants[4] = { kC1, kC2, 0, 0 };
|
||||
@ -1184,14 +1188,16 @@ static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
|
||||
#endif // WEBP_USE_INTRINSICS
|
||||
|
||||
static void TransformTwo_NEON(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void TransformTwo_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
TransformOne_NEON(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne_NEON(in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformDC_NEON(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformDC_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int16x8_t DC = vdupq_n_s16(in[0]);
|
||||
Add4x4_NEON(DC, DC, dst);
|
||||
}
|
||||
@ -1205,7 +1211,8 @@ static void TransformDC_NEON(const int16_t* in, uint8_t* dst) {
|
||||
*dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
|
||||
} while (0)
|
||||
|
||||
static void TransformWHT_NEON(const int16_t* in, int16_t* out) {
|
||||
static void TransformWHT_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int32x4x4_t tmp;
|
||||
|
||||
{
|
||||
@ -1256,7 +1263,8 @@ static void TransformWHT_NEON(const int16_t* in, int16_t* out) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void TransformAC3_NEON(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int16x4_t A = vld1_dup_s16(in);
|
||||
const int16x4_t c4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
|
||||
const int16x4_t d4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
|
||||
@ -1300,18 +1308,19 @@ static void DC4_NEON(uint8_t* dst) { // DC
|
||||
static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, int size) {
|
||||
const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1); // top-left pixel 'A[-1]'
|
||||
const uint8x8_t T = vld1_u8(dst - BPS); // top row 'A[0..3]'
|
||||
const int16x8_t d = vreinterpretq_s16_u16(vsubl_u8(T, TL)); // A[c] - A[-1]
|
||||
const uint16x8_t d = vsubl_u8(T, TL); // A[c] - A[-1]
|
||||
int y;
|
||||
for (y = 0; y < size; y += 4) {
|
||||
// left edge
|
||||
const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1));
|
||||
const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1));
|
||||
const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1));
|
||||
const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1));
|
||||
const int16x8_t r0 = vaddq_s16(L0, d); // L[r] + A[c] - A[-1]
|
||||
const int16x8_t r1 = vaddq_s16(L1, d);
|
||||
const int16x8_t r2 = vaddq_s16(L2, d);
|
||||
const int16x8_t r3 = vaddq_s16(L3, d);
|
||||
const uint8x8_t L0 = vld1_dup_u8(dst + 0 * BPS - 1);
|
||||
const uint8x8_t L1 = vld1_dup_u8(dst + 1 * BPS - 1);
|
||||
const uint8x8_t L2 = vld1_dup_u8(dst + 2 * BPS - 1);
|
||||
const uint8x8_t L3 = vld1_dup_u8(dst + 3 * BPS - 1);
|
||||
// L[r] + A[c] - A[-1]
|
||||
const int16x8_t r0 = vreinterpretq_s16_u16(vaddw_u8(d, L0));
|
||||
const int16x8_t r1 = vreinterpretq_s16_u16(vaddw_u8(d, L1));
|
||||
const int16x8_t r2 = vreinterpretq_s16_u16(vaddw_u8(d, L2));
|
||||
const int16x8_t r3 = vreinterpretq_s16_u16(vaddw_u8(d, L3));
|
||||
// Saturate and store the result.
|
||||
const uint32x2_t r0_u32 = vreinterpret_u32_u8(vqmovun_s16(r0));
|
||||
const uint32x2_t r1_u32 = vreinterpret_u32_u8(vqmovun_s16(r1));
|
||||
@ -1572,23 +1581,24 @@ static void TM16_NEON(uint8_t* dst) {
|
||||
const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1); // top-left pixel 'A[-1]'
|
||||
const uint8x16_t T = vld1q_u8(dst - BPS); // top row 'A[0..15]'
|
||||
// A[c] - A[-1]
|
||||
const int16x8_t d_lo = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(T), TL));
|
||||
const int16x8_t d_hi = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(T), TL));
|
||||
const uint16x8_t d_lo = vsubl_u8(vget_low_u8(T), TL);
|
||||
const uint16x8_t d_hi = vsubl_u8(vget_high_u8(T), TL);
|
||||
int y;
|
||||
for (y = 0; y < 16; y += 4) {
|
||||
// left edge
|
||||
const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1));
|
||||
const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1));
|
||||
const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1));
|
||||
const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1));
|
||||
const int16x8_t r0_lo = vaddq_s16(L0, d_lo); // L[r] + A[c] - A[-1]
|
||||
const int16x8_t r1_lo = vaddq_s16(L1, d_lo);
|
||||
const int16x8_t r2_lo = vaddq_s16(L2, d_lo);
|
||||
const int16x8_t r3_lo = vaddq_s16(L3, d_lo);
|
||||
const int16x8_t r0_hi = vaddq_s16(L0, d_hi);
|
||||
const int16x8_t r1_hi = vaddq_s16(L1, d_hi);
|
||||
const int16x8_t r2_hi = vaddq_s16(L2, d_hi);
|
||||
const int16x8_t r3_hi = vaddq_s16(L3, d_hi);
|
||||
const uint8x8_t L0 = vld1_dup_u8(dst + 0 * BPS - 1);
|
||||
const uint8x8_t L1 = vld1_dup_u8(dst + 1 * BPS - 1);
|
||||
const uint8x8_t L2 = vld1_dup_u8(dst + 2 * BPS - 1);
|
||||
const uint8x8_t L3 = vld1_dup_u8(dst + 3 * BPS - 1);
|
||||
// L[r] + A[c] - A[-1]
|
||||
const int16x8_t r0_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L0));
|
||||
const int16x8_t r1_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L1));
|
||||
const int16x8_t r2_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L2));
|
||||
const int16x8_t r3_lo = vreinterpretq_s16_u16(vaddw_u8(d_lo, L3));
|
||||
const int16x8_t r0_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L0));
|
||||
const int16x8_t r1_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L1));
|
||||
const int16x8_t r2_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L2));
|
||||
const int16x8_t r3_hi = vreinterpretq_s16_u16(vaddw_u8(d_hi, L3));
|
||||
// Saturate and store the result.
|
||||
const uint8x16_t row0 = vcombine_u8(vqmovun_s16(r0_lo), vqmovun_s16(r0_hi));
|
||||
const uint8x16_t row1 = vcombine_u8(vqmovun_s16(r1_lo), vqmovun_s16(r1_hi));
|
||||
|
20
thirdparty/libwebp/src/dsp/dec_sse2.c
vendored
20
thirdparty/libwebp/src/dsp/dec_sse2.c
vendored
@ -30,7 +30,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void Transform_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -197,7 +198,8 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
|
||||
#if (USE_TRANSFORM_AC3 == 1)
|
||||
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
static void TransformAC3_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i A = _mm_set1_epi16(in[0] + 4);
|
||||
const __m128i c4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
|
||||
const __m128i d4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
|
||||
@ -792,8 +794,8 @@ static void HFilter16i_SSE2(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, p2, p1, p0, q0, q1, q2;
|
||||
|
||||
@ -817,8 +819,8 @@ static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q2, u, v, 2 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
|
||||
@ -837,7 +839,8 @@ static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
|
||||
}
|
||||
|
||||
static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
@ -863,7 +866,8 @@ static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
STOREUV(q1, u, v, 1 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
|
153
thirdparty/libwebp/src/dsp/dsp.h
vendored
153
thirdparty/libwebp/src/dsp/dsp.h
vendored
@ -60,53 +60,66 @@ extern "C" {
|
||||
// Transforms
|
||||
// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
|
||||
// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
|
||||
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two);
|
||||
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
|
||||
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
|
||||
typedef void (*VP8Idct)(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two);
|
||||
typedef void (*VP8Fdct)(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out);
|
||||
typedef void (*VP8WHT)(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out);
|
||||
extern VP8Idct VP8ITransform;
|
||||
extern VP8Fdct VP8FTransform;
|
||||
extern VP8Fdct VP8FTransform2; // performs two transforms at a time
|
||||
extern VP8WHT VP8FTransformWHT;
|
||||
// Predictions
|
||||
// *dst is the destination block. *top and *left can be NULL.
|
||||
typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top);
|
||||
typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
|
||||
typedef void (*VP8IntraPreds)(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top);
|
||||
typedef void (*VP8Intra4Preds)(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top);
|
||||
extern VP8Intra4Preds VP8EncPredLuma4;
|
||||
extern VP8IntraPreds VP8EncPredLuma16;
|
||||
extern VP8IntraPreds VP8EncPredChroma8;
|
||||
|
||||
typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
|
||||
typedef int (*VP8Metric)(const uint8_t* WEBP_RESTRICT pix,
|
||||
const uint8_t* WEBP_RESTRICT ref);
|
||||
extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
|
||||
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
|
||||
const uint16_t* const weights);
|
||||
typedef int (*VP8WMetric)(const uint8_t* WEBP_RESTRICT pix,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint16_t* WEBP_RESTRICT const weights);
|
||||
// The weights for VP8TDisto4x4 and VP8TDisto16x16 contain a row-major
|
||||
// 4 by 4 symmetric matrix.
|
||||
extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
|
||||
|
||||
// Compute the average (DC) of four 4x4 blocks.
|
||||
// Each sub-4x4 block #i sum is stored in dc[i].
|
||||
typedef void (*VP8MeanMetric)(const uint8_t* ref, uint32_t dc[4]);
|
||||
typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref,
|
||||
uint32_t dc[4]);
|
||||
extern VP8MeanMetric VP8Mean16x4;
|
||||
|
||||
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
|
||||
typedef void (*VP8BlockCopy)(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
extern VP8BlockCopy VP8Copy4x4;
|
||||
extern VP8BlockCopy VP8Copy16x8;
|
||||
// Quantization
|
||||
struct VP8Matrix; // forward declaration
|
||||
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* const mtx);
|
||||
typedef int (*VP8QuantizeBlock)(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* WEBP_RESTRICT const mtx);
|
||||
// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
|
||||
typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
|
||||
const struct VP8Matrix* const mtx);
|
||||
typedef int (*VP8Quantize2Blocks)(
|
||||
int16_t in[32], int16_t out[32],
|
||||
const struct VP8Matrix* WEBP_RESTRICT const mtx);
|
||||
|
||||
extern VP8QuantizeBlock VP8EncQuantizeBlock;
|
||||
extern VP8Quantize2Blocks VP8EncQuantize2Blocks;
|
||||
|
||||
// specific to 2nd transform:
|
||||
typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* const mtx);
|
||||
typedef int (*VP8QuantizeBlockWHT)(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* WEBP_RESTRICT const mtx);
|
||||
extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
|
||||
|
||||
extern const int VP8DspScan[16 + 4 + 4];
|
||||
@ -118,9 +131,10 @@ typedef struct {
|
||||
int max_value;
|
||||
int last_non_zero;
|
||||
} VP8Histogram;
|
||||
typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
|
||||
typedef void (*VP8CHisto)(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo);
|
||||
VP8Histogram* WEBP_RESTRICT const histo);
|
||||
extern VP8CHisto VP8CollectHistogram;
|
||||
// General-purpose util function to help VP8CollectHistogram().
|
||||
void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
|
||||
@ -138,8 +152,9 @@ extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
|
||||
extern const uint8_t VP8EncBands[16 + 1];
|
||||
|
||||
struct VP8Residual;
|
||||
typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
|
||||
struct VP8Residual* const res);
|
||||
typedef void (*VP8SetResidualCoeffsFunc)(
|
||||
const int16_t* WEBP_RESTRICT const coeffs,
|
||||
struct VP8Residual* WEBP_RESTRICT const res);
|
||||
extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
|
||||
|
||||
// Cost calculation function.
|
||||
@ -193,9 +208,11 @@ void VP8SSIMDspInit(void);
|
||||
//------------------------------------------------------------------------------
|
||||
// Decoding
|
||||
|
||||
typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
|
||||
typedef void (*VP8DecIdct)(const int16_t* WEBP_RESTRICT coeffs,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
// when doing two transforms, coeffs is actually int16_t[2][16].
|
||||
typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
|
||||
typedef void (*VP8DecIdct2)(const int16_t* WEBP_RESTRICT coeffs,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two);
|
||||
extern VP8DecIdct2 VP8Transform;
|
||||
extern VP8DecIdct VP8TransformAC3;
|
||||
extern VP8DecIdct VP8TransformUV;
|
||||
@ -233,7 +250,8 @@ extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
|
||||
// regular filter (on both macroblock edges and inner edges)
|
||||
typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
|
||||
int thresh, int ithresh, int hev_t);
|
||||
typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
|
||||
typedef void (*VP8ChromaFilterFunc)(uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int stride,
|
||||
int thresh, int ithresh, int hev_t);
|
||||
// on outer edge
|
||||
extern VP8LumaFilterFunc VP8VFilter16;
|
||||
@ -253,8 +271,8 @@ extern VP8ChromaFilterFunc VP8HFilter8i;
|
||||
#define VP8_DITHER_DESCALE_ROUNDER (1 << (VP8_DITHER_DESCALE - 1))
|
||||
#define VP8_DITHER_AMP_BITS 7
|
||||
#define VP8_DITHER_AMP_CENTER (1 << VP8_DITHER_AMP_BITS)
|
||||
extern void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride);
|
||||
extern void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride);
|
||||
|
||||
// must be called before anything using the above
|
||||
void VP8DspInit(void);
|
||||
@ -267,10 +285,10 @@ void VP8DspInit(void);
|
||||
// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
|
||||
// bottom_y can be NULL if only one line of output is needed (at top/bottom).
|
||||
typedef void (*WebPUpsampleLinePairFunc)(
|
||||
const uint8_t* top_y, const uint8_t* bottom_y,
|
||||
const uint8_t* top_u, const uint8_t* top_v,
|
||||
const uint8_t* cur_u, const uint8_t* cur_v,
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len);
|
||||
const uint8_t* WEBP_RESTRICT top_y, const uint8_t* WEBP_RESTRICT bottom_y,
|
||||
const uint8_t* WEBP_RESTRICT top_u, const uint8_t* WEBP_RESTRICT top_v,
|
||||
const uint8_t* WEBP_RESTRICT cur_u, const uint8_t* WEBP_RESTRICT cur_v,
|
||||
uint8_t* WEBP_RESTRICT top_dst, uint8_t* WEBP_RESTRICT bottom_dst, int len);
|
||||
|
||||
#ifdef FANCY_UPSAMPLING
|
||||
|
||||
@ -280,13 +298,15 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
|
||||
#endif // FANCY_UPSAMPLING
|
||||
|
||||
// Per-row point-sampling methods.
|
||||
typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len);
|
||||
typedef void (*WebPSamplerRowFunc)(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len);
|
||||
// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
|
||||
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||
const uint8_t* u, const uint8_t* v, int uv_stride,
|
||||
uint8_t* dst, int dst_stride,
|
||||
void WebPSamplerProcessPlane(const uint8_t* WEBP_RESTRICT y, int y_stride,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v, int uv_stride,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride,
|
||||
int width, int height, WebPSamplerRowFunc func);
|
||||
|
||||
// Sampling functions to convert rows of YUV to RGB(A)
|
||||
@ -298,9 +318,10 @@ extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];
|
||||
WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);
|
||||
|
||||
// YUV444->RGB converters
|
||||
typedef void (*WebPYUV444Converter)(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len);
|
||||
typedef void (*WebPYUV444Converter)(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len);
|
||||
|
||||
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
|
||||
@ -316,26 +337,35 @@ void WebPInitYUV444Converters(void);
|
||||
// ARGB -> YUV converters
|
||||
|
||||
// Convert ARGB samples to luma Y.
|
||||
extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
|
||||
extern void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
// Convert ARGB samples to U/V with downsampling. do_store should be '1' for
|
||||
// even lines and '0' for odd ones. 'src_width' is the original width, not
|
||||
// the U/V one.
|
||||
extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
extern void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store);
|
||||
|
||||
// Convert a row of accumulated (four-values) of rgba32 toward U/V
|
||||
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width);
|
||||
|
||||
// Convert RGB or BGR to Y
|
||||
extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
|
||||
extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
|
||||
extern void (*WebPConvertRGB24ToY)(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
extern void (*WebPConvertBGR24ToY)(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
|
||||
// used for plain-C fallback.
|
||||
extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
extern void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store);
|
||||
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
extern void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width);
|
||||
|
||||
// Must be called before using the above.
|
||||
void WebPInitConvertARGBToYUV(void);
|
||||
@ -348,8 +378,9 @@ struct WebPRescaler;
|
||||
// Import a row of data and save its contribution in the rescaler.
|
||||
// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
|
||||
// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
|
||||
typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
typedef void (*WebPRescalerImportRowFunc)(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
|
||||
extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
|
||||
extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
|
||||
@ -362,16 +393,19 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
|
||||
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
|
||||
|
||||
// Plain-C implementation, as fall-back.
|
||||
extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowExpand_C(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
extern void WebPRescalerImportRowShrink_C(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
|
||||
|
||||
// Main entry calls:
|
||||
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRow(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
// Export one row (starting at x_out position) from rescaler.
|
||||
extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);
|
||||
|
||||
@ -480,8 +514,9 @@ typedef enum { // Filter types.
|
||||
WEBP_FILTER_FAST
|
||||
} WEBP_FILTER_TYPE;
|
||||
|
||||
typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
|
||||
int stride, uint8_t* out);
|
||||
typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out);
|
||||
// In-place un-filtering.
|
||||
// Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
|
||||
typedef void (*WebPUnfilterFunc)(const uint8_t* prev_line, const uint8_t* preds,
|
||||
|
143
thirdparty/libwebp/src/dsp/enc.c
vendored
143
thirdparty/libwebp/src/dsp/enc.c
vendored
@ -59,9 +59,10 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_C(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
for (j = start_block; j < end_block; ++j) {
|
||||
@ -109,8 +110,9 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
|
||||
#define STORE(x, y, v) \
|
||||
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@ -146,7 +148,9 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
}
|
||||
|
||||
static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
static void ITransform_C(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
@ -154,7 +158,9 @@ static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
static void FTransform_C(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
int tmp[16];
|
||||
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
|
||||
@ -184,14 +190,16 @@ static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform2_C(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
VP8FTransform(src, ref, out);
|
||||
VP8FTransform(src + 4, ref + 4, out + 16);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_C(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
// input is 12b signed
|
||||
int32_t tmp[16];
|
||||
int i;
|
||||
@ -234,8 +242,9 @@ static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void VerticalPred(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
int j;
|
||||
if (top != NULL) {
|
||||
for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
|
||||
@ -244,8 +253,9 @@ static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
static WEBP_INLINE void HorizontalPred(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
int size) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < size; ++j) {
|
||||
@ -256,8 +266,9 @@ static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top, int size) {
|
||||
int y;
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
@ -286,8 +297,9 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top,
|
||||
static WEBP_INLINE void DCMode(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size, int round, int shift) {
|
||||
int DC = 0;
|
||||
int j;
|
||||
@ -312,8 +324,9 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
|
||||
VerticalPred(C8VE8 + dst, top, 8);
|
||||
@ -332,22 +345,28 @@ static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_C(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
|
||||
static void Intra16Preds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DCMode(I16DC16 + dst, left, top, 16, 16, 5);
|
||||
VerticalPred(I16VE16 + dst, top, 16);
|
||||
HorizontalPred(I16HE16 + dst, left, 16);
|
||||
TrueMotion(I16TM16 + dst, left, top, 16);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 4x4 prediction
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64 || BPS != 32
|
||||
|
||||
#define DST(x, y) dst[(x) + (y) * BPS]
|
||||
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
// vertical
|
||||
static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const uint8_t vals[4] = {
|
||||
AVG3(top[-1], top[0], top[1]),
|
||||
AVG3(top[ 0], top[1], top[2]),
|
||||
@ -360,7 +379,8 @@ static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
}
|
||||
}
|
||||
|
||||
static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
// horizontal
|
||||
static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -372,14 +392,14 @@ static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
Fill(dst, dc >> 3, 4);
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -398,7 +418,7 @@ static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 0) = AVG3(D, C, B);
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -416,7 +436,7 @@ static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 3) = AVG3(G, H, H);
|
||||
}
|
||||
|
||||
static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -438,7 +458,7 @@ static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -460,7 +480,7 @@ static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -475,7 +495,7 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -498,7 +518,7 @@ static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int x, y;
|
||||
const uint8_t* const clip = clip1 + 255 - top[-1];
|
||||
for (y = 0; y < 4; ++y) {
|
||||
@ -516,7 +536,8 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -529,11 +550,14 @@ static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
|
||||
HU4(I4HU4 + dst, top);
|
||||
}
|
||||
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64 || BPS != 32
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Metric
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
|
||||
static WEBP_INLINE int GetSSE(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int w, int h) {
|
||||
int count = 0;
|
||||
int y, x;
|
||||
@ -548,21 +572,25 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return GetSSE(a, b, 16, 16);
|
||||
}
|
||||
static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return GetSSE(a, b, 16, 8);
|
||||
}
|
||||
static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return GetSSE(a, b, 8, 8);
|
||||
}
|
||||
static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return GetSSE(a, b, 4, 4);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
|
||||
static void Mean16x4_C(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
|
||||
int k, x, y;
|
||||
for (k = 0; k < 4; ++k) {
|
||||
uint32_t avg = 0;
|
||||
@ -586,7 +614,8 @@ static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
static int TTransform(const uint8_t* WEBP_RESTRICT in,
|
||||
const uint16_t* WEBP_RESTRICT w) {
|
||||
int sum = 0;
|
||||
int tmp[16];
|
||||
int i;
|
||||
@ -620,15 +649,17 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_C(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
const int sum1 = TTransform(a, w);
|
||||
const int sum2 = TTransform(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_C(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -644,13 +675,14 @@ static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
|
||||
// Quantization
|
||||
//
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static const uint8_t kZigzag[16] = {
|
||||
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
|
||||
};
|
||||
|
||||
// Simple quantization
|
||||
static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int last = -1;
|
||||
int n;
|
||||
for (n = 0; n < 16; ++n) {
|
||||
@ -675,9 +707,8 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
|
||||
return (last >= 0);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -688,7 +719,8 @@ static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
|
||||
//------------------------------------------------------------------------------
|
||||
// Block copy
|
||||
|
||||
static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
|
||||
static WEBP_INLINE void Copy(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int w, int h) {
|
||||
int y;
|
||||
for (y = 0; y < h; ++y) {
|
||||
memcpy(dst, src, w);
|
||||
@ -697,11 +729,13 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
|
||||
}
|
||||
}
|
||||
|
||||
static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
|
||||
static void Copy4x4_C(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
Copy(src, dst, 4, 4);
|
||||
}
|
||||
|
||||
static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
|
||||
static void Copy16x8_C(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
Copy(src, dst, 16, 8);
|
||||
}
|
||||
|
||||
@ -760,14 +794,19 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
VP8EncQuantizeBlock = QuantizeBlock_C;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_C;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_C;
|
||||
#endif
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64 || BPS != 32
|
||||
VP8EncPredLuma4 = Intra4Preds_C;
|
||||
#endif
|
||||
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
|
||||
VP8EncPredLuma16 = Intra16Preds_C;
|
||||
#endif
|
||||
|
||||
VP8FTransform2 = FTransform2_C;
|
||||
VP8EncPredLuma4 = Intra4Preds_C;
|
||||
VP8EncPredLuma16 = Intra16Preds_C;
|
||||
VP8EncPredChroma8 = IntraChromaPreds_C;
|
||||
VP8Mean16x4 = Mean16x4_C;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_C;
|
||||
VP8Copy4x4 = Copy4x4_C;
|
||||
VP8Copy16x8 = Copy16x8_C;
|
||||
|
||||
|
40
thirdparty/libwebp/src/dsp/enc_mips32.c
vendored
40
thirdparty/libwebp/src/dsp/enc_mips32.c
vendored
@ -109,9 +109,9 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
|
||||
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
|
||||
const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
|
||||
int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
|
||||
@ -141,8 +141,9 @@ static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
ITransformOne_MIPS32(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne_MIPS32(ref + 4, in + 16, dst + 4);
|
||||
@ -236,7 +237,7 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -358,8 +359,9 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
|
||||
"msub %[temp6], %[temp0] \n\t" \
|
||||
"msub %[temp7], %[temp1] \n\t"
|
||||
|
||||
static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int tmp[32];
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
|
||||
@ -393,8 +395,9 @@ static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
#undef VERTICAL_PASS
|
||||
#undef HORIZONTAL_PASS
|
||||
|
||||
static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -475,8 +478,9 @@ static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
|
||||
int temp17, temp18, temp19, temp20;
|
||||
@ -537,7 +541,8 @@ static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
|
||||
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
|
||||
GET_SSE_INNER(D, D + 1, D + 2, D + 3)
|
||||
|
||||
static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -571,7 +576,8 @@ static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -597,7 +603,8 @@ static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -619,7 +626,8 @@ static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
|
101
thirdparty/libwebp/src/dsp/enc_mips_dsp_r2.c
vendored
101
thirdparty/libwebp/src/dsp/enc_mips_dsp_r2.c
vendored
@ -141,8 +141,9 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const int c2217 = 2217;
|
||||
const int c5352 = 5352;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
@ -171,8 +172,9 @@ static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
|
||||
#undef VERTICAL_PASS
|
||||
#undef HORIZONTAL_PASS
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
|
||||
|
||||
@ -239,16 +241,18 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform_MIPSdspR2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
static void ITransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
|
||||
|
||||
@ -314,9 +318,9 @@ static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
|
||||
return abs(temp3 - temp17) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_MIPSdspR2(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -367,8 +371,8 @@ static int Disto16x16_MIPSdspR2(const uint8_t* const a,
|
||||
} while (0)
|
||||
|
||||
#define VERTICAL_PRED(DST, TOP, SIZE) \
|
||||
static WEBP_INLINE void VerticalPred##SIZE(uint8_t* (DST), \
|
||||
const uint8_t* (TOP)) { \
|
||||
static WEBP_INLINE void VerticalPred##SIZE( \
|
||||
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (TOP)) { \
|
||||
int j; \
|
||||
if ((TOP)) { \
|
||||
for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE)); \
|
||||
@ -383,8 +387,8 @@ VERTICAL_PRED(dst, top, 16)
|
||||
#undef VERTICAL_PRED
|
||||
|
||||
#define HORIZONTAL_PRED(DST, LEFT, SIZE) \
|
||||
static WEBP_INLINE void HorizontalPred##SIZE(uint8_t* (DST), \
|
||||
const uint8_t* (LEFT)) { \
|
||||
static WEBP_INLINE void HorizontalPred##SIZE( \
|
||||
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (LEFT)) { \
|
||||
if (LEFT) { \
|
||||
int j; \
|
||||
for (j = 0; j < (SIZE); ++j) { \
|
||||
@ -451,8 +455,9 @@ HORIZONTAL_PRED(dst, left, 16)
|
||||
} while (0)
|
||||
|
||||
#define TRUE_MOTION(DST, LEFT, TOP, SIZE) \
|
||||
static WEBP_INLINE void TrueMotion##SIZE(uint8_t* (DST), const uint8_t* (LEFT),\
|
||||
const uint8_t* (TOP)) { \
|
||||
static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST), \
|
||||
const uint8_t* WEBP_RESTRICT (LEFT), \
|
||||
const uint8_t* WEBP_RESTRICT (TOP)) { \
|
||||
if ((LEFT) != NULL) { \
|
||||
if ((TOP) != NULL) { \
|
||||
CLIP_TO_DST((DST), (LEFT), (TOP), (SIZE)); \
|
||||
@ -480,8 +485,9 @@ TRUE_MOTION(dst, left, top, 16)
|
||||
#undef CLIP_8B_TO_DST
|
||||
#undef CLIPPING
|
||||
|
||||
static WEBP_INLINE void DCMode16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int DC, DC1;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
|
||||
@ -543,8 +549,9 @@ static WEBP_INLINE void DCMode16(uint8_t* dst, const uint8_t* left,
|
||||
FILL_8_OR_16(dst, DC, 16);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int DC, DC1;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
|
||||
@ -588,7 +595,7 @@ static WEBP_INLINE void DCMode8(uint8_t* dst, const uint8_t* left,
|
||||
FILL_8_OR_16(dst, DC, 8);
|
||||
}
|
||||
|
||||
static void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1;
|
||||
__asm__ volatile(
|
||||
"ulw %[temp0], 0(%[top]) \n\t"
|
||||
@ -609,7 +616,7 @@ static void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int a10, a32, temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
const int c35 = 0xff00ff;
|
||||
__asm__ volatile (
|
||||
@ -664,7 +671,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void VE4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
__asm__ volatile(
|
||||
"ulw %[temp0], -1(%[top]) \n\t"
|
||||
@ -695,7 +702,7 @@ static void VE4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void HE4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
__asm__ volatile(
|
||||
"ulw %[temp0], -4(%[top]) \n\t"
|
||||
@ -731,7 +738,7 @@ static void HE4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
int temp6, temp7, temp8, temp9, temp10, temp11;
|
||||
__asm__ volatile(
|
||||
@ -780,7 +787,7 @@ static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
__asm__ volatile (
|
||||
@ -830,7 +837,7 @@ static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
int temp6, temp7, temp8, temp9, temp10, temp11;
|
||||
__asm__ volatile(
|
||||
@ -877,7 +884,7 @@ static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
__asm__ volatile (
|
||||
@ -926,7 +933,7 @@ static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
__asm__ volatile (
|
||||
@ -974,7 +981,7 @@ static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
__asm__ volatile (
|
||||
"ulw %[temp0], -5(%[top]) \n\t"
|
||||
@ -1013,8 +1020,9 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DCMode8(C8DC8 + dst, left, top);
|
||||
VerticalPred8(C8VE8 + dst, top);
|
||||
@ -1033,8 +1041,9 @@ static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_MIPSdspR2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DCMode16(I16DC16 + dst, left, top);
|
||||
VerticalPred16(I16VE16 + dst, top);
|
||||
HorizontalPred16(I16HE16 + dst, left);
|
||||
@ -1043,7 +1052,8 @@ static void Intra16Preds_MIPSdspR2(uint8_t* dst,
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -1079,7 +1089,8 @@ static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
|
||||
GET_SSE_INNER(C) \
|
||||
GET_SSE_INNER(D)
|
||||
|
||||
static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1109,7 +1120,8 @@ static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1131,7 +1143,8 @@ static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1149,7 +1162,8 @@ static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1273,7 +1287,7 @@ static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
"3: \n\t"
|
||||
|
||||
static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
|
||||
int sign, coeff, level;
|
||||
int max_level = MAX_LEVEL;
|
||||
@ -1314,7 +1328,7 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -1360,7 +1374,8 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
"usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
|
||||
"usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
|
||||
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
|
||||
|
125
thirdparty/libwebp/src/dsp/enc_msa.c
vendored
125
thirdparty/libwebp/src/dsp/enc_msa.c
vendored
@ -41,8 +41,9 @@
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
@ -69,16 +70,18 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
|
||||
}
|
||||
|
||||
static void ITransform_MSA(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
static void ITransform_MSA(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
uint64_t out0, out1, out2, out3;
|
||||
uint32_t in0, in1, in2, in3;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
@ -131,7 +134,8 @@ static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
SD4(out0, out1, out2, out3, out, 8);
|
||||
}
|
||||
|
||||
static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_MSA(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
v8i16 in0 = { 0 };
|
||||
v8i16 in1 = { 0 };
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3;
|
||||
@ -168,7 +172,8 @@ static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
ST_SH2(out0, out1, out, 8);
|
||||
}
|
||||
|
||||
static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
static int TTransform_MSA(const uint8_t* WEBP_RESTRICT in,
|
||||
const uint16_t* WEBP_RESTRICT w) {
|
||||
int sum;
|
||||
uint32_t in0_m, in1_m, in2_m, in3_m;
|
||||
v16i8 src0 = { 0 };
|
||||
@ -200,15 +205,17 @@ static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_MSA(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
const int sum1 = TTransform_MSA(a, w);
|
||||
const int sum2 = TTransform_MSA(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_MSA(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -259,7 +266,9 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
|
||||
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
// vertical
|
||||
static WEBP_INLINE void VE4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top - 1);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
@ -272,7 +281,9 @@ static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
SW4(out, out, out, out, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
// horizontal
|
||||
static WEBP_INLINE void HE4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -284,7 +295,8 @@ static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
@ -293,7 +305,8 @@ static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
SW4(dc, dc, dc, dc, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16u8 A2 = { 0 };
|
||||
const uint64_t val_m = LD(top - 5);
|
||||
const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
|
||||
@ -313,7 +326,8 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
SW4(val3, val2, val1, val0, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void LD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
@ -333,7 +347,8 @@ static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
SW4(val0, val1, val2, val3, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VR4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -354,7 +369,8 @@ static WEBP_INLINE void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VL4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -375,7 +391,8 @@ static WEBP_INLINE void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -390,7 +407,8 @@ static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -411,7 +429,8 @@ static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void TM4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const v16i8 zero = { 0 };
|
||||
const v8i16 TL = (v8i16)__msa_fill_h(top[-1]);
|
||||
const v8i16 L0 = (v8i16)__msa_fill_h(top[-2]);
|
||||
@ -431,7 +450,8 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
#undef AVG3
|
||||
#undef AVG2
|
||||
|
||||
static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -451,7 +471,8 @@ static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
|
||||
ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void VerticalPred16x16(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VerticalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
const v16u8 out = LD_UB(top);
|
||||
STORE16x16(out, dst);
|
||||
@ -461,8 +482,8 @@ static WEBP_INLINE void VerticalPred16x16(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred16x16(uint8_t* dst,
|
||||
const uint8_t* left) {
|
||||
static WEBP_INLINE void HorizontalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < 16; j += 4) {
|
||||
@ -480,8 +501,9 @@ static WEBP_INLINE void HorizontalPred16x16(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion16x16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void TrueMotion16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
int j;
|
||||
@ -519,8 +541,9 @@ static WEBP_INLINE void TrueMotion16x16(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int DC;
|
||||
v16u8 out;
|
||||
if (top != NULL && left != NULL) {
|
||||
@ -548,8 +571,9 @@ static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
|
||||
STORE16x16(out, dst);
|
||||
}
|
||||
|
||||
static void Intra16Preds_MSA(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DCMode16x16(I16DC16 + dst, left, top);
|
||||
VerticalPred16x16(I16VE16 + dst, top);
|
||||
HorizontalPred16x16(I16HE16 + dst, left);
|
||||
@ -574,7 +598,8 @@ static void Intra16Preds_MSA(uint8_t* dst,
|
||||
SD4(out, out, out, out, dst + 4 * BPS, BPS); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void VerticalPred8x8(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VerticalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
const uint64_t out = LD(top);
|
||||
STORE8x8(out, dst);
|
||||
@ -584,7 +609,8 @@ static WEBP_INLINE void VerticalPred8x8(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred8x8(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HorizontalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < 8; j += 4) {
|
||||
@ -606,8 +632,9 @@ static WEBP_INLINE void HorizontalPred8x8(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void TrueMotion8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
int j;
|
||||
@ -646,8 +673,9 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DCMode8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
uint64_t out;
|
||||
v16u8 src = { 0 };
|
||||
if (top != NULL && left != NULL) {
|
||||
@ -670,8 +698,9 @@ static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
STORE8x8(out, dst);
|
||||
}
|
||||
|
||||
static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DCMode8x8(C8DC8 + dst, left, top);
|
||||
VerticalPred8x8(C8VE8 + dst, top);
|
||||
@ -712,7 +741,8 @@ static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -739,7 +769,8 @@ static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -758,7 +789,8 @@ static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -778,7 +810,8 @@ static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32_t sum = 0;
|
||||
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
|
||||
v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
|
||||
@ -801,7 +834,7 @@ static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
// Quantization
|
||||
|
||||
static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int sum;
|
||||
v8i16 in0, in1, sh0, sh1, out0, out1;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
|
||||
@ -854,7 +887,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
|
347
thirdparty/libwebp/src/dsp/enc_neon.c
vendored
347
thirdparty/libwebp/src/dsp/enc_neon.c
vendored
@ -60,8 +60,8 @@ static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
|
||||
|
||||
static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
|
||||
const int16x8_t row23,
|
||||
const uint8_t* const ref,
|
||||
uint8_t* const dst) {
|
||||
const uint8_t* WEBP_RESTRICT const ref,
|
||||
uint8_t* WEBP_RESTRICT const dst) {
|
||||
uint32x2_t dst01 = vdup_n_u32(0);
|
||||
uint32x2_t dst23 = vdup_n_u32(0);
|
||||
|
||||
@ -120,8 +120,9 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
Transpose8x2_NEON(E0, E1, rows);
|
||||
}
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int16x8x2_t rows;
|
||||
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
|
||||
TransformPass_NEON(&rows);
|
||||
@ -131,8 +132,9 @@ static void ITransformOne_NEON(const uint8_t* ref,
|
||||
|
||||
#else
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const int kBPS = BPS;
|
||||
const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
|
||||
|
||||
@ -247,8 +249,9 @@ static void ITransformOne_NEON(const uint8_t* ref,
|
||||
|
||||
#endif // WEBP_USE_INTRINSICS
|
||||
|
||||
static void ITransform_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst, int do_two) {
|
||||
static void ITransform_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
ITransformOne_NEON(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne_NEON(ref + 4, in + 16, dst + 4);
|
||||
@ -294,8 +297,9 @@ static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
|
||||
return vreinterpretq_s16_u16(vsubl_u8(a, b));
|
||||
}
|
||||
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int16x8_t d0d1, d3d2; // working 4x4 int16 variables
|
||||
{
|
||||
const uint8x16_t S0 = Load4x4_NEON(src);
|
||||
@ -364,8 +368,9 @@ static const int32_t kCoeff32[] = {
|
||||
51000, 51000, 51000, 51000
|
||||
};
|
||||
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const int kBPS = BPS;
|
||||
const uint8_t* src_ptr = src;
|
||||
const uint8_t* ref_ptr = ref;
|
||||
@ -484,7 +489,8 @@ static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
src += stride; \
|
||||
} while (0)
|
||||
|
||||
static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
|
||||
static void FTransformWHT_NEON(const int16_t* WEBP_RESTRICT src,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const int stride = 16;
|
||||
const int16x4_t zero = vdup_n_s16(0);
|
||||
int32x4x4_t tmp0;
|
||||
@ -659,8 +665,9 @@ static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
|
||||
@ -701,8 +708,9 @@ static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
}
|
||||
#undef LOAD_LANE_32b
|
||||
|
||||
static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -715,9 +723,10 @@ static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
@ -747,9 +756,9 @@ static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
uint32x4_t* const sum) {
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(
|
||||
const uint8_t* WEBP_RESTRICT const a, const uint8_t* WEBP_RESTRICT const b,
|
||||
uint32x4_t* const sum) {
|
||||
const uint8x16_t a0 = vld1q_u8(a);
|
||||
const uint8x16_t b0 = vld1q_u8(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
@ -775,7 +784,8 @@ static int SumToInt_NEON(uint32x4_t sum) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 16; ++y) {
|
||||
@ -784,7 +794,8 @@ static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
@ -793,7 +804,8 @@ static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
@ -806,7 +818,8 @@ static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
const uint8x16_t a0 = Load4x4_NEON(a);
|
||||
const uint8x16_t b0 = Load4x4_NEON(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
@ -825,8 +838,9 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
// Compilation with gcc-4.6.x is problematic for now.
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
|
||||
static int16x8_t Quantize_NEON(int16_t* const in,
|
||||
const VP8Matrix* const mtx, int offset) {
|
||||
static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx,
|
||||
int offset) {
|
||||
const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
|
||||
const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
|
||||
const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
|
||||
@ -860,7 +874,7 @@ static const uint8_t kShuffles[4][8] = {
|
||||
};
|
||||
|
||||
static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const int16x8_t out0 = Quantize_NEON(in, mtx, 0);
|
||||
const int16x8_t out1 = Quantize_NEON(in, mtx, 8);
|
||||
uint8x8x4_t shuffles;
|
||||
@ -902,7 +916,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -911,6 +925,271 @@ static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
|
||||
#endif // !WORK_AROUND_GCC
|
||||
|
||||
#if WEBP_AARCH64
|
||||
|
||||
#if BPS == 32
|
||||
#define DC4_VE4_HE4_TM4_NEON(dst, tbl, res, lane) \
|
||||
do { \
|
||||
uint8x16_t r; \
|
||||
r = vqtbl2q_u8(qcombined, tbl); \
|
||||
r = vreinterpretq_u8_u32( \
|
||||
vsetq_lane_u32(vget_lane_u32(vreinterpret_u32_u8(res), lane), \
|
||||
vreinterpretq_u32_u8(r), 1)); \
|
||||
vst1q_u8(dst, r); \
|
||||
} while (0)
|
||||
|
||||
#define RD4_VR4_LD4_VL4_NEON(dst, tbl) \
|
||||
do { \
|
||||
uint8x16_t r; \
|
||||
r = vqtbl2q_u8(qcombined, tbl); \
|
||||
vst1q_u8(dst, r); \
|
||||
} while (0)
|
||||
|
||||
static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13
|
||||
// L K J I X A B C D E F G H
|
||||
// -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7
|
||||
static const uint8_t kLookupTbl1[64] = {
|
||||
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12,
|
||||
3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0,
|
||||
4, 20, 21, 22, 3, 18, 2, 17, 3, 19, 4, 20, 2, 17, 1, 16,
|
||||
2, 18, 3, 19, 1, 16, 31, 31, 1, 17, 2, 18, 31, 31, 31, 31
|
||||
};
|
||||
|
||||
static const uint8_t kLookupTbl2[64] = {
|
||||
20, 21, 22, 23, 5, 6, 7, 8, 22, 23, 24, 25, 6, 7, 8, 9,
|
||||
19, 20, 21, 22, 20, 21, 22, 23, 23, 24, 25, 26, 22, 23, 24, 25,
|
||||
18, 19, 20, 21, 19, 5, 6, 7, 24, 25, 26, 27, 7, 8, 9, 26,
|
||||
17, 18, 19, 20, 18, 20, 21, 22, 25, 26, 27, 28, 23, 24, 25, 27
|
||||
};
|
||||
|
||||
static const uint8_t kLookupTbl3[64] = {
|
||||
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 19, 19, 19, 19,
|
||||
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 18, 18, 18, 18,
|
||||
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 17, 17, 17, 17,
|
||||
30, 30, 30, 30, 0, 0, 0, 0, 21, 22, 23, 24, 16, 16, 16, 16
|
||||
};
|
||||
|
||||
const uint8x16x4_t lookup_avgs1 = vld1q_u8_x4(kLookupTbl1);
|
||||
const uint8x16x4_t lookup_avgs2 = vld1q_u8_x4(kLookupTbl2);
|
||||
const uint8x16x4_t lookup_avgs3 = vld1q_u8_x4(kLookupTbl3);
|
||||
|
||||
const uint8x16_t preload = vld1q_u8(top - 5);
|
||||
uint8x16x2_t qcombined;
|
||||
uint8x16_t result0, result1;
|
||||
|
||||
uint8x16_t a = vqtbl1q_u8(preload, lookup_avgs1.val[0]);
|
||||
uint8x16_t b = preload;
|
||||
uint8x16_t c = vextq_u8(a, a, 2);
|
||||
|
||||
uint8x16_t avg3_all = vrhaddq_u8(vhaddq_u8(a, c), b);
|
||||
uint8x16_t avg2_all = vrhaddq_u8(a, b);
|
||||
|
||||
uint8x8_t preload_x8, sub_a, sub_c;
|
||||
uint8_t result_u8;
|
||||
uint8x8_t res_lo, res_hi;
|
||||
uint8x16_t full_b;
|
||||
uint16x8_t sub, sum_lo, sum_hi;
|
||||
|
||||
preload_x8 = vget_low_u8(c);
|
||||
preload_x8 = vset_lane_u8(vgetq_lane_u8(preload, 0), preload_x8, 3);
|
||||
|
||||
result_u8 = (vaddlv_u8(preload_x8) + 4) >> 3;
|
||||
|
||||
avg3_all = vsetq_lane_u8(vgetq_lane_u8(preload, 0), avg3_all, 15);
|
||||
avg3_all = vsetq_lane_u8(result_u8, avg3_all, 14);
|
||||
|
||||
qcombined.val[0] = avg2_all;
|
||||
qcombined.val[1] = avg3_all;
|
||||
|
||||
sub_a = vdup_laneq_u8(preload, 4);
|
||||
|
||||
// preload = {a,b,c,d,...} => full_b = {d,d,d,d,c,c,c,c,b,b,b,b,a,a,a,a}
|
||||
full_b = vqtbl1q_u8(preload, lookup_avgs1.val[1]);
|
||||
// preload = {a,b,c,d,...} => sub_c = {a,b,c,d,a,b,c,d,a,b,c,d,a,b,c,d}
|
||||
sub_c = vreinterpret_u8_u32(vdup_n_u32(
|
||||
vgetq_lane_u32(vreinterpretq_u32_u8(vextq_u8(preload, preload, 5)), 0)));
|
||||
|
||||
sub = vsubl_u8(sub_c, sub_a);
|
||||
sum_lo = vaddw_u8(sub, vget_low_u8(full_b));
|
||||
res_lo = vqmovun_s16(vreinterpretq_s16_u16(sum_lo));
|
||||
|
||||
sum_hi = vaddw_u8(sub, vget_high_u8(full_b));
|
||||
res_hi = vqmovun_s16(vreinterpretq_s16_u16(sum_hi));
|
||||
|
||||
// DC4, VE4, HE4, TM4
|
||||
DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 0, lookup_avgs3.val[0], res_lo, 0);
|
||||
DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 1, lookup_avgs3.val[1], res_lo, 1);
|
||||
DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 2, lookup_avgs3.val[2], res_hi, 0);
|
||||
DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 3, lookup_avgs3.val[3], res_hi, 1);
|
||||
|
||||
// RD4, VR4, LD4, VL4
|
||||
RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 0, lookup_avgs2.val[0]);
|
||||
RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 1, lookup_avgs2.val[1]);
|
||||
RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 2, lookup_avgs2.val[2]);
|
||||
RD4_VR4_LD4_VL4_NEON(dst + I4RD4 + BPS * 3, lookup_avgs2.val[3]);
|
||||
|
||||
// HD4, HU4
|
||||
result0 = vqtbl2q_u8(qcombined, lookup_avgs1.val[2]);
|
||||
result1 = vqtbl2q_u8(qcombined, lookup_avgs1.val[3]);
|
||||
|
||||
vst1_u8(dst + I4HD4 + BPS * 0, vget_low_u8(result0));
|
||||
vst1_u8(dst + I4HD4 + BPS * 1, vget_high_u8(result0));
|
||||
vst1_u8(dst + I4HD4 + BPS * 2, vget_low_u8(result1));
|
||||
vst1_u8(dst + I4HD4 + BPS * 3, vget_high_u8(result1));
|
||||
}
|
||||
#endif // BPS == 32
|
||||
|
||||
static WEBP_INLINE void Fill_NEON(uint8_t* dst, const uint8_t value) {
|
||||
uint8x16_t a = vdupq_n_u8(value);
|
||||
int i;
|
||||
for (i = 0; i < 16; i++) {
|
||||
vst1q_u8(dst + BPS * i, a);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Fill16_NEON(uint8_t* dst, const uint8_t* src) {
|
||||
uint8x16_t a = vld1q_u8(src);
|
||||
int i;
|
||||
for (i = 0; i < 16; i++) {
|
||||
vst1q_u8(dst + BPS * i, a);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred16_NEON(uint8_t* dst,
|
||||
const uint8_t* left) {
|
||||
uint8x16_t a;
|
||||
|
||||
if (left == NULL) {
|
||||
Fill_NEON(dst, 129);
|
||||
return;
|
||||
}
|
||||
|
||||
a = vld1q_u8(left + 0);
|
||||
vst1q_u8(dst + BPS * 0, vdupq_laneq_u8(a, 0));
|
||||
vst1q_u8(dst + BPS * 1, vdupq_laneq_u8(a, 1));
|
||||
vst1q_u8(dst + BPS * 2, vdupq_laneq_u8(a, 2));
|
||||
vst1q_u8(dst + BPS * 3, vdupq_laneq_u8(a, 3));
|
||||
vst1q_u8(dst + BPS * 4, vdupq_laneq_u8(a, 4));
|
||||
vst1q_u8(dst + BPS * 5, vdupq_laneq_u8(a, 5));
|
||||
vst1q_u8(dst + BPS * 6, vdupq_laneq_u8(a, 6));
|
||||
vst1q_u8(dst + BPS * 7, vdupq_laneq_u8(a, 7));
|
||||
vst1q_u8(dst + BPS * 8, vdupq_laneq_u8(a, 8));
|
||||
vst1q_u8(dst + BPS * 9, vdupq_laneq_u8(a, 9));
|
||||
vst1q_u8(dst + BPS * 10, vdupq_laneq_u8(a, 10));
|
||||
vst1q_u8(dst + BPS * 11, vdupq_laneq_u8(a, 11));
|
||||
vst1q_u8(dst + BPS * 12, vdupq_laneq_u8(a, 12));
|
||||
vst1q_u8(dst + BPS * 13, vdupq_laneq_u8(a, 13));
|
||||
vst1q_u8(dst + BPS * 14, vdupq_laneq_u8(a, 14));
|
||||
vst1q_u8(dst + BPS * 15, vdupq_laneq_u8(a, 15));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred16_NEON(uint8_t* dst, const uint8_t* top) {
|
||||
if (top != NULL) {
|
||||
Fill16_NEON(dst, top);
|
||||
} else {
|
||||
Fill_NEON(dst, 127);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode_NEON(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
uint8_t s;
|
||||
|
||||
if (top != NULL) {
|
||||
uint16_t dc;
|
||||
dc = vaddlvq_u8(vld1q_u8(top));
|
||||
if (left != NULL) {
|
||||
// top and left present.
|
||||
dc += vaddlvq_u8(vld1q_u8(left));
|
||||
s = vqrshrnh_n_u16(dc, 5);
|
||||
} else {
|
||||
// top but no left.
|
||||
s = vqrshrnh_n_u16(dc, 4);
|
||||
}
|
||||
} else {
|
||||
if (left != NULL) {
|
||||
uint16_t dc;
|
||||
// left but no top.
|
||||
dc = vaddlvq_u8(vld1q_u8(left));
|
||||
s = vqrshrnh_n_u16(dc, 4);
|
||||
} else {
|
||||
// No top, no left, nothing.
|
||||
s = 0x80;
|
||||
}
|
||||
}
|
||||
Fill_NEON(dst, s);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotionHelper_NEON(uint8_t* dst,
|
||||
const uint8x8_t outer,
|
||||
const uint8x8x2_t inner,
|
||||
const uint16x8_t a, int i,
|
||||
const int n) {
|
||||
uint8x8_t d1, d2;
|
||||
uint16x8_t r1, r2;
|
||||
|
||||
r1 = vaddl_u8(outer, inner.val[0]);
|
||||
r1 = vqsubq_u16(r1, a);
|
||||
d1 = vqmovun_s16(vreinterpretq_s16_u16(r1));
|
||||
r2 = vaddl_u8(outer, inner.val[1]);
|
||||
r2 = vqsubq_u16(r2, a);
|
||||
d2 = vqmovun_s16(vreinterpretq_s16_u16(r2));
|
||||
vst1_u8(dst + BPS * (i * 4 + n), d1);
|
||||
vst1_u8(dst + BPS * (i * 4 + n) + 8, d2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
int i;
|
||||
uint16x8_t a;
|
||||
uint8x8x2_t inner;
|
||||
|
||||
if (left == NULL) {
|
||||
// True motion without left samples (hence: with default 129 value) is
|
||||
// equivalent to VE prediction where you just copy the top samples.
|
||||
// Note that if top samples are not available, the default value is then
|
||||
// 129, and not 127 as in the VerticalPred case.
|
||||
if (top != NULL) {
|
||||
VerticalPred16_NEON(dst, top);
|
||||
} else {
|
||||
Fill_NEON(dst, 129);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// left is not NULL.
|
||||
if (top == NULL) {
|
||||
HorizontalPred16_NEON(dst, left);
|
||||
return;
|
||||
}
|
||||
|
||||
// Neither left nor top are NULL.
|
||||
a = vdupq_n_u16(left[-1]);
|
||||
inner = vld1_u8_x2(top);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
const uint8x8x4_t outer = vld4_dup_u8(&left[i * 4]);
|
||||
|
||||
TrueMotionHelper_NEON(dst, outer.val[0], inner, a, i, 0);
|
||||
TrueMotionHelper_NEON(dst, outer.val[1], inner, a, i, 1);
|
||||
TrueMotionHelper_NEON(dst, outer.val[2], inner, a, i, 2);
|
||||
TrueMotionHelper_NEON(dst, outer.val[3], inner, a, i, 3);
|
||||
}
|
||||
}
|
||||
|
||||
static void Intra16Preds_NEON(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DCMode_NEON(I16DC16 + dst, left, top);
|
||||
VerticalPred16_NEON(I16VE16 + dst, top);
|
||||
HorizontalPred16_NEON(I16HE16 + dst, left);
|
||||
TrueMotion_NEON(I16TM16 + dst, left, top);
|
||||
}
|
||||
|
||||
#endif // WEBP_AARCH64
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
@ -931,9 +1210,17 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
|
||||
VP8SSE8x8 = SSE8x8_NEON;
|
||||
VP8SSE4x4 = SSE4x4_NEON;
|
||||
|
||||
#if WEBP_AARCH64
|
||||
#if BPS == 32
|
||||
VP8EncPredLuma4 = Intra4Preds_NEON;
|
||||
#endif
|
||||
VP8EncPredLuma16 = Intra16Preds_NEON;
|
||||
#endif
|
||||
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
VP8EncQuantizeBlock = QuantizeBlock_NEON;
|
||||
VP8EncQuantize2Blocks = Quantize2Blocks_NEON;
|
||||
VP8EncQuantizeBlockWHT = QuantizeBlock_NEON;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
202
thirdparty/libwebp/src/dsp/enc_sse2.c
vendored
202
thirdparty/libwebp/src/dsp/enc_sse2.c
vendored
@ -26,8 +26,9 @@
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
// Does one inverse transform.
|
||||
static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -177,8 +178,9 @@ static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
|
||||
// Does two inverse transforms.
|
||||
static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -316,7 +318,9 @@ static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
}
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int do_two) {
|
||||
if (do_two) {
|
||||
ITransform_Two_SSE2(ref, in, dst);
|
||||
@ -373,7 +377,7 @@ static void FTransformPass1_SSE2(const __m128i* const in01,
|
||||
|
||||
static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
const __m128i* const v32,
|
||||
int16_t* out) {
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i seven = _mm_set1_epi16(7);
|
||||
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
|
||||
@ -424,8 +428,9 @@ static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
_mm_storeu_si128((__m128i*)&out[8], d2_f3);
|
||||
}
|
||||
|
||||
static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
// Load src.
|
||||
const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
|
||||
@ -468,8 +473,9 @@ static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
FTransformPass2_SSE2(&v01, &v32, out);
|
||||
}
|
||||
|
||||
static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
static void FTransform2_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load src and convert to 16b.
|
||||
@ -517,7 +523,8 @@ static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
FTransformPass2_SSE2(&v01h, &v32h, out + 16);
|
||||
}
|
||||
|
||||
static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
static void FTransformWHTRow_SSE2(const int16_t* WEBP_RESTRICT const in,
|
||||
__m128i* const out) {
|
||||
const __m128i kMult = _mm_set_epi16(-1, 1, -1, 1, 1, 1, 1, 1);
|
||||
const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
|
||||
const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
|
||||
@ -533,7 +540,8 @@ static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
*out = _mm_madd_epi16(D, kMult);
|
||||
}
|
||||
|
||||
static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
static void FTransformWHT_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
// Input is 12b signed.
|
||||
__m128i row0, row1, row2, row3;
|
||||
// Rows are 14b signed.
|
||||
@ -566,9 +574,10 @@ static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
// Compute susceptibility based on DCT-coeff histograms:
|
||||
// the higher, the "easier" the macroblock is to compress.
|
||||
|
||||
static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
@ -640,7 +649,8 @@ static WEBP_INLINE void Fill_SSE2(uint8_t* dst, int value, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
int j;
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -648,7 +658,8 @@ static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_values = _mm_load_si128((const __m128i*)top);
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -656,8 +667,9 @@ static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
if (top != NULL) {
|
||||
if (size == 8) {
|
||||
VE8uv_SSE2(dst, top);
|
||||
@ -669,7 +681,8 @@ static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
@ -678,7 +691,8 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
@ -687,8 +701,9 @@ static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
int size) {
|
||||
if (left != NULL) {
|
||||
if (size == 8) {
|
||||
HE8uv_SSE2(dst, left);
|
||||
@ -700,8 +715,9 @@ static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top, int size) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
if (size == 8) {
|
||||
@ -728,8 +744,10 @@ static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
TM_SSE2(dst, left, top, size);
|
||||
@ -749,8 +767,9 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
|
||||
const __m128i combined = _mm_unpacklo_epi64(top_values, left_values);
|
||||
@ -758,7 +777,8 @@ static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
Put8x8uv_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i sum = _mm_sad_epu8(top_values, zero);
|
||||
@ -766,7 +786,8 @@ static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
Put8x8uv_SSE2(DC >> 3, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC8uvNoLeft_SSE2(dst, left);
|
||||
}
|
||||
@ -775,8 +796,9 @@ static WEBP_INLINE void DC8uvNoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put8x8uv_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC8uv_SSE2(dst, left, top);
|
||||
@ -790,8 +812,9 @@ static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const __m128i left_row = _mm_load_si128((const __m128i*)left);
|
||||
const int DC =
|
||||
@ -799,13 +822,15 @@ static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
Put16_SSE2(DC >> 5, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const int DC = VP8HorizontalAdd8b(&top_row) + 8;
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC16NoLeft_SSE2(dst, left);
|
||||
}
|
||||
@ -814,8 +839,9 @@ static WEBP_INLINE void DC16NoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put16_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC16_SSE2(dst, left, top);
|
||||
@ -844,8 +870,9 @@ static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
|
||||
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
|
||||
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // vertical
|
||||
// vertical
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(top - 1));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -861,8 +888,9 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // horizontal
|
||||
// horizontal
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -874,15 +902,17 @@ static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
Fill_SSE2(dst, dc >> 3, 4);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-Left
|
||||
// Down-Left
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -898,8 +928,9 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Right
|
||||
// Vertical-Right
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -924,8 +955,9 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Left
|
||||
// Vertical-Left
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -951,8 +983,9 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
DST(3, 3) = (extra_out >> 8) & 0xff;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-right
|
||||
// Down-right
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
|
||||
const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
|
||||
@ -968,7 +1001,8 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -983,7 +1017,8 @@ static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -1006,7 +1041,8 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
|
||||
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
|
||||
@ -1028,7 +1064,8 @@ static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
static void Intra4Preds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC4_SSE2(I4DC4 + dst, top);
|
||||
TM4_SSE2(I4TM4 + dst, top);
|
||||
VE4_SSE2(I4VE4 + dst, top);
|
||||
@ -1044,8 +1081,9 @@ static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
static void IntraChromaPreds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
// U block
|
||||
DC8uvMode_SSE2(C8DC8 + dst, left, top);
|
||||
VerticalPred_SSE2(C8VE8 + dst, top, 8);
|
||||
@ -1064,8 +1102,9 @@ static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
static void Intra16Preds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
DC16Mode_SSE2(I16DC16 + dst, left, top);
|
||||
VerticalPred_SSE2(I16VE16 + dst, top, 16);
|
||||
HorizontalPred_SSE2(I16HE16 + dst, left, 16);
|
||||
@ -1092,7 +1131,8 @@ static WEBP_INLINE void SubtractAndAccumulate_SSE2(const __m128i a,
|
||||
*sum = _mm_add_epi32(sum1, sum2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
int num_pairs) {
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
int32_t tmp[4];
|
||||
@ -1114,18 +1154,21 @@ static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
|
||||
}
|
||||
|
||||
static int SSE16x16_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x16_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return SSE_16xN_SSE2(a, b, 8);
|
||||
}
|
||||
|
||||
static int SSE16x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE16x8_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
return SSE_16xN_SSE2(a, b, 4);
|
||||
}
|
||||
|
||||
#define LOAD_8x16b(ptr) \
|
||||
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
|
||||
|
||||
static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE8x8_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int num_pairs = 4;
|
||||
__m128i sum = zero;
|
||||
@ -1152,7 +1195,8 @@ static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
}
|
||||
#undef LOAD_8x16b
|
||||
|
||||
static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load values. Note that we read 8 pixels instead of 4,
|
||||
@ -1189,7 +1233,7 @@ static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
static void Mean16x4_SSE2(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
|
||||
const __m128i mask = _mm_set1_epi16(0x00ff);
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&ref[BPS * 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
|
||||
@ -1227,8 +1271,9 @@ static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
static int TTransform_SSE2(const uint8_t* WEBP_RESTRICT inA,
|
||||
const uint8_t* WEBP_RESTRICT inB,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int32_t sum[4];
|
||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -1328,14 +1373,16 @@ static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_SSE2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
const int diff_sum = TTransform_SSE2(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_SSE2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -1350,9 +1397,10 @@ static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
// Quantization
|
||||
//
|
||||
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const uint16_t* WEBP_RESTRICT const sharpen,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i coeff0, coeff8;
|
||||
@ -1463,17 +1511,17 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
|
21
thirdparty/libwebp/src/dsp/enc_sse41.c
vendored
21
thirdparty/libwebp/src/dsp/enc_sse41.c
vendored
@ -23,9 +23,10 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Compute susceptibility based on DCT-coeff histograms.
|
||||
|
||||
static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
|
||||
static void CollectHistogram_SSE41(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* const histo) {
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
@ -168,14 +169,16 @@ static int TTransform_SSE41(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto4x4_SSE41(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
const int diff_sum = TTransform_SSE41(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
static int Disto16x16_SSE41(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -301,17 +304,17 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
#undef PSHUFB_CST
|
||||
|
||||
static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* const mtx) {
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
|
140
thirdparty/libwebp/src/dsp/filters.c
vendored
140
thirdparty/libwebp/src/dsp/filters.c
vendored
@ -23,55 +23,42 @@
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; /* Silence unused warning. */ \
|
||||
} while (0)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length, int inverse) {
|
||||
static WEBP_INLINE void PredictLine_C(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
if (inverse) {
|
||||
for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] + pred[i]);
|
||||
} else {
|
||||
for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
|
||||
}
|
||||
for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
++row;
|
||||
PredictLine_C(in, preds - stride, out, 1);
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -81,35 +68,23 @@ static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
} else {
|
||||
// We are starting from in-between. Make sure 'preds' points to prev row.
|
||||
preds -= stride;
|
||||
}
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
PredictLine_C(in, preds, out, width, inverse);
|
||||
++row;
|
||||
for (row = 1; row < height; ++row) {
|
||||
PredictLine_C(in, preds, out, width);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -126,40 +101,31 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
int inverse, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = inverse ? out : in;
|
||||
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
out[0] = in[0];
|
||||
PredictLine_C(in + 1, preds, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
int w;
|
||||
// leftmost pixel: predict from above.
|
||||
PredictLine_C(in, preds - stride, out, 1, inverse);
|
||||
PredictLine_C(in, preds - stride, out, 1);
|
||||
for (w = 1; w < width; ++w) {
|
||||
const int pred = GradientPredictor_C(preds[w - 1],
|
||||
preds[w - stride],
|
||||
preds[w - stride - 1]);
|
||||
out[w] = (uint8_t)(in[w] + (inverse ? pred : -pred));
|
||||
out[w] = (uint8_t)(in[w] - pred);
|
||||
}
|
||||
++row;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
@ -172,20 +138,22 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void HorizontalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
|
||||
filtered_data);
|
||||
static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
|
||||
static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
|
134
thirdparty/libwebp/src/dsp/filters_mips_dsp_r2.c
vendored
134
thirdparty/libwebp/src/dsp/filters_mips_dsp_r2.c
vendored
@ -26,13 +26,12 @@
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; /* Silence unused warning. */ \
|
||||
} while (0)
|
||||
|
||||
#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do { \
|
||||
@ -103,7 +102,8 @@
|
||||
); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int length) {
|
||||
DO_PREDICT_LINE(src, dst, length, 0);
|
||||
}
|
||||
@ -184,99 +184,75 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
// Horizontal filter.
|
||||
|
||||
#define FILTER_LINE_BY_LINE do { \
|
||||
while (row < last_row) { \
|
||||
for (row = 1; row < height; ++row) { \
|
||||
PREDICT_LINE_ONE_PASS(in, preds - stride, out); \
|
||||
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0); \
|
||||
++row; \
|
||||
preds += stride; \
|
||||
in += stride; \
|
||||
out += stride; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = in;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
FILTER_LINE_BY_LINE;
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
|
||||
int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
#define FILTER_LINE_BY_LINE do { \
|
||||
while (row < last_row) { \
|
||||
for (row = 1; row < height; ++row) { \
|
||||
DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0); \
|
||||
++row; \
|
||||
preds += stride; \
|
||||
in += stride; \
|
||||
out += stride; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = in;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
} else {
|
||||
// We are starting from in-between. Make sure 'preds' points to prev row.
|
||||
preds -= stride;
|
||||
}
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
FILTER_LINE_BY_LINE;
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -297,7 +273,7 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
}
|
||||
|
||||
#define FILTER_LINE_BY_LINE(PREDS, OPERATION) do { \
|
||||
while (row < last_row) { \
|
||||
for (row = 1; row < height; ++row) { \
|
||||
int w; \
|
||||
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out); \
|
||||
for (w = 1; w < width; ++w) { \
|
||||
@ -306,42 +282,34 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
PREDS[w - stride - 1]); \
|
||||
out[w] = in[w] OPERATION pred; \
|
||||
} \
|
||||
++row; \
|
||||
in += stride; \
|
||||
out += stride; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows, uint8_t* out) {
|
||||
const uint8_t* preds;
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
preds = in;
|
||||
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
out[0] = in[0];
|
||||
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
|
||||
preds += stride;
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
FILTER_LINE_BY_LINE(in, -);
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_MIPSdspR2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
27
thirdparty/libwebp/src/dsp/filters_msa.c
vendored
27
thirdparty/libwebp/src/dsp/filters_msa.c
vendored
@ -21,7 +21,8 @@
|
||||
|
||||
static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int length) {
|
||||
v16u8 src0, pred0, dst0;
|
||||
assert(length >= 0);
|
||||
while (length >= 32) {
|
||||
@ -58,8 +59,9 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
@ -68,8 +70,9 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horrizontal filter
|
||||
|
||||
static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
const uint8_t* preds = data;
|
||||
const uint8_t* in = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -99,8 +102,8 @@ static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
|
||||
static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
const uint8_t* ppred,
|
||||
uint8_t* poutput, int stride,
|
||||
int size) {
|
||||
uint8_t* WEBP_RESTRICT poutput,
|
||||
int stride, int size) {
|
||||
int w;
|
||||
const v16i8 zero = { 0 };
|
||||
while (size >= 16) {
|
||||
@ -131,8 +134,9 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
}
|
||||
|
||||
|
||||
static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -159,8 +163,9 @@ static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter
|
||||
|
||||
static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
|
123
thirdparty/libwebp/src/dsp/filters_neon.c
vendored
123
thirdparty/libwebp/src/dsp/filters_neon.c
vendored
@ -23,13 +23,12 @@
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; /* Silence unused warning. */ \
|
||||
} while (0)
|
||||
|
||||
// load eight u8 and widen to s16
|
||||
@ -46,7 +45,7 @@
|
||||
#define ROTATE_RIGHT_N(A, N) vext_u8((A), (A), (8 - (N)) % 8)
|
||||
|
||||
static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
assert(length >= 0);
|
||||
for (i = 0; i + 16 <= length; i += 16) {
|
||||
@ -59,86 +58,70 @@ static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft_NEON(const uint8_t* src, uint8_t* dst, int length) {
|
||||
static void PredictLineLeft_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
PredictLine_NEON(src, src - 1, dst, length);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoHorizontalFilter_NEON(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
out[0] = in[0] - in[-stride];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void HorizontalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void HorizontalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
|
||||
static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
PredictLine_NEON(in, in - stride, out, width);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void VerticalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -151,7 +134,8 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
|
||||
static void GradientPredictDirect_NEON(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
uint8_t* WEBP_RESTRICT const out,
|
||||
int length) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= length; i += 8) {
|
||||
const uint8x8_t A = vld1_u8(&row[i - 1]);
|
||||
@ -167,40 +151,31 @@ static void GradientPredictDirect_NEON(const uint8_t* const row,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
out[0] = in[0] - in[-stride];
|
||||
GradientPredictDirect_NEON(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_NEON(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void GradientFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
#undef DCHECK
|
||||
|
117
thirdparty/libwebp/src/dsp/filters_sse2.c
vendored
117
thirdparty/libwebp/src/dsp/filters_sse2.c
vendored
@ -27,15 +27,15 @@
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
|
||||
(void)height; /* Silence unused warning. */ \
|
||||
} while (0)
|
||||
|
||||
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
static void PredictLineTop_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -53,7 +53,8 @@ static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
static void PredictLineLeft_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -73,32 +74,23 @@ static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
if (row == 0) {
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Leftmost pixel is the same as input for topmost scanline.
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
// Leftmost pixel is predicted from above.
|
||||
out[0] = in[0] - in[-stride];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
@ -107,30 +99,22 @@ static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
if (row == 0) {
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
// Very first top-left pixel is copied.
|
||||
out[0] = in[0];
|
||||
// Rest of top scan-line is left-predicted.
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
PredictLineTop_SSE2(in, in - stride, out, width);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
@ -146,7 +130,8 @@ static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
|
||||
static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* const out, int length) {
|
||||
uint8_t* WEBP_RESTRICT const out,
|
||||
int length) {
|
||||
const int max_pos = length & ~7;
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -170,30 +155,22 @@ static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
int row, int num_rows,
|
||||
uint8_t* out) {
|
||||
const size_t start_offset = row * stride;
|
||||
const int last_row = row + num_rows;
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
in += start_offset;
|
||||
out += start_offset;
|
||||
|
||||
// left prediction for top scan-line
|
||||
if (row == 0) {
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
row = 1;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
out[0] = in[0];
|
||||
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
|
||||
in += stride;
|
||||
out += stride;
|
||||
|
||||
// Filter line-by-line.
|
||||
while (row < last_row) {
|
||||
for (row = 1; row < height; ++row) {
|
||||
out[0] = (uint8_t)(in[0] - in[-stride]);
|
||||
GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
|
||||
++row;
|
||||
in += stride;
|
||||
out += stride;
|
||||
}
|
||||
@ -203,20 +180,22 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
|
||||
filtered_data);
|
||||
static void HorizontalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoHorizontalFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
static void VerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoVerticalFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
|
||||
static void GradientFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
DoGradientFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
36
thirdparty/libwebp/src/dsp/lossless.c
vendored
36
thirdparty/libwebp/src/dsp/lossless.c
vendored
@ -107,14 +107,14 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictors
|
||||
|
||||
uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
static uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)top;
|
||||
(void)left;
|
||||
return ARGB_BLACK;
|
||||
}
|
||||
uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
static uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top) {
|
||||
(void)top;
|
||||
return *left;
|
||||
}
|
||||
@ -182,13 +182,13 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
}
|
||||
|
||||
static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int x;
|
||||
(void)upper;
|
||||
for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
|
||||
}
|
||||
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint32_t left = out[-1];
|
||||
(void)upper;
|
||||
@ -441,8 +441,8 @@ static int is_big_endian(void) {
|
||||
return (tmp.b[0] != 1);
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -452,8 +452,8 @@ void VP8LConvertBGRAToRGB_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -464,8 +464,8 @@ void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -481,8 +481,8 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -498,8 +498,8 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -509,8 +509,8 @@ void VP8LConvertBGRAToBGR_C(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
|
||||
int swap_on_big_endian) {
|
||||
static void CopyOrSwap(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst, int swap_on_big_endian) {
|
||||
if (is_big_endian() == swap_on_big_endian) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
|
103
thirdparty/libwebp/src/dsp/lossless.h
vendored
103
thirdparty/libwebp/src/dsp/lossless.h
vendored
@ -18,6 +18,7 @@
|
||||
#include "src/webp/types.h"
|
||||
#include "src/webp/decode.h"
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
@ -32,10 +33,6 @@ typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
extern VP8LPredictorFunc VP8LPredictors[16];
|
||||
|
||||
uint32_t VP8LPredictor0_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor1_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor2_C(const uint32_t* const left,
|
||||
const uint32_t* const top);
|
||||
uint32_t VP8LPredictor3_C(const uint32_t* const left,
|
||||
@ -64,7 +61,7 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
// These Add/Sub function expects upper[-1] and out[-1] to be readable.
|
||||
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
|
||||
const uint32_t* upper, int num_pixels,
|
||||
uint32_t* out);
|
||||
uint32_t* WEBP_RESTRICT out);
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
|
||||
|
||||
@ -95,8 +92,8 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
|
||||
const uint32_t* const in, uint32_t* const out);
|
||||
|
||||
// Color space conversion.
|
||||
typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst);
|
||||
typedef void (*VP8LConvertFunc)(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGB;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
|
||||
@ -131,13 +128,16 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
|
||||
const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst);
|
||||
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst);
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
|
||||
uint32_t* dst);
|
||||
|
||||
@ -149,32 +149,35 @@ void VP8LDspInit(void);
|
||||
|
||||
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
|
||||
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
|
||||
uint32_t* dst, int num_pixels);
|
||||
typedef void (*VP8LTransformColorFunc)(
|
||||
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT dst,
|
||||
int num_pixels);
|
||||
extern VP8LTransformColorFunc VP8LTransformColor;
|
||||
typedef void (*VP8LCollectColorBlueTransformsFunc)(
|
||||
const uint32_t* argb, int stride,
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue, int histo[]);
|
||||
int green_to_blue, int red_to_blue, uint32_t histo[]);
|
||||
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
|
||||
|
||||
typedef void (*VP8LCollectColorRedTransformsFunc)(
|
||||
const uint32_t* argb, int stride,
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]);
|
||||
int green_to_red, uint32_t histo[]);
|
||||
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
|
||||
|
||||
// Expose some C-only fallback functions
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels);
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels);
|
||||
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]);
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
|
||||
int green_to_red, uint32_t histo[]);
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
int histo[]);
|
||||
uint32_t histo[]);
|
||||
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
|
||||
@ -183,14 +186,17 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
|
||||
// Huffman-cost related functions.
|
||||
|
||||
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
||||
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* WEBP_RESTRICT X,
|
||||
const uint32_t* WEBP_RESTRICT Y,
|
||||
int length);
|
||||
typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
|
||||
const int Y[256]);
|
||||
typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
|
||||
const uint32_t Y[256]);
|
||||
typedef uint64_t (*VP8LShannonEntropyFunc)(const uint32_t* X, int length);
|
||||
|
||||
extern VP8LCostFunc VP8LExtraCost;
|
||||
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||
extern VP8LShannonEntropyFunc VP8LShannonEntropy;
|
||||
|
||||
typedef struct { // small struct to hold counters
|
||||
int counts[2]; // index: 0=zero streak, 1=non-zero streak
|
||||
@ -198,7 +204,7 @@ typedef struct { // small struct to hold counters
|
||||
} VP8LStreaks;
|
||||
|
||||
typedef struct { // small struct to hold bit entropy results
|
||||
float entropy; // entropy
|
||||
uint64_t entropy; // entropy
|
||||
uint32_t sum; // sum of the population
|
||||
int nonzeros; // number of non-zero elements in the population
|
||||
uint32_t max_val; // maximum value in the population
|
||||
@ -212,26 +218,30 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy);
|
||||
// codec specific heuristics.
|
||||
typedef void (*VP8LGetCombinedEntropyUnrefinedFunc)(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats);
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats);
|
||||
extern VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
|
||||
|
||||
// Get the entropy for the distribution 'X'.
|
||||
typedef void (*VP8LGetEntropyUnrefinedFunc)(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats);
|
||||
typedef void (*VP8LGetEntropyUnrefinedFunc)(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats);
|
||||
extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
|
||||
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
VP8LBitEntropy* const entropy);
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy);
|
||||
|
||||
typedef void (*VP8LAddVectorFunc)(const uint32_t* a, const uint32_t* b,
|
||||
uint32_t* out, int size);
|
||||
typedef void (*VP8LAddVectorFunc)(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size);
|
||||
extern VP8LAddVectorFunc VP8LAddVector;
|
||||
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* a, uint32_t* out, int size);
|
||||
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size);
|
||||
extern VP8LAddVectorEqFunc VP8LAddVectorEq;
|
||||
void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out);
|
||||
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
|
||||
const VP8LHistogram* WEBP_RESTRICT const b,
|
||||
VP8LHistogram* WEBP_RESTRICT const out);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
@ -241,11 +251,12 @@ typedef int (*VP8LVectorMismatchFunc)(const uint32_t* const array1,
|
||||
// Returns the first index where array1 and array2 are different.
|
||||
extern VP8LVectorMismatchFunc VP8LVectorMismatch;
|
||||
|
||||
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* const row, int width,
|
||||
int xbits, uint32_t* dst);
|
||||
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits,
|
||||
uint32_t* WEBP_RESTRICT dst);
|
||||
extern VP8LBundleColorMapFunc VP8LBundleColorMap;
|
||||
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst);
|
||||
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits, uint32_t* WEBP_RESTRICT dst);
|
||||
|
||||
// Must be called before calling any of the above methods.
|
||||
void VP8LEncDspInit(void);
|
||||
|
51
thirdparty/libwebp/src/dsp/lossless_common.h
vendored
51
thirdparty/libwebp/src/dsp/lossless_common.h
vendored
@ -73,23 +73,44 @@ static WEBP_INLINE int VP8LNearLosslessBits(int near_lossless_quality) {
|
||||
// Keeping a high threshold for now.
|
||||
#define APPROX_LOG_WITH_CORRECTION_MAX 65536
|
||||
#define APPROX_LOG_MAX 4096
|
||||
// VP8LFastLog2 and VP8LFastSLog2 are used on elements from image histograms.
|
||||
// The histogram values cannot exceed the maximum number of pixels, which
|
||||
// is (1 << 14) * (1 << 14). Therefore S * log(S) < (1 << 33).
|
||||
// No more than 32 bits of precision should be chosen.
|
||||
// To match the original float implementation, 23 bits of precision are used.
|
||||
#define LOG_2_PRECISION_BITS 23
|
||||
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
|
||||
// LOG_2_RECIPROCAL * (1 << LOG_2_PRECISION_BITS)
|
||||
#define LOG_2_RECIPROCAL_FIXED_DOUBLE 12102203.161561485379934310913085937500
|
||||
#define LOG_2_RECIPROCAL_FIXED ((uint64_t)12102203)
|
||||
#define LOG_LOOKUP_IDX_MAX 256
|
||||
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||
extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||
typedef float (*VP8LFastLog2SlowFunc)(uint32_t v);
|
||||
extern const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||
extern const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||
typedef uint32_t (*VP8LFastLog2SlowFunc)(uint32_t v);
|
||||
typedef uint64_t (*VP8LFastSLog2SlowFunc)(uint32_t v);
|
||||
|
||||
extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
||||
extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
||||
extern VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;
|
||||
|
||||
static WEBP_INLINE float VP8LFastLog2(uint32_t v) {
|
||||
static WEBP_INLINE uint32_t VP8LFastLog2(uint32_t v) {
|
||||
return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
|
||||
}
|
||||
// Fast calculation of v * log2(v) for integer input.
|
||||
static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
|
||||
static WEBP_INLINE uint64_t VP8LFastSLog2(uint32_t v) {
|
||||
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint64_t RightShiftRound(uint64_t v, uint32_t shift) {
|
||||
return (v + (1ull << shift >> 1)) >> shift;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int64_t DivRound(int64_t a, int64_t b) {
|
||||
return ((a < 0) == (b < 0)) ? ((a + b / 2) / b) : ((a - b / 2) / b);
|
||||
}
|
||||
|
||||
#define WEBP_INT64_MAX ((int64_t)((1ull << 63) - 1))
|
||||
#define WEBP_UINT64_MAX (~0ull)
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
|
||||
@ -173,15 +194,15 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
|
||||
|
||||
// The predictor is added to the output pixel (which
|
||||
// is therefore considered as a residual) to get the final prediction.
|
||||
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \
|
||||
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \
|
||||
out[x] = VP8LAddPixels(in[x], pred); \
|
||||
} \
|
||||
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \
|
||||
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) { \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \
|
||||
out[x] = VP8LAddPixels(in[x], pred); \
|
||||
} \
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
472
thirdparty/libwebp/src/dsp/lossless_enc.c
vendored
472
thirdparty/libwebp/src/dsp/lossless_enc.c
vendored
@ -24,203 +24,123 @@
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/dsp/yuv.h"
|
||||
|
||||
// lookup table for small values of log2(int)
|
||||
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
||||
0.0000000000000000f, 0.0000000000000000f,
|
||||
1.0000000000000000f, 1.5849625007211560f,
|
||||
2.0000000000000000f, 2.3219280948873621f,
|
||||
2.5849625007211560f, 2.8073549220576041f,
|
||||
3.0000000000000000f, 3.1699250014423121f,
|
||||
3.3219280948873621f, 3.4594316186372973f,
|
||||
3.5849625007211560f, 3.7004397181410921f,
|
||||
3.8073549220576041f, 3.9068905956085187f,
|
||||
4.0000000000000000f, 4.0874628412503390f,
|
||||
4.1699250014423121f, 4.2479275134435852f,
|
||||
4.3219280948873626f, 4.3923174227787606f,
|
||||
4.4594316186372973f, 4.5235619560570130f,
|
||||
4.5849625007211560f, 4.6438561897747243f,
|
||||
4.7004397181410917f, 4.7548875021634682f,
|
||||
4.8073549220576037f, 4.8579809951275718f,
|
||||
4.9068905956085187f, 4.9541963103868749f,
|
||||
5.0000000000000000f, 5.0443941193584533f,
|
||||
5.0874628412503390f, 5.1292830169449663f,
|
||||
5.1699250014423121f, 5.2094533656289501f,
|
||||
5.2479275134435852f, 5.2854022188622487f,
|
||||
5.3219280948873626f, 5.3575520046180837f,
|
||||
5.3923174227787606f, 5.4262647547020979f,
|
||||
5.4594316186372973f, 5.4918530963296747f,
|
||||
5.5235619560570130f, 5.5545888516776376f,
|
||||
5.5849625007211560f, 5.6147098441152083f,
|
||||
5.6438561897747243f, 5.6724253419714951f,
|
||||
5.7004397181410917f, 5.7279204545631987f,
|
||||
5.7548875021634682f, 5.7813597135246599f,
|
||||
5.8073549220576037f, 5.8328900141647412f,
|
||||
5.8579809951275718f, 5.8826430493618415f,
|
||||
5.9068905956085187f, 5.9307373375628866f,
|
||||
5.9541963103868749f, 5.9772799234999167f,
|
||||
6.0000000000000000f, 6.0223678130284543f,
|
||||
6.0443941193584533f, 6.0660891904577720f,
|
||||
6.0874628412503390f, 6.1085244567781691f,
|
||||
6.1292830169449663f, 6.1497471195046822f,
|
||||
6.1699250014423121f, 6.1898245588800175f,
|
||||
6.2094533656289501f, 6.2288186904958804f,
|
||||
6.2479275134435852f, 6.2667865406949010f,
|
||||
6.2854022188622487f, 6.3037807481771030f,
|
||||
6.3219280948873626f, 6.3398500028846243f,
|
||||
6.3575520046180837f, 6.3750394313469245f,
|
||||
6.3923174227787606f, 6.4093909361377017f,
|
||||
6.4262647547020979f, 6.4429434958487279f,
|
||||
6.4594316186372973f, 6.4757334309663976f,
|
||||
6.4918530963296747f, 6.5077946401986963f,
|
||||
6.5235619560570130f, 6.5391588111080309f,
|
||||
6.5545888516776376f, 6.5698556083309478f,
|
||||
6.5849625007211560f, 6.5999128421871278f,
|
||||
6.6147098441152083f, 6.6293566200796094f,
|
||||
6.6438561897747243f, 6.6582114827517946f,
|
||||
6.6724253419714951f, 6.6865005271832185f,
|
||||
6.7004397181410917f, 6.7142455176661224f,
|
||||
6.7279204545631987f, 6.7414669864011464f,
|
||||
6.7548875021634682f, 6.7681843247769259f,
|
||||
6.7813597135246599f, 6.7944158663501061f,
|
||||
6.8073549220576037f, 6.8201789624151878f,
|
||||
6.8328900141647412f, 6.8454900509443747f,
|
||||
6.8579809951275718f, 6.8703647195834047f,
|
||||
6.8826430493618415f, 6.8948177633079437f,
|
||||
6.9068905956085187f, 6.9188632372745946f,
|
||||
6.9307373375628866f, 6.9425145053392398f,
|
||||
6.9541963103868749f, 6.9657842846620869f,
|
||||
6.9772799234999167f, 6.9886846867721654f,
|
||||
7.0000000000000000f, 7.0112272554232539f,
|
||||
7.0223678130284543f, 7.0334230015374501f,
|
||||
7.0443941193584533f, 7.0552824355011898f,
|
||||
7.0660891904577720f, 7.0768155970508308f,
|
||||
7.0874628412503390f, 7.0980320829605263f,
|
||||
7.1085244567781691f, 7.1189410727235076f,
|
||||
7.1292830169449663f, 7.1395513523987936f,
|
||||
7.1497471195046822f, 7.1598713367783890f,
|
||||
7.1699250014423121f, 7.1799090900149344f,
|
||||
7.1898245588800175f, 7.1996723448363644f,
|
||||
7.2094533656289501f, 7.2191685204621611f,
|
||||
7.2288186904958804f, 7.2384047393250785f,
|
||||
7.2479275134435852f, 7.2573878426926521f,
|
||||
7.2667865406949010f, 7.2761244052742375f,
|
||||
7.2854022188622487f, 7.2946207488916270f,
|
||||
7.3037807481771030f, 7.3128829552843557f,
|
||||
7.3219280948873626f, 7.3309168781146167f,
|
||||
7.3398500028846243f, 7.3487281542310771f,
|
||||
7.3575520046180837f, 7.3663222142458160f,
|
||||
7.3750394313469245f, 7.3837042924740519f,
|
||||
7.3923174227787606f, 7.4008794362821843f,
|
||||
7.4093909361377017f, 7.4178525148858982f,
|
||||
7.4262647547020979f, 7.4346282276367245f,
|
||||
7.4429434958487279f, 7.4512111118323289f,
|
||||
7.4594316186372973f, 7.4676055500829976f,
|
||||
7.4757334309663976f, 7.4838157772642563f,
|
||||
7.4918530963296747f, 7.4998458870832056f,
|
||||
7.5077946401986963f, 7.5156998382840427f,
|
||||
7.5235619560570130f, 7.5313814605163118f,
|
||||
7.5391588111080309f, 7.5468944598876364f,
|
||||
7.5545888516776376f, 7.5622424242210728f,
|
||||
7.5698556083309478f, 7.5774288280357486f,
|
||||
7.5849625007211560f, 7.5924570372680806f,
|
||||
7.5999128421871278f, 7.6073303137496104f,
|
||||
7.6147098441152083f, 7.6220518194563764f,
|
||||
7.6293566200796094f, 7.6366246205436487f,
|
||||
7.6438561897747243f, 7.6510516911789281f,
|
||||
7.6582114827517946f, 7.6653359171851764f,
|
||||
7.6724253419714951f, 7.6794800995054464f,
|
||||
7.6865005271832185f, 7.6934869574993252f,
|
||||
7.7004397181410917f, 7.7073591320808825f,
|
||||
7.7142455176661224f, 7.7210991887071855f,
|
||||
7.7279204545631987f, 7.7347096202258383f,
|
||||
7.7414669864011464f, 7.7481928495894605f,
|
||||
7.7548875021634682f, 7.7615512324444795f,
|
||||
7.7681843247769259f, 7.7747870596011736f,
|
||||
7.7813597135246599f, 7.7879025593914317f,
|
||||
7.7944158663501061f, 7.8008998999203047f,
|
||||
7.8073549220576037f, 7.8137811912170374f,
|
||||
7.8201789624151878f, 7.8265484872909150f,
|
||||
7.8328900141647412f, 7.8392037880969436f,
|
||||
7.8454900509443747f, 7.8517490414160571f,
|
||||
7.8579809951275718f, 7.8641861446542797f,
|
||||
7.8703647195834047f, 7.8765169465649993f,
|
||||
7.8826430493618415f, 7.8887432488982591f,
|
||||
7.8948177633079437f, 7.9008668079807486f,
|
||||
7.9068905956085187f, 7.9128893362299619f,
|
||||
7.9188632372745946f, 7.9248125036057812f,
|
||||
7.9307373375628866f, 7.9366379390025709f,
|
||||
7.9425145053392398f, 7.9483672315846778f,
|
||||
7.9541963103868749f, 7.9600019320680805f,
|
||||
7.9657842846620869f, 7.9715435539507719f,
|
||||
7.9772799234999167f, 7.9829935746943103f,
|
||||
7.9886846867721654f, 7.9943534368588577f
|
||||
// lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS).
|
||||
// Obtained in Python with:
|
||||
// a = [ str(round((1<<23)*math.log2(i))) if i else "0" for i in range(256)]
|
||||
// print(',\n'.join([' '+','.join(v)
|
||||
// for v in batched([i.rjust(9) for i in a],7)]))
|
||||
const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
||||
0, 0, 8388608, 13295629, 16777216, 19477745, 21684237,
|
||||
23549800, 25165824, 26591258, 27866353, 29019816, 30072845, 31041538,
|
||||
31938408, 32773374, 33554432, 34288123, 34979866, 35634199, 36254961,
|
||||
36845429, 37408424, 37946388, 38461453, 38955489, 39430146, 39886887,
|
||||
40327016, 40751698, 41161982, 41558811, 41943040, 42315445, 42676731,
|
||||
43027545, 43368474, 43700062, 44022807, 44337167, 44643569, 44942404,
|
||||
45234037, 45518808, 45797032, 46069003, 46334996, 46595268, 46850061,
|
||||
47099600, 47344097, 47583753, 47818754, 48049279, 48275495, 48497560,
|
||||
48715624, 48929828, 49140306, 49347187, 49550590, 49750631, 49947419,
|
||||
50141058, 50331648, 50519283, 50704053, 50886044, 51065339, 51242017,
|
||||
51416153, 51587818, 51757082, 51924012, 52088670, 52251118, 52411415,
|
||||
52569616, 52725775, 52879946, 53032177, 53182516, 53331012, 53477707,
|
||||
53622645, 53765868, 53907416, 54047327, 54185640, 54322389, 54457611,
|
||||
54591338, 54723604, 54854440, 54983876, 55111943, 55238669, 55364082,
|
||||
55488208, 55611074, 55732705, 55853126, 55972361, 56090432, 56207362,
|
||||
56323174, 56437887, 56551524, 56664103, 56775645, 56886168, 56995691,
|
||||
57104232, 57211808, 57318436, 57424133, 57528914, 57632796, 57735795,
|
||||
57837923, 57939198, 58039632, 58139239, 58238033, 58336027, 58433234,
|
||||
58529666, 58625336, 58720256, 58814437, 58907891, 59000628, 59092661,
|
||||
59183999, 59274652, 59364632, 59453947, 59542609, 59630625, 59718006,
|
||||
59804761, 59890898, 59976426, 60061354, 60145690, 60229443, 60312620,
|
||||
60395229, 60477278, 60558775, 60639726, 60720140, 60800023, 60879382,
|
||||
60958224, 61036555, 61114383, 61191714, 61268554, 61344908, 61420785,
|
||||
61496188, 61571124, 61645600, 61719620, 61793189, 61866315, 61939001,
|
||||
62011253, 62083076, 62154476, 62225457, 62296024, 62366182, 62435935,
|
||||
62505289, 62574248, 62642816, 62710997, 62778797, 62846219, 62913267,
|
||||
62979946, 63046260, 63112212, 63177807, 63243048, 63307939, 63372484,
|
||||
63436687, 63500551, 63564080, 63627277, 63690146, 63752690, 63814912,
|
||||
63876816, 63938405, 63999682, 64060650, 64121313, 64181673, 64241734,
|
||||
64301498, 64360969, 64420148, 64479040, 64537646, 64595970, 64654014,
|
||||
64711782, 64769274, 64826495, 64883447, 64940132, 64996553, 65052711,
|
||||
65108611, 65164253, 65219641, 65274776, 65329662, 65384299, 65438691,
|
||||
65492840, 65546747, 65600416, 65653847, 65707044, 65760008, 65812741,
|
||||
65865245, 65917522, 65969575, 66021404, 66073013, 66124403, 66175575,
|
||||
66226531, 66277275, 66327806, 66378127, 66428240, 66478146, 66527847,
|
||||
66577345, 66626641, 66675737, 66724635, 66773336, 66821842, 66870154,
|
||||
66918274, 66966204, 67013944, 67061497
|
||||
};
|
||||
|
||||
const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
||||
0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
|
||||
8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
|
||||
24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
|
||||
43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
|
||||
64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
|
||||
86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
|
||||
110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
|
||||
134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
|
||||
160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
|
||||
186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
|
||||
212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
|
||||
240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
|
||||
268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
|
||||
296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
|
||||
325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
|
||||
354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
|
||||
384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
|
||||
413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
|
||||
444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
|
||||
474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
|
||||
505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
|
||||
536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
|
||||
568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
|
||||
600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
|
||||
632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
|
||||
664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
|
||||
696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
|
||||
729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
|
||||
762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
|
||||
795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
|
||||
828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
|
||||
862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
|
||||
896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
|
||||
929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
|
||||
963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
|
||||
998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
|
||||
1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
|
||||
1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
|
||||
1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
|
||||
1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
|
||||
1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
|
||||
1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
|
||||
1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
|
||||
1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
|
||||
1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
|
||||
1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
|
||||
1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
|
||||
1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
|
||||
1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
|
||||
1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
|
||||
1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
|
||||
1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
|
||||
1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
|
||||
1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
|
||||
1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
|
||||
1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
|
||||
1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
|
||||
1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
|
||||
1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
|
||||
1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
|
||||
1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
|
||||
1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
|
||||
1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
|
||||
2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
|
||||
// lookup table for small values of int*log2(int) * (1 << LOG_2_PRECISION_BITS).
|
||||
// Obtained in Python with:
|
||||
// a=[ "%d"%i if i<(1<<32) else "%dull"%i
|
||||
// for i in [ round((1<<LOG_2_PRECISION_BITS)*math.log2(i)*i) if i
|
||||
// else 0 for i in range(256)]]
|
||||
// print(',\n '.join([','.join(v) for v in batched([i.rjust(15)
|
||||
// for i in a],4)]))
|
||||
const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
||||
0, 0, 16777216, 39886887,
|
||||
67108864, 97388723, 130105423, 164848600,
|
||||
201326592, 239321324, 278663526, 319217973,
|
||||
360874141, 403539997, 447137711, 491600606,
|
||||
536870912, 582898099, 629637592, 677049776,
|
||||
725099212, 773754010, 822985323, 872766924,
|
||||
923074875, 973887230, 1025183802, 1076945958,
|
||||
1129156447, 1181799249, 1234859451, 1288323135,
|
||||
1342177280, 1396409681, 1451008871, 1505964059,
|
||||
1561265072, 1616902301, 1672866655, 1729149526,
|
||||
1785742744, 1842638548, 1899829557, 1957308741,
|
||||
2015069397, 2073105127, 2131409817, 2189977618ull,
|
||||
2248802933ull, 2307880396ull, 2367204859ull, 2426771383ull,
|
||||
2486575220ull, 2546611805ull, 2606876748ull, 2667365819ull,
|
||||
2728074942ull, 2789000187ull, 2850137762ull, 2911484006ull,
|
||||
2973035382ull, 3034788471ull, 3096739966ull, 3158886666ull,
|
||||
3221225472ull, 3283753383ull, 3346467489ull, 3409364969ull,
|
||||
3472443085ull, 3535699182ull, 3599130679ull, 3662735070ull,
|
||||
3726509920ull, 3790452862ull, 3854561593ull, 3918833872ull,
|
||||
3983267519ull, 4047860410ull, 4112610476ull, 4177515704ull,
|
||||
4242574127ull, 4307783833ull, 4373142952ull, 4438649662ull,
|
||||
4504302186ull, 4570098787ull, 4636037770ull, 4702117480ull,
|
||||
4768336298ull, 4834692645ull, 4901184974ull, 4967811774ull,
|
||||
5034571569ull, 5101462912ull, 5168484389ull, 5235634615ull,
|
||||
5302912235ull, 5370315922ull, 5437844376ull, 5505496324ull,
|
||||
5573270518ull, 5641165737ull, 5709180782ull, 5777314477ull,
|
||||
5845565671ull, 5913933235ull, 5982416059ull, 6051013057ull,
|
||||
6119723161ull, 6188545324ull, 6257478518ull, 6326521733ull,
|
||||
6395673979ull, 6464934282ull, 6534301685ull, 6603775250ull,
|
||||
6673354052ull, 6743037185ull, 6812823756ull, 6882712890ull,
|
||||
6952703725ull, 7022795412ull, 7092987118ull, 7163278025ull,
|
||||
7233667324ull, 7304154222ull, 7374737939ull, 7445417707ull,
|
||||
7516192768ull, 7587062379ull, 7658025806ull, 7729082328ull,
|
||||
7800231234ull, 7871471825ull, 7942803410ull, 8014225311ull,
|
||||
8085736859ull, 8157337394ull, 8229026267ull, 8300802839ull,
|
||||
8372666477ull, 8444616560ull, 8516652476ull, 8588773618ull,
|
||||
8660979393ull, 8733269211ull, 8805642493ull, 8878098667ull,
|
||||
8950637170ull, 9023257446ull, 9095958945ull, 9168741125ull,
|
||||
9241603454ull, 9314545403ull, 9387566451ull, 9460666086ull,
|
||||
9533843800ull, 9607099093ull, 9680431471ull, 9753840445ull,
|
||||
9827325535ull, 9900886263ull, 9974522161ull, 10048232765ull,
|
||||
10122017615ull, 10195876260ull, 10269808253ull, 10343813150ull,
|
||||
10417890516ull, 10492039919ull, 10566260934ull, 10640553138ull,
|
||||
10714916116ull, 10789349456ull, 10863852751ull, 10938425600ull,
|
||||
11013067604ull, 11087778372ull, 11162557513ull, 11237404645ull,
|
||||
11312319387ull, 11387301364ull, 11462350205ull, 11537465541ull,
|
||||
11612647010ull, 11687894253ull, 11763206912ull, 11838584638ull,
|
||||
11914027082ull, 11989533899ull, 12065104750ull, 12140739296ull,
|
||||
12216437206ull, 12292198148ull, 12368021795ull, 12443907826ull,
|
||||
12519855920ull, 12595865759ull, 12671937032ull, 12748069427ull,
|
||||
12824262637ull, 12900516358ull, 12976830290ull, 13053204134ull,
|
||||
13129637595ull, 13206130381ull, 13282682202ull, 13359292772ull,
|
||||
13435961806ull, 13512689025ull, 13589474149ull, 13666316903ull,
|
||||
13743217014ull, 13820174211ull, 13897188225ull, 13974258793ull,
|
||||
14051385649ull, 14128568535ull, 14205807192ull, 14283101363ull,
|
||||
14360450796ull, 14437855239ull, 14515314443ull, 14592828162ull,
|
||||
14670396151ull, 14748018167ull, 14825693972ull, 14903423326ull,
|
||||
14981205995ull, 15059041743ull, 15136930339ull, 15214871554ull,
|
||||
15292865160ull, 15370910930ull, 15449008641ull, 15527158071ull,
|
||||
15605359001ull, 15683611210ull, 15761914485ull, 15840268608ull,
|
||||
15918673369ull, 15997128556ull, 16075633960ull, 16154189373ull,
|
||||
16232794589ull, 16311449405ull, 16390153617ull, 16468907026ull,
|
||||
16547709431ull, 16626560636ull, 16705460444ull, 16784408661ull,
|
||||
16863405094ull, 16942449552ull, 17021541845ull, 17100681785ull
|
||||
};
|
||||
|
||||
const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
|
||||
@ -326,23 +246,19 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
|
||||
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
|
||||
};
|
||||
|
||||
static float FastSLog2Slow_C(uint32_t v) {
|
||||
static uint64_t FastSLog2Slow_C(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
const uint64_t orig_v = v;
|
||||
uint64_t correction;
|
||||
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
|
||||
// use clz if available
|
||||
const int log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint64_t log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint32_t y = 1 << log_cnt;
|
||||
int correction = 0;
|
||||
const float v_f = (float)v;
|
||||
const uint32_t orig_v = v;
|
||||
v >>= log_cnt;
|
||||
#else
|
||||
int log_cnt = 0;
|
||||
uint64_t log_cnt = 0;
|
||||
uint32_t y = 1;
|
||||
int correction = 0;
|
||||
const float v_f = (float)v;
|
||||
const uint32_t orig_v = v;
|
||||
do {
|
||||
++log_cnt;
|
||||
v = v >> 1;
|
||||
@ -354,45 +270,43 @@ static float FastSLog2Slow_C(uint32_t v) {
|
||||
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
|
||||
// The correction factor: log(1 + d) ~ d; for very small d values, so
|
||||
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
|
||||
// LOG_2_RECIPROCAL ~ 23/16
|
||||
correction = (23 * (orig_v & (y - 1))) >> 4;
|
||||
return v_f * (kLog2Table[v] + log_cnt) + correction;
|
||||
correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1));
|
||||
return orig_v * (kLog2Table[v] + (log_cnt << LOG_2_PRECISION_BITS)) +
|
||||
correction;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
|
||||
return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);
|
||||
}
|
||||
}
|
||||
|
||||
static float FastLog2Slow_C(uint32_t v) {
|
||||
static uint32_t FastLog2Slow_C(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
const uint32_t orig_v = v;
|
||||
uint32_t log_2;
|
||||
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
|
||||
// use clz if available
|
||||
const int log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint32_t log_cnt = BitsLog2Floor(v) - 7;
|
||||
const uint32_t y = 1 << log_cnt;
|
||||
const uint32_t orig_v = v;
|
||||
double log_2;
|
||||
v >>= log_cnt;
|
||||
#else
|
||||
int log_cnt = 0;
|
||||
uint32_t log_cnt = 0;
|
||||
uint32_t y = 1;
|
||||
const uint32_t orig_v = v;
|
||||
double log_2;
|
||||
do {
|
||||
++log_cnt;
|
||||
v = v >> 1;
|
||||
y = y << 1;
|
||||
} while (v >= LOG_LOOKUP_IDX_MAX);
|
||||
#endif
|
||||
log_2 = kLog2Table[v] + log_cnt;
|
||||
log_2 = kLog2Table[v] + (log_cnt << LOG_2_PRECISION_BITS);
|
||||
if (orig_v >= APPROX_LOG_MAX) {
|
||||
// Since the division is still expensive, add this correction factor only
|
||||
// for large values of 'v'.
|
||||
const int correction = (23 * (orig_v & (y - 1))) >> 4;
|
||||
log_2 += (double)correction / orig_v;
|
||||
const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1));
|
||||
log_2 += (uint32_t)DivRound(correction, orig_v);
|
||||
}
|
||||
return (float)log_2;
|
||||
return log_2;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * log((double)v));
|
||||
return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -400,37 +314,53 @@ static float FastLog2Slow_C(uint32_t v) {
|
||||
// Methods to calculate Entropy (Shannon).
|
||||
|
||||
// Compute the combined Shanon's entropy for distribution {X} and {X+Y}
|
||||
static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
|
||||
static uint64_t CombinedShannonEntropy_C(const uint32_t X[256],
|
||||
const uint32_t Y[256]) {
|
||||
int i;
|
||||
float retval = 0.f;
|
||||
int sumX = 0, sumXY = 0;
|
||||
uint64_t retval = 0;
|
||||
uint32_t sumX = 0, sumXY = 0;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
const int x = X[i];
|
||||
const uint32_t x = X[i];
|
||||
if (x != 0) {
|
||||
const int xy = x + Y[i];
|
||||
const uint32_t xy = x + Y[i];
|
||||
sumX += x;
|
||||
retval -= VP8LFastSLog2(x);
|
||||
retval += VP8LFastSLog2(x);
|
||||
sumXY += xy;
|
||||
retval -= VP8LFastSLog2(xy);
|
||||
retval += VP8LFastSLog2(xy);
|
||||
} else if (Y[i] != 0) {
|
||||
sumXY += Y[i];
|
||||
retval -= VP8LFastSLog2(Y[i]);
|
||||
retval += VP8LFastSLog2(Y[i]);
|
||||
}
|
||||
}
|
||||
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||
retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
static uint64_t ShannonEntropy_C(const uint32_t* X, int n) {
|
||||
int i;
|
||||
uint64_t retval = 0;
|
||||
uint32_t sumX = 0;
|
||||
for (i = 0; i < n; ++i) {
|
||||
const int x = X[i];
|
||||
if (x != 0) {
|
||||
sumX += x;
|
||||
retval += VP8LFastSLog2(x);
|
||||
}
|
||||
}
|
||||
retval = VP8LFastSLog2(sumX) - retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
|
||||
entropy->entropy = 0.;
|
||||
entropy->entropy = 0;
|
||||
entropy->sum = 0;
|
||||
entropy->nonzeros = 0;
|
||||
entropy->max_val = 0;
|
||||
entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
|
||||
}
|
||||
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
VP8LBitEntropy* const entropy) {
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy) {
|
||||
int i;
|
||||
|
||||
VP8LBitEntropyInit(entropy);
|
||||
@ -440,18 +370,20 @@ void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
entropy->sum += array[i];
|
||||
entropy->nonzero_code = i;
|
||||
++entropy->nonzeros;
|
||||
entropy->entropy -= VP8LFastSLog2(array[i]);
|
||||
entropy->entropy += VP8LFastSLog2(array[i]);
|
||||
if (entropy->max_val < array[i]) {
|
||||
entropy->max_val = array[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
entropy->entropy += VP8LFastSLog2(entropy->sum);
|
||||
entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
|
||||
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
|
||||
int* WEBP_RESTRICT const i_prev,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
const int streak = i - *i_prev;
|
||||
|
||||
// Gather info for the bit entropy.
|
||||
@ -459,7 +391,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
bit_entropy->sum += (*val_prev) * streak;
|
||||
bit_entropy->nonzeros += streak;
|
||||
bit_entropy->nonzero_code = *i_prev;
|
||||
bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
|
||||
bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;
|
||||
if (bit_entropy->max_val < *val_prev) {
|
||||
bit_entropy->max_val = *val_prev;
|
||||
}
|
||||
@ -473,9 +405,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
*i_prev = i;
|
||||
}
|
||||
|
||||
static void GetEntropyUnrefined_C(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetEntropyUnrefined_C(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i;
|
||||
int i_prev = 0;
|
||||
uint32_t x_prev = X[0];
|
||||
@ -491,14 +424,13 @@ static void GetEntropyUnrefined_C(const uint32_t X[], int length,
|
||||
}
|
||||
GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
|
||||
|
||||
bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
|
||||
const uint32_t Y[],
|
||||
int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetCombinedEntropyUnrefined_C(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i = 1;
|
||||
int i_prev = 0;
|
||||
uint32_t xy_prev = X[0] + Y[0];
|
||||
@ -514,7 +446,7 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
|
||||
}
|
||||
GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);
|
||||
|
||||
bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -538,8 +470,8 @@ static WEBP_INLINE int8_t U32ToS8(uint32_t v) {
|
||||
return (int8_t)(v & 0xff);
|
||||
}
|
||||
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
|
||||
int num_pixels) {
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint32_t argb = data[i];
|
||||
@ -575,9 +507,10 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
|
||||
return (new_blue & 0xff);
|
||||
}
|
||||
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]) {
|
||||
int green_to_red, uint32_t histo[]) {
|
||||
while (tile_height-- > 0) {
|
||||
int x;
|
||||
for (x = 0; x < tile_width; ++x) {
|
||||
@ -587,10 +520,11 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
int histo[]) {
|
||||
uint32_t histo[]) {
|
||||
while (tile_height-- > 0) {
|
||||
int x;
|
||||
for (x = 0; x < tile_width; ++x) {
|
||||
@ -614,8 +548,8 @@ static int VectorMismatch_C(const uint32_t* const array1,
|
||||
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst) {
|
||||
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits, uint32_t* WEBP_RESTRICT dst) {
|
||||
int x;
|
||||
if (xbits > 0) {
|
||||
const int bit_depth = 1 << (3 - xbits);
|
||||
@ -646,7 +580,8 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X,
|
||||
const uint32_t* WEBP_RESTRICT Y,
|
||||
int length) {
|
||||
int i;
|
||||
uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
|
||||
@ -661,13 +596,15 @@ static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void AddVector_C(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
int size) {
|
||||
static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) out[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
|
||||
static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) out[i] += a[i];
|
||||
}
|
||||
@ -696,8 +633,9 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b, VP8LHistogram* const out) {
|
||||
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
|
||||
const VP8LHistogram* WEBP_RESTRICT const b,
|
||||
VP8LHistogram* WEBP_RESTRICT const out) {
|
||||
int i;
|
||||
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
|
||||
assert(a->palette_code_bits_ == b->palette_code_bits_);
|
||||
@ -727,14 +665,14 @@ void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
// Image transforms.
|
||||
|
||||
static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], ARGB_BLACK);
|
||||
(void)upper;
|
||||
}
|
||||
|
||||
static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], in[i - 1]);
|
||||
(void)upper;
|
||||
@ -745,7 +683,8 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \
|
||||
static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \
|
||||
const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
@ -778,11 +717,12 @@ VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
|
||||
VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
|
||||
|
||||
VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
||||
VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
||||
VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;
|
||||
|
||||
VP8LCostFunc VP8LExtraCost;
|
||||
VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||
VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||
VP8LShannonEntropyFunc VP8LShannonEntropy;
|
||||
|
||||
VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
|
||||
VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
|
||||
@ -822,6 +762,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
||||
VP8LExtraCost = ExtraCost_C;
|
||||
VP8LExtraCostCombined = ExtraCostCombined_C;
|
||||
VP8LCombinedShannonEntropy = CombinedShannonEntropy_C;
|
||||
VP8LShannonEntropy = ShannonEntropy_C;
|
||||
|
||||
VP8LGetEntropyUnrefined = GetEntropyUnrefined_C;
|
||||
VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_C;
|
||||
@ -911,6 +852,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
||||
assert(VP8LExtraCost != NULL);
|
||||
assert(VP8LExtraCostCombined != NULL);
|
||||
assert(VP8LCombinedShannonEntropy != NULL);
|
||||
assert(VP8LShannonEntropy != NULL);
|
||||
assert(VP8LGetEntropyUnrefined != NULL);
|
||||
assert(VP8LGetCombinedEntropyUnrefined != NULL);
|
||||
assert(VP8LAddVector != NULL);
|
||||
|
71
thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
vendored
71
thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
vendored
@ -23,12 +23,12 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static float FastSLog2Slow_MIPS32(uint32_t v) {
|
||||
static uint64_t FastSLog2Slow_MIPS32(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
uint32_t log_cnt, y, correction;
|
||||
uint32_t log_cnt, y;
|
||||
uint64_t correction;
|
||||
const int c24 = 24;
|
||||
const float v_f = (float)v;
|
||||
uint32_t temp;
|
||||
|
||||
// Xf = 256 = 2^8
|
||||
@ -49,22 +49,23 @@ static float FastSLog2Slow_MIPS32(uint32_t v) {
|
||||
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
|
||||
// The correction factor: log(1 + d) ~ d; for very small d values, so
|
||||
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
|
||||
// LOG_2_RECIPROCAL ~ 23/16
|
||||
|
||||
// (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
|
||||
correction = (23 * (v & (y - 1))) >> 4;
|
||||
return v_f * (kLog2Table[temp] + log_cnt) + correction;
|
||||
correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
|
||||
return (uint64_t)v * (kLog2Table[temp] +
|
||||
((uint64_t)log_cnt << LOG_2_PRECISION_BITS)) +
|
||||
correction;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
|
||||
return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);
|
||||
}
|
||||
}
|
||||
|
||||
static float FastLog2Slow_MIPS32(uint32_t v) {
|
||||
static uint32_t FastLog2Slow_MIPS32(uint32_t v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
uint32_t log_cnt, y;
|
||||
const int c24 = 24;
|
||||
double log_2;
|
||||
uint32_t log_2;
|
||||
uint32_t temp;
|
||||
|
||||
__asm__ volatile(
|
||||
@ -78,17 +79,16 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
|
||||
: [c24]"r"(c24), [v]"r"(v)
|
||||
);
|
||||
|
||||
log_2 = kLog2Table[temp] + log_cnt;
|
||||
log_2 = kLog2Table[temp] + (log_cnt << LOG_2_PRECISION_BITS);
|
||||
if (v >= APPROX_LOG_MAX) {
|
||||
// Since the division is still expensive, add this correction factor only
|
||||
// for large values of 'v'.
|
||||
|
||||
const uint32_t correction = (23 * (v & (y - 1))) >> 4;
|
||||
log_2 += (double)correction / v;
|
||||
const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
|
||||
log_2 += (uint32_t)DivRound(correction, v);
|
||||
}
|
||||
return (float)log_2;
|
||||
return log_2;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * log((double)v));
|
||||
return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -149,8 +149,9 @@ static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
// pY += 2;
|
||||
// }
|
||||
// return cost;
|
||||
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* WEBP_RESTRICT const X,
|
||||
const uint32_t* WEBP_RESTRICT const Y,
|
||||
int length) {
|
||||
int i, temp0, temp1, temp2, temp3;
|
||||
const uint32_t* pX = &X[4];
|
||||
const uint32_t* pY = &Y[4];
|
||||
@ -215,8 +216,10 @@ static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
|
||||
// Returns the various RLE counts
|
||||
static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
|
||||
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
|
||||
int* WEBP_RESTRICT const i_prev,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int* const pstreaks = &stats->streaks[0][0];
|
||||
int* const pcnts = &stats->counts[0];
|
||||
int temp0, temp1, temp2, temp3;
|
||||
@ -227,7 +230,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
bit_entropy->sum += (*val_prev) * streak;
|
||||
bit_entropy->nonzeros += streak;
|
||||
bit_entropy->nonzero_code = *i_prev;
|
||||
bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
|
||||
bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;
|
||||
if (bit_entropy->max_val < *val_prev) {
|
||||
bit_entropy->max_val = *val_prev;
|
||||
}
|
||||
@ -241,9 +244,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
*i_prev = i;
|
||||
}
|
||||
|
||||
static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetEntropyUnrefined_MIPS32(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i;
|
||||
int i_prev = 0;
|
||||
uint32_t x_prev = X[0];
|
||||
@ -259,14 +263,13 @@ static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
|
||||
}
|
||||
GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
|
||||
|
||||
bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
const uint32_t Y[],
|
||||
int length,
|
||||
VP8LBitEntropy* const entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
static void GetCombinedEntropyUnrefined_MIPS32(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
int i = 1;
|
||||
int i_prev = 0;
|
||||
uint32_t xy_prev = X[0] + Y[0];
|
||||
@ -282,7 +285,7 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
}
|
||||
GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, entropy, stats);
|
||||
|
||||
entropy->entropy += VP8LFastSLog2(entropy->sum);
|
||||
entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;
|
||||
}
|
||||
|
||||
#define ASM_START \
|
||||
@ -344,8 +347,9 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
ASM_END_COMMON_0 \
|
||||
ASM_END_COMMON_1
|
||||
|
||||
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
uint32_t* pout, int size) {
|
||||
static void AddVector_MIPS32(const uint32_t* WEBP_RESTRICT pa,
|
||||
const uint32_t* WEBP_RESTRICT pb,
|
||||
uint32_t* WEBP_RESTRICT pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
@ -356,7 +360,8 @@ static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
|
||||
static void AddVectorEq_MIPS32(const uint32_t* WEBP_RESTRICT pa,
|
||||
uint32_t* WEBP_RESTRICT pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
|
@ -78,8 +78,9 @@ static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
|
||||
return (uint32_t)((int)(color_pred) * color) >> 5;
|
||||
}
|
||||
|
||||
static void TransformColor_MIPSdspR2(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels) {
|
||||
static void TransformColor_MIPSdspR2(
|
||||
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT data,
|
||||
int num_pixels) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
uint32_t argb, argb1, new_red, new_red1;
|
||||
const uint32_t G_to_R = m->green_to_red_;
|
||||
@ -171,13 +172,10 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
|
||||
return (new_blue & 0xff);
|
||||
}
|
||||
|
||||
static void CollectColorBlueTransforms_MIPSdspR2(const uint32_t* argb,
|
||||
int stride,
|
||||
int tile_width,
|
||||
int tile_height,
|
||||
int green_to_blue,
|
||||
int red_to_blue,
|
||||
int histo[]) {
|
||||
static void CollectColorBlueTransforms_MIPSdspR2(
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue, uint32_t histo[]) {
|
||||
const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
|
||||
const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
|
||||
const uint32_t mask = 0xff00ffu;
|
||||
@ -225,12 +223,9 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
|
||||
return (new_red & 0xff);
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb,
|
||||
int stride,
|
||||
int tile_width,
|
||||
int tile_height,
|
||||
int green_to_red,
|
||||
int histo[]) {
|
||||
static void CollectColorRedTransforms_MIPSdspR2(
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height, int green_to_red, uint32_t histo[]) {
|
||||
const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
|
||||
while (tile_height-- > 0) {
|
||||
int x;
|
||||
|
@ -48,8 +48,8 @@
|
||||
dst = VSHF_UB(src, t0, mask1); \
|
||||
} while (0)
|
||||
|
||||
static void TransformColor_MSA(const VP8LMultipliers* const m, uint32_t* data,
|
||||
int num_pixels) {
|
||||
static void TransformColor_MSA(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels) {
|
||||
v16u8 src0, dst0;
|
||||
const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
|
||||
(m->green_to_red_ << 16));
|
||||
|
@ -72,8 +72,9 @@ static void SubtractGreenFromBlueAndRed_NEON(uint32_t* argb_data,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
static void TransformColor_NEON(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels) {
|
||||
static void TransformColor_NEON(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT argb_data,
|
||||
int num_pixels) {
|
||||
// sign-extended multiplying constants, pre-shifted by 6.
|
||||
#define CST(X) (((int16_t)(m->X << 8)) >> 6)
|
||||
const int16_t rb[8] = {
|
||||
|
148
thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
vendored
148
thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
vendored
@ -49,8 +49,9 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data,
|
||||
#define MK_CST_16(HI, LO) \
|
||||
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
|
||||
static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels) {
|
||||
static void TransformColor_SSE2(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT argb_data,
|
||||
int num_pixels) {
|
||||
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
|
||||
CST_5b(m->green_to_blue_));
|
||||
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
|
||||
@ -79,10 +80,11 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#define SPAN 8
|
||||
static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
static void CollectColorBlueTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
int histo[]) {
|
||||
uint32_t histo[]) {
|
||||
const __m128i mults_r = MK_CST_16(CST_5b(red_to_blue), 0);
|
||||
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_blue));
|
||||
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
|
||||
@ -126,9 +128,10 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
static void CollectColorRedTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]) {
|
||||
int green_to_red, uint32_t histo[]) {
|
||||
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red));
|
||||
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
|
||||
const __m128i mask = _mm_set1_epi32(0xff);
|
||||
@ -172,75 +175,113 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
|
||||
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
|
||||
// that's ok since the histogram values are less than 1<<28 (max picture size).
|
||||
#define LINE_SIZE 16 // 8 or 16
|
||||
static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i = 0;
|
||||
int aligned_size = size & ~15;
|
||||
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||
// 2). See the usage in VP8LHistogramAdd().
|
||||
assert(size >= 16);
|
||||
assert(size % 2 == 0);
|
||||
|
||||
do {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||
#endif
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&b[i + 8]);
|
||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&b[i + 12]);
|
||||
#endif
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
#if (LINE_SIZE == 16)
|
||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
i += 16;
|
||||
} while (i != aligned_size);
|
||||
|
||||
if ((size & 8) != 0) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
i += 8;
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] = a[i] + b[i];
|
||||
|
||||
size &= 7;
|
||||
if (size == 4) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
|
||||
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
} else if (size == 2) {
|
||||
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&b[i]);
|
||||
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
}
|
||||
}
|
||||
|
||||
static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
static void AddVectorEq_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i = 0;
|
||||
int aligned_size = size & ~15;
|
||||
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||
// 2). See the usage in VP8LHistogramAdd().
|
||||
assert(size >= 16);
|
||||
assert(size % 2 == 0);
|
||||
|
||||
do {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||
#endif
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&out[i + 8]);
|
||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&out[i + 12]);
|
||||
#endif
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
#if (LINE_SIZE == 16)
|
||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
i += 16;
|
||||
} while (i != aligned_size);
|
||||
|
||||
if ((size & 8) != 0) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
i += 8;
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] += a[i];
|
||||
|
||||
size &= 7;
|
||||
if (size == 4) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i]);
|
||||
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
} else if (size == 2) {
|
||||
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&out[i]);
|
||||
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
}
|
||||
}
|
||||
#undef LINE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entropy
|
||||
|
||||
// TODO(https://crbug.com/webp/499): this function produces different results
|
||||
// from the C code due to use of double/float resulting in output differences
|
||||
// when compared to -noasm.
|
||||
#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86))
|
||||
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
|
||||
|
||||
static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
|
||||
static uint64_t CombinedShannonEntropy_SSE2(const uint32_t X[256],
|
||||
const uint32_t Y[256]) {
|
||||
int i;
|
||||
float retval = 0.f;
|
||||
int sumX = 0, sumXY = 0;
|
||||
uint64_t retval = 0;
|
||||
uint32_t sumX = 0, sumXY = 0;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
for (i = 0; i < 256; i += 16) {
|
||||
@ -260,19 +301,19 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
|
||||
int32_t my = _mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) | mx;
|
||||
while (my) {
|
||||
const int32_t j = BitsCtz(my);
|
||||
int xy;
|
||||
uint32_t xy;
|
||||
if ((mx >> j) & 1) {
|
||||
const int x = X[i + j];
|
||||
sumXY += x;
|
||||
retval -= VP8LFastSLog2(x);
|
||||
retval += VP8LFastSLog2(x);
|
||||
}
|
||||
xy = X[i + j] + Y[i + j];
|
||||
sumX += xy;
|
||||
retval -= VP8LFastSLog2(xy);
|
||||
retval += VP8LFastSLog2(xy);
|
||||
my &= my - 1;
|
||||
}
|
||||
}
|
||||
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||
retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -335,8 +376,9 @@ static int VectorMismatch_SSE2(const uint32_t* const array1,
|
||||
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst) {
|
||||
static void BundleColorMap_SSE2(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits,
|
||||
uint32_t* WEBP_RESTRICT dst) {
|
||||
int x;
|
||||
assert(xbits >= 0);
|
||||
assert(xbits <= 3);
|
||||
@ -425,7 +467,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -442,7 +484,8 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorSub##X##_SSE2(const uint32_t* const in, \
|
||||
const uint32_t* const upper, \
|
||||
int num_pixels, uint32_t* const out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT const out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
|
||||
@ -464,7 +507,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1]) // Predictor4: TL
|
||||
|
||||
// Predictor5: avg2(avg2(L, TR), T)
|
||||
static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
@ -484,7 +527,8 @@ static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, A, B) \
|
||||
static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i tA = _mm_loadu_si128((const __m128i*)&(A)); \
|
||||
@ -508,7 +552,7 @@ GENERATE_PREDICTOR_2(9, upper[i], upper[i + 1]) // Predictor9: average(T, TR)
|
||||
|
||||
// Predictor10: avg(avg(L,TL), avg(T, TR)).
|
||||
static void PredictorSub10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
@ -543,7 +587,7 @@ static void GetSumAbsDiff32_SSE2(const __m128i* const A, const __m128i* const B,
|
||||
}
|
||||
|
||||
static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
@ -569,7 +613,7 @@ static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor12: ClampedSubSubtractFull.
|
||||
static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -598,7 +642,7 @@ static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictors13: ClampedAddSubtractHalf
|
||||
static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 2 <= num_pixels; i += 2) {
|
||||
|
17
thirdparty/libwebp/src/dsp/lossless_enc_sse41.c
vendored
17
thirdparty/libwebp/src/dsp/lossless_enc_sse41.c
vendored
@ -44,8 +44,9 @@ static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
|
||||
return HorizontalSum_SSE41(cost);
|
||||
}
|
||||
|
||||
static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a,
|
||||
const uint32_t* const b, int length) {
|
||||
static uint32_t ExtraCostCombined_SSE41(const uint32_t* WEBP_RESTRICT const a,
|
||||
const uint32_t* WEBP_RESTRICT const b,
|
||||
int length) {
|
||||
int i;
|
||||
__m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]),
|
||||
_mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]));
|
||||
@ -95,10 +96,11 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
|
||||
#define MK_CST_16(HI, LO) \
|
||||
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
|
||||
static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
static void CollectColorBlueTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
int histo[]) {
|
||||
uint32_t histo[]) {
|
||||
const __m128i mult =
|
||||
MK_CST_16(CST_5b(red_to_blue) + 256,CST_5b(green_to_blue));
|
||||
const __m128i perm =
|
||||
@ -141,10 +143,11 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
static void CollectColorRedTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, int histo[]) {
|
||||
|
||||
int green_to_red,
|
||||
uint32_t histo[]) {
|
||||
const __m128i mult = MK_CST_16(0, CST_5b(green_to_red));
|
||||
const __m128i mask_g = _mm_set1_epi32(0x0000ff00);
|
||||
if (tile_width >= 4) {
|
||||
|
46
thirdparty/libwebp/src/dsp/lossless_neon.c
vendored
46
thirdparty/libwebp/src/dsp/lossless_neon.c
vendored
@ -26,8 +26,8 @@
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
// gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for
|
||||
// gcc-4.8.x at least.
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@ -41,8 +41,8 @@ static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst); // left-overs
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@ -53,8 +53,8 @@ static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst); // left-overs
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@ -71,8 +71,8 @@ static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
|
||||
static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 };
|
||||
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~1);
|
||||
const uint8x8_t shuffle = vld1_u8(kRGBAShuffle);
|
||||
for (; src < end; src += 2) {
|
||||
@ -89,8 +89,8 @@ static const uint8_t kBGRShuffle[3][8] = {
|
||||
{ 21, 22, 24, 25, 26, 28, 29, 30 }
|
||||
};
|
||||
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~7);
|
||||
const uint8x8_t shuffle0 = vld1_u8(kBGRShuffle[0]);
|
||||
const uint8x8_t shuffle1 = vld1_u8(kBGRShuffle[1]);
|
||||
@ -116,8 +116,8 @@ static const uint8_t kRGBShuffle[3][8] = {
|
||||
{ 21, 20, 26, 25, 24, 30, 29, 28 }
|
||||
};
|
||||
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~7);
|
||||
const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]);
|
||||
const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]);
|
||||
@ -209,7 +209,7 @@ static uint32_t Predictor13_NEON(const uint32_t* const left,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const uint8x16_t black = vreinterpretq_u8_u32(vdupq_n_u32(ARGB_BLACK));
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -222,7 +222,7 @@ static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor1: left.
|
||||
static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const uint8x16_t zero = LOADQ_U32_AS_U8(0);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -248,7 +248,7 @@ static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorAdd##X##_NEON(const uint32_t* in, \
|
||||
const uint32_t* upper, int num_pixels, \
|
||||
uint32_t* out) { \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
|
||||
@ -276,7 +276,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -301,7 +301,7 @@ static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor6: average(left, TL)
|
||||
static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -317,7 +317,7 @@ static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor7: average(left, T)
|
||||
static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -335,7 +335,7 @@ static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_2(X, IN) \
|
||||
static void PredictorAdd##X##_NEON(const uint32_t* in, \
|
||||
const uint32_t* upper, int num_pixels, \
|
||||
uint32_t* out) { \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
|
||||
@ -363,7 +363,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -394,7 +394,7 @@ static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -427,7 +427,7 @@ static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint16x8_t L = vmovl_u8(LOAD_U32_AS_U8(out[-1]));
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -468,7 +468,7 @@ static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
|
38
thirdparty/libwebp/src/dsp/lossless_sse2.c
vendored
38
thirdparty/libwebp/src/dsp/lossless_sse2.c
vendored
@ -186,7 +186,7 @@ static uint32_t Predictor13_SSE2(const uint32_t* const left,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -202,7 +202,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor1: left.
|
||||
static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
__m128i prev = _mm_set1_epi32((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -230,7 +230,8 @@ static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
// per 8 bit channel.
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
|
||||
@ -259,7 +260,8 @@ GENERATE_PREDICTOR_ADD(Predictor7_SSE2, PredictorAdd7_SSE2)
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, IN) \
|
||||
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i Tother = _mm_loadu_si128((const __m128i*)&(IN)); \
|
||||
@ -297,7 +299,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -344,7 +346,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
__m128i pa;
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
@ -395,7 +397,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* out) {
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]);
|
||||
@ -490,8 +492,8 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
|
||||
static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
|
||||
@ -526,8 +528,8 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
@ -554,8 +556,9 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
|
||||
const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
@ -590,8 +593,9 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0);
|
||||
const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8);
|
||||
const __m128i mask_0x07 = _mm_set1_epi8(0x07);
|
||||
@ -631,8 +635,8 @@ static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff);
|
||||
const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
|
8
thirdparty/libwebp/src/dsp/lossless_sse41.c
vendored
8
thirdparty/libwebp/src/dsp/lossless_sse41.c
vendored
@ -77,8 +77,8 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
static void ConvertBGRAToRGB_SSE41(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9,
|
||||
@ -95,8 +95,8 @@ static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_SSE41(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
static void ConvertBGRAToBGR_SSE41(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,
|
||||
|
11
thirdparty/libwebp/src/dsp/rescaler.c
vendored
11
thirdparty/libwebp/src/dsp/rescaler.c
vendored
@ -26,8 +26,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Row import
|
||||
|
||||
void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
void WebPRescalerImportRowExpand_C(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
int channel;
|
||||
@ -59,8 +59,8 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
|
||||
}
|
||||
}
|
||||
|
||||
void WebPRescalerImportRowShrink_C(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
void WebPRescalerImportRowShrink_C(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
int channel;
|
||||
@ -158,7 +158,8 @@ void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Main entry calls
|
||||
|
||||
void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
void WebPRescalerImportRow(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
assert(!WebPRescalerInputDone(wrk));
|
||||
if (!wrk->x_expand) {
|
||||
WebPRescalerImportRowShrink(wrk, src);
|
||||
|
8
thirdparty/libwebp/src/dsp/rescaler_mips32.c
vendored
8
thirdparty/libwebp/src/dsp/rescaler_mips32.c
vendored
@ -21,8 +21,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Row import
|
||||
|
||||
static void ImportRowShrink_MIPS32(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void ImportRowShrink_MIPS32(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
const int fx_scale = wrk->fx_scale;
|
||||
@ -81,8 +81,8 @@ static void ImportRowShrink_MIPS32(WebPRescaler* const wrk,
|
||||
}
|
||||
}
|
||||
|
||||
static void ImportRowExpand_MIPS32(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void ImportRowExpand_MIPS32(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
const int x_add = wrk->x_add;
|
||||
|
27
thirdparty/libwebp/src/dsp/rescaler_msa.c
vendored
27
thirdparty/libwebp/src/dsp/rescaler_msa.c
vendored
@ -114,9 +114,9 @@
|
||||
dst = __msa_copy_s_w((v4i32)t0, 0); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void ExportRowExpand_0(const uint32_t* frow, uint8_t* dst,
|
||||
int length,
|
||||
WebPRescaler* const wrk) {
|
||||
static WEBP_INLINE void ExportRowExpand_0(
|
||||
const uint32_t* WEBP_RESTRICT frow, uint8_t* WEBP_RESTRICT dst, int length,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
const v4u32 scale = (v4u32)__msa_fill_w(wrk->fy_scale);
|
||||
const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX);
|
||||
const v4i32 zero = { 0 };
|
||||
@ -171,9 +171,10 @@ static WEBP_INLINE void ExportRowExpand_0(const uint32_t* frow, uint8_t* dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ExportRowExpand_1(const uint32_t* frow, uint32_t* irow,
|
||||
uint8_t* dst, int length,
|
||||
WebPRescaler* const wrk) {
|
||||
static WEBP_INLINE void ExportRowExpand_1(
|
||||
const uint32_t* WEBP_RESTRICT frow, uint32_t* WEBP_RESTRICT irow,
|
||||
uint8_t* WEBP_RESTRICT dst, int length,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
|
||||
const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
|
||||
const v4i32 B1 = __msa_fill_w(B);
|
||||
@ -262,10 +263,10 @@ static void RescalerExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
|
||||
}
|
||||
|
||||
#if 0 // disabled for now. TODO(skal): make match the C-code
|
||||
static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow,
|
||||
uint8_t* dst, int length,
|
||||
const uint32_t yscale,
|
||||
WebPRescaler* const wrk) {
|
||||
static WEBP_INLINE void ExportRowShrink_0(
|
||||
const uint32_t* WEBP_RESTRICT frow, uint32_t* WEBP_RESTRICT irow,
|
||||
uint8_t* WEBP_RESTRICT dst, int length, const uint32_t yscale,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
const v4u32 y_scale = (v4u32)__msa_fill_w(yscale);
|
||||
const v4u32 fxyscale = (v4u32)__msa_fill_w(wrk->fxy_scale);
|
||||
const v4u32 shiftval = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX);
|
||||
@ -348,9 +349,9 @@ static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ExportRowShrink_1(uint32_t* irow, uint8_t* dst,
|
||||
int length,
|
||||
WebPRescaler* const wrk) {
|
||||
static WEBP_INLINE void ExportRowShrink_1(
|
||||
uint32_t* WEBP_RESTRICT irow, uint8_t* WEBP_RESTRICT dst, int length,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
const v4u32 scale = (v4u32)__msa_fill_w(wrk->fxy_scale);
|
||||
const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX);
|
||||
const v4i32 zero = { 0 };
|
||||
|
4
thirdparty/libwebp/src/dsp/rescaler_neon.c
vendored
4
thirdparty/libwebp/src/dsp/rescaler_neon.c
vendored
@ -45,8 +45,8 @@
|
||||
#error "MULT_FIX/WEBP_RESCALER_RFIX need some more work"
|
||||
#endif
|
||||
|
||||
static uint32x4_t Interpolate_NEON(const rescaler_t* const frow,
|
||||
const rescaler_t* const irow,
|
||||
static uint32x4_t Interpolate_NEON(const rescaler_t* WEBP_RESTRICT const frow,
|
||||
const rescaler_t* WEBP_RESTRICT const irow,
|
||||
uint32_t A, uint32_t B) {
|
||||
LOAD_32x4(frow, A0);
|
||||
LOAD_32x4(irow, B0);
|
||||
|
18
thirdparty/libwebp/src/dsp/rescaler_sse2.c
vendored
18
thirdparty/libwebp/src/dsp/rescaler_sse2.c
vendored
@ -43,8 +43,8 @@ static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
||||
*out = _mm_unpacklo_epi8(A, zero);
|
||||
}
|
||||
|
||||
static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void RescalerImportRowExpand_SSE2(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
rescaler_t* frow = wrk->frow;
|
||||
const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels;
|
||||
const int x_add = wrk->x_add;
|
||||
@ -109,8 +109,8 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
assert(accum == 0);
|
||||
}
|
||||
|
||||
static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
static void RescalerImportRowShrink_SSE2(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
const int x_sub = wrk->x_sub;
|
||||
int accum = 0;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -168,12 +168,10 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
|
||||
// Row export
|
||||
|
||||
// load *src as epi64, multiply by mult and store result in [out0 ... out3]
|
||||
static WEBP_INLINE void LoadDispatchAndMult_SSE2(const rescaler_t* const src,
|
||||
const __m128i* const mult,
|
||||
__m128i* const out0,
|
||||
__m128i* const out1,
|
||||
__m128i* const out2,
|
||||
__m128i* const out3) {
|
||||
static WEBP_INLINE void LoadDispatchAndMult_SSE2(
|
||||
const rescaler_t* WEBP_RESTRICT const src, const __m128i* const mult,
|
||||
__m128i* const out0, __m128i* const out1, __m128i* const out2,
|
||||
__m128i* const out3) {
|
||||
const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 0));
|
||||
const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4));
|
||||
const __m128i A2 = _mm_srli_epi64(A0, 32);
|
||||
|
36
thirdparty/libwebp/src/dsp/upsampling.c
vendored
36
thirdparty/libwebp/src/dsp/upsampling.c
vendored
@ -35,10 +35,14 @@ WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
|
||||
#define LOAD_UV(u, v) ((u) | ((v) << 16))
|
||||
|
||||
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int x; \
|
||||
const int last_pixel_pair = (len - 1) >> 1; \
|
||||
uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \
|
||||
@ -136,10 +140,14 @@ static void EmptyUpsampleFunc(const uint8_t* top_y, const uint8_t* bottom_y,
|
||||
|
||||
#if !defined(FANCY_UPSAMPLING)
|
||||
#define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* bot_u, const uint8_t* bot_v, \
|
||||
uint8_t* top_dst, uint8_t* bot_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bot_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT bot_u, \
|
||||
const uint8_t* WEBP_RESTRICT bot_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bot_dst, int len) { \
|
||||
const int half_len = len >> 1; \
|
||||
int x; \
|
||||
assert(top_dst != NULL); \
|
||||
@ -178,10 +186,14 @@ WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
|
||||
// YUV444 converter
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
extern void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
extern void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len); \
|
||||
void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * (XSTEP)]); \
|
||||
}
|
||||
|
@ -143,10 +143,14 @@ static WEBP_INLINE void YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
|
||||
#define LOAD_UV(u, v) ((u) | ((v) << 16))
|
||||
|
||||
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int x; \
|
||||
const int last_pixel_pair = (len - 1) >> 1; \
|
||||
uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \
|
||||
@ -241,8 +245,10 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersMIPSdspR2(void) {
|
||||
// YUV444 converter
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \
|
||||
}
|
||||
|
55
thirdparty/libwebp/src/dsp/upsampling_msa.c
vendored
55
thirdparty/libwebp/src/dsp/upsampling_msa.c
vendored
@ -320,8 +320,10 @@ static void YuvToRgba(uint8_t y, uint8_t u, uint8_t v, uint8_t* const rgba) {
|
||||
}
|
||||
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
static void YuvToRgbLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToRgbLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
while (length >= 16) {
|
||||
CALC_RGB16(y, u, v, R, G, B);
|
||||
@ -347,8 +349,10 @@ static void YuvToRgbLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgrLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToBgrLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
while (length >= 16) {
|
||||
CALC_RGB16(y, u, v, R, G, B);
|
||||
@ -375,8 +379,10 @@ static void YuvToBgrLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToRgbaLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
@ -403,8 +409,10 @@ static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToBgraLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
@ -432,8 +440,10 @@ static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
static void YuvToArgbLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToArgbLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
@ -460,8 +470,10 @@ static void YuvToArgbLine(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToRgba4444Line(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B, RG, BA, tmp0, tmp1;
|
||||
while (length >= 16) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
@ -496,8 +508,10 @@ static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
static void YuvToRgb565Line(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
v16u8 R, G, B, RG, GB, tmp0, tmp1;
|
||||
while (length >= 16) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
@ -564,11 +578,14 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
|
||||
} while (0)
|
||||
|
||||
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bot_dst, int len) \
|
||||
{ \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bot_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bot_dst, int len) { \
|
||||
int size = (len - 1) >> 1; \
|
||||
uint8_t temp_u[64]; \
|
||||
uint8_t temp_v[64]; \
|
||||
|
111
thirdparty/libwebp/src/dsp/upsampling_neon.c
vendored
111
thirdparty/libwebp/src/dsp/upsampling_neon.c
vendored
@ -58,8 +58,9 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample16Pixels_NEON(const uint8_t* r1, const uint8_t* r2,
|
||||
uint8_t* out) {
|
||||
static void Upsample16Pixels_NEON(const uint8_t* WEBP_RESTRICT const r1,
|
||||
const uint8_t* WEBP_RESTRICT const r2,
|
||||
uint8_t* WEBP_RESTRICT const out) {
|
||||
UPSAMPLE_16PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@ -189,57 +190,61 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
|
||||
} \
|
||||
}
|
||||
|
||||
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
int block; \
|
||||
/* 16 byte aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[2 * 32 + 15]; \
|
||||
uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
|
||||
const int uv_len = (len + 1) >> 1; \
|
||||
/* 9 pixels must be read-able for each block */ \
|
||||
const int num_blocks = (uv_len - 1) >> 3; \
|
||||
const int leftover = uv_len - num_blocks * 8; \
|
||||
const int last_pos = 1 + 16 * num_blocks; \
|
||||
\
|
||||
const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \
|
||||
const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \
|
||||
\
|
||||
const int16x4_t coeff1 = vld1_s16(kCoeffs1); \
|
||||
const int16x8_t R_Rounder = vdupq_n_s16(-14234); \
|
||||
const int16x8_t G_Rounder = vdupq_n_s16(8708); \
|
||||
const int16x8_t B_Rounder = vdupq_n_s16(-17685); \
|
||||
\
|
||||
/* Treat the first pixel in regular way */ \
|
||||
assert(top_y != NULL); \
|
||||
{ \
|
||||
const int u0 = (top_u[0] + u_diag) >> 1; \
|
||||
const int v0 = (top_v[0] + v_diag) >> 1; \
|
||||
VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst); \
|
||||
} \
|
||||
if (bottom_y != NULL) { \
|
||||
const int u0 = (cur_u[0] + u_diag) >> 1; \
|
||||
const int v0 = (cur_v[0] + v_diag) >> 1; \
|
||||
VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst); \
|
||||
} \
|
||||
\
|
||||
for (block = 0; block < num_blocks; ++block) { \
|
||||
UPSAMPLE_16PIXELS(top_u, cur_u, r_uv); \
|
||||
UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16); \
|
||||
CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, \
|
||||
top_dst, bottom_dst, 16 * block + 1, 16); \
|
||||
top_u += 8; \
|
||||
cur_u += 8; \
|
||||
top_v += 8; \
|
||||
cur_v += 8; \
|
||||
} \
|
||||
\
|
||||
UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); \
|
||||
UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16); \
|
||||
CONVERT2RGB_1(VP8YuvTo ## FMT, XSTEP, top_y, bottom_y, r_uv, \
|
||||
top_dst, bottom_dst, last_pos, len - last_pos); \
|
||||
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int block; \
|
||||
/* 16 byte aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[2 * 32 + 15]; \
|
||||
uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~(uintptr_t)15); \
|
||||
const int uv_len = (len + 1) >> 1; \
|
||||
/* 9 pixels must be read-able for each block */ \
|
||||
const int num_blocks = (uv_len - 1) >> 3; \
|
||||
const int leftover = uv_len - num_blocks * 8; \
|
||||
const int last_pos = 1 + 16 * num_blocks; \
|
||||
\
|
||||
const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \
|
||||
const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \
|
||||
\
|
||||
const int16x4_t coeff1 = vld1_s16(kCoeffs1); \
|
||||
const int16x8_t R_Rounder = vdupq_n_s16(-14234); \
|
||||
const int16x8_t G_Rounder = vdupq_n_s16(8708); \
|
||||
const int16x8_t B_Rounder = vdupq_n_s16(-17685); \
|
||||
\
|
||||
/* Treat the first pixel in regular way */ \
|
||||
assert(top_y != NULL); \
|
||||
{ \
|
||||
const int u0 = (top_u[0] + u_diag) >> 1; \
|
||||
const int v0 = (top_v[0] + v_diag) >> 1; \
|
||||
VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst); \
|
||||
} \
|
||||
if (bottom_y != NULL) { \
|
||||
const int u0 = (cur_u[0] + u_diag) >> 1; \
|
||||
const int v0 = (cur_v[0] + v_diag) >> 1; \
|
||||
VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst); \
|
||||
} \
|
||||
\
|
||||
for (block = 0; block < num_blocks; ++block) { \
|
||||
UPSAMPLE_16PIXELS(top_u, cur_u, r_uv); \
|
||||
UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16); \
|
||||
CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, \
|
||||
top_dst, bottom_dst, 16 * block + 1, 16); \
|
||||
top_u += 8; \
|
||||
cur_u += 8; \
|
||||
top_v += 8; \
|
||||
cur_v += 8; \
|
||||
} \
|
||||
\
|
||||
UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); \
|
||||
UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16); \
|
||||
CONVERT2RGB_1(VP8YuvTo ## FMT, XSTEP, top_y, bottom_y, r_uv, \
|
||||
top_dst, bottom_dst, last_pos, len - last_pos); \
|
||||
}
|
||||
|
||||
// NEON variants of the fancy upsampler.
|
||||
|
29
thirdparty/libwebp/src/dsp/upsampling_sse2.c
vendored
29
thirdparty/libwebp/src/dsp/upsampling_sse2.c
vendored
@ -88,8 +88,9 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[],
|
||||
uint8_t* const out) {
|
||||
static void Upsample32Pixels_SSE2(const uint8_t* WEBP_RESTRICT const r1,
|
||||
const uint8_t* WEBP_RESTRICT const r2,
|
||||
uint8_t* WEBP_RESTRICT const out) {
|
||||
UPSAMPLE_32PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@ -114,10 +115,14 @@ static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[],
|
||||
} while (0)
|
||||
|
||||
#define SSE2_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int uv_pos, pos; \
|
||||
/* 16byte-aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
|
||||
@ -215,10 +220,14 @@ extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
extern void WebPInitYUV444ConvertersSSE2(void);
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
|
||||
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
extern void CALL_C(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
const int max_len = len & ~31; \
|
||||
for (i = 0; i < max_len; i += 32) { \
|
||||
|
31
thirdparty/libwebp/src/dsp/upsampling_sse41.c
vendored
31
thirdparty/libwebp/src/dsp/upsampling_sse41.c
vendored
@ -90,8 +90,9 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
|
||||
uint8_t* const out) {
|
||||
static void Upsample32Pixels_SSE41(const uint8_t* WEBP_RESTRICT const r1,
|
||||
const uint8_t* WEBP_RESTRICT const r2,
|
||||
uint8_t* WEBP_RESTRICT const out) {
|
||||
UPSAMPLE_32PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@ -116,14 +117,18 @@ static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
|
||||
} while (0)
|
||||
|
||||
#define SSE4_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
int uv_pos, pos; \
|
||||
/* 16byte-aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
|
||||
uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
|
||||
uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~(uintptr_t)15); \
|
||||
uint8_t* const r_v = r_u + 32; \
|
||||
\
|
||||
assert(top_y != NULL); \
|
||||
@ -202,10 +207,14 @@ extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
extern void WebPInitYUV444ConvertersSSE41(void);
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
|
||||
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
extern void CALL_C(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
const int max_len = len & ~31; \
|
||||
for (i = 0; i < max_len; i += 32) { \
|
||||
|
48
thirdparty/libwebp/src/dsp/yuv.c
vendored
48
thirdparty/libwebp/src/dsp/yuv.c
vendored
@ -20,9 +20,10 @@
|
||||
// Plain-C version
|
||||
|
||||
#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
const uint8_t* const end = dst + (len & ~1) * (XSTEP); \
|
||||
while (dst != end) { \
|
||||
FUNC(y[0], u[0], v[0], dst); \
|
||||
@ -49,9 +50,10 @@ ROW_FUNC(YuvToRgb565Row, VP8YuvToRgb565, 2)
|
||||
#undef ROW_FUNC
|
||||
|
||||
// Main call for processing a plane with a WebPSamplerRowFunc function:
|
||||
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||
const uint8_t* u, const uint8_t* v, int uv_stride,
|
||||
uint8_t* dst, int dst_stride,
|
||||
void WebPSamplerProcessPlane(const uint8_t* WEBP_RESTRICT y, int y_stride,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v, int uv_stride,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride,
|
||||
int width, int height, WebPSamplerRowFunc func) {
|
||||
int j;
|
||||
for (j = 0; j < height; ++j) {
|
||||
@ -117,7 +119,8 @@ WEBP_DSP_INIT_FUNC(WebPInitSamplers) {
|
||||
//-----------------------------------------------------------------------------
|
||||
// ARGB -> YUV converters
|
||||
|
||||
static void ConvertARGBToY_C(const uint32_t* argb, uint8_t* y, int width) {
|
||||
static void ConvertARGBToY_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
const uint32_t p = argb[i];
|
||||
@ -126,7 +129,8 @@ static void ConvertARGBToY_C(const uint32_t* argb, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store) {
|
||||
// No rounding. Last pixel is dealt with separately.
|
||||
const int uv_width = src_width >> 1;
|
||||
@ -169,22 +173,25 @@ void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
static void ConvertRGB24ToY_C(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
static void ConvertRGB24ToY_C(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i, rgb += 3) {
|
||||
y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_C(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
static void ConvertBGR24ToY_C(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i, bgr += 3) {
|
||||
y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; i += 1, rgb += 4) {
|
||||
const int r = rgb[0], g = rgb[1], b = rgb[2];
|
||||
@ -195,13 +202,18 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
|
||||
void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
|
||||
void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
void (*WebPConvertRGB24ToY)(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
void (*WebPConvertBGR24ToY)(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width);
|
||||
|
||||
void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
|
||||
void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store);
|
||||
|
||||
extern void WebPInitConvertARGBToYUVSSE2(void);
|
||||
|
64
thirdparty/libwebp/src/dsp/yuv.h
vendored
64
thirdparty/libwebp/src/dsp/yuv.h
vendored
@ -11,15 +11,15 @@
|
||||
//
|
||||
// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
|
||||
// More information at: https://en.wikipedia.org/wiki/YCbCr
|
||||
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
|
||||
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
|
||||
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
|
||||
// Y = 0.2568 * R + 0.5041 * G + 0.0979 * B + 16
|
||||
// U = -0.1482 * R - 0.2910 * G + 0.4392 * B + 128
|
||||
// V = 0.4392 * R - 0.3678 * G - 0.0714 * B + 128
|
||||
// We use 16bit fixed point operations for RGB->YUV conversion (YUV_FIX).
|
||||
//
|
||||
// For the Y'CbCr to RGB conversion, the BT.601 specification reads:
|
||||
// R = 1.164 * (Y-16) + 1.596 * (V-128)
|
||||
// G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128)
|
||||
// B = 1.164 * (Y-16) + 2.018 * (U-128)
|
||||
// G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.392 * (U-128)
|
||||
// B = 1.164 * (Y-16) + 2.017 * (U-128)
|
||||
// where Y is in the [16,235] range, and U/V in the [16,240] range.
|
||||
//
|
||||
// The fixed-point implementation used here is:
|
||||
@ -149,20 +149,34 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst);
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
||||
|
||||
@ -172,10 +186,14 @@ void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
|
||||
#endif // WEBP_USE_SSE41
|
||||
|
||||
|
7
thirdparty/libwebp/src/dsp/yuv_mips32.c
vendored
7
thirdparty/libwebp/src/dsp/yuv_mips32.c
vendored
@ -22,9 +22,10 @@
|
||||
// simple point-sampling
|
||||
|
||||
#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A) \
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i, r, g, b; \
|
||||
int temp0, temp1, temp2, temp3, temp4; \
|
||||
for (i = 0; i < (len >> 1); i++) { \
|
||||
|
7
thirdparty/libwebp/src/dsp/yuv_mips_dsp_r2.c
vendored
7
thirdparty/libwebp/src/dsp/yuv_mips_dsp_r2.c
vendored
@ -69,9 +69,10 @@
|
||||
: "memory", "hi", "lo" \
|
||||
|
||||
#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A) \
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
int i; \
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; \
|
||||
const int t_con_1 = 26149; \
|
||||
|
18
thirdparty/libwebp/src/dsp/yuv_neon.c
vendored
18
thirdparty/libwebp/src/dsp/yuv_neon.c
vendored
@ -46,7 +46,8 @@ static uint8x8_t ConvertRGBToY_NEON(const uint8x8_t R,
|
||||
return vqmovn_u16(Y2);
|
||||
}
|
||||
|
||||
static void ConvertRGB24ToY_NEON(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
static void ConvertRGB24ToY_NEON(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8, rgb += 3 * 8) {
|
||||
const uint8x8x3_t RGB = vld3_u8(rgb);
|
||||
@ -58,7 +59,8 @@ static void ConvertRGB24ToY_NEON(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_NEON(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
static void ConvertBGR24ToY_NEON(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8, bgr += 3 * 8) {
|
||||
const uint8x8x3_t BGR = vld3_u8(bgr);
|
||||
@ -70,7 +72,8 @@ static void ConvertBGR24ToY_NEON(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToY_NEON(const uint32_t* argb, uint8_t* y, int width) {
|
||||
static void ConvertARGBToY_NEON(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8) {
|
||||
const uint8x8x4_t RGB = vld4_u8((const uint8_t*)&argb[i]);
|
||||
@ -114,8 +117,9 @@ static void ConvertARGBToY_NEON(const uint32_t* argb, uint8_t* y, int width) {
|
||||
MULTIPLY_16b(28800, -24116, -4684, 128 << SHIFT, V_DST); \
|
||||
} while (0)
|
||||
|
||||
static void ConvertRGBA32ToUV_NEON(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
static void ConvertRGBA32ToUV_NEON(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8, rgb += 4 * 8) {
|
||||
const uint16x8x4_t RGB = vld4q_u16((const uint16_t*)rgb);
|
||||
@ -131,7 +135,9 @@ static void ConvertRGBA32ToUV_NEON(const uint16_t* rgb,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToUV_NEON(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
static void ConvertARGBToUV_NEON(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store) {
|
||||
int i;
|
||||
for (i = 0; i + 16 <= src_width; i += 16, u += 8, v += 8) {
|
||||
|
130
thirdparty/libwebp/src/dsp/yuv_sse2.c
vendored
130
thirdparty/libwebp/src/dsp/yuv_sse2.c
vendored
@ -82,9 +82,9 @@ static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) {
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV444 to R/G/B
|
||||
static void YUV444ToRGB_SSE2(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
static void YUV444ToRGB_SSE2(const uint8_t* WEBP_RESTRICT const y,
|
||||
const uint8_t* WEBP_RESTRICT const u,
|
||||
const uint8_t* WEBP_RESTRICT const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_HI_16_SSE2(u),
|
||||
@ -93,9 +93,9 @@ static void YUV444ToRGB_SSE2(const uint8_t* const y,
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV420 to R/G/B
|
||||
static void YUV420ToRGB_SSE2(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
static void YUV420ToRGB_SSE2(const uint8_t* WEBP_RESTRICT const y,
|
||||
const uint8_t* WEBP_RESTRICT const u,
|
||||
const uint8_t* WEBP_RESTRICT const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_UV_HI_8_SSE2(u),
|
||||
@ -108,7 +108,7 @@ static WEBP_INLINE void PackAndStore4_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
const __m128i* const A,
|
||||
uint8_t* const dst) {
|
||||
uint8_t* WEBP_RESTRICT const dst) {
|
||||
const __m128i rb = _mm_packus_epi16(*R, *B);
|
||||
const __m128i ga = _mm_packus_epi16(*G, *A);
|
||||
const __m128i rg = _mm_unpacklo_epi8(rb, ga);
|
||||
@ -120,11 +120,9 @@ static WEBP_INLINE void PackAndStore4_SSE2(const __m128i* const R,
|
||||
}
|
||||
|
||||
// Pack R/G/B/A results into 16b output.
|
||||
static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
const __m128i* const A,
|
||||
uint8_t* const dst) {
|
||||
static WEBP_INLINE void PackAndStore4444_SSE2(
|
||||
const __m128i* const R, const __m128i* const G, const __m128i* const B,
|
||||
const __m128i* const A, uint8_t* WEBP_RESTRICT const dst) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 0)
|
||||
const __m128i rg0 = _mm_packus_epi16(*R, *G);
|
||||
const __m128i ba0 = _mm_packus_epi16(*B, *A);
|
||||
@ -145,7 +143,7 @@ static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
|
||||
static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
uint8_t* const dst) {
|
||||
uint8_t* WEBP_RESTRICT const dst) {
|
||||
const __m128i r0 = _mm_packus_epi16(*R, *R);
|
||||
const __m128i g0 = _mm_packus_epi16(*G, *G);
|
||||
const __m128i b0 = _mm_packus_epi16(*B, *B);
|
||||
@ -170,7 +168,7 @@ static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
|
||||
static WEBP_INLINE void PlanarTo24b_SSE2(__m128i* const in0, __m128i* const in1,
|
||||
__m128i* const in2, __m128i* const in3,
|
||||
__m128i* const in4, __m128i* const in5,
|
||||
uint8_t* const rgb) {
|
||||
uint8_t* WEBP_RESTRICT const rgb) {
|
||||
// The input is 6 registers of sixteen 8b but for the sake of explanation,
|
||||
// let's take 6 registers of four 8b values.
|
||||
// To pack, we will keep taking one every two 8b integer and move it
|
||||
@ -193,8 +191,10 @@ static WEBP_INLINE void PlanarTo24b_SSE2(__m128i* const in0, __m128i* const in1,
|
||||
_mm_storeu_si128((__m128i*)(rgb + 80), *in5);
|
||||
}
|
||||
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 32) {
|
||||
@ -204,8 +204,10 @@ void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 32) {
|
||||
@ -215,8 +217,10 @@ void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 32) {
|
||||
@ -226,8 +230,10 @@ void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst) {
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 16) {
|
||||
@ -237,8 +243,10 @@ void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 16) {
|
||||
__m128i R, G, B;
|
||||
@ -247,8 +255,10 @@ void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
|
||||
|
||||
@ -269,8 +279,10 @@ void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
PlanarTo24b_SSE2(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
}
|
||||
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
|
||||
|
||||
@ -294,9 +306,10 @@ void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
//-----------------------------------------------------------------------------
|
||||
// Arbitrary-length row conversion functions
|
||||
|
||||
static void YuvToRgbaRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToRgbaRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
|
||||
@ -316,9 +329,10 @@ static void YuvToRgbaRow_SSE2(const uint8_t* y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgraRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToBgraRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
|
||||
@ -338,9 +352,10 @@ static void YuvToBgraRow_SSE2(const uint8_t* y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToArgbRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToArgbRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
|
||||
@ -360,9 +375,10 @@ static void YuvToArgbRow_SSE2(const uint8_t* y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToRgbRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToRgbRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
@ -397,9 +413,10 @@ static void YuvToRgbRow_SSE2(const uint8_t* y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgrRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToBgrRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
@ -471,7 +488,7 @@ static WEBP_INLINE void RGB24PackedToPlanarHelper_SSE2(
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// Similar to PlanarTo24bHelper(), but in reverse order.
|
||||
static WEBP_INLINE void RGB24PackedToPlanar_SSE2(
|
||||
const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
|
||||
const uint8_t* WEBP_RESTRICT const rgb, __m128i* const out /*out[6]*/) {
|
||||
__m128i tmp[6];
|
||||
tmp[0] = _mm_loadu_si128((const __m128i*)(rgb + 0));
|
||||
tmp[1] = _mm_loadu_si128((const __m128i*)(rgb + 16));
|
||||
@ -488,8 +505,8 @@ static WEBP_INLINE void RGB24PackedToPlanar_SSE2(
|
||||
}
|
||||
|
||||
// Convert 8 packed ARGB to r[], g[], b[]
|
||||
static WEBP_INLINE void RGB32PackedToPlanar_SSE2(const uint32_t* const argb,
|
||||
__m128i* const rgb /*in[6]*/) {
|
||||
static WEBP_INLINE void RGB32PackedToPlanar_SSE2(
|
||||
const uint32_t* WEBP_RESTRICT const argb, __m128i* const rgb /*in[6]*/) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i a0 = LOAD_16(argb + 0);
|
||||
__m128i a1 = LOAD_16(argb + 4);
|
||||
@ -562,7 +579,8 @@ static WEBP_INLINE void ConvertRGBToUV_SSE2(const __m128i* const R,
|
||||
#undef MK_CST_16
|
||||
#undef TRANSFORM
|
||||
|
||||
static void ConvertRGB24ToY_SSE2(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
static void ConvertRGB24ToY_SSE2(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; rgb += 3 * 16 * 2) {
|
||||
@ -596,7 +614,8 @@ static void ConvertRGB24ToY_SSE2(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_SSE2(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
static void ConvertBGR24ToY_SSE2(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
|
||||
@ -630,7 +649,8 @@ static void ConvertBGR24ToY_SSE2(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToY_SSE2(const uint32_t* argb, uint8_t* y, int width) {
|
||||
static void ConvertARGBToY_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
const int max_width = width & ~15;
|
||||
int i;
|
||||
for (i = 0; i < max_width; i += 16) {
|
||||
@ -658,8 +678,9 @@ static void HorizontalAddPack_SSE2(const __m128i* const A,
|
||||
*out = _mm_packs_epi32(C, D);
|
||||
}
|
||||
|
||||
static void ConvertARGBToUV_SSE2(const uint32_t* argb,
|
||||
uint8_t* u, uint8_t* v,
|
||||
static void ConvertARGBToUV_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store) {
|
||||
const int max_width = src_width & ~31;
|
||||
int i;
|
||||
@ -695,7 +716,7 @@ static void ConvertARGBToUV_SSE2(const uint32_t* argb,
|
||||
|
||||
// Convert 16 packed ARGB 16b-values to r[], g[], b[]
|
||||
static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE2(
|
||||
const uint16_t* const rgbx,
|
||||
const uint16_t* WEBP_RESTRICT const rgbx,
|
||||
__m128i* const r, __m128i* const g, __m128i* const b) {
|
||||
const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x
|
||||
const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x
|
||||
@ -715,8 +736,9 @@ static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE2(
|
||||
*b = _mm_unpacklo_epi64(B1, B3);
|
||||
}
|
||||
|
||||
static void ConvertRGBA32ToUV_SSE2(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
static void ConvertRGBA32ToUV_SSE2(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width) {
|
||||
const int max_width = width & ~15;
|
||||
const uint16_t* const last_rgb = rgb + 4 * max_width;
|
||||
while (rgb < last_rgb) {
|
||||
|
65
thirdparty/libwebp/src/dsp/yuv_sse41.c
vendored
65
thirdparty/libwebp/src/dsp/yuv_sse41.c
vendored
@ -82,9 +82,9 @@ static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) {
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV444 to R/G/B
|
||||
static void YUV444ToRGB_SSE41(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
static void YUV444ToRGB_SSE41(const uint8_t* WEBP_RESTRICT const y,
|
||||
const uint8_t* WEBP_RESTRICT const u,
|
||||
const uint8_t* WEBP_RESTRICT const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_HI_16_SSE41(u),
|
||||
@ -93,9 +93,9 @@ static void YUV444ToRGB_SSE41(const uint8_t* const y,
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV420 to R/G/B
|
||||
static void YUV420ToRGB_SSE41(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
static void YUV420ToRGB_SSE41(const uint8_t* WEBP_RESTRICT const y,
|
||||
const uint8_t* WEBP_RESTRICT const u,
|
||||
const uint8_t* WEBP_RESTRICT const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_UV_HI_8_SSE41(u),
|
||||
@ -109,7 +109,7 @@ static void YUV420ToRGB_SSE41(const uint8_t* const y,
|
||||
static WEBP_INLINE void PlanarTo24b_SSE41(
|
||||
__m128i* const in0, __m128i* const in1, __m128i* const in2,
|
||||
__m128i* const in3, __m128i* const in4, __m128i* const in5,
|
||||
uint8_t* const rgb) {
|
||||
uint8_t* WEBP_RESTRICT const rgb) {
|
||||
// The input is 6 registers of sixteen 8b but for the sake of explanation,
|
||||
// let's take 6 registers of four 8b values.
|
||||
// To pack, we will keep taking one every two 8b integer and move it
|
||||
@ -132,8 +132,10 @@ static WEBP_INLINE void PlanarTo24b_SSE41(
|
||||
_mm_storeu_si128((__m128i*)(rgb + 80), *in5);
|
||||
}
|
||||
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
|
||||
|
||||
@ -154,8 +156,10 @@ void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
PlanarTo24b_SSE41(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
}
|
||||
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
|
||||
|
||||
@ -179,9 +183,10 @@ void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
//-----------------------------------------------------------------------------
|
||||
// Arbitrary-length row conversion functions
|
||||
|
||||
static void YuvToRgbRow_SSE41(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToRgbRow_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
@ -216,9 +221,10 @@ static void YuvToRgbRow_SSE41(const uint8_t* y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgrRow_SSE41(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
static void YuvToBgrRow_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
@ -290,7 +296,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE41(void) {
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// Similar to PlanarTo24bHelper(), but in reverse order.
|
||||
static WEBP_INLINE void RGB24PackedToPlanar_SSE41(
|
||||
const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
|
||||
const uint8_t* WEBP_RESTRICT const rgb, __m128i* const out /*out[6]*/) {
|
||||
const __m128i A0 = _mm_loadu_si128((const __m128i*)(rgb + 0));
|
||||
const __m128i A1 = _mm_loadu_si128((const __m128i*)(rgb + 16));
|
||||
const __m128i A2 = _mm_loadu_si128((const __m128i*)(rgb + 32));
|
||||
@ -334,7 +340,7 @@ static WEBP_INLINE void RGB24PackedToPlanar_SSE41(
|
||||
|
||||
// Convert 8 packed ARGB to r[], g[], b[]
|
||||
static WEBP_INLINE void RGB32PackedToPlanar_SSE41(
|
||||
const uint32_t* const argb, __m128i* const rgb /*in[6]*/) {
|
||||
const uint32_t* WEBP_RESTRICT const argb, __m128i* const rgb /*in[6]*/) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i a0 = LOAD_16(argb + 0);
|
||||
__m128i a1 = LOAD_16(argb + 4);
|
||||
@ -407,7 +413,8 @@ static WEBP_INLINE void ConvertRGBToUV_SSE41(const __m128i* const R,
|
||||
#undef MK_CST_16
|
||||
#undef TRANSFORM
|
||||
|
||||
static void ConvertRGB24ToY_SSE41(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
static void ConvertRGB24ToY_SSE41(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; rgb += 3 * 16 * 2) {
|
||||
@ -441,7 +448,8 @@ static void ConvertRGB24ToY_SSE41(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_SSE41(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
static void ConvertBGR24ToY_SSE41(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
|
||||
@ -475,7 +483,8 @@ static void ConvertBGR24ToY_SSE41(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToY_SSE41(const uint32_t* argb, uint8_t* y, int width) {
|
||||
static void ConvertARGBToY_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
const int max_width = width & ~15;
|
||||
int i;
|
||||
for (i = 0; i < max_width; i += 16) {
|
||||
@ -503,8 +512,9 @@ static void HorizontalAddPack_SSE41(const __m128i* const A,
|
||||
*out = _mm_packs_epi32(C, D);
|
||||
}
|
||||
|
||||
static void ConvertARGBToUV_SSE41(const uint32_t* argb,
|
||||
uint8_t* u, uint8_t* v,
|
||||
static void ConvertARGBToUV_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
int src_width, int do_store) {
|
||||
const int max_width = src_width & ~31;
|
||||
int i;
|
||||
@ -540,7 +550,7 @@ static void ConvertARGBToUV_SSE41(const uint32_t* argb,
|
||||
|
||||
// Convert 16 packed ARGB 16b-values to r[], g[], b[]
|
||||
static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE41(
|
||||
const uint16_t* const rgbx,
|
||||
const uint16_t* WEBP_RESTRICT const rgbx,
|
||||
__m128i* const r, __m128i* const g, __m128i* const b) {
|
||||
const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x
|
||||
const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x
|
||||
@ -570,8 +580,9 @@ static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE41(
|
||||
*b = _mm_unpackhi_epi64(B1, B3);
|
||||
}
|
||||
|
||||
static void ConvertRGBA32ToUV_SSE41(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
static void ConvertRGBA32ToUV_SSE41(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width) {
|
||||
const int max_width = width & ~15;
|
||||
const uint16_t* const last_rgb = rgb + 4 * max_width;
|
||||
while (rgb < last_rgb) {
|
||||
|
1
thirdparty/libwebp/src/enc/alpha_enc.c
vendored
1
thirdparty/libwebp/src/enc/alpha_enc.c
vendored
@ -276,6 +276,7 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height,
|
||||
stats->lossless_features = best.stats.lossless_features;
|
||||
stats->histogram_bits = best.stats.histogram_bits;
|
||||
stats->transform_bits = best.stats.transform_bits;
|
||||
stats->cross_color_transform_bits = best.stats.cross_color_transform_bits;
|
||||
stats->cache_bits = best.stats.cache_bits;
|
||||
stats->palette_size = best.stats.palette_size;
|
||||
stats->lossless_size = best.stats.lossless_size;
|
||||
|
@ -15,7 +15,7 @@
|
||||
//
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/enc/backward_references_enc.h"
|
||||
@ -31,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
|
||||
const PixOrCopy v);
|
||||
|
||||
typedef struct {
|
||||
float alpha_[VALUES_IN_BYTE];
|
||||
float red_[VALUES_IN_BYTE];
|
||||
float blue_[VALUES_IN_BYTE];
|
||||
float distance_[NUM_DISTANCE_CODES];
|
||||
float* literal_;
|
||||
uint32_t alpha_[VALUES_IN_BYTE];
|
||||
uint32_t red_[VALUES_IN_BYTE];
|
||||
uint32_t blue_[VALUES_IN_BYTE];
|
||||
uint32_t distance_[NUM_DISTANCE_CODES];
|
||||
uint32_t* literal_;
|
||||
} CostModel;
|
||||
|
||||
static void ConvertPopulationCountTableToBitEstimates(
|
||||
int num_symbols, const uint32_t population_counts[], float output[]) {
|
||||
int num_symbols, const uint32_t population_counts[], uint32_t output[]) {
|
||||
uint32_t sum = 0;
|
||||
int nonzeros = 0;
|
||||
int i;
|
||||
@ -52,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates(
|
||||
if (nonzeros <= 1) {
|
||||
memset(output, 0, num_symbols * sizeof(*output));
|
||||
} else {
|
||||
const float logsum = VP8LFastLog2(sum);
|
||||
const uint32_t logsum = VP8LFastLog2(sum);
|
||||
for (i = 0; i < num_symbols; ++i) {
|
||||
output[i] = logsum - VP8LFastLog2(population_counts[i]);
|
||||
}
|
||||
@ -93,47 +93,47 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
|
||||
return ok;
|
||||
}
|
||||
|
||||
static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) {
|
||||
return m->alpha_[v >> 24] +
|
||||
m->red_[(v >> 16) & 0xff] +
|
||||
m->literal_[(v >> 8) & 0xff] +
|
||||
m->blue_[v & 0xff];
|
||||
static WEBP_INLINE int64_t GetLiteralCost(const CostModel* const m,
|
||||
uint32_t v) {
|
||||
return (int64_t)m->alpha_[v >> 24] + m->red_[(v >> 16) & 0xff] +
|
||||
m->literal_[(v >> 8) & 0xff] + m->blue_[v & 0xff];
|
||||
}
|
||||
|
||||
static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) {
|
||||
static WEBP_INLINE int64_t GetCacheCost(const CostModel* const m,
|
||||
uint32_t idx) {
|
||||
const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
|
||||
return m->literal_[literal_idx];
|
||||
return (int64_t)m->literal_[literal_idx];
|
||||
}
|
||||
|
||||
static WEBP_INLINE float GetLengthCost(const CostModel* const m,
|
||||
uint32_t length) {
|
||||
static WEBP_INLINE int64_t GetLengthCost(const CostModel* const m,
|
||||
uint32_t length) {
|
||||
int code, extra_bits;
|
||||
VP8LPrefixEncodeBits(length, &code, &extra_bits);
|
||||
return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
|
||||
return (int64_t)m->literal_[VALUES_IN_BYTE + code] +
|
||||
((int64_t)extra_bits << LOG_2_PRECISION_BITS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE float GetDistanceCost(const CostModel* const m,
|
||||
uint32_t distance) {
|
||||
static WEBP_INLINE int64_t GetDistanceCost(const CostModel* const m,
|
||||
uint32_t distance) {
|
||||
int code, extra_bits;
|
||||
VP8LPrefixEncodeBits(distance, &code, &extra_bits);
|
||||
return m->distance_[code] + extra_bits;
|
||||
return (int64_t)m->distance_[code] +
|
||||
((int64_t)extra_bits << LOG_2_PRECISION_BITS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void AddSingleLiteralWithCostModel(
|
||||
const uint32_t* const argb, VP8LColorCache* const hashers,
|
||||
const CostModel* const cost_model, int idx, int use_color_cache,
|
||||
float prev_cost, float* const cost, uint16_t* const dist_array) {
|
||||
float cost_val = prev_cost;
|
||||
int64_t prev_cost, int64_t* const cost, uint16_t* const dist_array) {
|
||||
int64_t cost_val = prev_cost;
|
||||
const uint32_t color = argb[idx];
|
||||
const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1;
|
||||
if (ix >= 0) {
|
||||
// use_color_cache is true and hashers contains color
|
||||
const float mul0 = 0.68f;
|
||||
cost_val += GetCacheCost(cost_model, ix) * mul0;
|
||||
cost_val += DivRound(GetCacheCost(cost_model, ix) * 68, 100);
|
||||
} else {
|
||||
const float mul1 = 0.82f;
|
||||
if (use_color_cache) VP8LColorCacheInsert(hashers, color);
|
||||
cost_val += GetLiteralCost(cost_model, color) * mul1;
|
||||
cost_val += DivRound(GetLiteralCost(cost_model, color) * 82, 100);
|
||||
}
|
||||
if (cost[idx] > cost_val) {
|
||||
cost[idx] = cost_val;
|
||||
@ -163,7 +163,7 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel(
|
||||
// therefore no overlapping intervals.
|
||||
typedef struct CostInterval CostInterval;
|
||||
struct CostInterval {
|
||||
float cost_;
|
||||
int64_t cost_;
|
||||
int start_;
|
||||
int end_;
|
||||
int index_;
|
||||
@ -173,7 +173,7 @@ struct CostInterval {
|
||||
|
||||
// The GetLengthCost(cost_model, k) are cached in a CostCacheInterval.
|
||||
typedef struct {
|
||||
float cost_;
|
||||
int64_t cost_;
|
||||
int start_;
|
||||
int end_; // Exclusive.
|
||||
} CostCacheInterval;
|
||||
@ -188,8 +188,9 @@ typedef struct {
|
||||
int count_; // The number of stored intervals.
|
||||
CostCacheInterval* cache_intervals_;
|
||||
size_t cache_intervals_size_;
|
||||
float cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k).
|
||||
float* costs_;
|
||||
// Contains the GetLengthCost(cost_model, k).
|
||||
int64_t cost_cache_[MAX_LENGTH];
|
||||
int64_t* costs_;
|
||||
uint16_t* dist_array_;
|
||||
// Most of the time, we only need few intervals -> use a free-list, to avoid
|
||||
// fragmentation with small allocs in most common cases.
|
||||
@ -298,7 +299,7 @@ static int CostManagerInit(CostManager* const manager,
|
||||
cur->end_ = 1;
|
||||
cur->cost_ = manager->cost_cache_[0];
|
||||
for (i = 1; i < cost_cache_size; ++i) {
|
||||
const float cost_val = manager->cost_cache_[i];
|
||||
const int64_t cost_val = manager->cost_cache_[i];
|
||||
if (cost_val != cur->cost_) {
|
||||
++cur;
|
||||
// Initialize an interval.
|
||||
@ -311,13 +312,15 @@ static int CostManagerInit(CostManager* const manager,
|
||||
manager->cache_intervals_size_);
|
||||
}
|
||||
|
||||
manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
|
||||
manager->costs_ =
|
||||
(int64_t*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
|
||||
if (manager->costs_ == NULL) {
|
||||
CostManagerClear(manager);
|
||||
return 0;
|
||||
}
|
||||
// Set the initial costs_ high for every pixel as we will keep the minimum.
|
||||
for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX;
|
||||
// Set the initial costs_ to INT64_MAX for every pixel as we will keep the
|
||||
// minimum.
|
||||
for (i = 0; i < pix_count; ++i) manager->costs_[i] = WEBP_INT64_MAX;
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -325,7 +328,7 @@ static int CostManagerInit(CostManager* const manager,
|
||||
// Given the cost and the position that define an interval, update the cost at
|
||||
// pixel 'i' if it is smaller than the previously computed value.
|
||||
static WEBP_INLINE void UpdateCost(CostManager* const manager, int i,
|
||||
int position, float cost) {
|
||||
int position, int64_t cost) {
|
||||
const int k = i - position;
|
||||
assert(k >= 0 && k < MAX_LENGTH);
|
||||
|
||||
@ -339,7 +342,7 @@ static WEBP_INLINE void UpdateCost(CostManager* const manager, int i,
|
||||
// all the pixels between 'start' and 'end' excluded.
|
||||
static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager,
|
||||
int start, int end, int position,
|
||||
float cost) {
|
||||
int64_t cost) {
|
||||
int i;
|
||||
for (i = start; i < end; ++i) UpdateCost(manager, i, position, cost);
|
||||
}
|
||||
@ -424,7 +427,7 @@ static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager,
|
||||
// interval_in as a hint. The intervals are sorted by start_ value.
|
||||
static WEBP_INLINE void InsertInterval(CostManager* const manager,
|
||||
CostInterval* const interval_in,
|
||||
float cost, int position, int start,
|
||||
int64_t cost, int position, int start,
|
||||
int end) {
|
||||
CostInterval* interval_new;
|
||||
|
||||
@ -463,7 +466,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
|
||||
// If handling the interval or one of its subintervals becomes to heavy, its
|
||||
// contribution is added to the costs right away.
|
||||
static WEBP_INLINE void PushInterval(CostManager* const manager,
|
||||
float distance_cost, int position,
|
||||
int64_t distance_cost, int position,
|
||||
int len) {
|
||||
size_t i;
|
||||
CostInterval* interval = manager->head_;
|
||||
@ -478,7 +481,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
|
||||
int j;
|
||||
for (j = position; j < position + len; ++j) {
|
||||
const int k = j - position;
|
||||
float cost_tmp;
|
||||
int64_t cost_tmp;
|
||||
assert(k >= 0 && k < MAX_LENGTH);
|
||||
cost_tmp = distance_cost + manager->cost_cache_[k];
|
||||
|
||||
@ -498,7 +501,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
|
||||
const int end = position + (cost_cache_intervals[i].end_ > len
|
||||
? len
|
||||
: cost_cache_intervals[i].end_);
|
||||
const float cost = distance_cost + cost_cache_intervals[i].cost_;
|
||||
const int64_t cost = distance_cost + cost_cache_intervals[i].cost_;
|
||||
|
||||
for (; interval != NULL && interval->start_ < end;
|
||||
interval = interval_next) {
|
||||
@ -576,7 +579,7 @@ static int BackwardReferencesHashChainDistanceOnly(
|
||||
const int pix_count = xsize * ysize;
|
||||
const int use_color_cache = (cache_bits > 0);
|
||||
const size_t literal_array_size =
|
||||
sizeof(float) * (VP8LHistogramNumCodes(cache_bits));
|
||||
sizeof(*((CostModel*)NULL)->literal_) * VP8LHistogramNumCodes(cache_bits);
|
||||
const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
|
||||
CostModel* const cost_model =
|
||||
(CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
|
||||
@ -584,13 +587,13 @@ static int BackwardReferencesHashChainDistanceOnly(
|
||||
CostManager* cost_manager =
|
||||
(CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager));
|
||||
int offset_prev = -1, len_prev = -1;
|
||||
float offset_cost = -1.f;
|
||||
int64_t offset_cost = -1;
|
||||
int first_offset_is_constant = -1; // initialized with 'impossible' value
|
||||
int reach = 0;
|
||||
|
||||
if (cost_model == NULL || cost_manager == NULL) goto Error;
|
||||
|
||||
cost_model->literal_ = (float*)(cost_model + 1);
|
||||
cost_model->literal_ = (uint32_t*)(cost_model + 1);
|
||||
if (use_color_cache) {
|
||||
cc_init = VP8LColorCacheInit(&hashers, cache_bits);
|
||||
if (!cc_init) goto Error;
|
||||
@ -608,11 +611,12 @@ static int BackwardReferencesHashChainDistanceOnly(
|
||||
// non-processed locations from this point.
|
||||
dist_array[0] = 0;
|
||||
// Add first pixel as literal.
|
||||
AddSingleLiteralWithCostModel(argb, &hashers, cost_model, 0, use_color_cache,
|
||||
0.f, cost_manager->costs_, dist_array);
|
||||
AddSingleLiteralWithCostModel(argb, &hashers, cost_model, /*idx=*/0,
|
||||
use_color_cache, /*prev_cost=*/0,
|
||||
cost_manager->costs_, dist_array);
|
||||
|
||||
for (i = 1; i < pix_count; ++i) {
|
||||
const float prev_cost = cost_manager->costs_[i - 1];
|
||||
const int64_t prev_cost = cost_manager->costs_[i - 1];
|
||||
int offset, len;
|
||||
VP8LHashChainFindCopy(hash_chain, i, &offset, &len);
|
||||
|
||||
|
@ -13,8 +13,6 @@
|
||||
#include "src/enc/backward_references_enc.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/lossless.h"
|
||||
@ -27,8 +25,6 @@
|
||||
|
||||
#define MIN_BLOCK_SIZE 256 // minimum block size for backward references
|
||||
|
||||
#define MAX_ENTROPY (1e30f)
|
||||
|
||||
// 1M window (4M bytes) minus 120 special codes for short distances.
|
||||
#define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120)
|
||||
|
||||
@ -758,7 +754,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
|
||||
int* const best_cache_bits) {
|
||||
int i;
|
||||
const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
|
||||
float entropy_min = MAX_ENTROPY;
|
||||
uint64_t entropy_min = WEBP_UINT64_MAX;
|
||||
int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
|
||||
VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
|
||||
VP8LRefsCursor c = VP8LRefsCursorInit(refs);
|
||||
@ -843,7 +839,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
|
||||
}
|
||||
|
||||
for (i = 0; i <= cache_bits_max; ++i) {
|
||||
const float entropy = VP8LHistogramEstimateBits(histos[i]);
|
||||
const uint64_t entropy = VP8LHistogramEstimateBits(histos[i]);
|
||||
if (i == 0 || entropy < entropy_min) {
|
||||
entropy_min = entropy;
|
||||
*best_cache_bits = i;
|
||||
@ -920,7 +916,7 @@ static int GetBackwardReferences(int width, int height,
|
||||
int i, lz77_type;
|
||||
// Index 0 is for a color cache, index 1 for no cache (if needed).
|
||||
int lz77_types_best[2] = {0, 0};
|
||||
float bit_costs_best[2] = {FLT_MAX, FLT_MAX};
|
||||
uint64_t bit_costs_best[2] = {WEBP_UINT64_MAX, WEBP_UINT64_MAX};
|
||||
VP8LHashChain hash_chain_box;
|
||||
VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
|
||||
int status = 0;
|
||||
@ -932,7 +928,7 @@ static int GetBackwardReferences(int width, int height,
|
||||
for (lz77_type = 1; lz77_types_to_try;
|
||||
lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
|
||||
int res = 0;
|
||||
float bit_cost = 0.f;
|
||||
uint64_t bit_cost = 0u;
|
||||
if ((lz77_types_to_try & lz77_type) == 0) continue;
|
||||
switch (lz77_type) {
|
||||
case kLZ77RLE:
|
||||
@ -1006,7 +1002,7 @@ static int GetBackwardReferences(int width, int height,
|
||||
const VP8LHashChain* const hash_chain_tmp =
|
||||
(lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
|
||||
const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
|
||||
float bit_cost_trace;
|
||||
uint64_t bit_cost_trace;
|
||||
if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
|
||||
hash_chain_tmp, &refs[i],
|
||||
refs_tmp)) {
|
||||
|
4
thirdparty/libwebp/src/enc/config_enc.c
vendored
4
thirdparty/libwebp/src/enc/config_enc.c
vendored
@ -55,7 +55,6 @@ int WebPConfigInitInternal(WebPConfig* config,
|
||||
config->thread_level = 0;
|
||||
config->low_memory = 0;
|
||||
config->near_lossless = 100;
|
||||
config->use_delta_palette = 0;
|
||||
config->use_sharp_yuv = 0;
|
||||
|
||||
// TODO(skal): tune.
|
||||
@ -125,9 +124,6 @@ int WebPValidateConfig(const WebPConfig* config) {
|
||||
if (config->thread_level < 0 || config->thread_level > 1) return 0;
|
||||
if (config->low_memory < 0 || config->low_memory > 1) return 0;
|
||||
if (config->exact < 0 || config->exact > 1) return 0;
|
||||
if (config->use_delta_palette < 0 || config->use_delta_palette > 1) {
|
||||
return 0;
|
||||
}
|
||||
if (config->use_sharp_yuv < 0 || config->use_sharp_yuv > 1) return 0;
|
||||
|
||||
return 1;
|
||||
|
2
thirdparty/libwebp/src/enc/cost_enc.c
vendored
2
thirdparty/libwebp/src/enc/cost_enc.c
vendored
@ -19,7 +19,7 @@
|
||||
// For each given level, the following table gives the pattern of contexts to
|
||||
// use for coding it (in [][0]) as well as the bit value to use for each
|
||||
// context (in [][1]).
|
||||
const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
|
||||
static const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
|
||||
{0x001, 0x000}, {0x007, 0x001}, {0x00f, 0x005},
|
||||
{0x00f, 0x00d}, {0x033, 0x003}, {0x033, 0x003}, {0x033, 0x023},
|
||||
{0x033, 0x023}, {0x033, 0x023}, {0x033, 0x023}, {0x0d3, 0x013},
|
||||
|
1
thirdparty/libwebp/src/enc/cost_enc.h
vendored
1
thirdparty/libwebp/src/enc/cost_enc.h
vendored
@ -61,7 +61,6 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
|
||||
}
|
||||
|
||||
// Level cost calculations
|
||||
extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
|
||||
void VP8CalculateLevelCosts(VP8EncProba* const proba);
|
||||
static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) {
|
||||
return VP8LevelFixedCosts[level]
|
||||
|
385
thirdparty/libwebp/src/enc/histogram_enc.c
vendored
385
thirdparty/libwebp/src/enc/histogram_enc.c
vendored
@ -13,8 +13,7 @@
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
@ -23,8 +22,6 @@
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
#define MAX_BIT_COST FLT_MAX
|
||||
|
||||
// Number of partitions for the three dominant (literal, red and blue) symbol
|
||||
// costs.
|
||||
#define NUM_PARTITIONS 4
|
||||
@ -33,10 +30,18 @@
|
||||
// Maximum number of histograms allowed in greedy combining algorithm.
|
||||
#define MAX_HISTO_GREEDY 100
|
||||
|
||||
// Return the size of the histogram for a given cache_bits.
|
||||
static int GetHistogramSize(int cache_bits) {
|
||||
const int literal_size = VP8LHistogramNumCodes(cache_bits);
|
||||
const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size;
|
||||
assert(total_size <= (size_t)0x7fffffff);
|
||||
return (int)total_size;
|
||||
}
|
||||
|
||||
static void HistogramClear(VP8LHistogram* const p) {
|
||||
uint32_t* const literal = p->literal_;
|
||||
const int cache_bits = p->palette_code_bits_;
|
||||
const int histo_size = VP8LGetHistogramSize(cache_bits);
|
||||
const int histo_size = GetHistogramSize(cache_bits);
|
||||
memset(p, 0, histo_size);
|
||||
p->palette_code_bits_ = cache_bits;
|
||||
p->literal_ = literal;
|
||||
@ -54,20 +59,13 @@ static void HistogramCopy(const VP8LHistogram* const src,
|
||||
uint32_t* const dst_literal = dst->literal_;
|
||||
const int dst_cache_bits = dst->palette_code_bits_;
|
||||
const int literal_size = VP8LHistogramNumCodes(dst_cache_bits);
|
||||
const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
|
||||
const int histo_size = GetHistogramSize(dst_cache_bits);
|
||||
assert(src->palette_code_bits_ == dst_cache_bits);
|
||||
memcpy(dst, src, histo_size);
|
||||
dst->literal_ = dst_literal;
|
||||
memcpy(dst->literal_, src->literal_, literal_size * sizeof(*dst->literal_));
|
||||
}
|
||||
|
||||
int VP8LGetHistogramSize(int cache_bits) {
|
||||
const int literal_size = VP8LHistogramNumCodes(cache_bits);
|
||||
const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size;
|
||||
assert(total_size <= (size_t)0x7fffffff);
|
||||
return (int)total_size;
|
||||
}
|
||||
|
||||
void VP8LFreeHistogram(VP8LHistogram* const histo) {
|
||||
WebPSafeFree(histo);
|
||||
}
|
||||
@ -102,17 +100,17 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
|
||||
HistogramClear(p);
|
||||
} else {
|
||||
p->trivial_symbol_ = 0;
|
||||
p->bit_cost_ = 0.;
|
||||
p->literal_cost_ = 0.;
|
||||
p->red_cost_ = 0.;
|
||||
p->blue_cost_ = 0.;
|
||||
p->bit_cost_ = 0;
|
||||
p->literal_cost_ = 0;
|
||||
p->red_cost_ = 0;
|
||||
p->blue_cost_ = 0;
|
||||
memset(p->is_used_, 0, sizeof(p->is_used_));
|
||||
}
|
||||
}
|
||||
|
||||
VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
|
||||
VP8LHistogram* histo = NULL;
|
||||
const int total_size = VP8LGetHistogramSize(cache_bits);
|
||||
const int total_size = GetHistogramSize(cache_bits);
|
||||
uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
|
||||
if (memory == NULL) return NULL;
|
||||
histo = (VP8LHistogram*)memory;
|
||||
@ -126,7 +124,7 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
|
||||
static void HistogramSetResetPointers(VP8LHistogramSet* const set,
|
||||
int cache_bits) {
|
||||
int i;
|
||||
const int histo_size = VP8LGetHistogramSize(cache_bits);
|
||||
const int histo_size = GetHistogramSize(cache_bits);
|
||||
uint8_t* memory = (uint8_t*) (set->histograms);
|
||||
memory += set->max_size * sizeof(*set->histograms);
|
||||
for (i = 0; i < set->max_size; ++i) {
|
||||
@ -140,7 +138,7 @@ static void HistogramSetResetPointers(VP8LHistogramSet* const set,
|
||||
|
||||
// Returns the total size of the VP8LHistogramSet.
|
||||
static size_t HistogramSetTotalSize(int size, int cache_bits) {
|
||||
const int histo_size = VP8LGetHistogramSize(cache_bits);
|
||||
const int histo_size = GetHistogramSize(cache_bits);
|
||||
return (sizeof(VP8LHistogramSet) + size * (sizeof(VP8LHistogram*) +
|
||||
histo_size + WEBP_ALIGN_CST));
|
||||
}
|
||||
@ -230,8 +228,8 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
|
||||
// -----------------------------------------------------------------------------
|
||||
// Entropy-related functions.
|
||||
|
||||
static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) {
|
||||
float mix;
|
||||
static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) {
|
||||
uint64_t mix;
|
||||
if (entropy->nonzeros < 5) {
|
||||
if (entropy->nonzeros <= 1) {
|
||||
return 0;
|
||||
@ -240,67 +238,72 @@ static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) {
|
||||
// Let's mix in a bit of entropy to favor good clustering when
|
||||
// distributions of these are combined.
|
||||
if (entropy->nonzeros == 2) {
|
||||
return 0.99f * entropy->sum + 0.01f * entropy->entropy;
|
||||
return DivRound(99 * ((uint64_t)entropy->sum << LOG_2_PRECISION_BITS) +
|
||||
entropy->entropy,
|
||||
100);
|
||||
}
|
||||
// No matter what the entropy says, we cannot be better than min_limit
|
||||
// with Huffman coding. I am mixing a bit of entropy into the
|
||||
// min_limit since it produces much better (~0.5 %) compression results
|
||||
// perhaps because of better entropy clustering.
|
||||
if (entropy->nonzeros == 3) {
|
||||
mix = 0.95f;
|
||||
mix = 950;
|
||||
} else {
|
||||
mix = 0.7f; // nonzeros == 4.
|
||||
mix = 700; // nonzeros == 4.
|
||||
}
|
||||
} else {
|
||||
mix = 0.627f;
|
||||
mix = 627;
|
||||
}
|
||||
|
||||
{
|
||||
float min_limit = 2.f * entropy->sum - entropy->max_val;
|
||||
min_limit = mix * min_limit + (1.f - mix) * entropy->entropy;
|
||||
uint64_t min_limit = (uint64_t)(2 * entropy->sum - entropy->max_val)
|
||||
<< LOG_2_PRECISION_BITS;
|
||||
min_limit =
|
||||
DivRound(mix * min_limit + (1000 - mix) * entropy->entropy, 1000);
|
||||
return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
|
||||
}
|
||||
}
|
||||
|
||||
float VP8LBitsEntropy(const uint32_t* const array, int n) {
|
||||
uint64_t VP8LBitsEntropy(const uint32_t* const array, int n) {
|
||||
VP8LBitEntropy entropy;
|
||||
VP8LBitsEntropyUnrefined(array, n, &entropy);
|
||||
|
||||
return BitsEntropyRefine(&entropy);
|
||||
}
|
||||
|
||||
static float InitialHuffmanCost(void) {
|
||||
static uint64_t InitialHuffmanCost(void) {
|
||||
// Small bias because Huffman code length is typically not stored in
|
||||
// full length.
|
||||
static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
|
||||
static const float kSmallBias = 9.1f;
|
||||
return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
|
||||
static const uint64_t kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
|
||||
// Subtract a bias of 9.1.
|
||||
return (kHuffmanCodeOfHuffmanCodeSize << LOG_2_PRECISION_BITS) -
|
||||
DivRound(91ll << LOG_2_PRECISION_BITS, 10);
|
||||
}
|
||||
|
||||
// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)
|
||||
static float FinalHuffmanCost(const VP8LStreaks* const stats) {
|
||||
// The constants in this function are experimental and got rounded from
|
||||
static uint64_t FinalHuffmanCost(const VP8LStreaks* const stats) {
|
||||
// The constants in this function are empirical and got rounded from
|
||||
// their original values in 1/8 when switched to 1/1024.
|
||||
float retval = InitialHuffmanCost();
|
||||
uint64_t retval = InitialHuffmanCost();
|
||||
// Second coefficient: Many zeros in the histogram are covered efficiently
|
||||
// by a run-length encode. Originally 2/8.
|
||||
retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1];
|
||||
uint32_t retval_extra = stats->counts[0] * 1600 + 240 * stats->streaks[0][1];
|
||||
// Second coefficient: Constant values are encoded less efficiently, but still
|
||||
// RLE'ed. Originally 6/8.
|
||||
retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1];
|
||||
retval_extra += stats->counts[1] * 2640 + 720 * stats->streaks[1][1];
|
||||
// 0s are usually encoded more efficiently than non-0s.
|
||||
// Originally 15/8.
|
||||
retval += 1.796875f * stats->streaks[0][0];
|
||||
retval_extra += 1840 * stats->streaks[0][0];
|
||||
// Originally 26/8.
|
||||
retval += 3.28125f * stats->streaks[1][0];
|
||||
return retval;
|
||||
retval_extra += 3360 * stats->streaks[1][0];
|
||||
return retval + ((uint64_t)retval_extra << (LOG_2_PRECISION_BITS - 10));
|
||||
}
|
||||
|
||||
// Get the symbol entropy for the distribution 'population'.
|
||||
// Set 'trivial_sym', if there's only one symbol present in the distribution.
|
||||
static float PopulationCost(const uint32_t* const population, int length,
|
||||
uint32_t* const trivial_sym,
|
||||
uint8_t* const is_used) {
|
||||
static uint64_t PopulationCost(const uint32_t* const population, int length,
|
||||
uint32_t* const trivial_sym,
|
||||
uint8_t* const is_used) {
|
||||
VP8LBitEntropy bit_entropy;
|
||||
VP8LStreaks stats;
|
||||
VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
|
||||
@ -316,10 +319,11 @@ static float PopulationCost(const uint32_t* const population, int length,
|
||||
|
||||
// trivial_at_end is 1 if the two histograms only have one element that is
|
||||
// non-zero: both the zero-th one, or both the last one.
|
||||
static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length,
|
||||
int is_X_used, int is_Y_used,
|
||||
int trivial_at_end) {
|
||||
static WEBP_INLINE uint64_t GetCombinedEntropy(const uint32_t* const X,
|
||||
const uint32_t* const Y,
|
||||
int length, int is_X_used,
|
||||
int is_Y_used,
|
||||
int trivial_at_end) {
|
||||
VP8LStreaks stats;
|
||||
if (trivial_at_end) {
|
||||
// This configuration is due to palettization that transforms an indexed
|
||||
@ -357,7 +361,7 @@ static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X,
|
||||
}
|
||||
|
||||
// Estimates the Entropy + Huffman + other block overhead size cost.
|
||||
float VP8LHistogramEstimateBits(VP8LHistogram* const p) {
|
||||
uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p) {
|
||||
return PopulationCost(p->literal_,
|
||||
VP8LHistogramNumCodes(p->palette_code_bits_), NULL,
|
||||
&p->is_used_[0]) +
|
||||
@ -366,27 +370,42 @@ float VP8LHistogramEstimateBits(VP8LHistogram* const p) {
|
||||
PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3]) +
|
||||
PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL,
|
||||
&p->is_used_[4]) +
|
||||
(float)VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES) +
|
||||
(float)VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||
((uint64_t)(VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES) +
|
||||
VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES))
|
||||
<< LOG_2_PRECISION_BITS);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Various histogram combine/cost-eval functions
|
||||
|
||||
static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
float cost_threshold, float* cost) {
|
||||
// Set a + b in b, saturating at WEBP_INT64_MAX.
|
||||
static WEBP_INLINE void SaturateAdd(uint64_t a, int64_t* b) {
|
||||
if (*b < 0 || (int64_t)a <= WEBP_INT64_MAX - *b) {
|
||||
*b += (int64_t)a;
|
||||
} else {
|
||||
*b = WEBP_INT64_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns 1 if the cost of the combined histogram is less than the threshold.
|
||||
// Otherwise returns 0 and the cost is invalid due to early bail-out.
|
||||
WEBP_NODISCARD static int GetCombinedHistogramEntropy(
|
||||
const VP8LHistogram* const a, const VP8LHistogram* const b,
|
||||
int64_t cost_threshold_in, uint64_t* cost) {
|
||||
const int palette_code_bits = a->palette_code_bits_;
|
||||
int trivial_at_end = 0;
|
||||
const uint64_t cost_threshold = (uint64_t)cost_threshold_in;
|
||||
assert(a->palette_code_bits_ == b->palette_code_bits_);
|
||||
*cost += GetCombinedEntropy(a->literal_, b->literal_,
|
||||
VP8LHistogramNumCodes(palette_code_bits),
|
||||
a->is_used_[0], b->is_used_[0], 0);
|
||||
*cost += (float)VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
|
||||
b->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
if (cost_threshold_in <= 0) return 0;
|
||||
*cost = GetCombinedEntropy(a->literal_, b->literal_,
|
||||
VP8LHistogramNumCodes(palette_code_bits),
|
||||
a->is_used_[0], b->is_used_[0], 0);
|
||||
*cost += (uint64_t)VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
|
||||
b->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES)
|
||||
<< LOG_2_PRECISION_BITS;
|
||||
if (*cost >= cost_threshold) return 0;
|
||||
|
||||
if (a->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM &&
|
||||
a->trivial_symbol_ == b->trivial_symbol_) {
|
||||
@ -401,27 +420,24 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
|
||||
}
|
||||
}
|
||||
|
||||
*cost +=
|
||||
GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, a->is_used_[1],
|
||||
b->is_used_[1], trivial_at_end);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
*cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES,
|
||||
a->is_used_[1], b->is_used_[1], trivial_at_end);
|
||||
if (*cost >= cost_threshold) return 0;
|
||||
|
||||
*cost +=
|
||||
GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, a->is_used_[2],
|
||||
b->is_used_[2], trivial_at_end);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
*cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES,
|
||||
a->is_used_[2], b->is_used_[2], trivial_at_end);
|
||||
if (*cost >= cost_threshold) return 0;
|
||||
|
||||
*cost +=
|
||||
GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
|
||||
a->is_used_[3], b->is_used_[3], trivial_at_end);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
*cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
|
||||
a->is_used_[3], b->is_used_[3], trivial_at_end);
|
||||
if (*cost >= cost_threshold) return 0;
|
||||
|
||||
*cost +=
|
||||
GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
|
||||
a->is_used_[4], b->is_used_[4], 0);
|
||||
*cost += (float)VP8LExtraCostCombined(a->distance_, b->distance_,
|
||||
NUM_DISTANCE_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
*cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
|
||||
a->is_used_[4], b->is_used_[4], 0);
|
||||
*cost += (uint64_t)VP8LExtraCostCombined(a->distance_, b->distance_,
|
||||
NUM_DISTANCE_CODES)
|
||||
<< LOG_2_PRECISION_BITS;
|
||||
if (*cost >= cost_threshold) return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -441,33 +457,39 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a,
|
||||
// Since the previous score passed is 'cost_threshold', we only need to compare
|
||||
// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
|
||||
// early.
|
||||
static float HistogramAddEval(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out, float cost_threshold) {
|
||||
float cost = 0;
|
||||
const float sum_cost = a->bit_cost_ + b->bit_cost_;
|
||||
cost_threshold += sum_cost;
|
||||
// Returns 1 if the cost is less than the threshold.
|
||||
// Otherwise returns 0 and the cost is invalid due to early bail-out.
|
||||
WEBP_NODISCARD static int HistogramAddEval(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out,
|
||||
int64_t cost_threshold) {
|
||||
uint64_t cost;
|
||||
const uint64_t sum_cost = a->bit_cost_ + b->bit_cost_;
|
||||
SaturateAdd(sum_cost, &cost_threshold);
|
||||
if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) return 0;
|
||||
|
||||
if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
|
||||
HistogramAdd(a, b, out);
|
||||
out->bit_cost_ = cost;
|
||||
out->palette_code_bits_ = a->palette_code_bits_;
|
||||
}
|
||||
|
||||
return cost - sum_cost;
|
||||
HistogramAdd(a, b, out);
|
||||
out->bit_cost_ = cost;
|
||||
out->palette_code_bits_ = a->palette_code_bits_;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Same as HistogramAddEval(), except that the resulting histogram
|
||||
// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
|
||||
// the term C(b) which is constant over all the evaluations.
|
||||
static float HistogramAddThresh(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
float cost_threshold) {
|
||||
float cost;
|
||||
// Returns 1 if the cost is less than the threshold.
|
||||
// Otherwise returns 0 and the cost is invalid due to early bail-out.
|
||||
WEBP_NODISCARD static int HistogramAddThresh(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
int64_t cost_threshold,
|
||||
int64_t* cost_out) {
|
||||
uint64_t cost;
|
||||
assert(a != NULL && b != NULL);
|
||||
cost = -a->bit_cost_;
|
||||
GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
|
||||
return cost;
|
||||
SaturateAdd(a->bit_cost_, &cost_threshold);
|
||||
if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) return 0;
|
||||
|
||||
*cost_out = (int64_t)cost - (int64_t)a->bit_cost_;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@ -475,21 +497,21 @@ static float HistogramAddThresh(const VP8LHistogram* const a,
|
||||
// The structure to keep track of cost range for the three dominant entropy
|
||||
// symbols.
|
||||
typedef struct {
|
||||
float literal_max_;
|
||||
float literal_min_;
|
||||
float red_max_;
|
||||
float red_min_;
|
||||
float blue_max_;
|
||||
float blue_min_;
|
||||
uint64_t literal_max_;
|
||||
uint64_t literal_min_;
|
||||
uint64_t red_max_;
|
||||
uint64_t red_min_;
|
||||
uint64_t blue_max_;
|
||||
uint64_t blue_min_;
|
||||
} DominantCostRange;
|
||||
|
||||
static void DominantCostRangeInit(DominantCostRange* const c) {
|
||||
c->literal_max_ = 0.;
|
||||
c->literal_min_ = MAX_BIT_COST;
|
||||
c->red_max_ = 0.;
|
||||
c->red_min_ = MAX_BIT_COST;
|
||||
c->blue_max_ = 0.;
|
||||
c->blue_min_ = MAX_BIT_COST;
|
||||
c->literal_max_ = 0;
|
||||
c->literal_min_ = WEBP_UINT64_MAX;
|
||||
c->red_max_ = 0;
|
||||
c->red_min_ = WEBP_UINT64_MAX;
|
||||
c->blue_max_ = 0;
|
||||
c->blue_min_ = WEBP_UINT64_MAX;
|
||||
}
|
||||
|
||||
static void UpdateDominantCostRange(
|
||||
@ -504,15 +526,18 @@ static void UpdateDominantCostRange(
|
||||
|
||||
static void UpdateHistogramCost(VP8LHistogram* const h) {
|
||||
uint32_t alpha_sym, red_sym, blue_sym;
|
||||
const float alpha_cost =
|
||||
const uint64_t alpha_cost =
|
||||
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]);
|
||||
const float distance_cost =
|
||||
const uint64_t distance_cost =
|
||||
PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
|
||||
(float)VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
|
||||
((uint64_t)VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES)
|
||||
<< LOG_2_PRECISION_BITS);
|
||||
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
|
||||
h->literal_cost_ =
|
||||
PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) +
|
||||
(float)VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
|
||||
((uint64_t)VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES)
|
||||
<< LOG_2_PRECISION_BITS);
|
||||
h->red_cost_ =
|
||||
PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]);
|
||||
h->blue_cost_ =
|
||||
@ -527,10 +552,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
|
||||
}
|
||||
}
|
||||
|
||||
static int GetBinIdForEntropy(float min, float max, float val) {
|
||||
const float range = max - min;
|
||||
if (range > 0.) {
|
||||
const float delta = val - min;
|
||||
static int GetBinIdForEntropy(uint64_t min, uint64_t max, uint64_t val) {
|
||||
const uint64_t range = max - min;
|
||||
if (range > 0) {
|
||||
const uint64_t delta = val - min;
|
||||
return (int)((NUM_PARTITIONS - 1e-6) * delta / range);
|
||||
} else {
|
||||
return 0;
|
||||
@ -576,11 +601,11 @@ static void HistogramBuild(
|
||||
}
|
||||
|
||||
// Copies the histograms and computes its bit_cost.
|
||||
static const uint16_t kInvalidHistogramSymbol = (uint16_t)(-1);
|
||||
static const uint32_t kInvalidHistogramSymbol = (uint32_t)(-1);
|
||||
static void HistogramCopyAndAnalyze(VP8LHistogramSet* const orig_histo,
|
||||
VP8LHistogramSet* const image_histo,
|
||||
int* const num_used,
|
||||
uint16_t* const histogram_symbols) {
|
||||
uint32_t* const histogram_symbols) {
|
||||
int i, cluster_id;
|
||||
int num_used_orig = *num_used;
|
||||
VP8LHistogram** const orig_histograms = orig_histo->histograms;
|
||||
@ -639,11 +664,12 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
|
||||
|
||||
// Merges some histograms with same bin_id together if it's advantageous.
|
||||
// Sets the remaining histograms to NULL.
|
||||
// 'combine_cost_factor' has to be divided by 100.
|
||||
static void HistogramCombineEntropyBin(
|
||||
VP8LHistogramSet* const image_histo, int* num_used,
|
||||
const uint16_t* const clusters, uint16_t* const cluster_mappings,
|
||||
const uint32_t* const clusters, uint16_t* const cluster_mappings,
|
||||
VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins,
|
||||
float combine_cost_factor, int low_effort) {
|
||||
int32_t combine_cost_factor, int low_effort) {
|
||||
VP8LHistogram** const histograms = image_histo->histograms;
|
||||
int idx;
|
||||
struct {
|
||||
@ -673,11 +699,11 @@ static void HistogramCombineEntropyBin(
|
||||
cluster_mappings[clusters[idx]] = clusters[first];
|
||||
} else {
|
||||
// try to merge #idx into #first (both share the same bin_id)
|
||||
const float bit_cost = histograms[idx]->bit_cost_;
|
||||
const float bit_cost_thresh = -bit_cost * combine_cost_factor;
|
||||
const float curr_cost_diff = HistogramAddEval(
|
||||
histograms[first], histograms[idx], cur_combo, bit_cost_thresh);
|
||||
if (curr_cost_diff < bit_cost_thresh) {
|
||||
const uint64_t bit_cost = histograms[idx]->bit_cost_;
|
||||
const int64_t bit_cost_thresh =
|
||||
-DivRound((int64_t)bit_cost * combine_cost_factor, 100);
|
||||
if (HistogramAddEval(histograms[first], histograms[idx], cur_combo,
|
||||
bit_cost_thresh)) {
|
||||
// Try to merge two histograms only if the combo is a trivial one or
|
||||
// the two candidate histograms are already non-trivial.
|
||||
// For some images, 'try_combine' turns out to be false for a lot of
|
||||
@ -724,8 +750,8 @@ static uint32_t MyRand(uint32_t* const seed) {
|
||||
typedef struct {
|
||||
int idx1;
|
||||
int idx2;
|
||||
float cost_diff;
|
||||
float cost_combo;
|
||||
int64_t cost_diff;
|
||||
uint64_t cost_combo;
|
||||
} HistogramPair;
|
||||
|
||||
typedef struct {
|
||||
@ -765,7 +791,7 @@ static void HistoQueuePopPair(HistoQueue* const histo_queue,
|
||||
// Check whether a pair in the queue should be updated as head or not.
|
||||
static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
|
||||
HistogramPair* const pair) {
|
||||
assert(pair->cost_diff < 0.);
|
||||
assert(pair->cost_diff < 0);
|
||||
assert(pair >= histo_queue->queue &&
|
||||
pair < (histo_queue->queue + histo_queue->size));
|
||||
assert(histo_queue->size > 0);
|
||||
@ -778,29 +804,35 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
|
||||
}
|
||||
|
||||
// Update the cost diff and combo of a pair of histograms. This needs to be
|
||||
// called when the the histograms have been merged with a third one.
|
||||
static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
|
||||
const VP8LHistogram* const h2, float threshold,
|
||||
HistogramPair* const pair) {
|
||||
const float sum_cost = h1->bit_cost_ + h2->bit_cost_;
|
||||
pair->cost_combo = 0.;
|
||||
GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo);
|
||||
pair->cost_diff = pair->cost_combo - sum_cost;
|
||||
// called when the histograms have been merged with a third one.
|
||||
// Returns 1 if the cost diff is less than the threshold.
|
||||
// Otherwise returns 0 and the cost is invalid due to early bail-out.
|
||||
WEBP_NODISCARD static int HistoQueueUpdatePair(const VP8LHistogram* const h1,
|
||||
const VP8LHistogram* const h2,
|
||||
int64_t cost_threshold,
|
||||
HistogramPair* const pair) {
|
||||
const int64_t sum_cost = h1->bit_cost_ + h2->bit_cost_;
|
||||
SaturateAdd(sum_cost, &cost_threshold);
|
||||
if (!GetCombinedHistogramEntropy(h1, h2, cost_threshold, &pair->cost_combo)) {
|
||||
return 0;
|
||||
}
|
||||
pair->cost_diff = (int64_t)pair->cost_combo - sum_cost;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create a pair from indices "idx1" and "idx2" provided its cost
|
||||
// is inferior to "threshold", a negative entropy.
|
||||
// It returns the cost of the pair, or 0. if it superior to threshold.
|
||||
static float HistoQueuePush(HistoQueue* const histo_queue,
|
||||
VP8LHistogram** const histograms, int idx1,
|
||||
int idx2, float threshold) {
|
||||
// It returns the cost of the pair, or 0 if it superior to threshold.
|
||||
static int64_t HistoQueuePush(HistoQueue* const histo_queue,
|
||||
VP8LHistogram** const histograms, int idx1,
|
||||
int idx2, int64_t threshold) {
|
||||
const VP8LHistogram* h1;
|
||||
const VP8LHistogram* h2;
|
||||
HistogramPair pair;
|
||||
|
||||
// Stop here if the queue is full.
|
||||
if (histo_queue->size == histo_queue->max_size) return 0.;
|
||||
assert(threshold <= 0.);
|
||||
if (histo_queue->size == histo_queue->max_size) return 0;
|
||||
assert(threshold <= 0);
|
||||
if (idx1 > idx2) {
|
||||
const int tmp = idx2;
|
||||
idx2 = idx1;
|
||||
@ -811,10 +843,8 @@ static float HistoQueuePush(HistoQueue* const histo_queue,
|
||||
h1 = histograms[idx1];
|
||||
h2 = histograms[idx2];
|
||||
|
||||
HistoQueueUpdatePair(h1, h2, threshold, &pair);
|
||||
|
||||
// Do not even consider the pair if it does not improve the entropy.
|
||||
if (pair.cost_diff >= threshold) return 0.;
|
||||
if (!HistoQueueUpdatePair(h1, h2, threshold, &pair)) return 0;
|
||||
|
||||
histo_queue->queue[histo_queue->size++] = pair;
|
||||
HistoQueueUpdateHead(histo_queue, &histo_queue->queue[histo_queue->size - 1]);
|
||||
@ -851,7 +881,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
|
||||
for (j = i + 1; j < image_histo_size; ++j) {
|
||||
// Initialize queue.
|
||||
if (image_histo->histograms[j] == NULL) continue;
|
||||
HistoQueuePush(&histo_queue, histograms, i, j, 0.);
|
||||
HistoQueuePush(&histo_queue, histograms, i, j, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -879,7 +909,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
|
||||
// Push new pairs formed with combined histogram to the queue.
|
||||
for (i = 0; i < image_histo->size; ++i) {
|
||||
if (i == idx1 || image_histo->histograms[i] == NULL) continue;
|
||||
HistoQueuePush(&histo_queue, image_histo->histograms, idx1, i, 0.);
|
||||
HistoQueuePush(&histo_queue, image_histo->histograms, idx1, i, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -937,8 +967,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
|
||||
++tries_with_no_success < num_tries_no_success;
|
||||
++iter) {
|
||||
int* mapping_index;
|
||||
float best_cost =
|
||||
(histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff;
|
||||
int64_t best_cost =
|
||||
(histo_queue.size == 0) ? 0 : histo_queue.queue[0].cost_diff;
|
||||
int best_idx1 = -1, best_idx2 = 1;
|
||||
const uint32_t rand_range = (*num_used - 1) * (*num_used);
|
||||
// (*num_used) / 2 was chosen empirically. Less means faster but worse
|
||||
@ -947,7 +977,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
|
||||
|
||||
// Pick random samples.
|
||||
for (j = 0; *num_used >= 2 && j < num_tries; ++j) {
|
||||
float curr_cost;
|
||||
int64_t curr_cost;
|
||||
// Choose two different histograms at random and try to combine them.
|
||||
const uint32_t tmp = MyRand(&seed) % rand_range;
|
||||
uint32_t idx1 = tmp / (*num_used - 1);
|
||||
@ -1012,8 +1042,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
|
||||
}
|
||||
if (do_eval) {
|
||||
// Re-evaluate the cost of an updated pair.
|
||||
HistoQueueUpdatePair(histograms[p->idx1], histograms[p->idx2], 0., p);
|
||||
if (p->cost_diff >= 0.) {
|
||||
if (!HistoQueueUpdatePair(histograms[p->idx1], histograms[p->idx2], 0,
|
||||
p)) {
|
||||
HistoQueuePopPair(&histo_queue, p);
|
||||
continue;
|
||||
}
|
||||
@ -1040,7 +1070,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
|
||||
// Note: we assume that out[]->bit_cost_ is already up-to-date.
|
||||
static void HistogramRemap(const VP8LHistogramSet* const in,
|
||||
VP8LHistogramSet* const out,
|
||||
uint16_t* const symbols) {
|
||||
uint32_t* const symbols) {
|
||||
int i;
|
||||
VP8LHistogram** const in_histo = in->histograms;
|
||||
VP8LHistogram** const out_histo = out->histograms;
|
||||
@ -1049,7 +1079,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
|
||||
if (out_size > 1) {
|
||||
for (i = 0; i < in_size; ++i) {
|
||||
int best_out = 0;
|
||||
float best_bits = MAX_BIT_COST;
|
||||
int64_t best_bits = WEBP_INT64_MAX;
|
||||
int k;
|
||||
if (in_histo[i] == NULL) {
|
||||
// Arbitrarily set to the previous value if unused to help future LZ77.
|
||||
@ -1057,9 +1087,9 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
|
||||
continue;
|
||||
}
|
||||
for (k = 0; k < out_size; ++k) {
|
||||
float cur_bits;
|
||||
cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
|
||||
if (k == 0 || cur_bits < best_bits) {
|
||||
int64_t cur_bits;
|
||||
if (HistogramAddThresh(out_histo[k], in_histo[i], best_bits,
|
||||
&cur_bits)) {
|
||||
best_bits = cur_bits;
|
||||
best_out = k;
|
||||
}
|
||||
@ -1085,13 +1115,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
|
||||
}
|
||||
}
|
||||
|
||||
static float GetCombineCostFactor(int histo_size, int quality) {
|
||||
float combine_cost_factor = 0.16f;
|
||||
static int32_t GetCombineCostFactor(int histo_size, int quality) {
|
||||
int32_t combine_cost_factor = 16;
|
||||
if (quality < 90) {
|
||||
if (histo_size > 256) combine_cost_factor /= 2.f;
|
||||
if (histo_size > 512) combine_cost_factor /= 2.f;
|
||||
if (histo_size > 1024) combine_cost_factor /= 2.f;
|
||||
if (quality <= 50) combine_cost_factor /= 2.f;
|
||||
if (histo_size > 256) combine_cost_factor /= 2;
|
||||
if (histo_size > 512) combine_cost_factor /= 2;
|
||||
if (histo_size > 1024) combine_cost_factor /= 2;
|
||||
if (quality <= 50) combine_cost_factor /= 2;
|
||||
}
|
||||
return combine_cost_factor;
|
||||
}
|
||||
@ -1101,10 +1131,10 @@ static float GetCombineCostFactor(int histo_size, int quality) {
|
||||
// assign the smallest possible clusters values.
|
||||
static void OptimizeHistogramSymbols(const VP8LHistogramSet* const set,
|
||||
uint16_t* const cluster_mappings,
|
||||
int num_clusters,
|
||||
uint32_t num_clusters,
|
||||
uint16_t* const cluster_mappings_tmp,
|
||||
uint16_t* const symbols) {
|
||||
int i, cluster_max;
|
||||
uint32_t* const symbols) {
|
||||
uint32_t i, cluster_max;
|
||||
int do_continue = 1;
|
||||
// First, assign the lowest cluster to each pixel.
|
||||
while (do_continue) {
|
||||
@ -1128,7 +1158,7 @@ static void OptimizeHistogramSymbols(const VP8LHistogramSet* const set,
|
||||
set->max_size * sizeof(*cluster_mappings_tmp));
|
||||
assert(cluster_mappings[0] == 0);
|
||||
// Re-map the ids.
|
||||
for (i = 0; i < set->max_size; ++i) {
|
||||
for (i = 0; i < (uint32_t)set->max_size; ++i) {
|
||||
int cluster;
|
||||
if (symbols[i] == kInvalidHistogramSymbol) continue;
|
||||
cluster = cluster_mappings[symbols[i]];
|
||||
@ -1142,7 +1172,7 @@ static void OptimizeHistogramSymbols(const VP8LHistogramSet* const set,
|
||||
|
||||
// Make sure all cluster values are used.
|
||||
cluster_max = 0;
|
||||
for (i = 0; i < set->max_size; ++i) {
|
||||
for (i = 0; i < (uint32_t)set->max_size; ++i) {
|
||||
if (symbols[i] == kInvalidHistogramSymbol) continue;
|
||||
if (symbols[i] <= cluster_max) continue;
|
||||
++cluster_max;
|
||||
@ -1165,7 +1195,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
int low_effort, int histogram_bits, int cache_bits,
|
||||
VP8LHistogramSet* const image_histo,
|
||||
VP8LHistogram* const tmp_histo,
|
||||
uint16_t* const histogram_symbols,
|
||||
uint32_t* const histogram_symbols,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent) {
|
||||
const int histo_xsize =
|
||||
@ -1181,7 +1211,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
|
||||
int entropy_combine;
|
||||
uint16_t* const map_tmp =
|
||||
WebPSafeMalloc(2 * image_histo_raw_size, sizeof(*map_tmp));
|
||||
(uint16_t*)WebPSafeMalloc(2 * image_histo_raw_size, sizeof(*map_tmp));
|
||||
uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size;
|
||||
int num_used = image_histo_raw_size;
|
||||
if (orig_histo == NULL || map_tmp == NULL) {
|
||||
@ -1201,7 +1231,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
|
||||
if (entropy_combine) {
|
||||
uint16_t* const bin_map = map_tmp;
|
||||
const float combine_cost_factor =
|
||||
const int32_t combine_cost_factor =
|
||||
GetCombineCostFactor(image_histo_raw_size, quality);
|
||||
const uint32_t num_clusters = num_used;
|
||||
|
||||
@ -1217,9 +1247,10 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
// Don't combine the histograms using stochastic and greedy heuristics for
|
||||
// low-effort compression mode.
|
||||
if (!low_effort || !entropy_combine) {
|
||||
const float x = quality / 100.f;
|
||||
// cubic ramp between 1 and MAX_HISTO_GREEDY:
|
||||
const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
|
||||
const int threshold_size =
|
||||
(int)(1 + DivRound(quality * quality * quality * (MAX_HISTO_GREEDY - 1),
|
||||
100 * 100 * 100));
|
||||
int do_greedy;
|
||||
if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size,
|
||||
&do_greedy)) {
|
||||
|
17
thirdparty/libwebp/src/enc/histogram_enc.h
vendored
17
thirdparty/libwebp/src/enc/histogram_enc.h
vendored
@ -40,10 +40,10 @@ typedef struct {
|
||||
int palette_code_bits_;
|
||||
uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha
|
||||
// literal symbols are single valued.
|
||||
float bit_cost_; // cached value of bit cost.
|
||||
float literal_cost_; // Cached values of dominant entropy costs:
|
||||
float red_cost_; // literal, red & blue.
|
||||
float blue_cost_;
|
||||
uint64_t bit_cost_; // cached value of bit cost.
|
||||
uint64_t literal_cost_; // Cached values of dominant entropy costs:
|
||||
uint64_t red_cost_; // literal, red & blue.
|
||||
uint64_t blue_cost_;
|
||||
uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance
|
||||
} VP8LHistogram;
|
||||
|
||||
@ -64,9 +64,6 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
|
||||
const VP8LBackwardRefs* const refs,
|
||||
int palette_code_bits);
|
||||
|
||||
// Return the size of the histogram for a given cache_bits.
|
||||
int VP8LGetHistogramSize(int cache_bits);
|
||||
|
||||
// Set the palette_code_bits and reset the stats.
|
||||
// If init_arrays is true, the arrays are also filled with 0's.
|
||||
void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
|
||||
@ -112,16 +109,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
int low_effort, int histogram_bits, int cache_bits,
|
||||
VP8LHistogramSet* const image_histo,
|
||||
VP8LHistogram* const tmp_histo,
|
||||
uint16_t* const histogram_symbols,
|
||||
uint32_t* const histogram_symbols,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent);
|
||||
|
||||
// Returns the entropy for the symbols in the input array.
|
||||
float VP8LBitsEntropy(const uint32_t* const array, int n);
|
||||
uint64_t VP8LBitsEntropy(const uint32_t* const array, int n);
|
||||
|
||||
// Estimate how many bits the combined entropy of literals and distance
|
||||
// approximately maps to.
|
||||
float VP8LHistogramEstimateBits(VP8LHistogram* const p);
|
||||
uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
13
thirdparty/libwebp/src/enc/iterator_enc.c
vendored
13
thirdparty/libwebp/src/enc/iterator_enc.c
vendored
@ -13,6 +13,7 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -54,7 +55,8 @@ void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
|
||||
InitLeft(it);
|
||||
}
|
||||
|
||||
void VP8IteratorReset(VP8EncIterator* const it) {
|
||||
// restart a scan
|
||||
static void VP8IteratorReset(VP8EncIterator* const it) {
|
||||
VP8Encoder* const enc = it->enc_;
|
||||
VP8IteratorSetRow(it, 0);
|
||||
VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_); // default
|
||||
@ -424,6 +426,15 @@ void VP8IteratorStartI4(VP8EncIterator* const it) {
|
||||
it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15];
|
||||
}
|
||||
}
|
||||
#if WEBP_AARCH64 && BPS == 32 && defined(WEBP_MSAN)
|
||||
// Intra4Preds_NEON() reads 3 uninitialized bytes from i4_boundary_ when top
|
||||
// is positioned at offset 29 (VP8TopLeftI4[3]). The values are not used
|
||||
// meaningfully, but due to limitations in MemorySanitizer related to
|
||||
// modeling of tbl instructions, a warning will be issued. This can be
|
||||
// removed if MSan is updated to support the instructions. See
|
||||
// https://issues.webmproject.org/372109644.
|
||||
memset(it->i4_boundary_ + sizeof(it->i4_boundary_) - 3, 0xaa, 3);
|
||||
#endif
|
||||
VP8IteratorNzToBytes(it); // import the non-zero context
|
||||
}
|
||||
|
||||
|
631
thirdparty/libwebp/src/enc/predictor_enc.c
vendored
631
thirdparty/libwebp/src/enc/predictor_enc.c
vendored
@ -14,53 +14,75 @@
|
||||
// Urvang Joshi (urvang@google.com)
|
||||
// Vincent Rabaud (vrabaud@google.com)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/dsp/lossless.h"
|
||||
#include "src/dsp/lossless_common.h"
|
||||
#include "src/enc/vp8i_enc.h"
|
||||
#include "src/enc/vp8li_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
#include "src/webp/encode.h"
|
||||
#include "src/webp/format_constants.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#define MAX_DIFF_COST (1e30f)
|
||||
|
||||
static const float kSpatialPredictorBias = 15.f;
|
||||
#define HISTO_SIZE (4 * 256)
|
||||
static const int64_t kSpatialPredictorBias = 15ll << LOG_2_PRECISION_BITS;
|
||||
static const int kPredLowEffort = 11;
|
||||
static const uint32_t kMaskAlpha = 0xff000000;
|
||||
static const int kNumPredModes = 14;
|
||||
|
||||
// Mostly used to reduce code size + readability
|
||||
static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
|
||||
static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; }
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Methods to calculate Entropy (Shannon).
|
||||
|
||||
static float PredictionCostSpatial(const int counts[256], int weight_0,
|
||||
float exp_val) {
|
||||
// Compute a bias for prediction entropy using a global heuristic to favor
|
||||
// values closer to 0. Hence the final negative sign.
|
||||
// 'exp_val' has a scaling factor of 1/100.
|
||||
static int64_t PredictionCostBias(const uint32_t counts[256], uint64_t weight_0,
|
||||
uint64_t exp_val) {
|
||||
const int significant_symbols = 256 >> 4;
|
||||
const float exp_decay_factor = 0.6f;
|
||||
float bits = (float)weight_0 * counts[0];
|
||||
const uint64_t exp_decay_factor = 6; // has a scaling factor of 1/10
|
||||
uint64_t bits = (weight_0 * counts[0]) << LOG_2_PRECISION_BITS;
|
||||
int i;
|
||||
exp_val <<= LOG_2_PRECISION_BITS;
|
||||
for (i = 1; i < significant_symbols; ++i) {
|
||||
bits += exp_val * (counts[i] + counts[256 - i]);
|
||||
exp_val *= exp_decay_factor;
|
||||
bits += DivRound(exp_val * (counts[i] + counts[256 - i]), 100);
|
||||
exp_val = DivRound(exp_decay_factor * exp_val, 10);
|
||||
}
|
||||
return (float)(-0.1 * bits);
|
||||
return -DivRound((int64_t)bits, 10);
|
||||
}
|
||||
|
||||
static float PredictionCostSpatialHistogram(const int accumulated[4][256],
|
||||
const int tile[4][256]) {
|
||||
static int64_t PredictionCostSpatialHistogram(
|
||||
const uint32_t accumulated[HISTO_SIZE], const uint32_t tile[HISTO_SIZE],
|
||||
int mode, int left_mode, int above_mode) {
|
||||
int i;
|
||||
float retval = 0.f;
|
||||
int64_t retval = 0;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
const float kExpValue = 0.94f;
|
||||
retval += PredictionCostSpatial(tile[i], 1, kExpValue);
|
||||
retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
|
||||
const uint64_t kExpValue = 94;
|
||||
retval += PredictionCostBias(&tile[i * 256], 1, kExpValue);
|
||||
// Compute the new cost if 'tile' is added to 'accumulate' but also add the
|
||||
// cost of the current histogram to guide the spatial predictor selection.
|
||||
// Basically, favor low entropy, locally and globally.
|
||||
retval += (int64_t)VP8LCombinedShannonEntropy(&tile[i * 256],
|
||||
&accumulated[i * 256]);
|
||||
}
|
||||
return (float)retval;
|
||||
// Favor keeping the areas locally similar.
|
||||
if (mode == left_mode) retval -= kSpatialPredictorBias;
|
||||
if (mode == above_mode) retval -= kSpatialPredictorBias;
|
||||
return retval;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
|
||||
++histo_argb[0][argb >> 24];
|
||||
++histo_argb[1][(argb >> 16) & 0xff];
|
||||
++histo_argb[2][(argb >> 8) & 0xff];
|
||||
++histo_argb[3][argb & 0xff];
|
||||
static WEBP_INLINE void UpdateHisto(uint32_t histo_argb[HISTO_SIZE],
|
||||
uint32_t argb) {
|
||||
++histo_argb[0 * 256 + (argb >> 24)];
|
||||
++histo_argb[1 * 256 + ((argb >> 16) & 0xff)];
|
||||
++histo_argb[2 * 256 + ((argb >> 8) & 0xff)];
|
||||
++histo_argb[3 * 256 + (argb & 0xff)];
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -91,8 +113,6 @@ static WEBP_INLINE void PredictBatch(int mode, int x_start, int y,
|
||||
}
|
||||
|
||||
#if (WEBP_NEAR_LOSSLESS == 1)
|
||||
static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; }
|
||||
|
||||
static int MaxDiffBetweenPixels(uint32_t p1, uint32_t p2) {
|
||||
const int diff_a = abs((int)(p1 >> 24) - (int)(p2 >> 24));
|
||||
const int diff_r = abs((int)((p1 >> 16) & 0xff) - (int)((p2 >> 16) & 0xff));
|
||||
@ -291,23 +311,80 @@ static WEBP_INLINE void GetResidual(
|
||||
}
|
||||
}
|
||||
|
||||
// Returns best predictor and updates the accumulated histogram.
|
||||
// Accessors to residual histograms.
|
||||
static WEBP_INLINE uint32_t* GetHistoArgb(uint32_t* const all_histos,
|
||||
int subsampling_index, int mode) {
|
||||
return &all_histos[(subsampling_index * kNumPredModes + mode) * HISTO_SIZE];
|
||||
}
|
||||
|
||||
static WEBP_INLINE const uint32_t* GetHistoArgbConst(
|
||||
const uint32_t* const all_histos, int subsampling_index, int mode) {
|
||||
return &all_histos[subsampling_index * kNumPredModes * HISTO_SIZE +
|
||||
mode * HISTO_SIZE];
|
||||
}
|
||||
|
||||
// Accessors to accumulated residual histogram.
|
||||
static WEBP_INLINE uint32_t* GetAccumulatedHisto(uint32_t* all_accumulated,
|
||||
int subsampling_index) {
|
||||
return &all_accumulated[subsampling_index * HISTO_SIZE];
|
||||
}
|
||||
|
||||
// Find and store the best predictor for a tile at subsampling
|
||||
// 'subsampling_index'.
|
||||
static void GetBestPredictorForTile(const uint32_t* const all_argb,
|
||||
int subsampling_index, int tile_x,
|
||||
int tile_y, int tiles_per_row,
|
||||
uint32_t* all_accumulated_argb,
|
||||
uint32_t** const all_modes,
|
||||
uint32_t* const all_pred_histos) {
|
||||
uint32_t* const accumulated_argb =
|
||||
GetAccumulatedHisto(all_accumulated_argb, subsampling_index);
|
||||
uint32_t* const modes = all_modes[subsampling_index];
|
||||
uint32_t* const pred_histos =
|
||||
&all_pred_histos[subsampling_index * kNumPredModes];
|
||||
// Prediction modes of the left and above neighbor tiles.
|
||||
const int left_mode =
|
||||
(tile_x > 0) ? (modes[tile_y * tiles_per_row + tile_x - 1] >> 8) & 0xff
|
||||
: 0xff;
|
||||
const int above_mode =
|
||||
(tile_y > 0) ? (modes[(tile_y - 1) * tiles_per_row + tile_x] >> 8) & 0xff
|
||||
: 0xff;
|
||||
int mode;
|
||||
int64_t best_diff = WEBP_INT64_MAX;
|
||||
uint32_t best_mode = 0;
|
||||
const uint32_t* best_histo =
|
||||
GetHistoArgbConst(all_argb, /*subsampling_index=*/0, best_mode);
|
||||
for (mode = 0; mode < kNumPredModes; ++mode) {
|
||||
const uint32_t* const histo_argb =
|
||||
GetHistoArgbConst(all_argb, subsampling_index, mode);
|
||||
const int64_t cur_diff = PredictionCostSpatialHistogram(
|
||||
accumulated_argb, histo_argb, mode, left_mode, above_mode);
|
||||
|
||||
if (cur_diff < best_diff) {
|
||||
best_histo = histo_argb;
|
||||
best_diff = cur_diff;
|
||||
best_mode = mode;
|
||||
}
|
||||
}
|
||||
// Update the accumulated histogram.
|
||||
VP8LAddVectorEq(best_histo, accumulated_argb, HISTO_SIZE);
|
||||
modes[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (best_mode << 8);
|
||||
++pred_histos[best_mode];
|
||||
}
|
||||
|
||||
// Computes the residuals for the different predictors.
|
||||
// If max_quantization > 1, assumes that near lossless processing will be
|
||||
// applied, quantizing residuals to multiples of quantization levels up to
|
||||
// max_quantization (the actual quantization level depends on smoothness near
|
||||
// the given pixel).
|
||||
static int GetBestPredictorForTile(int width, int height,
|
||||
int tile_x, int tile_y, int bits,
|
||||
int accumulated[4][256],
|
||||
uint32_t* const argb_scratch,
|
||||
const uint32_t* const argb,
|
||||
int max_quantization,
|
||||
int exact, int used_subtract_green,
|
||||
const uint32_t* const modes) {
|
||||
const int kNumPredModes = 14;
|
||||
const int start_x = tile_x << bits;
|
||||
const int start_y = tile_y << bits;
|
||||
const int tile_size = 1 << bits;
|
||||
static void ComputeResidualsForTile(
|
||||
int width, int height, int tile_x, int tile_y, int min_bits,
|
||||
uint32_t update_up_to_index, uint32_t* const all_argb,
|
||||
uint32_t* const argb_scratch, const uint32_t* const argb,
|
||||
int max_quantization, int exact, int used_subtract_green) {
|
||||
const int start_x = tile_x << min_bits;
|
||||
const int start_y = tile_y << min_bits;
|
||||
const int tile_size = 1 << min_bits;
|
||||
const int max_y = GetMin(tile_size, height - start_y);
|
||||
const int max_x = GetMin(tile_size, width - start_x);
|
||||
// Whether there exist columns just outside the tile.
|
||||
@ -318,35 +395,20 @@ static int GetBestPredictorForTile(int width, int height,
|
||||
#if (WEBP_NEAR_LOSSLESS == 1)
|
||||
const int context_width = max_x + have_left + (max_x < width - start_x);
|
||||
#endif
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, bits);
|
||||
// Prediction modes of the left and above neighbor tiles.
|
||||
const int left_mode = (tile_x > 0) ?
|
||||
(modes[tile_y * tiles_per_row + tile_x - 1] >> 8) & 0xff : 0xff;
|
||||
const int above_mode = (tile_y > 0) ?
|
||||
(modes[(tile_y - 1) * tiles_per_row + tile_x] >> 8) & 0xff : 0xff;
|
||||
// The width of upper_row and current_row is one pixel larger than image width
|
||||
// to allow the top right pixel to point to the leftmost pixel of the next row
|
||||
// when at the right edge.
|
||||
uint32_t* upper_row = argb_scratch;
|
||||
uint32_t* current_row = upper_row + width + 1;
|
||||
uint8_t* const max_diffs = (uint8_t*)(current_row + width + 1);
|
||||
float best_diff = MAX_DIFF_COST;
|
||||
int best_mode = 0;
|
||||
int mode;
|
||||
int histo_stack_1[4][256];
|
||||
int histo_stack_2[4][256];
|
||||
// Need pointers to be able to swap arrays.
|
||||
int (*histo_argb)[256] = histo_stack_1;
|
||||
int (*best_histo)[256] = histo_stack_2;
|
||||
int i, j;
|
||||
uint32_t residuals[1 << MAX_TRANSFORM_BITS];
|
||||
assert(bits <= MAX_TRANSFORM_BITS);
|
||||
assert(max_x <= (1 << MAX_TRANSFORM_BITS));
|
||||
|
||||
for (mode = 0; mode < kNumPredModes; ++mode) {
|
||||
float cur_diff;
|
||||
int relative_y;
|
||||
memset(histo_argb, 0, sizeof(histo_stack_1));
|
||||
uint32_t* const histo_argb =
|
||||
GetHistoArgb(all_argb, /*subsampling_index=*/0, mode);
|
||||
if (start_y > 0) {
|
||||
// Read the row above the tile which will become the first upper_row.
|
||||
// Include a pixel to the left if it exists; include a pixel to the right
|
||||
@ -382,41 +444,31 @@ static int GetBestPredictorForTile(int width, int height,
|
||||
for (relative_x = 0; relative_x < max_x; ++relative_x) {
|
||||
UpdateHisto(histo_argb, residuals[relative_x]);
|
||||
}
|
||||
}
|
||||
cur_diff = PredictionCostSpatialHistogram(
|
||||
(const int (*)[256])accumulated, (const int (*)[256])histo_argb);
|
||||
// Favor keeping the areas locally similar.
|
||||
if (mode == left_mode) cur_diff -= kSpatialPredictorBias;
|
||||
if (mode == above_mode) cur_diff -= kSpatialPredictorBias;
|
||||
|
||||
if (cur_diff < best_diff) {
|
||||
int (*tmp)[256] = histo_argb;
|
||||
histo_argb = best_histo;
|
||||
best_histo = tmp;
|
||||
best_diff = cur_diff;
|
||||
best_mode = mode;
|
||||
if (update_up_to_index > 0) {
|
||||
uint32_t subsampling_index;
|
||||
for (subsampling_index = 1; subsampling_index <= update_up_to_index;
|
||||
++subsampling_index) {
|
||||
uint32_t* const super_histo =
|
||||
GetHistoArgb(all_argb, subsampling_index, mode);
|
||||
for (relative_x = 0; relative_x < max_x; ++relative_x) {
|
||||
UpdateHisto(super_histo, residuals[relative_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (j = 0; j < 256; j++) {
|
||||
accumulated[i][j] += best_histo[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
return best_mode;
|
||||
}
|
||||
|
||||
// Converts pixels of the image to residuals with respect to predictions.
|
||||
// If max_quantization > 1, applies near lossless processing, quantizing
|
||||
// residuals to multiples of quantization levels up to max_quantization
|
||||
// (the actual quantization level depends on smoothness near the given pixel).
|
||||
static void CopyImageWithPrediction(int width, int height,
|
||||
int bits, uint32_t* const modes,
|
||||
static void CopyImageWithPrediction(int width, int height, int bits,
|
||||
const uint32_t* const modes,
|
||||
uint32_t* const argb_scratch,
|
||||
uint32_t* const argb,
|
||||
int low_effort, int max_quantization,
|
||||
int exact, int used_subtract_green) {
|
||||
uint32_t* const argb, int low_effort,
|
||||
int max_quantization, int exact,
|
||||
int used_subtract_green) {
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, bits);
|
||||
// The width of upper_row and current_row is one pixel larger than image width
|
||||
// to allow the top right pixel to point to the leftmost pixel of the next row
|
||||
@ -469,47 +521,307 @@ static void CopyImageWithPrediction(int width, int height,
|
||||
}
|
||||
}
|
||||
|
||||
// Checks whether 'image' can be subsampled by finding the biggest power of 2
|
||||
// squares (defined by 'best_bits') of uniform value it is made out of.
|
||||
void VP8LOptimizeSampling(uint32_t* const image, int full_width,
|
||||
int full_height, int bits, int max_bits,
|
||||
int* best_bits_out) {
|
||||
int width = VP8LSubSampleSize(full_width, bits);
|
||||
int height = VP8LSubSampleSize(full_height, bits);
|
||||
int old_width, x, y, square_size;
|
||||
int best_bits = bits;
|
||||
*best_bits_out = bits;
|
||||
// Check rows first.
|
||||
while (best_bits < max_bits) {
|
||||
const int new_square_size = 1 << (best_bits + 1 - bits);
|
||||
int is_good = 1;
|
||||
square_size = 1 << (best_bits - bits);
|
||||
for (y = 0; y + square_size < height; y += new_square_size) {
|
||||
// Check the first lines of consecutive line groups.
|
||||
if (memcmp(&image[y * width], &image[(y + square_size) * width],
|
||||
width * sizeof(*image)) != 0) {
|
||||
is_good = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (is_good) {
|
||||
++best_bits;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (best_bits == bits) return;
|
||||
|
||||
// Check columns.
|
||||
while (best_bits > bits) {
|
||||
int is_good = 1;
|
||||
square_size = 1 << (best_bits - bits);
|
||||
for (y = 0; is_good && y < height; ++y) {
|
||||
for (x = 0; is_good && x < width; x += square_size) {
|
||||
int i;
|
||||
for (i = x + 1; i < GetMin(x + square_size, width); ++i) {
|
||||
if (image[y * width + i] != image[y * width + x]) {
|
||||
is_good = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_good) {
|
||||
break;
|
||||
}
|
||||
--best_bits;
|
||||
}
|
||||
if (best_bits == bits) return;
|
||||
|
||||
// Subsample the image.
|
||||
old_width = width;
|
||||
square_size = 1 << (best_bits - bits);
|
||||
width = VP8LSubSampleSize(full_width, best_bits);
|
||||
height = VP8LSubSampleSize(full_height, best_bits);
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
image[y * width + x] = image[square_size * (y * old_width + x)];
|
||||
}
|
||||
}
|
||||
*best_bits_out = best_bits;
|
||||
}
|
||||
|
||||
// Computes the best predictor image.
|
||||
// Finds the best predictors per tile. Once done, finds the best predictor image
|
||||
// sampling.
|
||||
// best_bits is set to 0 in case of error.
|
||||
// The following requires some glossary:
|
||||
// - a tile is a square of side 2^min_bits pixels.
|
||||
// - a super-tile of a tile is a square of side 2^bits pixels with bits in
|
||||
// [min_bits+1, max_bits].
|
||||
// - the max-tile of a tile is the square of 2^max_bits pixels containing it.
|
||||
// If this max-tile crosses the border of an image, it is cropped.
|
||||
// - tile, super-tiles and max_tile are aligned on powers of 2 in the original
|
||||
// image.
|
||||
// - coordinates for tile, super-tile, max-tile are respectively named
|
||||
// tile_x, super_tile_x, max_tile_x at their bit scale.
|
||||
// - in the max-tile, a tile has local coordinates (local_tile_x, local_tile_y).
|
||||
// The tiles are processed in the following zigzag order to complete the
|
||||
// super-tiles as soon as possible:
|
||||
// 1 2| 5 6
|
||||
// 3 4| 7 8
|
||||
// --------------
|
||||
// 9 10| 13 14
|
||||
// 11 12| 15 16
|
||||
// When computing the residuals for a tile, the histogram of the above
|
||||
// super-tile is updated. If this super-tile is finished, its histogram is used
|
||||
// to update the histogram of the next super-tile and so on up to the max-tile.
|
||||
static void GetBestPredictorsAndSubSampling(
|
||||
int width, int height, const int min_bits, const int max_bits,
|
||||
uint32_t* const argb_scratch, const uint32_t* const argb,
|
||||
int max_quantization, int exact, int used_subtract_green,
|
||||
const WebPPicture* const pic, int percent_range, int* const percent,
|
||||
uint32_t** const all_modes, int* best_bits, uint32_t** best_mode) {
|
||||
const uint32_t tiles_per_row = VP8LSubSampleSize(width, min_bits);
|
||||
const uint32_t tiles_per_col = VP8LSubSampleSize(height, min_bits);
|
||||
int64_t best_cost;
|
||||
uint32_t subsampling_index;
|
||||
const uint32_t max_subsampling_index = max_bits - min_bits;
|
||||
// Compute the needed memory size for residual histograms, accumulated
|
||||
// residual histograms and predictor histograms.
|
||||
const int num_argb = (max_subsampling_index + 1) * kNumPredModes * HISTO_SIZE;
|
||||
const int num_accumulated_rgb = (max_subsampling_index + 1) * HISTO_SIZE;
|
||||
const int num_predictors = (max_subsampling_index + 1) * kNumPredModes;
|
||||
uint32_t* const raw_data = (uint32_t*)WebPSafeCalloc(
|
||||
num_argb + num_accumulated_rgb + num_predictors, sizeof(uint32_t));
|
||||
uint32_t* const all_argb = raw_data;
|
||||
uint32_t* const all_accumulated_argb = all_argb + num_argb;
|
||||
uint32_t* const all_pred_histos = all_accumulated_argb + num_accumulated_rgb;
|
||||
const int max_tile_size = 1 << max_subsampling_index; // in tile size
|
||||
int percent_start = *percent;
|
||||
// When using the residuals of a tile for its super-tiles, you can either:
|
||||
// - use each residual to update the histogram of the super-tile, with a cost
|
||||
// of 4 * (1<<n)^2 increment operations (4 for the number of channels, and
|
||||
// (1<<n)^2 for the number of pixels in the tile)
|
||||
// - use the histogram of the tile to update the histogram of the super-tile,
|
||||
// with a cost of HISTO_SIZE (1024)
|
||||
// The first method is therefore faster until n==4. 'update_up_to_index'
|
||||
// defines the maximum subsampling_index for which the residuals should be
|
||||
// individually added to the super-tile histogram.
|
||||
const uint32_t update_up_to_index =
|
||||
GetMax(GetMin(4, max_bits), min_bits) - min_bits;
|
||||
// Coordinates in the max-tile in tile units.
|
||||
uint32_t local_tile_x = 0, local_tile_y = 0;
|
||||
uint32_t max_tile_x = 0, max_tile_y = 0;
|
||||
uint32_t tile_x = 0, tile_y = 0;
|
||||
|
||||
*best_bits = 0;
|
||||
*best_mode = NULL;
|
||||
if (raw_data == NULL) return;
|
||||
|
||||
while (tile_y < tiles_per_col) {
|
||||
ComputeResidualsForTile(width, height, tile_x, tile_y, min_bits,
|
||||
update_up_to_index, all_argb, argb_scratch, argb,
|
||||
max_quantization, exact, used_subtract_green);
|
||||
|
||||
// Update all the super-tiles that are complete.
|
||||
subsampling_index = 0;
|
||||
while (1) {
|
||||
const uint32_t super_tile_x = tile_x >> subsampling_index;
|
||||
const uint32_t super_tile_y = tile_y >> subsampling_index;
|
||||
const uint32_t super_tiles_per_row =
|
||||
VP8LSubSampleSize(width, min_bits + subsampling_index);
|
||||
GetBestPredictorForTile(all_argb, subsampling_index, super_tile_x,
|
||||
super_tile_y, super_tiles_per_row,
|
||||
all_accumulated_argb, all_modes, all_pred_histos);
|
||||
if (subsampling_index == max_subsampling_index) break;
|
||||
|
||||
// Update the following super-tile histogram if it has not been updated
|
||||
// yet.
|
||||
++subsampling_index;
|
||||
if (subsampling_index > update_up_to_index &&
|
||||
subsampling_index <= max_subsampling_index) {
|
||||
VP8LAddVectorEq(
|
||||
GetHistoArgbConst(all_argb, subsampling_index - 1, /*mode=*/0),
|
||||
GetHistoArgb(all_argb, subsampling_index, /*mode=*/0),
|
||||
HISTO_SIZE * kNumPredModes);
|
||||
}
|
||||
// Check whether the super-tile is not complete (if the smallest tile
|
||||
// is not at the end of a line/column or at the beginning of a super-tile
|
||||
// of size (1 << subsampling_index)).
|
||||
if (!((tile_x == (tiles_per_row - 1) ||
|
||||
(local_tile_x + 1) % (1 << subsampling_index) == 0) &&
|
||||
(tile_y == (tiles_per_col - 1) ||
|
||||
(local_tile_y + 1) % (1 << subsampling_index) == 0))) {
|
||||
--subsampling_index;
|
||||
// subsampling_index now is the index of the last finished super-tile.
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Reset all the histograms belonging to finished tiles.
|
||||
memset(all_argb, 0,
|
||||
HISTO_SIZE * kNumPredModes * (subsampling_index + 1) *
|
||||
sizeof(*all_argb));
|
||||
|
||||
if (subsampling_index == max_subsampling_index) {
|
||||
// If a new max-tile is started.
|
||||
if (tile_x == (tiles_per_row - 1)) {
|
||||
max_tile_x = 0;
|
||||
++max_tile_y;
|
||||
} else {
|
||||
++max_tile_x;
|
||||
}
|
||||
local_tile_x = 0;
|
||||
local_tile_y = 0;
|
||||
} else {
|
||||
// Proceed with the Z traversal.
|
||||
uint32_t coord_x = local_tile_x >> subsampling_index;
|
||||
uint32_t coord_y = local_tile_y >> subsampling_index;
|
||||
if (tile_x == (tiles_per_row - 1) && coord_x % 2 == 0) {
|
||||
++coord_y;
|
||||
} else {
|
||||
if (coord_x % 2 == 0) {
|
||||
++coord_x;
|
||||
} else {
|
||||
// Z traversal.
|
||||
++coord_y;
|
||||
--coord_x;
|
||||
}
|
||||
}
|
||||
local_tile_x = coord_x << subsampling_index;
|
||||
local_tile_y = coord_y << subsampling_index;
|
||||
}
|
||||
tile_x = max_tile_x * max_tile_size + local_tile_x;
|
||||
tile_y = max_tile_y * max_tile_size + local_tile_y;
|
||||
|
||||
if (tile_x == 0 &&
|
||||
!WebPReportProgress(
|
||||
pic, percent_start + percent_range * tile_y / tiles_per_col,
|
||||
percent)) {
|
||||
WebPSafeFree(raw_data);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Figure out the best sampling.
|
||||
best_cost = WEBP_INT64_MAX;
|
||||
for (subsampling_index = 0; subsampling_index <= max_subsampling_index;
|
||||
++subsampling_index) {
|
||||
int plane;
|
||||
const uint32_t* const accumulated =
|
||||
GetAccumulatedHisto(all_accumulated_argb, subsampling_index);
|
||||
int64_t cost = VP8LShannonEntropy(
|
||||
&all_pred_histos[subsampling_index * kNumPredModes], kNumPredModes);
|
||||
for (plane = 0; plane < 4; ++plane) {
|
||||
cost += VP8LShannonEntropy(&accumulated[plane * 256], 256);
|
||||
}
|
||||
if (cost < best_cost) {
|
||||
best_cost = cost;
|
||||
*best_bits = min_bits + subsampling_index;
|
||||
*best_mode = all_modes[subsampling_index];
|
||||
}
|
||||
}
|
||||
|
||||
WebPSafeFree(raw_data);
|
||||
|
||||
VP8LOptimizeSampling(*best_mode, width, height, *best_bits,
|
||||
MAX_TRANSFORM_BITS, best_bits);
|
||||
}
|
||||
|
||||
// Finds the best predictor for each tile, and converts the image to residuals
|
||||
// with respect to predictions. If near_lossless_quality < 100, applies
|
||||
// near lossless processing, shaving off more bits of residuals for lower
|
||||
// qualities.
|
||||
int VP8LResidualImage(int width, int height, int bits, int low_effort,
|
||||
uint32_t* const argb, uint32_t* const argb_scratch,
|
||||
uint32_t* const image, int near_lossless_quality,
|
||||
int exact, int used_subtract_green,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent) {
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, bits);
|
||||
const int tiles_per_col = VP8LSubSampleSize(height, bits);
|
||||
int VP8LResidualImage(int width, int height, int min_bits, int max_bits,
|
||||
int low_effort, uint32_t* const argb,
|
||||
uint32_t* const argb_scratch, uint32_t* const image,
|
||||
int near_lossless_quality, int exact,
|
||||
int used_subtract_green, const WebPPicture* const pic,
|
||||
int percent_range, int* const percent,
|
||||
int* const best_bits) {
|
||||
int percent_start = *percent;
|
||||
int tile_y;
|
||||
int histo[4][256];
|
||||
const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);
|
||||
if (low_effort) {
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, max_bits);
|
||||
const int tiles_per_col = VP8LSubSampleSize(height, max_bits);
|
||||
int i;
|
||||
for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
|
||||
image[i] = ARGB_BLACK | (kPredLowEffort << 8);
|
||||
}
|
||||
*best_bits = max_bits;
|
||||
} else {
|
||||
memset(histo, 0, sizeof(histo));
|
||||
for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
|
||||
int tile_x;
|
||||
for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
|
||||
const int pred = GetBestPredictorForTile(
|
||||
width, height, tile_x, tile_y, bits, histo, argb_scratch, argb,
|
||||
max_quantization, exact, used_subtract_green, image);
|
||||
image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
|
||||
}
|
||||
|
||||
if (!WebPReportProgress(
|
||||
pic, percent_start + percent_range * tile_y / tiles_per_col,
|
||||
percent)) {
|
||||
return 0;
|
||||
}
|
||||
// Allocate data to try all samplings from min_bits to max_bits.
|
||||
int bits;
|
||||
uint32_t sum_num_pixels = 0u;
|
||||
uint32_t *modes_raw, *best_mode;
|
||||
uint32_t* modes[MAX_TRANSFORM_BITS + 1];
|
||||
uint32_t num_pixels[MAX_TRANSFORM_BITS + 1];
|
||||
for (bits = min_bits; bits <= max_bits; ++bits) {
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, bits);
|
||||
const int tiles_per_col = VP8LSubSampleSize(height, bits);
|
||||
num_pixels[bits] = tiles_per_row * tiles_per_col;
|
||||
sum_num_pixels += num_pixels[bits];
|
||||
}
|
||||
modes_raw = (uint32_t*)WebPSafeMalloc(sum_num_pixels, sizeof(*modes_raw));
|
||||
if (modes_raw == NULL) return 0;
|
||||
// Have modes point to the right global memory modes_raw.
|
||||
modes[min_bits] = modes_raw;
|
||||
for (bits = min_bits + 1; bits <= max_bits; ++bits) {
|
||||
modes[bits] = modes[bits - 1] + num_pixels[bits - 1];
|
||||
}
|
||||
// Find the best sampling.
|
||||
GetBestPredictorsAndSubSampling(
|
||||
width, height, min_bits, max_bits, argb_scratch, argb, max_quantization,
|
||||
exact, used_subtract_green, pic, percent_range, percent,
|
||||
&modes[min_bits], best_bits, &best_mode);
|
||||
if (*best_bits == 0) {
|
||||
WebPSafeFree(modes_raw);
|
||||
return 0;
|
||||
}
|
||||
// Keep the best predictor image.
|
||||
memcpy(image, best_mode,
|
||||
VP8LSubSampleSize(width, *best_bits) *
|
||||
VP8LSubSampleSize(height, *best_bits) * sizeof(*image));
|
||||
WebPSafeFree(modes_raw);
|
||||
}
|
||||
|
||||
CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb,
|
||||
CopyImageWithPrediction(width, height, *best_bits, image, argb_scratch, argb,
|
||||
low_effort, max_quantization, exact,
|
||||
used_subtract_green);
|
||||
return WebPReportProgress(pic, percent_start + percent_range, percent);
|
||||
@ -539,48 +851,51 @@ static WEBP_INLINE uint32_t MultipliersToColorCode(
|
||||
m->green_to_red_;
|
||||
}
|
||||
|
||||
static float PredictionCostCrossColor(const int accumulated[256],
|
||||
const int counts[256]) {
|
||||
static int64_t PredictionCostCrossColor(const uint32_t accumulated[256],
|
||||
const uint32_t counts[256]) {
|
||||
// Favor low entropy, locally and globally.
|
||||
// Favor small absolute values for PredictionCostSpatial
|
||||
static const float kExpValue = 2.4f;
|
||||
return VP8LCombinedShannonEntropy(counts, accumulated) +
|
||||
PredictionCostSpatial(counts, 3, kExpValue);
|
||||
static const uint64_t kExpValue = 240;
|
||||
return (int64_t)VP8LCombinedShannonEntropy(counts, accumulated) +
|
||||
PredictionCostBias(counts, 3, kExpValue);
|
||||
}
|
||||
|
||||
static float GetPredictionCostCrossColorRed(
|
||||
static int64_t GetPredictionCostCrossColorRed(
|
||||
const uint32_t* argb, int stride, int tile_width, int tile_height,
|
||||
VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
|
||||
const int accumulated_red_histo[256]) {
|
||||
int histo[256] = { 0 };
|
||||
float cur_diff;
|
||||
const uint32_t accumulated_red_histo[256]) {
|
||||
uint32_t histo[256] = { 0 };
|
||||
int64_t cur_diff;
|
||||
|
||||
VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,
|
||||
green_to_red, histo);
|
||||
|
||||
cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
|
||||
if ((uint8_t)green_to_red == prev_x.green_to_red_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
// favor keeping the areas locally similar
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
if ((uint8_t)green_to_red == prev_y.green_to_red_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
// favor keeping the areas locally similar
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
if (green_to_red == 0) {
|
||||
cur_diff -= 3;
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
return cur_diff;
|
||||
}
|
||||
|
||||
static void GetBestGreenToRed(
|
||||
const uint32_t* argb, int stride, int tile_width, int tile_height,
|
||||
VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
|
||||
const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) {
|
||||
static void GetBestGreenToRed(const uint32_t* argb, int stride, int tile_width,
|
||||
int tile_height, VP8LMultipliers prev_x,
|
||||
VP8LMultipliers prev_y, int quality,
|
||||
const uint32_t accumulated_red_histo[256],
|
||||
VP8LMultipliers* const best_tx) {
|
||||
const int kMaxIters = 4 + ((7 * quality) >> 8); // in range [4..6]
|
||||
int green_to_red_best = 0;
|
||||
int iter, offset;
|
||||
float best_diff = GetPredictionCostCrossColorRed(
|
||||
argb, stride, tile_width, tile_height, prev_x, prev_y,
|
||||
green_to_red_best, accumulated_red_histo);
|
||||
int64_t best_diff = GetPredictionCostCrossColorRed(
|
||||
argb, stride, tile_width, tile_height, prev_x, prev_y, green_to_red_best,
|
||||
accumulated_red_histo);
|
||||
for (iter = 0; iter < kMaxIters; ++iter) {
|
||||
// ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
|
||||
// one in color computation. Having initial delta here as 1 is sufficient
|
||||
@ -589,7 +904,7 @@ static void GetBestGreenToRed(
|
||||
// Try a negative and a positive delta from the best known value.
|
||||
for (offset = -delta; offset <= delta; offset += 2 * delta) {
|
||||
const int green_to_red_cur = offset + green_to_red_best;
|
||||
const float cur_diff = GetPredictionCostCrossColorRed(
|
||||
const int64_t cur_diff = GetPredictionCostCrossColorRed(
|
||||
argb, stride, tile_width, tile_height, prev_x, prev_y,
|
||||
green_to_red_cur, accumulated_red_histo);
|
||||
if (cur_diff < best_diff) {
|
||||
@ -601,45 +916,50 @@ static void GetBestGreenToRed(
|
||||
best_tx->green_to_red_ = (green_to_red_best & 0xff);
|
||||
}
|
||||
|
||||
static float GetPredictionCostCrossColorBlue(
|
||||
static int64_t GetPredictionCostCrossColorBlue(
|
||||
const uint32_t* argb, int stride, int tile_width, int tile_height,
|
||||
VP8LMultipliers prev_x, VP8LMultipliers prev_y,
|
||||
int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) {
|
||||
int histo[256] = { 0 };
|
||||
float cur_diff;
|
||||
VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_blue,
|
||||
int red_to_blue, const uint32_t accumulated_blue_histo[256]) {
|
||||
uint32_t histo[256] = { 0 };
|
||||
int64_t cur_diff;
|
||||
|
||||
VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,
|
||||
green_to_blue, red_to_blue, histo);
|
||||
|
||||
cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
|
||||
if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
// favor keeping the areas locally similar
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
// favor keeping the areas locally similar
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
// favor keeping the areas locally similar
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
// favor keeping the areas locally similar
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
if (green_to_blue == 0) {
|
||||
cur_diff -= 3;
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
if (red_to_blue == 0) {
|
||||
cur_diff -= 3;
|
||||
cur_diff -= 3ll << LOG_2_PRECISION_BITS;
|
||||
}
|
||||
return cur_diff;
|
||||
}
|
||||
|
||||
#define kGreenRedToBlueNumAxis 8
|
||||
#define kGreenRedToBlueMaxIters 7
|
||||
static void GetBestGreenRedToBlue(
|
||||
const uint32_t* argb, int stride, int tile_width, int tile_height,
|
||||
VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
|
||||
const int accumulated_blue_histo[256],
|
||||
VP8LMultipliers* const best_tx) {
|
||||
static void GetBestGreenRedToBlue(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
VP8LMultipliers prev_x,
|
||||
VP8LMultipliers prev_y, int quality,
|
||||
const uint32_t accumulated_blue_histo[256],
|
||||
VP8LMultipliers* const best_tx) {
|
||||
const int8_t offset[kGreenRedToBlueNumAxis][2] =
|
||||
{{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};
|
||||
const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 };
|
||||
@ -649,9 +969,9 @@ static void GetBestGreenRedToBlue(
|
||||
int red_to_blue_best = 0;
|
||||
int iter;
|
||||
// Initial value at origin:
|
||||
float best_diff = GetPredictionCostCrossColorBlue(
|
||||
argb, stride, tile_width, tile_height, prev_x, prev_y,
|
||||
green_to_blue_best, red_to_blue_best, accumulated_blue_histo);
|
||||
int64_t best_diff = GetPredictionCostCrossColorBlue(
|
||||
argb, stride, tile_width, tile_height, prev_x, prev_y, green_to_blue_best,
|
||||
red_to_blue_best, accumulated_blue_histo);
|
||||
for (iter = 0; iter < iters; ++iter) {
|
||||
const int delta = delta_lut[iter];
|
||||
int axis;
|
||||
@ -659,7 +979,7 @@ static void GetBestGreenRedToBlue(
|
||||
const int green_to_blue_cur =
|
||||
offset[axis][0] * delta + green_to_blue_best;
|
||||
const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;
|
||||
const float cur_diff = GetPredictionCostCrossColorBlue(
|
||||
const int64_t cur_diff = GetPredictionCostCrossColorBlue(
|
||||
argb, stride, tile_width, tile_height, prev_x, prev_y,
|
||||
green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);
|
||||
if (cur_diff < best_diff) {
|
||||
@ -684,13 +1004,10 @@ static void GetBestGreenRedToBlue(
|
||||
#undef kGreenRedToBlueNumAxis
|
||||
|
||||
static VP8LMultipliers GetBestColorTransformForTile(
|
||||
int tile_x, int tile_y, int bits,
|
||||
VP8LMultipliers prev_x,
|
||||
VP8LMultipliers prev_y,
|
||||
int quality, int xsize, int ysize,
|
||||
const int accumulated_red_histo[256],
|
||||
const int accumulated_blue_histo[256],
|
||||
const uint32_t* const argb) {
|
||||
int tile_x, int tile_y, int bits, VP8LMultipliers prev_x,
|
||||
VP8LMultipliers prev_y, int quality, int xsize, int ysize,
|
||||
const uint32_t accumulated_red_histo[256],
|
||||
const uint32_t accumulated_blue_histo[256], const uint32_t* const argb) {
|
||||
const int max_tile_size = 1 << bits;
|
||||
const int tile_y_offset = tile_y * max_tile_size;
|
||||
const int tile_x_offset = tile_x * max_tile_size;
|
||||
@ -728,13 +1045,13 @@ static void CopyTileWithColorTransform(int xsize, int ysize,
|
||||
int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
uint32_t* const argb, uint32_t* image,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent) {
|
||||
int* const percent, int* const best_bits) {
|
||||
const int max_tile_size = 1 << bits;
|
||||
const int tile_xsize = VP8LSubSampleSize(width, bits);
|
||||
const int tile_ysize = VP8LSubSampleSize(height, bits);
|
||||
int percent_start = *percent;
|
||||
int accumulated_red_histo[256] = { 0 };
|
||||
int accumulated_blue_histo[256] = { 0 };
|
||||
uint32_t accumulated_red_histo[256] = { 0 };
|
||||
uint32_t accumulated_blue_histo[256] = { 0 };
|
||||
int tile_x, tile_y;
|
||||
VP8LMultipliers prev_x, prev_y;
|
||||
MultipliersClear(&prev_y);
|
||||
@ -788,5 +1105,7 @@ int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
VP8LOptimizeSampling(image, width, height, bits, MAX_TRANSFORM_BITS,
|
||||
best_bits);
|
||||
return 1;
|
||||
}
|
||||
|
12
thirdparty/libwebp/src/enc/quant_enc.c
vendored
12
thirdparty/libwebp/src/enc/quant_enc.c
vendored
@ -462,7 +462,7 @@ const uint16_t VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
|
||||
const uint16_t VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
|
||||
|
||||
// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
|
||||
const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
|
||||
static const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
|
||||
I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
|
||||
};
|
||||
|
||||
@ -478,7 +478,9 @@ void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
|
||||
VP8EncPredChroma8(it->yuv_p_, left, top);
|
||||
}
|
||||
|
||||
void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
|
||||
// Form all the ten Intra4x4 predictions in the yuv_p_ cache
|
||||
// for the 4x4 block it->i4_
|
||||
static void MakeIntra4Preds(const VP8EncIterator* const it) {
|
||||
VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
|
||||
}
|
||||
|
||||
@ -1102,7 +1104,7 @@ static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.
|
||||
|
||||
InitScore(&rd_i4);
|
||||
VP8MakeIntra4Preds(it);
|
||||
MakeIntra4Preds(it);
|
||||
for (mode = 0; mode < NUM_BMODES; ++mode) {
|
||||
VP8ModeScore rd_tmp;
|
||||
int16_t tmp_levels[16];
|
||||
@ -1237,7 +1239,7 @@ static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
|
||||
const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
|
||||
uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
|
||||
VP8MakeIntra4Preds(it);
|
||||
MakeIntra4Preds(it);
|
||||
nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
|
||||
src, dst, mode) << it->i4_;
|
||||
} while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
|
||||
@ -1305,7 +1307,7 @@ static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
|
||||
const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
|
||||
const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
|
||||
|
||||
VP8MakeIntra4Preds(it);
|
||||
MakeIntra4Preds(it);
|
||||
for (mode = 0; mode < NUM_BMODES; ++mode) {
|
||||
const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
|
||||
const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT
|
||||
|
13
thirdparty/libwebp/src/enc/vp8i_enc.h
vendored
13
thirdparty/libwebp/src/enc/vp8i_enc.h
vendored
@ -16,6 +16,7 @@
|
||||
|
||||
#include <string.h> // for memcpy()
|
||||
#include "src/dec/common_dec.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/utils/bit_writer_utils.h"
|
||||
#include "src/utils/thread_utils.h"
|
||||
@ -31,7 +32,7 @@ extern "C" {
|
||||
|
||||
// version numbers
|
||||
#define ENC_MAJ_VERSION 1
|
||||
#define ENC_MIN_VERSION 4
|
||||
#define ENC_MIN_VERSION 5
|
||||
#define ENC_REV_VERSION 0
|
||||
|
||||
enum { MAX_LF_LEVELS = 64, // Maximum loop filter level
|
||||
@ -78,7 +79,6 @@ typedef enum { // Rate-distortion optimization levels
|
||||
extern const uint16_t VP8Scan[16];
|
||||
extern const uint16_t VP8UVModeOffsets[4];
|
||||
extern const uint16_t VP8I16ModeOffsets[4];
|
||||
extern const uint16_t VP8I4ModeOffsets[NUM_BMODES];
|
||||
|
||||
// Layout of prediction blocks
|
||||
// intra 16x16
|
||||
@ -234,7 +234,11 @@ typedef struct {
|
||||
VP8BitWriter* bw_; // current bit-writer
|
||||
uint8_t* preds_; // intra mode predictors (4x4 blocks)
|
||||
uint32_t* nz_; // non-zero pattern
|
||||
#if WEBP_AARCH64 && BPS == 32
|
||||
uint8_t i4_boundary_[40]; // 32+8 boundary samples needed by intra4x4
|
||||
#else
|
||||
uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4
|
||||
#endif
|
||||
uint8_t* i4_top_; // pointer to the current top boundary sample
|
||||
int i4_; // current intra4x4 mode being tested
|
||||
int top_nz_[9]; // top-non-zero context.
|
||||
@ -267,8 +271,6 @@ typedef struct {
|
||||
// in iterator.c
|
||||
// must be called first
|
||||
void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
|
||||
// restart a scan
|
||||
void VP8IteratorReset(VP8EncIterator* const it);
|
||||
// reset iterator position to row 'y'
|
||||
void VP8IteratorSetRow(VP8EncIterator* const it, int y);
|
||||
// set count down (=number of iterations to go)
|
||||
@ -444,9 +446,6 @@ extern const uint8_t VP8Cat6[];
|
||||
void VP8MakeLuma16Preds(const VP8EncIterator* const it);
|
||||
// Form all the four Chroma8x8 predictions in the yuv_p_ cache
|
||||
void VP8MakeChroma8Preds(const VP8EncIterator* const it);
|
||||
// Form all the ten Intra4x4 predictions in the yuv_p_ cache
|
||||
// for the 4x4 block it->i4_
|
||||
void VP8MakeIntra4Preds(const VP8EncIterator* const it);
|
||||
// Rate calculation
|
||||
int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
|
||||
int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
|
||||
|
294
thirdparty/libwebp/src/enc/vp8l_enc.c
vendored
294
thirdparty/libwebp/src/enc/vp8l_enc.c
vendored
@ -30,6 +30,10 @@
|
||||
|
||||
// Maximum number of histogram images (sub-blocks).
|
||||
#define MAX_HUFF_IMAGE_SIZE 2600
|
||||
#define MAX_HUFFMAN_BITS (MIN_HUFFMAN_BITS + (1 << NUM_HUFFMAN_BITS) - 1)
|
||||
// Empirical value for which it becomes too computationally expensive to
|
||||
// compute the best predictor image.
|
||||
#define MAX_PREDICTOR_IMAGE_SIZE (1 << 14)
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Palette
|
||||
@ -140,8 +144,8 @@ static int AnalyzeEntropy(const uint32_t* argb,
|
||||
curr_row += argb_stride;
|
||||
}
|
||||
{
|
||||
float entropy_comp[kHistoTotal];
|
||||
float entropy[kNumEntropyIx];
|
||||
uint64_t entropy_comp[kHistoTotal];
|
||||
uint64_t entropy[kNumEntropyIx];
|
||||
int k;
|
||||
int last_mode_to_analyze = use_palette ? kPalette : kSpatialSubGreen;
|
||||
int j;
|
||||
@ -179,19 +183,19 @@ static int AnalyzeEntropy(const uint32_t* argb,
|
||||
// When including transforms, there is an overhead in bits from
|
||||
// storing them. This overhead is small but matters for small images.
|
||||
// For spatial, there are 14 transformations.
|
||||
entropy[kSpatial] += VP8LSubSampleSize(width, transform_bits) *
|
||||
entropy[kSpatial] += (uint64_t)VP8LSubSampleSize(width, transform_bits) *
|
||||
VP8LSubSampleSize(height, transform_bits) *
|
||||
VP8LFastLog2(14);
|
||||
// For color transforms: 24 as only 3 channels are considered in a
|
||||
// ColorTransformElement.
|
||||
entropy[kSpatialSubGreen] += VP8LSubSampleSize(width, transform_bits) *
|
||||
VP8LSubSampleSize(height, transform_bits) *
|
||||
VP8LFastLog2(24);
|
||||
entropy[kSpatialSubGreen] +=
|
||||
(uint64_t)VP8LSubSampleSize(width, transform_bits) *
|
||||
VP8LSubSampleSize(height, transform_bits) * VP8LFastLog2(24);
|
||||
// For palettes, add the cost of storing the palette.
|
||||
// We empirically estimate the cost of a compressed entry as 8 bits.
|
||||
// The palette is differential-coded when compressed hence a much
|
||||
// lower cost than sizeof(uint32_t)*8.
|
||||
entropy[kPalette] += palette_size * 8;
|
||||
entropy[kPalette] += (palette_size * 8ull) << LOG_2_PRECISION_BITS;
|
||||
|
||||
*min_entropy_ix = kDirect;
|
||||
for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) {
|
||||
@ -231,17 +235,33 @@ static int AnalyzeEntropy(const uint32_t* argb,
|
||||
}
|
||||
}
|
||||
|
||||
// Clamp histogram and transform bits.
|
||||
static int ClampBits(int width, int height, int bits, int min_bits,
|
||||
int max_bits, int image_size_max) {
|
||||
int image_size;
|
||||
bits = (bits < min_bits) ? min_bits : (bits > max_bits) ? max_bits : bits;
|
||||
image_size = VP8LSubSampleSize(width, bits) * VP8LSubSampleSize(height, bits);
|
||||
while (bits < max_bits && image_size > image_size_max) {
|
||||
++bits;
|
||||
image_size =
|
||||
VP8LSubSampleSize(width, bits) * VP8LSubSampleSize(height, bits);
|
||||
}
|
||||
// In case the bits reduce the image too much, choose the smallest value
|
||||
// setting the histogram image size to 1.
|
||||
while (bits > min_bits && image_size == 1) {
|
||||
image_size = VP8LSubSampleSize(width, bits - 1) *
|
||||
VP8LSubSampleSize(height, bits - 1);
|
||||
if (image_size != 1) break;
|
||||
--bits;
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
static int GetHistoBits(int method, int use_palette, int width, int height) {
|
||||
// Make tile size a function of encoding method (Range: 0 to 6).
|
||||
int histo_bits = (use_palette ? 9 : 7) - method;
|
||||
while (1) {
|
||||
const int huff_image_size = VP8LSubSampleSize(width, histo_bits) *
|
||||
VP8LSubSampleSize(height, histo_bits);
|
||||
if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break;
|
||||
++histo_bits;
|
||||
}
|
||||
return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS :
|
||||
(histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
|
||||
const int histo_bits = (use_palette ? 9 : 7) - method;
|
||||
return ClampBits(width, height, histo_bits, MIN_HUFFMAN_BITS,
|
||||
MAX_HUFFMAN_BITS, MAX_HUFF_IMAGE_SIZE);
|
||||
}
|
||||
|
||||
static int GetTransformBits(int method, int histo_bits) {
|
||||
@ -280,7 +300,7 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
|
||||
const int method = config->method;
|
||||
const int low_effort = (config->method == 0);
|
||||
int i;
|
||||
int use_palette;
|
||||
int use_palette, transform_bits;
|
||||
int n_lz77s;
|
||||
// If set to 0, analyze the cache with the computed cache value. If 1, also
|
||||
// analyze with no-cache.
|
||||
@ -297,7 +317,9 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
|
||||
// Empirical bit sizes.
|
||||
enc->histo_bits_ = GetHistoBits(method, use_palette,
|
||||
pic->width, pic->height);
|
||||
enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_);
|
||||
transform_bits = GetTransformBits(method, enc->histo_bits_);
|
||||
enc->predictor_transform_bits_ = transform_bits;
|
||||
enc->cross_color_transform_bits_ = transform_bits;
|
||||
|
||||
if (low_effort) {
|
||||
// AnalyzeEntropy is somewhat slow.
|
||||
@ -311,8 +333,8 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
|
||||
// Try out multiple LZ77 on images with few colors.
|
||||
n_lz77s = (enc->palette_size_ > 0 && enc->palette_size_ <= 16) ? 2 : 1;
|
||||
if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, use_palette,
|
||||
enc->palette_size_, enc->transform_bits_,
|
||||
&min_entropy_ix, red_and_blue_always_zero)) {
|
||||
enc->palette_size_, transform_bits, &min_entropy_ix,
|
||||
red_and_blue_always_zero)) {
|
||||
return 0;
|
||||
}
|
||||
if (method == 6 && config->quality == 100) {
|
||||
@ -661,11 +683,12 @@ static WEBP_INLINE void WriteHuffmanCodeWithExtraBits(
|
||||
VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits);
|
||||
}
|
||||
|
||||
static int StoreImageToBitMask(
|
||||
VP8LBitWriter* const bw, int width, int histo_bits,
|
||||
const VP8LBackwardRefs* const refs,
|
||||
const uint16_t* histogram_symbols,
|
||||
const HuffmanTreeCode* const huffman_codes, const WebPPicture* const pic) {
|
||||
static int StoreImageToBitMask(VP8LBitWriter* const bw, int width,
|
||||
int histo_bits,
|
||||
const VP8LBackwardRefs* const refs,
|
||||
const uint32_t* histogram_symbols,
|
||||
const HuffmanTreeCode* const huffman_codes,
|
||||
const WebPPicture* const pic) {
|
||||
const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
|
||||
const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits);
|
||||
// x and y trace the position in the image.
|
||||
@ -673,7 +696,7 @@ static int StoreImageToBitMask(
|
||||
int y = 0;
|
||||
int tile_x = x & tile_mask;
|
||||
int tile_y = y & tile_mask;
|
||||
int histogram_ix = histogram_symbols[0];
|
||||
int histogram_ix = (histogram_symbols[0] >> 8) & 0xffff;
|
||||
const HuffmanTreeCode* codes = huffman_codes + 5 * histogram_ix;
|
||||
VP8LRefsCursor c = VP8LRefsCursorInit(refs);
|
||||
while (VP8LRefsCursorOk(&c)) {
|
||||
@ -681,8 +704,10 @@ static int StoreImageToBitMask(
|
||||
if ((tile_x != (x & tile_mask)) || (tile_y != (y & tile_mask))) {
|
||||
tile_x = x & tile_mask;
|
||||
tile_y = y & tile_mask;
|
||||
histogram_ix = histogram_symbols[(y >> histo_bits) * histo_xsize +
|
||||
(x >> histo_bits)];
|
||||
histogram_ix = (histogram_symbols[(y >> histo_bits) * histo_xsize +
|
||||
(x >> histo_bits)] >>
|
||||
8) &
|
||||
0xffff;
|
||||
codes = huffman_codes + 5 * histogram_ix;
|
||||
}
|
||||
if (PixOrCopyIsLiteral(v)) {
|
||||
@ -738,7 +763,7 @@ static int EncodeImageNoHuffman(VP8LBitWriter* const bw,
|
||||
VP8LBackwardRefs* refs;
|
||||
HuffmanTreeToken* tokens = NULL;
|
||||
HuffmanTreeCode huffman_codes[5] = {{0, NULL, NULL}};
|
||||
const uint16_t histogram_symbols[1] = {0}; // only one tree, one symbol
|
||||
const uint32_t histogram_symbols[1] = {0}; // only one tree, one symbol
|
||||
int cache_bits = 0;
|
||||
VP8LHistogramSet* histogram_image = NULL;
|
||||
HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(
|
||||
@ -821,32 +846,32 @@ static int EncodeImageInternal(
|
||||
VP8LBitWriter* const bw, const uint32_t* const argb,
|
||||
VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[4], int width,
|
||||
int height, int quality, int low_effort, const CrunchConfig* const config,
|
||||
int* cache_bits, int histogram_bits, size_t init_byte_position,
|
||||
int* cache_bits, int histogram_bits_in, size_t init_byte_position,
|
||||
int* const hdr_size, int* const data_size, const WebPPicture* const pic,
|
||||
int percent_range, int* const percent) {
|
||||
const uint32_t histogram_image_xysize =
|
||||
VP8LSubSampleSize(width, histogram_bits) *
|
||||
VP8LSubSampleSize(height, histogram_bits);
|
||||
VP8LSubSampleSize(width, histogram_bits_in) *
|
||||
VP8LSubSampleSize(height, histogram_bits_in);
|
||||
int remaining_percent = percent_range;
|
||||
int percent_start = *percent;
|
||||
VP8LHistogramSet* histogram_image = NULL;
|
||||
VP8LHistogram* tmp_histo = NULL;
|
||||
int histogram_image_size = 0;
|
||||
uint32_t i, histogram_image_size = 0;
|
||||
size_t bit_array_size = 0;
|
||||
HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(
|
||||
3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
|
||||
HuffmanTreeToken* tokens = NULL;
|
||||
HuffmanTreeCode* huffman_codes = NULL;
|
||||
uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc(
|
||||
histogram_image_xysize, sizeof(*histogram_symbols));
|
||||
uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc(
|
||||
histogram_image_xysize, sizeof(*histogram_argb));
|
||||
int sub_configs_idx;
|
||||
int cache_bits_init, write_histogram_image;
|
||||
VP8LBitWriter bw_init = *bw, bw_best;
|
||||
int hdr_size_tmp;
|
||||
VP8LHashChain hash_chain_histogram; // histogram image hash chain
|
||||
size_t bw_size_best = ~(size_t)0;
|
||||
assert(histogram_bits >= MIN_HUFFMAN_BITS);
|
||||
assert(histogram_bits <= MAX_HUFFMAN_BITS);
|
||||
assert(histogram_bits_in >= MIN_HUFFMAN_BITS);
|
||||
assert(histogram_bits_in <= MAX_HUFFMAN_BITS);
|
||||
assert(hdr_size != NULL);
|
||||
assert(data_size != NULL);
|
||||
|
||||
@ -857,7 +882,7 @@ static int EncodeImageInternal(
|
||||
}
|
||||
|
||||
// Make sure we can allocate the different objects.
|
||||
if (huff_tree == NULL || histogram_symbols == NULL ||
|
||||
if (huff_tree == NULL || histogram_argb == NULL ||
|
||||
!VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize)) {
|
||||
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
goto Error;
|
||||
@ -899,6 +924,7 @@ static int EncodeImageInternal(
|
||||
|
||||
for (i_cache = 0; i_cache < (sub_config->do_no_cache_ ? 2 : 1); ++i_cache) {
|
||||
const int cache_bits_tmp = (i_cache == 0) ? cache_bits_best : 0;
|
||||
int histogram_bits = histogram_bits_in;
|
||||
// Speed-up: no need to study the no-cache case if it was already studied
|
||||
// in i_cache == 0.
|
||||
if (i_cache == 1 && cache_bits_best == 0) break;
|
||||
@ -920,7 +946,7 @@ static int EncodeImageInternal(
|
||||
if (!VP8LGetHistoImageSymbols(
|
||||
width, height, &refs_array[i_cache], quality, low_effort,
|
||||
histogram_bits, cache_bits_tmp, histogram_image, tmp_histo,
|
||||
histogram_symbols, pic, i_percent_range, percent)) {
|
||||
histogram_argb, pic, i_percent_range, percent)) {
|
||||
goto Error;
|
||||
}
|
||||
// Create Huffman bit lengths and codes for each histogram image.
|
||||
@ -953,26 +979,19 @@ static int EncodeImageInternal(
|
||||
}
|
||||
|
||||
// Huffman image + meta huffman.
|
||||
histogram_image_size = 0;
|
||||
for (i = 0; i < histogram_image_xysize; ++i) {
|
||||
if (histogram_argb[i] >= histogram_image_size) {
|
||||
histogram_image_size = histogram_argb[i] + 1;
|
||||
}
|
||||
histogram_argb[i] <<= 8;
|
||||
}
|
||||
|
||||
write_histogram_image = (histogram_image_size > 1);
|
||||
VP8LPutBits(bw, write_histogram_image, 1);
|
||||
if (write_histogram_image) {
|
||||
uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc(
|
||||
histogram_image_xysize, sizeof(*histogram_argb));
|
||||
int max_index = 0;
|
||||
uint32_t i;
|
||||
if (histogram_argb == NULL) {
|
||||
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
|
||||
goto Error;
|
||||
}
|
||||
for (i = 0; i < histogram_image_xysize; ++i) {
|
||||
const int symbol_index = histogram_symbols[i] & 0xffff;
|
||||
histogram_argb[i] = (symbol_index << 8);
|
||||
if (symbol_index >= max_index) {
|
||||
max_index = symbol_index + 1;
|
||||
}
|
||||
}
|
||||
histogram_image_size = max_index;
|
||||
|
||||
VP8LOptimizeSampling(histogram_argb, width, height, histogram_bits_in,
|
||||
MAX_HUFFMAN_BITS, &histogram_bits);
|
||||
VP8LPutBits(bw, histogram_bits - 2, 3);
|
||||
i_percent_range = i_remaining_percent / 2;
|
||||
i_remaining_percent -= i_percent_range;
|
||||
@ -981,15 +1000,12 @@ static int EncodeImageInternal(
|
||||
VP8LSubSampleSize(width, histogram_bits),
|
||||
VP8LSubSampleSize(height, histogram_bits), quality, low_effort,
|
||||
pic, i_percent_range, percent)) {
|
||||
WebPSafeFree(histogram_argb);
|
||||
goto Error;
|
||||
}
|
||||
WebPSafeFree(histogram_argb);
|
||||
}
|
||||
|
||||
// Store Huffman codes.
|
||||
{
|
||||
int i;
|
||||
int max_tokens = 0;
|
||||
// Find maximum number of symbols for the huffman tree-set.
|
||||
for (i = 0; i < 5 * histogram_image_size; ++i) {
|
||||
@ -1012,7 +1028,7 @@ static int EncodeImageInternal(
|
||||
// Store actual literals.
|
||||
hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
|
||||
if (!StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache],
|
||||
histogram_symbols, huffman_codes, pic)) {
|
||||
histogram_argb, huffman_codes, pic)) {
|
||||
goto Error;
|
||||
}
|
||||
// Keep track of the smallest image so far.
|
||||
@ -1049,7 +1065,7 @@ static int EncodeImageInternal(
|
||||
WebPSafeFree(huffman_codes->codes);
|
||||
WebPSafeFree(huffman_codes);
|
||||
}
|
||||
WebPSafeFree(histogram_symbols);
|
||||
WebPSafeFree(histogram_argb);
|
||||
VP8LBitWriterWipeOut(&bw_best);
|
||||
return (pic->error_code == VP8_ENC_OK);
|
||||
}
|
||||
@ -1064,54 +1080,60 @@ static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height,
|
||||
VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
|
||||
}
|
||||
|
||||
static int ApplyPredictFilter(const VP8LEncoder* const enc, int width,
|
||||
int height, int quality, int low_effort,
|
||||
static int ApplyPredictFilter(VP8LEncoder* const enc, int width, int height,
|
||||
int quality, int low_effort,
|
||||
int used_subtract_green, VP8LBitWriter* const bw,
|
||||
int percent_range, int* const percent) {
|
||||
const int pred_bits = enc->transform_bits_;
|
||||
const int transform_width = VP8LSubSampleSize(width, pred_bits);
|
||||
const int transform_height = VP8LSubSampleSize(height, pred_bits);
|
||||
// we disable near-lossless quantization if palette is used.
|
||||
int best_bits;
|
||||
const int near_lossless_strength =
|
||||
enc->use_palette_ ? 100 : enc->config_->near_lossless;
|
||||
const int max_bits = ClampBits(width, height, enc->predictor_transform_bits_,
|
||||
MIN_TRANSFORM_BITS, MAX_TRANSFORM_BITS,
|
||||
MAX_PREDICTOR_IMAGE_SIZE);
|
||||
const int min_bits = ClampBits(
|
||||
width, height,
|
||||
max_bits - 2 * (enc->config_->method > 4 ? enc->config_->method - 4 : 0),
|
||||
MIN_TRANSFORM_BITS, MAX_TRANSFORM_BITS, MAX_PREDICTOR_IMAGE_SIZE);
|
||||
|
||||
if (!VP8LResidualImage(
|
||||
width, height, pred_bits, low_effort, enc->argb_, enc->argb_scratch_,
|
||||
enc->transform_data_, near_lossless_strength, enc->config_->exact,
|
||||
used_subtract_green, enc->pic_, percent_range / 2, percent)) {
|
||||
if (!VP8LResidualImage(width, height, min_bits, max_bits, low_effort,
|
||||
enc->argb_, enc->argb_scratch_, enc->transform_data_,
|
||||
near_lossless_strength, enc->config_->exact,
|
||||
used_subtract_green, enc->pic_, percent_range / 2,
|
||||
percent, &best_bits)) {
|
||||
return 0;
|
||||
}
|
||||
VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
|
||||
VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
|
||||
assert(pred_bits >= 2);
|
||||
VP8LPutBits(bw, pred_bits - 2, 3);
|
||||
assert(best_bits >= MIN_TRANSFORM_BITS && best_bits <= MAX_TRANSFORM_BITS);
|
||||
VP8LPutBits(bw, best_bits - MIN_TRANSFORM_BITS, NUM_TRANSFORM_BITS);
|
||||
enc->predictor_transform_bits_ = best_bits;
|
||||
return EncodeImageNoHuffman(
|
||||
bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
|
||||
(VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height,
|
||||
bw, enc->transform_data_, &enc->hash_chain_, &enc->refs_[0],
|
||||
VP8LSubSampleSize(width, best_bits), VP8LSubSampleSize(height, best_bits),
|
||||
quality, low_effort, enc->pic_, percent_range - percent_range / 2,
|
||||
percent);
|
||||
}
|
||||
|
||||
static int ApplyCrossColorFilter(const VP8LEncoder* const enc, int width,
|
||||
int height, int quality, int low_effort,
|
||||
static int ApplyCrossColorFilter(VP8LEncoder* const enc, int width, int height,
|
||||
int quality, int low_effort,
|
||||
VP8LBitWriter* const bw, int percent_range,
|
||||
int* const percent) {
|
||||
const int ccolor_transform_bits = enc->transform_bits_;
|
||||
const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits);
|
||||
const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits);
|
||||
const int min_bits = enc->cross_color_transform_bits_;
|
||||
int best_bits;
|
||||
|
||||
if (!VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality,
|
||||
enc->argb_, enc->transform_data_, enc->pic_,
|
||||
percent_range / 2, percent)) {
|
||||
if (!VP8LColorSpaceTransform(width, height, min_bits, quality, enc->argb_,
|
||||
enc->transform_data_, enc->pic_,
|
||||
percent_range / 2, percent, &best_bits)) {
|
||||
return 0;
|
||||
}
|
||||
VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
|
||||
VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2);
|
||||
assert(ccolor_transform_bits >= 2);
|
||||
VP8LPutBits(bw, ccolor_transform_bits - 2, 3);
|
||||
assert(best_bits >= MIN_TRANSFORM_BITS && best_bits <= MAX_TRANSFORM_BITS);
|
||||
VP8LPutBits(bw, best_bits - MIN_TRANSFORM_BITS, NUM_TRANSFORM_BITS);
|
||||
enc->cross_color_transform_bits_ = best_bits;
|
||||
return EncodeImageNoHuffman(
|
||||
bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
|
||||
(VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height,
|
||||
bw, enc->transform_data_, &enc->hash_chain_, &enc->refs_[0],
|
||||
VP8LSubSampleSize(width, best_bits), VP8LSubSampleSize(height, best_bits),
|
||||
quality, low_effort, enc->pic_, percent_range - percent_range / 2,
|
||||
percent);
|
||||
}
|
||||
@ -1199,8 +1221,8 @@ static int AllocateTransformBuffer(VP8LEncoder* const enc, int width,
|
||||
: 0;
|
||||
const uint64_t transform_data_size =
|
||||
(enc->use_predict_ || enc->use_cross_color_)
|
||||
? (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) *
|
||||
VP8LSubSampleSize(height, enc->transform_bits_)
|
||||
? (uint64_t)VP8LSubSampleSize(width, MIN_TRANSFORM_BITS) *
|
||||
VP8LSubSampleSize(height, MIN_TRANSFORM_BITS)
|
||||
: 0;
|
||||
const uint64_t max_alignment_in_words =
|
||||
(WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t);
|
||||
@ -1374,13 +1396,11 @@ static int ApplyPalette(const uint32_t* src, uint32_t src_stride, uint32_t* dst,
|
||||
#undef APPLY_PALETTE_GREEDY_MAX
|
||||
|
||||
// Note: Expects "enc->palette_" to be set properly.
|
||||
static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) {
|
||||
static int MapImageFromPalette(VP8LEncoder* const enc) {
|
||||
const WebPPicture* const pic = enc->pic_;
|
||||
const int width = pic->width;
|
||||
const int height = pic->height;
|
||||
const uint32_t* const palette = enc->palette_;
|
||||
const uint32_t* src = in_place ? enc->argb_ : pic->argb;
|
||||
const int src_stride = in_place ? enc->current_width_ : pic->argb_stride;
|
||||
const int palette_size = enc->palette_size_;
|
||||
int xbits;
|
||||
|
||||
@ -1395,9 +1415,9 @@ static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) {
|
||||
if (!AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height)) {
|
||||
return 0;
|
||||
}
|
||||
if (!ApplyPalette(src, src_stride,
|
||||
enc->argb_, enc->current_width_,
|
||||
palette, palette_size, width, height, xbits, pic)) {
|
||||
if (!ApplyPalette(pic->argb, pic->argb_stride, enc->argb_,
|
||||
enc->current_width_, palette, palette_size, width, height,
|
||||
xbits, pic)) {
|
||||
return 0;
|
||||
}
|
||||
enc->argb_content_ = kEncoderPalette;
|
||||
@ -1405,24 +1425,31 @@ static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) {
|
||||
}
|
||||
|
||||
// Save palette_[] to bitstream.
|
||||
static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort,
|
||||
VP8LEncoder* const enc,
|
||||
int percent_range, int* const percent) {
|
||||
static int EncodePalette(VP8LBitWriter* const bw, int low_effort,
|
||||
VP8LEncoder* const enc, int percent_range,
|
||||
int* const percent) {
|
||||
int i;
|
||||
uint32_t tmp_palette[MAX_PALETTE_SIZE];
|
||||
const int palette_size = enc->palette_size_;
|
||||
const uint32_t* const palette = enc->palette_;
|
||||
// If the last element is 0, do not store it and count on automatic palette
|
||||
// 0-filling. This can only happen if there is no pixel packing, hence if
|
||||
// there are strictly more than 16 colors (after 0 is removed).
|
||||
const uint32_t encoded_palette_size =
|
||||
(enc->palette_[palette_size - 1] == 0 && palette_size > 17)
|
||||
? palette_size - 1
|
||||
: palette_size;
|
||||
VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
|
||||
VP8LPutBits(bw, COLOR_INDEXING_TRANSFORM, 2);
|
||||
assert(palette_size >= 1 && palette_size <= MAX_PALETTE_SIZE);
|
||||
VP8LPutBits(bw, palette_size - 1, 8);
|
||||
for (i = palette_size - 1; i >= 1; --i) {
|
||||
VP8LPutBits(bw, encoded_palette_size - 1, 8);
|
||||
for (i = encoded_palette_size - 1; i >= 1; --i) {
|
||||
tmp_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
|
||||
}
|
||||
tmp_palette[0] = palette[0];
|
||||
return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_,
|
||||
&enc->refs_[0], palette_size, 1, /*quality=*/20,
|
||||
low_effort, enc->pic_, percent_range, percent);
|
||||
return EncodeImageNoHuffman(
|
||||
bw, tmp_palette, &enc->hash_chain_, &enc->refs_[0], encoded_palette_size,
|
||||
1, /*quality=*/20, low_effort, enc->pic_, percent_range, percent);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@ -1493,7 +1520,6 @@ static int EncodeStreamHook(void* input, void* data2) {
|
||||
#endif
|
||||
int hdr_size = 0;
|
||||
int data_size = 0;
|
||||
int use_delta_palette = 0;
|
||||
int idx;
|
||||
size_t best_size = ~(size_t)0;
|
||||
VP8LBitWriter bw_init = *bw, bw_best;
|
||||
@ -1558,45 +1584,43 @@ static int EncodeStreamHook(void* input, void* data2) {
|
||||
goto Error;
|
||||
}
|
||||
remaining_percent -= percent_range;
|
||||
if (!MapImageFromPalette(enc, use_delta_palette)) goto Error;
|
||||
if (!MapImageFromPalette(enc)) goto Error;
|
||||
// If using a color cache, do not have it bigger than the number of
|
||||
// colors.
|
||||
if (enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) {
|
||||
enc->cache_bits_ = BitsLog2Floor(enc->palette_size_) + 1;
|
||||
}
|
||||
}
|
||||
if (!use_delta_palette) {
|
||||
// In case image is not packed.
|
||||
if (enc->argb_content_ != kEncoderNearLossless &&
|
||||
enc->argb_content_ != kEncoderPalette) {
|
||||
if (!MakeInputImageCopy(enc)) goto Error;
|
||||
}
|
||||
// In case image is not packed.
|
||||
if (enc->argb_content_ != kEncoderNearLossless &&
|
||||
enc->argb_content_ != kEncoderPalette) {
|
||||
if (!MakeInputImageCopy(enc)) goto Error;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Apply transforms and write transform data.
|
||||
// -------------------------------------------------------------------------
|
||||
// Apply transforms and write transform data.
|
||||
|
||||
if (enc->use_subtract_green_) {
|
||||
ApplySubtractGreen(enc, enc->current_width_, height, bw);
|
||||
}
|
||||
if (enc->use_subtract_green_) {
|
||||
ApplySubtractGreen(enc, enc->current_width_, height, bw);
|
||||
}
|
||||
|
||||
if (enc->use_predict_) {
|
||||
percent_range = remaining_percent / 3;
|
||||
if (!ApplyPredictFilter(enc, enc->current_width_, height, quality,
|
||||
low_effort, enc->use_subtract_green_, bw,
|
||||
percent_range, &percent)) {
|
||||
goto Error;
|
||||
}
|
||||
remaining_percent -= percent_range;
|
||||
if (enc->use_predict_) {
|
||||
percent_range = remaining_percent / 3;
|
||||
if (!ApplyPredictFilter(enc, enc->current_width_, height, quality,
|
||||
low_effort, enc->use_subtract_green_, bw,
|
||||
percent_range, &percent)) {
|
||||
goto Error;
|
||||
}
|
||||
remaining_percent -= percent_range;
|
||||
}
|
||||
|
||||
if (enc->use_cross_color_) {
|
||||
percent_range = remaining_percent / 2;
|
||||
if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality,
|
||||
low_effort, bw, percent_range, &percent)) {
|
||||
goto Error;
|
||||
}
|
||||
remaining_percent -= percent_range;
|
||||
if (enc->use_cross_color_) {
|
||||
percent_range = remaining_percent / 2;
|
||||
if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality,
|
||||
low_effort, bw, percent_range, &percent)) {
|
||||
goto Error;
|
||||
}
|
||||
remaining_percent -= percent_range;
|
||||
}
|
||||
|
||||
VP8LPutBits(bw, !TRANSFORM_PRESENT, 1); // No more transforms.
|
||||
@ -1625,7 +1649,8 @@ static int EncodeStreamHook(void* input, void* data2) {
|
||||
if (enc->use_subtract_green_) stats->lossless_features |= 4;
|
||||
if (enc->use_palette_) stats->lossless_features |= 8;
|
||||
stats->histogram_bits = enc->histo_bits_;
|
||||
stats->transform_bits = enc->transform_bits_;
|
||||
stats->transform_bits = enc->predictor_transform_bits_;
|
||||
stats->cross_color_transform_bits = enc->cross_color_transform_bits_;
|
||||
stats->cache_bits = enc->cache_bits_;
|
||||
stats->palette_size = enc->palette_size_;
|
||||
stats->lossless_size = (int)(best_size - byte_position);
|
||||
@ -1735,7 +1760,10 @@ int VP8LEncodeStream(const WebPConfig* const config,
|
||||
}
|
||||
// Copy the values that were computed for the main encoder.
|
||||
enc_side->histo_bits_ = enc_main->histo_bits_;
|
||||
enc_side->transform_bits_ = enc_main->transform_bits_;
|
||||
enc_side->predictor_transform_bits_ =
|
||||
enc_main->predictor_transform_bits_;
|
||||
enc_side->cross_color_transform_bits_ =
|
||||
enc_main->cross_color_transform_bits_;
|
||||
enc_side->palette_size_ = enc_main->palette_size_;
|
||||
memcpy(enc_side->palette_, enc_main->palette_,
|
||||
sizeof(enc_main->palette_));
|
||||
|
22
thirdparty/libwebp/src/enc/vp8li_enc.h
vendored
22
thirdparty/libwebp/src/enc/vp8li_enc.h
vendored
@ -34,7 +34,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// maximum value of transform_bits_ in VP8LEncoder.
|
||||
#define MAX_TRANSFORM_BITS 6
|
||||
#define MAX_TRANSFORM_BITS (MIN_TRANSFORM_BITS + (1 << NUM_TRANSFORM_BITS) - 1)
|
||||
|
||||
typedef enum {
|
||||
kEncoderNone = 0,
|
||||
@ -59,7 +59,8 @@ typedef struct {
|
||||
|
||||
// Encoding parameters derived from quality parameter.
|
||||
int histo_bits_;
|
||||
int transform_bits_; // <= MAX_TRANSFORM_BITS.
|
||||
int predictor_transform_bits_; // <= MAX_TRANSFORM_BITS
|
||||
int cross_color_transform_bits_; // <= MAX_TRANSFORM_BITS
|
||||
int cache_bits_; // If equal to 0, don't use color cache.
|
||||
|
||||
// Encoding parameters derived from image characteristics.
|
||||
@ -104,16 +105,21 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,
|
||||
|
||||
// pic and percent are for progress.
|
||||
// Returns false in case of error (stored in pic->error_code).
|
||||
int VP8LResidualImage(int width, int height, int bits, int low_effort,
|
||||
uint32_t* const argb, uint32_t* const argb_scratch,
|
||||
uint32_t* const image, int near_lossless, int exact,
|
||||
int used_subtract_green, const WebPPicture* const pic,
|
||||
int percent_range, int* const percent);
|
||||
int VP8LResidualImage(int width, int height, int min_bits, int max_bits,
|
||||
int low_effort, uint32_t* const argb,
|
||||
uint32_t* const argb_scratch, uint32_t* const image,
|
||||
int near_lossless, int exact, int used_subtract_green,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent, int* const best_bits);
|
||||
|
||||
int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
uint32_t* const argb, uint32_t* image,
|
||||
const WebPPicture* const pic, int percent_range,
|
||||
int* const percent);
|
||||
int* const percent, int* const best_bits);
|
||||
|
||||
void VP8LOptimizeSampling(uint32_t* const image, int full_width,
|
||||
int full_height, int bits, int max_bits,
|
||||
int* best_bits_out);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
3
thirdparty/libwebp/src/mux/anim_encode.c
vendored
3
thirdparty/libwebp/src/mux/anim_encode.c
vendored
@ -191,7 +191,8 @@ int WebPAnimEncoderOptionsInitInternal(WebPAnimEncoderOptions* enc_options,
|
||||
return 1;
|
||||
}
|
||||
|
||||
// This starting value is more fit to WebPCleanupTransparentAreaLossless().
|
||||
// This value is used to match a later call to WebPReplaceTransparentPixels(),
|
||||
// making it a no-op for lossless (see WebPEncode()).
|
||||
#define TRANSPARENT_COLOR 0x00000000
|
||||
|
||||
static void ClearRectangle(WebPPicture* const picture,
|
||||
|
2
thirdparty/libwebp/src/mux/muxi.h
vendored
2
thirdparty/libwebp/src/mux/muxi.h
vendored
@ -28,7 +28,7 @@ extern "C" {
|
||||
// Defines and constants.
|
||||
|
||||
#define MUX_MAJ_VERSION 1
|
||||
#define MUX_MIN_VERSION 4
|
||||
#define MUX_MIN_VERSION 5
|
||||
#define MUX_REV_VERSION 0
|
||||
|
||||
// Chunk object.
|
||||
|
10
thirdparty/libwebp/src/mux/muxread.c
vendored
10
thirdparty/libwebp/src/mux/muxread.c
vendored
@ -223,11 +223,13 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
|
||||
// Note this padding is historical and differs from demux.c which does not
|
||||
// pad the file size.
|
||||
riff_size = SizeWithPadding(riff_size);
|
||||
if (riff_size < CHUNK_HEADER_SIZE) goto Err;
|
||||
// Make sure the whole RIFF header is available.
|
||||
if (riff_size < RIFF_HEADER_SIZE) goto Err;
|
||||
if (riff_size > size) goto Err;
|
||||
// There's no point in reading past the end of the RIFF chunk.
|
||||
if (size > riff_size + CHUNK_HEADER_SIZE) {
|
||||
size = riff_size + CHUNK_HEADER_SIZE;
|
||||
// There's no point in reading past the end of the RIFF chunk. Note riff_size
|
||||
// includes CHUNK_HEADER_SIZE after SizeWithPadding().
|
||||
if (size > riff_size) {
|
||||
size = riff_size;
|
||||
}
|
||||
|
||||
end = data + size;
|
||||
|
@ -124,7 +124,8 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits,
|
||||
|
||||
#if defined(__arm__) || defined(_M_ARM) || WEBP_AARCH64 || \
|
||||
defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(__x86_64__) || defined(_M_X64)
|
||||
defined(__x86_64__) || defined(_M_X64) || \
|
||||
defined(__wasm__)
|
||||
#define VP8L_USE_FAST_LOAD
|
||||
#endif
|
||||
|
||||
|
@ -69,6 +69,8 @@ extern "C" {
|
||||
#define BITS 56
|
||||
#elif defined(__mips__) // MIPS
|
||||
#define BITS 24
|
||||
#elif defined(__wasm__) // WASM
|
||||
#define BITS 56
|
||||
#else // reasonable default
|
||||
#define BITS 24
|
||||
#endif
|
||||
|
15
thirdparty/libwebp/src/utils/palette.c
vendored
15
thirdparty/libwebp/src/utils/palette.c
vendored
@ -191,6 +191,12 @@ static void PaletteSortMinimizeDeltas(const uint32_t* const palette_sorted,
|
||||
// Find greedily always the closest color of the predicted color to minimize
|
||||
// deltas in the palette. This reduces storage needs since the
|
||||
// palette is stored with delta encoding.
|
||||
if (num_colors > 17) {
|
||||
if (palette[0] == 0) {
|
||||
--num_colors;
|
||||
SwapColor(&palette[num_colors], &palette[0]);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < num_colors; ++i) {
|
||||
int best_ix = i;
|
||||
uint32_t best_score = ~0U;
|
||||
@ -384,8 +390,13 @@ int PaletteSort(PaletteSorting method, const struct WebPPicture* const pic,
|
||||
uint32_t* const palette) {
|
||||
switch (method) {
|
||||
case kSortedDefault:
|
||||
// Nothing to do, we have already sorted the palette.
|
||||
memcpy(palette, palette_sorted, num_colors * sizeof(*palette));
|
||||
if (palette_sorted[0] == 0 && num_colors > 17) {
|
||||
memcpy(palette, palette_sorted + 1,
|
||||
(num_colors - 1) * sizeof(*palette_sorted));
|
||||
palette[num_colors - 1] = 0;
|
||||
} else {
|
||||
memcpy(palette, palette_sorted, num_colors * sizeof(*palette));
|
||||
}
|
||||
return 1;
|
||||
case kMinimizeDelta:
|
||||
PaletteSortMinimizeDeltas(palette_sorted, num_colors, palette);
|
||||
|
2
thirdparty/libwebp/src/utils/palette.h
vendored
2
thirdparty/libwebp/src/utils/palette.h
vendored
@ -53,6 +53,8 @@ int GetColorPalette(const struct WebPPicture* const pic,
|
||||
// Sorts the palette according to the criterion defined by 'method'.
|
||||
// 'palette_sorted' is the input palette sorted lexicographically, as done in
|
||||
// PrepareMapToPalette. Returns 0 on memory allocation error.
|
||||
// For kSortedDefault and kMinimizeDelta methods, 0 (if present) is set as the
|
||||
// last element to optimize later storage.
|
||||
int PaletteSort(PaletteSorting method, const struct WebPPicture* const pic,
|
||||
const uint32_t* const palette_sorted, uint32_t num_colors,
|
||||
uint32_t* const palette);
|
||||
|
9
thirdparty/libwebp/src/webp/encode.h
vendored
9
thirdparty/libwebp/src/webp/encode.h
vendored
@ -20,7 +20,7 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define WEBP_ENCODER_ABI_VERSION 0x020f // MAJOR(8b) + MINOR(8b)
|
||||
#define WEBP_ENCODER_ABI_VERSION 0x0210 // MAJOR(8b) + MINOR(8b)
|
||||
|
||||
// Note: forward declaring enumerations is not allowed in (strict) C and C++,
|
||||
// the types are left here for reference.
|
||||
@ -145,7 +145,7 @@ struct WebPConfig {
|
||||
// RGB information for better compression. The default
|
||||
// value is 0.
|
||||
|
||||
int use_delta_palette; // reserved for future lossless feature
|
||||
int use_delta_palette; // reserved
|
||||
int use_sharp_yuv; // if needed, use sharp (and slow) RGB->YUV conversion
|
||||
|
||||
int qmin; // minimum permissible quality factor
|
||||
@ -224,14 +224,15 @@ struct WebPAuxStats {
|
||||
uint32_t lossless_features; // bit0:predictor bit1:cross-color transform
|
||||
// bit2:subtract-green bit3:color indexing
|
||||
int histogram_bits; // number of precision bits of histogram
|
||||
int transform_bits; // precision bits for transform
|
||||
int transform_bits; // precision bits for predictor transform
|
||||
int cache_bits; // number of bits for color cache lookup
|
||||
int palette_size; // number of color in palette, if used
|
||||
int lossless_size; // final lossless size
|
||||
int lossless_hdr_size; // lossless header (transform, huffman etc) size
|
||||
int lossless_data_size; // lossless image data size
|
||||
int cross_color_transform_bits; // precision bits for cross-color transform
|
||||
|
||||
uint32_t pad[2]; // padding for later use
|
||||
uint32_t pad[1]; // padding for later use
|
||||
};
|
||||
|
||||
// Signature for output function. Should return true if writing was successful.
|
||||
|
@ -46,7 +46,12 @@
|
||||
#define CODE_LENGTH_CODES 19
|
||||
|
||||
#define MIN_HUFFMAN_BITS 2 // min number of Huffman bits
|
||||
#define MAX_HUFFMAN_BITS 9 // max number of Huffman bits
|
||||
#define NUM_HUFFMAN_BITS 3
|
||||
|
||||
// the maximum number of bits defining a transform is
|
||||
// MIN_TRANSFORM_BITS + (1 << NUM_TRANSFORM_BITS) - 1
|
||||
#define MIN_TRANSFORM_BITS 2
|
||||
#define NUM_TRANSFORM_BITS 3
|
||||
|
||||
#define TRANSFORM_PRESENT 1 // The bit to be written when next data
|
||||
// to be read is a transform.
|
||||
|
4
thirdparty/libwebp/src/webp/types.h
vendored
4
thirdparty/libwebp/src/webp/types.h
vendored
@ -38,11 +38,11 @@ typedef long long int int64_t;
|
||||
|
||||
#ifndef WEBP_NODISCARD
|
||||
#if defined(WEBP_ENABLE_NODISCARD) && WEBP_ENABLE_NODISCARD
|
||||
#if (defined(__cplusplus) && __cplusplus >= 201700L) || \
|
||||
#if (defined(__cplusplus) && __cplusplus >= 201703L) || \
|
||||
(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
|
||||
#define WEBP_NODISCARD [[nodiscard]]
|
||||
#else
|
||||
// gcc's __has_attribute does not work for enums.
|
||||
// gcc's __attribute__((warn_unused_result)) does not work for enums.
|
||||
#if defined(__clang__) && defined(__has_attribute)
|
||||
#if __has_attribute(warn_unused_result)
|
||||
#define WEBP_NODISCARD __attribute__((warn_unused_result))
|
||||
|
Loading…
x
Reference in New Issue
Block a user