Bug 1891459 - Update dav1d to 5b5399911dd24703de641d65eda5b7f1e845d060 r=chunmin

Differential Revision: https://phabricator.services.mozilla.com/D207425
This commit is contained in:
Updatebot 2024-04-16 16:40:31 +00:00
parent be484e5383
commit 16eb058401
25 changed files with 6745 additions and 2663 deletions

View file

@ -211,6 +211,7 @@ elif CONFIG['TARGET_CPU'] == 'arm' or CONFIG['TARGET_CPU'] == 'aarch64':
'../../../third_party/dav1d/src/arm/64/looprestoration_tmpl.S',
'../../../third_party/dav1d/src/arm/64/mc.S',
'../../../third_party/dav1d/src/arm/64/mc16.S',
'../../../third_party/dav1d/src/arm/64/mc_dotprod.S',
'../../../third_party/dav1d/src/arm/64/msac.S',
'../../../third_party/dav1d/src/arm/64/refmvs.S',
]

View file

@ -20,11 +20,11 @@ origin:
# Human-readable identifier for this version/release
# Generally "version NNN", "tag SSS", "bookmark SSS"
release: 8e08426468a76d8a667e8a79d92bafd85d7411ac (2024-03-18T20:50:37.000+00:00).
release: 5b5399911dd24703de641d65eda5b7f1e845d060 (2024-04-15T13:19:42.000+02:00).
# Revision to pull in
# Must be a long or short commit SHA (long preferred)
revision: 8e08426468a76d8a667e8a79d92bafd85d7411ac
revision: 5b5399911dd24703de641d65eda5b7f1e845d060
# The package's license, where possible using the mnemonic from
# https://spdx.org/licenses/

View file

@ -1,2 +1,2 @@
/* auto-generated, do not edit */
#define DAV1D_VERSION "8e08426468a76d8a667e8a79d92bafd85d7411ac"
#define DAV1D_VERSION "5b5399911dd24703de641d65eda5b7f1e845d060"

View file

@ -81,6 +81,8 @@ cdata.set10('TRIM_DSP_FUNCTIONS', get_option('trim_dsp') == 'true' or
# Logging option
cdata.set10('CONFIG_LOG', get_option('logging'))
cdata.set10('CONFIG_MACOS_KPERF', get_option('macos_kperf'))
#
# OS/Compiler checks and defines
#

View file

@ -68,3 +68,8 @@ option('trim_dsp',
choices: ['true', 'false', 'if-release'],
value: 'if-release',
description: 'Eliminate redundant DSP functions where possible')
option('macos_kperf',
type: 'boolean',
value: false,
description: 'Use the private macOS kperf API for benchmarking')

View file

@ -837,7 +837,7 @@ endfunc
// This has got the same signature as the put_8tap functions,
// and assumes that x8 is set to (clz(w)-24).
function put_neon
function put_neon, export=1
adr x9, L(put_tbl)
ldrh w8, [x9, x8, lsl #1]
sub x9, x9, w8, uxtw
@ -939,7 +939,7 @@ endfunc
// This has got the same signature as the prep_8tap functions,
// and assumes that x8 is set to (clz(w)-24), and x7 to w*2.
function prep_neon
function prep_neon, export=1
adr x9, L(prep_tbl)
ldrh w8, [x9, x8, lsl #1]
sub x9, x9, w8, uxtw

1413
third_party/dav1d/src/arm/64/mc_dotprod.S vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -288,10 +288,8 @@ function msac_decode_hi_tok_neon, export=1
mvni v30.4h, #0x3f // 0xffc0
ldrh w9, [x1, #6] // count = cdf[n_symbols]
ld1r {v3.4h}, [x16] // rng
movrel x16, bits
ld1 {v29.4h}, [x17] // EC_MIN_PROB * (n_symbols - ret)
add x17, x0, #DIF + 6
ld1 {v16.8h}, [x16]
mov w13, #-24
and v17.8b, v0.8b, v30.8b // cdf & 0xffc0
ldr w10, [x0, #ALLOW_UPDATE_CDF]
@ -305,30 +303,27 @@ function msac_decode_hi_tok_neon, export=1
add v4.4h, v17.4h, v29.4h // v = cdf + EC_MIN_PROB * (n_symbols - ret)
add v4.4h, v6.4h, v4.4h // v = ((cdf >> EC_PROB_SHIFT) * r) >> 1 + EC_MIN_PROB * (n_symbols - ret)
str h3, [sp, #14] // store original u = s->rng
cmhs v2.8h, v1.8h, v4.8h // c >= v
cmhs v2.4h, v1.4h, v4.4h // c >= v
str q4, [sp, #16] // store v values to allow indexed access
and v6.16b, v2.16b, v16.16b // One bit per halfword set in the mask
addv h6, v6.8h // Aggregate mask bits
umov w3, v6.h[0]
addv h6, v2.4h // -4 + ret
add w13, w13, #5
rbit w3, w3
smov w15, v6.h[0]
add x8, sp, #16
clz w15, w3 // ret
add w15, w15, #4 // ret
cbz w10, 2f
// update_cdf
movi v5.8b, #0xff
sub v5.4h, v0.4h, v2.4h // cdf[i] + (i >= val ? 1 : 0)
mov w4, #-5
urhadd v4.4h, v5.4h, v2.4h // i >= val ? -1 : 32768
orr v2.4h, #0x80, lsl #8 // i >= val ? -1 : 32768
sub w4, w4, w9, lsr #4 // -((count >> 4) + 5)
sub v4.4h, v4.4h, v0.4h // (32768 - cdf[i]) or (-1 - cdf[i])
sub v4.4h, v2.4h, v0.4h // (32768 - cdf[i]) or (-1 - cdf[i])
dup v6.4h, w4 // -rate
sub w9, w9, w9, lsr #5 // count - (count == 32)
sub v0.4h, v0.4h, v2.4h // cdf + (i >= val ? 1 : 0)
sshl v4.4h, v4.4h, v6.4h // ({32768,-1} - cdf[i]) >> rate
add w9, w9, #1 // count + (count < 32)
add v0.4h, v0.4h, v4.4h // cdf + (32768 - cdf[i]) >> rate
add v0.4h, v5.4h, v4.4h // cdf[i] + (32768 - cdf[i]) >> rate
st1 {v0.4h}, [x1]
and v17.8b, v0.8b, v30.8b // cdf & 0xffc0
strh w9, [x1, #6]

View file

@ -28,34 +28,6 @@
#include "src/cpu.h"
#include "src/itx.h"
#define decl_itx2_fns(w, h, opt) \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_##w##x##h, opt))
#define decl_itx12_fns(w, h, opt) \
decl_itx2_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_dct_##w##x##h, opt))
#define decl_itx16_fns(w, h, opt) \
decl_itx12_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_flipadst_##w##x##h, opt))
#define decl_itx17_fns(w, h, opt) \
decl_itx16_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
decl_itx17_fns( 4, 4, neon);
decl_itx16_fns( 4, 8, neon);
decl_itx16_fns( 4, 16, neon);
@ -78,41 +50,6 @@ decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x32, neon));
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x64, neon));
static ALWAYS_INLINE void itx_dsp_init_arm(Dav1dInvTxfmDSPContext *const c, int bpc) {
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
#define assign_itx1_fn(pfx, w, h, ext) \
assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)
#define assign_itx2_fn(pfx, w, h, ext) \
assign_itx1_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext)
#define assign_itx12_fn(pfx, w, h, ext) \
assign_itx2_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext)
#define assign_itx16_fn(pfx, w, h, ext) \
assign_itx12_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext)
#define assign_itx17_fn(pfx, w, h, ext) \
assign_itx16_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext)
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;

View file

@ -30,26 +30,40 @@
#include "src/mc.h"
#include "src/cpu.h"
decl_mc_fn(BF(dav1d_put_8tap_regular, neon));
decl_mc_fn(BF(dav1d_put_8tap_regular_smooth, neon));
decl_mc_fn(BF(dav1d_put_8tap_regular_sharp, neon));
decl_mc_fn(BF(dav1d_put_8tap_smooth, neon));
decl_mc_fn(BF(dav1d_put_8tap_smooth_regular, neon));
decl_mc_fn(BF(dav1d_put_8tap_smooth_sharp, neon));
decl_mc_fn(BF(dav1d_put_8tap_sharp, neon));
decl_mc_fn(BF(dav1d_put_8tap_sharp_regular, neon));
decl_mc_fn(BF(dav1d_put_8tap_sharp_smooth, neon));
decl_mc_fn(BF(dav1d_put_bilin, neon));
#define decl_8tap_gen(decl_name, fn_name, opt) \
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_regular, opt)); \
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_regular_smooth, opt)); \
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_regular_sharp, opt)); \
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_smooth_regular, opt)); \
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_smooth, opt)); \
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_smooth_sharp, opt)); \
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_sharp_regular, opt)); \
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_sharp_smooth, opt)); \
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_sharp, opt))
decl_mct_fn(BF(dav1d_prep_8tap_regular, neon));
decl_mct_fn(BF(dav1d_prep_8tap_regular_smooth, neon));
decl_mct_fn(BF(dav1d_prep_8tap_regular_sharp, neon));
decl_mct_fn(BF(dav1d_prep_8tap_smooth, neon));
decl_mct_fn(BF(dav1d_prep_8tap_smooth_regular, neon));
decl_mct_fn(BF(dav1d_prep_8tap_smooth_sharp, neon));
decl_mct_fn(BF(dav1d_prep_8tap_sharp, neon));
decl_mct_fn(BF(dav1d_prep_8tap_sharp_regular, neon));
decl_mct_fn(BF(dav1d_prep_8tap_sharp_smooth, neon));
#define decl_8tap_fns(opt) \
decl_8tap_gen(mc, put, opt); \
decl_8tap_gen(mct, prep, opt)
#define init_8tap_gen(name, opt) \
init_##name##_fn(FILTER_2D_8TAP_REGULAR, 8tap_regular, opt); \
init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, opt); \
init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
init_##name##_fn(FILTER_2D_8TAP_SMOOTH, 8tap_smooth, opt); \
init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, opt); \
init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, opt); \
init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, opt); \
init_##name##_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, opt)
#define init_8tap_fns(opt) \
init_8tap_gen(mc, opt); \
init_8tap_gen(mct, opt)
decl_8tap_fns(neon);
decl_8tap_fns(neon_dotprod);
decl_mc_fn(BF(dav1d_put_bilin, neon));
decl_mct_fn(BF(dav1d_prep_bilin, neon));
decl_avg_fn(BF(dav1d_avg, neon));
@ -77,27 +91,10 @@ static ALWAYS_INLINE void mc_dsp_init_arm(Dav1dMCDSPContext *const c) {
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
init_mc_fn (FILTER_2D_8TAP_REGULAR, 8tap_regular, neon);
init_mc_fn (FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, neon);
init_mc_fn (FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, neon);
init_mc_fn (FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, neon);
init_mc_fn (FILTER_2D_8TAP_SMOOTH, 8tap_smooth, neon);
init_mc_fn (FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, neon);
init_mc_fn (FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, neon);
init_mc_fn (FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, neon);
init_mc_fn (FILTER_2D_8TAP_SHARP, 8tap_sharp, neon);
init_mc_fn (FILTER_2D_BILINEAR, bilin, neon);
init_8tap_fns(neon);
init_mct_fn(FILTER_2D_8TAP_REGULAR, 8tap_regular, neon);
init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, neon);
init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, neon);
init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, neon);
init_mct_fn(FILTER_2D_8TAP_SMOOTH, 8tap_smooth, neon);
init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, neon);
init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, neon);
init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, neon);
init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, neon);
init_mct_fn(FILTER_2D_BILINEAR, bilin, neon);
init_mc_fn (FILTER_2D_BILINEAR, bilin, neon);
init_mct_fn(FILTER_2D_BILINEAR, bilin, neon);
c->avg = BF(dav1d_avg, neon);
c->w_avg = BF(dav1d_w_avg, neon);
@ -111,4 +108,12 @@ static ALWAYS_INLINE void mc_dsp_init_arm(Dav1dMCDSPContext *const c) {
c->warp8x8 = BF(dav1d_warp_affine_8x8, neon);
c->warp8x8t = BF(dav1d_warp_affine_8x8t, neon);
c->emu_edge = BF(dav1d_emu_edge, neon);
#if ARCH_AARCH64
#if HAVE_DOTPROD && BITDEPTH == 8
if (!(flags & DAV1D_ARM_CPU_FLAG_DOTPROD)) return;
init_8tap_fns(neon_dotprod);
#endif // HAVE_DOTPROD && BITDEPTH == 8
#endif // ARCH_AARCH64
}

File diff suppressed because it is too large Load diff

View file

@ -34,12 +34,10 @@
#include "src/ref.h"
#include "src/thread_data.h"
/* Buffers padded to [8] or [16] for SIMD where needed. */
/* Buffers padded to [4]/[8]/[16] for SIMD where needed. */
typedef struct CdfModeContext {
ALIGN(uint16_t y_mode[4][N_INTRA_PRED_MODES + 3], 32);
ALIGN(uint16_t uv_mode[2][N_INTRA_PRED_MODES][N_UV_INTRA_PRED_MODES + 2], 32);
ALIGN(uint16_t wedge_idx[9][16], 32);
ALIGN(uint16_t partition[N_BL_LEVELS][4][N_PARTITIONS + 6], 32);
ALIGN(uint16_t cfl_alpha[6][16], 32);
ALIGN(uint16_t txtp_inter1[2][16], 32);
@ -49,23 +47,33 @@ typedef struct CdfModeContext {
ALIGN(uint16_t cfl_sign[8], 16);
ALIGN(uint16_t angle_delta[8][8], 16);
ALIGN(uint16_t filter_intra[5 + 3], 16);
ALIGN(uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES], 16);
ALIGN(uint16_t seg_id[3][DAV1D_MAX_SEGMENTS], 16);
ALIGN(uint16_t pal_sz[2][7][7 + 1], 16);
ALIGN(uint16_t color_map[2][7][5][8], 16);
ALIGN(uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1], 8);
ALIGN(uint16_t txsz[N_TX_SIZES - 1][3][4], 8);
ALIGN(uint16_t motion_mode[N_BS_SIZES][3 + 1], 8);
ALIGN(uint16_t delta_q[4], 8);
ALIGN(uint16_t delta_lf[5][4], 8);
ALIGN(uint16_t interintra_mode[4][4], 8);
ALIGN(uint16_t restore_switchable[3 + 1], 8);
ALIGN(uint16_t restore_wiener[2], 4);
ALIGN(uint16_t restore_sgrproj[2], 4);
ALIGN(uint16_t interintra[7][2], 4);
ALIGN(uint16_t interintra_wedge[7][2], 4);
ALIGN(uint16_t txtp_inter3[4][2], 4);
ALIGN(uint16_t use_filter_intra[N_BS_SIZES][2], 4);
ALIGN(uint16_t txpart[7][3][2], 4);
ALIGN(uint16_t skip[3][2], 4);
ALIGN(uint16_t pal_y[7][3][2], 4);
ALIGN(uint16_t pal_uv[2][2], 4);
/* key/intra */
ALIGN(uint16_t intrabc[2], 4);
/* inter/switch */
ALIGN(uint16_t y_mode[4][N_INTRA_PRED_MODES + 3], 32);
ALIGN(uint16_t wedge_idx[9][16], 32);
ALIGN(uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES], 16);
ALIGN(uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1], 8);
ALIGN(uint16_t interintra_mode[4][4], 8);
ALIGN(uint16_t motion_mode[N_BS_SIZES][3 + 1], 8);
ALIGN(uint16_t skip_mode[3][2], 4);
ALIGN(uint16_t newmv_mode[6][2], 4);
ALIGN(uint16_t globalmv_mode[2][2], 4);
ALIGN(uint16_t refmv_mode[6][2], 4);
@ -80,14 +88,10 @@ typedef struct CdfModeContext {
ALIGN(uint16_t comp_fwd_ref[3][3][2], 4);
ALIGN(uint16_t comp_bwd_ref[2][3][2], 4);
ALIGN(uint16_t comp_uni_ref[3][3][2], 4);
ALIGN(uint16_t txpart[7][3][2], 4);
ALIGN(uint16_t skip[3][2], 4);
ALIGN(uint16_t skip_mode[3][2], 4);
ALIGN(uint16_t seg_pred[3][2], 4);
ALIGN(uint16_t interintra[7][2], 4);
ALIGN(uint16_t interintra_wedge[7][2], 4);
ALIGN(uint16_t obmc[N_BS_SIZES][2], 4);
ALIGN(uint16_t pal_y[7][3][2], 4);
ALIGN(uint16_t pal_uv[2][2], 4);
ALIGN(uint16_t intrabc[2], 4);
} CdfModeContext;
typedef struct CdfCoefContext {
@ -108,13 +112,13 @@ typedef struct CdfCoefContext {
typedef struct CdfMvComponent {
ALIGN(uint16_t classes[11 + 5], 32);
ALIGN(uint16_t class0_fp[2][4], 8);
ALIGN(uint16_t classN_fp[4], 8);
ALIGN(uint16_t class0_hp[2], 4);
ALIGN(uint16_t classN_hp[2], 4);
ALIGN(uint16_t class0[2], 4);
ALIGN(uint16_t classN[10][2], 4);
ALIGN(uint16_t sign[2], 4);
ALIGN(uint16_t class0[2], 4);
ALIGN(uint16_t class0_fp[2][4], 8);
ALIGN(uint16_t class0_hp[2], 4);
ALIGN(uint16_t classN[10][2], 4);
ALIGN(uint16_t classN_fp[4], 8);
ALIGN(uint16_t classN_hp[2], 4);
} CdfMvComponent;
typedef struct CdfMvContext {
@ -123,10 +127,10 @@ typedef struct CdfMvContext {
} CdfMvContext;
typedef struct CdfContext {
CdfModeContext m;
ALIGN(uint16_t kfym[5][5][N_INTRA_PRED_MODES + 3], 32);
CdfCoefContext coef;
CdfMvContext mv, dmv;
CdfModeContext m;
CdfMvContext mv;
ALIGN(uint16_t kfym[5][5][N_INTRA_PRED_MODES + 3], 32);
} CdfContext;
typedef struct CdfThreadContext {
@ -138,7 +142,7 @@ typedef struct CdfThreadContext {
atomic_uint *progress;
} CdfThreadContext;
void dav1d_cdf_thread_init_static(CdfThreadContext *cdf, int qidx);
void dav1d_cdf_thread_init_static(CdfThreadContext *cdf, unsigned qidx);
int dav1d_cdf_thread_alloc(Dav1dContext *c, CdfThreadContext *cdf,
const int have_frame_mt);
void dav1d_cdf_thread_copy(CdfContext *dst, const CdfThreadContext *src);

View file

@ -73,42 +73,29 @@ static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr,
}
}
static int read_mv_component_diff(Dav1dTaskContext *const t,
static int read_mv_component_diff(MsacContext *const msac,
CdfMvComponent *const mv_comp,
const int have_fp)
const int mv_prec)
{
Dav1dTileState *const ts = t->ts;
const Dav1dFrameContext *const f = t->f;
const int have_hp = f->frame_hdr->hp;
const int sign = dav1d_msac_decode_bool_adapt(&ts->msac, mv_comp->sign);
const int cl = dav1d_msac_decode_symbol_adapt16(&ts->msac,
mv_comp->classes, 10);
int up, fp, hp;
const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
int up, fp = 3, hp = 1;
if (!cl) {
up = dav1d_msac_decode_bool_adapt(&ts->msac, mv_comp->class0);
if (have_fp) {
fp = dav1d_msac_decode_symbol_adapt4(&ts->msac,
mv_comp->class0_fp[up], 3);
hp = have_hp ? dav1d_msac_decode_bool_adapt(&ts->msac,
mv_comp->class0_hp) : 1;
} else {
fp = 3;
hp = 1;
up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
if (mv_prec >= 0) { // !force_integer_mv
fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
if (mv_prec > 0) // allow_high_precision_mv
hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);
}
} else {
up = 1 << cl;
for (int n = 0; n < cl; n++)
up |= dav1d_msac_decode_bool_adapt(&ts->msac,
mv_comp->classN[n]) << n;
if (have_fp) {
fp = dav1d_msac_decode_symbol_adapt4(&ts->msac,
mv_comp->classN_fp, 3);
hp = have_hp ? dav1d_msac_decode_bool_adapt(&ts->msac,
mv_comp->classN_hp) : 1;
} else {
fp = 3;
hp = 1;
up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
if (mv_prec >= 0) { // !force_integer_mv
fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
if (mv_prec > 0) // allow_high_precision_mv
hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);
}
}
@ -117,25 +104,16 @@ static int read_mv_component_diff(Dav1dTaskContext *const t,
return sign ? -diff : diff;
}
static void read_mv_residual(Dav1dTaskContext *const t, mv *const ref_mv,
CdfMvContext *const mv_cdf, const int have_fp)
static void read_mv_residual(Dav1dTileState *const ts, mv *const ref_mv,
const int mv_prec)
{
switch (dav1d_msac_decode_symbol_adapt4(&t->ts->msac, t->ts->cdf.mv.joint,
N_MV_JOINTS - 1))
{
case MV_JOINT_HV:
ref_mv->y += read_mv_component_diff(t, &mv_cdf->comp[0], have_fp);
ref_mv->x += read_mv_component_diff(t, &mv_cdf->comp[1], have_fp);
break;
case MV_JOINT_H:
ref_mv->x += read_mv_component_diff(t, &mv_cdf->comp[1], have_fp);
break;
case MV_JOINT_V:
ref_mv->y += read_mv_component_diff(t, &mv_cdf->comp[0], have_fp);
break;
default:
break;
}
MsacContext *const msac = &ts->msac;
const enum MVJoint mv_joint =
dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
if (mv_joint & MV_JOINT_V)
ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
if (mv_joint & MV_JOINT_H)
ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);
}
static void read_tx_tree(Dav1dTaskContext *const t,
@ -1001,8 +979,7 @@ static int decode_b(Dav1dTaskContext *const t,
const int have_delta_q = f->frame_hdr->delta.q.present &&
(bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);
int8_t prev_delta_lf[4];
memcpy(prev_delta_lf, ts->last_delta_lf, 4);
uint32_t prev_delta_lf = ts->last_delta_lf.u32;
if (have_delta_q) {
int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
@ -1038,8 +1015,8 @@ static int decode_b(Dav1dTaskContext *const t,
delta_lf = -delta_lf;
delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
}
ts->last_delta_lf[i] =
iclip(ts->last_delta_lf[i] + delta_lf, -63, 63);
ts->last_delta_lf.i8[i] =
iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
if (have_delta_q && DEBUG_BLOCK_INFO)
printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,
ts->msac.rng);
@ -1054,13 +1031,13 @@ static int decode_b(Dav1dTaskContext *const t,
init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
ts->dq = ts->dqmem;
}
if (!memcmp(ts->last_delta_lf, (int8_t[4]) { 0, 0, 0, 0 }, 4)) {
if (!ts->last_delta_lf.u32) {
// assign frame-wide lf values to this sb
ts->lflvl = f->lf.lvl;
} else if (memcmp(ts->last_delta_lf, prev_delta_lf, 4)) {
} else if (ts->last_delta_lf.u32 != prev_delta_lf) {
// find sb-specific lf lvl parameters
dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf);
ts->lflvl = ts->lflvlmem;
dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);
}
}
@ -1324,7 +1301,7 @@ static int decode_b(Dav1dTaskContext *const t,
}
const union mv ref = b->mv[0];
read_mv_residual(t, &b->mv[0], &ts->cdf.dmv, 0);
read_mv_residual(ts, &b->mv[0], -1);
// clip intrabc motion vector to decoded parts of current tile
int border_left = ts->tiling.col_start * 4;
@ -1586,8 +1563,8 @@ static int decode_b(Dav1dTaskContext *const t,
break; \
case NEWMV: \
b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
read_mv_residual(t, &b->mv[idx], &ts->cdf.mv, \
!f->frame_hdr->force_integer_mv); \
const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
read_mv_residual(ts, &b->mv[idx], mv_prec); \
break; \
}
has_subpel_filter = imin(bw4, bh4) == 1 ||
@ -1775,8 +1752,8 @@ static int decode_b(Dav1dTaskContext *const t,
if (DEBUG_BLOCK_INFO)
printf("Post-intermode[%d,drl=%d]: r=%d\n",
b->inter_mode, b->drl_idx, ts->msac.rng);
read_mv_residual(t, &b->mv[0], &ts->cdf.mv,
!f->frame_hdr->force_integer_mv);
const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv;
read_mv_residual(ts, &b->mv[0], mv_prec);
if (DEBUG_BLOCK_INFO)
printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
b->mv[0].y, b->mv[0].x, ts->msac.rng);
@ -2495,7 +2472,7 @@ static void setup_tile(Dav1dTileState *const ts,
dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
ts->last_qidx = f->frame_hdr->quant.yac;
memset(ts->last_delta_lf, 0, sizeof(ts->last_delta_lf));
ts->last_delta_lf.u32 = 0;
dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update);

View file

@ -303,8 +303,8 @@ struct Dav1dFrameContext {
int lr_buf_plane_sz[2]; /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */
int re_sz /* h */;
ALIGN(Av1FilterLUT lim_lut, 16);
ALIGN(uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */], 16);
int last_sharpness;
uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
uint8_t *tx_lpf_right_edge[2];
uint8_t *cdef_line_buf, *lr_line_buf;
pixel *cdef_line[2 /* pre, post */][3 /* plane */];
@ -376,8 +376,11 @@ struct Dav1dTileState {
const uint16_t (*dq)[3][2];
int last_qidx;
int8_t last_delta_lf[4];
uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
union {
int8_t i8[4];
uint32_t u32;
} last_delta_lf;
ALIGN(uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */], 16);
const uint8_t (*lflvl)[4][8][2];
Av1RestorationUnit *lr_ref[3];

View file

@ -39,10 +39,73 @@ void (name)(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob \
HIGHBD_DECL_SUFFIX)
typedef decl_itx_fn(*itxfm_fn);
#define decl_itx2_fns(w, h, opt) \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_##w##x##h, opt))
#define decl_itx12_fns(w, h, opt) \
decl_itx2_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_dct_##w##x##h, opt))
#define decl_itx16_fns(w, h, opt) \
decl_itx12_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_flipadst_##w##x##h, opt))
#define decl_itx17_fns(w, h, opt) \
decl_itx16_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
typedef struct Dav1dInvTxfmDSPContext {
itxfm_fn itxfm_add[N_RECT_TX_SIZES][N_TX_TYPES_PLUS_LL];
} Dav1dInvTxfmDSPContext;
bitfn_decls(void dav1d_itx_dsp_init, Dav1dInvTxfmDSPContext *c, int bpc);
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
#define assign_itx1_fn(pfx, w, h, ext) \
assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)
#define assign_itx2_fn(pfx, w, h, ext) \
assign_itx1_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext)
#define assign_itx12_fn(pfx, w, h, ext) \
assign_itx2_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext)
#define assign_itx16_fn(pfx, w, h, ext) \
assign_itx12_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext)
#define assign_itx17_fn(pfx, w, h, ext) \
assign_itx16_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext)
#endif /* DAV1D_SRC_ITX_H */

View file

@ -436,7 +436,7 @@ static void calc_lf_value(uint8_t (*const lflvl_values)[2],
const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63);
if (!mr_delta) {
memset(lflvl_values, base, 8 * 2);
memset(lflvl_values, base, sizeof(*lflvl_values) * 8);
} else {
const int sh = base >= 32;
lflvl_values[0][0] = lflvl_values[0][1] =
@ -457,7 +457,7 @@ static inline void calc_lf_value_chroma(uint8_t (*const lflvl_values)[2],
const Dav1dLoopfilterModeRefDeltas *const mr_delta)
{
if (!base_lvl)
memset(lflvl_values, 0, 8 * 2);
memset(lflvl_values, 0, sizeof(*lflvl_values) * 8);
else
calc_lf_value(lflvl_values, base_lvl, lf_delta, seg_delta, mr_delta);
}
@ -469,7 +469,7 @@ void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2],
const int n_seg = hdr->segmentation.enabled ? 8 : 1;
if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) {
memset(lflvl_values, 0, 8 * 4 * 2 * n_seg);
memset(lflvl_values, 0, sizeof(*lflvl_values) * n_seg);
return;
}

View file

@ -106,6 +106,7 @@ if is_asm_enabled
'arm/64/loopfilter.S',
'arm/64/looprestoration.S',
'arm/64/mc.S',
'arm/64/mc_dotprod.S',
)
endif

View file

@ -817,7 +817,9 @@ int dav1d_refmvs_init_frame(refmvs_frame *const rf,
if (r_stride != rf->r_stride || n_tile_rows != rf->n_tile_rows) {
if (rf->r) dav1d_freep_aligned(&rf->r);
const int uses_2pass = n_tile_threads > 1 && n_frame_threads > 1;
rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, sizeof(*rf->r) * 35 * r_stride * n_tile_rows * (1 + uses_2pass), 64);
/* sizeof(refmvs_block) == 12 but it's accessed using 16-byte loads in asm,
* so add 4 bytes of padding to avoid buffer overreads. */
rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, sizeof(*rf->r) * 35 * r_stride * n_tile_rows * (1 + uses_2pass) + 4, 64);
if (!rf->r) return DAV1D_ERR(ENOMEM);
rf->r_stride = r_stride;
}

View file

@ -28,34 +28,6 @@
#include "src/cpu.h"
#include "src/itx.h"
#define decl_itx2_fns(w, h, opt) \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_##w##x##h, opt))
#define decl_itx12_fns(w, h, opt) \
decl_itx2_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_dct_##w##x##h, opt))
#define decl_itx16_fns(w, h, opt) \
decl_itx12_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_flipadst_##w##x##h, opt))
#define decl_itx17_fns(w, h, opt) \
decl_itx16_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
#define decl_itx_fns(ext) \
decl_itx17_fns( 4, 4, ext); \
decl_itx16_fns( 4, 8, ext); \
@ -70,41 +42,6 @@ decl_itx16_fns(16, 16, ext)
decl_itx_fns(rvv);
static ALWAYS_INLINE void itx_dsp_init_riscv(Dav1dInvTxfmDSPContext *const c, int const bpc) {
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
#define assign_itx1_fn(pfx, w, h, ext) \
assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)
#define assign_itx2_fn(pfx, w, h, ext) \
assign_itx1_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext)
#define assign_itx12_fn(pfx, w, h, ext) \
assign_itx2_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext)
#define assign_itx16_fn(pfx, w, h, ext) \
assign_itx12_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext)
#define assign_itx17_fn(pfx, w, h, ext) \
assign_itx16_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext)
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_RISCV_CPU_FLAG_V)) return;

View file

@ -66,7 +66,8 @@ z_filter_wh: db 7, 7, 11, 11, 15, 15, 19, 19, 19, 23, 23, 23, 31, 31, 31, 39
; z_filter_k: three rows of 12 byte values each.
z_filter_k: db 0, 16, 0, 16, 0, 20, 0, 20, 8, 16, 8, 16
db 32, 16, 32, 16, 24, 20, 24, 20, 16, 16, 16, 16
db 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 8, 0
; NOTE(review): z_filter_s is defined twice below, once as a plain label and
; once through the `const` macro. This looks like the old and the new side of
; a diff hunk whose +/- markers were lost -- confirm that only one of the two
; definitions is actually assembled.
; The table is 40 bytes long (16 + 16 + 8).
z_filter_s: db 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7
const \
z_filter_s, db 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7
db 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15
db 15, 15, 15, 15, 15, 15, 15, 15 ; should be in one cache line
pb_128: times 4 db 128 ; those are just placed here for alignment.

View file

@ -30,34 +30,6 @@
#define BF_BPC(x, bits, suffix) x##_##bits##bpc_##suffix
/*
 * Declaration helpers: each decl_itxN_fns() macro expands to N
 * decl_itx_fn() declarations for one transform size (w x h) and one
 * optimization suffix (opt). decl_itx_fn() and BF() are defined outside
 * this chunk. None of the macros ends with a semicolon; the caller
 * supplies the final one.
 */
/* 2 declarations: dct_dct and identity_identity. */
#define decl_itx2_fns(w, h, opt) \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_##w##x##h, opt))
/* 12 declarations: the 2 above plus the remaining dct/adst/flipadst
 * combinations, dct_identity and identity_dct. */
#define decl_itx12_fns(w, h, opt) \
decl_itx2_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_dct_##w##x##h, opt))
/* 16 declarations: the 12 above plus the four adst/flipadst + identity
 * combinations. */
#define decl_itx16_fns(w, h, opt) \
decl_itx12_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_identity_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_adst_##w##x##h, opt)); \
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_flipadst_##w##x##h, opt))
/* 17 declarations: the 16 above plus wht_wht. */
#define decl_itx17_fns(w, h, opt) \
decl_itx16_fns(w, h, opt); \
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
#define decl_itx_fns(ext) \
decl_itx17_fns( 4, 4, ext); \
decl_itx16_fns( 4, 8, ext); \
@ -136,42 +108,6 @@ decl_itx_fn(dav1d_inv_txfm_add_wht_wht_4x4_16bpc_avx2);
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_4x4, sse2));
static ALWAYS_INLINE void itx_dsp_init_x86(Dav1dInvTxfmDSPContext *const c, const int bpc) {
/*
 * Helper macros for populating the c->itxfm_add function table.
 *
 * Each assign_itxN_fn() macro expands to N assignments for one transform
 * size (w x h). None of the macros ends with a semicolon; the caller is
 * expected to supply the final one.
 *
 * Note that the function-name component and the enum name list the two
 * transform kinds in opposite order (e.g. "dct_adst" is stored under
 * ADST_DCT, "adst_dct" under DCT_ADST).
 */
/* Store BF(dav1d_inv_txfm_add_<type>_<w>x<h>, ext) in
 * c->itxfm_add[<pfx>TX_<w>X<h>][type_enum].
 * BF() is defined outside this chunk; presumably it appends the bitdepth
 * and the extension suffix to the symbol name -- TODO confirm. */
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
/* 1 entry: DCT_DCT only. */
#define assign_itx1_fn(pfx, w, h, ext) \
assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)
/* 2 entries: DCT_DCT plus IDTX (identity_identity). */
#define assign_itx2_fn(pfx, w, h, ext) \
assign_itx1_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext)
/* 12 entries: the 2 above plus the remaining dct/adst/flipadst
 * combinations and the two dct+identity variants (H_DCT, V_DCT). */
#define assign_itx12_fn(pfx, w, h, ext) \
assign_itx2_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext)
/* 16 entries: the 12 above plus the adst/flipadst + identity variants
 * (H_ADST, H_FLIPADST, V_ADST, V_FLIPADST). */
#define assign_itx16_fn(pfx, w, h, ext) \
assign_itx12_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext)
/* 17 entries: the 16 above plus WHT_WHT. */
#define assign_itx17_fn(pfx, w, h, ext) \
assign_itx16_fn(pfx, w, h, ext); \
assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext)
/* Same as assign_itx_fn(), but the symbol is built with BF_BPC() from an
 * explicitly passed bpc value, i.e. the stored function is named
 * dav1d_inv_txfm_add_<type>_<w>x<h>_<bpc>bpc_<ext>. */
#define assign_itx_bpc_fn(pfx, w, h, type, type_enum, bpc, ext) \
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
BF_BPC(dav1d_inv_txfm_add_##type##_##w##x##h, bpc, ext)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -100,7 +100,7 @@ if is_asm_enabled
],
)
test('checkasm', checkasm, suite: 'checkasm', timeout: 180, is_parallel: false)
test('checkasm', checkasm, suite: 'checkasm', timeout: 180)
benchmark('checkasm', checkasm, suite: 'checkasm', timeout: 3600, args: '--bench')
endif