forked from mirrors/gecko-dev
Bug 1891459 - Update dav1d to 5b5399911dd24703de641d65eda5b7f1e845d060 r=chunmin
Differential Revision: https://phabricator.services.mozilla.com/D207425
This commit is contained in:
parent
be484e5383
commit
16eb058401
25 changed files with 6745 additions and 2663 deletions
|
|
@ -211,6 +211,7 @@ elif CONFIG['TARGET_CPU'] == 'arm' or CONFIG['TARGET_CPU'] == 'aarch64':
|
|||
'../../../third_party/dav1d/src/arm/64/looprestoration_tmpl.S',
|
||||
'../../../third_party/dav1d/src/arm/64/mc.S',
|
||||
'../../../third_party/dav1d/src/arm/64/mc16.S',
|
||||
'../../../third_party/dav1d/src/arm/64/mc_dotprod.S',
|
||||
'../../../third_party/dav1d/src/arm/64/msac.S',
|
||||
'../../../third_party/dav1d/src/arm/64/refmvs.S',
|
||||
]
|
||||
|
|
|
|||
|
|
@ -20,11 +20,11 @@ origin:
|
|||
|
||||
# Human-readable identifier for this version/release
|
||||
# Generally "version NNN", "tag SSS", "bookmark SSS"
|
||||
release: 8e08426468a76d8a667e8a79d92bafd85d7411ac (2024-03-18T20:50:37.000+00:00).
|
||||
release: 5b5399911dd24703de641d65eda5b7f1e845d060 (2024-04-15T13:19:42.000+02:00).
|
||||
|
||||
# Revision to pull in
|
||||
# Must be a long or short commit SHA (long preferred)
|
||||
revision: 8e08426468a76d8a667e8a79d92bafd85d7411ac
|
||||
revision: 5b5399911dd24703de641d65eda5b7f1e845d060
|
||||
|
||||
# The package's license, where possible using the mnemonic from
|
||||
# https://spdx.org/licenses/
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
/* auto-generated, do not edit */
|
||||
#define DAV1D_VERSION "8e08426468a76d8a667e8a79d92bafd85d7411ac"
|
||||
#define DAV1D_VERSION "5b5399911dd24703de641d65eda5b7f1e845d060"
|
||||
|
|
|
|||
2
third_party/dav1d/meson.build
vendored
2
third_party/dav1d/meson.build
vendored
|
|
@ -81,6 +81,8 @@ cdata.set10('TRIM_DSP_FUNCTIONS', get_option('trim_dsp') == 'true' or
|
|||
# Logging option
|
||||
cdata.set10('CONFIG_LOG', get_option('logging'))
|
||||
|
||||
cdata.set10('CONFIG_MACOS_KPERF', get_option('macos_kperf'))
|
||||
|
||||
#
|
||||
# OS/Compiler checks and defines
|
||||
#
|
||||
|
|
|
|||
5
third_party/dav1d/meson_options.txt
vendored
5
third_party/dav1d/meson_options.txt
vendored
|
|
@ -68,3 +68,8 @@ option('trim_dsp',
|
|||
choices: ['true', 'false', 'if-release'],
|
||||
value: 'if-release',
|
||||
description: 'Eliminate redundant DSP functions where possible')
|
||||
|
||||
option('macos_kperf',
|
||||
type: 'boolean',
|
||||
value: false,
|
||||
description: 'Use the private macOS kperf API for benchmarking')
|
||||
|
|
|
|||
4
third_party/dav1d/src/arm/64/mc.S
vendored
4
third_party/dav1d/src/arm/64/mc.S
vendored
|
|
@ -837,7 +837,7 @@ endfunc
|
|||
|
||||
// This has got the same signature as the put_8tap functions,
|
||||
// and assumes that x8 is set to (clz(w)-24).
|
||||
function put_neon
|
||||
function put_neon, export=1
|
||||
adr x9, L(put_tbl)
|
||||
ldrh w8, [x9, x8, lsl #1]
|
||||
sub x9, x9, w8, uxtw
|
||||
|
|
@ -939,7 +939,7 @@ endfunc
|
|||
|
||||
// This has got the same signature as the prep_8tap functions,
|
||||
// and assumes that x8 is set to (clz(w)-24), and x7 to w*2.
|
||||
function prep_neon
|
||||
function prep_neon, export=1
|
||||
adr x9, L(prep_tbl)
|
||||
ldrh w8, [x9, x8, lsl #1]
|
||||
sub x9, x9, w8, uxtw
|
||||
|
|
|
|||
1413
third_party/dav1d/src/arm/64/mc_dotprod.S
vendored
Normal file
1413
third_party/dav1d/src/arm/64/mc_dotprod.S
vendored
Normal file
File diff suppressed because it is too large
Load diff
21
third_party/dav1d/src/arm/64/msac.S
vendored
21
third_party/dav1d/src/arm/64/msac.S
vendored
|
|
@ -288,10 +288,8 @@ function msac_decode_hi_tok_neon, export=1
|
|||
mvni v30.4h, #0x3f // 0xffc0
|
||||
ldrh w9, [x1, #6] // count = cdf[n_symbols]
|
||||
ld1r {v3.4h}, [x16] // rng
|
||||
movrel x16, bits
|
||||
ld1 {v29.4h}, [x17] // EC_MIN_PROB * (n_symbols - ret)
|
||||
add x17, x0, #DIF + 6
|
||||
ld1 {v16.8h}, [x16]
|
||||
mov w13, #-24
|
||||
and v17.8b, v0.8b, v30.8b // cdf & 0xffc0
|
||||
ldr w10, [x0, #ALLOW_UPDATE_CDF]
|
||||
|
|
@ -305,30 +303,27 @@ function msac_decode_hi_tok_neon, export=1
|
|||
add v4.4h, v17.4h, v29.4h // v = cdf + EC_MIN_PROB * (n_symbols - ret)
|
||||
add v4.4h, v6.4h, v4.4h // v = ((cdf >> EC_PROB_SHIFT) * r) >> 1 + EC_MIN_PROB * (n_symbols - ret)
|
||||
str h3, [sp, #14] // store original u = s->rng
|
||||
cmhs v2.8h, v1.8h, v4.8h // c >= v
|
||||
cmhs v2.4h, v1.4h, v4.4h // c >= v
|
||||
str q4, [sp, #16] // store v values to allow indexed access
|
||||
and v6.16b, v2.16b, v16.16b // One bit per halfword set in the mask
|
||||
addv h6, v6.8h // Aggregate mask bits
|
||||
umov w3, v6.h[0]
|
||||
addv h6, v2.4h // -4 + ret
|
||||
add w13, w13, #5
|
||||
rbit w3, w3
|
||||
smov w15, v6.h[0]
|
||||
add x8, sp, #16
|
||||
clz w15, w3 // ret
|
||||
add w15, w15, #4 // ret
|
||||
|
||||
cbz w10, 2f
|
||||
// update_cdf
|
||||
movi v5.8b, #0xff
|
||||
sub v5.4h, v0.4h, v2.4h // cdf[i] + (i >= val ? 1 : 0)
|
||||
mov w4, #-5
|
||||
urhadd v4.4h, v5.4h, v2.4h // i >= val ? -1 : 32768
|
||||
orr v2.4h, #0x80, lsl #8 // i >= val ? -1 : 32768
|
||||
sub w4, w4, w9, lsr #4 // -((count >> 4) + 5)
|
||||
sub v4.4h, v4.4h, v0.4h // (32768 - cdf[i]) or (-1 - cdf[i])
|
||||
sub v4.4h, v2.4h, v0.4h // (32768 - cdf[i]) or (-1 - cdf[i])
|
||||
dup v6.4h, w4 // -rate
|
||||
|
||||
sub w9, w9, w9, lsr #5 // count - (count == 32)
|
||||
sub v0.4h, v0.4h, v2.4h // cdf + (i >= val ? 1 : 0)
|
||||
sshl v4.4h, v4.4h, v6.4h // ({32768,-1} - cdf[i]) >> rate
|
||||
add w9, w9, #1 // count + (count < 32)
|
||||
add v0.4h, v0.4h, v4.4h // cdf + (32768 - cdf[i]) >> rate
|
||||
add v0.4h, v5.4h, v4.4h // cdf[i] + (32768 - cdf[i]) >> rate
|
||||
st1 {v0.4h}, [x1]
|
||||
and v17.8b, v0.8b, v30.8b // cdf & 0xffc0
|
||||
strh w9, [x1, #6]
|
||||
|
|
|
|||
63
third_party/dav1d/src/arm/itx.h
vendored
63
third_party/dav1d/src/arm/itx.h
vendored
|
|
@ -28,34 +28,6 @@
|
|||
#include "src/cpu.h"
|
||||
#include "src/itx.h"
|
||||
|
||||
#define decl_itx2_fns(w, h, opt) \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_##w##x##h, opt))
|
||||
|
||||
#define decl_itx12_fns(w, h, opt) \
|
||||
decl_itx2_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_dct_##w##x##h, opt))
|
||||
|
||||
#define decl_itx16_fns(w, h, opt) \
|
||||
decl_itx12_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_flipadst_##w##x##h, opt))
|
||||
|
||||
#define decl_itx17_fns(w, h, opt) \
|
||||
decl_itx16_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
|
||||
|
||||
decl_itx17_fns( 4, 4, neon);
|
||||
decl_itx16_fns( 4, 8, neon);
|
||||
decl_itx16_fns( 4, 16, neon);
|
||||
|
|
@ -78,41 +50,6 @@ decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x32, neon));
|
|||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x64, neon));
|
||||
|
||||
static ALWAYS_INLINE void itx_dsp_init_arm(Dav1dInvTxfmDSPContext *const c, int bpc) {
|
||||
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
|
||||
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
|
||||
BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
|
||||
|
||||
#define assign_itx1_fn(pfx, w, h, ext) \
|
||||
assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)
|
||||
|
||||
#define assign_itx2_fn(pfx, w, h, ext) \
|
||||
assign_itx1_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext)
|
||||
|
||||
#define assign_itx12_fn(pfx, w, h, ext) \
|
||||
assign_itx2_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext)
|
||||
|
||||
#define assign_itx16_fn(pfx, w, h, ext) \
|
||||
assign_itx12_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext)
|
||||
|
||||
#define assign_itx17_fn(pfx, w, h, ext) \
|
||||
assign_itx16_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext)
|
||||
|
||||
const unsigned flags = dav1d_get_cpu_flags();
|
||||
|
||||
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
|
||||
|
|
|
|||
83
third_party/dav1d/src/arm/mc.h
vendored
83
third_party/dav1d/src/arm/mc.h
vendored
|
|
@ -30,26 +30,40 @@
|
|||
#include "src/mc.h"
|
||||
#include "src/cpu.h"
|
||||
|
||||
decl_mc_fn(BF(dav1d_put_8tap_regular, neon));
|
||||
decl_mc_fn(BF(dav1d_put_8tap_regular_smooth, neon));
|
||||
decl_mc_fn(BF(dav1d_put_8tap_regular_sharp, neon));
|
||||
decl_mc_fn(BF(dav1d_put_8tap_smooth, neon));
|
||||
decl_mc_fn(BF(dav1d_put_8tap_smooth_regular, neon));
|
||||
decl_mc_fn(BF(dav1d_put_8tap_smooth_sharp, neon));
|
||||
decl_mc_fn(BF(dav1d_put_8tap_sharp, neon));
|
||||
decl_mc_fn(BF(dav1d_put_8tap_sharp_regular, neon));
|
||||
decl_mc_fn(BF(dav1d_put_8tap_sharp_smooth, neon));
|
||||
decl_mc_fn(BF(dav1d_put_bilin, neon));
|
||||
#define decl_8tap_gen(decl_name, fn_name, opt) \
|
||||
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_regular, opt)); \
|
||||
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_regular_smooth, opt)); \
|
||||
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_regular_sharp, opt)); \
|
||||
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_smooth_regular, opt)); \
|
||||
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_smooth, opt)); \
|
||||
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_smooth_sharp, opt)); \
|
||||
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_sharp_regular, opt)); \
|
||||
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_sharp_smooth, opt)); \
|
||||
decl_##decl_name##_fn(BF(dav1d_##fn_name##_8tap_sharp, opt))
|
||||
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_regular, neon));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_regular_smooth, neon));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_regular_sharp, neon));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_smooth, neon));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_smooth_regular, neon));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_smooth_sharp, neon));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_sharp, neon));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_sharp_regular, neon));
|
||||
decl_mct_fn(BF(dav1d_prep_8tap_sharp_smooth, neon));
|
||||
#define decl_8tap_fns(opt) \
|
||||
decl_8tap_gen(mc, put, opt); \
|
||||
decl_8tap_gen(mct, prep, opt)
|
||||
|
||||
#define init_8tap_gen(name, opt) \
|
||||
init_##name##_fn(FILTER_2D_8TAP_REGULAR, 8tap_regular, opt); \
|
||||
init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
|
||||
init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, opt); \
|
||||
init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
|
||||
init_##name##_fn(FILTER_2D_8TAP_SMOOTH, 8tap_smooth, opt); \
|
||||
init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, opt); \
|
||||
init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, opt); \
|
||||
init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, opt); \
|
||||
init_##name##_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, opt)
|
||||
|
||||
#define init_8tap_fns(opt) \
|
||||
init_8tap_gen(mc, opt); \
|
||||
init_8tap_gen(mct, opt)
|
||||
|
||||
decl_8tap_fns(neon);
|
||||
decl_8tap_fns(neon_dotprod);
|
||||
|
||||
decl_mc_fn(BF(dav1d_put_bilin, neon));
|
||||
decl_mct_fn(BF(dav1d_prep_bilin, neon));
|
||||
|
||||
decl_avg_fn(BF(dav1d_avg, neon));
|
||||
|
|
@ -77,27 +91,10 @@ static ALWAYS_INLINE void mc_dsp_init_arm(Dav1dMCDSPContext *const c) {
|
|||
|
||||
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
|
||||
|
||||
init_mc_fn (FILTER_2D_8TAP_REGULAR, 8tap_regular, neon);
|
||||
init_mc_fn (FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, neon);
|
||||
init_mc_fn (FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, neon);
|
||||
init_mc_fn (FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, neon);
|
||||
init_mc_fn (FILTER_2D_8TAP_SMOOTH, 8tap_smooth, neon);
|
||||
init_mc_fn (FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, neon);
|
||||
init_mc_fn (FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, neon);
|
||||
init_mc_fn (FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, neon);
|
||||
init_mc_fn (FILTER_2D_8TAP_SHARP, 8tap_sharp, neon);
|
||||
init_mc_fn (FILTER_2D_BILINEAR, bilin, neon);
|
||||
init_8tap_fns(neon);
|
||||
|
||||
init_mct_fn(FILTER_2D_8TAP_REGULAR, 8tap_regular, neon);
|
||||
init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, neon);
|
||||
init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, neon);
|
||||
init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, neon);
|
||||
init_mct_fn(FILTER_2D_8TAP_SMOOTH, 8tap_smooth, neon);
|
||||
init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, neon);
|
||||
init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, neon);
|
||||
init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, neon);
|
||||
init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, neon);
|
||||
init_mct_fn(FILTER_2D_BILINEAR, bilin, neon);
|
||||
init_mc_fn (FILTER_2D_BILINEAR, bilin, neon);
|
||||
init_mct_fn(FILTER_2D_BILINEAR, bilin, neon);
|
||||
|
||||
c->avg = BF(dav1d_avg, neon);
|
||||
c->w_avg = BF(dav1d_w_avg, neon);
|
||||
|
|
@ -111,4 +108,12 @@ static ALWAYS_INLINE void mc_dsp_init_arm(Dav1dMCDSPContext *const c) {
|
|||
c->warp8x8 = BF(dav1d_warp_affine_8x8, neon);
|
||||
c->warp8x8t = BF(dav1d_warp_affine_8x8t, neon);
|
||||
c->emu_edge = BF(dav1d_emu_edge, neon);
|
||||
|
||||
#if ARCH_AARCH64
|
||||
#if HAVE_DOTPROD && BITDEPTH == 8
|
||||
if (!(flags & DAV1D_ARM_CPU_FLAG_DOTPROD)) return;
|
||||
|
||||
init_8tap_fns(neon_dotprod);
|
||||
#endif // HAVE_DOTPROD && BITDEPTH == 8
|
||||
#endif // ARCH_AARCH64
|
||||
}
|
||||
|
|
|
|||
1398
third_party/dav1d/src/cdf.c
vendored
1398
third_party/dav1d/src/cdf.c
vendored
File diff suppressed because it is too large
Load diff
54
third_party/dav1d/src/cdf.h
vendored
54
third_party/dav1d/src/cdf.h
vendored
|
|
@ -34,12 +34,10 @@
|
|||
#include "src/ref.h"
|
||||
#include "src/thread_data.h"
|
||||
|
||||
/* Buffers padded to [8] or [16] for SIMD where needed. */
|
||||
/* Buffers padded to [4]/[8]/[16] for SIMD where needed. */
|
||||
|
||||
typedef struct CdfModeContext {
|
||||
ALIGN(uint16_t y_mode[4][N_INTRA_PRED_MODES + 3], 32);
|
||||
ALIGN(uint16_t uv_mode[2][N_INTRA_PRED_MODES][N_UV_INTRA_PRED_MODES + 2], 32);
|
||||
ALIGN(uint16_t wedge_idx[9][16], 32);
|
||||
ALIGN(uint16_t partition[N_BL_LEVELS][4][N_PARTITIONS + 6], 32);
|
||||
ALIGN(uint16_t cfl_alpha[6][16], 32);
|
||||
ALIGN(uint16_t txtp_inter1[2][16], 32);
|
||||
|
|
@ -49,23 +47,33 @@ typedef struct CdfModeContext {
|
|||
ALIGN(uint16_t cfl_sign[8], 16);
|
||||
ALIGN(uint16_t angle_delta[8][8], 16);
|
||||
ALIGN(uint16_t filter_intra[5 + 3], 16);
|
||||
ALIGN(uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES], 16);
|
||||
ALIGN(uint16_t seg_id[3][DAV1D_MAX_SEGMENTS], 16);
|
||||
ALIGN(uint16_t pal_sz[2][7][7 + 1], 16);
|
||||
ALIGN(uint16_t color_map[2][7][5][8], 16);
|
||||
ALIGN(uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1], 8);
|
||||
ALIGN(uint16_t txsz[N_TX_SIZES - 1][3][4], 8);
|
||||
ALIGN(uint16_t motion_mode[N_BS_SIZES][3 + 1], 8);
|
||||
ALIGN(uint16_t delta_q[4], 8);
|
||||
ALIGN(uint16_t delta_lf[5][4], 8);
|
||||
ALIGN(uint16_t interintra_mode[4][4], 8);
|
||||
ALIGN(uint16_t restore_switchable[3 + 1], 8);
|
||||
ALIGN(uint16_t restore_wiener[2], 4);
|
||||
ALIGN(uint16_t restore_sgrproj[2], 4);
|
||||
ALIGN(uint16_t interintra[7][2], 4);
|
||||
ALIGN(uint16_t interintra_wedge[7][2], 4);
|
||||
ALIGN(uint16_t txtp_inter3[4][2], 4);
|
||||
ALIGN(uint16_t use_filter_intra[N_BS_SIZES][2], 4);
|
||||
ALIGN(uint16_t txpart[7][3][2], 4);
|
||||
ALIGN(uint16_t skip[3][2], 4);
|
||||
ALIGN(uint16_t pal_y[7][3][2], 4);
|
||||
ALIGN(uint16_t pal_uv[2][2], 4);
|
||||
|
||||
/* key/intra */
|
||||
ALIGN(uint16_t intrabc[2], 4);
|
||||
|
||||
/* inter/switch */
|
||||
ALIGN(uint16_t y_mode[4][N_INTRA_PRED_MODES + 3], 32);
|
||||
ALIGN(uint16_t wedge_idx[9][16], 32);
|
||||
ALIGN(uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES], 16);
|
||||
ALIGN(uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1], 8);
|
||||
ALIGN(uint16_t interintra_mode[4][4], 8);
|
||||
ALIGN(uint16_t motion_mode[N_BS_SIZES][3 + 1], 8);
|
||||
ALIGN(uint16_t skip_mode[3][2], 4);
|
||||
ALIGN(uint16_t newmv_mode[6][2], 4);
|
||||
ALIGN(uint16_t globalmv_mode[2][2], 4);
|
||||
ALIGN(uint16_t refmv_mode[6][2], 4);
|
||||
|
|
@ -80,14 +88,10 @@ typedef struct CdfModeContext {
|
|||
ALIGN(uint16_t comp_fwd_ref[3][3][2], 4);
|
||||
ALIGN(uint16_t comp_bwd_ref[2][3][2], 4);
|
||||
ALIGN(uint16_t comp_uni_ref[3][3][2], 4);
|
||||
ALIGN(uint16_t txpart[7][3][2], 4);
|
||||
ALIGN(uint16_t skip[3][2], 4);
|
||||
ALIGN(uint16_t skip_mode[3][2], 4);
|
||||
ALIGN(uint16_t seg_pred[3][2], 4);
|
||||
ALIGN(uint16_t interintra[7][2], 4);
|
||||
ALIGN(uint16_t interintra_wedge[7][2], 4);
|
||||
ALIGN(uint16_t obmc[N_BS_SIZES][2], 4);
|
||||
ALIGN(uint16_t pal_y[7][3][2], 4);
|
||||
ALIGN(uint16_t pal_uv[2][2], 4);
|
||||
ALIGN(uint16_t intrabc[2], 4);
|
||||
} CdfModeContext;
|
||||
|
||||
typedef struct CdfCoefContext {
|
||||
|
|
@ -108,13 +112,13 @@ typedef struct CdfCoefContext {
|
|||
|
||||
typedef struct CdfMvComponent {
|
||||
ALIGN(uint16_t classes[11 + 5], 32);
|
||||
ALIGN(uint16_t class0_fp[2][4], 8);
|
||||
ALIGN(uint16_t classN_fp[4], 8);
|
||||
ALIGN(uint16_t class0_hp[2], 4);
|
||||
ALIGN(uint16_t classN_hp[2], 4);
|
||||
ALIGN(uint16_t class0[2], 4);
|
||||
ALIGN(uint16_t classN[10][2], 4);
|
||||
ALIGN(uint16_t sign[2], 4);
|
||||
ALIGN(uint16_t class0[2], 4);
|
||||
ALIGN(uint16_t class0_fp[2][4], 8);
|
||||
ALIGN(uint16_t class0_hp[2], 4);
|
||||
ALIGN(uint16_t classN[10][2], 4);
|
||||
ALIGN(uint16_t classN_fp[4], 8);
|
||||
ALIGN(uint16_t classN_hp[2], 4);
|
||||
} CdfMvComponent;
|
||||
|
||||
typedef struct CdfMvContext {
|
||||
|
|
@ -123,10 +127,10 @@ typedef struct CdfMvContext {
|
|||
} CdfMvContext;
|
||||
|
||||
typedef struct CdfContext {
|
||||
CdfModeContext m;
|
||||
ALIGN(uint16_t kfym[5][5][N_INTRA_PRED_MODES + 3], 32);
|
||||
CdfCoefContext coef;
|
||||
CdfMvContext mv, dmv;
|
||||
CdfModeContext m;
|
||||
CdfMvContext mv;
|
||||
ALIGN(uint16_t kfym[5][5][N_INTRA_PRED_MODES + 3], 32);
|
||||
} CdfContext;
|
||||
|
||||
typedef struct CdfThreadContext {
|
||||
|
|
@ -138,7 +142,7 @@ typedef struct CdfThreadContext {
|
|||
atomic_uint *progress;
|
||||
} CdfThreadContext;
|
||||
|
||||
void dav1d_cdf_thread_init_static(CdfThreadContext *cdf, int qidx);
|
||||
void dav1d_cdf_thread_init_static(CdfThreadContext *cdf, unsigned qidx);
|
||||
int dav1d_cdf_thread_alloc(Dav1dContext *c, CdfThreadContext *cdf,
|
||||
const int have_frame_mt);
|
||||
void dav1d_cdf_thread_copy(CdfContext *dst, const CdfThreadContext *src);
|
||||
|
|
|
|||
95
third_party/dav1d/src/decode.c
vendored
95
third_party/dav1d/src/decode.c
vendored
|
|
@ -73,42 +73,29 @@ static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr,
|
|||
}
|
||||
}
|
||||
|
||||
static int read_mv_component_diff(Dav1dTaskContext *const t,
|
||||
static int read_mv_component_diff(MsacContext *const msac,
|
||||
CdfMvComponent *const mv_comp,
|
||||
const int have_fp)
|
||||
const int mv_prec)
|
||||
{
|
||||
Dav1dTileState *const ts = t->ts;
|
||||
const Dav1dFrameContext *const f = t->f;
|
||||
const int have_hp = f->frame_hdr->hp;
|
||||
const int sign = dav1d_msac_decode_bool_adapt(&ts->msac, mv_comp->sign);
|
||||
const int cl = dav1d_msac_decode_symbol_adapt16(&ts->msac,
|
||||
mv_comp->classes, 10);
|
||||
int up, fp, hp;
|
||||
const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
|
||||
const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
|
||||
int up, fp = 3, hp = 1;
|
||||
|
||||
if (!cl) {
|
||||
up = dav1d_msac_decode_bool_adapt(&ts->msac, mv_comp->class0);
|
||||
if (have_fp) {
|
||||
fp = dav1d_msac_decode_symbol_adapt4(&ts->msac,
|
||||
mv_comp->class0_fp[up], 3);
|
||||
hp = have_hp ? dav1d_msac_decode_bool_adapt(&ts->msac,
|
||||
mv_comp->class0_hp) : 1;
|
||||
} else {
|
||||
fp = 3;
|
||||
hp = 1;
|
||||
up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
|
||||
if (mv_prec >= 0) { // !force_integer_mv
|
||||
fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
|
||||
if (mv_prec > 0) // allow_high_precision_mv
|
||||
hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);
|
||||
}
|
||||
} else {
|
||||
up = 1 << cl;
|
||||
for (int n = 0; n < cl; n++)
|
||||
up |= dav1d_msac_decode_bool_adapt(&ts->msac,
|
||||
mv_comp->classN[n]) << n;
|
||||
if (have_fp) {
|
||||
fp = dav1d_msac_decode_symbol_adapt4(&ts->msac,
|
||||
mv_comp->classN_fp, 3);
|
||||
hp = have_hp ? dav1d_msac_decode_bool_adapt(&ts->msac,
|
||||
mv_comp->classN_hp) : 1;
|
||||
} else {
|
||||
fp = 3;
|
||||
hp = 1;
|
||||
up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
|
||||
if (mv_prec >= 0) { // !force_integer_mv
|
||||
fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
|
||||
if (mv_prec > 0) // allow_high_precision_mv
|
||||
hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -117,25 +104,16 @@ static int read_mv_component_diff(Dav1dTaskContext *const t,
|
|||
return sign ? -diff : diff;
|
||||
}
|
||||
|
||||
static void read_mv_residual(Dav1dTaskContext *const t, mv *const ref_mv,
|
||||
CdfMvContext *const mv_cdf, const int have_fp)
|
||||
static void read_mv_residual(Dav1dTileState *const ts, mv *const ref_mv,
|
||||
const int mv_prec)
|
||||
{
|
||||
switch (dav1d_msac_decode_symbol_adapt4(&t->ts->msac, t->ts->cdf.mv.joint,
|
||||
N_MV_JOINTS - 1))
|
||||
{
|
||||
case MV_JOINT_HV:
|
||||
ref_mv->y += read_mv_component_diff(t, &mv_cdf->comp[0], have_fp);
|
||||
ref_mv->x += read_mv_component_diff(t, &mv_cdf->comp[1], have_fp);
|
||||
break;
|
||||
case MV_JOINT_H:
|
||||
ref_mv->x += read_mv_component_diff(t, &mv_cdf->comp[1], have_fp);
|
||||
break;
|
||||
case MV_JOINT_V:
|
||||
ref_mv->y += read_mv_component_diff(t, &mv_cdf->comp[0], have_fp);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
MsacContext *const msac = &ts->msac;
|
||||
const enum MVJoint mv_joint =
|
||||
dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
|
||||
if (mv_joint & MV_JOINT_V)
|
||||
ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
|
||||
if (mv_joint & MV_JOINT_H)
|
||||
ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);
|
||||
}
|
||||
|
||||
static void read_tx_tree(Dav1dTaskContext *const t,
|
||||
|
|
@ -1001,8 +979,7 @@ static int decode_b(Dav1dTaskContext *const t,
|
|||
const int have_delta_q = f->frame_hdr->delta.q.present &&
|
||||
(bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);
|
||||
|
||||
int8_t prev_delta_lf[4];
|
||||
memcpy(prev_delta_lf, ts->last_delta_lf, 4);
|
||||
uint32_t prev_delta_lf = ts->last_delta_lf.u32;
|
||||
|
||||
if (have_delta_q) {
|
||||
int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
|
||||
|
|
@ -1038,8 +1015,8 @@ static int decode_b(Dav1dTaskContext *const t,
|
|||
delta_lf = -delta_lf;
|
||||
delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
|
||||
}
|
||||
ts->last_delta_lf[i] =
|
||||
iclip(ts->last_delta_lf[i] + delta_lf, -63, 63);
|
||||
ts->last_delta_lf.i8[i] =
|
||||
iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
|
||||
if (have_delta_q && DEBUG_BLOCK_INFO)
|
||||
printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,
|
||||
ts->msac.rng);
|
||||
|
|
@ -1054,13 +1031,13 @@ static int decode_b(Dav1dTaskContext *const t,
|
|||
init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
|
||||
ts->dq = ts->dqmem;
|
||||
}
|
||||
if (!memcmp(ts->last_delta_lf, (int8_t[4]) { 0, 0, 0, 0 }, 4)) {
|
||||
if (!ts->last_delta_lf.u32) {
|
||||
// assign frame-wide lf values to this sb
|
||||
ts->lflvl = f->lf.lvl;
|
||||
} else if (memcmp(ts->last_delta_lf, prev_delta_lf, 4)) {
|
||||
} else if (ts->last_delta_lf.u32 != prev_delta_lf) {
|
||||
// find sb-specific lf lvl parameters
|
||||
dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf);
|
||||
ts->lflvl = ts->lflvlmem;
|
||||
dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1324,7 +1301,7 @@ static int decode_b(Dav1dTaskContext *const t,
|
|||
}
|
||||
|
||||
const union mv ref = b->mv[0];
|
||||
read_mv_residual(t, &b->mv[0], &ts->cdf.dmv, 0);
|
||||
read_mv_residual(ts, &b->mv[0], -1);
|
||||
|
||||
// clip intrabc motion vector to decoded parts of current tile
|
||||
int border_left = ts->tiling.col_start * 4;
|
||||
|
|
@ -1586,8 +1563,8 @@ static int decode_b(Dav1dTaskContext *const t,
|
|||
break; \
|
||||
case NEWMV: \
|
||||
b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
|
||||
read_mv_residual(t, &b->mv[idx], &ts->cdf.mv, \
|
||||
!f->frame_hdr->force_integer_mv); \
|
||||
const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
|
||||
read_mv_residual(ts, &b->mv[idx], mv_prec); \
|
||||
break; \
|
||||
}
|
||||
has_subpel_filter = imin(bw4, bh4) == 1 ||
|
||||
|
|
@ -1775,8 +1752,8 @@ static int decode_b(Dav1dTaskContext *const t,
|
|||
if (DEBUG_BLOCK_INFO)
|
||||
printf("Post-intermode[%d,drl=%d]: r=%d\n",
|
||||
b->inter_mode, b->drl_idx, ts->msac.rng);
|
||||
read_mv_residual(t, &b->mv[0], &ts->cdf.mv,
|
||||
!f->frame_hdr->force_integer_mv);
|
||||
const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv;
|
||||
read_mv_residual(ts, &b->mv[0], mv_prec);
|
||||
if (DEBUG_BLOCK_INFO)
|
||||
printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
|
||||
b->mv[0].y, b->mv[0].x, ts->msac.rng);
|
||||
|
|
@ -2495,7 +2472,7 @@ static void setup_tile(Dav1dTileState *const ts,
|
|||
|
||||
dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
|
||||
ts->last_qidx = f->frame_hdr->quant.yac;
|
||||
memset(ts->last_delta_lf, 0, sizeof(ts->last_delta_lf));
|
||||
ts->last_delta_lf.u32 = 0;
|
||||
|
||||
dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update);
|
||||
|
||||
|
|
|
|||
9
third_party/dav1d/src/internal.h
vendored
9
third_party/dav1d/src/internal.h
vendored
|
|
@ -303,8 +303,8 @@ struct Dav1dFrameContext {
|
|||
int lr_buf_plane_sz[2]; /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */
|
||||
int re_sz /* h */;
|
||||
ALIGN(Av1FilterLUT lim_lut, 16);
|
||||
ALIGN(uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */], 16);
|
||||
int last_sharpness;
|
||||
uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
|
||||
uint8_t *tx_lpf_right_edge[2];
|
||||
uint8_t *cdef_line_buf, *lr_line_buf;
|
||||
pixel *cdef_line[2 /* pre, post */][3 /* plane */];
|
||||
|
|
@ -376,8 +376,11 @@ struct Dav1dTileState {
|
|||
const uint16_t (*dq)[3][2];
|
||||
int last_qidx;
|
||||
|
||||
int8_t last_delta_lf[4];
|
||||
uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
|
||||
union {
|
||||
int8_t i8[4];
|
||||
uint32_t u32;
|
||||
} last_delta_lf;
|
||||
ALIGN(uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */], 16);
|
||||
const uint8_t (*lflvl)[4][8][2];
|
||||
|
||||
Av1RestorationUnit *lr_ref[3];
|
||||
|
|
|
|||
63
third_party/dav1d/src/itx.h
vendored
63
third_party/dav1d/src/itx.h
vendored
|
|
@ -39,10 +39,73 @@ void (name)(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob \
|
|||
HIGHBD_DECL_SUFFIX)
|
||||
typedef decl_itx_fn(*itxfm_fn);
|
||||
|
||||
#define decl_itx2_fns(w, h, opt) \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_##w##x##h, opt))
|
||||
|
||||
#define decl_itx12_fns(w, h, opt) \
|
||||
decl_itx2_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_dct_##w##x##h, opt))
|
||||
|
||||
#define decl_itx16_fns(w, h, opt) \
|
||||
decl_itx12_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_flipadst_##w##x##h, opt))
|
||||
|
||||
#define decl_itx17_fns(w, h, opt) \
|
||||
decl_itx16_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
|
||||
|
||||
typedef struct Dav1dInvTxfmDSPContext {
|
||||
itxfm_fn itxfm_add[N_RECT_TX_SIZES][N_TX_TYPES_PLUS_LL];
|
||||
} Dav1dInvTxfmDSPContext;
|
||||
|
||||
bitfn_decls(void dav1d_itx_dsp_init, Dav1dInvTxfmDSPContext *c, int bpc);
|
||||
|
||||
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
|
||||
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
|
||||
BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
|
||||
|
||||
#define assign_itx1_fn(pfx, w, h, ext) \
|
||||
assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)
|
||||
|
||||
#define assign_itx2_fn(pfx, w, h, ext) \
|
||||
assign_itx1_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext)
|
||||
|
||||
#define assign_itx12_fn(pfx, w, h, ext) \
|
||||
assign_itx2_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext)
|
||||
|
||||
#define assign_itx16_fn(pfx, w, h, ext) \
|
||||
assign_itx12_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext)
|
||||
|
||||
#define assign_itx17_fn(pfx, w, h, ext) \
|
||||
assign_itx16_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext)
|
||||
|
||||
#endif /* DAV1D_SRC_ITX_H */
|
||||
|
|
|
|||
6
third_party/dav1d/src/lf_mask.c
vendored
6
third_party/dav1d/src/lf_mask.c
vendored
|
|
@ -436,7 +436,7 @@ static void calc_lf_value(uint8_t (*const lflvl_values)[2],
|
|||
const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63);
|
||||
|
||||
if (!mr_delta) {
|
||||
memset(lflvl_values, base, 8 * 2);
|
||||
memset(lflvl_values, base, sizeof(*lflvl_values) * 8);
|
||||
} else {
|
||||
const int sh = base >= 32;
|
||||
lflvl_values[0][0] = lflvl_values[0][1] =
|
||||
|
|
@ -457,7 +457,7 @@ static inline void calc_lf_value_chroma(uint8_t (*const lflvl_values)[2],
|
|||
const Dav1dLoopfilterModeRefDeltas *const mr_delta)
|
||||
{
|
||||
if (!base_lvl)
|
||||
memset(lflvl_values, 0, 8 * 2);
|
||||
memset(lflvl_values, 0, sizeof(*lflvl_values) * 8);
|
||||
else
|
||||
calc_lf_value(lflvl_values, base_lvl, lf_delta, seg_delta, mr_delta);
|
||||
}
|
||||
|
|
@ -469,7 +469,7 @@ void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2],
|
|||
const int n_seg = hdr->segmentation.enabled ? 8 : 1;
|
||||
|
||||
if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) {
|
||||
memset(lflvl_values, 0, 8 * 4 * 2 * n_seg);
|
||||
memset(lflvl_values, 0, sizeof(*lflvl_values) * n_seg);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
1
third_party/dav1d/src/meson.build
vendored
1
third_party/dav1d/src/meson.build
vendored
|
|
@ -106,6 +106,7 @@ if is_asm_enabled
|
|||
'arm/64/loopfilter.S',
|
||||
'arm/64/looprestoration.S',
|
||||
'arm/64/mc.S',
|
||||
'arm/64/mc_dotprod.S',
|
||||
)
|
||||
endif
|
||||
|
||||
|
|
|
|||
4
third_party/dav1d/src/refmvs.c
vendored
4
third_party/dav1d/src/refmvs.c
vendored
|
|
@ -817,7 +817,9 @@ int dav1d_refmvs_init_frame(refmvs_frame *const rf,
|
|||
if (r_stride != rf->r_stride || n_tile_rows != rf->n_tile_rows) {
|
||||
if (rf->r) dav1d_freep_aligned(&rf->r);
|
||||
const int uses_2pass = n_tile_threads > 1 && n_frame_threads > 1;
|
||||
rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, sizeof(*rf->r) * 35 * r_stride * n_tile_rows * (1 + uses_2pass), 64);
|
||||
/* sizeof(refmvs_block) == 12 but it's accessed using 16-byte loads in asm,
|
||||
* so add 4 bytes of padding to avoid buffer overreads. */
|
||||
rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, sizeof(*rf->r) * 35 * r_stride * n_tile_rows * (1 + uses_2pass) + 4, 64);
|
||||
if (!rf->r) return DAV1D_ERR(ENOMEM);
|
||||
rf->r_stride = r_stride;
|
||||
}
|
||||
|
|
|
|||
63
third_party/dav1d/src/riscv/itx.h
vendored
63
third_party/dav1d/src/riscv/itx.h
vendored
|
|
@ -28,34 +28,6 @@
|
|||
#include "src/cpu.h"
|
||||
#include "src/itx.h"
|
||||
|
||||
#define decl_itx2_fns(w, h, opt) \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_##w##x##h, opt))
|
||||
|
||||
#define decl_itx12_fns(w, h, opt) \
|
||||
decl_itx2_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_dct_##w##x##h, opt))
|
||||
|
||||
#define decl_itx16_fns(w, h, opt) \
|
||||
decl_itx12_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_flipadst_##w##x##h, opt))
|
||||
|
||||
#define decl_itx17_fns(w, h, opt) \
|
||||
decl_itx16_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
|
||||
|
||||
#define decl_itx_fns(ext) \
|
||||
decl_itx17_fns( 4, 4, ext); \
|
||||
decl_itx16_fns( 4, 8, ext); \
|
||||
|
|
@ -70,41 +42,6 @@ decl_itx16_fns(16, 16, ext)
|
|||
decl_itx_fns(rvv);
|
||||
|
||||
static ALWAYS_INLINE void itx_dsp_init_riscv(Dav1dInvTxfmDSPContext *const c, int const bpc) {
|
||||
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
|
||||
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
|
||||
BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
|
||||
|
||||
#define assign_itx1_fn(pfx, w, h, ext) \
|
||||
assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)
|
||||
|
||||
#define assign_itx2_fn(pfx, w, h, ext) \
|
||||
assign_itx1_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext)
|
||||
|
||||
#define assign_itx12_fn(pfx, w, h, ext) \
|
||||
assign_itx2_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext)
|
||||
|
||||
#define assign_itx16_fn(pfx, w, h, ext) \
|
||||
assign_itx12_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext)
|
||||
|
||||
#define assign_itx17_fn(pfx, w, h, ext) \
|
||||
assign_itx16_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext)
|
||||
|
||||
const unsigned flags = dav1d_get_cpu_flags();
|
||||
|
||||
if (!(flags & DAV1D_RISCV_CPU_FLAG_V)) return;
|
||||
|
|
|
|||
3
third_party/dav1d/src/x86/ipred_avx2.asm
vendored
3
third_party/dav1d/src/x86/ipred_avx2.asm
vendored
|
|
@ -66,7 +66,8 @@ z_filter_wh: db 7, 7, 11, 11, 15, 15, 19, 19, 19, 23, 23, 23, 31, 31, 31, 39
|
|||
z_filter_k: db 0, 16, 0, 16, 0, 20, 0, 20, 8, 16, 8, 16
|
||||
db 32, 16, 32, 16, 24, 20, 24, 20, 16, 16, 16, 16
|
||||
db 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 8, 0
|
||||
z_filter_s: db 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7
|
||||
const \
|
||||
z_filter_s, db 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7
|
||||
db 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15
|
||||
db 15, 15, 15, 15, 15, 15, 15, 15 ; should be in one cache line
|
||||
pb_128: times 4 db 128 ; those are just placed here for alignment.
|
||||
|
|
|
|||
64
third_party/dav1d/src/x86/itx.h
vendored
64
third_party/dav1d/src/x86/itx.h
vendored
|
|
@ -30,34 +30,6 @@
|
|||
|
||||
#define BF_BPC(x, bits, suffix) x##_##bits##bpc_##suffix
|
||||
|
||||
#define decl_itx2_fns(w, h, opt) \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_identity_##w##x##h, opt))
|
||||
|
||||
#define decl_itx12_fns(w, h, opt) \
|
||||
decl_itx2_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_dct_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_dct_##w##x##h, opt))
|
||||
|
||||
#define decl_itx16_fns(w, h, opt) \
|
||||
decl_itx12_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_adst_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_flipadst_identity_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_adst_##w##x##h, opt)); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_identity_flipadst_##w##x##h, opt))
|
||||
|
||||
#define decl_itx17_fns(w, h, opt) \
|
||||
decl_itx16_fns(w, h, opt); \
|
||||
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_##w##x##h, opt))
|
||||
|
||||
#define decl_itx_fns(ext) \
|
||||
decl_itx17_fns( 4, 4, ext); \
|
||||
decl_itx16_fns( 4, 8, ext); \
|
||||
|
|
@ -136,42 +108,6 @@ decl_itx_fn(dav1d_inv_txfm_add_wht_wht_4x4_16bpc_avx2);
|
|||
decl_itx_fn(BF(dav1d_inv_txfm_add_wht_wht_4x4, sse2));
|
||||
|
||||
static ALWAYS_INLINE void itx_dsp_init_x86(Dav1dInvTxfmDSPContext *const c, const int bpc) {
|
||||
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
|
||||
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
|
||||
BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
|
||||
|
||||
#define assign_itx1_fn(pfx, w, h, ext) \
|
||||
assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)
|
||||
|
||||
#define assign_itx2_fn(pfx, w, h, ext) \
|
||||
assign_itx1_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext)
|
||||
|
||||
#define assign_itx12_fn(pfx, w, h, ext) \
|
||||
assign_itx2_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext)
|
||||
|
||||
#define assign_itx16_fn(pfx, w, h, ext) \
|
||||
assign_itx12_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
|
||||
assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext)
|
||||
|
||||
#define assign_itx17_fn(pfx, w, h, ext) \
|
||||
assign_itx16_fn(pfx, w, h, ext); \
|
||||
assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext)
|
||||
|
||||
|
||||
#define assign_itx_bpc_fn(pfx, w, h, type, type_enum, bpc, ext) \
|
||||
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
|
||||
BF_BPC(dav1d_inv_txfm_add_##type##_##w##x##h, bpc, ext)
|
||||
|
|
|
|||
1548
third_party/dav1d/src/x86/mc16_avx2.asm
vendored
1548
third_party/dav1d/src/x86/mc16_avx2.asm
vendored
File diff suppressed because it is too large
Load diff
1471
third_party/dav1d/src/x86/mc_avx2.asm
vendored
1471
third_party/dav1d/src/x86/mc_avx2.asm
vendored
File diff suppressed because it is too large
Load diff
3029
third_party/dav1d/src/x86/mc_avx512.asm
vendored
3029
third_party/dav1d/src/x86/mc_avx512.asm
vendored
File diff suppressed because it is too large
Load diff
2
third_party/dav1d/tests/meson.build
vendored
2
third_party/dav1d/tests/meson.build
vendored
|
|
@ -100,7 +100,7 @@ if is_asm_enabled
|
|||
],
|
||||
)
|
||||
|
||||
test('checkasm', checkasm, suite: 'checkasm', timeout: 180, is_parallel: false)
|
||||
test('checkasm', checkasm, suite: 'checkasm', timeout: 180)
|
||||
benchmark('checkasm', checkasm, suite: 'checkasm', timeout: 3600, args: '--bench')
|
||||
endif
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue