fune/third_party/jpeg-xl/lib/jxl/dec_cache.h

274 lines
8.7 KiB
C++

// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#ifndef LIB_JXL_DEC_CACHE_H_
#define LIB_JXL_DEC_CACHE_H_
#include <jxl/decode.h>
#include <jxl/types.h>
#include <stdint.h>
#include <algorithm>
#include <atomic>
#include <cmath>
#include <hwy/base.h> // HWY_ALIGN_MAX
#include <memory>
#include <vector>
#include "hwy/aligned_allocator.h"
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/common.h" // kMaxNumPasses
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/coeff_order.h"
#include "lib/jxl/common.h"
#include "lib/jxl/dct_util.h"
#include "lib/jxl/dec_ans.h"
#include "lib/jxl/dec_xyb.h"
#include "lib/jxl/frame_dimensions.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/passes_state.h"
#include "lib/jxl/render_pipeline/render_pipeline.h"
#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
#include "lib/jxl/render_pipeline/stage_upsampling.h"
namespace jxl {
// Sizing constants for the EPF sigma image (PassesDecoderState::sigma).
// NOTE(review): kSigmaBorder is not referenced anywhere in this header;
// presumably it is the border (in blocks) that users of the sigma image have
// to account for - confirm at the use sites.
constexpr size_t kSigmaBorder = 1;
// PassesDecoderState::Init() allocates the sigma image with
// 2 * kSigmaPadding columns and rows in addition to the frame size in blocks.
constexpr size_t kSigmaPadding = 2;
// Set of user-provided output callbacks, together with the opaque pointer that
// is forwarded to the init callback.
struct PixelCallback {
  // Leaves every callback (and the opaque pointer) null.
  PixelCallback() = default;

  // Stores the three callbacks and the opaque init argument. With assertions
  // enabled, verifies that the callbacks are either all set or all null.
  PixelCallback(JxlImageOutInitCallback init, JxlImageOutRunCallback run,
                JxlImageOutDestroyCallback destroy, void* init_opaque)
      : init(init), run(run), destroy(destroy), init_opaque(init_opaque) {
#if JXL_ENABLE_ASSERT
    const bool init_set = (init != nullptr);
    const bool run_set = (run != nullptr);
    const bool destroy_set = (destroy != nullptr);
    const bool all_set = init_set && run_set && destroy_set;
    const bool none_set = !init_set && !run_set && !destroy_set;
    const bool healthy = all_set || none_set;
    JXL_ASSERT(healthy);
#endif
  }

  // True when a run callback has been provided.
  bool IsPresent() const { return nullptr != run; }

  // Calls the init callback with the stored opaque pointer and returns its
  // result. The init callback is invoked unconditionally, so this must only be
  // used when it is non-null.
  void* Init(size_t num_threads, size_t num_pixels) const {
    void* const handle = init(init_opaque, num_threads, num_pixels);
    return handle;
  }

  JxlImageOutInitCallback init = nullptr;
  JxlImageOutRunCallback run = nullptr;
  JxlImageOutDestroyCallback destroy = nullptr;
  void* init_opaque = nullptr;
};
// Description of one pixel output target: the requested pixel format plus
// either a callback or a caller-provided buffer. PassesDecoderState keeps one
// instance for the main output and a vector of them for extra outputs.
// None of the plain members below has a default initializer, so whoever
// creates an ImageOutput has to fill them in explicitly.
struct ImageOutput {
// Pixel format of the output pixels, used for buffer and callback output.
JxlPixelFormat format;
// Output bit depth for unsigned data types, used for float to int conversion.
size_t bits_per_sample;
// Callback for line-by-line output.
PixelCallback callback;
// Pixel buffer for image output.
void* buffer;
// NOTE(review): presumably the size of `buffer` in bytes - confirm with the
// code that fills this in.
size_t buffer_size;
// Length of a row of `buffer` in bytes (based on oriented width).
size_t stride;
};
// State the decoder keeps for one frame. Every image stored here is meant to
// be addressed through a group rect, given either in block units or in pixel
// units.
struct PassesDecoderState {
  PassesSharedState shared_storage;
  // Points at shared_storage by default; makes it possible to avoid copies in
  // the encoder loop.
  const PassesSharedState* JXL_RESTRICT shared = &shared_storage;

  // Stage that performs the 8x upsampling of DC.
  std::unique_ptr<RenderPipelineStage> upsampler8x;

  // ANS decoding state.
  std::vector<ANSCode> code;
  std::vector<std::vector<uint8_t>> context_map;

  // Multiplier for the quant matrices of the x channel; computed by Init()
  // from the frame header's x_qm_scale.
  float x_dm_multiplier;
  // Counterpart computed by Init() from the frame header's b_qm_scale.
  float b_dm_multiplier;

  // EPF sigma values.
  ImageF sigma;

  // Dimensions of the image before undo_orientation is applied.
  size_t width;
  size_t height;

  ImageOutput main_output;
  std::vector<ImageOutput> extra_output;

  // Selects the int16 float-XYB-to-uint8-srgb conversion.
  bool fast_xyb_srgb8_conversion;

  // When set, RGBA output is unpremultiplied before being written out.
  bool unpremul_alpha;

  // Orientation applied by the render pipeline so that the image ends up in
  // its intended display orientation.
  Orientation undo_orientation;

  // Frame counters used for seeding noise.
  size_t visible_frame_index = 0;
  size_t nonvisible_frame_index = 0;

  // Bit set of the transform types that were used (bit o <=> raw strategy o).
  std::atomic<uint32_t> used_acs{0};

  // Coefficient storage for "accumulate" mode.
  std::unique_ptr<ACImage> coefficients = make_unique<ACImageT<int32_t>>();

  // The rendering pipeline.
  std::unique_ptr<RenderPipeline> render_pipeline;

  // Holds the current frame when later frames may reference it.
  ImageBundle frame_storage_for_referencing;

  struct PipelineOptions {
    bool use_slow_render_pipeline;
    bool coalescing;
    bool render_spotcolors;
    bool render_noise;
  };

  // Declared here, defined elsewhere.
  Status PreparePipeline(const FrameHeader& frame_header, ImageBundle* decoded,
                         PipelineOptions options);

  // Colour conversion information.
  OutputEncodingInfo output_encoding_info;

  // Sets up the decoder-specific structures from `frame_header` and *shared:
  // computes the x/b multipliers, clears the output targets and output
  // options, resets used_acs, creates the 8x upsampling stage and, when EPF is
  // enabled (epf_iters > 0), allocates the sigma image. Returns an error only
  // if that allocation fails.
  Status Init(const FrameHeader& frame_header) {
    // Both multipliers are (1 / 1.25) raised to (scale - 2).
    const float dm_base = 1 / (1.25f);
    x_dm_multiplier = std::pow(dm_base, frame_header.x_qm_scale - 2.0f);
    b_dm_multiplier = std::pow(dm_base, frame_header.b_qm_scale - 2.0f);

    // Drop any previously configured output target and output options.
    main_output.callback = PixelCallback();
    main_output.buffer = nullptr;
    extra_output.clear();
    fast_xyb_srgb8_conversion = false;
    unpremul_alpha = false;
    undo_orientation = Orientation::kIdentity;
    used_acs.store(0);

    upsampler8x = GetUpsamplingStage(shared->metadata->transform_data, 0, 3);

    const bool epf_enabled = frame_header.loop_filter.epf_iters > 0;
    if (!epf_enabled) return true;

    // The sigma image covers the frame in blocks plus 2 * kSigmaPadding extra
    // columns and rows.
    const auto sigma_xsize =
        shared->frame_dim.xsize_blocks + 2 * kSigmaPadding;
    const auto sigma_ysize =
        shared->frame_dim.ysize_blocks + 2 * kSigmaPadding;
    JXL_ASSIGN_OR_RETURN(sigma, ImageF::Create(sigma_xsize, sigma_ysize));
    return true;
  }

  // Prepares the state for AC decoding once all of DC has been decoded:
  // recomputes shared_storage.coeff_order_size from the strategies recorded in
  // used_acs and grows shared_storage.coeff_orders so that it holds at least
  // num_passes * coeff_order_size entries (it is never shrunk). `pool` is not
  // used by this implementation. Always returns true.
  Status InitForAC(size_t num_passes, ThreadPool* pool) {
    auto& order_size = shared_storage.coeff_order_size;
    order_size = 0;
    for (uint8_t raw = 0; raw < AcStrategy::kNumValidStrategies; ++raw) {
      const bool in_use = (used_acs.load() & (1 << raw)) != 0;
      if (!in_use) continue;
      const uint8_t ord = kStrategyOrder[raw];
      const auto needed = kCoeffOrderOffset[3 * (ord + 1)] * kDCTBlockSize;
      if (order_size < needed) order_size = needed;
    }

    const size_t required = num_passes * order_size;
    auto& orders = shared_storage.coeff_orders;
    if (orders.size() < required) {
      orders.resize(required);
    }
    return true;
  }
};
// Temp images required for decoding a single group. Reduces memory allocations
// for large images because we only initialize min(#threads, #groups) instances.
struct GroupDecCache {
  // Makes sure the cache can serve a frame with `num_passes` passes that uses
  // the AC strategies whose bits are set in `used_acs` (bit o <=> raw strategy
  // o):
  //  - allocates the per-pass num_nzeroes images that do not exist yet;
  //  - (re)allocates the scratch buffers if the largest used block is bigger
  //    than what the current buffers were sized for;
  //  - refreshes the public scratch pointers.
  // Returns an error if any allocation fails. In that case the previously
  // allocated scratch buffers, and the pointers into them, are left untouched.
  Status InitOnce(size_t num_passes, size_t used_acs) {
    for (size_t i = 0; i < num_passes; i++) {
      if (num_nzeroes[i].xsize() == 0) {
        // Allocate enough for a whole group - partial groups on the
        // right/bottom border just use a subset. The valid size is passed via
        // Rect.
        JXL_ASSIGN_OR_RETURN(
            num_nzeroes[i],
            Image3I::Create(kGroupDimInBlocks, kGroupDimInBlocks));
      }
    }

    // Area, in coefficients, of the largest block among the used strategies.
    size_t max_block_area = 0;
    for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
      AcStrategy acs = AcStrategy::FromRawStrategy(o);
      if ((used_acs & (1 << o)) == 0) continue;
      size_t area =
          acs.covered_blocks_x() * acs.covered_blocks_y() * kDCTBlockSize;
      max_block_area = std::max(area, max_block_area);
    }

    if (max_block_area > max_block_area_) {
      // Allocate into locals first so that a failed allocation neither drops
      // the existing buffers nor records a size that was never obtained.
      //
      // Float buffer: 7 blocks in total. The first 3 hold the dequantized
      // coefficients, the remaining 4 are scratch space for transforms.
      auto float_memory = hwy::AllocateAligned<float>(max_block_area * 7);
      // We need 3x int32 or int16 blocks for quantized coefficients.
      auto int32_memory = hwy::AllocateAligned<int32_t>(max_block_area * 3);
      auto int16_memory = hwy::AllocateAligned<int16_t>(max_block_area * 3);
      if (!float_memory || !int32_memory || !int16_memory) {
        // hwy::AllocateAligned reports failure by returning a null pointer.
        return false;
      }
      // Commit: after the swaps the locals own the old buffers and release
      // them when they go out of scope.
      float_memory_.swap(float_memory);
      int32_memory_.swap(int32_memory);
      int16_memory_.swap(int16_memory);
      max_block_area_ = max_block_area;
    }

    dec_group_block = float_memory_.get();
    scratch_space = dec_group_block + max_block_area_ * 3;
    dec_group_qblock = int32_memory_.get();
    dec_group_qblock16 = int16_memory_.get();
    return true;
  }

  // Allocates dc_buffer on first use; later calls are no-ops. Returns an error
  // if the allocation fails.
  Status InitDCBufferOnce() {
    if (dc_buffer.xsize() == 0) {
      JXL_ASSIGN_OR_RETURN(
          dc_buffer,
          ImageF::Create(kGroupDimInBlocks + kRenderPipelineXOffset * 2,
                         kGroupDimInBlocks + 4));
    }
    return true;
  }

  // Scratch space used by DecGroupImpl(). These pointers alias the private
  // buffers below and stay null until InitOnce() has succeeded.
  float* dec_group_block = nullptr;
  int32_t* dec_group_qblock = nullptr;
  int16_t* dec_group_qblock16 = nullptr;
  // For TransformToPixels.
  float* scratch_space = nullptr;
  // Note that scratch_space is never used at the same time as dec_group_qblock.
  // Moreover, only one of dec_group_qblock and dec_group_qblock16 is ever used
  // (NOTE(review): the second name was missing from the original comment;
  // presumably this is what was meant).
  // TODO(veluca): figure out if we can save allocations.

  // AC decoding
  Image3I num_nzeroes[kMaxNumPasses];

  // Buffer for DC upsampling.
  ImageF dc_buffer;

 private:
  hwy::AlignedFreeUniquePtr<float[]> float_memory_;
  hwy::AlignedFreeUniquePtr<int32_t[]> int32_memory_;
  hwy::AlignedFreeUniquePtr<int16_t[]> int16_memory_;
  // Block area (in coefficients) the buffers above are currently sized for.
  size_t max_block_area_ = 0;
};
} // namespace jxl
#endif // LIB_JXL_DEC_CACHE_H_