Bug 1877328 - Update libjxl to 07203da045f6b41f9b3b5b86023fd70b075137f6 r=saschanaz

Differential Revision: https://phabricator.services.mozilla.com/D199944
This commit is contained in:
Updatebot 2024-01-31 16:50:40 +00:00
parent dc85cacfa3
commit f942f8df8e
118 changed files with 1087 additions and 2510 deletions

View file

@ -21,7 +21,6 @@ SOURCES += [
"/third_party/jpeg-xl/lib/jxl/color_encoding_internal.cc",
"/third_party/jpeg-xl/lib/jxl/compressed_dc.cc",
"/third_party/jpeg-xl/lib/jxl/convolve_separable5.cc",
"/third_party/jpeg-xl/lib/jxl/convolve_separable7.cc",
"/third_party/jpeg-xl/lib/jxl/convolve_slow.cc",
"/third_party/jpeg-xl/lib/jxl/convolve_symmetric3.cc",
"/third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc",
@ -44,7 +43,6 @@ SOURCES += [
"/third_party/jpeg-xl/lib/jxl/fast_dct.cc",
"/third_party/jpeg-xl/lib/jxl/fields.cc",
"/third_party/jpeg-xl/lib/jxl/frame_header.cc",
"/third_party/jpeg-xl/lib/jxl/gauss_blur.cc",
"/third_party/jpeg-xl/lib/jxl/headers.cc",
"/third_party/jpeg-xl/lib/jxl/huffman_table.cc",
"/third_party/jpeg-xl/lib/jxl/icc_codec.cc",

View file

@ -10,9 +10,9 @@ origin:
url: https://github.com/libjxl/libjxl
release: b26041c708d523ac53bb7d95d4f5c4a5d3b1ce30 (2024-01-08T13:55:50Z).
release: 07203da045f6b41f9b3b5b86023fd70b075137f6 (2024-01-29T17:41:05Z).
revision: b26041c708d523ac53bb7d95d4f5c4a5d3b1ce30
revision: 07203da045f6b41f9b3b5b86023fd70b075137f6
license: Apache-2.0

View file

@ -79,6 +79,7 @@ Pieter Wuille
roland-rollo
Samuel Leong <wvvwvvvvwvvw@gmail.com>
Sandro <sandro.jaeckel@gmail.com>
sandstrom
Sergey Fedorov <vital.had@gmail.com>
Stephan T. Lavavej <stl@nuwen.net>
StepSecurity Bot <bot@stepsecurity.io>
@ -87,7 +88,9 @@ Thomas Bonfort <thomas.bonfort@airbus.com>
Timo Rothenpieler <timo@rothenpieler.org>
tmkk <tmkkmac@gmail.com>
Vincent Torri <vincent.torri@gmail.com>
Wonwoo Choi <chwo9843@gmail.com>
xiota
Yonatan Nebenzhal <yonatan.nebenzhl@gmail.com>
Ziemowit Zabawa <ziemek.zabawa@outlook.com>
源文雨 <41315874+fumiama@users.noreply.github.com>
oupson <oupson1er@gmail.com>

View file

@ -160,6 +160,8 @@ set(JPEGXL_ENABLE_AVX512_SPR false CACHE BOOL
"Build with AVX-512FP16 support (faster on CPUs that support it, but larger binary size).")
set(JPEGXL_ENABLE_AVX512_ZEN4 false CACHE BOOL
"Build with Zen4-optimized AVX512 support (faster on CPUs that support it, but larger binary size).")
# The option name (spelled "TRHEADS") is kept as-is: the EMSCRIPTEN check that
# adds -pthread tests this exact variable name.
set(JPEGXL_ENABLE_WASM_TRHEADS true CACHE BOOL
"Builds WASM modules with threads support")
# Force system dependencies.
set(JPEGXL_FORCE_SYSTEM_BROTLI false CACHE BOOL
@ -263,7 +265,7 @@ if(JPEGXL_STATIC)
endif()
endif() # JPEGXL_STATIC
if (EMSCRIPTEN)
if (EMSCRIPTEN AND JPEGXL_ENABLE_WASM_TRHEADS)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")

View file

@ -18,7 +18,7 @@ THIRD_PARTY_BROTLI="36533a866ed1ca4b75cf049f4521e4ec5fe24727"
THIRD_PARTY_HIGHWAY="ba0900a4957b929390ab73827235557959234fea"
THIRD_PARTY_SKCMS="42030a771244ba67f86b1c1c76a6493f873c5f91"
THIRD_PARTY_SJPEG="e5ab13008bb214deb66d5f3e17ca2f8dbff150bf"
THIRD_PARTY_ZLIB="cacf7f1d4e3d44d871b605da3b647f07d718623f"
THIRD_PARTY_ZLIB="51b7f2abdade71cd9bb0e7a373ef2610ec6f9daf" # v1.3.1
THIRD_PARTY_LIBPNG="f135775ad4e5d4408d2e12ffcc71bb36e6b48551" # v1.6.40
THIRD_PARTY_LIBJPEG_TURBO="8ecba3647edb6dd940463fedf38ca33a8e2a73d1" # 2.1.5.1

View file

@ -12,10 +12,12 @@
#endif
#include <inttypes.h>
#include <jxl/codestream_header.h>
#include <jxl/decode.h>
#include <jxl/decode_cxx.h>
#include <jxl/resizable_parallel_runner.h>
#include <jxl/resizable_parallel_runner_cxx.h>
#include <jxl/types.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

View file

@ -6,13 +6,18 @@
// This example encodes a file containing a floating point image to another
// file containing JPEG XL image with a single frame.
#include <jxl/codestream_header.h>
#include <jxl/color_encoding.h>
#include <jxl/encode.h>
#include <jxl/encode_cxx.h>
#include <jxl/thread_parallel_runner.h>
#include <jxl/thread_parallel_runner_cxx.h>
#include <jxl/types.h>
#include <limits.h>
#include <string.h>
#include <cstdint>
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

View file

@ -200,9 +200,6 @@ cc_library(
compatible_with = DEFAULT_COMPATIBILITY,
defines = [
"JPEGXL_ENABLE_SKCMS=1",
"JPEGXL_MAJOR_VERSION=" + str(libjxl_major_version),
"JPEGXL_MINOR_VERSION=" + str(libjxl_minor_version),
"JPEGXL_PATCH_VERSION=" + str(libjxl_patch_version),
],
deps = [
":base",

View file

@ -163,11 +163,10 @@ if (JPEGXL_ENABLE_JPEGLI)
include(jpegli.cmake)
endif()
# Install all the library headers from the source and the generated ones. There
# is no distinction on which libraries use which header since it is expected
# that all developer libraries are available together at build time.
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/jxl
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
# For simplicity all the library headers, both source and generated ones, are
# gathered in the binary folder. There is no distinction on which libraries use
# which header since it is expected that all developer libraries are available
# together at build time.
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/jxl
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")

View file

@ -40,23 +40,10 @@ Status SetFromBytes(const Span<const uint8_t> bytes,
return JXL_FAILURE("Codecs failed to decode");
}
Status Encode(const CodecInOut& io, const extras::Codec codec,
const ColorEncoding& c_desired, size_t bits_per_sample,
Status Encode(const extras::PackedPixelFile& ppf, const extras::Codec codec,
std::vector<uint8_t>* bytes, ThreadPool* pool) {
bytes->clear();
JXL_CHECK(!io.Main().c_current().ICC().empty());
JXL_CHECK(!c_desired.ICC().empty());
io.CheckMetadata();
if (io.Main().IsJPEG()) {
JXL_WARNING("Writing JPEG data as pixels");
}
JxlPixelFormat format = {
0, // num_channels is ignored by the converter
bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16, JXL_BIG_ENDIAN,
0};
const bool floating_point = bits_per_sample > 16;
std::unique_ptr<extras::Encoder> encoder;
std::ostringstream os;
switch (codec) {
case extras::Codec::kPNG:
encoder = extras::GetAPNGEncoder();
@ -66,25 +53,20 @@ Status Encode(const CodecInOut& io, const extras::Codec codec,
return JXL_FAILURE("JPEG XL was built without (A)PNG support");
}
case extras::Codec::kJPG:
format.data_type = JXL_TYPE_UINT8;
encoder = extras::GetJPEGEncoder();
if (encoder) {
os << io.jpeg_quality;
encoder->SetOption("q", os.str());
break;
} else {
return JXL_FAILURE("JPEG XL was built without JPEG support");
}
case extras::Codec::kPNM:
if (io.Main().HasAlpha()) {
if (ppf.info.alpha_bits > 0) {
encoder = extras::GetPAMEncoder();
} else if (io.Main().IsGray()) {
} else if (ppf.info.num_color_channels == 1) {
encoder = extras::GetPGMEncoder();
} else if (!floating_point) {
} else if (ppf.info.bits_per_sample <= 16) {
encoder = extras::GetPPMEncoder();
} else {
format.data_type = JXL_TYPE_FLOAT;
format.endianness = JXL_LITTLE_ENDIAN;
encoder = extras::GetPFMEncoder();
}
break;
@ -94,7 +76,6 @@ Status Encode(const CodecInOut& io, const extras::Codec codec,
case extras::Codec::kGIF:
return JXL_FAILURE("Encoding to GIF is not implemented");
case extras::Codec::kEXR:
format.data_type = JXL_TYPE_FLOAT;
encoder = extras::GetEXREncoder();
if (encoder) {
break;
@ -112,15 +93,6 @@ Status Encode(const CodecInOut& io, const extras::Codec codec,
if (!encoder) {
return JXL_FAILURE("Invalid codec.");
}
extras::PackedPixelFile ppf;
JXL_RETURN_IF_ERROR(
ConvertCodecInOutToPackedPixelFile(io, format, c_desired, pool, &ppf));
ppf.info.bits_per_sample = bits_per_sample;
if (format.data_type == JXL_TYPE_FLOAT) {
ppf.info.bits_per_sample = 32;
ppf.info.exponent_bits_per_sample = 8;
}
extras::EncodedImage encoded_image;
JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded_image, pool));
JXL_ASSERT(encoded_image.bitstreams.size() == 1);
@ -129,45 +101,12 @@ Status Encode(const CodecInOut& io, const extras::Codec codec,
return true;
}
Status Encode(const CodecInOut& io, const ColorEncoding& c_desired,
size_t bits_per_sample, const std::string& pathname,
Status Encode(const extras::PackedPixelFile& ppf, const std::string& pathname,
std::vector<uint8_t>* bytes, ThreadPool* pool) {
std::string extension;
const extras::Codec codec =
extras::CodecFromPath(pathname, &bits_per_sample, &extension);
// Warn about incorrect usage of PGM/PGX/PPM - only the latter supports
// color, but CodecFromPath lumps them all together.
if (codec == extras::Codec::kPNM && extension != ".pfm") {
if (io.Main().HasAlpha() && extension != ".pam") {
JXL_WARNING(
"For images with alpha, the filename should end with .pam.\n");
} else if (!io.Main().IsGray() && extension == ".pgm") {
JXL_WARNING("For color images, the filename should end with .ppm.\n");
} else if (io.Main().IsGray() && extension == ".ppm") {
JXL_WARNING(
"For grayscale images, the filename should not end with .ppm.\n");
}
if (bits_per_sample > 16) {
JXL_WARNING("PPM only supports up to 16 bits per sample");
bits_per_sample = 16;
}
} else if (codec == extras::Codec::kPGX && !io.Main().IsGray()) {
JXL_WARNING("Storing color image to PGX - use .ppm extension instead.\n");
}
if (bits_per_sample > 16 && codec == extras::Codec::kPNG) {
JXL_WARNING("PNG only supports up to 16 bits per sample");
bits_per_sample = 16;
}
return Encode(io, codec, c_desired, bits_per_sample, bytes, pool);
}
Status Encode(const CodecInOut& io, const std::string& pathname,
std::vector<uint8_t>* bytes, ThreadPool* pool) {
// TODO(lode): need to take the floating_point_sample field into account
return Encode(io, io.metadata.m.color_encoding,
io.metadata.m.bit_depth.bits_per_sample, pathname, bytes, pool);
extras::CodecFromPath(pathname, nullptr, &extension);
return Encode(ppf, codec, bytes, pool);
}
} // namespace jxl

View file

@ -43,18 +43,10 @@ JXL_INLINE Status SetFromBytes(const Span<const uint8_t> bytes, CodecInOut* io,
orig_codec);
}
// Replaces "bytes" with an encoding of pixels transformed from c_current
// color space to c_desired.
Status Encode(const CodecInOut& io, extras::Codec codec,
const ColorEncoding& c_desired, size_t bits_per_sample,
std::vector<uint8_t>* bytes, ThreadPool* pool = nullptr);
Status Encode(const extras::PackedPixelFile& ppf, const extras::Codec codec,
std::vector<uint8_t>* bytes, ThreadPool* pool);
// Deduces codec, calls Encode and writes to file.
Status Encode(const CodecInOut& io, const ColorEncoding& c_desired,
size_t bits_per_sample, const std::string& pathname,
std::vector<uint8_t>* bytes, ThreadPool* pool = nullptr);
// Same, but defaults to metadata.original color_encoding and bits_per_sample.
Status Encode(const CodecInOut& io, const std::string& pathname,
Status Encode(const extras::PackedPixelFile& ppf, const std::string& pathname,
std::vector<uint8_t>* bytes, ThreadPool* pool = nullptr);
} // namespace jxl

View file

@ -3,24 +3,33 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "lib/extras/codec.h"
#include <jxl/codestream_header.h>
#include <jxl/color_encoding.h>
#include <jxl/encode.h>
#include <jxl/types.h>
#include <stddef.h>
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "lib/extras/common.h"
#include "lib/extras/dec/color_hints.h"
#include "lib/extras/dec/decode.h"
#include "lib/extras/dec/pnm.h"
#include "lib/extras/enc/encode.h"
#include "lib/extras/packed_image.h"
#include "lib/jxl/base/byte_order.h"
#include "lib/jxl/base/random.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/test_utils.h"
#include "lib/jxl/testing.h"

View file

@ -37,7 +37,6 @@ std::string GetExtension(const std::string& path) {
Codec CodecFromPath(std::string path, size_t* JXL_RESTRICT bits_per_sample,
std::string* extension) {
std::string base;
std::string ext = GetExtension(path);
if (extension) {
if (extension->empty()) {

View file

@ -390,7 +390,7 @@ StatusOr<ChunkedPNMDecoder> ChunkedPNMDecoder::Init(const char* path) {
if (header.ysize * row_size + dec.data_start_ < size) {
return JXL_FAILURE("Invalid ppm");
}
return std::move(dec);
return dec;
}
jxl::Status ChunkedPNMDecoder::InitializePPF(const ColorHints& color_hints,

View file

@ -344,13 +344,11 @@ Status APNGEncoder::EncodePackedPixelFileToAPNG(
PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE,
PNG_FILTER_TYPE_BASE);
if (count == 0) {
if (!MaybeAddSRGB(ppf.color_encoding, png_ptr, info_ptr)) {
if (!ppf.icc.empty()) {
png_set_benign_errors(png_ptr, 1);
png_set_iCCP(png_ptr, info_ptr, "1", 0, ppf.icc.data(), ppf.icc.size());
} else if (!MaybeAddSRGB(ppf.color_encoding, png_ptr, info_ptr)) {
MaybeAddCICP(ppf.color_encoding, png_ptr, info_ptr);
if (!ppf.icc.empty()) {
png_set_benign_errors(png_ptr, 1);
png_set_iCCP(png_ptr, info_ptr, "1", 0, ppf.icc.data(),
ppf.icc.size());
}
MaybeAddCHRM(ppf.color_encoding, png_ptr, info_ptr);
MaybeAddGAMA(ppf.color_encoding, png_ptr, info_ptr);
}

View file

@ -7,12 +7,33 @@
#include <jxl/cms.h>
#include <jxl/codestream_header.h>
#include <jxl/types.h>
#include <setjmp.h>
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <hwy/aligned_allocator.h>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include "lib/extras/enc/encode.h"
#include "lib/extras/packed_image.h"
#include "lib/jpegli/common.h"
#include "lib/jpegli/encode.h"
#include "lib/jpegli/types.h"
#include "lib/jxl/base/byte_order.h"
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/enc_xyb.h"
#include "lib/jxl/image.h"
namespace jxl {
namespace extras {

View file

@ -186,14 +186,6 @@ Status SetJpegProgression(int progressive_id,
return true;
}
// Returns true when `c` is an RGB or grayscale encoding that uses sRGB
// primaries, a D65 white point and the sRGB transfer function.
bool IsSRGBEncoding(const JxlColorEncoding& c) {
  const bool rgb_or_gray = c.color_space == JXL_COLOR_SPACE_RGB ||
                           c.color_space == JXL_COLOR_SPACE_GRAY;
  if (!rgb_or_gray) return false;
  if (c.primaries != JXL_PRIMARIES_SRGB) return false;
  if (c.white_point != JXL_WHITE_POINT_D65) return false;
  return c.transfer_function == JXL_TRANSFER_FUNCTION_SRGB;
}
void WriteICCProfile(jpeg_compress_struct* const cinfo,
const std::vector<uint8_t>& icc) {
constexpr size_t kMaxIccBytesInMarker =
@ -598,18 +590,14 @@ class JPEGEncoder : public Encoder {
}
}
params.is_xyb = (ppf.color_encoding.color_space == JXL_COLOR_SPACE_XYB);
std::vector<uint8_t> icc;
if (!IsSRGBEncoding(ppf.color_encoding)) {
icc = ppf.icc;
}
encoded_image->bitstreams.clear();
encoded_image->bitstreams.reserve(ppf.frames.size());
for (const auto& frame : ppf.frames) {
JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
encoded_image->bitstreams.emplace_back();
JXL_RETURN_IF_ERROR(EncodeImageJPG(
frame.color, ppf.info, icc, ppf.metadata.exif, jpeg_encoder, params,
pool, &encoded_image->bitstreams.back()));
frame.color, ppf.info, ppf.icc, ppf.metadata.exif, jpeg_encoder,
params, pool, &encoded_image->bitstreams.back()));
}
return true;
}

View file

@ -8,11 +8,18 @@
#include "lib/extras/dec/jpegli.h"
#include <jxl/color_encoding.h>
#include <jxl/types.h>
#include <stdint.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <ostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "lib/extras/dec/color_hints.h"

View file

@ -260,6 +260,8 @@ class PackedPixelFile {
// Number of frames held by this file: the size of `chunked_frames` when that
// container is non-empty, otherwise the size of `frames`.
size_t num_frames() const {
return chunked_frames.empty() ? frames.size() : chunked_frames.size();
}
// Convenience accessors that forward to info.xsize / info.ysize.
size_t xsize() const { return info.xsize; }
size_t ysize() const { return info.ysize; }
};
} // namespace extras

View file

@ -98,6 +98,8 @@ Status ConvertPackedPixelFileToCodecInOut(const PackedPixelFile& ppf,
io->metadata.m.SetAlphaBits(ppf.info.alpha_bits,
ppf.info.alpha_premultiplied);
ExtraChannelInfo* alpha = io->metadata.m.Find(ExtraChannel::kAlpha);
if (alpha) alpha->bit_depth = io->metadata.m.bit_depth;
io->metadata.m.xyb_encoded = !ppf.info.uses_original_profile;
JXL_ASSERT(ppf.info.orientation > 0 && ppf.info.orientation <= 8);
@ -193,6 +195,35 @@ Status ConvertPackedPixelFileToCodecInOut(const PackedPixelFile& ppf,
return true;
}
// Packs the three planes of `image` into a PackedPixelFile holding exactly one
// frame laid out according to `format`.
//
// The returned file reports 3 color channels, the bit depth implied by
// `format.data_type` (with 8 exponent bits for JXL_TYPE_FLOAT, 5 for
// JXL_TYPE_FLOAT16 and 0 otherwise) and the external form of `c_enc` as its
// color encoding. A failed sample conversion trips JXL_CHECK.
PackedPixelFile ConvertImage3FToPackedPixelFile(const Image3F& image,
                                                const ColorEncoding& c_enc,
                                                JxlPixelFormat format,
                                                ThreadPool* pool) {
  PackedPixelFile ppf;

  // Image geometry and sample description.
  ppf.info.xsize = image.xsize();
  ppf.info.ysize = image.ysize();
  ppf.info.num_color_channels = 3;
  ppf.info.bits_per_sample = PackedImage::BitsPerChannel(format.data_type);
  switch (format.data_type) {
    case JXL_TYPE_FLOAT:
      ppf.info.exponent_bits_per_sample = 8;
      break;
    case JXL_TYPE_FLOAT16:
      ppf.info.exponent_bits_per_sample = 5;
      break;
    default:
      ppf.info.exponent_bits_per_sample = 0;
      break;
  }
  ppf.color_encoding = c_enc.ToExternal();
  ppf.frames.clear();

  // Convert the planes into the packed buffer of a new frame.
  PackedFrame frame(image.xsize(), image.ysize(), format);
  const ImageF* channels[3] = {&image.Plane(0), &image.Plane(1),
                               &image.Plane(2)};
  // A non-zero exponent width means the samples are floating point.
  const bool float_samples = ppf.info.exponent_bits_per_sample > 0;
  JXL_CHECK(ConvertChannelsToExternal(
      channels, 3, ppf.info.bits_per_sample, float_samples, format.endianness,
      frame.color.stride, pool, frame.color.pixels(0, 0, 0),
      frame.color.pixels_size, PixelCallback(), Orientation::kIdentity));

  ppf.frames.emplace_back(std::move(frame));
  return ppf;
}
// Allows converting from internal CodecInOut to external PackedPixelFile
Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io,
const JxlPixelFormat& pixel_format,
@ -200,7 +231,6 @@ Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io,
ThreadPool* pool,
PackedPixelFile* ppf) {
const bool has_alpha = io.metadata.m.HasAlpha();
bool alpha_premultiplied = false;
JXL_ASSERT(!io.frames.empty());
if (has_alpha) {
@ -209,7 +239,10 @@ Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io,
const auto* alpha_channel = io.metadata.m.Find(ExtraChannel::kAlpha);
JXL_ASSERT(alpha_channel->bit_depth.exponent_bits_per_sample ==
io.metadata.m.bit_depth.exponent_bits_per_sample);
alpha_premultiplied = alpha_channel->alpha_associated;
ppf->info.alpha_bits = alpha_channel->bit_depth.bits_per_sample;
ppf->info.alpha_exponent_bits =
alpha_channel->bit_depth.exponent_bits_per_sample;
ppf->info.alpha_premultiplied = alpha_channel->alpha_associated;
}
// Convert the image metadata
@ -226,9 +259,6 @@ Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io,
ppf->info.relative_to_max_display =
io.metadata.m.tone_mapping.relative_to_max_display;
ppf->info.alpha_bits = io.metadata.m.GetAlphaBits();
ppf->info.alpha_premultiplied = alpha_premultiplied;
ppf->info.uses_original_profile = !io.metadata.m.xyb_encoded;
JXL_ASSERT(0 < io.metadata.m.orientation && io.metadata.m.orientation <= 8);
ppf->info.orientation =

View file

@ -30,6 +30,11 @@ Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io,
const ColorEncoding& c_desired,
ThreadPool* pool,
PackedPixelFile* ppf);
PackedPixelFile ConvertImage3FToPackedPixelFile(const Image3F& image,
const ColorEncoding& c_enc,
JxlPixelFormat format,
ThreadPool* pool);
} // namespace extras
} // namespace jxl

View file

@ -20,6 +20,8 @@
#include <jxl/color_encoding.h>
#include <jxl/types.h>
#include <stddef.h>
#include <stdint.h>
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {

View file

@ -20,7 +20,7 @@
#include <jxl/memory_manager.h>
#include <jxl/parallel_runner.h>
#include <jxl/types.h>
#include <jxl/version.h>
#include <jxl/version.h> // TODO(eustas): remove before v1.0
#include <stddef.h>
#include <stdint.h>
@ -1359,15 +1359,7 @@ JXL_EXPORT JxlDecoderStatus JxlDecoderGetBoxType(JxlDecoder* dec,
/**
* Returns the size of a box as it appears in the container file, after the @ref
* JXL_DEC_BOX event. For a non-compressed box, this is the size of the
* contents, excluding the 4 bytes indicating the box type. For a compressed
* "brob" box, this is the size of the compressed box contents plus the
* additional 4 byte indicating the underlying box type, but excluding the 4
* bytes indicating "brob". This function gives the size of the data that will
* be written in the output buffer when getting boxes in the default raw
* compressed mode. When @ref JxlDecoderSetDecompressBoxes is enabled, the
* return value of function does not change, and the decompressed size is not
* known before it has already been decompressed and output.
* JXL_DEC_BOX event. This includes all the box headers.
*
* @param dec decoder object
* @param size raw size of the box in bytes

View file

@ -16,6 +16,7 @@
#define JXL_DECODE_CXX_H_
#include <jxl/decode.h>
#include <jxl/memory_manager.h>
#include <memory>

View file

@ -15,15 +15,16 @@
#include <jxl/cms_interface.h>
#include <jxl/codestream_header.h>
#include <jxl/color_encoding.h>
#include <jxl/jxl_export.h>
#include <jxl/memory_manager.h>
#include <jxl/parallel_runner.h>
#include <jxl/stats.h>
#include <jxl/version.h>
#include <jxl/types.h>
#include <jxl/version.h> // TODO(eustas): remove before v1.0
#include <stddef.h>
#include <stdint.h>
#include "jxl/types.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@ -378,6 +379,14 @@ typedef enum {
*/
JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF = 37,
/** If this mode is disabled, the encoder will not make any image quality
* decisions that are computed based on the full image, but stored only once
* (e.g. the X quant multiplier in the frame header). Used mainly for testing
* equivalence of streaming and non-streaming code.
* 0 = disabled, 1 = enabled (default)
*/
JXL_ENC_FRAME_SETTING_USE_FULL_IMAGE_HEURISTICS = 38,
/** Enum value not to be used as an option. This value is added to force the
* C compiler to have the enum to take a known size.
*/
@ -1193,8 +1202,8 @@ JXL_EXPORT JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc,
* JXL_ENC_ERROR or JXL_ENC_NOT_SUPPORTED otherwise
*/
JXL_EXPORT JxlEncoderStatus JxlEncoderSetUpsamplingMode(JxlEncoder* enc,
const int64_t factor,
const int64_t mode);
int64_t factor,
int64_t mode);
/**
* Initializes a JxlExtraChannelInfo struct to default values.

View file

@ -16,6 +16,7 @@
#define JXL_ENCODE_CXX_H_
#include <jxl/encode.h>
#include <jxl/memory_manager.h>
#include <memory>

View file

@ -16,6 +16,7 @@
#ifndef JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
#define JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
#include <jxl/memory_manager.h>
#include <jxl/resizable_parallel_runner.h>
#include <memory>

View file

@ -15,8 +15,10 @@
#ifndef JXL_THREAD_PARALLEL_RUNNER_CXX_H_
#define JXL_THREAD_PARALLEL_RUNNER_CXX_H_
#include <jxl/memory_manager.h>
#include <jxl/thread_parallel_runner.h>
#include <cstddef>
#include <memory>
#if !(defined(__cplusplus) || defined(c_plusplus))

View file

@ -43,9 +43,6 @@ else()
endif ()
set(OBJ_COMPILE_DEFINITIONS
JPEGXL_MAJOR_VERSION=${JPEGXL_MAJOR_VERSION}
JPEGXL_MINOR_VERSION=${JPEGXL_MINOR_VERSION}
JPEGXL_PATCH_VERSION=${JPEGXL_PATCH_VERSION}
# Used to determine if we are building the library when defined or just
# including the library when not defined. This is public so libjxl shared
# library gets this define too.
@ -55,6 +52,9 @@ set(OBJ_COMPILE_DEFINITIONS
# Generate version.h
configure_file("jxl/version.h.in" "include/jxl/version.h")
list(APPEND JPEGXL_INTERNAL_PUBLIC_HEADERS
${CMAKE_CURRENT_BINARY_DIR}/include/jxl/version.h)
# Headers for exporting/importing public headers
include(GenerateExportHeader)
@ -87,6 +87,16 @@ target_include_directories(jxl_base INTERFACE
${PROJECT_SOURCE_DIR}
${JXL_HWY_INCLUDE_DIRS}
)
# On android, link with log to use android-related log functions.
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
find_library(log-lib log)
if(log-lib)
target_link_libraries(jxl_base INTERFACE ${log-lib})
target_compile_definitions(jxl_base INTERFACE USE_ANDROID_LOGGER)
endif()
endif()
add_dependencies(jxl_base jxl_export)
# Decoder-only object library

View file

@ -13,7 +13,7 @@
#include <stddef.h>
#include <stdint.h>
#include "lib/jxl/base/bits.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/status.h"
#if JXL_COMPILER_MSVC
// suppress warnings about the const & applied to function types

View file

@ -12,6 +12,7 @@
#include <string.h>
#include "lib/jxl/base/byte_order.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/status.h"
namespace jxl {
@ -61,7 +62,9 @@ static Status JXL_INLINE LoadFloatRow(const uint8_t* src, size_t count,
case JXL_TYPE_UINT8:
for (size_t i = 0; i < count; ++i) {
callback(i, src[stride * i] * scale);
// Integer multiply uint8 value before scaling so that the UINT8 value
// and the corresponding UINT16 value convert to the same float
callback(i, (src[stride * i] * 257) * scale);
}
return true;

View file

@ -74,14 +74,31 @@ namespace jxl {
#define JXL_DEBUG_ON_ABORT JXL_DEBUG_ON_ERROR
#endif // JXL_DEBUG_ON_ABORT
// Print a debug message on standard error. You should use the JXL_DEBUG macro
// instead of calling Debug directly. This function returns false, so it can be
// used as a return value in JXL_FAILURE.
#ifdef USE_ANDROID_LOGGER
#include <android/log.h>
#define LIBJXL_ANDROID_LOG_TAG ("libjxl")
// Formats `format`/`args` into a heap-allocated string and writes it to the
// Android log at debug priority under LIBJXL_ANDROID_LOG_TAG. If formatting
// fails (vasprintf returns -1) nothing is logged.
inline void android_vprintf(const char* format, va_list args) {
  char* buffer = nullptr;
  if (vasprintf(&buffer, format, args) == -1) {
    return;
  }
  __android_log_write(ANDROID_LOG_DEBUG, LIBJXL_ANDROID_LOG_TAG, buffer);
  free(buffer);
}
#endif
// Print a debug message on standard error or android logs. You should use the
// JXL_DEBUG macro instead of calling Debug directly. This function returns
// false, so it can be used as a return value in JXL_FAILURE.
// printf-style debug output. The message goes to android_vprintf() when
// USE_ANDROID_LOGGER is defined and to stderr otherwise.
// Always returns false, so a failure path can log and fail in one expression.
// NOTE(review): JXL_FORMAT(1, 2) presumably enables compiler format-string
// checking for (format, ...) - confirm against its definition.
JXL_FORMAT(1, 2)
inline JXL_NOINLINE bool Debug(const char* format, ...) {
va_list args;
va_start(args, format);
#ifdef USE_ANDROID_LOGGER
// Android build: route the message to the system log.
android_vprintf(format, args);
#else
vfprintf(stderr, format, args);
#endif
va_end(args);
return false;
}
@ -110,8 +127,12 @@ inline JXL_NOINLINE bool Debug(const char* format, ...) {
// JXL_DEBUG version that prints the debug message if the global verbose level
// defined at compile time by JXL_DEBUG_V_LEVEL is greater or equal than the
// passed level.
// With a positive JXL_DEBUG_V_LEVEL the macro forwards to JXL_DEBUG, enabled
// only when `level` does not exceed the compiled-in level. Otherwise it
// expands to nothing, so its arguments are not evaluated at all.
#if JXL_DEBUG_V_LEVEL > 0
#define JXL_DEBUG_V(level, format, ...) \
JXL_DEBUG(level <= JXL_DEBUG_V_LEVEL, format, ##__VA_ARGS__)
#else
#define JXL_DEBUG_V(level, format, ...)
#endif
// Warnings (via JXL_WARNING) are enabled by default in debug builds (opt and
// debug).
@ -329,7 +350,11 @@ inline JXL_FORMAT(2, 3) Status
(JXL_DEBUG_ON_ALL_ERROR && !status)) {
va_list args;
va_start(args, format);
#ifdef USE_ANDROID_LOGGER
android_vprintf(format, args);
#else
vfprintf(stderr, format, args);
#endif
va_end(args);
}
#ifdef JXL_CRASH_ON_ERROR

View file

@ -3,11 +3,17 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <jxl/types.h>
#include <cstdint>
#include <sstream>
#include <utility>
#include <vector>
#include "lib/extras/codec.h"
#include "lib/jxl/image_test_utils.h"
#include "lib/extras/dec/decode.h"
#include "lib/extras/dec/jxl.h"
#include "lib/extras/packed_image.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/test_utils.h"
#include "lib/jxl/testing.h"
@ -19,19 +25,30 @@ using ::testing::SizeIs;
TEST(BlendingTest, Crops) {
const std::vector<uint8_t> compressed =
jxl::test::ReadTestData("jxl/blending/cropped_traffic_light.jxl");
CodecInOut decoded;
ASSERT_TRUE(test::DecodeFile({}, Bytes(compressed), &decoded));
extras::JXLDecompressParams dparams;
dparams.accepted_formats = {{3, JXL_TYPE_UINT16, JXL_LITTLE_ENDIAN, 0}};
extras::PackedPixelFile decoded;
ASSERT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
/*decoded_bytes=*/nullptr, &decoded));
ASSERT_THAT(decoded.frames, SizeIs(4));
int i = 0;
for (const ImageBundle& ib : decoded.frames) {
for (auto&& decoded_frame : decoded.frames) {
std::ostringstream filename;
filename << "jxl/blending/cropped_traffic_light_frame-" << i << ".png";
const std::vector<uint8_t> compressed_frame =
jxl::test::ReadTestData(filename.str());
CodecInOut frame;
ASSERT_TRUE(SetFromBytes(Bytes(compressed_frame), &frame));
JXL_EXPECT_OK(SamePixels(ib.color(), *frame.Main().color(), _));
extras::PackedPixelFile decoded_frame_ppf;
decoded_frame_ppf.info = decoded.info;
decoded_frame_ppf.icc = decoded.icc;
decoded_frame_ppf.color_encoding = decoded.color_encoding;
decoded_frame_ppf.extra_channels_info = decoded.extra_channels_info;
decoded_frame_ppf.frames.emplace_back(std::move(decoded_frame));
extras::PackedPixelFile expected_frame_ppf;
ASSERT_TRUE(extras::DecodeBytes(Bytes(compressed_frame),
extras::ColorHints(), &expected_frame_ppf));
EXPECT_EQ(0.0f,
test::ComputeDistance2(decoded_frame_ppf, expected_frame_ppf));
++i;
}
}

View file

@ -11,9 +11,6 @@
#include <stdint.h>
#include <stdlib.h>
#include <memory>
#include <vector>
namespace jxl {
/** Outputs the contents of a box in a streaming fashion, either directly, or

View file

@ -41,7 +41,6 @@
#include "lib/jxl/base/printf_macros.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/convolve.h"
#include "lib/jxl/gauss_blur.h"
#include "lib/jxl/image_ops.h"
#ifndef JXL_BUTTERAUGLI_ONCE

View file

@ -9,12 +9,12 @@
#include <stddef.h>
#include <algorithm>
#include <cstdint>
#include <utility>
#include "lib/extras/metrics.h"
#include "lib/extras/packed_image.h"
#include "lib/jxl/base/random.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/enc_external_image.h"
#include "lib/jxl/image.h"
@ -109,7 +109,7 @@ TEST(ButteraugliInPlaceTest, LargeImage) {
EXPECT_TRUE(ButteraugliInterfaceInPlace(std::move(rgb0), std::move(rgb1), ba,
diffmap2, diffval2));
double distp2 = ComputeDistanceP(diffmap2, ba, 3.0);
EXPECT_NEAR(diffval, diffval2, 1e-10);
EXPECT_NEAR(diffval, diffval2, 5e-7);
EXPECT_NEAR(distp, distp2, 1e-7);
}

View file

@ -13,11 +13,9 @@
#include <cmath>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
#include <vector>
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/status.h"
namespace jxl {

View file

@ -10,7 +10,6 @@
#include <cstdlib> // rand
#include "lib/jxl/cms/color_encoding_cms.h"
#include "lib/jxl/encode_internal.h"
#include "lib/jxl/test_utils.h"
#include "lib/jxl/testing.h"

View file

@ -8,20 +8,26 @@
#include <stdint.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <new>
#include <cstdio>
#include <cstdlib>
#include <ostream>
#include <string>
#include <utility>
#include <vector>
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/random.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/cms/color_encoding_cms.h"
#include "lib/jxl/cms/opsin_params.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/enc_xyb.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/image_test_utils.h"
#include "lib/jxl/test_utils.h"
#include "lib/jxl/testing.h"

View file

@ -9,8 +9,8 @@
// 2D convolution.
#include <stddef.h>
#include <stdint.h>
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/image.h"
@ -55,19 +55,6 @@ struct WeightsSeparable5 {
float vert[3 * 4];
};
// Weights for separable 7x7 filters (typically but not necessarily the same
// values for horizontal and vertical directions). The kernel must already be
// normalized, but note that values for negative offsets are omitted, so the
// given values do not sum to 1.
//
// NOTE: for >= 7x7 Gaussian kernels, it is faster to use FastGaussian instead,
// at least when images exceed the L1 cache size.
struct WeightsSeparable7 {
// Horizontal 1D, distances 0..3 (each replicated 4x)
float horz[4 * 4];
// Vertical 1D; same array size as horz, presumably the same layout
// (distances 0..3, each replicated 4x) - confirm against the users.
float vert[4 * 4];
};
const WeightsSymmetric3& WeightsSymmetric3Lowpass();
const WeightsSeparable5& WeightsSeparable5Lowpass();
const WeightsSymmetric5& WeightsSymmetric5Lowpass();
@ -80,10 +67,6 @@ void SlowSeparable5(const ImageF& in, const Rect& in_rect,
const WeightsSeparable5& weights, ThreadPool* pool,
ImageF* out, const Rect& out_rect);
// Slow 7x7 separable convolution (radius 3) that reads `in_rect` of `in` and
// writes `out_rect` of `out`. Serves as the reference implementation that
// Separable7 is verified against.
void SlowSeparable7(const ImageF& in, const Rect& in_rect,
const WeightsSeparable7& weights, ThreadPool* pool,
ImageF* out, const Rect& out_rect);
void Symmetric3(const ImageF& in, const Rect& rect,
const WeightsSymmetric3& weights, ThreadPool* pool,
ImageF* out);
@ -100,10 +83,6 @@ void Separable5(const ImageF& in, const Rect& rect,
const WeightsSeparable5& weights, ThreadPool* pool,
ImageF* out);
// 7x7 separable convolution of `rect` within `in`, written to `out`. The
// implementation is selected at runtime among the compiled SIMD targets.
void Separable7(const ImageF& in, const Rect& rect,
const WeightsSeparable7& weights, ThreadPool* pool,
ImageF* out);
} // namespace jxl
#endif // LIB_JXL_CONVOLVE_H_

View file

@ -1,285 +0,0 @@
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "lib/jxl/convolve.h"
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/convolve_separable7.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>
#include "lib/jxl/convolve-inl.h"
HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {
// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Add;
using hwy::HWY_NAMESPACE::Mul;
using hwy::HWY_NAMESPACE::MulAdd;
using hwy::HWY_NAMESPACE::Vec;
// 7x7 convolution by separable kernel with a single scan through the input.
// Extended version of Separable5, see documentation there.
//
// Used as the strategy parameter of ConvolveT<>. For every output row,
// ConvolveRow() convolves the seven contributing input rows horizontally and
// then combines them vertically. Both kernels are symmetric, so samples at
// distance +k and -k are added before being multiplied by the shared weight.
class Separable7Strategy {
// Vectors are capped at 16 float lanes.
using D = HWY_CAPPED(float, 16);
using V = Vec<D>;
public:
// Kernel half-width: taps span [-kRadius, +kRadius] in both directions.
static constexpr int64_t kRadius = 3;
// Computes one output row.
//   row_m    - the input row vertically aligned with the output row.
//   xsize    - number of pixels per row.
//   stride   - distance in floats between consecutive input rows.
//   wrap_row - maps a (possibly out-of-bounds) row pointer to the row that
//              should actually be read.
//   weights  - horizontal/vertical kernel weights, each replicated 4x.
//   row_out  - destination row.
// kSizeModN presumably equals xsize % Lanes(d) and selects the right-border
// handling at compile time - TODO confirm against ConvolveT.
template <size_t kSizeModN, class WrapRow>
static JXL_MAYBE_INLINE void ConvolveRow(
const float* const JXL_RESTRICT row_m, const size_t xsize,
const int64_t stride, const WrapRow& wrap_row,
const WeightsSeparable7& weights, float* const JXL_RESTRICT row_out) {
const D d;
const int64_t neg_stride = -stride; // allows LEA addressing.
// Three rows above (t1..t3) and below (b1..b3) the middle row, each passed
// through wrap_row so that border rows are redirected as needed.
const float* const JXL_RESTRICT row_t3 =
wrap_row(row_m + 3 * neg_stride, stride);
const float* const JXL_RESTRICT row_t2 =
wrap_row(row_m + 2 * neg_stride, stride);
const float* const JXL_RESTRICT row_t1 =
wrap_row(row_m + 1 * neg_stride, stride);
const float* const JXL_RESTRICT row_b1 =
wrap_row(row_m + 1 * stride, stride);
const float* const JXL_RESTRICT row_b2 =
wrap_row(row_m + 2 * stride, stride);
const float* const JXL_RESTRICT row_b3 =
wrap_row(row_m + 3 * stride, stride);
// Weight for distance k lives at index k * 4 (4x replication), which lets
// LoadDup128 broadcast it to all 128-bit blocks of the vector.
const V wh0 = LoadDup128(d, weights.horz + 0 * 4);
const V wh1 = LoadDup128(d, weights.horz + 1 * 4);
const V wh2 = LoadDup128(d, weights.horz + 2 * 4);
const V wh3 = LoadDup128(d, weights.horz + 3 * 4);
const V wv0 = LoadDup128(d, weights.vert + 0 * 4);
const V wv1 = LoadDup128(d, weights.vert + 1 * 4);
const V wv2 = LoadDup128(d, weights.vert + 2 * 4);
const V wv3 = LoadDup128(d, weights.vert + 3 * 4);
size_t x = 0;
// Left border: the left neighbors of the first pixels do not exist, so use
// HorzConvolveFirst, which synthesizes them.
// More than one iteration for scalars.
for (; x < kRadius; x += Lanes(d)) {
const V conv0 =
Mul(HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2, wh3), wv0);
const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2, wh3);
const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2, wh3);
const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2, wh3);
const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2, wh3);
const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
const V conv3t = HorzConvolveFirst(row_t3, x, xsize, wh0, wh1, wh2, wh3);
const V conv3b = HorzConvolveFirst(row_b3, x, xsize, wh0, wh1, wh2, wh3);
const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2);
Store(conv3, d, row_out + x);
}
// Main loop: load inputs without padding
// Runs while a whole vector plus kRadius pixels to its right still lie
// inside the row, so all unaligned neighbor loads are in bounds.
for (; x + Lanes(d) + kRadius <= xsize; x += Lanes(d)) {
const V conv0 = Mul(HorzConvolve(row_m + x, wh0, wh1, wh2, wh3), wv0);
const V conv1t = HorzConvolve(row_t1 + x, wh0, wh1, wh2, wh3);
const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2, wh3);
const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2, wh3);
const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2, wh3);
const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
const V conv3t = HorzConvolve(row_t3 + x, wh0, wh1, wh2, wh3);
const V conv3b = HorzConvolve(row_b3 + x, wh0, wh1, wh2, wh3);
const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2);
Store(conv3, d, row_out + x);
}
// Last full vector to write (the above loop handled mod >= kRadius)
// On the scalar target every remaining pixel is processed here one at a
// time; on vector targets this block runs at most once.
#if HWY_TARGET == HWY_SCALAR
while (x < xsize) {
#else
if (kSizeModN < kRadius) {
#endif
const V conv0 =
Mul(HorzConvolveLast<kSizeModN>(row_m, x, xsize, wh0, wh1, wh2, wh3),
wv0);
const V conv1t =
HorzConvolveLast<kSizeModN>(row_t1, x, xsize, wh0, wh1, wh2, wh3);
const V conv1b =
HorzConvolveLast<kSizeModN>(row_b1, x, xsize, wh0, wh1, wh2, wh3);
const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
const V conv2t =
HorzConvolveLast<kSizeModN>(row_t2, x, xsize, wh0, wh1, wh2, wh3);
const V conv2b =
HorzConvolveLast<kSizeModN>(row_b2, x, xsize, wh0, wh1, wh2, wh3);
const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
const V conv3t =
HorzConvolveLast<kSizeModN>(row_t3, x, xsize, wh0, wh1, wh2, wh3);
const V conv3b =
HorzConvolveLast<kSizeModN>(row_b3, x, xsize, wh0, wh1, wh2, wh3);
const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2);
Store(conv3, d, row_out + x);
x += Lanes(d);
}
// If mod = 0, the above vector was the last.
if (kSizeModN != 0) {
// Remaining pixels that do not fill a vector: direct 7x7 sum per pixel,
// mirroring x at the row ends and redirecting rows through wrap_row.
for (; x < xsize; ++x) {
float mul = 0.0f;
for (int64_t dy = -kRadius; dy <= kRadius; ++dy) {
// abs(offset) * 4 selects the first copy of the replicated weight.
const float wy = weights.vert[std::abs(dy) * 4];
const float* clamped_row = wrap_row(row_m + dy * stride, stride);
for (int64_t dx = -kRadius; dx <= kRadius; ++dx) {
const float wx = weights.horz[std::abs(dx) * 4];
const int64_t clamped_x = Mirror(x + dx, xsize);
mul += clamped_row[clamped_x] * wx * wy;
}
}
row_out[x] = mul;
}
}
}
private:
// Same as HorzConvolve for the first/last vector in a row.
// This variant handles the left border: right neighbors are loaded directly,
// left neighbors are produced without reading before the start of the row.
static JXL_MAYBE_INLINE V HorzConvolveFirst(
const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize,
const V wh0, const V wh1, const V wh2, const V wh3) {
const D d;
const V c = LoadU(d, row + x);
const V mul0 = Mul(c, wh0);
#if HWY_TARGET == HWY_SCALAR
// Scalar target: one pixel per "vector", so mirror each left index.
const V l1 = LoadU(d, row + Mirror(x - 1, xsize));
const V l2 = LoadU(d, row + Mirror(x - 2, xsize));
const V l3 = LoadU(d, row + Mirror(x - 3, xsize));
#else
(void)xsize;
// Left neighbors are derived from the center vector itself; presumably
// Neighbors::FirstLk applies the same mirroring in-register - TODO confirm
// in convolve-inl.h.
const V l1 = Neighbors::FirstL1(c);
const V l2 = Neighbors::FirstL2(c);
const V l3 = Neighbors::FirstL3(c);
#endif
const V r1 = LoadU(d, row + x + 1);
const V r2 = LoadU(d, row + x + 2);
const V r3 = LoadU(d, row + x + 3);
const V mul1 = MulAdd(Add(l1, r1), wh1, mul0);
const V mul2 = MulAdd(Add(l2, r2), wh2, mul1);
const V mul3 = MulAdd(Add(l3, r3), wh3, mul2);
return mul3;
}
// Right-border counterpart of HorzConvolveFirst: left neighbors are loaded
// directly, right neighbors are assembled without reading past the end of
// the row. kSizeModN picks how many right neighbors can still be loaded.
template <size_t kSizeModN>
static JXL_MAYBE_INLINE V HorzConvolveLast(
const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize,
const V wh0, const V wh1, const V wh2, const V wh3) {
const D d;
const V c = LoadU(d, row + x);
const V mul0 = Mul(c, wh0);
const V l1 = LoadU(d, row + x - 1);
const V l2 = LoadU(d, row + x - 2);
const V l3 = LoadU(d, row + x - 3);
V r1, r2, r3;
#if HWY_TARGET == HWY_SCALAR
r1 = LoadU(d, row + Mirror(x + 1, xsize));
r2 = LoadU(d, row + Mirror(x + 2, xsize));
r3 = LoadU(d, row + Mirror(x + 3, xsize));
#else
const size_t N = Lanes(d);
if (kSizeModN == 0) {
// No pixels beyond this vector: all three right-neighbor vectors are
// lane permutations of the center vector.
r3 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 3)));
r2 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 2)));
r1 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 1)));
} else if (kSizeModN == 1) {
// `last` is the final N pixels of the row; it is used directly as r1 and
// permuted to obtain r2 and r3.
const auto last = LoadU(d, row + xsize - N);
r3 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 2)));
r2 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1)));
r1 = last;
} else /* kSizeModN >= 2 */ {
// r1 is a plain load, r2 is the final N pixels of the row and only r3 is
// obtained by permuting lanes.
const auto last = LoadU(d, row + xsize - N);
r3 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1)));
r2 = last;
r1 = LoadU(d, row + x + 1);
}
#endif
// Sum of pixels with Manhattan distance i, multiplied by weights[i].
const V sum1 = Add(l1, r1);
const V mul1 = MulAdd(sum1, wh1, mul0);
const V sum2 = Add(l2, r2);
const V mul2 = MulAdd(sum2, wh2, mul1);
const V sum3 = Add(l3, r3);
const V mul3 = MulAdd(sum3, wh3, mul2);
return mul3;
}
// Returns one vector of horizontal convolution results; lane i is the result
// for pixel pos + i. This is the fast path for interior pixels, i.e. kRadius
// valid pixels before/after pos.
static JXL_MAYBE_INLINE V HorzConvolve(const float* const JXL_RESTRICT pos,
const V wh0, const V wh1, const V wh2,
const V wh3) {
const D d;
const V c = LoadU(d, pos);
const V mul0 = Mul(c, wh0);
// TODO(janwas): better to Combine
// Six unaligned loads fetch the neighbors at distances 1..3 on each side.
const V l1 = LoadU(d, pos - 1);
const V r1 = LoadU(d, pos + 1);
const V l2 = LoadU(d, pos - 2);
const V r2 = LoadU(d, pos + 2);
const V l3 = LoadU(d, pos - 3);
const V r3 = LoadU(d, pos + 3);
// Sum of pixels with Manhattan distance i, multiplied by weights[i].
const V sum1 = Add(l1, r1);
const V mul1 = MulAdd(sum1, wh1, mul0);
const V sum2 = Add(l2, r2);
const V mul2 = MulAdd(sum2, wh2, mul1);
const V sum3 = Add(l3, r3);
const V mul3 = MulAdd(sum3, wh3, mul2);
return mul3;
}
};
void Separable7(const ImageF& in, const Rect& rect,
const WeightsSeparable7& weights, ThreadPool* pool,
ImageF* out) {
using Conv = ConvolveT<Separable7Strategy>;
if (rect.xsize() >= Conv::MinWidth()) {
return Conv::Run(in, rect, weights, pool, out);
}
return SlowSeparable7(in, rect, weights, pool, out, Rect(*out));
}
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace jxl
HWY_AFTER_NAMESPACE();
#if HWY_ONCE
namespace jxl {
HWY_EXPORT(Separable7);
// Public entry point: forwards all arguments to the Separable7 implementation
// chosen by Highway's dynamic dispatch for the current CPU.
void Separable7(const ImageF& in, const Rect& rect,
                const WeightsSeparable7& weights, ThreadPool* pool,
                ImageF* out) {
  HWY_DYNAMIC_DISPATCH(Separable7)(in, rect, weights, pool, out);
}
} // namespace jxl
#endif // HWY_ONCE

View file

@ -195,10 +195,4 @@ void SlowSeparable5(const ImageF& in, const Rect& in_rect,
SlowSeparable<2>(in, in_rect, weights, pool, out, out_rect);
}
// 7x7 instantiation of the generic slow separable convolution.
void SlowSeparable7(const ImageF& in, const Rect& in_rect,
                    const WeightsSeparable7& weights, ThreadPool* pool,
                    ImageF* out, const Rect& out_rect) {
  // A 7-tap kernel reaches 3 pixels to either side of the center.
  constexpr int kKernelRadius = 3;
  SlowSeparable<kKernelRadius>(in, in_rect, weights, pool, out, out_rect);
}
} // namespace jxl

View file

@ -39,7 +39,7 @@ static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y,
const float sum_2 = wx2 * (in_m2 + in_p2);
const float sum_1 = wx1 * (in_m1 + in_p1);
const float sum_0 = wx0 * in_00;
return sum_2 + sum_1 + sum_0;
return sum_2 + (sum_1 + sum_0);
}
template <class WrapY, class V>

View file

@ -151,28 +151,6 @@ void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool,
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
}
void VerifySeparable7(const size_t xsize, const size_t ysize, ThreadPool* pool,
Rng* rng) {
const Rect rect(0, 0, xsize, ysize);
ImageF in(xsize, ysize);
GenerateImage(*rng, &in, 0.0f, 1.0f);
ImageF out_expected(xsize, ysize);
ImageF out_actual(xsize, ysize);
// Gaussian sigma 1.0
const WeightsSeparable7 weights = {{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
HWY_REP4(0.060626f), HWY_REP4(0.00598f)},
{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
HWY_REP4(0.060626f), HWY_REP4(0.00598f)}};
SlowSeparable7(in, rect, weights, pool, &out_expected, rect);
Separable7(in, rect, weights, pool, &out_actual);
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
}
// For all xsize/ysize and kernels:
void TestConvolve() {
TestNeighbors();
@ -204,10 +182,6 @@ void TestConvolve() {
JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep5------------------");
VerifySeparable5(xsize, ysize, null_pool, &rng);
VerifySeparable5(xsize, ysize, &pool3, &rng);
JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep7------------------");
VerifySeparable7(xsize, ysize, null_pool, &rng);
VerifySeparable7(xsize, ysize, &pool3, &rng);
}
},
"TestConvolve"));

View file

@ -7,28 +7,36 @@
#define LIB_JXL_DEC_CACHE_H_
#include <jxl/decode.h>
#include <jxl/types.h>
#include <stdint.h>
#include <algorithm>
#include <atomic>
#include <cmath>
#include <hwy/base.h> // HWY_ALIGN_MAX
#include <memory>
#include <vector>
#include "hwy/aligned_allocator.h"
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/common.h" // kMaxNumPasses
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/coeff_order.h"
#include "lib/jxl/convolve.h"
#include "lib/jxl/common.h"
#include "lib/jxl/dct_util.h"
#include "lib/jxl/dec_ans.h"
#include "lib/jxl/dec_group_border.h"
#include "lib/jxl/dec_noise.h"
#include "lib/jxl/dec_xyb.h"
#include "lib/jxl/frame_dimensions.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/passes_state.h"
#include "lib/jxl/quant_weights.h"
#include "lib/jxl/render_pipeline/render_pipeline.h"
#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
#include "lib/jxl/render_pipeline/stage_upsampling.h"
#include "lib/jxl/sanitizers.h"
namespace jxl {

View file

@ -42,8 +42,8 @@ void FloatToU32(const float* in, uint32_t* out, size_t num, float mul,
const hwy::HWY_NAMESPACE::Rebind<uint32_t, decltype(d)> du;
// Unpoison accessing partially-uninitialized vectors with memory sanitizer.
// This is because we run NearestInt() on the vector, which triggers msan even
// it it safe to do so since the values are not mixed between lanes.
// This is because we run NearestInt() on the vector, which triggers MSAN even
// though it is safe to do so since the values are not mixed between lanes.
const size_t num_round_up = RoundUpTo(num, Lanes(d));
msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num));

View file

@ -8,17 +8,15 @@
// Interleaved image for color transforms and Codec.
#include <jxl/decode.h>
#include <jxl/types.h>
#include <stddef.h>
#include <stdint.h>
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/dec_cache.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"
namespace jxl {

View file

@ -5,20 +5,19 @@
#include "lib/jxl/dec_frame.h"
#include <jxl/types.h>
#include <jxl/decode.h>
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <atomic>
#include <hwy/aligned_allocator.h>
#include <numeric>
#include <cstdlib>
#include <memory>
#include <utility>
#include <vector>
#include "lib/jxl/ac_context.h"
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/ans_params.h"
#include "lib/jxl/base/bits.h"
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
@ -30,26 +29,29 @@
#include "lib/jxl/coeff_order_fwd.h"
#include "lib/jxl/common.h" // kMaxNumPasses
#include "lib/jxl/compressed_dc.h"
#include "lib/jxl/dct_util.h"
#include "lib/jxl/dec_ans.h"
#include "lib/jxl/dec_bit_reader.h"
#include "lib/jxl/dec_cache.h"
#include "lib/jxl/dec_group.h"
#include "lib/jxl/dec_modular.h"
#include "lib/jxl/dec_noise.h"
#include "lib/jxl/dec_patch_dictionary.h"
#include "lib/jxl/dec_xyb.h"
#include "lib/jxl/entropy_coder.h"
#include "lib/jxl/epf.h"
#include "lib/jxl/fields.h"
#include "lib/jxl/frame_dimensions.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/jpeg/jpeg_data.h"
#include "lib/jxl/loop_filter.h"
#include "lib/jxl/passes_state.h"
#include "lib/jxl/quant_weights.h"
#include "lib/jxl/quantizer.h"
#include "lib/jxl/sanitizers.h"
#include "lib/jxl/render_pipeline/render_pipeline.h"
#include "lib/jxl/splines.h"
#include "lib/jxl/toc.h"

View file

@ -10,17 +10,21 @@
#include <jxl/types.h>
#include <stdint.h>
#include <algorithm>
#include <cstddef>
#include <limits>
#include <utility>
#include <vector>
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/blending.h"
#include "lib/jxl/common.h" // JXL_HIGH_PRECISION
#include "lib/jxl/dec_bit_reader.h"
#include "lib/jxl/dec_cache.h"
#include "lib/jxl/dec_modular.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/headers.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"

View file

@ -10,11 +10,13 @@
#include <jxl/cms_interface.h>
#include <cstddef>
#include <cstdint>
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/dec_bit_reader.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_metadata.h"

View file

@ -5,6 +5,7 @@
#include <jxl/decode.h>
#include <jxl/types.h>
#include <jxl/version.h>
#include <algorithm>
#include <array>

View file

@ -3,28 +3,45 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "lib/extras/dec/decode.h"
#include <jxl/cms.h>
#include <jxl/codestream_header.h>
#include <jxl/color_encoding.h>
#include <jxl/decode.h>
#include <jxl/decode_cxx.h>
#include <jxl/memory_manager.h>
#include <jxl/parallel_runner.h>
#include <jxl/resizable_parallel_runner.h>
#include <jxl/resizable_parallel_runner_cxx.h>
#include <jxl/thread_parallel_runner.h>
#include <jxl/thread_parallel_runner_cxx.h>
#include <jxl/types.h>
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ostream>
#include <set>
#include <sstream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "lib/extras/codec.h"
#include "lib/extras/dec/color_description.h"
#include "lib/extras/enc/encode.h"
#include "lib/extras/enc/jpg.h"
#include "lib/extras/packed_image.h"
#include "lib/jxl/base/byte_order.h"
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/override.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/butteraugli/butteraugli.h"
#include "lib/jxl/cms/color_encoding_cms.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/dec_bit_reader.h"
@ -35,14 +52,17 @@
#include "lib/jxl/enc_fields.h"
#include "lib/jxl/enc_frame.h"
#include "lib/jxl/enc_icc_codec.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/enc_progressive_split.h"
#include "lib/jxl/encode_internal.h"
#include "lib/jxl/fields.h"
#include "lib/jxl/frame_dimensions.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/headers.h"
#include "lib/jxl/icc_codec.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/jpeg/enc_jpeg_data.h"
#include "lib/jxl/padded_bytes.h"
#include "lib/jxl/test_image.h"
@ -275,9 +295,20 @@ std::vector<uint8_t> CreateTestJXLCodestream(
if (params.jpeg_codestream != nullptr) {
if (jxl::extras::CanDecode(jxl::extras::Codec::kJPG)) {
std::vector<uint8_t> jpeg_bytes;
io.jpeg_quality = 70;
EXPECT_TRUE(Encode(io, extras::Codec::kJPG, io.metadata.m.color_encoding,
/*bits_per_sample=*/8, &jpeg_bytes));
extras::PackedPixelFile ppf;
extras::PackedFrame frame(xsize, ysize, format);
JXL_ASSERT(frame.color.pixels_size == pixels.size());
memcpy(frame.color.pixels(0, 0, 0), pixels.data(), pixels.size());
ppf.frames.emplace_back(std::move(frame));
ppf.info.xsize = xsize;
ppf.info.ysize = ysize;
ppf.info.num_color_channels = grayscale ? 1 : 3;
ppf.info.bits_per_sample = 16;
auto encoder = extras::GetJPEGEncoder();
encoder->SetOption("quality", "70");
extras::EncodedImage encoded;
EXPECT_TRUE(encoder->Encode(ppf, &encoded));
jpeg_bytes = encoded.bitstreams[0];
Bytes(jpeg_bytes).AppendTo(params.jpeg_codestream);
EXPECT_TRUE(jxl::jpeg::DecodeImageJPG(
jxl::Bytes(jpeg_bytes.data(), jpeg_bytes.size()), &io));
@ -1660,7 +1691,7 @@ TEST(DecodeTest, PixelTestWithICCProfileLossy) {
EXPECT_THAT(
ButteraugliDistance(io0.frames, io1.frames, ba, *JxlGetDefaultCms(),
/*distmap=*/nullptr, nullptr),
IsSlightlyBelow(0.55f));
IsSlightlyBelow(0.56f));
JxlDecoderDestroy(dec);
}
@ -2104,7 +2135,7 @@ TEST(DecodeTest, PixelTestOpaqueSrgbLossyNoise) {
EXPECT_THAT(
ButteraugliDistance(io0.frames, io1.frames, ba, *JxlGetDefaultCms(),
/*distmap=*/nullptr, nullptr),
IsSlightlyBelow(1.2222f));
IsSlightlyBelow(1.3f));
JxlDecoderDestroy(dec);
}

View file

@ -5,7 +5,18 @@
#include "lib/jxl/decode_to_jpeg.h"
#include <jxl/decode.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "lib/jxl/base/span.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/common.h" // JPEGXL_ENABLE_TRANSCODE_JPEG
#include "lib/jxl/jpeg/dec_jpeg_data.h"
#include "lib/jxl/jpeg/jpeg_data.h"
namespace jxl {

View file

@ -14,13 +14,16 @@
#include <stdint.h>
#include <stdlib.h>
#include <algorithm>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>
#include "lib/jxl/base/status.h"
#include "lib/jxl/common.h" // JPEGXL_ENABLE_TRANSCODE_JPEG
#include "lib/jxl/common.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/jpeg/dec_jpeg_data.h"
#include "lib/jxl/jpeg/jpeg_data.h"
#if JPEGXL_ENABLE_TRANSCODE_JPEG
#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
@ -109,7 +112,7 @@ class JxlToJpegDecoder {
jpeg_data_.get())) {
return false;
}
ib->jpeg_data.reset(jpeg_data_.release());
ib->jpeg_data = std::move(jpeg_data_);
}
return true;
}

View file

@ -8,9 +8,13 @@
#include <cstddef>
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/chroma_from_luma.h"
#include "lib/jxl/enc_cache.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/frame_dimensions.h"
#include "lib/jxl/image.h"
#include "lib/jxl/quant_weights.h"

View file

@ -41,7 +41,6 @@
#include "lib/jxl/enc_transforms-inl.h"
#include "lib/jxl/epf.h"
#include "lib/jxl/frame_dimensions.h"
#include "lib/jxl/gauss_blur.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_ops.h"
@ -408,8 +407,10 @@ struct AdaptiveQuantizationImpl {
void ComputeTile(float butteraugli_target, float scale, const Image3F& xyb,
const Rect& rect_in, const Rect& rect_out, const int thread,
ImageF* mask, ImageF* mask1x1) {
const size_t xsize = rect_in.xsize();
const size_t ysize = rect_in.ysize();
JXL_ASSERT(rect_in.x0() % 8 == 0);
JXL_ASSERT(rect_in.y0() % 8 == 0);
const size_t xsize = xyb.xsize();
const size_t ysize = xyb.ysize();
// The XYB gamma is 3.0 to be able to decode faster with two muls.
// Butteraugli's gamma is matching the gamma of human eye, around 2.6.
@ -420,21 +421,30 @@ struct AdaptiveQuantizationImpl {
const HWY_FULL(float) df;
size_t y_start = rect_out.y0() * 8;
size_t y_end = y_start + rect_out.ysize() * 8;
size_t y_start_1x1 = rect_in.y0() + rect_out.y0() * 8;
size_t y_end_1x1 = y_start_1x1 + rect_out.ysize() * 8;
size_t x_start = rect_out.x0() * 8;
size_t x_end = x_start + rect_out.xsize() * 8;
size_t x_start_1x1 = rect_in.x0() + rect_out.x0() * 8;
size_t x_end_1x1 = x_start_1x1 + rect_out.xsize() * 8;
if (rect_in.x0() != 0 && rect_out.x0() == 0) x_start_1x1 -= 2;
if (rect_in.x1() < xsize && rect_out.x1() * 8 == rect_in.xsize()) {
x_end_1x1 += 2;
}
if (rect_in.y0() != 0 && rect_out.y0() == 0) y_start_1x1 -= 2;
if (rect_in.y1() < ysize && rect_out.y1() * 8 == rect_in.ysize()) {
y_end_1x1 += 2;
}
// Computes image (padded to multiple of 8x8) of local pixel differences.
// Subsample both directions by 4.
// 1x1 Laplacian of intensity.
for (size_t y = y_start; y < y_end; ++y) {
for (size_t y = y_start_1x1; y < y_end_1x1; ++y) {
const size_t y2 = y + 1 < ysize ? y + 1 : y;
const size_t y1 = y > 0 ? y - 1 : y;
const float* row_in = rect_in.ConstPlaneRow(xyb, 1, y);
const float* row_in1 = rect_in.ConstPlaneRow(xyb, 1, y1);
const float* row_in2 = rect_in.ConstPlaneRow(xyb, 1, y2);
const float* row_in = xyb.ConstPlaneRow(1, y);
const float* row_in1 = xyb.ConstPlaneRow(1, y1);
const float* row_in2 = xyb.ConstPlaneRow(1, y2);
float* mask1x1_out = mask1x1->Row(y);
auto scalar_pixel1x1 = [&](size_t x) {
const size_t x2 = x + 1 < xsize ? x + 1 : x;
@ -451,15 +461,21 @@ struct AdaptiveQuantizationImpl {
static const float kOffset = 0.01;
mask1x1_out[x] = kMul / (diff + kOffset);
};
for (size_t x = x_start; x < x_end; ++x) {
for (size_t x = x_start_1x1; x < x_end_1x1; ++x) {
scalar_pixel1x1(x);
}
}
size_t y_start = rect_in.y0() + rect_out.y0() * 8;
size_t y_end = y_start + rect_out.ysize() * 8;
size_t x_start = rect_in.x0() + rect_out.x0() * 8;
size_t x_end = x_start + rect_out.xsize() * 8;
if (x_start != 0) x_start -= 4;
if (x_end != rect_in.xsize()) x_end += 4;
if (x_end != xsize) x_end += 4;
if (y_start != 0) y_start -= 4;
if (y_end != rect_in.ysize()) y_end += 4;
if (y_end != ysize) y_end += 4;
pre_erosion[thread].ShrinkTo((x_end - x_start) / 4, (y_end - y_start) / 4);
static const float limit = 0.2f;
@ -467,9 +483,9 @@ struct AdaptiveQuantizationImpl {
size_t y2 = y + 1 < ysize ? y + 1 : y;
size_t y1 = y > 0 ? y - 1 : y;
const float* row_in = rect_in.ConstPlaneRow(xyb, 1, y);
const float* row_in1 = rect_in.ConstPlaneRow(xyb, 1, y1);
const float* row_in2 = rect_in.ConstPlaneRow(xyb, 1, y2);
const float* row_in = xyb.ConstPlaneRow(1, y);
const float* row_in1 = xyb.ConstPlaneRow(1, y1);
const float* row_in2 = xyb.ConstPlaneRow(1, y2);
float* JXL_RESTRICT row_out = diff_buffer.Row(thread);
auto scalar_pixel = [&](size_t x) {
@ -552,7 +568,8 @@ struct AdaptiveQuantizationImpl {
ImageF diff_buffer;
};
static void Blur1x1Masking(ThreadPool* pool, ImageF* mask1x1) {
static void Blur1x1Masking(ThreadPool* pool, ImageF* mask1x1,
const Rect& rect) {
// Blur the mask1x1 to obtain the masking image.
// Before blurring it contains an image of absolute value of the
// Laplacian of the intensity channel.
@ -578,10 +595,9 @@ static void Blur1x1Masking(ThreadPool* pool, ImageF* mask1x1) {
{HWY_REP4(normalize_mul * kFilterMask1x1[1])},
{HWY_REP4(normalize_mul * kFilterMask1x1[4])},
{HWY_REP4(normalize_mul * kFilterMask1x1[3])}};
Rect from_rect(0, 0, mask1x1->xsize(), mask1x1->ysize());
ImageF temp(mask1x1->xsize(), mask1x1->ysize());
Symmetric5(*mask1x1, from_rect, weights, pool, &temp);
CopyImageTo(temp, mask1x1); // TODO: make it a swap
ImageF temp(rect.xsize(), rect.ysize());
Symmetric5(*mask1x1, rect, weights, pool, &temp);
*mask1x1 = std::move(temp);
}
ImageF AdaptiveQuantizationMap(const float butteraugli_target,
@ -595,7 +611,7 @@ ImageF AdaptiveQuantizationMap(const float butteraugli_target,
const size_t ysize_blocks = rect.ysize() / kBlockDim;
impl.aq_map = ImageF(xsize_blocks, ysize_blocks);
*mask = ImageF(xsize_blocks, ysize_blocks);
*mask1x1 = ImageF(rect.xsize(), rect.ysize());
*mask1x1 = ImageF(xyb.xsize(), xyb.ysize());
JXL_CHECK(RunOnPool(
pool, 0,
DivCeil(xsize_blocks, kEncTileDimInBlocks) *
@ -618,7 +634,7 @@ ImageF AdaptiveQuantizationMap(const float butteraugli_target,
},
"AQ DiffPrecompute"));
Blur1x1Masking(pool, mask1x1);
Blur1x1Masking(pool, mask1x1, rect);
return std::move(impl).aq_map;
}

View file

@ -6,19 +6,14 @@
#ifndef LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
#define LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
#include <jxl/cms_interface.h>
#include <stddef.h>
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/enc_cache.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/loop_filter.h"
#include "lib/jxl/quant_weights.h"
#include "lib/jxl/quantizer.h"
#include "lib/jxl/splines.h"
// Heuristics to find a good quantizer for a given image. InitialQuantField
// produces a quantization field (i.e. relative quantization amounts for each

View file

@ -45,6 +45,10 @@ void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
const ImageF& quant_field, const AcStrategyImage& ac_strategy,
ImageB* epf_sharpness, const Rect& rect,
ArControlFieldHeuristics::TempImages* temp_image) {
JXL_ASSERT(opsin_rect.x0() % 8 == 0);
JXL_ASSERT(opsin_rect.y0() % 8 == 0);
JXL_ASSERT(opsin_rect.xsize() % 8 == 0);
JXL_ASSERT(opsin_rect.ysize() % 8 == 0);
constexpr size_t N = kBlockDim;
if (cparams.butteraugli_distance < kMinButteraugliForDynamicAR ||
cparams.speed_tier > SpeedTier::kWombat ||
@ -62,73 +66,65 @@ void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
const size_t sharpness_stride =
static_cast<size_t>(epf_sharpness->PixelsPerRow());
size_t by0 = rect.y0();
size_t by1 = rect.y0() + rect.ysize();
size_t bx0 = rect.x0();
size_t bx1 = rect.x0() + rect.xsize();
size_t by0 = opsin_rect.y0() / 8 + rect.y0();
size_t by1 = by0 + rect.ysize();
size_t bx0 = opsin_rect.x0() / 8 + rect.x0();
size_t bx1 = bx0 + rect.xsize();
temp_image->InitOnce();
ImageF& laplacian_sqrsum = temp_image->laplacian_sqrsum;
// Calculate the L2 of the 3x3 Laplacian in an integral transform
// (for example 32x32 dct). This relates to transforms ability
// to propagate artefacts.
size_t y0 = by0 == 0 ? 2 : 0;
size_t y1 = by1 * N + 4 <= opsin_rect.ysize() + 2
? (by1 - by0) * N + 4
: opsin_rect.ysize() + 2 - by0 * N;
size_t x0 = bx0 == 0 ? 2 : 0;
size_t x1 = bx1 * N + 4 <= opsin_rect.xsize() + 2
? (bx1 - bx0) * N + 4
: opsin_rect.xsize() + 2 - bx0 * N;
size_t y0 = by0 == 0 ? 0 : by0 * N - 2;
size_t y1 = by1 * N == opsin.ysize() ? by1 * N : by1 * N + 2;
size_t x0 = bx0 == 0 ? 0 : bx0 * N - 2;
size_t x1 = bx1 * N == opsin.xsize() ? bx1 * N : bx1 * N + 2;
HWY_FULL(float) df;
for (size_t y = y0; y < y1; y++) {
float* JXL_RESTRICT laplacian_sqrsum_row = laplacian_sqrsum.Row(y);
size_t cy = y + by0 * N - 2;
float* JXL_RESTRICT laplacian_sqrsum_row =
laplacian_sqrsum.Row(y + 2 - by0 * N);
const float* JXL_RESTRICT in_row_t[3];
const float* JXL_RESTRICT in_row[3];
const float* JXL_RESTRICT in_row_b[3];
for (size_t c = 0; c < 3; c++) {
in_row_t[c] = opsin_rect.ConstPlaneRow(opsin, c, cy > 0 ? cy - 1 : cy);
in_row[c] = opsin_rect.ConstPlaneRow(opsin, c, cy);
in_row_b[c] = opsin_rect.ConstPlaneRow(
opsin, c, cy + 1 < opsin_rect.ysize() ? cy + 1 : cy);
in_row_t[c] = opsin.ConstPlaneRow(c, y > 0 ? y - 1 : y);
in_row[c] = opsin.ConstPlaneRow(c, y);
in_row_b[c] = opsin.ConstPlaneRow(c, y + 1 < opsin.ysize() ? y + 1 : y);
}
auto compute_laplacian_scalar = [&](size_t x) {
size_t cx = x + bx0 * N - 2;
const size_t prevX = cx >= 1 ? cx - 1 : cx;
const size_t nextX = cx + 1 < opsin_rect.xsize() ? cx + 1 : cx;
const size_t prevX = x >= 1 ? x - 1 : x;
const size_t nextX = x + 1 < opsin.xsize() ? x + 1 : x;
float sumsqr = 0;
for (size_t c = 0; c < 3; c++) {
float laplacian =
kChannelWeights[c] * in_row[c][cx] +
kChannelWeights[c] * in_row[c][x] +
kChannelWeightsLapNeg[c] *
(in_row[c][prevX] + in_row[c][nextX] + in_row_b[c][prevX] +
in_row_b[c][cx] + in_row_b[c][nextX] + in_row_t[c][prevX] +
in_row_t[c][cx] + in_row_t[c][nextX]);
in_row_b[c][x] + in_row_b[c][nextX] + in_row_t[c][prevX] +
in_row_t[c][x] + in_row_t[c][nextX]);
sumsqr += laplacian * laplacian;
}
laplacian_sqrsum_row[x] = sumsqr;
laplacian_sqrsum_row[x + 2 - bx0 * N] = sumsqr;
};
size_t x = x0;
for (; x + bx0 * N < 3; x++) {
for (; x < 1; x++) {
compute_laplacian_scalar(x);
}
// Interior. One extra pixel of border as the last pixel is special.
for (; x + Lanes(df) <= x1 &&
x + Lanes(df) + bx0 * N - 1 <= opsin_rect.xsize();
for (; x + Lanes(df) <= x1 && x + Lanes(df) + 1 <= opsin.xsize();
x += Lanes(df)) {
size_t cx = x + bx0 * N - 2;
auto sumsqr = Zero(df);
for (size_t c = 0; c < 3; c++) {
auto laplacian =
Mul(LoadU(df, in_row[c] + cx), Set(df, kChannelWeights[c]));
auto sum_oth0 = LoadU(df, in_row[c] + cx - 1);
auto sum_oth1 = LoadU(df, in_row[c] + cx + 1);
auto sum_oth2 = LoadU(df, in_row_t[c] + cx - 1);
auto sum_oth3 = LoadU(df, in_row_t[c] + cx);
sum_oth0 = Add(sum_oth0, LoadU(df, in_row_t[c] + cx + 1));
sum_oth1 = Add(sum_oth1, LoadU(df, in_row_b[c] + cx - 1));
sum_oth2 = Add(sum_oth2, LoadU(df, in_row_b[c] + cx));
sum_oth3 = Add(sum_oth3, LoadU(df, in_row_b[c] + cx + 1));
Mul(LoadU(df, in_row[c] + x), Set(df, kChannelWeights[c]));
auto sum_oth0 = LoadU(df, in_row[c] + x - 1);
auto sum_oth1 = LoadU(df, in_row[c] + x + 1);
auto sum_oth2 = LoadU(df, in_row_t[c] + x - 1);
auto sum_oth3 = LoadU(df, in_row_t[c] + x);
sum_oth0 = Add(sum_oth0, LoadU(df, in_row_t[c] + x + 1));
sum_oth1 = Add(sum_oth1, LoadU(df, in_row_b[c] + x - 1));
sum_oth2 = Add(sum_oth2, LoadU(df, in_row_b[c] + x));
sum_oth3 = Add(sum_oth3, LoadU(df, in_row_b[c] + x + 1));
sum_oth0 = Add(sum_oth0, sum_oth1);
sum_oth2 = Add(sum_oth2, sum_oth3);
sum_oth0 = Add(sum_oth0, sum_oth2);
@ -136,7 +132,7 @@ void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
MulAdd(Set(df, kChannelWeightsLapNeg[c]), sum_oth0, laplacian);
sumsqr = MulAdd(laplacian, laplacian, sumsqr);
}
StoreU(sumsqr, df, laplacian_sqrsum_row + x);
StoreU(sumsqr, df, laplacian_sqrsum_row + x + 2 - bx0 * N);
}
for (; x < x1; x++) {
compute_laplacian_scalar(x);
@ -150,13 +146,13 @@ void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
ImageF& sqrsum_00 = temp_image->sqrsum_00;
size_t sqrsum_00_stride = sqrsum_00.PixelsPerRow();
float* JXL_RESTRICT sqrsum_00_row = sqrsum_00.Row(0);
for (size_t y = 0; y < (by1 - by0) * 2; y++) {
for (size_t y = 0; y < rect.ysize() * 2; y++) {
const float* JXL_RESTRICT rows_in[4];
for (size_t iy = 0; iy < 4; iy++) {
rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy + 2);
}
float* JXL_RESTRICT row_out = sqrsum_00_row + y * sqrsum_00_stride;
for (size_t x = 0; x < (bx1 - bx0) * 2; x++) {
for (size_t x = 0; x < rect.xsize() * 2; x++) {
auto sum = Zero(df4);
for (size_t iy = 0; iy < 4; iy++) {
for (size_t ix = 0; ix < 4; ix += Lanes(df4)) {
@ -173,7 +169,7 @@ void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
ImageF& sqrsum_22 = temp_image->sqrsum_22;
size_t sqrsum_22_stride = sqrsum_22.PixelsPerRow();
float* JXL_RESTRICT sqrsum_22_row = sqrsum_22.Row(0);
for (size_t y = 0; y < (by1 - by0) * 2 + 1; y++) {
for (size_t y = 0; y < rect.ysize() * 2 + 1; y++) {
const float* JXL_RESTRICT rows_in[4];
for (size_t iy = 0; iy < 4; iy++) {
rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy);
@ -182,21 +178,21 @@ void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
// ignore pixels outside the image.
// Y coordinates are relative to by0*8+y*4.
size_t sy = y * 4 + by0 * 8 > 0 ? 0 : 2;
size_t ey = y * 4 + by0 * 8 + 4 <= opsin_rect.ysize() + 2
size_t ey = y * 4 + by0 * 8 + 2 <= opsin.ysize()
? 4
: opsin_rect.ysize() - y * 4 - by0 * 8 + 2;
for (size_t x = 0; x < (bx1 - bx0) * 2 + 1; x++) {
: opsin.ysize() - y * 4 - by0 * 8 + 2;
for (size_t x = 0; x < rect.xsize() * 2 + 1; x++) {
// ignore pixels outside the image.
// X coordinates are relative to bx0*8.
size_t sx = x * 4 + bx0 * 8 > 0 ? x * 4 : x * 4 + 2;
size_t ex = x * 4 + bx0 * 8 + 4 <= opsin_rect.xsize() + 2
size_t ex = x * 4 + bx0 * 8 + 2 <= opsin.xsize()
? x * 4 + 4
: opsin_rect.xsize() - bx0 * 8 + 2;
: opsin.xsize() - bx0 * 8 + 2;
if (ex - sx == 4 && ey - sy == 4) {
auto sum = Zero(df4);
for (size_t iy = 0; iy < 4; iy++) {
for (size_t ix = 0; ix < 4; ix += Lanes(df4)) {
sum = Add(sum, Load(df4, rows_in[iy] + sx + ix));
for (size_t iy = sy; iy < ey; iy++) {
for (size_t ix = sx; ix < ex; ix += Lanes(df4)) {
sum = Add(sum, Load(df4, rows_in[iy] + ix));
}
}
row_out[x] = GetLane(Sqrt(SumOfLanes(df4, sum))) * (1.0f / 4.0f);
@ -211,11 +207,11 @@ void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
}
}
}
for (size_t by = by0; by < by1; by++) {
for (size_t by = rect.y0(); by < rect.y1(); by++) {
AcStrategyRow acs_row = ac_strategy.ConstRow(by);
uint8_t* JXL_RESTRICT out_row = epf_sharpness->Row(by);
const float* JXL_RESTRICT quant_row = quant_field.Row(by);
for (size_t bx = bx0; bx < bx1; bx++) {
for (size_t bx = rect.x0(); bx < rect.x1(); bx++) {
AcStrategy acs = acs_row[bx];
if (!acs.IsFirstBlock()) continue;
// The errors are going to be linear to the quantization value in this
@ -223,12 +219,12 @@ void ProcessTile(const CompressParams& cparams, const FrameHeader& frame_header,
float quant_val = 1.0f / quant_row[bx];
const auto sq00 = [&](size_t y, size_t x) {
return sqrsum_00_row[((by - by0) * 2 + y) * sqrsum_00_stride +
(bx - bx0) * 2 + x];
return sqrsum_00_row[((by - rect.y0()) * 2 + y) * sqrsum_00_stride +
(bx - rect.x0()) * 2 + x];
};
const auto sq22 = [&](size_t y, size_t x) {
return sqrsum_22_row[((by - by0) * 2 + y) * sqrsum_22_stride +
(bx - bx0) * 2 + x];
return sqrsum_22_row[((by - rect.y0()) * 2 + y) * sqrsum_22_stride +
(bx - rect.x0()) * 2 + x];
};
float sqrsum_integral_transform = 0;
for (size_t iy = 0; iy < acs.covered_blocks_y() * 2; iy++) {

View file

@ -10,6 +10,7 @@
#include <vector>
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image.h"

View file

@ -10,6 +10,7 @@
#include <stddef.h>
#include <memory>
#include <vector>
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"

View file

@ -10,23 +10,20 @@
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <vector>
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/coeff_order.h"
#include "lib/jxl/coeff_order_fwd.h"
#include "lib/jxl/dct_util.h"
#include "lib/jxl/enc_ans.h"
#include "lib/jxl/enc_bit_writer.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/enc_progressive_split.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/passes_state.h"
#include "lib/jxl/quant_weights.h"
#include "lib/jxl/quantizer.h"
namespace jxl {

View file

@ -192,19 +192,6 @@ void InitDCStorage(size_t num_blocks, ImageF* dc_values) {
}
}
void ComputeDC(const ImageF& dc_values, bool fast, int32_t* dc_x,
int32_t* dc_b) {
constexpr float kDistanceMultiplierDC = 1e-5f;
const float* JXL_RESTRICT dc_values_yx = dc_values.Row(0);
const float* JXL_RESTRICT dc_values_x = dc_values.Row(1);
const float* JXL_RESTRICT dc_values_yb = dc_values.Row(2);
const float* JXL_RESTRICT dc_values_b = dc_values.Row(3);
*dc_x = FindBestMultiplier(dc_values_yx, dc_values_x, dc_values.xsize(), 0.0f,
kDistanceMultiplierDC, fast);
*dc_b = FindBestMultiplier(dc_values_yb, dc_values_b, dc_values.xsize(),
jxl::cms::kYToBRatio, kDistanceMultiplierDC, fast);
}
void ComputeTile(const Image3F& opsin, const Rect& opsin_rect,
const DequantMatrices& dequant,
const AcStrategyImage* ac_strategy,
@ -363,7 +350,6 @@ HWY_AFTER_NAMESPACE();
namespace jxl {
HWY_EXPORT(InitDCStorage);
HWY_EXPORT(ComputeDC);
HWY_EXPORT(ComputeTile);
void CfLHeuristics::Init(const Rect& rect) {
@ -387,14 +373,6 @@ void CfLHeuristics::ComputeTile(const Rect& r, const Image3F& opsin,
mem.get() + thread * ItemsPerThread());
}
// Runs the dynamically dispatched ComputeDC over this object's `dc_values`
// and stores the two resulting multipliers in `cmap` (B first, then X).
//   fast: forwarded unchanged to the dispatched implementation.
//   cmap: color correlation map updated via SetYToBDC / SetYToXDC.
void CfLHeuristics::ComputeDC(bool fast, ColorCorrelationMap* cmap) {
  // Both outputs start at zero in case the callee leaves one untouched.
  int32_t x_multiplier = 0;
  int32_t b_multiplier = 0;
  HWY_DYNAMIC_DISPATCH(ComputeDC)
  (dc_values, fast, &x_multiplier, &b_multiplier);
  cmap->SetYToBDC(b_multiplier);
  cmap->SetYToXDC(x_multiplier);
}
void ColorCorrelationMapEncodeDC(const ColorCorrelationMap& map,
BitWriter* writer, size_t layer,
AuxOut* aux_out) {

View file

@ -41,8 +41,6 @@ struct CfLHeuristics {
const ImageI* raw_quant_field, const Quantizer* quantizer,
bool fast, size_t thread, ColorCorrelationMap* cmap);
void ComputeDC(bool fast, ColorCorrelationMap* cmap);
ImageF dc_values;
hwy::AlignedFreeUniquePtr<float[]> mem;

View file

@ -62,7 +62,14 @@ Status ConvertFromExternalNoSizeCheck(const uint8_t* data, size_t xsize,
size_t bytes_per_pixel = format.num_channels * bytes_per_channel;
size_t pixel_offset = c * bytes_per_channel;
// Only for uint8/16.
float scale = 1. / ((1ull << bits_per_sample) - 1);
float scale = 1.0f;
if (format.data_type == JXL_TYPE_UINT8) {
// We will do an integer multiplication by 257 in LoadFloatRow so that a
// UINT8 value and the corresponding UINT16 value convert to the same float
scale = 1.0f / (257 * ((1ull << bits_per_sample) - 1));
} else {
scale = 1.0f / ((1ull << bits_per_sample) - 1);
}
const bool little_endian =
format.endianness == JXL_LITTLE_ENDIAN ||

View file

@ -3867,8 +3867,8 @@ void LLProcess(JxlFastLosslessFrameState* frame_state, bool is_last,
bool streaming = !onegroup && output_processor;
size_t total_groups = frame_state->num_groups_x * frame_state->num_groups_y;
size_t max_groups = streaming ? kMaxLocalGroups : total_groups;
size_t start_pos = 0;
#if !FJXL_STANDALONE
size_t start_pos = 0;
if (streaming) {
start_pos = output_processor->CurrentPosition();
output_processor->Seek(start_pos + frame_state->ac_group_data_offset);

View file

@ -10,11 +10,7 @@
// FJXL_STANDALONE=1 for a stand-alone jxl encoder
// FJXL_STANDALONE=0 for use in libjxl to encode frames (but no image header)
#ifndef FJXL_STANDALONE
#ifdef JPEGXL_MAJOR_VERSION
#define FJXL_STANDALONE 0
#else
#define FJXL_STANDALONE 1
#endif
#endif
#if !FJXL_STANDALONE

View file

@ -145,7 +145,6 @@ Status CopyColorChannels(JxlChunkedFrameInputSource input, Rect rect,
" color channels, received only %u channels",
color_channels, format.num_channels);
}
*color = Image3F(rect.xsize(), rect.ysize());
const uint8_t* data = reinterpret_cast<const uint8_t*>(buffer.get());
for (size_t c = 0; c < color_channels; ++c) {
JXL_RETURN_IF_ERROR(ConvertFromExternalNoSizeCheck(
@ -221,12 +220,14 @@ void SetProgressiveMode(const CompressParams& cparams,
{/*num_coefficients=*/8, /*shift=*/0,
/*suitable_for_downsampling_of_at_least=*/0},
};
bool progressive_mode = ApplyOverride(cparams.progressive_mode, false);
bool qprogressive_mode = ApplyOverride(cparams.qprogressive_mode, false);
if (cparams.custom_progressive_mode) {
progressive_splitter->SetProgressiveMode(*cparams.custom_progressive_mode);
} else if (cparams.qprogressive_mode) {
} else if (qprogressive_mode) {
progressive_splitter->SetProgressiveMode(
ProgressiveMode{progressive_passes_dc_quant_ac_full_ac});
} else if (cparams.progressive_mode) {
} else if (progressive_mode) {
progressive_splitter->SetProgressiveMode(
ProgressiveMode{progressive_passes_dc_vlf_lf_full_ac});
}
@ -630,7 +631,7 @@ void ComputeChromacityAdjustments(const CompressParams& cparams,
// look at the individual pixels and make a guess how difficult
// the image would be based on the worst case pixel.
PixelStatsForChromacityAdjustment pixel_stats;
if (cparams.speed_tier <= SpeedTier::kWombat) {
if (cparams.speed_tier <= SpeedTier::kSquirrel) {
pixel_stats.Calc(&opsin, rect);
}
// For X take the most severe adjustment.
@ -640,8 +641,9 @@ void ComputeChromacityAdjustments(const CompressParams& cparams,
frame_header->b_qm_scale = 2 + pixel_stats.HowMuchIsBChannelPixelized();
}
void ComputeNoiseParams(const CompressParams& cparams, bool color_is_jpeg,
const Image3F& opsin, const FrameDimensions& frame_dim,
void ComputeNoiseParams(const CompressParams& cparams, bool streaming_mode,
bool color_is_jpeg, const Image3F& opsin,
const FrameDimensions& frame_dim,
FrameHeader* frame_header, NoiseParams* noise_params) {
if (cparams.photon_noise_iso > 0) {
*noise_params = SimulatePhotonNoise(frame_dim.xsize, frame_dim.ysize,
@ -651,7 +653,8 @@ void ComputeNoiseParams(const CompressParams& cparams, bool color_is_jpeg,
noise_params->lut[i] = cparams.manual_noise[i];
}
} else if (frame_header->encoding == FrameEncoding::kVarDCT &&
frame_header->flags & FrameHeader::kNoise && !color_is_jpeg) {
frame_header->flags & FrameHeader::kNoise && !color_is_jpeg &&
!streaming_mode) {
// Don't start at zero amplitude since adding noise is expensive -- it
// significantly slows down decoding, and this is unlikely to
// completely go away even with advanced optimizations. After the
@ -1397,10 +1400,13 @@ Status ComputeEncodingData(
Rect patch_rect = Rect(x0, y0, xsize, ysize).Extend(max_border, frame_rect);
JXL_ASSERT(patch_rect.IsInside(frame_rect));
Image3F color;
// Allocating a large enough image avoids a copy when padding.
Image3F color(RoundUpToBlockDim(patch_rect.xsize()),
RoundUpToBlockDim(patch_rect.ysize()));
color.ShrinkTo(patch_rect.xsize(), patch_rect.ysize());
std::vector<ImageF> extra_channels(num_extra_channels);
for (auto& extra_channel : extra_channels) {
extra_channel = jxl::ImageF(xsize, ysize);
extra_channel = jxl::ImageF(patch_rect.xsize(), patch_rect.ysize());
}
ImageF* alpha = alpha_eci ? &extra_channels[alpha_idx] : nullptr;
ImageF* black = black_eci ? &extra_channels[black_idx] : nullptr;
@ -1421,27 +1427,21 @@ Status ComputeEncodingData(
Image3F linear_storage;
Image3F* linear = nullptr;
Image3F opsin;
if (!jpeg_data) {
// Allocating a large enough image avoids a copy when padding.
opsin = Image3F(RoundUpToBlockDim(color.xsize()),
RoundUpToBlockDim(color.ysize()));
opsin.ShrinkTo(color.xsize(), color.ysize());
if (frame_header.color_transform == ColorTransform::kXYB &&
frame_info.ib_needs_color_transform) {
if (frame_header.encoding == FrameEncoding::kVarDCT &&
cparams.speed_tier <= SpeedTier::kKitten) {
linear_storage = Image3F(color.xsize(), color.ysize());
linear_storage = Image3F(patch_rect.xsize(), patch_rect.ysize());
linear = &linear_storage;
}
ToXYB(color, c_enc, metadata->m.IntensityTarget(), black, pool, &opsin,
cms, linear);
} else { // RGB or YCbCr: don't do anything (forward YCbCr is not
// implemented, this is only used when the input is already in
// YCbCr)
// If encoding a special DC or reference frame, don't do anything:
// input is already in XYB.
CopyImageTo(color, &opsin);
ToXYB(c_enc, metadata->m.IntensityTarget(), black, pool, &color, cms,
linear);
} else {
// Nothing to do.
// RGB or YCbCr: forward YCbCr is not implemented, this is only used when
// the input is already in YCbCr
// If encoding a special DC or reference frame: input is already in XYB.
}
bool lossless = cparams.IsLossless();
if (alpha && !alpha_eci->alpha_associated &&
@ -1449,32 +1449,29 @@ Status ComputeEncodingData(
!ApplyOverride(cparams.keep_invisible, lossless) &&
cparams.ec_resampling == cparams.resampling) {
// simplify invisible pixels
SimplifyInvisible(&opsin, *alpha, lossless);
SimplifyInvisible(&color, *alpha, lossless);
if (linear) {
SimplifyInvisible(linear, *alpha, lossless);
}
}
PadImageToBlockMultipleInPlace(&opsin);
PadImageToBlockMultipleInPlace(&color);
}
color = Image3F();
// Rectangle within opsin that corresponds to the currently processed group in
// Rectangle within color that corresponds to the currently processed group in
// streaming mode.
Rect opsin_rect(x0 - patch_rect.x0(), y0 - patch_rect.y0(),
Rect group_rect(x0 - patch_rect.x0(), y0 - patch_rect.y0(),
RoundUpToBlockDim(xsize), RoundUpToBlockDim(ysize));
if (enc_state.initialize_global_state && !jpeg_data) {
ComputeChromacityAdjustments(cparams, opsin, opsin_rect,
ComputeChromacityAdjustments(cparams, color, group_rect,
&mutable_frame_header);
}
if (!enc_state.streaming_mode) {
ComputeNoiseParams(cparams, !!jpeg_data, opsin, frame_dim,
&mutable_frame_header,
&shared.image_features.noise_params);
}
ComputeNoiseParams(cparams, enc_state.streaming_mode, !!jpeg_data, color,
frame_dim, &mutable_frame_header,
&shared.image_features.noise_params);
DownsampleColorChannels(cparams, frame_header, !!jpeg_data, &opsin);
DownsampleColorChannels(cparams, frame_header, !!jpeg_data, &color);
if (cparams.ec_resampling != 1 && !cparams.already_downsampled) {
for (ImageF& ec : extra_channels) {
@ -1483,7 +1480,7 @@ Status ComputeEncodingData(
}
if (!enc_state.streaming_mode) {
opsin_rect = Rect(opsin);
group_rect = Rect(color);
}
if (frame_header.encoding == FrameEncoding::kVarDCT) {
@ -1496,7 +1493,7 @@ Status ComputeEncodingData(
*jpeg_data, frame_header, pool, &enc_modular, &enc_state));
} else {
JXL_RETURN_IF_ERROR(ComputeVarDCTEncodingData(
frame_header, linear, &opsin, opsin_rect, cms, pool, &enc_modular,
frame_header, linear, &color, group_rect, cms, pool, &enc_modular,
&enc_state, aux_out));
}
ComputeAllCoeffOrders(enc_state, frame_dim);
@ -1508,16 +1505,15 @@ Status ComputeEncodingData(
TokenizeAllCoefficients(frame_header, pool, &enc_state));
}
JXL_RETURN_IF_ERROR(enc_modular.ComputeEncodingData(
frame_header, metadata->m, &opsin, extra_channels, &enc_state, cms, pool,
aux_out,
/* do_color=*/frame_header.encoding == FrameEncoding::kModular));
if (enc_state.initialize_global_state) {
JXL_RETURN_IF_ERROR(enc_modular.ComputeTree(pool));
}
JXL_RETURN_IF_ERROR(enc_modular.ComputeTokens(pool));
if (!enc_state.streaming_mode) {
if (cparams.modular_mode || !extra_channels.empty()) {
JXL_RETURN_IF_ERROR(enc_modular.ComputeEncodingData(
frame_header, metadata->m, &color, extra_channels, &enc_state, cms,
pool, aux_out, /*do_color=*/cparams.modular_mode));
}
JXL_RETURN_IF_ERROR(enc_modular.ComputeTree(pool));
JXL_RETURN_IF_ERROR(enc_modular.ComputeTokens(pool));
mutable_frame_header.UpdateFlag(shared.image_features.patches.HasAny(),
FrameHeader::kPatches);
mutable_frame_header.UpdateFlag(shared.image_features.splines.HasAny(),
@ -1683,10 +1679,10 @@ void ComputePermutationForStreaming(size_t xsize, size_t ysize,
size_t ac_x0 = dc_x * kBlockDim;
size_t ac_y1 = std::min<size_t>(group_ysize, ac_y0 + kBlockDim);
size_t ac_x1 = std::min<size_t>(group_xsize, ac_x0 + kBlockDim);
for (size_t ac_y = ac_y0; ac_y < ac_y1; ++ac_y) {
for (size_t ac_x = ac_x0; ac_x < ac_x1; ++ac_x) {
size_t group_ix = ac_y * group_xsize + ac_x;
for (size_t pass = 0; pass < num_passes; ++pass) {
for (size_t pass = 0; pass < num_passes; ++pass) {
for (size_t ac_y = ac_y0; ac_y < ac_y1; ++ac_y) {
for (size_t ac_x = ac_x0; ac_x < ac_x1; ++ac_x) {
size_t group_ix = ac_y * group_xsize + ac_x;
size_t old_ix =
AcGroupIndex(pass, group_ix, num_groups, num_dc_groups);
permutation[old_ix] = new_ix++;

View file

@ -6,9 +6,13 @@
#ifndef LIB_JXL_ENC_FRAME_H_
#define LIB_JXL_ENC_FRAME_H_
#include <jxl/cms_interface.h>
#include <jxl/types.h>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
@ -18,6 +22,7 @@
#include "lib/jxl/encode_internal.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"
namespace jxl {

View file

@ -49,9 +49,12 @@ void GaborishInverse(Image3F* in_out, const Rect& rect, float mul[3],
// image and reuse the existing planes of the in/out image.
ImageF temp(in_out->Plane(2).xsize(), in_out->Plane(2).ysize());
CopyImageTo(in_out->Plane(2), &temp);
Symmetric5(in_out->Plane(0), rect, weights[0], pool, &in_out->Plane(2), rect);
Symmetric5(in_out->Plane(1), rect, weights[1], pool, &in_out->Plane(0), rect);
Symmetric5(temp, rect, weights[2], pool, &in_out->Plane(1), rect);
Rect xrect = rect.Extend(3, Rect(*in_out));
Symmetric5(in_out->Plane(0), xrect, weights[0], pool, &in_out->Plane(2),
xrect);
Symmetric5(in_out->Plane(1), xrect, weights[1], pool, &in_out->Plane(0),
xrect);
Symmetric5(temp, xrect, weights[2], pool, &in_out->Plane(1), xrect);
// Now planes are 1, 2, 0.
in_out->Plane(0).Swap(in_out->Plane(1));
// 2 1 0

View file

@ -8,9 +8,6 @@
// Linear smoothing (3x3 convolution) for deblocking without too much blur.
#include <stdint.h>
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/image.h"

View file

@ -7,7 +7,11 @@
#include <hwy/base.h>
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/convolve.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/image_test_utils.h"
#include "lib/jxl/testing.h"

View file

@ -7,7 +7,6 @@
#define LIB_JXL_ENC_GROUP_H_
#include <stddef.h>
#include <stdint.h>
#include "lib/jxl/base/status.h"
#include "lib/jxl/enc_bit_writer.h"

View file

@ -5,13 +5,31 @@
#include "lib/jxl/enc_heuristics.h"
#include <jxl/cms_interface.h>
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <cstdlib>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "lib/jxl/ac_context.h"
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/override.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/butteraugli/butteraugli.h"
#include "lib/jxl/chroma_from_luma.h"
#include "lib/jxl/coeff_order.h"
#include "lib/jxl/coeff_order_fwd.h"
#include "lib/jxl/dec_xyb.h"
#include "lib/jxl/enc_ac_strategy.h"
#include "lib/jxl/enc_adaptive_quantization.h"
#include "lib/jxl/enc_ar_control_field.h"
@ -20,11 +38,16 @@
#include "lib/jxl/enc_gaborish.h"
#include "lib/jxl/enc_modular.h"
#include "lib/jxl/enc_noise.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/enc_patch_dictionary.h"
#include "lib/jxl/enc_photon_noise.h"
#include "lib/jxl/enc_quant_weights.h"
#include "lib/jxl/enc_splines.h"
#include "lib/jxl/enc_xyb.h"
#include "lib/jxl/frame_dimensions.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/passes_state.h"
#include "lib/jxl/quant_weights.h"
namespace jxl {
@ -735,14 +758,7 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
PatchDictionaryEncoder::SubtractFrom(image_features.patches, opsin);
}
static const float kAcQuant = 0.79f;
const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
// We don't know the quant field yet, but for computing the global scale
// assuming that it will be the same as for Falcon mode is good enough.
if (initialize_global_state) {
quantizer.ComputeGlobalScaleAndQuant(
quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
}
// TODO(veluca): we can now run all the code from here to FindBestQuantizer
// (excluded) one rect at a time. Do that.
@ -779,9 +795,10 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
initial_quant_masking =
ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
float q = kAcQuant / cparams.butteraugli_distance;
float q = 0.79 / cparams.butteraugli_distance;
FillImage(q, &initial_quant_field);
FillImage(1.0f / (q + 0.001f), &initial_quant_masking);
quantizer.ComputeGlobalScaleAndQuant(quant_dc, q, 0);
} else {
// Call this here, as it relies on pre-gaborish values.
float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
@ -791,9 +808,8 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
initial_quant_field = InitialQuantField(
butteraugli_distance_for_iqf, *opsin, rect, pool, 1.0f,
&initial_quant_masking, &initial_quant_masking1x1);
if (initialize_global_state) {
quantizer.SetQuantField(quant_dc, initial_quant_field, nullptr);
}
float q = 0.39 / cparams.butteraugli_distance;
quantizer.ComputeGlobalScaleAndQuant(quant_dc, q, 0);
}
// TODO(veluca): do something about animations.
@ -875,10 +891,6 @@ Status LossyFrameHeuristics(const FrameHeader& frame_header,
process_tile, "Enc Heuristics"));
acs_heuristics.Finalize(frame_dim, ac_strategy, aux_out);
if (cparams.speed_tier <= SpeedTier::kHare && initialize_global_state) {
cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat,
&cmap);
}
// Refine quantization levels.
if (!streaming_mode) {

View file

@ -10,15 +10,11 @@
#include <jxl/cms_interface.h>
#include <stddef.h>
#include <stdint.h>
#include <string>
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/image.h"
#include "lib/jxl/modular/encoding/enc_ma.h"
namespace jxl {

View file

@ -7,6 +7,7 @@
#include <stdint.h>
#include <limits>
#include <map>
#include <string>
#include <vector>
@ -93,6 +94,8 @@ static inline void EncodeVarInt(uint64_t value, PaddedBytes* data) {
data->resize(pos);
}
constexpr size_t kSizeLimit = std::numeric_limits<uint32_t>::max() >> 2;
} // namespace
// Outputs a transformed form of the given icc profile. The result itself is
@ -103,6 +106,13 @@ Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
PaddedBytes commands;
PaddedBytes data;
static_assert(sizeof(size_t) >= 4, "size_t is too short");
// Fuzzer expects that PredictICC can accept any input,
// but 1GB should be enough for any purpose.
if (size > kSizeLimit) {
return JXL_FAILURE("ICC profile is too large");
}
EncodeVarInt(size, result);
// Header
@ -227,6 +237,11 @@ Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
Tag tag;
size_t tagstart = 0, tagsize = 0, clutstart = 0;
// Should always check tag_sane before doing math with tagsize.
const auto tag_sane = [&tagsize]() {
return (tagsize > 8) && (tagsize < kSizeLimit);
};
size_t last0 = pos;
// This loop appends commands to the output, processing some sub-section of a
// current tagged element each time. We need to keep track of the tagtype of
@ -241,7 +256,8 @@ Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
PaddedBytes data_add;
// This means the loop brought the position beyond the tag end.
if (pos > tagstart + tagsize) {
// If tagsize is nonsensical, any pos looks "ok-ish".
if ((pos > tagstart + tagsize) && (tagsize < kSizeLimit)) {
tag = {{0, 0, 0, 0}}; // nonsensical value
}
@ -252,7 +268,7 @@ Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
tagstart = tagstarts[index];
tagsize = tagsizes[index];
if (tag == kMlucTag && pos + tagsize <= size && tagsize > 8 &&
if (tag == kMlucTag && tag_sane() && pos + tagsize <= size &&
icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
icc[pos + 7] == 0) {
size_t num = tagsize - 8;
@ -268,7 +284,7 @@ Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
Unshuffle(data_add.data() + start, num, 2);
}
if (tag == kCurvTag && pos + tagsize <= size && tagsize > 8 &&
if (tag == kCurvTag && tag_sane() && pos + tagsize <= size &&
icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
icc[pos + 7] == 0) {
size_t num = tagsize - 8;
@ -334,8 +350,8 @@ Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
}
if (commands_add.empty() && data_add.empty() && tag == kGbd_Tag &&
pos == tagstart + 8 && pos + tagsize - 8 <= size && pos > 16 &&
tagsize > 8) {
tag_sane() && pos == tagstart + 8 && pos + tagsize - 8 <= size &&
pos > 16) {
size_t width = 4, order = 0, stride = width;
size_t num = tagsize - 8;
uint8_t flags = (order << 2) | (width - 1) | (stride == width ? 0 : 16);

View file

@ -73,115 +73,6 @@ static const float squeeze_luma_qtable[16] = {
static const float squeeze_chroma_qtable[16] = {
1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
// Builds a fixed decision tree that repeatedly bisects `cutoffs` on a single
// `property`; every leaf uses predictor `pred`. `cutoffs` must be sorted.
//   property:   property index tested by every split node.
//   cutoffs:    sorted split values; the middle entry of each index range
//               becomes the split value for that range.
//   pred:       predictor assigned to all leaves.
//   num_pixels: only used to stop the bisection earlier on small images.
// Returns the tree with nodes stored in breadth-first creation order.
Tree MakeFixedTree(int property, const std::vector<int32_t>& cutoffs,
                   Predictor pred, size_t num_pixels) {
  // Reduce fixed tree height when encoding small images: below 2^14 pixels a
  // range is only split while it is wider than `min_gap` cutoffs.
  const size_t log_px = CeilLog2Nonzero(num_pixels);
  const size_t min_gap = log_px < 14 ? 8 * (14 - log_px) : 0;

  // A half-open index range [begin, end) into `cutoffs` together with the
  // position of the tree node that currently represents it.
  struct Range {
    size_t begin, end, pos;
  };

  Tree tree;
  // Leaf IDs will be set by roundtrip decoding the tree.
  tree.push_back(PropertyDecisionNode::Leaf(pred));

  std::queue<Range> pending;
  pending.push(Range{0, cutoffs.size(), 0});
  while (!pending.empty()) {
    const Range cur = pending.front();
    pending.pop();
    if (cur.begin + min_gap < cur.end) {
      const uint32_t mid = (cur.begin + cur.end) / 2;
      // The two children are appended back to back; the split node records
      // the index of the first one.
      const size_t first_child = tree.size();
      tree[cur.pos] =
          PropertyDecisionNode::Split(property, cutoffs[mid], first_child);
      tree.push_back(PropertyDecisionNode::Leaf(pred));
      tree.push_back(PropertyDecisionNode::Leaf(pred));
      // First child covers the cutoffs above the split, second child the
      // cutoffs below it.
      pending.push(Range{mid + 1, cur.end, first_child});
      pending.push(Range{cur.begin, mid, first_child + 1});
    }
  }
  return tree;
}
// Returns the hard-coded decision tree associated with `tree_kind`.
//   tree_kind:    which predefined tree to build.
//   total_pixels: selects a single-leaf tree for small kACMeta inputs and is
//                 forwarded to MakeFixedTree for the two fixed-DC kinds.
// Hits JXL_UNREACHABLE for any kind not handled below.
Tree PredefinedTree(ModularOptions::TreeKind tree_kind, size_t total_pixels) {
  using Kind = ModularOptions::TreeKind;
  using Node = PropertyDecisionNode;

  if (tree_kind == Kind::kJpegTranscodeACMeta ||
      tree_kind == Kind::kTrivialTreeNoPredictor) {
    // All the data is 0, so no need for a fancy tree.
    return {Node::Leaf(Predictor::Zero)};
  }

  if (tree_kind == Kind::kFalconACMeta) {
    // All the data is 0 except the quant field. TODO(veluca): make that 0 too.
    return {Node::Leaf(Predictor::Left)};
  }

  if (tree_kind == Kind::kACMeta) {
    // Small image.
    if (total_pixels < 1024) {
      return {Node::Leaf(Predictor::Left)};
    }
    // Node positions are given in the comments; each Split's last argument is
    // the position of its first child.
    return {
        // 0: c > 1
        Node::Split(0, 1, 1),
        // 1: c > 2
        Node::Split(0, 2, 3),
        // 2: c > 0
        Node::Split(0, 0, 5),
        // 3: EPF control field (all 0 or 4), top > 0
        Node::Split(6, 0, 21),
        // 4: ACS+QF, y > 0
        Node::Split(2, 0, 7),
        // 5: CfL x
        Node::Leaf(Predictor::Gradient),
        // 6: CfL b
        Node::Leaf(Predictor::Gradient),
        // 7: QF: split according to the left quant value.
        Node::Split(7, 5, 9),
        // 8: ACS: split in 4 segments (8x8 from 0 to 3, large square 4-5,
        // large rectangular 6-11, 8x8 12+), according to previous ACS value.
        Node::Split(7, 5, 15),
        // 9-14: QF
        Node::Split(7, 11, 11),
        Node::Split(7, 3, 13),
        Node::Leaf(Predictor::Left),
        Node::Leaf(Predictor::Left),
        Node::Leaf(Predictor::Left),
        Node::Leaf(Predictor::Left),
        // 15-20: ACS
        Node::Split(7, 11, 17),
        Node::Split(7, 3, 19),
        Node::Leaf(Predictor::Zero),
        Node::Leaf(Predictor::Zero),
        Node::Leaf(Predictor::Zero),
        Node::Leaf(Predictor::Zero),
        // 21-26: EPF, left > 0
        Node::Split(7, 0, 23),
        Node::Split(7, 0, 25),
        Node::Leaf(Predictor::Zero),
        Node::Leaf(Predictor::Zero),
        Node::Leaf(Predictor::Zero),
        Node::Leaf(Predictor::Zero),
    };
  }

  const bool weighted_dc = tree_kind == Kind::kWPFixedDC;
  if (weighted_dc || tree_kind == Kind::kGradientFixedDC) {
    // Both fixed-DC trees bisect the same cutoff table; they differ only in
    // the property that is tested and the predictor used at the leaves.
    const std::vector<int32_t> cutoffs = {
        -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15,
        -11,  -7,   -4,   -3,   -1,   0,   1,   3,   5,   7,   11,
        15,   23,   31,   47,   63,   95,  127, 191, 255, 392, 500};
    if (weighted_dc) {
      return MakeFixedTree(kWPProp, cutoffs, Predictor::Weighted,
                           total_pixels);
    }
    return MakeFixedTree(kGradientProp, cutoffs, Predictor::Gradient,
                         total_pixels);
  }

  JXL_UNREACHABLE("Unreachable");
  return {};
}
// Merges the trees in `trees` using nodes that decide on stream_id, as defined
// by `tree_splits`.
void MergeTrees(const std::vector<Tree>& trees,
@ -759,7 +650,6 @@ Status ModularFrameEncoder::ComputeEncodingData(
if (cparams_.responsive && !gi.channel.empty() &&
max_bitdepth + 2 < level_max_bitdepth) {
Transform t(TransformId::kSqueeze);
t.squeezes = cparams_.squeezes;
do_transform(gi, t, weighted::Header(), pool);
max_bitdepth += 2;
}
@ -1194,19 +1084,23 @@ Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out,
if (stream_images_[stream_id].channel.empty()) {
return true; // Image with no channels, header never gets decoded.
}
JXL_RETURN_IF_ERROR(
Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out));
WriteTokens(tokens_[stream_id], code_, context_map_, 0, writer, layer,
aux_out);
if (tokens_.empty()) {
JXL_RETURN_IF_ERROR(ModularGenericCompress(
stream_images_[stream_id], stream_options_[stream_id], writer, aux_out,
layer, stream_id));
} else {
JXL_RETURN_IF_ERROR(
Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out));
WriteTokens(tokens_[stream_id], code_, context_map_, 0, writer, layer,
aux_out);
}
return true;
}
// Drops the per-stream data held for `stream`: the stream's image and, when a
// token slot exists for it, the stream's tokens.
//
// Each entry is swapped with a default-constructed local, so the previous
// contents are destroyed when that local goes out of scope.
void ModularFrameEncoder::ClearStreamData(const ModularStreamId& stream) {
  const size_t stream_id = stream.ID(frame_dim_);

  Image empty_image;
  std::swap(stream_images_[stream_id], empty_image);

  // EncodeStream() explicitly supports `tokens_` being empty, so there may be
  // no token slot for this stream; indexing it unconditionally would read
  // out of bounds.
  if (stream_id < tokens_.size()) {
    std::vector<Token> empty_tokens;
    std::swap(tokens_[stream_id], empty_tokens);
  }
}
namespace {

View file

@ -8,19 +8,18 @@
// Parameters and flags that govern JXL compression.
#include <jxl/cms_interface.h>
#include <jxl/encode.h>
#include <stddef.h>
#include <stdint.h>
#include <string>
#include <vector>
#include "lib/jxl/base/override.h"
#include "lib/jxl/butteraugli/butteraugli.h"
#include "lib/jxl/enc_progressive_split.h"
#include "lib/jxl/frame_dimensions.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/modular/encoding/dec_ma.h"
#include "lib/jxl/modular/options.h"
#include "lib/jxl/modular/transform/transform.h"
#include "lib/jxl/splines.h"
namespace jxl {
@ -92,10 +91,10 @@ struct CompressParams {
int epf = -1;
// Progressive mode.
bool progressive_mode = false;
Override progressive_mode = Override::kDefault;
// Quantized-progressive mode.
bool qprogressive_mode = false;
Override qprogressive_mode = Override::kDefault;
// Put center groups first in the bitstream.
bool centerfirst = false;
@ -137,8 +136,6 @@ struct CompressParams {
// modular mode options below
ModularOptions options;
int responsive = -1;
// empty for default squeeze
std::vector<SqueezeParams> squeezes;
int colorspace = -1;
// Use Global channel palette if #colors < this percentage of range
float channel_colors_pre_transform_percent = 95.f;
@ -173,7 +170,7 @@ struct CompressParams {
// Switches these parameters to lossless encoding: modular mode, zero
// butteraugli distance for the color channels and for every extra channel,
// and no color transform.
void SetLossless() {
  modular_mode = true;
  butteraugli_distance = 0.0f;
  // One pass is enough; every extra-channel distance is reset to zero.
  for (float& f : ec_distance) f = 0.0f;
  color_transform = jxl::ColorTransform::kNone;
}
@ -198,6 +195,8 @@ struct CompressParams {
// See JXL_ENC_FRAME_SETTING_BUFFERING option value.
int buffering = 0;
// See JXL_ENC_FRAME_SETTING_USE_FULL_IMAGE_HEURISTICS option value.
bool use_full_image_heuristics = true;
std::vector<float> manual_noise;
std::vector<float> manual_xyb_factors;

View file

@ -748,8 +748,8 @@ void RoundtripPatchFrame(Image3F* reference_frame,
cparams.modular_mode = true;
cparams.responsive = 0;
cparams.progressive_dc = 0;
cparams.progressive_mode = false;
cparams.qprogressive_mode = false;
cparams.progressive_mode = Override::kOff;
cparams.qprogressive_mode = Override::kOff;
// Use gradient predictor and not Predictor::Best.
cparams.options.predictor = Predictor::Gradient;
patch_frame_info.save_as_reference = idx; // always saved.

View file

@ -402,7 +402,7 @@ void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
HWY_ALIGN float scratch_space[4 * 8 * 5];
size_t afv_x = afv_kind & 1;
size_t afv_y = afv_kind / 2;
HWY_ALIGN float block[4 * 8];
HWY_ALIGN float block[4 * 8] = {};
for (size_t iy = 0; iy < 4; iy++) {
for (size_t ix = 0; ix < 4; ix++) {
block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =

View file

@ -113,95 +113,82 @@ V LinearFromSRGB(V encoded) {
return TF_SRGB().DisplayFromEncoded(encoded);
}
Status LinearSRGBToXYB(const Image3F& linear,
const float* JXL_RESTRICT premul_absorb,
ThreadPool* pool, Image3F* JXL_RESTRICT xyb) {
const size_t xsize = linear.xsize();
Status LinearSRGBToXYB(const float* JXL_RESTRICT premul_absorb,
ThreadPool* pool, Image3F* JXL_RESTRICT image) {
const size_t xsize = image->xsize();
const HWY_FULL(float) d;
return RunOnPool(
pool, 0, static_cast<uint32_t>(linear.ysize()), ThreadPool::NoInit,
pool, 0, static_cast<uint32_t>(image->ysize()), ThreadPool::NoInit,
[&](const uint32_t task, size_t /*thread*/) {
const size_t y = static_cast<size_t>(task);
const float* JXL_RESTRICT row_in0 = linear.ConstPlaneRow(0, y);
const float* JXL_RESTRICT row_in1 = linear.ConstPlaneRow(1, y);
const float* JXL_RESTRICT row_in2 = linear.ConstPlaneRow(2, y);
float* JXL_RESTRICT row_xyb0 = xyb->PlaneRow(0, y);
float* JXL_RESTRICT row_xyb1 = xyb->PlaneRow(1, y);
float* JXL_RESTRICT row_xyb2 = xyb->PlaneRow(2, y);
float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
for (size_t x = 0; x < xsize; x += Lanes(d)) {
const auto in_r = Load(d, row_in0 + x);
const auto in_g = Load(d, row_in1 + x);
const auto in_b = Load(d, row_in2 + x);
LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_xyb0 + x,
row_xyb1 + x, row_xyb2 + x);
const auto in_r = Load(d, row0 + x);
const auto in_g = Load(d, row1 + x);
const auto in_b = Load(d, row2 + x);
LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
row2 + x);
}
},
"LinearToXYB");
}
Status SRGBToXYB(const Image3F& srgb, const float* JXL_RESTRICT premul_absorb,
ThreadPool* pool, Image3F* JXL_RESTRICT xyb) {
const size_t xsize = srgb.xsize();
Status SRGBToXYB(const float* JXL_RESTRICT premul_absorb, ThreadPool* pool,
Image3F* JXL_RESTRICT image) {
const size_t xsize = image->xsize();
const HWY_FULL(float) d;
return RunOnPool(
pool, 0, static_cast<uint32_t>(srgb.ysize()), ThreadPool::NoInit,
pool, 0, static_cast<uint32_t>(image->ysize()), ThreadPool::NoInit,
[&](const uint32_t task, size_t /*thread*/) {
const size_t y = static_cast<size_t>(task);
const float* JXL_RESTRICT row_srgb0 = srgb.ConstPlaneRow(0, y);
const float* JXL_RESTRICT row_srgb1 = srgb.ConstPlaneRow(1, y);
const float* JXL_RESTRICT row_srgb2 = srgb.ConstPlaneRow(2, y);
float* JXL_RESTRICT row_xyb0 = xyb->PlaneRow(0, y);
float* JXL_RESTRICT row_xyb1 = xyb->PlaneRow(1, y);
float* JXL_RESTRICT row_xyb2 = xyb->PlaneRow(2, y);
float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
for (size_t x = 0; x < xsize; x += Lanes(d)) {
const auto in_r = LinearFromSRGB(Load(d, row_srgb0 + x));
const auto in_g = LinearFromSRGB(Load(d, row_srgb1 + x));
const auto in_b = LinearFromSRGB(Load(d, row_srgb2 + x));
LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_xyb0 + x,
row_xyb1 + x, row_xyb2 + x);
const auto in_r = LinearFromSRGB(Load(d, row0 + x));
const auto in_g = LinearFromSRGB(Load(d, row1 + x));
const auto in_b = LinearFromSRGB(Load(d, row2 + x));
LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
row2 + x);
}
},
"SRGBToXYB");
}
Status SRGBToXYBAndLinear(const Image3F& srgb,
const float* JXL_RESTRICT premul_absorb,
ThreadPool* pool, Image3F* JXL_RESTRICT xyb,
Status SRGBToXYBAndLinear(const float* JXL_RESTRICT premul_absorb,
ThreadPool* pool, Image3F* JXL_RESTRICT image,
Image3F* JXL_RESTRICT linear) {
const size_t xsize = srgb.xsize();
const size_t xsize = image->xsize();
const HWY_FULL(float) d;
return RunOnPool(
pool, 0, static_cast<uint32_t>(srgb.ysize()), ThreadPool::NoInit,
pool, 0, static_cast<uint32_t>(image->ysize()), ThreadPool::NoInit,
[&](const uint32_t task, size_t /*thread*/) {
const size_t y = static_cast<size_t>(task);
const float* JXL_RESTRICT row_srgb0 = srgb.ConstPlaneRow(0, y);
const float* JXL_RESTRICT row_srgb1 = srgb.ConstPlaneRow(1, y);
const float* JXL_RESTRICT row_srgb2 = srgb.ConstPlaneRow(2, y);
float* JXL_RESTRICT row_image0 = image->PlaneRow(0, y);
float* JXL_RESTRICT row_image1 = image->PlaneRow(1, y);
float* JXL_RESTRICT row_image2 = image->PlaneRow(2, y);
float* JXL_RESTRICT row_linear0 = linear->PlaneRow(0, y);
float* JXL_RESTRICT row_linear1 = linear->PlaneRow(1, y);
float* JXL_RESTRICT row_linear2 = linear->PlaneRow(2, y);
float* JXL_RESTRICT row_xyb0 = xyb->PlaneRow(0, y);
float* JXL_RESTRICT row_xyb1 = xyb->PlaneRow(1, y);
float* JXL_RESTRICT row_xyb2 = xyb->PlaneRow(2, y);
for (size_t x = 0; x < xsize; x += Lanes(d)) {
const auto in_r = LinearFromSRGB(Load(d, row_srgb0 + x));
const auto in_g = LinearFromSRGB(Load(d, row_srgb1 + x));
const auto in_b = LinearFromSRGB(Load(d, row_srgb2 + x));
const auto in_r = LinearFromSRGB(Load(d, row_image0 + x));
const auto in_g = LinearFromSRGB(Load(d, row_image1 + x));
const auto in_b = LinearFromSRGB(Load(d, row_image2 + x));
Store(in_r, d, row_linear0 + x);
Store(in_g, d, row_linear1 + x);
Store(in_b, d, row_linear2 + x);
LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_xyb0 + x,
row_xyb1 + x, row_xyb2 + x);
LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_image0 + x,
row_image1 + x, row_image2 + x);
}
},
"SRGBToXYBAndLinear");
@ -281,38 +268,13 @@ Image3F TransformToLinearRGB(const Image3F& in,
return out;
}
void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding,
float intensity_target, ThreadPool* pool,
Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms) {
JXL_ASSERT(SameSize(in, *xyb));
const HWY_FULL(float) d;
// Pre-broadcasted constants
HWY_ALIGN float premul_absorb[MaxLanes(d) * 12];
ComputePremulAbsorb(intensity_target, premul_absorb);
bool is_gray = color_encoding.IsGray();
const ColorEncoding& c_linear_srgb = ColorEncoding::LinearSRGB(is_gray);
if (c_linear_srgb.SameColorEncoding(color_encoding)) {
JXL_CHECK(LinearSRGBToXYB(in, premul_absorb, pool, xyb));
} else if (color_encoding.IsSRGB()) {
JXL_CHECK(SRGBToXYB(in, premul_absorb, pool, xyb));
} else {
Image3F linear =
TransformToLinearRGB(in, color_encoding, intensity_target, cms, pool);
JXL_CHECK(LinearSRGBToXYB(linear, premul_absorb, pool, xyb));
}
}
// This is different from Butteraugli's OpsinDynamicsImage() in the sense that
// it does not contain a sensitivity multiplier based on the blurred image.
void ToXYB(const Image3F& color, const ColorEncoding& c_current,
float intensity_target, const ImageF* black, ThreadPool* pool,
Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms,
Image3F* const JXL_RESTRICT linear) {
JXL_ASSERT(SameSize(color, *xyb));
if (black) JXL_ASSERT(SameSize(color, *black));
if (linear) JXL_ASSERT(SameSize(color, *linear));
void ToXYB(const ColorEncoding& c_current, float intensity_target,
const ImageF* black, ThreadPool* pool, Image3F* JXL_RESTRICT image,
const JxlCmsInterface& cms, Image3F* const JXL_RESTRICT linear) {
if (black) JXL_ASSERT(SameSize(*image, *black));
if (linear) JXL_ASSERT(SameSize(*image, *linear));
const HWY_FULL(float) d;
// Pre-broadcasted constants
@ -326,12 +288,12 @@ void ToXYB(const Image3F& color, const ColorEncoding& c_current,
// Linear sRGB inputs are rare but can be useful for the fastest encoders, for
// which undoing the sRGB transfer function would be a large part of the cost.
if (c_linear_srgb.SameColorEncoding(c_current)) {
JXL_CHECK(LinearSRGBToXYB(color, premul_absorb, pool, xyb));
// This only happens if kitten or slower, moving ImageBundle might be
// possible but the encoder is much slower than this copy.
if (want_linear) {
CopyImageTo(color, linear);
CopyImageTo(*image, linear);
}
JXL_CHECK(LinearSRGBToXYB(premul_absorb, pool, image));
return;
}
@ -340,31 +302,20 @@ void ToXYB(const Image3F& color, const ColorEncoding& c_current,
// Common case: can avoid allocating/copying
if (want_linear) {
// Slow encoder also wants linear sRGB.
JXL_CHECK(SRGBToXYBAndLinear(color, premul_absorb, pool, xyb, linear));
JXL_CHECK(SRGBToXYBAndLinear(premul_absorb, pool, image, linear));
} else {
JXL_CHECK(SRGBToXYB(color, premul_absorb, pool, xyb));
JXL_CHECK(SRGBToXYB(premul_absorb, pool, image));
}
return;
}
// General case: not sRGB, need color transform.
Image3F linear_storage; // Local storage only used if !want_linear.
Image3F* linear_storage_ptr;
JXL_CHECK(ApplyColorTransform(c_current, intensity_target, *image, black,
Rect(*image), c_linear_srgb, cms, pool,
want_linear ? linear : image));
if (want_linear) {
// Caller asked for linear, use that storage directly.
linear_storage_ptr = linear;
} else {
// Caller didn't ask for linear, create our own local storage
// OK to reuse metadata, it will not be changed.
linear_storage = Image3F(color.xsize(), color.ysize());
linear_storage_ptr = &linear_storage;
CopyImageTo(*linear, image);
}
JXL_CHECK(ApplyColorTransform(c_current, intensity_target, color, black,
Rect(color), c_linear_srgb, cms, pool,
linear_storage_ptr));
JXL_CHECK(LinearSRGBToXYB(*linear_storage_ptr, premul_absorb, pool, xyb));
JXL_CHECK(LinearSRGBToXYB(premul_absorb, pool, image));
}
// Transform RGB to YCbCr.
@ -436,17 +387,18 @@ HWY_AFTER_NAMESPACE();
#if HWY_ONCE
namespace jxl {
HWY_EXPORT(ToXYB);
void ToXYB(const Image3F& color, const ColorEncoding& c_current,
float intensity_target, const ImageF* black, ThreadPool* pool,
Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms,
Image3F* const JXL_RESTRICT linear) {
void ToXYB(const ColorEncoding& c_current, float intensity_target,
const ImageF* black, ThreadPool* pool, Image3F* JXL_RESTRICT image,
const JxlCmsInterface& cms, Image3F* const JXL_RESTRICT linear) {
HWY_DYNAMIC_DISPATCH(ToXYB)
(color, c_current, intensity_target, black, pool, xyb, cms, linear);
(c_current, intensity_target, black, pool, image, cms, linear);
}
void ToXYB(const ImageBundle& in, ThreadPool* pool, Image3F* JXL_RESTRICT xyb,
const JxlCmsInterface& cms, Image3F* JXL_RESTRICT linear) {
ToXYB(in.color(), in.c_current(), in.metadata()->IntensityTarget(),
*xyb = Image3F(in.xsize(), in.ysize());
CopyImageTo(in.color(), xyb);
ToXYB(in.c_current(), in.metadata()->IntensityTarget(),
in.HasBlack() ? &in.black() : nullptr, pool, xyb, cms, linear);
}
@ -484,14 +436,6 @@ void ScaleXYB(Image3F* opsin) {
}
}
// Public entry point for Image3FToXYB: forwards all arguments unchanged to the
// implementation selected by HWY_DYNAMIC_DISPATCH.
HWY_EXPORT(Image3FToXYB);
void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding,
float intensity_target, ThreadPool* pool,
Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms) {
return HWY_DYNAMIC_DISPATCH(Image3FToXYB)(in, color_encoding,
intensity_target, pool, xyb, cms);
}
HWY_EXPORT(RgbToYcbcr);
Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,

View file

@ -10,29 +10,26 @@
#include <jxl/cms_interface.h>
#include <cstddef>
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/enc_bit_writer.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
namespace jxl {
// Converts any color space to XYB. If `linear` is not null, fills it with a
// linear sRGB copy of `in`.
void ToXYB(const Image3F& color, const ColorEncoding& c_current,
float intensity_target, const ImageF* black, ThreadPool* pool,
Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms,
Image3F* const JXL_RESTRICT linear);
// Converts any color space to XYB in-place. If `linear` is not null, fills it
// with a linear sRGB copy of `image`.
void ToXYB(const ColorEncoding& c_current, float intensity_target,
const ImageF* black, ThreadPool* pool, Image3F* JXL_RESTRICT image,
const JxlCmsInterface& cms, Image3F* JXL_RESTRICT linear);
void ToXYB(const ImageBundle& in, ThreadPool* pool, Image3F* JXL_RESTRICT xyb,
const JxlCmsInterface& cms, Image3F* JXL_RESTRICT linear = nullptr);
void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding,
float intensity_target, ThreadPool* pool,
Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms);
void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
float* JXL_RESTRICT row2,
const float* JXL_RESTRICT premul_absorb, size_t xsize);

View file

@ -8,6 +8,7 @@
#include <jxl/codestream_header.h>
#include <jxl/encode.h>
#include <jxl/types.h>
#include <jxl/version.h>
#include <algorithm>
#include <cstddef>
@ -1641,10 +1642,12 @@ JxlEncoderStatus JxlEncoderFrameSettingsSetOption(
frame_settings->values.cparams.responsive = value;
break;
case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC:
frame_settings->values.cparams.progressive_mode = value;
frame_settings->values.cparams.progressive_mode =
static_cast<jxl::Override>(value);
break;
case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC:
frame_settings->values.cparams.qprogressive_mode = value;
frame_settings->values.cparams.qprogressive_mode =
static_cast<jxl::Override>(value);
break;
case JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC:
if (value < -1 || value > 2) {
@ -1671,7 +1674,6 @@ JxlEncoderStatus JxlEncoderFrameSettingsSetOption(
// alternatively, in the cjxl binary like now)
frame_settings->values.cparams.lossy_palette = (value == 1);
break;
return JXL_ENC_SUCCESS;
case JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM:
if (value < -1 || value > 2) {
return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
@ -1760,6 +1762,13 @@ JxlEncoderStatus JxlEncoderFrameSettingsSetOption(
case JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF:
frame_settings->values.cparams.jpeg_keep_jumbf = value;
break;
case JXL_ENC_FRAME_SETTING_USE_FULL_IMAGE_HEURISTICS:
if (value < 0 || value > 1) {
return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
"Option value has to be 0 or 1");
}
frame_settings->values.cparams.use_full_image_heuristics = value;
break;
default:
return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
@ -1855,6 +1864,7 @@ JxlEncoderStatus JxlEncoderFrameSettingsSetFloatOption(
case JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF:
case JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP:
case JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF:
case JXL_ENC_FRAME_SETTING_USE_FULL_IMAGE_HEURISTICS:
return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
"Int option, try setting it with "
"JxlEncoderFrameSettingsSetOption");

View file

@ -7,17 +7,21 @@
#ifndef LIB_JXL_ENCODE_INTERNAL_H_
#define LIB_JXL_ENCODE_INTERNAL_H_
#include <jxl/cms_interface.h>
#include <jxl/codestream_header.h>
#include <jxl/encode.h>
#include <jxl/memory_manager.h>
#include <jxl/parallel_runner.h>
#include <jxl/types.h>
#include <sys/types.h>
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
@ -29,6 +33,7 @@
#include "lib/jxl/enc_aux_out.h"
#include "lib/jxl/enc_fast_lossless.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/jpeg/jpeg_data.h"
#include "lib/jxl/memory_manager_internal.h"
#include "lib/jxl/padded_bytes.h"

View file

@ -6,14 +6,18 @@
#include <jxl/cms.h>
#include <jxl/cms_interface.h>
#include <jxl/codestream_header.h>
#include <jxl/color_encoding.h>
#include <jxl/decode.h>
#include <jxl/decode_cxx.h>
#include <jxl/encode.h>
#include <jxl/encode_cxx.h>
#include <jxl/memory_manager.h>
#include <jxl/types.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <mutex>
#include <ostream>
@ -23,16 +27,18 @@
#include <utility>
#include <vector>
#include "jxl/types.h"
#include "lib/extras/codec.h"
#include "lib/extras/dec/jxl.h"
#include "lib/extras/metrics.h"
#include "lib/extras/packed_image.h"
#include "lib/jxl/base/byte_order.h"
#include "lib/jxl/base/c_callback_support.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/common.h" // JXL_HIGH_PRECISION
#include "lib/jxl/enc_params.h"
#include "lib/jxl/encode_internal.h"
#include "lib/jxl/jpeg/dec_jpeg_data.h"
#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
#include "lib/jxl/modular/options.h"
#include "lib/jxl/test_image.h"
#include "lib/jxl/test_utils.h"
#include "lib/jxl/testing.h"
@ -516,7 +522,7 @@ TEST(EncodeTest, frame_settingsTest) {
VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 2830,
/*lossy_use_original_profile=*/false);
EXPECT_EQ(false, enc->last_used_cparams.responsive);
EXPECT_EQ(true, enc->last_used_cparams.progressive_mode);
EXPECT_EQ(jxl::Override::kOn, enc->last_used_cparams.progressive_mode);
EXPECT_EQ(2, enc->last_used_cparams.progressive_dc);
}
@ -979,7 +985,7 @@ TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(ProgressiveJPEGReconstructionTest)) {
JxlEncoderFrameSettings* frame_settings =
JxlEncoderFrameSettingsCreate(enc.get(), NULL);
frame_settings->values.cparams.progressive_mode = true;
frame_settings->values.cparams.progressive_mode = jxl::Override::kOn;
EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE));
EXPECT_EQ(JXL_ENC_SUCCESS,
@ -1774,7 +1780,7 @@ class EncoderStreamingTest : public testing::TestWithParam<StreamingTestParam> {
static void SetupEncoder(JxlEncoderFrameSettings* frame_settings,
const StreamingTestParam& p,
const JxlBasicInfo& basic_info,
size_t number_extra_channels) {
size_t number_extra_channels, bool streaming) {
JxlEncoderStruct* enc = frame_settings->enc;
EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
if (p.fast_lossless()) {
@ -1788,9 +1794,14 @@ class EncoderStreamingTest : public testing::TestWithParam<StreamingTestParam> {
JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
EXPECT_EQ(JXL_ENC_SUCCESS,
JxlEncoderSetColorEncoding(enc, &color_encoding));
EXPECT_EQ(JXL_ENC_SUCCESS,
JxlEncoderFrameSettingsSetOption(frame_settings,
JXL_ENC_FRAME_SETTING_BUFFERING,
streaming ? 3 : 0));
EXPECT_EQ(JXL_ENC_SUCCESS,
JxlEncoderFrameSettingsSetOption(
frame_settings, JXL_ENC_FRAME_SETTING_BUFFERING, 3));
frame_settings,
JXL_ENC_FRAME_SETTING_USE_FULL_IMAGE_HEURISTICS, 0));
if (p.use_container()) {
EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10));
}
@ -1875,7 +1886,7 @@ TEST_P(EncoderStreamingTest, OutputCallback) {
ASSERT_NE(nullptr, enc.get());
JxlEncoderFrameSettings* frame_settings =
JxlEncoderFrameSettingsCreate(enc.get(), NULL);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels, false);
SetupInputNonStreaming(frame_settings, p, number_extra_channels, frame,
ec_frame);
uint8_t* next_out = compressed.data();
@ -1890,7 +1901,7 @@ TEST_P(EncoderStreamingTest, OutputCallback) {
ASSERT_NE(nullptr, enc.get());
JxlEncoderFrameSettings* frame_settings =
JxlEncoderFrameSettingsCreate(enc.get(), NULL);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels, true);
SetupInputNonStreaming(frame_settings, p, number_extra_channels, frame,
ec_frame);
JxlStreamingAdapter streaming_adapter(enc.get(), p.return_large_buffers(),
@ -1901,7 +1912,7 @@ TEST_P(EncoderStreamingTest, OutputCallback) {
}
EXPECT_TRUE(SameDecodedPixels(compressed, streaming_compressed));
EXPECT_LE(streaming_compressed.size(), compressed.size() + 16);
EXPECT_LE(streaming_compressed.size(), compressed.size() + 1024);
}
TEST_P(EncoderStreamingTest, ChunkedFrame) {
@ -1928,7 +1939,7 @@ TEST_P(EncoderStreamingTest, ChunkedFrame) {
ASSERT_NE(nullptr, enc.get());
JxlEncoderFrameSettings* frame_settings =
JxlEncoderFrameSettingsCreate(enc.get(), NULL);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels, false);
SetupInputNonStreaming(frame_settings, p, number_extra_channels, frame,
ec_frame);
uint8_t* next_out = compressed.data();
@ -1942,7 +1953,7 @@ TEST_P(EncoderStreamingTest, ChunkedFrame) {
ASSERT_NE(nullptr, enc.get());
JxlEncoderFrameSettings* frame_settings =
JxlEncoderFrameSettingsCreate(enc.get(), NULL);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels, true);
SetupInputStreaming(frame_settings, p, number_extra_channels, frame,
ec_frame);
uint8_t* next_out = streaming_compressed.data();
@ -1951,7 +1962,7 @@ TEST_P(EncoderStreamingTest, ChunkedFrame) {
}
EXPECT_TRUE(SameDecodedPixels(compressed, streaming_compressed));
EXPECT_LE(streaming_compressed.size(), compressed.size() + 16);
EXPECT_LE(streaming_compressed.size(), compressed.size() + 1024);
}
TEST_P(EncoderStreamingTest, ChunkedAndOutputCallback) {
@ -1978,7 +1989,7 @@ TEST_P(EncoderStreamingTest, ChunkedAndOutputCallback) {
ASSERT_NE(nullptr, enc.get());
JxlEncoderFrameSettings* frame_settings =
JxlEncoderFrameSettingsCreate(enc.get(), NULL);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels, false);
SetupInputNonStreaming(frame_settings, p, number_extra_channels, frame,
ec_frame);
uint8_t* next_out = compressed.data();
@ -1993,7 +2004,7 @@ TEST_P(EncoderStreamingTest, ChunkedAndOutputCallback) {
ASSERT_NE(nullptr, enc.get());
JxlEncoderFrameSettings* frame_settings =
JxlEncoderFrameSettingsCreate(enc.get(), NULL);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels);
SetupEncoder(frame_settings, p, basic_info, number_extra_channels, true);
JxlStreamingAdapter streaming_adapter =
JxlStreamingAdapter(enc.get(), p.return_large_buffers(), p.can_seek());
SetupInputStreaming(frame_settings, p, number_extra_channels, frame,
@ -2003,7 +2014,7 @@ TEST_P(EncoderStreamingTest, ChunkedAndOutputCallback) {
}
EXPECT_TRUE(SameDecodedPixels(compressed, streaming_compressed));
EXPECT_LE(streaming_compressed.size(), compressed.size() + 16);
EXPECT_LE(streaming_compressed.size(), compressed.size() + 1024);
}
JXL_GTEST_INSTANTIATE_TEST_SUITE_P(

View file

@ -368,8 +368,7 @@ Status FrameHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&animation_frame));
}
JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &is_last));
}
if (frame_type != FrameType::kRegularFrame) {
} else {
is_last = false;
}

View file

@ -1,623 +0,0 @@
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "lib/jxl/gauss_blur.h"
#include <string.h>
#include <algorithm>
#include <cmath>
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/gauss_blur.cc"
#include <hwy/cache_control.h>
#include <hwy/foreach_target.h>
#include <hwy/highway.h>
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/matrix_ops.h"
#include "lib/jxl/image_ops.h"
HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {
// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Add;
using hwy::HWY_NAMESPACE::Broadcast;
using hwy::HWY_NAMESPACE::GetLane;
using hwy::HWY_NAMESPACE::Mul;
using hwy::HWY_NAMESPACE::MulAdd;
using hwy::HWY_NAMESPACE::NegMulSub;
#if HWY_TARGET != HWY_SCALAR
using hwy::HWY_NAMESPACE::ShiftLeftLanes;
#endif
using hwy::HWY_NAMESPACE::Vec;
// Filters one row of `width` floats from `in` into `out` using the recursive
// Gaussian described by `rg`.
//
// For every position n the input term is in[n - N - 1] + in[n + N - 1], where
// N = rg->radius and samples outside [0, width) count as zero. Three
// second-order recurrences (suffixes _1, _3, _5) are advanced in parallel,
// each computing
//   out_k[n] = mul_in_k * input + mul_prev_k * out_k[n-1]
//              + mul_prev2_k * out_k[n-2],
// and out[n] is the sum of the three.
//
// rg:    supplies `radius` and the coefficient arrays `mul_in`, `mul_prev`
//        and `mul_prev2`, each read in three groups spaced 4 floats apart.
// in:    input row, `width` samples.
// width: number of samples in `in` and `out`.
// out:   output row, `width` samples.
// NOTE(review): the unrolled loop writes with Store (not StoreU) at out + n,
// so `out` presumably has to be vector-aligned - confirm against callers.
void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
const float* JXL_RESTRICT in, intptr_t width,
float* JXL_RESTRICT out) {
// Although the current output depends on the previous output, we can unroll
// up to 4x by precomputing up to fourth powers of the constants. Beyond that,
// numerical precision might become a problem. Macro because this is tested
// in #if alongside HWY_TARGET.
#define JXL_GAUSS_MAX_LANES 4
using D = HWY_CAPPED(float, JXL_GAUSS_MAX_LANES);
using V = Vec<D>;
const D d;
// Coefficients of the three recurrences; group k starts at offset k * 4.
const V mul_in_1 = Load(d, rg->mul_in + 0 * 4);
const V mul_in_3 = Load(d, rg->mul_in + 1 * 4);
const V mul_in_5 = Load(d, rg->mul_in + 2 * 4);
const V mul_prev_1 = Load(d, rg->mul_prev + 0 * 4);
const V mul_prev_3 = Load(d, rg->mul_prev + 1 * 4);
const V mul_prev_5 = Load(d, rg->mul_prev + 2 * 4);
const V mul_prev2_1 = Load(d, rg->mul_prev2 + 0 * 4);
const V mul_prev2_3 = Load(d, rg->mul_prev2 + 1 * 4);
const V mul_prev2_5 = Load(d, rg->mul_prev2 + 2 * 4);
// Recurrence state: the previous (prev_*) and second-previous (prev2_*)
// outputs of each of the three recurrences, initially zero.
V prev_1 = Zero(d);
V prev_3 = Zero(d);
V prev_5 = Zero(d);
V prev2_1 = Zero(d);
V prev2_3 = Zero(d);
V prev2_5 = Zero(d);
const intptr_t N = rg->radius;
// Iteration starts before the first output sample; see the n >= 0 check
// below.
intptr_t n = -N + 1;
// Left side with bounds checks and only write output after n >= 0.
// Stops at a multiple of Lanes(d) that is at least N + 1 (or at `width`), so
// the unrolled loop below never reads in[] at a negative index.
const intptr_t first_aligned = RoundUpTo(N + 1, Lanes(d));
for (; n < std::min(first_aligned, width); ++n) {
const intptr_t left = n - N - 1;
const intptr_t right = n + N - 1;
const float left_val = left >= 0 ? in[left] : 0.0f;
const float right_val = right < width ? in[right] : 0.0f;
const V sum = Set(d, left_val + right_val);
// (Only processing a single lane here, no need to broadcast)
V out_1 = Mul(sum, mul_in_1);
V out_3 = Mul(sum, mul_in_3);
V out_5 = Mul(sum, mul_in_5);
out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
// prev2 has been consumed for this step; shift the history by one.
prev2_1 = prev_1;
prev2_3 = prev_3;
prev2_5 = prev_5;
out_1 = MulAdd(mul_prev_1, prev_1, out_1);
out_3 = MulAdd(mul_prev_3, prev_3, out_3);
out_5 = MulAdd(mul_prev_5, prev_5, out_5);
prev_1 = out_1;
prev_3 = out_3;
prev_5 = out_5;
if (n >= 0) {
out[n] = GetLane(Add(out_1, Add(out_3, out_5)));
}
}
// The above loop is effectively scalar but it is convenient to use the same
// prev/prev2 variables, so broadcast to each lane before the unrolled loop.
#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES > 1
prev2_1 = Broadcast<0>(prev2_1);
prev2_3 = Broadcast<0>(prev2_3);
prev2_5 = Broadcast<0>(prev2_5);
prev_1 = Broadcast<0>(prev_1);
prev_3 = Broadcast<0>(prev_3);
prev_5 = Broadcast<0>(prev_5);
#endif
// Unrolled, no bounds checking needed.
// The upper bound leaves room for the JXL_GAUSS_MAX_LANES - 1 extra samples
// that the right-hand LoadU reads past in[n + N - 1].
for (; n < width - N + 1 - (JXL_GAUSS_MAX_LANES - 1); n += Lanes(d)) {
const V sum = Add(LoadU(d, in + n - N - 1), LoadU(d, in + n + N - 1));
// To get a vector of output(s), we multiply broadcasted vectors (of each
// input plus the two previous outputs) and add them all together.
// Incremental broadcasting and shifting is expected to be cheaper than
// horizontal adds or transposing 4x4 values because they run on a different
// port, concurrently with the FMA.
const V in0 = Broadcast<0>(sum);
V out_1 = Mul(in0, mul_in_1);
V out_3 = Mul(in0, mul_in_3);
V out_5 = Mul(in0, mul_in_5);
#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES >= 2
const V in1 = Broadcast<1>(sum);
out_1 = MulAdd(ShiftLeftLanes<1>(mul_in_1), in1, out_1);
out_3 = MulAdd(ShiftLeftLanes<1>(mul_in_3), in1, out_3);
out_5 = MulAdd(ShiftLeftLanes<1>(mul_in_5), in1, out_5);
#if JXL_GAUSS_MAX_LANES >= 4
const V in2 = Broadcast<2>(sum);
out_1 = MulAdd(ShiftLeftLanes<2>(mul_in_1), in2, out_1);
out_3 = MulAdd(ShiftLeftLanes<2>(mul_in_3), in2, out_3);
out_5 = MulAdd(ShiftLeftLanes<2>(mul_in_5), in2, out_5);
const V in3 = Broadcast<3>(sum);
out_1 = MulAdd(ShiftLeftLanes<3>(mul_in_1), in3, out_1);
out_3 = MulAdd(ShiftLeftLanes<3>(mul_in_3), in3, out_3);
out_5 = MulAdd(ShiftLeftLanes<3>(mul_in_5), in3, out_5);
#endif
#endif
out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
out_1 = MulAdd(mul_prev_1, prev_1, out_1);
out_3 = MulAdd(mul_prev_3, prev_3, out_3);
out_5 = MulAdd(mul_prev_5, prev_5, out_5);
// Carry the two most recent outputs into the next iteration: the plain values
// in the single-lane case, otherwise the last two lanes broadcast to all
// lanes.
#if HWY_TARGET == HWY_SCALAR || JXL_GAUSS_MAX_LANES == 1
prev2_1 = prev_1;
prev2_3 = prev_3;
prev2_5 = prev_5;
prev_1 = out_1;
prev_3 = out_3;
prev_5 = out_5;
#else
prev2_1 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_1);
prev2_3 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_3);
prev2_5 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_5);
prev_1 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_1);
prev_3 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_3);
prev_5 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_5);
#endif
Store(Add(out_1, Add(out_3, out_5)), d, out + n);
}
// Remainder handling with bounds checks
// Same single-lane update as the first loop, except that every iteration
// writes out[n].
for (; n < width; ++n) {
const intptr_t left = n - N - 1;
const intptr_t right = n + N - 1;
const float left_val = left >= 0 ? in[left] : 0.0f;
const float right_val = right < width ? in[right] : 0.0f;
const V sum = Set(d, left_val + right_val);
// (Only processing a single lane here, no need to broadcast)
V out_1 = Mul(sum, mul_in_1);
V out_3 = Mul(sum, mul_in_3);
V out_5 = Mul(sum, mul_in_5);
out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
prev2_1 = prev_1;
prev2_3 = prev_3;
prev2_5 = prev_5;
out_1 = MulAdd(mul_prev_1, prev_1, out_1);
out_3 = MulAdd(mul_prev_3, prev_3, out_3);
out_5 = MulAdd(mul_prev_5, prev_5, out_5);
prev_1 = out_1;
prev_3 = out_3;
prev_5 = out_5;
out[n] = GetLane(Add(out_1, Add(out_3, out_5)));
}
}
// Ring buffer is for n, n-1, n-2; round up to 4 for faster modulo.
// VerticalBlock reduces its running counter modulo kMod to locate those three
// entries.
constexpr size_t kMod = 4;
// Output policy that discards its arguments; used during warmup so that no
// store is issued.
struct OutputNone {
  template <typename VecT>
  void operator()(const VecT& /*unused*/, float* JXL_RESTRICT /*pos*/,
                  ptrdiff_t /*offset*/) const {
    // Intentionally empty.
  }
};
// Common case: write output vectors in all VerticalBlock except warmup.
struct OutputStore {
template <class V>
void operator()(const V& out, float* JXL_RESTRICT pos,
ptrdiff_t offset) const {
// Stream helps for large images but is slower for images that fit in cache.
const HWY_FULL(float) df;
Store(out, df, pos + offset);
}
};
// Input policy for the top/bottom borders, where only one of the two rows
// exists, so no addition is needed. `pos` may point to an all-zero buffer when
// the row lies outside the input image.
class SingleInput {
 public:
  explicit SingleInput(const float* pos) : pos_(pos) {}

  // Returns one full vector loaded from `pos_ + offset`.
  Vec<HWY_FULL(float)> operator()(const size_t offset) const {
    const HWY_FULL(float) d;
    const float* const src = pos_ + offset;
    return Load(d, src);
  }

  const float* pos_;
};
// Input policy for the image interior: loads one vector from each of two rows
// (the row above and the row below) and returns their lane-wise sum.
class TwoInputs {
 public:
  TwoInputs(const float* pos1, const float* pos2) : pos1_(pos1), pos2_(pos2) {}

  Vec<HWY_FULL(float)> operator()(const size_t offset) const {
    const HWY_FULL(float) d;
    return Add(Load(d, pos1_ + offset), Load(d, pos2_ + offset));
  }

 private:
  const float* pos1_;
  const float* pos2_;
};
// Block := kVectors consecutive full vectors (one cache line except on the
// right boundary, where we can only rely on having one vector). Unrolling to
// the cache line size improves cache utilization.
//
// Advances the three second-order recurrences (k = 1, 3, 5) by one row for a
// block of kVectors vectors and hands the sum of the three new outputs to
// `output`.
//
// d1_*, n2_*: per-k filter coefficients, one vector each.
// input:      functor returning the input vector for a given lane offset.
// ctr:        row counter; incremented here, selects the ring-buffer slot.
// ring_buffer: 3 * kLanes * kMod floats holding the previous outputs of the
//             three recurrences.
// output:     functor receiving (vector, out_pos, lane offset).
// out_pos:    forwarded unchanged to `output`.
template <size_t kVectors, class V, class Input, class Output>
void VerticalBlock(const V& d1_1, const V& d1_3, const V& d1_5, const V& n2_1,
const V& n2_3, const V& n2_5, const Input& input,
size_t& ctr, float* ring_buffer, const Output output,
float* JXL_RESTRICT out_pos) {
const HWY_FULL(float) d;
constexpr size_t kVN = MaxLanes(d);
// More cache-friendly to process an entire cache line at a time
constexpr size_t kLanes = kVectors * kVN;
// One region of kLanes * kMod floats per recurrence (k = 1, 3, 5).
float* JXL_RESTRICT y_1 = ring_buffer + 0 * kLanes * kMod;
float* JXL_RESTRICT y_3 = ring_buffer + 1 * kLanes * kMod;
float* JXL_RESTRICT y_5 = ring_buffer + 2 * kLanes * kMod;
// Slots for the current row and the two previous rows. `ctr - 2` may wrap
// around (unsigned) on the first call; because kMod is a power of two the
// result modulo kMod is still the slot two steps back.
const size_t n_0 = (++ctr) % kMod;
const size_t n_1 = (ctr - 1) % kMod;
const size_t n_2 = (ctr - 2) % kMod;
for (size_t idx_vec = 0; idx_vec < kVectors; ++idx_vec) {
const V sum = input(idx_vec * kVN);
// Outputs of the previous row (n-1) and the row before that (n-2).
const V y_n1_1 = Load(d, y_1 + kLanes * n_1 + idx_vec * kVN);
const V y_n1_3 = Load(d, y_3 + kLanes * n_1 + idx_vec * kVN);
const V y_n1_5 = Load(d, y_5 + kLanes * n_1 + idx_vec * kVN);
const V y_n2_1 = Load(d, y_1 + kLanes * n_2 + idx_vec * kVN);
const V y_n2_3 = Load(d, y_3 + kLanes * n_2 + idx_vec * kVN);
const V y_n2_5 = Load(d, y_5 + kLanes * n_2 + idx_vec * kVN);
// (35)
// With Highway's NegMulSub(a, b, c) = -a * b - c this evaluates
// y = n2 * sum - d1 * y[n-1] - y[n-2].
const V y1 = MulAdd(n2_1, sum, NegMulSub(d1_1, y_n1_1, y_n2_1));
const V y3 = MulAdd(n2_3, sum, NegMulSub(d1_3, y_n1_3, y_n2_3));
const V y5 = MulAdd(n2_5, sum, NegMulSub(d1_5, y_n1_5, y_n2_5));
// Remember the new outputs for the next two rows.
Store(y1, d, y_1 + kLanes * n_0 + idx_vec * kVN);
Store(y3, d, y_3 + kLanes * n_0 + idx_vec * kVN);
Store(y5, d, y_5 + kLanes * n_0 + idx_vec * kVN);
output(Add(y1, Add(y3, y5)), out_pos, idx_vec * kVN);
}
// NOTE: flushing cache line out_pos hurts performance - less so with
// clflushopt than clflush but still a significant slowdown.
}
// Reads/writes one block (kVectors full vectors) in each row.
//
// Runs the vertical pass over the columns [x, x + kVectors * MaxLanes) of
// `in`, writing the result to the same columns of `out`. Rows outside the
// image are treated as zero.
//
// rg:  precomputed filter constants (d1, n2, radius).
// in:  source image.
// x:   first column of the strip.
// out: destination image; rows 0..ysize-1 of the strip are written.
template <size_t kVectors>
void VerticalStrip(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
const ImageF& in, const size_t x, ImageF* JXL_RESTRICT out) {
// We're iterating vertically, so use multiple full-length vectors (each lane
// is one column of row n).
using D = HWY_FULL(float);
using V = Vec<D>;
const D d;
constexpr size_t kVN = MaxLanes(d);
// More cache-friendly to process an entire cache line at a time
constexpr size_t kLanes = kVectors * kVN;
// Each coefficient is stored 4x in rg (one 128-bit block per k), so it can be
// broadcast with LoadDup128; the scalar target reads the first copy instead.
#if HWY_TARGET == HWY_SCALAR
const V d1_1 = Set(d, rg->d1[0 * 4]);
const V d1_3 = Set(d, rg->d1[1 * 4]);
const V d1_5 = Set(d, rg->d1[2 * 4]);
const V n2_1 = Set(d, rg->n2[0 * 4]);
const V n2_3 = Set(d, rg->n2[1 * 4]);
const V n2_5 = Set(d, rg->n2[2 * 4]);
#else
const V d1_1 = LoadDup128(d, rg->d1 + 0 * 4);
const V d1_3 = LoadDup128(d, rg->d1 + 1 * 4);
const V d1_5 = LoadDup128(d, rg->d1 + 2 * 4);
const V n2_1 = LoadDup128(d, rg->n2 + 0 * 4);
const V n2_3 = LoadDup128(d, rg->n2 + 1 * 4);
const V n2_5 = LoadDup128(d, rg->n2 + 2 * 4);
#endif
const size_t N = rg->radius;
const size_t ysize = in.ysize();
size_t ctr = 0;
// Previous outputs of the three recurrences, initially zero.
HWY_ALIGN float ring_buffer[3 * kLanes * kMod] = {0};
// Stand-in for rows that lie outside the image.
HWY_ALIGN static constexpr float zero[kLanes] = {0};
// Warmup: top is out of bounds (zero padded), bottom is usually in-bounds.
ssize_t n = -static_cast<ssize_t>(N) + 1;
for (; n < 0; ++n) {
// bottom is always non-negative since n is initialized in -N + 1.
const size_t bottom = n + N - 1;
VerticalBlock<kVectors>(
d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
SingleInput(bottom < ysize ? in.ConstRow(bottom) + x : zero), ctr,
ring_buffer, OutputNone(), nullptr);
}
JXL_DASSERT(n >= 0);
// Start producing output; top is still out of bounds.
for (; static_cast<size_t>(n) < std::min(N + 1, ysize); ++n) {
const size_t bottom = n + N - 1;
VerticalBlock<kVectors>(
d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
SingleInput(bottom < ysize ? in.ConstRow(bottom) + x : zero), ctr,
ring_buffer, OutputStore(), out->Row(n) + x);
}
// Interior outputs with prefetching and without bounds checks.
constexpr size_t kPrefetchRows = 8;
// NOTE(review): for small images the unsigned expression below wraps around;
// the loop then relies on the cast to ssize_t yielding a negative bound so
// that it is skipped.
for (; n < static_cast<ssize_t>(ysize - N + 1 - kPrefetchRows); ++n) {
const size_t top = n - N - 1;
const size_t bottom = n + N - 1;
VerticalBlock<kVectors>(
d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
TwoInputs(in.ConstRow(top) + x, in.ConstRow(bottom) + x), ctr,
ring_buffer, OutputStore(), out->Row(n) + x);
// The loop bound keeps bottom + kPrefetchRows below ysize.
hwy::Prefetch(in.ConstRow(top + kPrefetchRows) + x);
hwy::Prefetch(in.ConstRow(bottom + kPrefetchRows) + x);
}
// Bottom border without prefetching and with bounds checks.
for (; static_cast<size_t>(n) < ysize; ++n) {
const size_t top = n - N - 1;
const size_t bottom = n + N - 1;
VerticalBlock<kVectors>(
d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
TwoInputs(in.ConstRow(top) + x,
bottom < ysize ? in.ConstRow(bottom) + x : zero),
ctr, ring_buffer, OutputStore(), out->Row(n) + x);
}
}
// Runs the vertical 1D scan over every column of `in`, one column per vector
// lane, writing into `out` (which must have the same size as `in`).
// Not yet parallelized: the thread pool argument is ignored.
void FastGaussianVertical(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
                          const ImageF& in, ThreadPool* /*pool*/,
                          ImageF* JXL_RESTRICT out) {
  JXL_CHECK(SameSize(in, *out));

  const HWY_FULL(float) d;
  constexpr size_t kLanesPerVector = MaxLanes(d);
  constexpr size_t kLanesPerCacheLine = 64 / sizeof(float);
  // Enough vectors to cover one 64-byte cache line; 4 if a single vector is
  // already at least that wide.
  constexpr size_t kVectorsPerStrip =
      (kLanesPerVector < kLanesPerCacheLine)
          ? (kLanesPerCacheLine / kLanesPerVector)
          : 4;
  constexpr size_t kStripWidth = kVectorsPerStrip * kLanesPerVector;

  const size_t xsize = in.xsize();
  size_t x = 0;
  // Wide strips while a whole strip still fits.
  while (x + kStripWidth <= xsize) {
    VerticalStrip<kVectorsPerStrip>(rg, in, x, out);
    x += kStripWidth;
  }
  // Remaining columns, one vector at a time.
  while (x < xsize) {
    VerticalStrip<1>(rg, in, x, out);
    x += kLanesPerVector;
  }
}
// TODO(veluca): consider replacing with FastGaussian.
//
// Convolves every row of `in` with `kernel` (odd length, radius r), keeps one
// output per `res` input columns starting at column res / 2, and stores the
// result transposed: the output image has in.ysize() columns and
// in.xsize() / res rows.
// Requires in.xsize() to be a multiple of `res`.
ImageF ConvolveXSampleAndTranspose(const ImageF& in,
const std::vector<float>& kernel,
const size_t res) {
JXL_ASSERT(kernel.size() % 2 == 1);
JXL_ASSERT(in.xsize() % res == 0);
const size_t offset = res / 2;
const size_t out_xsize = in.xsize() / res;
ImageF out(in.ysize(), out_xsize);
const int r = kernel.size() / 2;
HWY_FULL(float) df;
// Row copy with r extra samples on each side plus one vector of slack so the
// unaligned loads in the SIMD loop stay inside the buffer.
std::vector<float> row_tmp(in.xsize() + 2 * r + Lanes(df));
float* const JXL_RESTRICT rowp = &row_tmp[r];
// Kernel copy padded with zeros (via resize) so that lanes loaded past the
// last tap contribute nothing.
std::vector<float> padded_k = kernel;
padded_k.resize(padded_k.size() + Lanes(df));
const float* const kernelp = &padded_k[r];
for (size_t y = 0; y < in.ysize(); ++y) {
ExtrapolateBorders(in.Row(y), rowp, in.xsize(), r);
size_t x = offset, ox = 0;
// Left edge (x < r): scalar loop.
// NOTE(review): the index is clamped to [0, xsize], so the mirrored samples
// at negative offsets written by ExtrapolateBorders are not read here.
for (; x < static_cast<uint32_t>(r) && x < in.xsize(); x += res, ++ox) {
float sum = 0.0f;
for (int i = -r; i <= r; ++i) {
sum += rowp[std::max<int>(
0, std::min<int>(static_cast<int>(x) + i, in.xsize()))] *
kernelp[i];
}
out.Row(ox)[y] = sum;
}
// Interior (r <= x and x + r < xsize): vectorized, Lanes(df) taps per step.
for (; x + r < in.xsize(); x += res, ++ox) {
auto sum = Zero(df);
for (int i = -r; i <= r; i += Lanes(df)) {
sum = MulAdd(LoadU(df, rowp + x + i), LoadU(df, kernelp + i), sum);
}
out.Row(ox)[y] = GetLane(SumOfLanes(df, sum));
}
// Right edge: same clamped scalar loop as on the left.
for (; x < in.xsize(); x += res, ++ox) {
float sum = 0.0f;
for (int i = -r; i <= r; ++i) {
sum += rowp[std::max<int>(
0, std::min<int>(static_cast<int>(x) + i, in.xsize()))] *
kernelp[i];
}
out.Row(ox)[y] = sum;
}
}
return out;
}
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace jxl
HWY_AFTER_NAMESPACE();
#if HWY_ONCE
namespace jxl {
HWY_EXPORT(FastGaussian1D);
HWY_EXPORT(ConvolveXSampleAndTranspose);
// Public entry point: forwards all arguments to the FastGaussian1D
// implementation chosen by HWY_DYNAMIC_DISPATCH.
void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
                    const float* JXL_RESTRICT in, intptr_t width,
                    float* JXL_RESTRICT out) {
  HWY_DYNAMIC_DISPATCH(FastGaussian1D)(rg, in, width, out);
}
HWY_EXPORT(FastGaussianVertical); // Local function.
// Copies `row_in` (xsize samples) to `row_out` and fills `radius` extra samples
// on each side by mirroring around the first/last sample:
//   row_out[-x]            = row_in[x]
//   row_out[xsize - 1 + x] = row_in[xsize - 1 - x]
// Source indices that would leave the row are clamped to the nearest valid
// sample. `row_out` must have room for `radius` floats before and after the
// xsize payload.
void ExtrapolateBorders(const float* const JXL_RESTRICT row_in,
                        float* const JXL_RESTRICT row_out, const int xsize,
                        const int radius) {
  const int last = xsize - 1;

  // Left padding.
  for (int dist = radius; dist >= 1; --dist) {
    const int src = dist < last ? dist : last;
    row_out[-dist] = row_in[src];
  }

  // Payload.
  memcpy(row_out, row_in, xsize * sizeof(row_out[0]));

  // Right padding.
  for (int dist = radius; dist >= 1; --dist) {
    const int src = last - dist;
    row_out[last + dist] = row_in[src > 0 ? src : 0];
  }
}
// Single-plane entry point: forwards to the ConvolveXSampleAndTranspose
// implementation chosen by HWY_DYNAMIC_DISPATCH.
ImageF ConvolveXSampleAndTranspose(const ImageF& in,
                                   const std::vector<float>& kernel,
                                   const size_t res) {
  ImageF transposed =
      HWY_DYNAMIC_DISPATCH(ConvolveXSampleAndTranspose)(in, kernel, res);
  return transposed;
}
// Three-plane overload: applies the single-plane version to planes 0, 1 and 2
// independently and bundles the results.
Image3F ConvolveXSampleAndTranspose(const Image3F& in,
                                    const std::vector<float>& kernel,
                                    const size_t res) {
  const auto convolve_plane = [&](const size_t c) {
    return ConvolveXSampleAndTranspose(in.Plane(c), kernel, res);
  };
  return Image3F(convolve_plane(0), convolve_plane(1), convolve_plane(2));
}
// Separable 2D convolution with subsampling by `res` in both directions:
// convolve + sample + transpose twice, so the second pass handles the other
// axis and restores the original orientation.
ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
                         const size_t res) {
  return ConvolveXSampleAndTranspose(
      ConvolveXSampleAndTranspose(in, kernel, res), kernel, res);
}
// Implements "Recursive Implementation of the Gaussian Filter Using Truncated
// Cosine Functions" by Charalampidis [2016].
//
// Precomputes all constants of the recursive (IIR) Gaussian for the given
// `sigma` and returns them in an aligned RecursiveGaussian:
//   - n2/d1: per-k coefficients, each replicated 4x;
//   - mul_prev/mul_prev2/mul_in: per-lane multipliers for the 4x unrolled scan;
//   - radius: the kernel radius N from (57).
// Numbers in parentheses refer to equations in the paper.
hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma) {
  auto rg = hwy::MakeUniqueAligned<RecursiveGaussian>();
  constexpr double kPi = 3.141592653589793238;

  // std::round keeps the argument in double precision; roundf would first
  // narrow it to float, which can move a value across a .5 boundary.
  const double radius = std::round(3.2795 * sigma + 0.2546);  // (57), "N"

  // Table I, first row
  const double pi_div_2r = kPi / (2.0 * radius);
  const double omega[3] = {pi_div_2r, 3.0 * pi_div_2r, 5.0 * pi_div_2r};

  // (37), k={1,3,5}
  const double p_1 = +1.0 / std::tan(0.5 * omega[0]);
  const double p_3 = -1.0 / std::tan(0.5 * omega[1]);
  const double p_5 = +1.0 / std::tan(0.5 * omega[2]);

  // (44), k={1,3,5}
  const double r_1 = +p_1 * p_1 / std::sin(omega[0]);
  const double r_3 = -p_3 * p_3 / std::sin(omega[1]);
  const double r_5 = +p_5 * p_5 / std::sin(omega[2]);

  // (50), k={1,3,5}
  const double neg_half_sigma2 = -0.5 * sigma * sigma;
  const double recip_radius = 1.0 / radius;
  double rho[3];
  for (size_t i = 0; i < 3; ++i) {
    rho[i] = std::exp(neg_half_sigma2 * omega[i] * omega[i]) * recip_radius;
  }

  // second part of (52), k1,k2 = 1,3; 3,5; 5,1
  const double D_13 = p_1 * r_3 - r_1 * p_3;
  const double D_35 = p_3 * r_5 - r_3 * p_5;
  const double D_51 = p_5 * r_1 - r_5 * p_1;

  // (52), k=5
  const double recip_d13 = 1.0 / D_13;
  const double zeta_15 = D_35 * recip_d13;
  const double zeta_35 = D_51 * recip_d13;

  double A[9] = {p_1,     p_3,     p_5,  //
                 r_1,     r_3,     r_5,  // (56)
                 zeta_15, zeta_35, 1};
  JXL_CHECK(Inv3x3Matrix(A));
  const double gamma[3] = {1, radius * radius - sigma * sigma,  // (55)
                           zeta_15 * rho[0] + zeta_35 * rho[1] + rho[2]};
  double beta[3];
  Mul3x3Vector(A, gamma, beta);  // (53)

  // Sanity check: correctly solved for beta (IIR filter weights are normalized)
  const double sum = beta[0] * p_1 + beta[1] * p_3 + beta[2] * p_5;  // (39)
  JXL_ASSERT(std::abs(sum - 1) < 1E-12);
  (void)sum;

  rg->radius = static_cast<int>(radius);

  double n2[3];
  double d1[3];
  for (size_t i = 0; i < 3; ++i) {
    n2[i] = -beta[i] * std::cos(omega[i] * (radius + 1.0));  // (33)
    d1[i] = -2.0 * std::cos(omega[i]);                       // (33)

    // Replicate 4x so the vertical pass can broadcast with LoadDup128.
    for (size_t lane = 0; lane < 4; ++lane) {
      rg->n2[4 * i + lane] = static_cast<float>(n2[i]);
      rg->d1[4 * i + lane] = static_cast<float>(d1[i]);
    }

    const double d_2 = d1[i] * d1[i];

    // Obtained by expanding (35) for four consecutive outputs via sympy:
    // n, d, p, pp = symbols('n d p pp')
    // i0, i1, i2, i3 = symbols('i0 i1 i2 i3')
    // o0, o1, o2, o3 = symbols('o0 o1 o2 o3')
    // o0 = n*i0 - d*p - pp
    // o1 = n*i1 - d*o0 - p
    // o2 = n*i2 - d*o1 - o0
    // o3 = n*i3 - d*o2 - o1
    // Then expand(o3) and gather terms for p(prev), pp(prev2) etc.
    rg->mul_prev[4 * i + 0] = -d1[i];
    rg->mul_prev[4 * i + 1] = d_2 - 1.0;
    rg->mul_prev[4 * i + 2] = -d_2 * d1[i] + 2.0 * d1[i];
    rg->mul_prev[4 * i + 3] = d_2 * d_2 - 3.0 * d_2 + 1.0;
    rg->mul_prev2[4 * i + 0] = -1.0;
    rg->mul_prev2[4 * i + 1] = d1[i];
    rg->mul_prev2[4 * i + 2] = -d_2 + 1.0;
    rg->mul_prev2[4 * i + 3] = d_2 * d1[i] - 2.0 * d1[i];
    rg->mul_in[4 * i + 0] = n2[i];
    rg->mul_in[4 * i + 1] = -d1[i] * n2[i];
    rg->mul_in[4 * i + 2] = d_2 * n2[i] - n2[i];
    rg->mul_in[4 * i + 3] = -d_2 * d1[i] * n2[i] + 2.0 * d1[i] * n2[i];
  }
  return rg;
}
namespace {
// Runs FastGaussian1D on every row of `in`, writing the matching row of `out`.
// Rows are distributed over `pool`, one task per row; `in` and `out` must have
// the same size.
void FastGaussianHorizontal(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
                            const ImageF& in, ThreadPool* pool,
                            ImageF* JXL_RESTRICT out) {
  JXL_CHECK(SameSize(in, *out));

  const intptr_t row_width = in.xsize();
  const auto blur_row = [&](const uint32_t task, size_t /*thread*/) {
    const size_t row = task;
    const float* src = in.ConstRow(row);
    float* JXL_RESTRICT dst = out->Row(row);
    FastGaussian1D(rg, src, row_width, dst);
  };
  JXL_CHECK(RunOnPool(pool, 0, in.ysize(), ThreadPool::NoInit, blur_row,
                      "FastGaussianHorizontal"));
}
} // namespace
// 2D blur as two 1D passes: a horizontal pass from `in` into `temp`, then a
// vertical pass from `temp` into `out`.
// `pool` is handed to both passes; `temp` and `out` are checked against the
// size of their respective input by the passes themselves.
void FastGaussian(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp,
ImageF* JXL_RESTRICT out) {
// The vertical pass reads what the horizontal pass wrote, so the order matters.
FastGaussianHorizontal(rg, in, pool, temp);
HWY_DYNAMIC_DISPATCH(FastGaussianVertical)(rg, *temp, pool, out);
}
} // namespace jxl
#endif // HWY_ONCE

View file

@ -1,94 +0,0 @@
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#ifndef LIB_JXL_GAUSS_BLUR_H_
#define LIB_JXL_GAUSS_BLUR_H_
#include <stddef.h>
#include <cmath>
#include <hwy/aligned_allocator.h>
#include <vector>
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/image.h"
namespace jxl {
// Returns the 2 * radius + 1 taps of a Gaussian with standard deviation
// `sigma` (must be positive), sampled at the integer offsets -radius..radius
// and scaled so that the taps sum to 1.
template <typename T>
std::vector<T> GaussianKernel(int radius, T sigma) {
  JXL_ASSERT(sigma > 0.0);
  const T scaler = -1.0 / (2 * sigma * sigma);
  std::vector<T> kernel(2 * radius + 1);

  // Unnormalized taps; the running total is kept in double.
  double sum = 0.0;
  int offset = -radius;
  for (T& tap : kernel) {
    tap = std::exp(scaler * offset * offset);
    sum += tap;
    ++offset;
  }

  // Normalize.
  for (T& tap : kernel) {
    tap /= sum;
  }
  return kernel;
}
// All convolution functions below apply mirroring of the input on the borders
// in the following way:
//
// input: [a0 a1 a2 ... aN]
// mirrored input: [aR ... a1 | a0 a1 a2 .... aN | aN-1 ... aN-R]
//
// where R is the radius of the kernel (i.e. kernel size is 2*R+1).
// REQUIRES: in.xsize() and in.ysize() are integer multiples of res.
ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
const size_t res);
// Private, used by test.
void ExtrapolateBorders(const float* const JXL_RESTRICT row_in,
float* const JXL_RESTRICT row_out, const int xsize,
const int radius);
// Only for use by CreateRecursiveGaussian and FastGaussian*.
// Precomputed constants of the recursive Gaussian for one sigma. Packed with
// 1-byte alignment so that the five float tables are contiguous; each table has
// 3 groups (k = 1, 3, 5) of 4 floats.
#pragma pack(push, 1)
struct RecursiveGaussian {
// For k={1,3,5} in that order, each broadcasted 4x for LoadDup128. Used only
// for vertical passes.
float n2[3 * 4];
float d1[3 * 4];
// We unroll horizontal passes 4x - one output per lane. These are each lane's
// multiplier for the previous output (relative to the first of the four
// outputs). Indexing: 4 * 0..2 (for {1,3,5}) + 0..3 for the lane index.
float mul_prev[3 * 4];
// Ditto for the second to last output.
float mul_prev2[3 * 4];
// We multiply a vector of inputs 0..3 by a vector shifted from this array.
// in=0 uses all 4 (nonzero) terms; for in=3, the lower three lanes are 0.
float mul_in[3 * 4];
// Kernel radius "N" in samples, as computed by CreateRecursiveGaussian.
size_t radius;
};
#pragma pack(pop)
// Precomputation for FastGaussian*; users may use the same pointer/storage in
// subsequent calls to FastGaussian* with the same sigma.
hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma);
// 1D Gaussian with zero-pad boundary handling and runtime independent of sigma.
void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
const float* JXL_RESTRICT in, intptr_t width,
float* JXL_RESTRICT out);
// 2D Gaussian with zero-pad boundary handling and runtime independent of sigma.
void FastGaussian(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp,
ImageF* JXL_RESTRICT out);
} // namespace jxl
#endif // LIB_JXL_GAUSS_BLUR_H_

View file

@ -1,126 +0,0 @@
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <hwy/targets.h>
#include "benchmark/benchmark.h"
#include "lib/jxl/convolve.h"
#include "lib/jxl/gauss_blur.h"
#include "lib/jxl/image_ops.h"
namespace jxl {
namespace {
// Full-resolution FIR convolution: ConvolveAndSample without subsampling.
JXL_MAYBE_UNUSED ImageF Convolve(const ImageF& in,
                                 const std::vector<float>& kernel) {
  constexpr size_t kNoSubsampling = 1;
  return ConvolveAndSample(in, kernel, kNoSubsampling);
}
// Benchmarks FastGaussian1D on a single constant-valued row of
// `state.range()` samples. Items processed = samples per iteration.
void BM_GaussBlur1d(benchmark::State& state) {
  // Uncomment to disable SIMD and force the scalar implementation
  // hwy::DisableTargets(~HWY_SCALAR);
  // Uncomment to run AVX2
  // hwy::DisableTargets(HWY_AVX3);

  const size_t length = state.range();
  const double sigma = 7.0;  // (from Butteraugli application)
  ImageF in(length, 1);
  const float expected = length;
  FillImage(expected, &in);

  // (No temporary image needed for the 1D case.)
  ImageF out(length, 1);
  const auto rg = CreateRecursiveGaussian(sigma);
  for (auto _ : state) {
    FastGaussian1D(rg, in.Row(0), length, out.Row(0));
    // Prevent optimizing out
    JXL_ASSERT(std::abs(out.ConstRow(0)[length / 2] - expected) / expected <
               9E-5);
  }
  state.SetItemsProcessed(length * state.iterations());
}
// Benchmarks the 2D recursive Gaussian (FastGaussian) on a square,
// constant-valued image with side `state.range()`.
void BM_GaussBlur2d(benchmark::State& state) {
  // See GaussBlur1d for SIMD changes.
  const size_t side = state.range();
  const size_t xsize = side;
  const size_t ysize = side;
  const double sigma = 7.0;  // (from Butteraugli application)

  ImageF in(xsize, ysize);
  const float expected = xsize + ysize;
  FillImage(expected, &in);

  ImageF temp(xsize, ysize);
  ImageF out(xsize, ysize);
  const auto rg = CreateRecursiveGaussian(sigma);
  ThreadPool* null_pool = nullptr;

  for (auto _ : state) {
    FastGaussian(rg, in, null_pool, &temp, &out);
    // Prevent optimizing out
    const float center = out.ConstRow(ysize / 2)[xsize / 2];
    JXL_ASSERT(std::abs(center - expected) / expected < 9E-5);
  }
  state.SetItemsProcessed(xsize * ysize * state.iterations());
}
// Benchmarks the FIR reference (Convolve with an explicit Gaussian kernel of
// radius 4 * sigma) on a square, constant-valued image with side
// `state.range()`.
void BM_GaussBlurFir(benchmark::State& state) {
  // See GaussBlur1d for SIMD changes.
  const size_t xsize = state.range();
  const size_t ysize = xsize;
  const double sigma = 7.0;  // (from Butteraugli application)
  ImageF in(xsize, ysize);
  const float expected = xsize + ysize;
  FillImage(expected, &in);

  // Convolve returns a fresh image, so no temp/out buffers are allocated here.
  const std::vector<float> kernel =
      GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
  for (auto _ : state) {
    // Prevent optimizing out
    JXL_ASSERT(std::abs(Convolve(in, kernel).ConstRow(ysize / 2)[xsize / 2] -
                        expected) /
                   expected <
               9E-5);
  }
  state.SetItemsProcessed(xsize * ysize * state.iterations());
}
// Benchmarks the separable 7-tap convolution (Separable7) with sigma = 1
// Gaussian weights on a square, constant-valued image with side
// `state.range()`.
void BM_GaussBlurSep7(benchmark::State& state) {
  // See GaussBlur1d for SIMD changes.
  const size_t xsize = state.range();
  const size_t ysize = xsize;
  ImageF in(xsize, ysize);
  const float expected = xsize + ysize;
  FillImage(expected, &in);

  // Separable7 writes directly into `out`; no temporary image is needed.
  ImageF out(xsize, ysize);
  ThreadPool* null_pool = nullptr;
  // Gaussian with sigma 1
  const WeightsSeparable7 weights = {{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
                                      HWY_REP4(0.060626f), HWY_REP4(0.00598f)},
                                     {HWY_REP4(0.383103f), HWY_REP4(0.241843f),
                                      HWY_REP4(0.060626f), HWY_REP4(0.00598f)}};
  for (auto _ : state) {
    Separable7(in, Rect(in), weights, null_pool, &out);
    // Prevent optimizing out
    JXL_ASSERT(std::abs(out.ConstRow(ysize / 2)[xsize / 2] - expected) /
                   expected <
               9E-5);
  }
  state.SetItemsProcessed(xsize * ysize * state.iterations());
}
BENCHMARK(BM_GaussBlur1d)->Range(1 << 8, 1 << 14);
BENCHMARK(BM_GaussBlur2d)->Range(1 << 7, 1 << 10);
BENCHMARK(BM_GaussBlurFir)->Range(1 << 7, 1 << 10);
BENCHMARK(BM_GaussBlurSep7)->Range(1 << 7, 1 << 10);
} // namespace
} // namespace jxl

View file

@ -1,453 +0,0 @@
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "lib/jxl/gauss_blur.h"
#include <cmath>
#include <hwy/targets.h>
#include <vector>
#include "lib/extras/time.h"
#include "lib/jxl/base/printf_macros.h"
#include "lib/jxl/convolve.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/image_test_utils.h"
#include "lib/jxl/testing.h"
namespace jxl {
// Returns true if `peak` is fewer than 10 samples from the start, or fewer
// than 10 samples before `width`.
bool NearEdge(const int64_t width, const int64_t peak) {
  // When around 3*sigma from the edge, there is negligible truncation.
  constexpr int64_t kMargin = 10;
  const bool near_start = peak < kMargin;
  const bool near_end = peak > width - kMargin;
  return near_start || near_end;
}
// Walks outwards from `peak` and checks, at every distance for which both
// mirrored positions are inside [0, width), that the response is non-increasing
// to the right and equal (within tolerance) on both sides of the peak.
void VerifySymmetric(const int64_t width, const int64_t peak,
                     const float* out) {
  // Looser tolerance near the borders.
  const double tolerance = NearEdge(width, peak) ? 0.015 : 6E-7;
  // Stops as soon as either end of the array would be passed.
  for (int64_t dist = 1; peak - dist >= 0 && peak + dist < width; ++dist) {
    const int64_t left = peak - dist;
    const int64_t right = peak + dist;
    EXPECT_GT(out[right - 1] + tolerance, out[right]);  // descending
    EXPECT_NEAR(out[left], out[right], tolerance);      // symmetric
  }
}
void TestImpulseResponse(size_t width, size_t peak) {
const auto rg3 = CreateRecursiveGaussian(3.0);
const auto rg4 = CreateRecursiveGaussian(4.0);
const auto rg5 = CreateRecursiveGaussian(5.0);
// Extra padding for 4x unrolling
auto in = hwy::AllocateAligned<float>(width + 3);
memset(in.get(), 0, sizeof(float) * (width + 3));
in[peak] = 1.0f;
auto out3 = hwy::AllocateAligned<float>(width + 3);
auto out4 = hwy::AllocateAligned<float>(width + 3);
auto out5 = hwy::AllocateAligned<float>(width + 3);
FastGaussian1D(rg3, in.get(), width, out3.get());
FastGaussian1D(rg4, out3.get(), width, out4.get());
FastGaussian1D(rg5, in.get(), width, out5.get());
VerifySymmetric(width, peak, out3.get());
VerifySymmetric(width, peak, out4.get());
VerifySymmetric(width, peak, out5.get());
// Wider kernel has flatter peak
EXPECT_LT(out5[peak] + 0.05, out3[peak]);
// Gauss3 o Gauss4 ~= Gauss5
const double tolerance = NearEdge(width, peak) ? 0.04 : 0.01;
for (size_t i = 0; i < width; ++i) {
EXPECT_NEAR(out4[i], out5[i], tolerance);
}
}
// Runs TestImpulseResponse for every possible impulse position in a row of
// `width` samples.
void TestImpulseResponseForWidth(size_t width) {
  size_t peak = 0;
  while (peak < width) {
    TestImpulseResponse(width, peak);
    ++peak;
  }
}
// Impulse-response checks for a range of row widths.
TEST(GaussBlurTest, ImpulseResponse) {
  const size_t widths[] = {
      10,  // tiny even
      15,  // small odd
      32,  // power of two
      31,  // power of two - 1
      33,  // power of two + 1
  };
  for (const size_t width : widths) {
    TestImpulseResponseForWidth(width);
  }
}
// Full-resolution FIR convolution: ConvolveAndSample without subsampling.
ImageF Convolve(const ImageF& in, const std::vector<float>& kernel) {
  constexpr size_t kNoSubsampling = 1;
  return ConvolveAndSample(in, kernel, kNoSubsampling);
}
// Higher-precision version for accuracy test.
// Convolves each row of `in` with `kernel` (odd length) while accumulating in
// double, and writes the result transposed (output is ysize x xsize).
ImageF ConvolveAndTransposeF64(const ImageF& in,
                               const std::vector<double>& kernel) {
  JXL_ASSERT(kernel.size() % 2 == 1);
  const int r = kernel.size() / 2;
  const double* const taps = &kernel[r];

  // Scratch row with r samples of padding on each side.
  std::vector<float> padded_row(in.xsize() + 2 * r);
  float* const JXL_RESTRICT row = &padded_row[r];

  ImageF out(in.ysize(), in.xsize());
  for (size_t y = 0; y < in.ysize(); ++y) {
    ExtrapolateBorders(in.Row(y), row, in.xsize(), r);
    for (size_t x = 0; x < in.xsize(); ++x) {
      double acc = 0.0;
      for (int i = -r; i <= r; ++i) {
        // Sample index clamped to [0, xsize].
        const int clamped = std::max<int>(
            0, std::min<int>(static_cast<int>(x) + i, in.xsize()));
        acc += row[clamped] * taps[i];
      }
      // Transposed store: column x of the input becomes row x of the output.
      out.Row(x)[y] = static_cast<float>(acc);
    }
  }
  return out;
}
// 2D double-precision reference: two convolve-and-transpose passes, so the
// result has the same orientation as `in`.
ImageF ConvolveF64(const ImageF& in, const std::vector<double>& kernel) {
  return ConvolveAndTransposeF64(ConvolveAndTransposeF64(in, kernel), kernel);
}
void TestDirac2D(size_t xsize, size_t ysize, double sigma) {
ImageF in(xsize, ysize);
ZeroFillImage(&in);
// We anyway ignore the border below, so might as well choose the middle.
in.Row(ysize / 2)[xsize / 2] = 1.0f;
ImageF temp(xsize, ysize);
ImageF out(xsize, ysize);
const auto rg = CreateRecursiveGaussian(sigma);
ThreadPool* null_pool = nullptr;
FastGaussian(rg, in, null_pool, &temp, &out);
const std::vector<float> kernel =
GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
const ImageF expected = Convolve(in, kernel);
const double max_l1 = sigma < 1.5 ? 5E-3 : 6E-4;
const size_t border = 2 * sigma;
JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, 1E-8, _, border));
}
// Runs TestDirac2D for every combination of width, height and sigma below.
TEST(GaussBlurTest, Test2D) {
  const std::vector<int> sizes{6, 15, 17, 64, 50, 49};
  const std::vector<double> sigmas{1.0, 2.5, 3.6, 7.0};
  for (const int xsize : sizes) {
    for (const int ysize : sizes) {
      for (const double sigma : sigmas) {
        TestDirac2D(static_cast<size_t>(xsize), static_cast<size_t>(ysize),
                    sigma);
      }
    }
  }
}
// Slow (44 sec). To run, remove the disabled prefix.
// Moves a unit impulse across a 16384-sample row, compares FastGaussian1D with
// the double-precision FIR reference at every position, and prints the largest
// per-sample accumulated absolute error.
TEST(GaussBlurTest, DISABLED_SlowTestDirac1D) {
  const double sigma = 7.0;
  const size_t length = 16384;

  // IPOL accuracy test uses 10^-15 tolerance, this is 2*10^-11.
  const size_t radius = static_cast<size_t>(7 * sigma);
  const std::vector<double> kernel = GaussianKernel(radius, sigma);
  const auto rg = CreateRecursiveGaussian(sigma);

  ImageF inputs(length, 1);
  ZeroFillImage(&inputs);
  float* const in_row = inputs.Row(0);

  auto outputs = hwy::AllocateAligned<float>(length);

  // One per center position
  auto sum_abs_err = hwy::AllocateAligned<double>(length);
  std::fill(sum_abs_err.get(), sum_abs_err.get() + length, 0.0);

  for (size_t center = radius; center < length - radius; ++center) {
    in_row[center - 1] = 0.0f;  // reset last peak, entire array now 0
    in_row[center] = 1.0f;

    FastGaussian1D(rg, in_row, length, outputs.get());
    const ImageF outputs_fir = ConvolveF64(inputs, kernel);
    const float* const fir_row = outputs_fir.Row(0);

    for (size_t i = 0; i < length; ++i) {
      const float abs_err = std::abs(outputs[i] - fir_row[i]);
      sum_abs_err[i] += static_cast<double>(abs_err);
    }
  }

  const double* const err_begin = sum_abs_err.get();
  const double max_abs_err = *std::max_element(err_begin, err_begin + length);
  printf("Max abs err: %.8e\n", max_abs_err);
}
void TestRandom(size_t xsize, size_t ysize, float min, float max, double sigma,
double max_l1, double max_rel) {
printf("%4" PRIuS " x %4" PRIuS " %4.1f %4.1f sigma %.1f\n", xsize, ysize,
min, max, sigma);
ImageF in(xsize, ysize);
RandomFillImage(&in, min, max, 65537 + xsize * 129 + ysize);
// FastGaussian/Convolve handle borders differently, so keep those pixels 0.
const size_t border = 4 * sigma;
SetBorder(border, 0.0f, &in);
ImageF temp(xsize, ysize);
ImageF out(xsize, ysize);
const auto rg = CreateRecursiveGaussian(sigma);
ThreadPool* null_pool = nullptr;
FastGaussian(rg, in, null_pool, &temp, &out);
const std::vector<float> kernel =
GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
const ImageF expected = Convolve(in, kernel);
JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, max_rel, _, border));
}
// Runs TestRandom for a fixed set of image shapes, scaling the base error
// bounds per shape.
void TestRandomForSizes(float min, float max, double sigma) {
  const double base_l1 = 6E-3;
  const double base_rel = 3E-3;

  struct Shape {
    size_t xsize;
    size_t ysize;
    double l1_scale;
    double rel_scale;
  };
  const Shape shapes[] = {
      {128, 1, 1.0, 1.0},    //
      {1, 128, 1.0, 1.0},    //
      {30, 201, 1.6, 1.2},   //
      {201, 30, 1.6, 1.2},   //
      {201, 201, 2.0, 1.2},  //
  };
  for (const Shape& shape : shapes) {
    TestRandom(shape.xsize, shape.ysize, min, max, sigma,
               base_l1 * shape.l1_scale, base_rel * shape.rel_scale);
  }
}
// Random-image comparison for three value ranges, each with sigma 3 and 7.
TEST(GaussBlurTest, TestRandom) {
  const float ranges[][2] = {
      {0.0f, 10.0f},   // small non-negative
      {-4.0f, -1.0f},  // small negative
      {-6.0f, 6.0f},   // mixed positive/negative
  };
  const double sigmas[] = {3.0, 7.0};
  for (const auto& range : ranges) {
    for (const double sigma : sigmas) {
      TestRandomForSizes(range[0], range[1], sigma);
    }
  }
}
// Diagnostic comparison of FastGaussian against Convolve() on a fixed 33x33
// patch of sample values placed in an otherwise zero 500x606 image. The two
// results at the patch centre are printed; nothing is asserted.
TEST(GaussBlurTest, TestSign) {
const size_t xsize = 500;
const size_t ysize = 606;
ImageF in(xsize, ysize);
ZeroFillImage(&in);
// 33x33 sample values, indexed below as center[row * 33 + column].
const float center[33 * 33] = {
-0.128445f, -0.098473f, -0.121883f, -0.093601f, 0.095665f, -0.271332f,
-0.705475f, -1.324005f, -2.020741f, -1.329464f, 1.834064f, 4.787300f,
5.834560f, 5.272720f, 3.967960f, 3.547935f, 3.432732f, 3.383015f,
3.239326f, 3.290806f, 3.298954f, 3.397808f, 3.359730f, 3.533844f,
3.511856f, 3.436787f, 3.428310f, 3.460209f, 3.550011f, 3.590942f,
3.593109f, 3.560005f, 3.443165f, 0.089741f, 0.179230f, -0.032997f,
-0.182610f, 0.005669f, -0.244759f, -0.395123f, -0.514961f, -1.003529f,
-1.798656f, -2.377975f, 0.222191f, 3.957664f, 5.946804f, 5.543129f,
4.290096f, 3.621010f, 3.407257f, 3.392494f, 3.345367f, 3.391903f,
3.441605f, 3.429260f, 3.444969f, 3.507130f, 3.518612f, 3.443111f,
3.475948f, 3.536148f, 3.470333f, 3.628311f, 3.600243f, 3.292892f,
-0.226730f, -0.573616f, -0.762165f, -0.398739f, -0.189842f, -0.275921f,
-0.446739f, -0.550037f, -0.461033f, -0.724792f, -1.448349f, -1.814064f,
-0.491032f, 2.817703f, 5.213242f, 5.675629f, 4.864548f, 3.876324f,
3.535587f, 3.530312f, 3.413765f, 3.386261f, 3.404854f, 3.383472f,
3.420830f, 3.326496f, 3.257877f, 3.362152f, 3.489609f, 3.619587f,
3.555805f, 3.423164f, 3.309708f, -0.483940f, -0.502926f, -0.592983f,
-0.492527f, -0.413616f, -0.482555f, -0.475506f, -0.447990f, -0.338120f,
-0.189072f, -0.376427f, -0.910828f, -1.878044f, -1.937927f, 1.423218f,
4.871609f, 5.767548f, 5.103741f, 3.983868f, 3.633003f, 3.458263f,
3.507309f, 3.247021f, 3.220612f, 3.326061f, 3.352814f, 3.291061f,
3.322739f, 3.444302f, 3.506207f, 3.556839f, 3.529575f, 3.457024f,
-0.408161f, -0.431343f, -0.454369f, -0.356419f, -0.380924f, -0.399452f,
-0.439476f, -0.412189f, -0.306816f, -0.008213f, -0.325813f, -0.537842f,
-0.984100f, -1.805332f, -2.028198f, 0.773205f, 4.423046f, 5.604839f,
5.231617f, 4.080299f, 3.603008f, 3.498741f, 3.517010f, 3.333897f,
3.381336f, 3.342617f, 3.369686f, 3.434155f, 3.490452f, 3.607029f,
3.555298f, 3.702297f, 3.618679f, -0.503609f, -0.578564f, -0.419014f,
-0.239883f, 0.269836f, 0.022984f, -0.455067f, -0.621777f, -0.304176f,
-0.163792f, -0.490250f, -0.466637f, -0.391792f, -0.657940f, -1.498035f,
-1.895836f, 0.036537f, 3.462456f, 5.586445f, 5.658791f, 4.434784f,
3.423435f, 3.318848f, 3.202328f, 3.532764f, 3.436687f, 3.354881f,
3.356941f, 3.382645f, 3.503902f, 3.512867f, 3.632366f, 3.537312f,
-0.274734f, -0.658829f, -0.726532f, -0.281254f, 0.053196f, -0.064991f,
-0.608517f, -0.720966f, -0.070602f, -0.111320f, -0.440956f, -0.492180f,
-0.488762f, -0.569283f, -1.012741f, -1.582779f, -2.101479f, -1.392380f,
2.451153f, 5.555855f, 6.096313f, 5.230045f, 4.068172f, 3.404274f,
3.392586f, 3.326065f, 3.156670f, 3.284828f, 3.347012f, 3.319252f,
3.352310f, 3.610790f, 3.499847f, -0.150600f, -0.314445f, -0.093575f,
-0.057384f, 0.053688f, -0.189255f, -0.263515f, -0.318653f, 0.053246f,
0.080627f, -0.119553f, -0.152454f, -0.305420f, -0.404869f, -0.385944f,
-0.689949f, -1.204914f, -1.985748f, -1.711361f, 1.260658f, 4.626896f,
5.888351f, 5.450989f, 4.070587f, 3.539200f, 3.383492f, 3.296318f,
3.267334f, 3.436028f, 3.463005f, 3.502625f, 3.522282f, 3.403763f,
-0.348049f, -0.302303f, -0.137016f, -0.041737f, -0.164001f, -0.358849f,
-0.469627f, -0.428291f, -0.375797f, -0.246346f, -0.118950f, -0.084229f,
-0.205681f, -0.241199f, -0.391796f, -0.323151f, -0.241211f, -0.834137f,
-1.684219f, -1.972137f, 0.448399f, 4.019985f, 5.648144f, 5.647846f,
4.295094f, 3.641884f, 3.374790f, 3.197342f, 3.425545f, 3.507481f,
3.478065f, 3.430889f, 3.341900f, -1.016304f, -0.959221f, -0.909466f,
-0.810715f, -0.590729f, -0.594467f, -0.646721f, -0.629364f, -0.528561f,
-0.551819f, -0.301086f, -0.149101f, -0.060146f, -0.162220f, -0.326210f,
-0.156548f, -0.036293f, -0.426098f, -1.145470f, -1.628998f, -2.003052f,
-1.142891f, 2.885162f, 5.652863f, 5.718426f, 4.911140f, 3.234222f,
3.473373f, 3.577183f, 3.271603f, 3.410435f, 3.505489f, 3.434032f,
-0.508911f, -0.438797f, -0.437450f, -0.627426f, -0.511745f, -0.304874f,
-0.274246f, -0.261841f, -0.228466f, -0.342491f, -0.528206f, -0.490082f,
-0.516350f, -0.361694f, -0.398514f, -0.276020f, -0.210369f, -0.355938f,
-0.402622f, -0.538864f, -1.249573f, -2.100105f, -0.996178f, 1.886410f,
4.929745f, 5.630871f, 5.444199f, 4.042740f, 3.739189f, 3.691399f,
3.391956f, 3.469696f, 3.431232f, 0.204849f, 0.205433f, -0.131927f,
-0.367908f, -0.374378f, -0.126820f, -0.186951f, -0.228565f, -0.081776f,
-0.143143f, -0.379230f, -0.598701f, -0.458019f, -0.295586f, -0.407730f,
-0.245853f, -0.043140f, 0.024242f, -0.038998f, -0.044151f, -0.425991f,
-1.240753f, -1.943146f, -2.174755f, 0.523415f, 4.376751f, 5.956558f,
5.850082f, 4.403152f, 3.517399f, 3.560753f, 3.554836f, 3.471985f,
-0.508503f, -0.109783f, 0.057747f, 0.190079f, -0.257153f, -0.591980f,
-0.666771f, -0.525391f, -0.293060f, -0.489731f, -0.304855f, -0.259644f,
-0.367825f, -0.346977f, -0.292889f, -0.215652f, -0.120705f, -0.176010f,
-0.422905f, -0.114647f, -0.289749f, -0.374203f, -0.606754f, -1.127949f,
-1.994583f, -0.588058f, 3.415840f, 5.603470f, 5.811581f, 4.959423f,
3.721760f, 3.710499f, 3.785461f, -0.554588f, -0.565517f, -0.434578f,
-0.012482f, -0.284660f, -0.699795f, -0.957535f, -0.755135f, -0.382034f,
-0.321552f, -0.287571f, -0.279537f, -0.314972f, -0.256287f, -0.372818f,
-0.316017f, -0.287975f, -0.365639f, -0.512589f, -0.420692f, -0.436485f,
-0.295353f, -0.451958f, -0.755459f, -1.272358f, -2.301353f, -1.776161f,
1.572483f, 4.826286f, 5.741898f, 5.162853f, 4.028049f, 3.686325f,
-0.495590f, -0.664413f, -0.760044f, -0.152634f, -0.286480f, -0.340462f,
0.076477f, 0.187706f, -0.068787f, -0.293491f, -0.361145f, -0.292515f,
-0.140671f, -0.190723f, -0.333302f, -0.368168f, -0.192581f, -0.154499f,
-0.236544f, -0.124405f, -0.208321f, -0.465607f, -0.883080f, -1.104813f,
-1.210567f, -1.415665f, -1.924683f, -1.634758f, 0.601017f, 4.276672f,
5.501350f, 5.331257f, 3.809288f, -0.727722f, -0.533619f, -0.511524f,
-0.470688f, -0.610710f, -0.575130f, -0.311115f, -0.090420f, -0.297676f,
-0.646118f, -0.742805f, -0.485050f, -0.330910f, -0.275417f, -0.357037f,
-0.425598f, -0.481876f, -0.488941f, -0.393551f, -0.051105f, -0.090755f,
-0.328674f, -0.536369f, -0.533684f, -0.336960f, -0.689194f, -1.187195f,
-1.860954f, -2.290253f, -0.424774f, 3.050060f, 5.083332f, 5.291920f,
-0.343605f, -0.190975f, -0.303692f, -0.456512f, -0.681820f, -0.690693f,
-0.416729f, -0.286446f, -0.442055f, -0.709148f, -0.569160f, -0.382423f,
-0.402321f, -0.383362f, -0.366413f, -0.290718f, -0.110069f, -0.220280f,
-0.279018f, -0.255424f, -0.262081f, -0.487556f, -0.444492f, -0.250500f,
-0.119583f, -0.291557f, -0.537781f, -1.104073f, -1.737091f, -1.697441f,
-0.323456f, 2.042049f, 4.605103f, -0.310631f, -0.279568f, -0.012695f,
-0.160130f, -0.358746f, -0.421101f, -0.559677f, -0.474136f, -0.416565f,
-0.561817f, -0.534672f, -0.519157f, -0.767197f, -0.605831f, -0.186523f,
0.219872f, 0.264984f, -0.193432f, -0.363182f, -0.467472f, -0.462009f,
-0.571053f, -0.522476f, -0.315903f, -0.237427f, -0.147320f, -0.100201f,
-0.237568f, -0.763435f, -1.242043f, -2.135159f, -1.409485f, 1.236370f,
-0.474247f, -0.517906f, -0.410217f, -0.542244f, -0.795986f, -0.590004f,
-0.388863f, -0.462921f, -0.810627f, -0.778637f, -0.512486f, -0.718025f,
-0.710854f, -0.482513f, -0.318233f, -0.194962f, -0.220116f, -0.421673f,
-0.534233f, -0.403339f, -0.389332f, -0.407303f, -0.437355f, -0.469730f,
-0.359600f, -0.352745f, -0.466755f, -0.414585f, -0.430756f, -0.656822f,
-1.237038f, -2.046097f, -1.574898f, -0.593815f, -0.582165f, -0.336098f,
-0.372612f, -0.554386f, -0.410603f, -0.428276f, -0.647644f, -0.640720f,
-0.582207f, -0.414112f, -0.435547f, -0.435505f, -0.332561f, -0.248116f,
-0.340221f, -0.277855f, -0.352699f, -0.377319f, -0.230850f, -0.313267f,
-0.446270f, -0.346237f, -0.420422f, -0.530781f, -0.400341f, -0.463661f,
-0.209091f, -0.056705f, -0.011772f, -0.169388f, -0.736275f, -1.463017f,
-0.752701f, -0.668865f, -0.329765f, -0.299347f, -0.245667f, -0.286999f,
-0.520420f, -0.675438f, -0.255753f, 0.141357f, -0.079639f, -0.419476f,
-0.374069f, -0.046253f, 0.116116f, -0.145847f, -0.380371f, -0.563412f,
-0.638634f, -0.310116f, -0.260914f, -0.508404f, -0.465508f, -0.527824f,
-0.370979f, -0.305595f, -0.244694f, -0.254490f, 0.009968f, -0.050201f,
-0.331219f, -0.614960f, -0.788208f, -0.483242f, -0.367516f, -0.186951f,
-0.180031f, 0.129711f, -0.127811f, -0.384750f, -0.499542f, -0.418613f,
-0.121635f, 0.203197f, -0.167290f, -0.397270f, -0.355461f, -0.218746f,
-0.376785f, -0.521698f, -0.721581f, -0.845741f, -0.535439f, -0.220882f,
-0.309067f, -0.555248f, -0.690342f, -0.664948f, -0.390102f, 0.020355f,
-0.130447f, -0.173252f, -0.170059f, -0.633663f, -0.956001f, -0.621696f,
-0.388302f, -0.342262f, -0.244370f, -0.386948f, -0.401421f, -0.172979f,
-0.206163f, -0.450058f, -0.525789f, -0.549274f, -0.349251f, -0.474613f,
-0.667976f, -0.435600f, -0.175369f, -0.196877f, -0.202976f, -0.242481f,
-0.258369f, -0.189133f, -0.395397f, -0.765499f, -0.944016f, -0.850967f,
-0.631561f, -0.152493f, -0.046432f, -0.262066f, -0.195919f, 0.048218f,
0.084972f, 0.039902f, 0.000618f, -0.404430f, -0.447456f, -0.418076f,
-0.631935f, -0.717415f, -0.502888f, -0.530514f, -0.747826f, -0.704041f,
-0.674969f, -0.516853f, -0.418446f, -0.327740f, -0.308815f, -0.481636f,
-0.440083f, -0.481720f, -0.341053f, -0.283897f, -0.324368f, -0.352829f,
-0.434349f, -0.545589f, -0.533104f, -0.472755f, -0.570496f, -0.557735f,
-0.708176f, -0.493332f, -0.194416f, -0.186249f, -0.256710f, -0.271835f,
-0.304752f, -0.431267f, -0.422398f, -0.646725f, -0.680801f, -0.249031f,
-0.058567f, -0.213890f, -0.383949f, -0.540291f, -0.549877f, -0.225567f,
-0.037174f, -0.499874f, -0.641010f, -0.628044f, -0.390549f, -0.311497f,
-0.542313f, -0.569565f, -0.473408f, -0.331245f, -0.357197f, -0.285599f,
-0.200157f, -0.201866f, -0.124428f, -0.346016f, -0.392311f, -0.264496f,
-0.285370f, -0.436974f, -0.523483f, -0.410461f, -0.267925f, -0.055016f,
-0.382458f, -0.319771f, -0.049927f, 0.124329f, 0.266102f, -0.106606f,
-0.773647f, -0.973053f, -0.708206f, -0.486137f, -0.319923f, -0.493900f,
-0.490860f, -0.324986f, -0.147346f, -0.146088f, -0.161758f, -0.084396f,
-0.379494f, 0.041626f, -0.113361f, -0.277767f, 0.083366f, 0.126476f,
0.139057f, 0.038040f, 0.038162f, -0.242126f, -0.411736f, -0.370049f,
-0.455357f, -0.039257f, 0.264442f, -0.271492f, -0.425346f, -0.514847f,
-0.448650f, -0.580399f, -0.652603f, -0.774803f, -0.692524f, -0.579578f,
-0.465206f, -0.386265f, -0.458012f, -0.446594f, -0.284893f, -0.345448f,
-0.350876f, -0.440350f, -0.360378f, -0.270428f, 0.237213f, -0.063602f,
-0.364529f, -0.179867f, 0.078197f, 0.117947f, -0.093410f, -0.359119f,
-0.480961f, -0.540638f, -0.436287f, -0.598576f, -0.253735f, -0.060093f,
-0.549145f, -0.808327f, -0.698593f, -0.595764f, -0.582508f, -0.497353f,
-0.480892f, -0.584240f, -0.665791f, -0.690903f, -0.743446f, -0.796677f,
-0.782391f, -0.649010f, -0.628139f, -0.880848f, -0.829361f, -0.373272f,
-0.223667f, 0.174572f, -0.348743f, -0.798901f, -0.692307f, -0.607609f,
-0.401455f, -0.480919f, -0.450798f, -0.435413f, -0.322338f, -0.228382f,
-0.450466f, -0.504440f, -0.477402f, -0.662224f, -0.583397f, -0.217445f,
-0.157459f, -0.079584f, -0.226168f, -0.488720f, -0.669624f, -0.666878f,
-0.565311f, -0.549625f, -0.364601f, -0.497627f, -0.736897f, -0.763023f,
-0.741020f, -0.404503f, 0.184814f, -0.075315f, -0.281513f, -0.532906f,
-0.405800f, -0.313438f, -0.536652f, -0.403381f, 0.011967f, 0.103310f,
-0.269848f, -0.508656f, -0.445923f, -0.644859f, -0.617870f, -0.500927f,
-0.371559f, -0.125580f, 0.028625f, -0.154713f, -0.442024f, -0.492764f,
-0.199371f, 0.236305f, 0.225925f, 0.075577f, -0.285812f, -0.437145f,
-0.374260f, -0.156693f, -0.129635f, -0.243206f, -0.123058f, 0.162148f,
-0.313152f, -0.337982f, -0.358421f, 0.040070f, 0.038925f, -0.333313f,
-0.351662f, 0.023014f, 0.091362f, -0.282890f, -0.373253f, -0.389050f,
-0.532707f, -0.423347f, -0.349968f, -0.287045f, -0.202442f, -0.308430f,
-0.222801f, -0.106323f, -0.056358f, 0.027222f, 0.390732f, 0.033558f,
-0.160088f, -0.382217f, -0.535282f, -0.515900f, -0.022736f, 0.165665f,
-0.111408f, -0.233784f, -0.312357f, -0.541885f, -0.480022f, -0.482513f,
-0.246254f, 0.132244f, 0.090134f, 0.234634f, -0.089249f, -0.460854f,
-0.515457f, -0.450874f, -0.311031f, -0.387680f, -0.360554f, -0.179241f,
-0.283817f, -0.475815f, -0.246399f, -0.388958f, -0.551140f, -0.496239f,
-0.559879f, -0.379761f, -0.254288f, -0.395111f, -0.613018f, -0.459427f,
-0.263580f, -0.268929f, 0.080826f, 0.115616f, -0.097324f, -0.325310f,
-0.480450f, -0.313286f, -0.310371f, -0.517361f, -0.288288f, -0.112679f,
-0.173241f, -0.221664f, -0.039452f, -0.107578f, -0.089630f, -0.483768f,
-0.571087f, -0.497108f, -0.321533f, -0.375492f, -0.540363f, -0.406815f,
-0.388512f, -0.514561f, -0.540192f, -0.402412f, -0.232246f, -0.304749f,
-0.383724f, -0.679596f, -0.685463f, -0.694538f, -0.642937f, -0.425789f,
0.103271f, -0.194862f, -0.487999f, -0.717281f, -0.681850f, -0.709286f,
-0.615398f, -0.554245f, -0.254681f, -0.049950f, -0.002914f, -0.095383f,
-0.370911f, -0.564224f, -0.242714f};
// Copy the patch into the image so that its middle sample lands on
// (xtest, ytest), the centre of the zero-filled image.
const size_t xtest = xsize / 2;
const size_t ytest = ysize / 2;
for (intptr_t dy = -16; dy <= 16; ++dy) {
float* row = in.Row(ytest + dy);
for (intptr_t dx = -16; dx <= 16; ++dx)
row[xtest + dx] = center[(dy + 16) * 33 + (dx + 16)];
}
const double sigma = 7.155933;
// Result under test: FastGaussian with the coefficients from
// CreateRecursiveGaussian(sigma), run without a thread pool.
ImageF temp(xsize, ysize);
ImageF out_rg(xsize, ysize);
const auto rg = CreateRecursiveGaussian(sigma);
ThreadPool* null_pool = nullptr;
FastGaussian(rg, in, null_pool, &temp, &out_rg);
// Reference result: Convolve() with the kernel returned by
// GaussianKernel(int(4 * sigma), sigma). The kernel is scoped so it is
// released once the convolution is done.
ImageF out_old;
{
const std::vector<float> kernel =
GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
printf("old kernel size %" PRIuS "\n", kernel.size());
out_old = Convolve(in, kernel);
}
// Print both results at the patch centre for manual comparison.
printf("rg %.4f old %.4f\n", out_rg.Row(ytest)[xtest],
out_old.Row(ytest)[xtest]);
}
} // namespace jxl

View file

@ -10,15 +10,16 @@
#include <algorithm>
#include <array>
#include <cmath>
#include <utility>
#include <vector>
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/override.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/codec_in_out.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/enc_cache.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"

View file

@ -19,6 +19,7 @@
#include <algorithm>
#include <sstream>
#include <string>
#include <utility> // std::move
#include "lib/jxl/base/compiler_specific.h"
@ -328,7 +329,7 @@ class RectT {
return CeilShiftRight(shift, shift);
}
RectT<T> Extend(T border, RectT<T> parent) {
RectT<T> Extend(T border, RectT<T> parent) const {
T new_x0 = x0() > parent.x0() + border ? x0() - border : parent.x0();
T new_y0 = y0() > parent.y0() + border ? y0() - border : parent.y0();
T new_x1 = x1() + border > parent.x1() ? parent.x1() : x1() + border;

View file

@ -12,22 +12,21 @@
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/common.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/common.h" // JPEGXL_ENABLE_TRANSCODE_JPEG
#include "lib/jxl/dec_bit_reader.h"
#include "lib/jxl/dec_xyb.h"
#include "lib/jxl/field_encodings.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/headers.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/jpeg/jpeg_data.h"
#include "lib/jxl/quantizer.h"
namespace jxl {

View file

@ -16,10 +16,13 @@
#include <string>
#include <vector>
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/dec_bit_reader.h"
#include "lib/jxl/field_encodings.h"
#include "lib/jxl/fields.h"
#include "lib/jxl/headers.h"
#include "lib/jxl/jpeg/jpeg_data.h"
namespace jxl {

View file

@ -6,42 +6,40 @@
#include "lib/extras/dec/jxl.h"
#include <jxl/cms.h>
#include <jxl/color_encoding.h>
#include <jxl/encode.h>
#include <jxl/types.h>
#include <array>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <future>
#include <ostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "lib/extras/codec.h"
#include "lib/extras/dec/decode.h"
#include "lib/extras/enc/encode.h"
#include "lib/extras/enc/jxl.h"
#include "lib/extras/packed_image.h"
#include "lib/jxl/alpha.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/override.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/codec_in_out.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/common.h" // JXL_HIGH_PRECISION
#include "lib/jxl/enc_butteraugli_comparator.h"
#include "lib/jxl/enc_cache.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/fake_parallel_runner_testonly.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/image_test_utils.h"
#include "lib/jxl/jpeg/dec_jpeg_data.h"
#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/jpeg/enc_jpeg_data.h"
#include "lib/jxl/jpeg/jpeg_data.h"
#include "lib/jxl/modular/options.h"
#include "lib/jxl/test_image.h"
#include "lib/jxl/test_utils.h"
#include "lib/jxl/testing.h"
@ -122,7 +120,7 @@ TEST(JxlTest, RoundtripSmallD1) {
{
PackedPixelFile ppf_out;
EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 1027, 40);
EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 916, 40);
EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.888));
}
@ -357,8 +355,8 @@ TEST(JxlTest, RoundtripLargeFast) {
cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel
PackedPixelFile ppf_out;
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 505555, 5000);
EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(75));
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 492867, 5000);
EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(78));
}
TEST(JxlTest, RoundtripDotsForceEpf) {
@ -374,7 +372,7 @@ TEST(JxlTest, RoundtripDotsForceEpf) {
cparams.AddOption(JXL_ENC_FRAME_SETTING_DOTS, 1);
PackedPixelFile ppf_out;
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 40777, 300);
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 41355, 300);
EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(18));
}
@ -454,7 +452,7 @@ TEST(JxlTest, RoundtripSmallNL) {
t.SetDimensions(xsize, ysize);
PackedPixelFile ppf_out;
EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 1027, 45);
EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 916, 45);
EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.82));
}
@ -470,7 +468,7 @@ TEST(JxlTest, RoundtripNoGaborishNoAR) {
cparams.AddOption(JXL_ENC_FRAME_SETTING_GABORISH, 0);
PackedPixelFile ppf_out;
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 41769, 400);
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 41142, 400);
EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.8));
}
@ -488,7 +486,7 @@ TEST(JxlTest, RoundtripSmallNoGaborish) {
cparams.AddOption(JXL_ENC_FRAME_SETTING_GABORISH, 0);
PackedPixelFile ppf_out;
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 1032, 20);
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 1006, 20);
EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.1));
}
@ -861,7 +859,7 @@ TEST(JxlTest, RoundtripAlphaResampling) {
cparams.AddOption(JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING, 2);
PackedPixelFile ppf_out;
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 13655, 130);
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 13507, 130);
EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(5.2));
}
@ -952,9 +950,11 @@ TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8)) {
t.DecodeFromBytes(orig).ClearMetadata();
JXLCompressParams cparams = CompressParamsForLossless();
JXLDecompressParams dparams;
dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
PackedPixelFile ppf_out;
EXPECT_EQ(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 223058);
EXPECT_EQ(Roundtrip(t.ppf(), cparams, dparams, &pool, &ppf_out), 223058);
EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
}
@ -968,9 +968,11 @@ TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8ThunderGradient)) {
JXLCompressParams cparams = CompressParamsForLossless();
cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 2); // kThunder
cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 5); // Gradient
JXLDecompressParams dparams;
dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
PackedPixelFile ppf_out;
EXPECT_EQ(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 261684);
EXPECT_EQ(Roundtrip(t.ppf(), cparams, dparams, &pool, &ppf_out), 261684);
EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
}
@ -983,10 +985,12 @@ TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8LightningGradient)) {
JXLCompressParams cparams = CompressParamsForLossless();
cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 1); // kLightning
JXLDecompressParams dparams;
dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
PackedPixelFile ppf_out;
// Lax comparison because different SIMD will cause different compression.
EXPECT_THAT(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out),
EXPECT_THAT(Roundtrip(t.ppf(), cparams, dparams, &pool, &ppf_out),
IsSlightlyBelow(286848u));
EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
}
@ -1000,9 +1004,11 @@ TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8Falcon)) {
JXLCompressParams cparams = CompressParamsForLossless();
cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3); // kFalcon
JXLDecompressParams dparams;
dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
PackedPixelFile ppf_out;
EXPECT_EQ(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 230766);
EXPECT_EQ(Roundtrip(t.ppf(), cparams, dparams, &pool, &ppf_out), 230766);
EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
}
@ -1136,8 +1142,8 @@ TEST(JxlTest, RoundtripNoise) {
cparams.AddOption(JXL_ENC_FRAME_SETTING_NOISE, 1);
PackedPixelFile ppf_out;
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 42345, 750);
EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.35));
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 41009, 750);
EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.42));
}
TEST(JxlTest, RoundtripLossless8Gray) {
@ -1235,7 +1241,7 @@ TEST(JxlTest, RoundtripAnimationPatches) {
PackedPixelFile ppf_out;
// 40k with no patches, 27k with patch frames encoded multiple times.
EXPECT_THAT(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out),
IsSlightlyBelow(19252));
IsSlightlyBelow(19300));
EXPECT_EQ(ppf_out.frames.size(), t.ppf().frames.size());
// >10 with broken patches; not all patches are detected on borders.
EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.9));
@ -1468,7 +1474,7 @@ TEST(JxlTest, RoundtripProgressive) {
cparams.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, 1);
PackedPixelFile ppf_out;
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 71444, 750);
EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 70544, 750);
EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.4));
}
@ -1569,9 +1575,11 @@ TEST_P(JxlTest, LosslessSmallFewColors) {
JXLCompressParams cparams;
cparams.distance = 0;
cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 1);
JXLDecompressParams dparams;
dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
PackedPixelFile ppf_out;
Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out);
Roundtrip(t.ppf(), cparams, dparams, &pool, &ppf_out);
EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
}
@ -1585,6 +1593,7 @@ struct StreamingTestParam {
size_t ysize;
bool is_grey;
int effort;
bool progressive;
size_t num_channels() const { return is_grey ? 1 : 3; }
@ -1594,10 +1603,11 @@ struct StreamingTestParam {
std::vector<StreamingTestParam> params;
for (int e : {1, 3, 4, 7}) {
for (bool g : {false, true}) {
params.push_back(StreamingTestParam{357, 517, g, e});
params.push_back(StreamingTestParam{2247, 2357, g, e});
params.push_back(StreamingTestParam{357, 517, g, e, false});
params.push_back(StreamingTestParam{2247, 2357, g, e, false});
}
}
params.push_back(StreamingTestParam{2247, 2357, false, 1, true});
return params;
}
};
@ -1606,6 +1616,9 @@ std::ostream& operator<<(std::ostream& out, StreamingTestParam p) {
out << (p.is_grey ? "Grey" : "RGB");
out << p.xsize << "x" << p.ysize;
out << "e" << p.effort;
if (p.progressive) {
out << "Progressive";
}
return out;
}
@ -1624,6 +1637,9 @@ TEST_P(JxlStreamingTest, Roundtrip) {
cparams.distance = 0.1;
cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, p.effort);
cparams.AddOption(JXL_ENC_FRAME_SETTING_BUFFERING, 3);
if (p.progressive) {
cparams.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1);
}
ThreadPoolForTests pool(8);
PackedPixelFile ppf_out;
@ -1635,5 +1651,27 @@ JXL_GTEST_INSTANTIATE_TEST_SUITE_P(
JxlStreamingTest, JxlStreamingTest,
testing::ValuesIn(StreamingTestParam::All()));
// Roundtrips the same image twice with identical settings, the second time
// with JXL_ENC_FRAME_SETTING_BUFFERING set to 3, and expects both decodes to
// yield the same pixels.
// This is broken on mingw32, so we only enable it for x86_64 now.
TEST(JxlTest, JXL_X86_64_TEST(StreamingSamePixels)) {
  const std::vector<uint8_t> png_bytes = ReadTestData("jxl/flower/flower.png");

  jxl::test::TestImage image;
  image.DecodeFromBytes(png_bytes);

  JXLCompressParams cparams;
  cparams.distance = 1.0;
  cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 6);
  cparams.AddOption(JXL_ENC_FRAME_SETTING_USE_FULL_IMAGE_HEURISTICS, 0);

  ThreadPoolForTests pool(8);
  // `cparams` is captured by reference so the second call sees the extra
  // buffering option added below.
  const auto roundtrip_into = [&](PackedPixelFile* decoded) {
    Roundtrip(image.ppf(), cparams, {}, &pool, decoded);
  };

  PackedPixelFile decoded_default;
  roundtrip_into(&decoded_default);

  cparams.AddOption(JXL_ENC_FRAME_SETTING_BUFFERING, 3);
  PackedPixelFile decoded_buffered;
  roundtrip_into(&decoded_buffered);

  EXPECT_TRUE(jxl::test::SamePixels(decoded_default, decoded_buffered));
}
} // namespace
} // namespace jxl

View file

@ -10,11 +10,9 @@
#include <jxl/memory_manager.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h> // memcpy
#include <atomic>
#include <memory>
#include "lib/jxl/base/compiler_specific.h"

View file

@ -67,6 +67,38 @@ inline std::array<uint8_t, 3> PredictorColor(Predictor p) {
};
}
// Builds a decision tree that repeatedly splits on `property` at the middle
// entry of the remaining cutoff range; every leaf uses predictor `pred`.
// `cutoffs` must be sorted.
Tree MakeFixedTree(int property, const std::vector<int32_t> &cutoffs,
                   Predictor pred, size_t num_pixels) {
  // Reduce fixed tree height when encoding small images: when
  // CeilLog2Nonzero(num_pixels) is below 14, cutoff ranges no wider than
  // `min_gap` are not split any further.
  const size_t log_px = CeilLog2Nonzero(num_pixels);
  const size_t min_gap = log_px < 14 ? 8 * (14 - log_px) : 0;

  // A pending node: the half-open range [begin, end) of cutoff indices it
  // covers and its index `pos` in the tree.
  struct NodeInfo {
    size_t begin, end, pos;
  };

  Tree tree;
  // Leaf IDs will be set by roundtrip decoding the tree.
  tree.push_back(PropertyDecisionNode::Leaf(pred));

  std::queue<NodeInfo> pending;
  pending.push(NodeInfo{0, cutoffs.size(), 0});
  while (!pending.empty()) {
    const NodeInfo node = pending.front();
    pending.pop();

    // Only ranges wider than `min_gap` are turned into split nodes.
    if (node.begin + min_gap < node.end) {
      const uint32_t mid = static_cast<uint32_t>((node.begin + node.end) / 2);
      const size_t first_child = tree.size();

      tree[node.pos] =
          PropertyDecisionNode::Split(property, cutoffs[mid], first_child);
      // The two children are appended back to back: the first covers the
      // cutoffs after `mid`, the second the cutoffs before it.
      tree.push_back(PropertyDecisionNode::Leaf(pred));
      tree.push_back(PropertyDecisionNode::Leaf(pred));
      pending.push(NodeInfo{mid + 1, node.end, first_child});
      pending.push(NodeInfo{node.begin, mid, first_child + 1});
    }
  }
  return tree;
}
} // namespace
void GatherTreeData(const Image &image, pixel_type chan, size_t group_id,
@ -168,6 +200,83 @@ void GatherTreeData(const Image &image, pixel_type chan, size_t group_id,
}
}
// Returns the hard-coded decision tree for `tree_kind`. Kinds not handled
// below end in JXL_UNREACHABLE. `total_pixels` selects the trivial tree for
// small kACMeta images and is forwarded to MakeFixedTree for the fixed-DC
// kinds.
Tree PredefinedTree(ModularOptions::TreeKind tree_kind, size_t total_pixels) {
  using Kind = ModularOptions::TreeKind;
  using Node = PropertyDecisionNode;

  switch (tree_kind) {
    case Kind::kJpegTranscodeACMeta:
    case Kind::kTrivialTreeNoPredictor:
      // All the data is 0, so no need for a fancy tree.
      return {Node::Leaf(Predictor::Zero)};

    case Kind::kFalconACMeta:
      // All the data is 0 except the quant field.
      // TODO(veluca): make that 0 too.
      return {Node::Leaf(Predictor::Left)};

    case Kind::kACMeta: {
      // Small image.
      if (total_pixels < 1024) {
        return {Node::Leaf(Predictor::Left)};
      }
      Tree tree;
      // Appends `count` consecutive leaves that all use predictor `pred`.
      const auto add_leaves = [&tree](Predictor pred, size_t count) {
        for (size_t i = 0; i < count; ++i) {
          tree.push_back(Node::Leaf(pred));
        }
      };

      // 0: c > 1
      tree.push_back(Node::Split(0, 1, 1));
      // 1: c > 2
      tree.push_back(Node::Split(0, 2, 3));
      // 2: c > 0
      tree.push_back(Node::Split(0, 0, 5));
      // 3: EPF control field (all 0 or 4), top > 0
      tree.push_back(Node::Split(6, 0, 21));
      // 4: ACS+QF, y > 0
      tree.push_back(Node::Split(2, 0, 7));
      // 5: CfL x, 6: CfL b
      add_leaves(Predictor::Gradient, 2);
      // 7: QF: split according to the left quant value.
      tree.push_back(Node::Split(7, 5, 9));
      // 8: ACS: split in 4 segments (8x8 from 0 to 3, large square 4-5, large
      // rectangular 6-11, 8x8 12+), according to previous ACS value.
      tree.push_back(Node::Split(7, 5, 15));
      // 9-14: QF
      tree.push_back(Node::Split(7, 11, 11));
      tree.push_back(Node::Split(7, 3, 13));
      add_leaves(Predictor::Left, 4);
      // 15-20: ACS
      tree.push_back(Node::Split(7, 11, 17));
      tree.push_back(Node::Split(7, 3, 19));
      add_leaves(Predictor::Zero, 4);
      // 21-26: EPF, left > 0
      tree.push_back(Node::Split(7, 0, 23));
      tree.push_back(Node::Split(7, 0, 25));
      add_leaves(Predictor::Zero, 4);
      return tree;
    }

    case Kind::kWPFixedDC:
    case Kind::kGradientFixedDC: {
      // Both fixed-DC kinds share the same cutoffs; they differ only in the
      // property that is split on and the leaf predictor.
      const std::vector<int32_t> cutoffs = {
          -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15,
          -11,  -7,   -4,   -3,   -1,   0,   1,   3,   5,   7,   11,
          15,   23,   31,   47,   63,   95,  127, 191, 255, 392, 500};
      if (tree_kind == Kind::kWPFixedDC) {
        return MakeFixedTree(kWPProp, cutoffs, Predictor::Weighted,
                             total_pixels);
      }
      return MakeFixedTree(kGradientProp, cutoffs, Predictor::Gradient,
                           total_pixels);
    }

    default:
      break;
  }
  JXL_UNREACHABLE("Unreachable");
  return {};
}
Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels,
const ModularOptions &options,
const std::vector<ModularMultiplierInfo> &multiplier_info = {},
@ -494,8 +603,11 @@ Status ModularEncode(const Image &image, const ModularOptions &options,
std::vector<uint8_t> context_map;
std::vector<std::vector<Token>> tree_tokens(1);
tree_storage =
LearnTree(std::move(tree_samples_storage), *total_pixels, options);
options.tree_kind == ModularOptions::TreeKind::kLearn
? LearnTree(std::move(tree_samples_storage), *total_pixels, options)
: PredefinedTree(options.tree_kind, *total_pixels);
tree = &tree_storage;
tokens = &tokens_storage[0];

View file

@ -9,11 +9,12 @@
#include <cstddef>
#include <vector>
#include "lib/jxl/base/status.h"
#include "lib/jxl/enc_ans.h"
#include "lib/jxl/enc_bit_writer.h"
#include "lib/jxl/image.h"
#include "lib/jxl/modular/encoding/dec_ma.h"
#include "lib/jxl/modular/encoding/enc_ma.h"
#include "lib/jxl/modular/modular_image.h"
#include "lib/jxl/modular/options.h"
namespace jxl {
@ -21,6 +22,8 @@ namespace jxl {
struct AuxOut;
struct GroupHeader;
Tree PredefinedTree(ModularOptions::TreeKind tree_kind, size_t total_pixels);
Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels,
const ModularOptions &options,
const std::vector<ModularMultiplierInfo> &multiplier_info = {},

View file

@ -4,36 +4,50 @@
// license that can be found in the LICENSE file.
#include <jxl/cms.h>
#include <jxl/encode.h>
#include <jxl/types.h>
#include <array>
#include <cstddef>
#include <cstdint>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "lib/extras/codec.h"
#include "lib/extras/dec/jxl.h"
#include "lib/extras/enc/jxl.h"
#include "lib/extras/metrics.h"
#include "lib/extras/packed_image.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/base/override.h"
#include "lib/jxl/base/random.h"
#include "lib/jxl/base/span.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/codec_in_out.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/dec_bit_reader.h"
#include "lib/jxl/enc_aux_out.h"
#include "lib/jxl/enc_bit_writer.h"
#include "lib/jxl/enc_butteraugli_comparator.h"
#include "lib/jxl/enc_cache.h"
#include "lib/jxl/enc_fields.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/enc_toc.h"
#include "lib/jxl/fields.h"
#include "lib/jxl/frame_header.h"
#include "lib/jxl/headers.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/image_test_utils.h"
#include "lib/jxl/modular/encoding/enc_encoding.h"
#include "lib/jxl/modular/encoding/encoding.h"
#include "lib/jxl/modular/encoding/ma_common.h"
#include "lib/jxl/modular/modular_image.h"
#include "lib/jxl/modular/options.h"
#include "lib/jxl/modular/transform/transform.h"
#include "lib/jxl/padded_bytes.h"
#include "lib/jxl/test_image.h"
#include "lib/jxl/test_utils.h"
#include "lib/jxl/testing.h"
@ -42,23 +56,25 @@ namespace {
using test::ReadTestData;
using test::Roundtrip;
using test::TestImage;
// Round-trips a downscaled copy of jxl/flower/flower.png (both dimensions
// divided by 4) with the requested modular group size shift, then checks that
// the compressed size is at most 280000 bytes and that the decoded pixels
// match the input.
//
// NOTE(review): this span is a diff rendering in which two revisions of the
// body are interleaved without +/- markers. `cparams` and `compressed_size`
// are each declared twice, so the text does not compile as shown. One
// revision is built on CompressParams / CodecInOut / SamePixels, the other on
// extras::JXLCompressParams / TestImage / extras::PackedPixelFile /
// test::ComputeDistance2.
void TestLosslessGroups(size_t group_size_shift) {
const std::vector<uint8_t> orig = ReadTestData("jxl/flower/flower.png");
// CompressParams revision: lossless, group size shift set via a field.
CompressParams cparams;
cparams.SetLossless();
cparams.modular_group_size_shift = group_size_shift;
// TestImage revision: decode the PNG bytes, clear metadata, then set the
// dimensions to a quarter of the decoded width and height.
TestImage t;
t.DecodeFromBytes(orig).ClearMetadata();
t.SetDimensions(t.ppf().xsize() / 4, t.ppf().ysize() / 4);
CodecInOut io_out;
// extras revision: distance 0.0f (presumably the counterpart of
// SetLossless() above -- confirm) and the group size passed as a frame
// setting option.
extras::JXLCompressParams cparams;
cparams.distance = 0.0f;
cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE, group_size_shift);
extras::JXLDecompressParams dparams;
// Presumably a JxlPixelFormat {num_channels, data_type, endianness, align},
// i.e. 3-channel 16-bit little-endian output -- confirm against jxl/types.h.
dparams.accepted_formats = {{3, JXL_TYPE_UINT16, JXL_LITTLE_ENDIAN, 0}};
// CodecInOut revision of the input: decode, then shrink to a quarter of the
// width and height.
CodecInOut io;
ASSERT_TRUE(SetFromBytes(Bytes(orig), &io));
io.ShrinkTo(io.xsize() / 4, io.ysize() / 4);
// CodecInOut revision: Roundtrip() reports the size through an out-parameter.
size_t compressed_size;
JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
// extras revision: Roundtrip() returns the compressed size directly.
extras::PackedPixelFile ppf_out;
size_t compressed_size =
Roundtrip(t.ppf(), cparams, dparams, nullptr, &ppf_out);
EXPECT_LE(compressed_size, 280000u);
// Pixel-equality check, one per revision.
JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io_out.Main().color(), _));
EXPECT_EQ(0.0f, test::ComputeDistance2(t.ppf(), ppf_out));
}
// Runs the lossless group round-trip check with a group size shift of 0.
TEST(ModularTest, RoundtripLosslessGroups128) {
  TestLosslessGroups(/*group_size_shift=*/0);
}
@ -74,24 +90,26 @@ TEST(ModularTest, JXL_TSAN_SLOW_TEST(RoundtripLosslessGroups1024)) {
// Round-trips a 100x100 version of u76c0g_bliznaca_srgb8.png with the
// Weighted predictor and modular colour space 9, then checks that the
// compressed size is at most 10169 bytes and that the decoded pixels match
// the input.
//
// NOTE(review): this span is a diff rendering in which two revisions of the
// body are interleaved without +/- markers. `cparams` and `compressed_size`
// are each declared twice, so the text does not compile as shown. One
// revision is built on CompressParams / CodecInOut / SamePixels, the other on
// extras::JXLCompressParams / TestImage / extras::PackedPixelFile /
// test::ComputeDistance2.
TEST(ModularTest, RoundtripLosslessCustomWP_PermuteRCT) {
const std::vector<uint8_t> orig =
ReadTestData("external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
// CompressParams revision of the lossless setup.
CompressParams cparams;
cparams.SetLossless();
// TestImage revision of the input: decode, clear metadata, set to 100x100.
TestImage t;
t.DecodeFromBytes(orig).ClearMetadata();
t.SetDimensions(100, 100);
// extras revision of the setup; distance 0.0f is presumably the counterpart
// of SetLossless() above -- confirm.
extras::JXLCompressParams cparams;
cparams.distance = 0.0f;
// 9 = permute to GBR, to test the special case of permutation-only
// (set through a field in one revision and a frame setting in the other)
cparams.colorspace = 9;
cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE, 9);
cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR,
static_cast<int64_t>(Predictor::Weighted));
// slowest speed so different WP modes are tried
// NOTE(review): EFFORT 9 below looks like the frame-setting counterpart of
// SpeedTier::kTortoise -- confirm.
cparams.speed_tier = SpeedTier::kTortoise;
cparams.options.predictor = {Predictor::Weighted};
cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 9);
extras::JXLDecompressParams dparams;
// Presumably a JxlPixelFormat {num_channels, data_type, endianness, align},
// i.e. 3-channel 16-bit little-endian output -- confirm against jxl/types.h.
dparams.accepted_formats = {{3, JXL_TYPE_UINT16, JXL_LITTLE_ENDIAN, 0}};
CodecInOut io_out;
// CodecInOut revision of the input: decode, then shrink to 100x100.
CodecInOut io;
ASSERT_TRUE(SetFromBytes(Bytes(orig), &io));
io.ShrinkTo(100, 100);
// CodecInOut revision: Roundtrip() reports the size through an out-parameter.
size_t compressed_size;
JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
// extras revision: Roundtrip() returns the compressed size directly.
extras::PackedPixelFile ppf_out;
size_t compressed_size =
Roundtrip(t.ppf(), cparams, dparams, nullptr, &ppf_out);
EXPECT_LE(compressed_size, 10169u);
// Pixel-equality check, one per revision.
JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io_out.Main().color(), _));
EXPECT_EQ(0.0f, test::ComputeDistance2(t.ppf(), ppf_out));
}
TEST(ModularTest, RoundtripLossyDeltaPalette) {
@ -231,38 +249,6 @@ TEST(ModularTest, RoundtripExtraProperties) {
}
}
// Round-trips a PNG through modular mode with two hand-specified squeeze
// steps, then checks the compressed size bound and that the decoded pixels
// equal the input.
TEST(ModularTest, RoundtripLosslessCustomSqueeze) {
  const std::vector<uint8_t> png_bytes = ReadTestData(
      "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
  CodecInOut source;
  ASSERT_TRUE(SetFromBytes(Bytes(png_bytes), &source));

  // Modular mode at distance 0, no colour transform, Zero predictor,
  // kThunder speed tier, responsive enabled.
  CompressParams cparams;
  cparams.modular_mode = true;
  cparams.color_transform = jxl::ColorTransform::kNone;
  cparams.butteraugli_distance = 0.f;
  cparams.options.predictor = {Predictor::Zero};
  cparams.speed_tier = SpeedTier::kThunder;
  cparams.responsive = 1;

  // Custom squeeze steps; these exist only to exercise the code path.
  // First step: horizontal, not in place, 3 channels starting at channel 0.
  SqueezeParams first_step;
  first_step.horizontal = true;
  first_step.in_place = false;
  first_step.begin_c = 0;
  first_step.num_c = 3;

  // Second step: copy of the first, but vertical, in place, starting at
  // channel 1 (num_c stays 3).
  SqueezeParams second_step = first_step;
  second_step.begin_c = 1;
  second_step.in_place = true;
  second_step.horizontal = false;

  cparams.squeezes.push_back(first_step);
  cparams.squeezes.push_back(second_step);

  CodecInOut decoded;
  size_t compressed_size;
  JXL_EXPECT_OK(
      Roundtrip(&source, cparams, {}, &decoded, _, &compressed_size));
  EXPECT_LE(compressed_size, 265000u);
  JXL_EXPECT_OK(
      SamePixels(*source.Main().color(), *decoded.Main().color(), _));
}
struct RoundtripLosslessConfig {
int bitdepth;
int responsive;

View file

@ -5,12 +5,17 @@
#include <jxl/cms.h>
#include <cstddef>
#include <utility>
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/matrix_ops.h"
#include "lib/jxl/cms/opsin_params.h"
#include "lib/jxl/dec_xyb.h"
#include "lib/jxl/enc_xyb.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_metadata.h"
#include "lib/jxl/opsin_params.h"
#include "lib/jxl/testing.h"

View file

@ -5,13 +5,15 @@
#include <jxl/cms.h>
#include <utility>
#include "lib/jxl/base/data_parallel.h"
#include "lib/jxl/codec_in_out.h"
#include "lib/jxl/color_encoding_internal.h"
#include "lib/jxl/dec_xyb.h"
#include "lib/jxl/enc_xyb.h"
#include "lib/jxl/image.h"
#include "lib/jxl/image_bundle.h"
#include "lib/jxl/image_ops.h"
#include "lib/jxl/image_test_utils.h"
#include "lib/jxl/testing.h"

Some files were not shown because too many files have changed in this diff Show more