mirror of
				https://github.com/mozilla/gecko-dev.git
				synced 2025-11-04 02:09:05 +02:00 
			
		
		
		
	Depends on D196459 Differential Revision: https://phabricator.services.mozilla.com/D196860
		
			
				
	
	
		
			129 lines
		
	
	
	
		
			4.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			129 lines
		
	
	
	
		
			4.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | 
						|
#ifndef _GFXALPHARECOVERY_GENERIC_H_
 | 
						|
#define _GFXALPHARECOVERY_GENERIC_H_
 | 
						|
 | 
						|
#include "gfxAlphaRecovery.h"
 | 
						|
#include "gfxImageSurface.h"
 | 
						|
#include "nsDebug.h"
 | 
						|
#include <xsimd/xsimd.hpp>
 | 
						|
 | 
						|
template <typename Arch>
 | 
						|
bool gfxAlphaRecovery::RecoverAlphaGeneric(gfxImageSurface* blackSurf,
 | 
						|
                                           const gfxImageSurface* whiteSurf) {
 | 
						|
  mozilla::gfx::IntSize size = blackSurf->GetSize();
 | 
						|
 | 
						|
  if (size != whiteSurf->GetSize() ||
 | 
						|
      (blackSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
 | 
						|
       blackSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32) ||
 | 
						|
      (whiteSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
 | 
						|
       whiteSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32))
 | 
						|
    return false;
 | 
						|
 | 
						|
  blackSurf->Flush();
 | 
						|
  whiteSurf->Flush();
 | 
						|
 | 
						|
  unsigned char* blackData = blackSurf->Data();
 | 
						|
  unsigned char* whiteData = whiteSurf->Data();
 | 
						|
 | 
						|
  if ((NS_PTR_TO_UINT32(blackData) & 0xf) !=
 | 
						|
          (NS_PTR_TO_UINT32(whiteData) & 0xf) ||
 | 
						|
      (blackSurf->Stride() - whiteSurf->Stride()) & 0xf) {
 | 
						|
    // Cannot keep these in alignment.
 | 
						|
    return false;
 | 
						|
  }
 | 
						|
 | 
						|
  alignas(Arch::alignment()) static const uint8_t greenMaski[] = {
 | 
						|
      0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
 | 
						|
      0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
 | 
						|
  };
 | 
						|
  alignas(Arch::alignment()) static const uint8_t alphaMaski[] = {
 | 
						|
      0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
 | 
						|
      0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
 | 
						|
  };
 | 
						|
 | 
						|
  using batch_type = xsimd::batch<uint8_t, Arch>;
 | 
						|
  constexpr size_t batch_size = batch_type::size;
 | 
						|
  static_assert(batch_size == 16);
 | 
						|
 | 
						|
  batch_type greenMask = batch_type::load_aligned(greenMaski);
 | 
						|
  batch_type alphaMask = batch_type::load_aligned(alphaMaski);
 | 
						|
 | 
						|
  for (int32_t i = 0; i < size.height; ++i) {
 | 
						|
    int32_t j = 0;
 | 
						|
    // Loop single pixels until at 4 byte alignment.
 | 
						|
    while (NS_PTR_TO_UINT32(blackData) & 0xf && j < size.width) {
 | 
						|
      *((uint32_t*)blackData) =
 | 
						|
          RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
 | 
						|
                       *reinterpret_cast<uint32_t*>(whiteData));
 | 
						|
      blackData += 4;
 | 
						|
      whiteData += 4;
 | 
						|
      j++;
 | 
						|
    }
 | 
						|
    // This extra loop allows the compiler to do some more clever registry
 | 
						|
    // management and makes it about 5% faster than with only the 4 pixel
 | 
						|
    // at a time loop.
 | 
						|
    for (; j < size.width - 8; j += 8) {
 | 
						|
      auto black1 = batch_type::load_aligned(blackData);
 | 
						|
      auto white1 = batch_type::load_aligned(whiteData);
 | 
						|
      auto black2 = batch_type::load_aligned(blackData + batch_size);
 | 
						|
      auto white2 = batch_type::load_aligned(whiteData + batch_size);
 | 
						|
 | 
						|
      // Execute the same instructions as described in RecoverPixel, only
 | 
						|
      // using an SSE2 packed saturated subtract.
 | 
						|
      white1 = xsimd::ssub(white1, black1);
 | 
						|
      white2 = xsimd::ssub(white2, black2);
 | 
						|
      white1 = xsimd::ssub(greenMask, white1);
 | 
						|
      white2 = xsimd::ssub(greenMask, white2);
 | 
						|
      // Producing the final black pixel in an XMM register and storing
 | 
						|
      // that is actually faster than doing a masked store since that
 | 
						|
      // does an unaligned storage. We have the black pixel in a register
 | 
						|
      // anyway.
 | 
						|
      black1 = xsimd::bitwise_andnot(black1, alphaMask);
 | 
						|
      black2 = xsimd::bitwise_andnot(black2, alphaMask);
 | 
						|
      white1 = xsimd::slide_left<2>(white1);
 | 
						|
      white2 = xsimd::slide_left<2>(white2);
 | 
						|
      white1 &= alphaMask;
 | 
						|
      white2 &= alphaMask;
 | 
						|
      black1 |= white1;
 | 
						|
      black2 |= white2;
 | 
						|
 | 
						|
      black1.store_aligned(blackData);
 | 
						|
      black2.store_aligned(blackData + batch_size);
 | 
						|
      blackData += 2 * batch_size;
 | 
						|
      whiteData += 2 * batch_size;
 | 
						|
    }
 | 
						|
    for (; j < size.width - 4; j += 4) {
 | 
						|
      auto black = batch_type::load_aligned(blackData);
 | 
						|
      auto white = batch_type::load_aligned(whiteData);
 | 
						|
 | 
						|
      white = xsimd::ssub(white, black);
 | 
						|
      white = xsimd::ssub(greenMask, white);
 | 
						|
      black = xsimd::bitwise_andnot(black, alphaMask);
 | 
						|
      white = xsimd::slide_left<2>(white);
 | 
						|
      white &= alphaMask;
 | 
						|
      black |= white;
 | 
						|
      black.store_aligned(blackData);
 | 
						|
      blackData += batch_size;
 | 
						|
      whiteData += batch_size;
 | 
						|
    }
 | 
						|
    // Loop single pixels until we're done.
 | 
						|
    while (j < size.width) {
 | 
						|
      *((uint32_t*)blackData) =
 | 
						|
          RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
 | 
						|
                       *reinterpret_cast<uint32_t*>(whiteData));
 | 
						|
      blackData += 4;
 | 
						|
      whiteData += 4;
 | 
						|
      j++;
 | 
						|
    }
 | 
						|
    blackData += blackSurf->Stride() - j * 4;
 | 
						|
    whiteData += whiteSurf->Stride() - j * 4;
 | 
						|
  }
 | 
						|
 | 
						|
  blackSurf->MarkDirty();
 | 
						|
 | 
						|
  return true;
 | 
						|
}
 | 
						|
#endif
 |