mirror of
https://github.com/classilla/tenfourfox.git
synced 2025-01-21 04:31:26 +00:00
461 lines
20 KiB
C++
461 lines
20 KiB
C++
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "Blur.h"
|
|
|
|
#include <altivec.h>
|
|
#include <string.h>
|
|
|
|
namespace mozilla {
|
|
namespace gfx {
|
|
|
|
MOZ_ALWAYS_INLINE
|
|
vector float Reciprocal(vector float v)
|
|
{
|
|
//Get the reciprocal estimate
|
|
vector float estimate = vec_re(v);
|
|
|
|
//One round of Newton-Raphson refinement
|
|
return vec_madd(vec_nmsub(estimate, v, vec_ctf(vec_splat_u32(1), 0)), estimate, estimate );
|
|
}
|
|
|
|
MOZ_ALWAYS_INLINE
|
|
vector unsigned int DivideArray(vector unsigned int data, vector float aDivisor)
|
|
{
|
|
return vec_ctu(vec_madd(vec_ctf(data, 0), aDivisor, (vector float)vec_splat_u32(0)), 0);
|
|
}
|
|
|
|
MOZ_ALWAYS_INLINE
|
|
uint32_t DivideAndPack(vector unsigned int aValues, vector float aDivisor)
|
|
{
|
|
uint32_t retVal __attribute__((aligned(16)));
|
|
|
|
vector unsigned int temp2 = (vector unsigned int)vec_packsu(vec_packsu(DivideArray(aValues, aDivisor), vec_splat_u32(0)), (vector unsigned short)vec_splat_u32(0));
|
|
|
|
vec_ste(temp2, 0, &retVal);
|
|
|
|
return retVal;
|
|
}
|
|
|
|
MOZ_ALWAYS_INLINE
|
|
void LoadIntegralRowFromRow(uint32_t* aDest, const uint8_t* aSource,
|
|
int32_t aSourceWidth, int32_t aLeftInflation,
|
|
int32_t aRightInflation)
|
|
{
|
|
int32_t currentRowSum = 0;
|
|
|
|
for (int x = 0; x < aLeftInflation; x++) {
|
|
currentRowSum += aSource[0];
|
|
aDest[x] = currentRowSum;
|
|
}
|
|
for (int x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x++) {
|
|
currentRowSum += aSource[(x - aLeftInflation)];
|
|
aDest[x] = currentRowSum;
|
|
}
|
|
for (int x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) {
|
|
currentRowSum += aSource[aSourceWidth - 1];
|
|
aDest[x] = currentRowSum;
|
|
}
|
|
}
|
|
|
|
// This function calculates an integral of four pixels stored in the 4
|
|
// 32-bit integers on aPixels. i.e. for { 30, 50, 80, 100 } this returns
|
|
// { 30, 80, 160, 260 }.
|
|
MOZ_ALWAYS_INLINE
|
|
vector unsigned int AccumulatePixelSums(vector unsigned int aPixels)
|
|
{
|
|
vector unsigned char shiftCounter = vec_sll(vec_splat_u8(8), vec_splat_u8(2));
|
|
vector unsigned int currentPixels = vec_sro(aPixels, shiftCounter);
|
|
vector unsigned int sumPixels = vec_add(aPixels, currentPixels);
|
|
|
|
shiftCounter = vec_sll(shiftCounter, vec_splat_u8(1));
|
|
currentPixels = vec_sro(sumPixels, shiftCounter);
|
|
sumPixels = vec_add(sumPixels, currentPixels);
|
|
|
|
return sumPixels;
|
|
}
|
|
|
|
MOZ_ALWAYS_INLINE
|
|
vector unsigned short AccumulatePixelSums(vector unsigned short aPixels)
|
|
{
|
|
vector unsigned char shiftCounter = vec_sll(vec_splat_u8(8), vec_splat_u8(1));
|
|
vector unsigned short currentPixels = vec_sro(aPixels, shiftCounter);
|
|
vector unsigned short sumPixels = vec_add(aPixels, currentPixels);
|
|
|
|
shiftCounter = vec_sll(shiftCounter, vec_splat_u8(1));
|
|
currentPixels = vec_sro(sumPixels, shiftCounter);
|
|
sumPixels = vec_add(sumPixels, currentPixels);
|
|
|
|
shiftCounter = vec_sll(shiftCounter, vec_splat_u8(1));
|
|
currentPixels = vec_sro(sumPixels, shiftCounter);
|
|
sumPixels = vec_add(sumPixels, currentPixels);
|
|
|
|
return sumPixels;
|
|
}
|
|
|
|
MOZ_ALWAYS_INLINE
|
|
void GenerateIntegralImage_VMX(int32_t aLeftInflation, int32_t aRightInflation,
|
|
int32_t aTopInflation, int32_t aBottomInflation,
|
|
uint32_t* aIntegralImage, size_t aIntegralImageStride,
|
|
uint8_t* &mData, int32_t &mStride, const IntSize &aSize)
|
|
{
|
|
MOZ_ASSERT(!(aLeftInflation & 3));
|
|
|
|
uint8_t* aSource = mData;
|
|
int32_t aSourceStride = mStride;
|
|
|
|
uint32_t stride32bit = aIntegralImageStride / 4;
|
|
|
|
IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation,
|
|
aSize.height + aTopInflation + aBottomInflation);
|
|
|
|
LoadIntegralRowFromRow(aIntegralImage, aSource, aSize.width, aLeftInflation, aRightInflation);
|
|
|
|
for (int y = 1; y < aTopInflation + 1; y++) {
|
|
uint32_t* intRow = aIntegralImage + (y * stride32bit);
|
|
uint32_t* intPrevRow = aIntegralImage + (y - 1) * stride32bit;
|
|
uint32_t* intFirstRow = aIntegralImage;
|
|
|
|
int x;
|
|
for (x = 0; (x += 16) < (integralImageSize.width * 4); x += 16) {
|
|
vector unsigned int firstRow = vec_ld(x - 16, intFirstRow);
|
|
vector unsigned int previousRow = vec_ld(x - 16, intPrevRow);
|
|
vec_st(vec_add(firstRow, previousRow), x - 16, intRow);
|
|
|
|
firstRow = vec_ld(x, intFirstRow);
|
|
previousRow = vec_ld(x, intPrevRow);
|
|
vec_st(vec_add(firstRow, previousRow), x, intRow);
|
|
}
|
|
if ((x -= 16) < (integralImageSize.width * 4)) {
|
|
vector unsigned int firstRow = vec_ld(x, intFirstRow);
|
|
vector unsigned int previousRow = vec_ld(x, intPrevRow);
|
|
vec_st(vec_add(firstRow, previousRow), x, intRow);
|
|
}
|
|
}
|
|
|
|
for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) {
|
|
vector unsigned int currentRowSum = vec_splat_u32(0);
|
|
uint32_t* intRow = aIntegralImage + (y * stride32bit);
|
|
uint32_t* intPrevRow = aIntegralImage + (y - 1) * stride32bit;
|
|
uint8_t* sourceRow = aSource + aSourceStride * (y - aTopInflation);
|
|
|
|
volatile uint32_t pixel __attribute__((aligned(16))) = sourceRow[0];
|
|
|
|
int x;
|
|
volatile vector unsigned int sumPixels0 = AccumulatePixelSums(vec_splat(vec_lde(0, &pixel), 0));
|
|
for (x = 0; (x += 16) < (aLeftInflation * 4); x += 16) {
|
|
vector unsigned int sumPixels = vec_add(sumPixels0, currentRowSum);
|
|
|
|
currentRowSum = vec_splat(sumPixels, 3);
|
|
|
|
vec_st(vec_add(sumPixels, vec_ld(x - 16, intPrevRow)), x - 16, intRow);
|
|
|
|
sumPixels = vec_add(sumPixels0, currentRowSum);
|
|
currentRowSum = vec_splat(sumPixels, 3);
|
|
vec_st(vec_add(sumPixels, vec_ld(x, intPrevRow)), x, intRow);
|
|
}
|
|
if ((x -= 16) < (aLeftInflation * 4)) {
|
|
vector unsigned int sumPixels = vec_add(sumPixels0, currentRowSum);
|
|
currentRowSum = vec_splat(sumPixels, 3);
|
|
vec_st(vec_add(sumPixels, vec_ld(x, intPrevRow)), x, intRow);
|
|
}
|
|
|
|
for (x = (aLeftInflation * 4); (x += 48) < ((aSize.width + aLeftInflation) * 4); x += 16) {
|
|
uint64_t pixels[2] __attribute__((aligned(16))) = {*(uint64_t*)(sourceRow + (((x - 48) / 4) - aLeftInflation)),
|
|
*(uint64_t*)(sourceRow + (((x - 16) / 4) - aLeftInflation))};
|
|
vector unsigned char pixelsVector = vec_ld(0, (unsigned char*)&pixels);
|
|
// It's important to shuffle here. When we exit this loop currentRowSum
|
|
// has to be set to sumPixels, so that the following loop can get the
|
|
// correct pixel for the currentRowSum. The highest order pixel in
|
|
// currentRowSum could've originated from accumulation in the stride.
|
|
currentRowSum = vec_splat(currentRowSum, 3);
|
|
|
|
vector unsigned short sumPixelsShort = AccumulatePixelSums((vector unsigned short)vec_mergeh((vector unsigned char)vec_splat_u32(0), pixelsVector));
|
|
vector unsigned int sumPixels = vec_add((vector unsigned int)vec_mergeh((vector unsigned short)vec_splat_u32(0), sumPixelsShort), currentRowSum);
|
|
|
|
vec_st(vec_add(sumPixels, vec_ld(x - 48, intPrevRow)), x - 48, intRow);
|
|
|
|
sumPixels = vec_add((vector unsigned int)vec_mergel((vector unsigned short)vec_splat_u32(0), sumPixelsShort), currentRowSum);
|
|
vec_st(vec_add(sumPixels, vec_ld(x - 32, intPrevRow)), x - 32, intRow);
|
|
|
|
currentRowSum = vec_splat(sumPixels, 3);
|
|
|
|
sumPixelsShort = AccumulatePixelSums((vector unsigned short)vec_mergel((vector unsigned char)vec_splat_u32(0), pixelsVector));
|
|
sumPixels = vec_add((vector unsigned int)vec_mergeh((vector unsigned short)vec_splat_u32(0), sumPixelsShort), currentRowSum);
|
|
|
|
vec_st(vec_add(sumPixels, vec_ld(x - 16, intPrevRow)), x - 16, intRow);
|
|
|
|
sumPixels = vec_add((vector unsigned int)vec_mergel((vector unsigned short)vec_splat_u32(0), sumPixelsShort), currentRowSum);
|
|
currentRowSum = sumPixels;
|
|
vec_st(vec_add(sumPixels, vec_ld(x, intPrevRow)), x, intRow);
|
|
}
|
|
if ((x -= 32) < ((aSize.width + aLeftInflation) * 4)) {
|
|
uint64_t pixels __attribute__((aligned(16))) = *(uint64_t*)(sourceRow + (((x - 16) / 4) - aLeftInflation));
|
|
// It's important to shuffle here. When we exit this loop currentRowSum
|
|
// has to be set to sumPixels, so that the following loop can get the
|
|
// correct pixel for the currentRowSum. The highest order pixel in
|
|
// currentRowSum could've originated from accumulation in the stride.
|
|
currentRowSum = vec_splat(currentRowSum, 3);
|
|
|
|
vector unsigned short sumPixelsShort = AccumulatePixelSums((vector unsigned short)vec_mergeh((vector unsigned char)vec_splat_u32(0), vec_ld(0, (unsigned char*)&pixels)));
|
|
vector unsigned int sumPixels = vec_add((vector unsigned int)vec_mergeh((vector unsigned short)vec_splat_u32(0), sumPixelsShort), currentRowSum);
|
|
|
|
vec_st(vec_add(sumPixels, vec_ld(x - 16, intPrevRow)), x - 16, intRow);
|
|
|
|
sumPixels = vec_add((vector unsigned int)vec_mergel((vector unsigned short)vec_splat_u32(0), sumPixelsShort), currentRowSum);
|
|
currentRowSum = sumPixels;
|
|
vec_st(vec_add(sumPixels, vec_ld(x, intPrevRow)), x, intRow);
|
|
x += 32;
|
|
}
|
|
if ((x -= 16) < ((aSize.width + aLeftInflation) * 4)) {
|
|
uint32_t pixels __attribute__((aligned(16))) = *(uint32_t*)(sourceRow + ((x / 4) - aLeftInflation));
|
|
currentRowSum = vec_splat(currentRowSum, 3);
|
|
vector unsigned int sumPixels = AccumulatePixelSums((vector unsigned int)vec_mergeh((vector unsigned short)vec_splat_u32(0), (vector unsigned short)vec_mergeh((vector unsigned char)vec_splat_u32(0), (vector unsigned char)vec_lde(0, &pixels))));
|
|
sumPixels = vec_add(sumPixels, currentRowSum);
|
|
currentRowSum = sumPixels;
|
|
vec_st(vec_add(sumPixels, vec_ld(x, intPrevRow)), x, intRow);
|
|
}
|
|
|
|
pixel = sourceRow[aSize.width - 1];
|
|
x = (aSize.width + aLeftInflation);
|
|
if ((aSize.width & 3)) {
|
|
// Deal with unaligned portion. Get the correct pixel from currentRowSum,
|
|
// see explanation above.
|
|
volatile uint32_t __attribute__((aligned(16))) intCurrentRowSum = ((uint32_t*)¤tRowSum)[(aSize.width % 4) - 1];
|
|
for (; x < integralImageSize.width; x++) {
|
|
// We could be unaligned here!
|
|
if (!(x & 3)) {
|
|
// aligned!
|
|
currentRowSum = vec_splat(vec_lde(0, &intCurrentRowSum), 0);
|
|
break;
|
|
}
|
|
intCurrentRowSum += pixel;
|
|
intRow[x] = intPrevRow[x] + intCurrentRowSum;
|
|
}
|
|
} else {
|
|
currentRowSum = vec_splat(currentRowSum, 3);
|
|
}
|
|
|
|
sumPixels0 = AccumulatePixelSums(vec_splat(vec_lde(0, &pixel), 0));
|
|
for (x = x * 4; (x += 16) < (integralImageSize.width * 4); x += 16) {
|
|
vector unsigned int sumPixels = vec_add(sumPixels0, currentRowSum);
|
|
|
|
currentRowSum = vec_splat(sumPixels, 3);
|
|
|
|
vec_st(vec_add(sumPixels, vec_ld(x - 16, intPrevRow)), x - 16, intRow);
|
|
|
|
sumPixels = vec_add(sumPixels0, currentRowSum);
|
|
currentRowSum = vec_splat(sumPixels, 3);
|
|
vec_st(vec_add(sumPixels, vec_ld(x, intPrevRow)), x, intRow);
|
|
}
|
|
if ((x -= 16) < (integralImageSize.width * 4)) {
|
|
vector unsigned int sumPixels = vec_add(sumPixels0, currentRowSum);
|
|
currentRowSum = vec_splat(sumPixels, 3);
|
|
vec_st(vec_add(sumPixels, vec_ld(x, intPrevRow)), x, intRow);
|
|
}
|
|
}
|
|
|
|
if (aBottomInflation) {
|
|
uint32_t* intLastRow = aIntegralImage + (integralImageSize.height - 1) * stride32bit;
|
|
// Store the last valid row of our source image in the last row of
|
|
// our integral image. This will be overwritten with the correct values
|
|
// in the upcoming loop.
|
|
LoadIntegralRowFromRow(intLastRow,
|
|
aSource + (aSize.height - 1) * aSourceStride, aSize.width, aLeftInflation, aRightInflation);
|
|
|
|
|
|
for (int y = aSize.height + aTopInflation; y < integralImageSize.height; y++) {
|
|
uint32_t* intRow = aIntegralImage + (y * stride32bit);
|
|
uint32_t* intPrevRow = aIntegralImage + (y - 1) * stride32bit;
|
|
|
|
int x;
|
|
for (x = 0; (x += 16) < (integralImageSize.width * 4); x += 16) {
|
|
vector unsigned int lastRow = vec_ld(x - 16, intLastRow);
|
|
vector unsigned int previousRow = vec_ld(x - 16, intPrevRow);
|
|
vec_st(vec_add(lastRow, previousRow), x - 16, intRow);
|
|
|
|
lastRow = vec_ld(x, intLastRow);
|
|
previousRow = vec_ld(x, intPrevRow);
|
|
vec_st(vec_add(lastRow, previousRow), x, intRow);
|
|
}
|
|
if ((x -= 16) < (integralImageSize.width * 4)) {
|
|
vector unsigned int lastRow = vec_ld(x, intLastRow);
|
|
vector unsigned int previousRow = vec_ld(x, intPrevRow);
|
|
vec_st(vec_add(lastRow, previousRow), x, intRow);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
MOZ_ALWAYS_INLINE
|
|
void loop(int32_t startIdx, int32_t endIdx,
|
|
uint32_t* topLeftBase, uint32_t* topRightBase, uint32_t* bottomRightBase, uint32_t* bottomLeftBase,
|
|
uint8_t *data, int32_t stride,
|
|
vector float reciprocal,
|
|
int32_t y)
|
|
{
|
|
int topLeftIndex = reinterpret_cast<uint32_t>(topLeftBase + startIdx) & 0xf ? 0 : 1;
|
|
int topRightIndex = reinterpret_cast<uint32_t>(topRightBase + startIdx) & 0xf ? 0 : 1;
|
|
int bottomRightIndex = reinterpret_cast<uint32_t>(bottomRightBase + startIdx) & 0xf ? 0 : 1;
|
|
int bottomLeftIndex = reinterpret_cast<uint32_t>(bottomLeftBase + startIdx) & 0xf ? 0 : 1;
|
|
|
|
vector unsigned char topLeftMask = vec_lvsl(0, reinterpret_cast<unsigned char*>(topLeftBase + startIdx));
|
|
vector unsigned int topLeftVector1 = vec_ld(0, topLeftBase + startIdx);
|
|
vector unsigned int topLeftVector2;
|
|
|
|
vector unsigned char topRightMask = vec_lvsl(0, reinterpret_cast<unsigned char*>(topRightBase + startIdx));
|
|
vector unsigned int topRightVector1 = vec_ld(0, topRightBase + startIdx);
|
|
vector unsigned int topRightVector2;
|
|
|
|
vector unsigned char bottomRightMask = vec_lvsl(0, reinterpret_cast<unsigned char*>(bottomRightBase + startIdx));
|
|
vector unsigned int bottomRightVector1 = vec_ld(0, bottomRightBase + startIdx);
|
|
vector unsigned int bottomRightVector2;
|
|
|
|
vector unsigned char bottomLeftMask = vec_lvsl(0, reinterpret_cast<unsigned char*>(bottomLeftBase + startIdx));
|
|
vector unsigned int bottomLeftVector1 = vec_ld(0, bottomLeftBase + startIdx);
|
|
vector unsigned int bottomLeftVector2;
|
|
|
|
int32_t x;
|
|
for (x = startIdx; (x += 4) < endIdx; x += 4) {
|
|
// Safe to use with aligned and unaligned addresses
|
|
#define LoadUnaligned(index, target, MSQ, LSQ , mask) \
|
|
({ \
|
|
LSQ = vec_ld(index + 15, target); \
|
|
vec_perm(MSQ, LSQ, mask); \
|
|
})
|
|
|
|
vector unsigned int topLeft = LoadUnaligned(topLeftIndex, topLeftBase + x - 4, topLeftVector1, topLeftVector2, topLeftMask);
|
|
topLeftVector1 = topLeftVector2;
|
|
|
|
vector unsigned int topRight = LoadUnaligned(topRightIndex, topRightBase + x - 4, topRightVector1, topRightVector2, topRightMask);
|
|
topRightVector1 = topRightVector2;
|
|
|
|
vector unsigned int bottomRight = LoadUnaligned(bottomRightIndex, bottomRightBase + x - 4, bottomRightVector1, bottomRightVector2, bottomRightMask);
|
|
bottomRightVector1 = bottomRightVector2;
|
|
|
|
vector unsigned int bottomLeft = LoadUnaligned(bottomLeftIndex, bottomLeftBase + x - 4, bottomLeftVector1, bottomLeftVector2, bottomLeftMask);
|
|
bottomLeftVector1 = bottomLeftVector2;
|
|
|
|
vector unsigned int values = vec_add(vec_sub(vec_sub(bottomRight, topRight), bottomLeft), topLeft);
|
|
|
|
*(uint32_t*)(data + stride * y + x - 4) = DivideAndPack(values, reciprocal);
|
|
|
|
topLeft = LoadUnaligned(topLeftIndex, topLeftBase + x, topLeftVector1, topLeftVector2, topLeftMask);
|
|
topLeftVector1 = topLeftVector2;
|
|
|
|
topRight = LoadUnaligned(topRightIndex, topRightBase + x, topRightVector1, topRightVector2, topRightMask);
|
|
topRightVector1 = topRightVector2;
|
|
|
|
bottomRight = LoadUnaligned(bottomRightIndex, bottomRightBase + x, bottomRightVector1, bottomRightVector2, bottomRightMask);
|
|
bottomRightVector1 = bottomRightVector2;
|
|
|
|
bottomLeft = LoadUnaligned(bottomLeftIndex, bottomLeftBase + x, bottomLeftVector1, bottomLeftVector2, bottomLeftMask);
|
|
bottomLeftVector1 = bottomLeftVector2;
|
|
|
|
values = vec_add(vec_sub(vec_sub(bottomRight, topRight), bottomLeft), topLeft);
|
|
|
|
*(uint32_t*)(data + stride * y + x) = DivideAndPack(values, reciprocal);
|
|
}
|
|
if ((x -= 4) < endIdx) {
|
|
vector unsigned int topLeft = LoadUnaligned(topLeftIndex, topLeftBase + x, topLeftVector1, topLeftVector2, topLeftMask);
|
|
topLeftVector1 = topLeftVector2;
|
|
|
|
vector unsigned int topRight = LoadUnaligned(topRightIndex, topRightBase + x, topRightVector1, topRightVector2, topRightMask);
|
|
topRightVector1 = topRightVector2;
|
|
|
|
vector unsigned int bottomRight = LoadUnaligned(bottomRightIndex, bottomRightBase + x, bottomRightVector1, bottomRightVector2, bottomRightMask);
|
|
bottomRightVector1 = bottomRightVector2;
|
|
|
|
vector unsigned int bottomLeft = LoadUnaligned(bottomLeftIndex, bottomLeftBase + x, bottomLeftVector1, bottomLeftVector2, bottomLeftMask);
|
|
bottomLeftVector1 = bottomLeftVector2;
|
|
|
|
vector unsigned int values = vec_add(vec_sub(vec_sub(bottomRight, topRight), bottomLeft), topLeft);
|
|
|
|
*(uint32_t*)(data + stride * y + x) = DivideAndPack(values, reciprocal);
|
|
}
|
|
}
|
|
|
|
MOZ_ALWAYS_INLINE
|
|
void Blur_VMX(int32_t aLeftLobe, int32_t aRightLobe,
|
|
int32_t aTopLobe, int32_t aBottomLobe,
|
|
uint32_t* aIntegralImage, size_t aIntegralImageStride,
|
|
uint8_t* &aData, int32_t &mStride, const IntSize &size,
|
|
uint32_t* boxSize, int32_t leftInflation,
|
|
IntRect &mSkipRect)
|
|
{
|
|
// Storing these locally makes this about 30% faster! Presumably the compiler
|
|
// can't be sure we're not altering the member variables in this loop.
|
|
IntRect skipRect = mSkipRect;
|
|
uint8_t *data = aData;
|
|
int32_t stride = mStride;
|
|
|
|
uint32_t stride32bit = aIntegralImageStride / 4;
|
|
|
|
// This points to the start of the rectangle within the IntegralImage that overlaps
|
|
// the surface being blurred.
|
|
uint32_t* innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation;
|
|
|
|
vector unsigned int divisor = vec_splat(vec_lde(0, boxSize), 0);
|
|
vector float reciprocal = vec_re(vec_ctf(divisor, 0));
|
|
|
|
for (int32_t y = 0; y < size.height; y++) {
|
|
bool inSkipRectY = y > skipRect.y && y < skipRect.YMost();
|
|
|
|
uint32_t* topLeftBase = innerIntegral + ((y - aTopLobe) * ptrdiff_t(stride32bit) - aLeftLobe);
|
|
uint32_t* topRightBase = innerIntegral + ((y - aTopLobe) * ptrdiff_t(stride32bit) + aRightLobe);
|
|
uint32_t* bottomRightBase = innerIntegral + ((y + aBottomLobe) * ptrdiff_t(stride32bit) + aRightLobe);
|
|
uint32_t* bottomLeftBase = innerIntegral + ((y + aBottomLobe) * ptrdiff_t(stride32bit) - aLeftLobe);
|
|
|
|
if (inSkipRectY) {
|
|
loop(0, skipRect.x + 4, topLeftBase, topRightBase, bottomRightBase, bottomLeftBase, data, stride, reciprocal,y);
|
|
loop(skipRect.XMost(), size.width, topLeftBase, topRightBase, bottomRightBase, bottomLeftBase, data, stride, reciprocal, y);
|
|
} else {
|
|
loop(0, size.width, topLeftBase, topRightBase, bottomRightBase, bottomLeftBase, data, stride, reciprocal, y);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Attempt to do an in-place box blur using an integral image.
|
|
*/
|
|
void
|
|
AlphaBoxBlur::BoxBlur_VMX(uint8_t* aData,
|
|
int32_t aLeftLobe,
|
|
int32_t aRightLobe,
|
|
int32_t aTopLobe,
|
|
int32_t aBottomLobe,
|
|
uint32_t* aIntegralImage,
|
|
size_t aIntegralImageStride)
|
|
{
|
|
IntSize size = GetSize();
|
|
|
|
MOZ_ASSERT(size.height > 0);
|
|
|
|
// Our 'left' or 'top' lobe will include the current pixel. i.e. when
|
|
// looking at an integral image the value of a pixel at 'x,y' is calculated
|
|
// using the value of the integral image values above/below that.
|
|
aLeftLobe++;
|
|
aTopLobe++;
|
|
uint32_t boxSize __attribute__((aligned(16))) = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe);
|
|
|
|
MOZ_ASSERT(static_cast<int32_t>(boxSize) > 0);
|
|
|
|
if (boxSize == 1) {
|
|
return;
|
|
}
|
|
|
|
int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value();
|
|
|
|
GenerateIntegralImage_VMX(leftInflation, aRightLobe, aTopLobe, aBottomLobe,
|
|
aIntegralImage, aIntegralImageStride, aData,
|
|
mStride, size);
|
|
|
|
Blur_VMX(aLeftLobe, aRightLobe, aTopLobe, aBottomLobe,
|
|
aIntegralImage, aIntegralImageStride, aData,
|
|
mStride, size, &boxSize, leftInflation, mSkipRect);
|
|
}
|
|
|
|
}
|
|
}
|