mirror of https://github.com/classilla/tenfourfox.git (synced 2025-02-15 03:31:24 +00:00)
#324: complete intra predictors for h/v/dc/128
This commit is contained in:
parent f6c2519506
commit 7bfd387acb
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2018 Cameron Kaiser and Contributors to TenFourFox
 * Copyright (c) 2017 Cameron Kaiser and Contributors to TenFourFox
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
@@ -21,10 +21,12 @@

// v = vector, s = *uint32_t, vv = temporary vector (unsigned int)

// Basic notion.
// Basic notion. Guaranteed to work at any offset.
#define _unaligned_load128(v,s) { v=vec_perm(vec_ld(0,s),vec_ld(16,s),vec_lvsl(0,s)); }

// Equivalent for _mm_cvtsi32_si128. (Upper 96 bits undefined.)
// Equivalent for _mm_cvtsi32_si128. (Upper 96 bits undefined.) However,
// this may have issues loading at really weird addresses if they're not
// minimally word-aligned.
#define _unaligned_load32(v,s) { v=vec_lde(0,s); v=vec_perm(v,v,vec_lvsl(0,s)); }
// Equivalent for _mm_cvtsi128_si32.
#define _unaligned_store32(v,vv,s) { vv=vec_splat((vector unsigned int)v,0); vec_ste(vv,0,s); }
@@ -36,6 +38,8 @@
  vv = vec_splat((vector unsigned int)v, 1); vec_ste(vv,4,s);\
}
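For readers less familiar with the AltiVec idiom, the macros above emulate SSE2-style unaligned accesses: vec_ld/vec_st only touch 16-byte-aligned addresses, so loads go through vec_lvsl/vec_perm (or vec_lde plus a shuffle) and stores through vec_ste of a splatted lane. A minimal scalar sketch of what the 32-bit pair accomplishes, using hypothetical helper names that are not part of the patch:

#include <stdint.h>
#include <string.h>

/* Sketch only: byte-wise equivalents of _unaligned_load32/_unaligned_store32.
   The VMX macros reach the same result without dereferencing unaligned scalars. */
static uint32_t scalar_unaligned_load32(const void *s) {
  uint32_t v;
  memcpy(&v, s, sizeof(v));
  return v;
}
static void scalar_unaligned_store32(uint32_t v, void *d) {
  memcpy(d, &v, sizeof(v));
}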

/* 4x4 */

void vp9_dc_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left)
{
@@ -71,12 +75,160 @@ void vp9_dc_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride,
  s1 = vec_splat(s0, 1);
  c0 = vec_packsu(s1, s1);

  _unaligned_store32(c0, m0, (uint32_t *)dst);
  _unaligned_store32(c0, m0, (uint32_t *)(dst + y_stride));
  _unaligned_store32(c0, m0, (uint32_t *)(dst + y_stride + y_stride));
  _unaligned_store32(c0, m0, (uint32_t *)(dst + y_stride + y_stride + y_stride));
  // This is faster than a whole bunch of _unaligned_store32s because we
  // already splatted the vector, so it's the same at all positions.
  vec_ste((vector unsigned int)c0, 0, (uint32_t *)dst);
  vec_ste((vector unsigned int)c0, y_stride, (uint32_t *)dst);
  vec_ste((vector unsigned int)c0, y_stride + y_stride, (uint32_t *)dst);
  vec_ste((vector unsigned int)c0, y_stride + y_stride + y_stride, (uint32_t *)dst);
}
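The vector sequence above implements the usual VP9 DC rule for a 4x4 block: sum the four above and four left pixels, round, divide by eight, and fill the block with that value. A scalar sketch of the same rule (written from the definition, not copied from libvpx):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Sketch: scalar form of the 4x4 DC predictor. */
static void dc_predictor_4x4_scalar(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  int sum = 0;
  for (int i = 0; i < 4; i++) sum += above[i] + left[i];
  uint8_t dc = (uint8_t)((sum + 4) >> 3);          /* rounded average of 8 pixels */
  for (int r = 0; r < 4; r++) memset(dst + r * y_stride, dc, 4);
}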

inline void _common_top_or_left_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *what)
{
  // Similar idea to the standard predictor, but one column only.
  vector unsigned int m0, m1, m2, m3;
  vector unsigned short s0, s1;
  vector unsigned char c0;

  vector unsigned int vzero = vec_splat_u32(0);
  vector unsigned int vtwo = vec_splat_u32(2);

  _unaligned_load32(m1, (uint32_t *)what);
  // Interpolate zero to clear out the upper bits so that we get a
  // proper cross-sum over the full range.
  m0 = vec_mergeh(m1, vzero);
  m2 = vec_sum4s((vector unsigned char)m0, vzero);
  m3 = (vector unsigned int)vec_sum2s((vector signed int)m2, (vector signed int)vzero);
  m0 = vec_add(m3, vtwo);
  m1 = vec_sra(m0, vtwo);

  s0 = vec_packsu(m1, m1);
  s1 = vec_splat(s0, 1);
  c0 = vec_packsu(s1, s1);

  vec_ste((vector unsigned int)c0, 0, (uint32_t *)dst);
  vec_ste((vector unsigned int)c0, y_stride, (uint32_t *)dst);
  vec_ste((vector unsigned int)c0, y_stride + y_stride, (uint32_t *)dst);
  vec_ste((vector unsigned int)c0, y_stride + y_stride + y_stride, (uint32_t *)dst);
}

void vp9_dc_top_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  _common_top_or_left_4x4_vmx(dst, y_stride, above);
}
void vp9_dc_left_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  _common_top_or_left_4x4_vmx(dst, y_stride, left);
}

void vp9_dc_128_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Pretty much just blit 128s.
  // Splatting to a char vector doesn't store properly, so splat
  // to int.
  vector unsigned int m0 = (vector unsigned int)vec_sl(vec_splat_u8(2), vec_splat_u8(6));
  vec_ste(m0, 0, (uint32_t *)dst);
  vec_ste(m0, y_stride, (uint32_t *)dst);
  vec_ste(m0, y_stride + y_stride, (uint32_t *)dst);
  vec_ste(m0, y_stride + y_stride + y_stride, (uint32_t *)dst);
}
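The 128 constant has to be built indirectly because vec_splat_u8 only takes a 5-bit immediate (-16..15); splatting 2 and shifting left by 6 puts 0x80 in every byte lane. In scalar terms the whole predictor is just a flat fill, roughly:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Sketch: scalar form of the 4x4 "128" predictor. */
static void dc_128_predictor_4x4_scalar(uint8_t *dst, ptrdiff_t y_stride) {
  for (int r = 0; r < 4; r++)
    memset(dst + r * y_stride, 128, 4);            /* (2 << 6) == 128 */
}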

void vp9_v_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Pretty much just copy.

  vector unsigned int m0, m1;
  _unaligned_load32(m1, (uint32_t *)above);
  m0 = vec_splat(m1, 0);
  vec_ste(m0, 0, (uint32_t *)dst);
  vec_ste(m0, y_stride, (uint32_t *)dst);
  vec_ste(m0, y_stride + y_stride, (uint32_t *)dst);
  vec_ste(m0, y_stride + y_stride + y_stride, (uint32_t *)dst);
}

void vp9_h_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Expand sequence
  // aa bb cc dd -- -- -- -- -- ....
  // to
  // aa aa aa aa bb bb bb bb cc ....
  // This can be done with just splats.

  vector unsigned char c0;
  vector unsigned int m0, m1, m2, m3;
  vector unsigned char vzero = vec_splat_u8(0);

  _unaligned_load32(c0, (uint32_t *)left);

  m0 = (vector unsigned int)vec_splat(c0, 0);
  m1 = (vector unsigned int)vec_splat(c0, 1);
  vec_ste(m0, 0, (uint32_t *)dst);
  m2 = (vector unsigned int)vec_splat(c0, 2);
  vec_ste(m1, y_stride, (uint32_t *)dst);
  m3 = (vector unsigned int)vec_splat(c0, 3);
  vec_ste(m2, y_stride + y_stride, (uint32_t *)dst);
  vec_ste(m3, y_stride + y_stride + y_stride, (uint32_t *)dst);
}
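For comparison, the vertical and horizontal predictors are trivial in scalar form: V copies the above row into every row, H fills each row with the matching left pixel. A sketch of both, assuming the same 4x4 geometry:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Sketch: scalar v/h predictors for a 4x4 block. */
static void v_predictor_4x4_scalar(uint8_t *dst, ptrdiff_t y_stride,
                                   const uint8_t *above) {
  for (int r = 0; r < 4; r++)
    memcpy(dst + r * y_stride, above, 4);          /* every row = top row */
}
static void h_predictor_4x4_scalar(uint8_t *dst, ptrdiff_t y_stride,
                                   const uint8_t *left) {
  for (int r = 0; r < 4; r++)
    memset(dst + r * y_stride, left[r], 4);        /* every row = one left pixel */
}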

#if(0)
// This doesn't work properly, and the large amount of unaligned
// memory access in the True Motion predictors makes them a poor
// fit for AltiVec.
void vp9_tm_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Get the top-left value from above - 1 and splat it ("tl").
  // Using that preceding byte, compute t[i]-tl+l[i] for the
  // appropriate block size where t = top and l = left with our
  // tl vector above.

  vector unsigned char c0, c1, c2, c3, c4;
  vector unsigned short s0, s1, s2, s3, s4, s5, s6, tl;
  vector unsigned int m0;
  vector unsigned char vzero = vec_splat_u8(0);
  ptrdiff_t offs = 0;

  // This can load at really weird addresses, so our
  // faster unaligned load32 macro is not sufficient.
  _unaligned_load128(c0, (uint32_t *)(above - 1));
  c1 = vec_splat(c0, 0);
  tl = vec_mergeh(vzero, c1);

  // Expand t to short and subtract tl.
  _unaligned_load128(c0, (uint32_t *)above);
  s1 = vec_mergeh(vzero, c0);
  s0 = vec_sub(s1, tl);

#define TM_2X2(x) \
  _unaligned_load128(c2, (uint32_t *)(left + 4 - x)); \
  _unaligned_load128(c3, (uint32_t *)(left + 5 - x)); \
  s2 = vec_mergeh(vzero, c2); \
  s3 = vec_mergeh(vzero, c3); \
  s4 = vec_splat(s2, 0); \
  s5 = vec_splat(s3, 0); \
  s2 = vec_add(s0, s4); \
  s3 = vec_add(s0, s5); \
  c2 = vec_packsu(s2, s2); \
  c3 = vec_packsu(s3, s3); \
  c4 = vec_perm(c2, c2, vec_lvsr(0, (uint32_t *)(dst + offs))); \
  vec_ste(c4, 0, (dst + offs)); \
  vec_ste(c4, 1, (dst + offs)); \
  vec_ste(c4, 2, (dst + offs)); \
  vec_ste(c4, 3, (dst + offs)); \
  c4 = vec_perm(c3, c3, vec_lvsr(0, (uint32_t *)(dst + offs + y_stride))); \
  vec_ste(c4, 0, (dst + offs + y_stride)); \
  vec_ste(c4, 1, (dst + offs + y_stride)); \
  vec_ste(c4, 2, (dst + offs + y_stride)); \
  vec_ste(c4, 3, (dst + offs + y_stride)); \
  offs += y_stride + y_stride;

  TM_2X2(4)
  TM_2X2(2)
#undef TM_2X2
}
#endif
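For reference, the True Motion rule the disabled block is aiming at is pred[r][c] = clip(left[r] + above[c] - above[-1]); the per-pixel unaligned stores are what make it a poor AltiVec fit. A scalar sketch (clip_byte is a hypothetical helper, not part of the patch):

#include <stdint.h>
#include <stddef.h>

/* Sketch: scalar True Motion (TM) predictor for a 4x4 block. */
static uint8_t clip_byte(int v) { return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v); }

static void tm_predictor_4x4_scalar(uint8_t *dst, ptrdiff_t y_stride,
                                    const uint8_t *above, const uint8_t *left) {
  const int tl = above[-1];                        /* top-left pixel */
  for (int r = 0; r < 4; r++)
    for (int c = 0; c < 4; c++)
      dst[r * y_stride + c] = clip_byte(left[r] + above[c] - tl);
}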

/* 8x8 */

void vp9_dc_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left)
{
@@ -109,7 +261,9 @@ void vp9_dc_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride,
  s1 = vec_splat(s0, 1);
  c0 = vec_packsu(s1, s1);

#define NEXT _unaligned_store64(c0, m0, (uint32_t *)(dst+offs)); offs+=y_stride;
  // Again, faster than repeated _unaligned_store64s since we
  // already splatted.
#define NEXT vec_ste((vector unsigned int)c0, offs, (uint32_t *)dst); vec_ste((vector unsigned int)c0, 4+offs, (uint32_t *)dst); offs+=y_stride;

  NEXT
  NEXT
@@ -123,6 +277,133 @@ void vp9_dc_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride,
#undef NEXT
}

inline void _common_top_or_left_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *what)
{
  // Again, single column variant.
  vector unsigned int m0, m1, m2, m3, m4;
  vector unsigned short s0, s1;
  vector unsigned char c0;
  ptrdiff_t offs = 0;

  vector unsigned int vzero = vec_splat_u32(0);
  vector unsigned int vfour = vec_splat_u32(4);
  vector unsigned int vthree = vec_splat_u32(3);

  _unaligned_load64(m0, (uint32_t *)what);

  // Since all these functions from here load at least 64 bits, we don't need
  // to interpolate zero anymore to clear out the other half for the SAD.
  m1 = vec_sum4s((vector unsigned char)m0, vzero);
  m2 = (vector unsigned int)vec_sum2s((vector signed int)m1, (vector signed int)vzero);
  m3 = vec_adds(m2, vfour);
  m4 = vec_sra(m3, vthree);

  s0 = vec_packsu(m4, m4);
  s1 = vec_splat(s0, 1);
  c0 = vec_packsu(s1, s1);

#define NEXT vec_ste((vector unsigned int)c0, offs, (uint32_t *)dst); vec_ste((vector unsigned int)c0, 4+offs, (uint32_t *)dst); offs+=y_stride;

  NEXT
  NEXT
  NEXT
  NEXT
  NEXT
  NEXT
  NEXT
  NEXT

#undef NEXT
}

void vp9_dc_top_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  _common_top_or_left_8x8_vmx(dst, y_stride, above);
}
void vp9_dc_left_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  _common_top_or_left_8x8_vmx(dst, y_stride, left);
}

void vp9_dc_128_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Yup, blitting 128s again.
  vector unsigned int m0 = (vector unsigned int)vec_sl(vec_splat_u8(2), vec_splat_u8(6));
  ptrdiff_t offs = 0;

#define NEXT vec_ste(m0, offs, (uint32_t *)dst); vec_ste(m0, 4+offs, (uint32_t *)dst); offs+=y_stride;

  NEXT
  NEXT
  NEXT
  NEXT
  NEXT
  NEXT
  NEXT
  NEXT

#undef NEXT
}

void vp9_v_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Yup, copying again.
  vector unsigned int m0, m1;
  ptrdiff_t offs = 0;

  _unaligned_load64(m1, (uint32_t *)above);
  m0 = vec_splat(m1, 0);
  m1 = vec_splat(m1, 1);

#define NEXT vec_ste(m0, offs, (uint32_t *)dst); vec_ste(m1, 4+offs, (uint32_t *)dst); offs+=y_stride;

  NEXT
  NEXT
  NEXT
  NEXT
  NEXT
  NEXT
  NEXT
  NEXT

#undef NEXT
}

void vp9_h_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Expand sequence
  // aa bb cc dd ee ff gg hh -- -- -- -- -- ....
  // to
  // aa aa aa aa bb bb bb bb cc cc cc cc dd ....

  vector unsigned char c0;
  vector unsigned int m0, m1, m2;
  ptrdiff_t offs = 0;

  _unaligned_load64(c0, (uint32_t *)left);

#define STORE(x) vec_ste(x, offs, (uint32_t *)dst); vec_ste(x, 4+offs, (uint32_t *)dst); offs+=y_stride;
  m0 = (vector unsigned int)vec_splat(c0, 0);
  m1 = (vector unsigned int)vec_splat(c0, 1);
  STORE(m0)
  m2 = (vector unsigned int)vec_splat(c0, 2);
  STORE(m1)
  m0 = (vector unsigned int)vec_splat(c0, 3);
  STORE(m2)
  m1 = (vector unsigned int)vec_splat(c0, 4);
  STORE(m0)
  m2 = (vector unsigned int)vec_splat(c0, 5);
  STORE(m1)
  m0 = (vector unsigned int)vec_splat(c0, 6);
  STORE(m2)
  m1 = (vector unsigned int)vec_splat(c0, 7);
  STORE(m0)
  STORE(m1)
#undef STORE
}

/* 16x16 */

void vp9_dc_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left)
{
@@ -189,4 +470,485 @@ void vp9_dc_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride,
  NEXT

#undef NEXT
}

void _common_top_or_left_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *what)
{
  vector unsigned int m0, m1, m2, m3;
  vector unsigned short s0, s1;
  vector unsigned char c0;
  ptrdiff_t offs = 0;

  vector unsigned int vzero = vec_splat_u32(0);
  vector unsigned int vfour = vec_splat_u32(4);
  vector unsigned int veight = vec_splat_u32(8);

  m0 = vec_ld(0, (uint32_t *)what);

  // The Intel version is identical to the full 16x16 except for
  // zeroing out the additional vector; otherwise it does all the
  // same computations. This is clearly wasteful, so I've elided
  // them here. In particular, an SAD of zero against zero will
  // always be zero, so we can just drop one of the SADs right now.
  m1 = vec_sum4s((vector unsigned char)m0, vzero);
  m2 = (vector unsigned int)vec_sum2s((vector signed int)m1, (vector signed int)vzero);

  // Also, we don't need the full movhlps steps because the other vector
  // will always be zero, so only one vector shift is required.
  m3 = vec_sld(m2, vzero, 8);

  m0 = vec_adds(m3, m2);
  m1 = vec_adds(m0, veight);
  m2 = vec_sra(m1, vfour);

  s0 = vec_packsu(m2, m2);
  s1 = vec_splat(s0, 1);
  c0 = vec_packsu(s1, s1);

  // 16 stores
#define NEXT vec_st(c0, offs, dst); offs += y_stride;

  NEXT
  NEXT
  NEXT
  NEXT

  NEXT
  NEXT
  NEXT
  NEXT

  NEXT
  NEXT
  NEXT
  NEXT

  NEXT
  NEXT
  NEXT
  NEXT

#undef NEXT
}
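The vec_sum4s/vec_sum2s/vec_sld chain above is simply a horizontal sum of the 16 loaded bytes followed by the rounding shift; the second movhlps-style merge of the SSE2 original can be dropped because its other operand is known to be zero. The scalar arithmetic, as a sketch:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Sketch: the arithmetic behind _common_top_or_left_16x16_vmx. */
static void dc_top_or_left_16x16_scalar(uint8_t *dst, ptrdiff_t y_stride,
                                        const uint8_t *what) {
  int sum = 0;
  for (int i = 0; i < 16; i++) sum += what[i];     /* horizontal sum of one edge */
  uint8_t dc = (uint8_t)((sum + 8) >> 4);          /* rounded average of 16 pixels */
  for (int r = 0; r < 16; r++) memset(dst + r * y_stride, dc, 16);
}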

void vp9_dc_top_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  _common_top_or_left_16x16_vmx(dst, y_stride, above);
}
void vp9_dc_left_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  _common_top_or_left_16x16_vmx(dst, y_stride, left);
}

void vp9_dc_128_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Mmmm, 128 splat splat splat.
  vector unsigned char c0 = vec_sl(vec_splat_u8(2), vec_splat_u8(6));
  ptrdiff_t offs = 0;

  // 16 stores
#define NEXT vec_st(c0, offs, dst); offs += y_stride;

  NEXT
  NEXT
  NEXT
  NEXT

  NEXT
  NEXT
  NEXT
  NEXT

  NEXT
  NEXT
  NEXT
  NEXT

  NEXT
  NEXT
  NEXT
  NEXT

#undef NEXT
}

void vp9_v_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Mmmm, vector copy copy copy.
  vector unsigned char c0 = vec_ld(0, above);
  ptrdiff_t offs = 0;

  // 16 stores
#define NEXT vec_st(c0, offs, dst); offs += y_stride;

  NEXT
  NEXT
  NEXT
  NEXT

  NEXT
  NEXT
  NEXT
  NEXT

  NEXT
  NEXT
  NEXT
  NEXT

  NEXT
  NEXT
  NEXT
  NEXT

#undef NEXT
}

void vp9_h_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Expand an entire 16-byte vector to 16 splatted vectors.
  // Unfortunately, the load is not aligned, but the stores are.
  vector unsigned char c0, c1, c2, c3;
  ptrdiff_t offs = 0;

  _unaligned_load128(c0, left);

  // 16 stores
#define SPLAT(n,x) x = vec_splat(c0, n);
#define STORE(x) vec_st(x, offs, dst); offs += y_stride;

  SPLAT(0,c1)
  SPLAT(1,c2)
  STORE(c1)
  SPLAT(2,c3)
  STORE(c2)
  SPLAT(3,c1)
  STORE(c3)
  SPLAT(4,c2)
  STORE(c1)
  SPLAT(5,c3)
  STORE(c2)
  SPLAT(6,c1)
  STORE(c3)
  SPLAT(7,c2)
  STORE(c1)
  SPLAT(8,c3)
  STORE(c2)
  SPLAT(9,c1)
  STORE(c3)
  SPLAT(10,c2)
  STORE(c1)
  SPLAT(11,c3)
  STORE(c2)
  SPLAT(12,c1)
  STORE(c3)
  SPLAT(13,c2)
  STORE(c1)
  SPLAT(14,c3)
  STORE(c2)
  SPLAT(15,c1)
  STORE(c3)
  STORE(c1)

#undef STORE
#undef SPLAT
}

/* 32x32 */

void vp9_dc_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left)
{
  // Also aligned.
  // Approximately the same routine, but double-pumped.

  vector unsigned int m0, m1, m2, m3, m4, m5, m6, m7, m8;
  vector unsigned short s0, s1;
  vector unsigned char c0;
  ptrdiff_t offs = 0;

  vector unsigned int v32 = vec_splat_u32(8); // Computed momentarily
  vector unsigned int vtwo = vec_splat_u32(2);
  vector unsigned int vsix = vec_splat_u32(6);
  vector unsigned int vzero = vec_splat_u32(0);

  m1 = vec_ld(0, (uint32_t *)above);
  m2 = vec_ld(16, (uint32_t *)above);
  m3 = vec_ld(0, (uint32_t *)left);
  m4 = vec_ld(16, (uint32_t *)left);

  v32 = vec_sl(v32, vtwo);

  m5 = vec_sum4s((vector unsigned char)m1, vzero);
  m6 = vec_sum4s((vector unsigned char)m2, vzero);
  m7 = vec_sum4s((vector unsigned char)m3, vzero);
  m8 = vec_sum4s((vector unsigned char)m4, vzero);

  m1 = (vector unsigned int)vec_sum2s((vector signed int)m5, (vector signed int)vzero);
  m2 = (vector unsigned int)vec_sum2s((vector signed int)m6, (vector signed int)vzero);
  m3 = (vector unsigned int)vec_sum2s((vector signed int)m7, (vector signed int)vzero);
  m4 = (vector unsigned int)vec_sum2s((vector signed int)m8, (vector signed int)vzero);

  m5 = vec_adds(m1, m2);
  m6 = vec_adds(m3, m4);
  m0 = vec_adds(m5, m6);

  m1 = vec_sld(m2, m2, 8);
  m3 = vec_sld(m0, m1, 8);

  m4 = vec_adds(m3, m0);
  m1 = vec_adds(m4, v32);
  m2 = vec_sra(m1, vsix);

  s0 = vec_packsu(m2, m2);
  s1 = vec_splat(s0, 1);
  c0 = vec_packsu(s1, s1);

  // 32 stores
#define NEXT vec_st(c0, offs, dst); vec_st(c0, offs+16, dst); offs += y_stride;

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

#undef NEXT
}
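Double-pumping aside, the arithmetic is the standard DC rule over 32 above plus 32 left pixels, (sum + 32) >> 6; the v32 rounding constant is built as 8 << 2 because vec_splat_u32 cannot encode 32 directly. A scalar sketch:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Sketch: scalar form of the 32x32 DC predictor. */
static void dc_predictor_32x32_scalar(uint8_t *dst, ptrdiff_t y_stride,
                                      const uint8_t *above, const uint8_t *left) {
  int sum = 0;
  for (int i = 0; i < 32; i++) sum += above[i] + left[i];
  uint8_t dc = (uint8_t)((sum + 32) >> 6);         /* rounded average of 64 pixels */
  for (int r = 0; r < 32; r++) memset(dst + r * y_stride, dc, 32);
}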

void _common_top_or_left_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *what)
{
  // This actually has slightly different logic.
  vector unsigned int m0, m1, m2, m3, m4, m5, m6;
  vector unsigned short s0, s1;
  vector unsigned char c0;
  ptrdiff_t offs = 0;

  vector unsigned int v16 = vec_splat_u32(8); // Computed momentarily
  vector unsigned int vfive = vec_splat_u32(5);
  vector unsigned int vzero = vec_splat_u32(0);

  m0 = vec_ld(0, (uint32_t *)what);
  m2 = vec_ld(16, (uint32_t *)what);

  v16 = vec_add(v16, v16);

  m5 = vec_sum4s((vector unsigned char)m0, vzero);
  m6 = vec_sum4s((vector unsigned char)m2, vzero);
  m4 = (vector unsigned int)vec_sum2s((vector signed int)m5, (vector signed int)vzero);
  m2 = (vector unsigned int)vec_sum2s((vector signed int)m6, (vector signed int)vzero);

  m0 = vec_adds(m4, m2);

  m1 = vec_sld(m2, m2, 8);
  m3 = vec_sld(m0, m1, 8);

  m4 = vec_adds(m3, m0);
  m1 = vec_adds(m4, v16);
  m2 = vec_sra(m1, vfive);

  s0 = vec_packsu(m2, m2);
  s1 = vec_splat(s0, 1);
  c0 = vec_packsu(s1, s1);

  // 32 stores
#define NEXT vec_st(c0, offs, dst); vec_st(c0, offs+16, dst); offs += y_stride;

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

#undef NEXT
}

void vp9_dc_top_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  _common_top_or_left_32x32_vmx(dst, y_stride, above);
}
void vp9_dc_left_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  _common_top_or_left_32x32_vmx(dst, y_stride, left);
}

void vp9_dc_128_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Oh baby, I love to feel those creamy 128s ru... um, sorry.
  // What were we doing again?
  vector unsigned char c0 = vec_sl(vec_splat_u8(2), vec_splat_u8(6));
  ptrdiff_t offs = 0;

  // 32 stores
#define NEXT vec_st(c0, offs, dst); vec_st(c0, offs+16, dst); offs += y_stride;

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

#undef NEXT
}

void vp9_v_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Is it hot in here or is it just me?
  // Oh, right, copying even more data.
  vector unsigned char c0 = vec_ld(0, above);
  vector unsigned char c1 = vec_ld(16, above);
  ptrdiff_t offs = 0;

  // 32 stores
#define NEXT vec_st(c0, offs, dst); vec_st(c1, offs+16, dst); offs += y_stride;

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

  NEXT NEXT
  NEXT NEXT
  NEXT NEXT
  NEXT NEXT

#undef NEXT
}

void vp9_h_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left)
{
  // Two 16-byte vectors to 32 splatted vectors.
  // Again, the load doesn't seem to be aligned :(
  // Simplest to run this as a double-pumped 16x16 routine.
  vector unsigned char c0, c1, c2, c3;
  ptrdiff_t offs = 0;

  _unaligned_load128(c0, left);

  // 32 stores
#define SPLAT(n,x) x = vec_splat(c0, n);
#define STORE(x) vec_st(x, offs, dst); vec_st(x, 16+offs, dst); offs += y_stride;

  SPLAT(0,c1)
  SPLAT(1,c2)
  STORE(c1)
  SPLAT(2,c3)
  STORE(c2)
  SPLAT(3,c1)
  STORE(c3)
  SPLAT(4,c2)
  STORE(c1)
  SPLAT(5,c3)
  STORE(c2)
  SPLAT(6,c1)
  STORE(c3)
  SPLAT(7,c2)
  STORE(c1)
  SPLAT(8,c3)
  STORE(c2)
  SPLAT(9,c1)
  STORE(c3)
  SPLAT(10,c2)
  STORE(c1)
  SPLAT(11,c3)
  STORE(c2)
  SPLAT(12,c1)
  STORE(c3)
  SPLAT(13,c2)
  STORE(c1)
  SPLAT(14,c3)
  STORE(c2)
  SPLAT(15,c1)
  STORE(c3)
  _unaligned_load128(c0, left + 16);
  STORE(c1)

  // 32 more stores
  SPLAT(0,c1)
  SPLAT(1,c2)
  STORE(c1)
  SPLAT(2,c3)
  STORE(c2)
  SPLAT(3,c1)
  STORE(c3)
  SPLAT(4,c2)
  STORE(c1)
  SPLAT(5,c3)
  STORE(c2)
  SPLAT(6,c1)
  STORE(c3)
  SPLAT(7,c2)
  STORE(c1)
  SPLAT(8,c3)
  STORE(c2)
  SPLAT(9,c1)
  STORE(c3)
  SPLAT(10,c2)
  STORE(c1)
  SPLAT(11,c3)
  STORE(c2)
  SPLAT(12,c1)
  STORE(c3)
  SPLAT(13,c2)
  STORE(c1)
  SPLAT(14,c3)
  STORE(c2)
  SPLAT(15,c1)
  STORE(c3)
  STORE(c1)

#undef STORE
#undef SPLAT
}

@@ -142,55 +142,68 @@ void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab
#define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c

void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c
void vp9_dc_128_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_vmx

void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c
void vp9_dc_128_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_vmx

void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c
void vp9_dc_128_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_vmx

void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c
void vp9_dc_128_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_vmx

void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c
void vp9_dc_left_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_vmx

void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c
void vp9_dc_left_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_vmx

void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c
void vp9_dc_left_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_vmx

void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c
void vp9_dc_left_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_vmx

void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c
#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_vmx

void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c
void vp9_dc_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_vmx

void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c
#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_vmx

void vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c
#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_vmx

void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c
void vp9_dc_top_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_vmx

void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c
void vp9_dc_top_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_vmx

void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c
void vp9_dc_top_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_vmx

void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
void vp9_dc_top_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_vmx

int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_diamond_search_sad vp9_diamond_search_sad_c
@@ -244,16 +257,20 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_c

void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c
void vp9_h_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_vmx

void vp9_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_32x32 vp9_h_predictor_32x32_c
void vp9_h_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_32x32 vp9_h_predictor_32x32_vmx

void vp9_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_4x4 vp9_h_predictor_4x4_c
void vp9_h_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_4x4 vp9_h_predictor_4x4_vmx

void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_8x8 vp9_h_predictor_8x8_c
void vp9_h_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_8x8 vp9_h_predictor_8x8_vmx

void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride, int16_t *coeff);
#define vp9_hadamard_16x16 vp9_hadamard_16x16_c
@@ -477,16 +494,20 @@ void vp9_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abo
#define vp9_tm_predictor_8x8 vp9_tm_predictor_8x8_c

void vp9_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_16x16 vp9_v_predictor_16x16_c
void vp9_v_predictor_16x16_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_16x16 vp9_v_predictor_16x16_vmx

void vp9_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_32x32 vp9_v_predictor_32x32_c
void vp9_v_predictor_32x32_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_32x32 vp9_v_predictor_32x32_vmx

void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_4x4 vp9_v_predictor_4x4_c
void vp9_v_predictor_4x4_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_4x4 vp9_v_predictor_4x4_vmx

void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c
void vp9_v_predictor_8x8_vmx(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_vmx

int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_c