llvm-6502

mirror of https://github.com/c64scene-ar/llvm-6502.git synced 2024-08-27 15:29:51 +00:00

History

Matt Arsenault ba38e6c2ae AMDGPU: Avoid using 64-bit shift for i64 (shl x, 32) This can be done only with moves which theoretically will optimize better later. Although this transform increases the instruction count, it should be code size / cycle count neutral in the worst VALU case. It also seems to slightly improve a couple of testcases due to other DAG combines this exposes. This is probably slightly worse for the SALU case, so it might be better to handle this during moveToVALU, although then you lose some simplifications like the load width reducing in the simple testcase. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242177 91177308-0d34-0410-b5e6-96231b3b80d8		2015-07-14 18:20:33 +00:00
..
32-bit-local-address-space.ll
add_i64.ll
add-debug.ll
add.ll
address-space.ll
and.ll
anyext.ll
array-ptr-calc-i32.ll
array-ptr-calc-i64.ll	AMDGPU: Avoid using 64-bit shift for i64 (shl x, 32)	2015-07-14 18:20:33 +00:00
atomic_cmp_swap_local.ll
atomic_load_add.ll
atomic_load_sub.ll
basic-branch.ll
basic-loop.ll
bfe_uint.ll
bfi_int.ll
big_alu.ll
bitcast.ll
bswap.ll
build_vector.ll
call_fs.ll
call.ll
cayman-loop-bug.ll
cf_end.ll
cf-stack-bug.ll
cgp-addressing-modes.ll
coalescer_remat.ll
codegen-prepare-addrmode-sext.ll
combine_vloads.ll
commute_modifiers.ll
commute-compares.ll
commute-shifts.ll	AMDGPU: really don't commute REV opcodes if the target variant doesn't exist	2015-06-26 20:29:10 +00:00
complex-folding.ll
concat_vectors.ll
copy-illegal-type.ll
copy-to-reg.ll
ctlz_zero_undef.ll
ctpop64.ll
ctpop.ll
cttz_zero_undef.ll
cvt_f32_ubyte.ll
cvt_flr_i32_f32.ll
cvt_rpi_i32_f32.ll
dagcombiner-bug-illegal-vec4-int-to-fp.ll
debug.ll
default-fp-mode.ll
disconnected-predset-break-bug.ll
dot4-folding.ll
ds_read2_offset_order.ll	AMDGPU/SI: Fix read2 merging into a super register.	2015-07-14 17:57:36 +00:00
ds_read2_superreg.ll	AMDGPU/SI: Fix read2 merging into a super register.	2015-07-14 17:57:36 +00:00
ds_read2.ll	AMDGPU/SI: Fix read2 merging into a super register.	2015-07-14 17:57:36 +00:00
ds_read2st64.ll	AMDGPU/SI: Fix read2 merging into a super register.	2015-07-14 17:57:36 +00:00
ds_write2.ll	AMDGPU/SI: Fix read2 merging into a super register.	2015-07-14 17:57:36 +00:00
ds_write2st64.ll	AMDGPU/SI: Fix read2 merging into a super register.	2015-07-14 17:57:36 +00:00
ds-negative-offset-addressing-mode-loop.ll	AMDGPU/SI: Add debugging subtarget feature for DS offsets	2015-07-06 16:01:58 +00:00
elf.ll	AMDGPU/SI: Set ELF OS/ABI to ELFOSABI_AMDGPU_HSA	2015-06-26 21:15:11 +00:00
elf.r600.ll
empty-function.ll
endcf-loop-header.ll
extload-private.ll
extload.ll
extract_vector_elt_i16.ll
fabs.f64.ll
fabs.ll
fadd64.ll
fadd.ll
fceil64.ll
fceil.ll
fcmp64.ll
fcmp-cnd.ll
fcmp-cnde-int-args.ll
fcmp.ll
fconst64.ll
fcopysign.f32.ll
fcopysign.f64.ll
fdiv.f64.ll
fdiv.ll
fetch-limits.r600.ll
fetch-limits.r700+.ll
ffloor.f64.ll
ffloor.ll
flat-address-space.ll
floor.ll
fma-combine.ll
fma.f64.ll
fma.ll
fmad.ll
fmax3.f64.ll
fmax3.ll
fmax_legacy.f64.ll
fmax_legacy.ll
fmax.ll
fmaxnum.f64.ll
fmaxnum.ll
fmin3.ll
fmin_legacy.f64.ll
fmin_legacy.ll
fmin.ll
fminnum.f64.ll
fminnum.ll
fmul64.ll
fmul.ll
fmuladd.ll	AMDGPU/SI: Select mad patterns to v_mac_f32	2015-07-13 15:47:57 +00:00
fnearbyint.ll
fneg-fabs.f64.ll
fneg-fabs.ll
fneg.f64.ll
fneg.ll
fp16_to_fp.ll
fp32_to_fp16.ll
fp_to_sint.f64.ll
fp_to_sint.ll
fp_to_uint.f64.ll
fp_to_uint.ll
fp-classify.ll
fpext.ll
fptrunc.ll
frem.ll
fsqrt.ll
fsub64.ll
fsub.ll
ftrunc.f64.ll
ftrunc.ll
gep-address-space.ll
global_atomics.ll
global-directive.ll
global-extload-i1.ll
global-extload-i8.ll
global-extload-i16.ll
global-extload-i32.ll
global-zero-initializer.ll
gv-const-addrspace-fail.ll
gv-const-addrspace.ll
half.ll
hsa.ll	AMDPGU/SI: Use correct resource descriptors for VI on HSA	2015-06-26 21:58:42 +00:00
i1-copy-implicit-def.ll
i1-copy-phi.ll
i8-to-double-to-float.ll
icmp64.ll
icmp-select-sete-reverse-args.ll
imm.ll
indirect-addressing-si.ll
indirect-private-64.ll
infinite-loop-evergreen.ll
infinite-loop.ll
inline-asm.ll
inline-calls.ll
input-mods.ll
insert_subreg.ll
insert_vector_elt.ll
invariant-load-no-alias-store.ll	DAGCombiner: Assume invariant load cannot alias a store	2015-07-10 22:17:40 +00:00
jump-address.ll
kcache-fold.ll
kernel-args.ll
large-alloca.ll
large-constant-initializer.ll
lds-initializer.ll
lds-oqap-crash.ll
lds-output-queue.ll
lds-size.ll
lds-zero-initializer.ll
legalizedag-bug-expand-setcc.ll
lit.local.cfg
literals.ll
llvm.AMDGPU.abs.ll
llvm.AMDGPU.barrier.global.ll
llvm.AMDGPU.barrier.local.ll
llvm.AMDGPU.bfe.i32.ll
llvm.AMDGPU.bfe.u32.ll
llvm.AMDGPU.bfi.ll
llvm.AMDGPU.bfm.ll
llvm.AMDGPU.brev.ll
llvm.AMDGPU.clamp.ll
llvm.AMDGPU.class.ll
llvm.AMDGPU.cube.ll
llvm.AMDGPU.cvt_f32_ubyte.ll
llvm.AMDGPU.div_fixup.ll
llvm.AMDGPU.div_fmas.ll	AMDGPU/SI: Fix extra space when printing v_div_fmas_*	2015-06-28 18:16:14 +00:00
llvm.AMDGPU.div_scale.ll
llvm.amdgpu.dp4.ll
llvm.AMDGPU.flbit.i32.ll
llvm.AMDGPU.fract.f64.ll
llvm.AMDGPU.fract.ll
llvm.AMDGPU.imad24.ll
llvm.AMDGPU.imax.ll
llvm.AMDGPU.imin.ll
llvm.AMDGPU.imul24.ll
llvm.AMDGPU.kill.ll
llvm.amdgpu.kilp.ll
llvm.AMDGPU.ldexp.ll
llvm.AMDGPU.legacy.rsq.ll
llvm.amdgpu.lrp.ll	AMDGPU/SI: Select mad patterns to v_mac_f32	2015-07-13 15:47:57 +00:00
llvm.AMDGPU.mul.ll
llvm.AMDGPU.rcp.f64.ll
llvm.AMDGPU.rcp.ll
llvm.AMDGPU.rsq.clamped.f64.ll
llvm.AMDGPU.rsq.clamped.ll
llvm.AMDGPU.rsq.ll
llvm.AMDGPU.tex.ll
llvm.AMDGPU.trig_preop.ll
llvm.AMDGPU.trunc.ll
llvm.AMDGPU.umad24.ll
llvm.AMDGPU.umax.ll
llvm.AMDGPU.umin.ll
llvm.AMDGPU.umul24.ll
llvm.cos.ll
llvm.exp2.ll
llvm.log2.ll
llvm.memcpy.ll
llvm.pow.ll
llvm.rint.f64.ll
llvm.rint.ll
llvm.round.f64.ll
llvm.round.ll	AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions	2015-07-14 14:15:03 +00:00
llvm.SI.fs.interp.ll
llvm.SI.gather4.ll
llvm.SI.getlod.ll
llvm.SI.image.ll
llvm.SI.image.sample.ll
llvm.SI.image.sample.o.ll
llvm.SI.imageload.ll
llvm.SI.load.dword.ll
llvm.SI.resinfo.ll
llvm.SI.sample-masked.ll
llvm.SI.sample.ll
llvm.SI.sampled.ll
llvm.SI.sendmsg-m0.ll
llvm.SI.sendmsg.ll
llvm.SI.tbuffer.store.ll
llvm.SI.tid.ll
llvm.sin.ll
llvm.sqrt.ll
load64.ll
load-i1.ll
load-input-fold.ll
load.ll
load.vec.ll
local-64.ll
local-atomics64.ll
local-atomics.ll
local-memory-two-objects.ll	Fix "the the" in comments.	2015-06-19 01:53:21 +00:00
local-memory.ll
loop-address.ll
loop-idiom.ll
lshl.ll
lshr.ll
m0-spill.ll
mad_int24.ll
mad_uint24.ll
mad-combine.ll	AMDGPU/SI: Select mad patterns to v_mac_f32	2015-07-13 15:47:57 +00:00
mad-sub.ll	AMDGPU/SI: Select mad patterns to v_mac_f32	2015-07-13 15:47:57 +00:00
madak.ll	AMDGPU/SI: Select mad patterns to v_mac_f32	2015-07-13 15:47:57 +00:00
madmk.ll	AMDGPU/SI: Select mad patterns to v_mac_f32	2015-07-13 15:47:57 +00:00
max3.ll
max-literals.ll
max.ll
merge-stores.ll
min3.ll
min.ll
missing-store.ll
mubuf.ll
mul_int24.ll
mul_uint24.ll	AMDGPU: Avoid using 64-bit shift for i64 (shl x, 32)	2015-07-14 18:20:33 +00:00
mul.ll
mulhu.ll
no-initializer-constant-addrspace.ll
no-shrink-extloads.ll
operand-folding.ll
operand-spacing.ll
or.ll
packetizer.ll
parallelandifcollapse.ll
parallelorifcollapse.ll
predicate-dp4.ll
predicates.ll
private-memory-atomics.ll
private-memory-broken.ll
private-memory.ll
pv-packing.ll
pv.ll
r600-encoding.ll
r600-export-fix.ll
r600-infinite-loop-bug-while-reorganizing-vector.ll
r600cfg.ll
README
reciprocal.ll
register-count-comments.ll
reorder-stores.ll
rotl.i64.ll
rotl.ll
rotr.i64.ll
rotr.ll
rsq.ll
rv7x0_count3.ll
s_movk_i32.ll
saddo.ll
salu-to-valu.ll
scalar_to_vector.ll
schedule-fs-loop-nested-if.ll
schedule-fs-loop-nested.ll
schedule-fs-loop.ll
schedule-global-loads.ll
schedule-if-2.ll
schedule-if.ll
schedule-kernel-arg-loads.ll
schedule-vs-if-nested-loop-failure.ll
schedule-vs-if-nested-loop.ll
scratch-buffer.ll
sdiv.ll
sdivrem24.ll
sdivrem64.ll
select64.ll	AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions	2015-07-14 14:15:03 +00:00
select-i1.ll
select-vectors.ll	AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions	2015-07-14 14:15:03 +00:00
select.ll
selectcc-cnd.ll
selectcc-cnde-int.ll
selectcc-icmp-select-float.ll
selectcc-opt.ll
selectcc.ll
set-dx10.ll
setcc64.ll
setcc-equivalent.ll
setcc-opt.ll
setcc.ll
seto.ll
setuo.ll
sext-eliminate.ll
sext-in-reg.ll
sgpr-control-flow.ll
sgpr-copy-duplicate-operand.ll
sgpr-copy.ll
shared-op-cycle.ll
shl_add_constant.ll
shl_add_ptr.ll
shl.ll	AMDGPU: Avoid using 64-bit shift for i64 (shl x, 32)	2015-07-14 18:20:33 +00:00
si-annotate-cf-assertion.ll
si-annotate-cf.ll
si-lod-bias.ll
si-sgpr-spill.ll
si-spill-cf.ll
si-triv-disjoint-mem-access.ll
si-vector-hang.ll
sign_extend.ll
simplify-demanded-bits-build-pair.ll
sint_to_fp.f64.ll	AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions	2015-07-14 14:15:03 +00:00
sint_to_fp.ll
smrd.ll
split-scalar-i64-add.ll
sra.ll
srem.ll
srl.ll	AMDGPU: Avoid using 64-bit shift for i64 (shl x, 32)	2015-07-14 18:20:33 +00:00
ssubo.ll
store-barrier.ll
store-v3i32.ll
store-v3i64.ll
store-vector-ptrs.ll
store.ll
store.r600.ll
structurize1.ll
structurize.ll
sub.ll
subreg-coalescer-crash.ll
subreg-coalescer-undef-use.ll	Test for specific output in lit test	2015-07-01 22:34:59 +00:00
subreg-eliminate-dead.ll
swizzle-export.ll
tex-clause-antidep.ll
texture-input-merge.ll
trunc-cmp-constant.ll
trunc-store-f64-to-f16.ll
trunc-store-i1.ll
trunc-vector-store-assertion-failure.ll
trunc.ll
tti-unroll-prefs.ll
uaddo.ll
udiv.ll
udivrem24.ll
udivrem64.ll
udivrem.ll
uint_to_fp.f64.ll	AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions	2015-07-14 14:15:03 +00:00
uint_to_fp.ll
unaligned-load-store.ll
unhandled-loop-condition-assertion.ll
unroll.ll
unsupported-cc.ll
urecip.ll
urem.ll
use-sgpr-multiple-times.ll
usubo.ll
v1i64-kernel-arg.ll
v_cndmask.ll
v_mac.ll	AMDGPU/SI: Select mad patterns to v_mac_f32	2015-07-13 15:47:57 +00:00
valu-i1.ll
vector-alloca.ll
vertex-fetch-encoding.ll
vop-shrink.ll
vselect64.ll
vselect.ll	AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions	2015-07-14 14:15:03 +00:00
vtx-fetch-branch.ll
vtx-schedule.ll
wait.ll
work-item-intrinsics.ll
wrong-transalu-pos-fix.ll
xor.ll	AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions	2015-07-14 14:15:03 +00:00
zero_extend.ll

README

+==============================================================================+
| How to organize the lit tests                                                |
+==============================================================================+

- If you write a test for matching a single DAG opcode or intrinsic, it should
  go in a file called {opcode_name,intrinsic_name}.ll (e.g. fadd.ll)

- If you write a test that matches several DAG opcodes and checks for a single
  ISA instruction, then that test should go in a file called {ISA_name}.ll (e.g.
  bfi_int.ll)

- For all other tests, use your best judgement for organizing tests and naming
  the files.

+==============================================================================+
| Naming conventions                                                           |
+==============================================================================+

- Use dash '-' and not underscore '_' to separate words in file names, unless
  the file is named after a DAG opcode or ISA instruction that has an
  underscore '_' in its name.