mirror of
https://github.com/autc04/Retro68.git
synced 2024-11-19 03:05:15 +00:00
89 lines
1.7 KiB
ArmAsm
89 lines
1.7 KiB
ArmAsm
/*
|
|
* ====================================================
|
|
* Copyright (C) 2007 by Ellips BV. All rights reserved.
|
|
*
|
|
* Permission to use, copy, modify, and distribute this
|
|
* software is freely granted, provided that this notice
|
|
* is preserved.
|
|
* ====================================================
|
|
*/
|
|
|
|
#include "x86_64mach.h"
|
|
|
|
/*
 * void *memset (void *dst, int c, size_t n)
 *
 * Fills n bytes starting at dst with the low byte of c and returns dst.
 *
 * In:       rdi = dst, rsi = c, rdx = n
 * Out:      rax = dst
 * Clobbers: rcx, rdx, rdi, r8, r9, flags
 *
 * The string stores are expected to advance rdi upwards, so the
 * direction flag must be clear on entry.  Register names are written
 * without a prefix; presumably x86_64mach.h maps them to the spelling
 * the assembler expects.
 *
 * Strategy:
 *   n < 16            plain byte stores
 *   otherwise         byte stores up to the next 8-byte boundary, then
 *     rest <  256     quadword stores followed by a byte tail
 *     rest >= 256     128-byte blocks of non-temporal stores followed
 *                     by a byte tail
 *
 * Internal labels carry the .L prefix so that they are assembler-local
 * and addresses inside the routine resolve to memset itself.
 */
        .global SYM (memset)
        SOTYPE_FUNCTION(memset)

SYM (memset):
        movq    rdi, r9                 /* r9 = dst, kept for the return value */
        movq    rsi, rax                /* al = fill byte for the byte stores */
        movq    rdx, rcx                /* rcx = n, count for rep stosb */
        cmpq    $16, rdx
        jb      .Lbyte_set              /* short fill: not worth aligning */

        movq    rdi, r8                 /* Align on quad word boundary */
        andq    $7, r8                  /* r8 = dst mod 8 */
        jz      .Lquadword_aligned
        movq    $8, rcx
        subq    r8, rcx                 /* rcx = bytes up to the boundary (1..7) */
        subq    rcx, rdx                /* rdx = bytes left once aligned */
        rep     stosb
        movq    rdx, rcx                /* rcx = remaining byte count */

.Lquadword_aligned:
        /* Replicate the fill byte into all eight bytes of rax. */
        movabs  $0x0101010101010101, r8
        movzbl  sil, eax
        imul    r8, rax
        cmpq    $256, rdx
        jb      .Lquadword_set

        shrq    $7, rcx                 /* Store 128 bytes at a time with minimum cache pollution */

        /* rcx = number of 128-byte blocks; at least 2 because rdx >= 256. */
        .p2align 4
.Lnt_loop:
        movntiq rax,     (rdi)
        movntiq rax,   8 (rdi)
        movntiq rax,  16 (rdi)
        movntiq rax,  24 (rdi)
        movntiq rax,  32 (rdi)
        movntiq rax,  40 (rdi)
        movntiq rax,  48 (rdi)
        movntiq rax,  56 (rdi)
        movntiq rax,  64 (rdi)
        movntiq rax,  72 (rdi)
        movntiq rax,  80 (rdi)
        movntiq rax,  88 (rdi)
        movntiq rax,  96 (rdi)
        movntiq rax, 104 (rdi)
        movntiq rax, 112 (rdi)
        movntiq rax, 120 (rdi)

        leaq    128 (rdi), rdi          /* advance dst without touching flags */

        dec     rcx
        jnz     .Lnt_loop

        sfence                          /* fence the weakly ordered movnti stores */
        movq    rdx, rcx
        andq    $127, rcx               /* bytes left over after the 128-byte blocks */
        rep     stosb
        movq    r9, rax                 /* return the original dst */
        ret


.Lbyte_set:
        rep     stosb                   /* rcx = n, al = fill byte */
        movq    r9, rax                 /* return the original dst */
        ret


.Lquadword_set:
        shrq    $3, rcx                 /* rcx = number of whole quadwords */
        rep     stosq
        movq    rdx, rcx
        andq    $7, rcx
        rep     stosb                   /* Store the remaining bytes */
        movq    r9, rax                 /* return the original dst */
        ret
|
|
|