/* Mirror of https://github.com/autc04/Retro68.git, synced 2024-12-04 01:50:38 +00:00 (458 lines, 12 KiB, ArmAsm). */
/*
 * vr5xxx.S -- CPU specific support routines
 *
 * Copyright (c) 1999 Cygnus Solutions
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

/* This file cloned from vr4300.S by dlindsay@cygnus.com
 * and recoded to suit Vr5432 and Vr5000.
 * Should be no worse for Vr43{00,05,10}.
 * Specifically, __cpu_flush() has been changed (a) to allow for the hardware
 * difference (in set associativity) between the Vr5432 and Vr5000,
 * and (b) to flush the optional secondary cache of the Vr5000.
 */

/* Implementation number, as found in the Processor Revision Identifier
   (PRID) register, that __cpu_flush compares against to pick its
   Vr5432-specific path. */
#define IMPL_VR5432	0x54

/* Cache constants that cannot be determined dynamically. */
#define VR5000_2NDLINE	32		/* secondary cache line size */
#define VR5432_LINE	32		/* I,Dcache line sizes */
#define VR5432_SIZE	(16*1024)	/* I,Dcache half-size */

/* Ask for the MIPS III instruction set unless already building mips64. */
#ifndef __mips64
	.set	mips3
#endif
/* This file contains 32 bit assembly code, so turn MIPS16 encoding off. */
#ifdef __mips16
	.set	nomips16
#endif

#include "regs.S"

	.text
	.align	2

/*
 * __cpu_timer_poll -- small polled (foreground) delay.
 *
 * Taken from "R4300 Preliminary RISC Processor Specification
 * Revision 2.0 January 1995" page 39: "The Count register...
 * increments at a constant rate... at one-half the PClock speed."
 * That fact is used here to wait by polling the Count register.
 *
 * in:  a0 = (unsigned int) number of PClock ticks to wait for
 * out: void
 * Registers written: a0, v0.
 */
	.globl	__cpu_timer_poll
	.ent	__cpu_timer_poll
__cpu_timer_poll:
	.set	noreorder
	# Count advances at half the PClock rate, so the Count delta is
	# ticks/2.  The shift sits in the delay slot of a branch-likely
	# and therefore only runs when the branch is taken.
	bnezl	a0, 1f			# non-zero request: go and wait
	srl	a0, a0, 1		# {DELAY SLOT} a0 = ticks / 2
	# Zero ticks requested: quick return to the caller.
	jr	ra
	nop				# {DELAY SLOT}
1:
	mfc0	v0, C0_COUNT		# v0 = Count at the start of the wait
	nop
	nop
	# Adding the delta to the start value and then waiting for an
	# exact match would not be reliable: Count increments every two
	# PClocks, so it can step past the wanted value between two
	# samples.
	#
	# The delta was halved on entry, so it is below half of the
	# 32bit number space.  The sign of (end - now) can therefore be
	# used to tell whether the end value has been reached.
	addu	a0, v0, a0		# a0 = end value (overflow ignored)
	# a0 now holds the end value (sign-extended to fill the 64bit
	# register).
2:
	mfc0	v0, C0_COUNT		# v0 = current Count
	nop
	nop
	subu	v0, a0, v0		# v0 = end - now (32bit subtraction)
	bgtzl	v0, 2b			# still positive: keep polling
	nop				# {DELAY SLOT}
	# At this point the caller has been delayed for AT LEAST the
	# required number of counter ticks.
	jr	ra			# return to caller
	nop				# {DELAY SLOT}
	.set	reorder
	.end	__cpu_timer_poll

/*
 * __cpu_flush -- flush the processor caches to memory.
 *
 * Issues index write-back-invalidate operations over the primary data
 * cache (and the secondary cache when CONFIG reports one), then index
 * invalidate operations over the primary instruction cache.
 *
 * in:  void
 * out: void
 * Registers written: a0-a3, t0-t3.
 */
	.globl	__cpu_flush
	.ent	__cpu_flush
__cpu_flush:
	.set	noreorder
	# NOTE: The Vr4300 and Vr5432 *CANNOT* have any secondary cache.
	# On those, SC (bit 17 of CONFIG register) is hard-wired to 1,
	# except that email from Dennis_Han@el.nec.com says that old
	# versions of the Vr5432 incorrectly hard-wired this bit to 0.
	# The Vr5000 has an optional direct-mapped secondary cache,
	# and the SC bit correctly indicates this.

	# So, for the 4300 and 5432 we want to just
	# flush the primary Data and Instruction caches.
	# For the 5000 it is desired to flush the secondary cache too.
	# There is an operation difference worth noting.
	# The 4300 and 5000 primary caches use VA bit 14 to choose cache set,
	# whereas 5432 primary caches use VA bit 0.

	# This code interprets the relevant Config register bits as
	# much as possible, except for the 5432.
	# The code therefore has some portability.
	# However, the associativity issues mean you should not just assume
	# that this code works anywhere. Also, the secondary cache set
	# size is hardwired, since the 5000 series does not define codes
	# for variant sizes.

	# Note: this version of the code flushes D$ before I$.
	# It is difficult to construct a case where that matters,
	# but it cant hurt.

	mfc0	a0, C0_PRID		# a0 = Processor Revision register
	nop				# dlindsay: unclear why the nops, but
	nop				# vr4300.S had such so I do too.
	srl	a2, a0, PR_IMP		# want bits 8..15
	# The Implementation field is 8 bits wide, so mask with 0xff.
	# (A sparse mask such as 0x255 lets other implementation numbers
	# alias to IMPL_VR5432 and also lets a bit above the field through.)
	andi	a2, a2, 0xff		# mask: now a2 = Implementation # field
	li	a1, IMPL_VR5432
	beq	a1, a2, 8f		# use Vr5432-specific flush algorithm
	nop				# (delay slot)

	# Non-Vr5432 version of the code.
	# (The distinctions being: CONFIG is truthful about secondary cache,
	# and we act as if the primary Icache and Dcache are direct mapped.)

	mfc0	t0, C0_CONFIG		# t0 = CONFIG register
	nop
	nop
	li	a1, 1			# a1=1, a useful constant

	srl	a2, t0, CR_IC		# want IC field of CONFIG
	andi	a2, a2, 0x7		# mask: now a2= code for Icache size
	add	a2, a2, 12		# +12
	sllv	a2, a1, a2		# a2=primary instruction cache size in bytes

	srl	a3, t0, CR_DC		# DC field of CONFIG
	andi	a3, a3, 0x7		# mask: now a3= code for Dcache size
	add	a3, a3, 12		# +12
	sllv	a3, a1, a3		# a3=primary data cache size in bytes

	li	t2, (1 << CR_IB)	# t2=mask over IB boolean
	and	t2, t2, t0		# test IB field of CONFIG register value
	beqz	t2, 1f			#
	li	a1, 16			# 16 bytes (branch shadow: always loaded.)
	li	a1, 32			# non-zero, then 32bytes
1:

	li	t2, (1 << CR_DB)	# t2=mask over DB boolean
	and	t2, t2, t0		# test DB field of CONFIG register value
	beqz	t2, 2f			#
	li	a0, 16			# 16bytes (branch shadow: always loaded.)
	li	a0, 32			# non-zero, then 32bytes
2:
	lui	t1, ((K0BASE >> 16) & 0xFFFF)
	ori	t1, t1, (K0BASE & 0xFFFF)

	# At this point,
	# a0 = primary Dcache line size in bytes
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# a3 = primary Dcache size in bytes
	# t0 = CONFIG value
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	addi	t3, t1, 0		# t3=t1=start address for any cache
	add	t2, t3, a3		# t2=end address+1 of Dcache
	sub	t2, t2, a0		# t2=address of last line in Dcache
3:
	cache	INDEX_WRITEBACK_INVALIDATE_D,0(t3)
	bne	t3, t2, 3b		#
	addu	t3, t3, a0		# (delay slot) increment by Dcache line size


	# Now check CONFIG to see if there is a secondary cache
	lui	t2, (1 << (CR_SC-16))	# t2=mask over SC boolean
	and	t2, t2, t0		# test SC in CONFIG
	bnez	t2, 6f			# SC set: no secondary cache
	nop				# (delay slot) made explicit

	# There is a secondary cache. Find out its sizes.

	srl	t3, t0, CR_SS		# want SS field of CONFIG
	andi	t3, t3, 0x3		# mask: now t3= code for cache size.
	beqz	t3, 4f
	lui	a3, ((512*1024)>>16)	# (delay slot) a3= 512K, code was 0
	addu	t3, t3, -1		# decrement code
	beqz	t3, 4f
	lui	a3, ((1024*1024)>>16)	# (delay slot) a3= 1 M, code 1
	addu	t3, t3, -1		# decrement code
	beqz	t3, 4f
	lui	a3, ((2*1024*1024)>>16)	# (delay slot) a3= 2 M, code 2
	j	6f			# no secondary cache, code 3
	nop				# (delay slot) made explicit

4:	# a3 = secondary cache size in bytes
	li	a0, VR5000_2NDLINE	# no codes assigned for other than 32

	# At this point,
	# a0 = secondary cache line size in bytes
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# a3 = secondary cache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	addi	t3, t1, 0		# t3=t1=start address for any cache
	add	t2, t3, a3		# t2=end address+1 of secondary cache
	sub	t2, t2, a0		# t2=address of last line in secondary cache
5:
	cache	INDEX_WRITEBACK_INVALIDATE_SD,0(t3)
	bne	t3, t2, 5b
	addu	t3, t3, a0		# (delay slot) increment by line size


6:	# Any optional secondary cache done.  Now do I-cache and return.

	# At this point,
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	add	t2, t1, a2		# t2=end address+1 of Icache
	sub	t2, t2, a1		# t2=address of last line in Icache
7:
	cache	INDEX_INVALIDATE_I,0(t1)
	bne	t1, t2, 7b
	addu	t1, t1, a1		# (delay slot) increment by Icache line size

	j	ra			# return to the caller
	nop				# (delay slot)

8:

	# Vr5432 version of the cpu_flush code.
	# (The distinctions being: CONFIG can not be trusted about secondary
	# cache (which does not exist). The primary caches use Virtual Address Bit 0
	# to control set selection.)

	# Code does not consult CONFIG about cache sizes: knows the hardwired sizes.
	# Since both I and D have the same size and line size, uses a merged loop.

	li	a0, VR5432_LINE
	li	a1, VR5432_SIZE
	lui	t1, ((K0BASE >> 16) & 0xFFFF)
	ori	t1, t1, (K0BASE & 0xFFFF)

	# a0 = cache line size in bytes
	# a1 = 1/2 cache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)

	add	t2, t1, a1		# t2=end address+1
	sub	t2, t2, a0		# t2=address of last line in each set

9:
	cache	INDEX_WRITEBACK_INVALIDATE_D,0(t1)	# set 0
	cache	INDEX_WRITEBACK_INVALIDATE_D,1(t1)	# set 1
	cache	INDEX_INVALIDATE_I,0(t1)	# set 0
	cache	INDEX_INVALIDATE_I,1(t1)	# set 1
	bne	t1, t2, 9b
	addu	t1, t1, a0		# (delay slot) increment by line size

	j	ra			# return to the caller
	nop				# (delay slot)
	.set	reorder
	.end	__cpu_flush

/*
 * Storage used by the bus error handler.
 *
 * NOTE: these variables must *NOT* be addressed relative to the $gp
 * register, because this code is executed before $gp is initialised.
 * That is why they are kept in the text area.  This will cause
 * problems if this routine is ever ROMmed.
 *
 * __buserr_cnt: one word; incremented by __buserr, cleared by
 *               __default_buserr_handler, read by __buserr_count.
 * __k1_save:    two words, 8-byte aligned; __buserr parks k1 here
 *               with a doubleword store while it uses k1 as scratch.
 */
	.globl	__buserr_cnt
__buserr_cnt:
	.word	0
	.align	3
__k1_save:
	.word	0, 0
	.align	2

/*
 * __buserr -- simple bus error exception handler.
 *
 * Entered from the stub at __exception_code.  When the ExcCode field
 * of the Cause register is 7, the handler counts the event in
 * __buserr_cnt, advances EPC by one instruction and returns with eret.
 * For every other exception it jumps to the saved copy of the previous
 * vector code at __previous.
 *
 * Only k0 and k1 are used, and k1 is saved and restored around its use.
 */
	.ent	__buserr
	.globl	__buserr
__buserr:
	.set	noat
	.set	noreorder
	# k0 and k1 available for use:
	mfc0	k0, C0_CAUSE
	nop
	nop
	andi	k0, k0, 0x7c		# keep Cause bits 2..6 (ExcCode)
	sub	k0, k0, 7 << 2		# k0 == 0 exactly when ExcCode is 7
	beqz	k0, __buserr_do
	nop				# {DELAY SLOT}
	# Some other exception: call the previous handler.
	la	k0, __previous
	jr	k0
	nop				# {DELAY SLOT}
	#
__buserr_do:
	# The ExcCode test above has already selected ExcCode 7, so
	# everything that reaches this label is handled as a bus error.
	la	k0, __k1_save
	sd	k1, 0(k0)		# park k1 so it can serve as a pointer
	#
	la	k1, __buserr_cnt
	lw	k0, 0(k1)		# increment counter
	addu	k0, k0, 1
	sw	k0, 0(k1)
	#
	la	k0, __k1_save
	ld	k1, 0(k0)		# k1 back to its value on entry
	#
	# NOTE(review): EPC is always advanced by 4.  If the fault can
	# occur in a branch delay slot, EPC would address the branch
	# rather than the faulting instruction -- confirm that case
	# cannot arise for the probes this handler is used with.
	mfc0	k0, C0_EPC
	nop
	nop
	addu	k0, k0, 4		# skip offending instruction
	mtc0	k0, C0_EPC		# update EPC
	nop
	nop
	eret
	.set	reorder
	.set	at
	.end	__buserr

/*
 * Exception vector stub.  __default_buserr_handler copies the words
 * between __exception_code and __exception_code_end into the exception
 * vector; once installed there, the stub loads the address of __buserr
 * into k0 and jumps to it.
 */
__exception_code:
	.set	noreorder
	lui	k0, %hi(__buserr)
	daddiu	k0, k0, %lo(__buserr)	# k0 = address of __buserr
	jr	k0
	nop				# {DELAY SLOT}
	.set	reorder
__exception_code_end:

/*
 * __previous: buffer with the same size as the vector stub.
 * __default_buserr_handler saves the vector contents it overwrites
 * here; __buserr jumps here for exceptions it does not handle, and
 * __restore_buserr_handler copies it back to the vector.
 */
	.data
__previous:
	.space	(__exception_code_end - __exception_code)
	# This subtracting two addresses is working
	# but is not guaranteed to continue working.
	# The assembler reserves the right to put these
	# two labels into different frags, and then
	# cant take their difference.

	.text

/*
 * __default_buserr_handler -- attach our simple bus error handler.
 *
 * Saves the current contents of the exception vector in __previous,
 * overwrites the vector with the stub at __exception_code, and clears
 * __buserr_cnt.
 *
 * in:  void
 * out: void
 * Registers written: a0-a3, v0.
 *
 * NOTE(review): the vector is written with ordinary stores and no
 * cache operation follows in this routine -- confirm whether callers
 * are expected to flush (for example with __cpu_flush) before the new
 * vector code can be relied upon.
 */
	.ent	__default_buserr_handler
	.globl	__default_buserr_handler
__default_buserr_handler:
	.set	noreorder
	# Choose the vector base from the BEV bit of the Status register:
	#   BEV clear -> 0x8000 << 16,
	#   BEV set   -> (0xbfc0 << 16) + 0x0200,
	# then add the 0x0180 vector offset in both cases.
	mfc0	a0, C0_SR
	nop
	li	a1, SR_BEV
	and	a1, a1, a0
	beqz	a1, baseaddr
	lui	a0, 0x8000		# {DELAY SLOT} always loaded
	lui	a0, 0xbfc0		# BEV set: replace the base
	daddiu	a0, a0, 0x0200
baseaddr:
	daddiu	a0, a0, 0x0180
	# a0 = base vector table address
	la	a1, __exception_code_end
	la	a2, __exception_code
	subu	a1, a1, a2		# a1 = stub size in bytes
	la	a3, __previous
	# there must be a better way of doing this????
	# One word per pass: old vector word -> __previous, stub word ->
	# vector.  a1 counts the bytes still to copy.
copyloop:
	lw	v0, 0(a0)
	sw	v0, 0(a3)		# save what the vector held
	lw	v0, 0(a2)
	sw	v0, 0(a0)		# install the stub word
	daddiu	a0, a0, 4
	daddiu	a2, a2, 4
	daddiu	a3, a3, 4
	subu	a1, a1, 4
	bnez	a1, copyloop
	nop				# {DELAY SLOT}
	la	a0, __buserr_cnt
	sw	$0, 0(a0)		# start counting from zero
	jr	ra
	nop				# {DELAY SLOT}
	.set	reorder
	.end	__default_buserr_handler

/*
 * __restore_buserr_handler -- restore original (monitor) bus error
 * handler.
 *
 * Copies the words held in __previous back into the exception vector.
 * The number of bytes copied is the size of the stub at
 * __exception_code.
 *
 * in:  void
 * out: void
 * Registers written: a0, a1, a3, v0.
 */
	.ent	__restore_buserr_handler
	.globl	__restore_buserr_handler
__restore_buserr_handler:
	.set	noreorder
	# Choose the vector base from the BEV bit of the Status register:
	#   BEV clear -> 0x8000 << 16,
	#   BEV set   -> (0xbfc0 << 16) + 0x0200,
	# then add the 0x0180 vector offset in both cases.
	mfc0	a0, C0_SR
	nop
	li	a1, SR_BEV
	and	a1, a1, a0
	beqz	a1, res_baseaddr
	lui	a0, 0x8000		# {DELAY SLOT} always loaded
	lui	a0, 0xbfc0		# BEV set: replace the base
	daddiu	a0, a0, 0x0200
res_baseaddr:
	daddiu	a0, a0, 0x0180
	# a0 = base vector table address
	la	a1, __exception_code_end
	la	a3, __exception_code
	subu	a1, a1, a3		# a1 = stub size in bytes
	la	a3, __previous		# a3 = saved vector contents
	# there must be a better way of doing this????
	# One word per pass; a1 counts the bytes still to copy.
res_copyloop:
	lw	v0, 0(a3)
	sw	v0, 0(a0)
	daddiu	a0, a0, 4
	daddiu	a3, a3, 4
	subu	a1, a1, 4
	bnez	a1, res_copyloop
	nop				# {DELAY SLOT}
	jr	ra
	nop				# {DELAY SLOT}
	.set	reorder
	.end	__restore_buserr_handler

/*
 * __buserr_count -- read the bus error counter.
 *
 * in:  void
 * out: v0 = unsigned int __buserr_cnt
 */
	.ent	__buserr_count
	.globl	__buserr_count
__buserr_count:
	.set	noreorder
	la	v0, __buserr_cnt	# v0 = address of the counter
	lw	v0, 0(v0)		# v0 = current count
	jr	ra
	nop				# {DELAY SLOT}
	.set	reorder
	.end	__buserr_count

/* EOF vr5xxx.S */