mirror of
https://github.com/elliotnunn/supermario.git
synced 2024-11-26 16:49:18 +00:00
262 lines
7.5 KiB
Plaintext
262 lines
7.5 KiB
Plaintext
;
|
|
; File: MemMgrUtils.a (used to be Utilities.a)
|
|
;
|
|
; Contains: Memory Manager BlockMove type utilities.
|
|
;
|
|
; Written by: Jeff Crawford
|
|
;
|
|
; Copyright: © 1992 by Apple Computer, Inc., all rights reserved.
|
|
;
|
|
; Change History (most recent first):
|
|
;
|
|
; <1> 12/18/92 JC first checked in
|
|
;
|
|
;
|
|
;
|
|
; Notes: We use these routines instead of calling the trap _BlockMove
|
|
; for several reasons.
|
|
;
|
|
; 1) With quadLongWordAlignment, all blocks
|
|
; are a multiple of 16. No need to waste time checking alignment.
|
|
; 2) Due to quadlong word alignment, the 040 version will use the MOVE16
|
|
; instruction for improved performance.
|
|
; 3) The new memory manager has much better cache coherency management
|
|
; than the old one. BlockMove flushes caches on every move. If the old
|
|
; MM moved 50 blocks, caches would be flushed 50 times. Under this new
|
|
; scheme, caches are flushed during exit from the MM, not during block
|
|
; movement. If 50 blocks are moved, in one operation, caches are only
|
|
; flushed once.
|
|
; 4) BlockMove masks pointers and size for 24 bit move. Since we are 32 bit
|
|
; only, there is no need to mask these values.
|
|
; 5) Skip overhead of calling the trap dispatcher
|
|
;
|
|
;
|
|
|
|
CASE ON
|
|
MACHINE MC68020
|
|
|
|
|
|
export MOVEBYTES020
|
|
export MOVEBYTES040
|
|
|
|
|
|
|
|
MMUtilitiesCode PROC
|
|
;
|
|
;
|
|
; MoveBytes020 - copies data from source to destination for 68020 machines
;
; The copy direction is chosen so that overlapping buffers are handled:
; when the destination is at or above the source the buffers are walked
; from the end downwards, otherwise from the start upwards.
;
; The length is consumed in 16-byte units: first a remnant of 0-3 units
; (length mod 64), then whole 64-byte chunks.  The routine therefore
; assumes the length is a multiple of 16 (see the quad-longword alignment
; note at the top of this file); other lengths are not handled.
;
; Registers
;   A1      Destination Pointer
;   A0      Source Pointer
;   D0      Length, later large loop count
;   D1      Remnant size or count
;
; Trashes:  D0, D1, A0, A1 and the condition codes
;
MOVEBYTES020    Proc    Export
                MOVEQ       #$3F,D1                 ; load 64 byte mask
                CMPA.L      A0,A1                   ; compare destination (A1) with source (A0)
                BCC.S       MoveBytesDec            ; dest >= source (unsigned): copy from the end down

                AND.L       D0,D1                   ; get remnant (length mod 64), do we have one?
                BEQ.S       StartLargeInc           ; if no remnant, jump to large loop
                SUB.L       D1,D0                   ; subtract remnant size from size
                LSR.L       #$4,D1                  ; turn byte count into a count of 16-byte units
                BRA.S       SmallLoopIncTest        ; enter at the DBRA so the body runs D1 times

SmallLoopInc
                MOVE.L      (A0)+,(A1)+             ; copy 16 bytes
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+

SmallLoopIncTest
                DBRA        D1,SmallLoopInc         ; decrement loop count, branch if more
                                                    ; we use DBRA since D1 contains a small number

StartLargeInc
                TST.L       D0                      ; anything left after the remnant?
                BEQ.S       endOfMoveBytes          ; no, all done
                LSR.L       #$6,D0                  ; divide length by 64 to get count

LargeLoopInc                                        ; move 64 bytes while incrementing pointers
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                MOVE.L      (A0)+,(A1)+
                SUBQ.L      #$1,D0                  ; decrement count, set CC's
                BNE.S       LargeLoopInc            ; continue if more
                BRA.S       endOfMoveBytes          ; all done

MoveBytesDec
                ADDA.L      D0,A0                   ; source ptr starts at end of buffer
                ADDA.L      D0,A1                   ; destination ptr starts at end of buffer

                AND.L       D0,D1                   ; get remnant; AND.L sets Z, so no separate TST is needed
                BEQ.S       StartLargeDec           ; no remnant, we just do large chunks
                SUB.L       D1,D0                   ; subtract remnant size from size
                LSR.L       #$4,D1                  ; turn remnant size into a count of 16-byte units
                BRA.S       SmallLoopDecTest        ; enter at the DBRA so the body runs D1 times

SmallLoopDec
                MOVE.L      -(A0),-(A1)             ; copy 16 bytes
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)

SmallLoopDecTest
                DBRA        D1,SmallLoopDec         ; decrement loop count, branch if more
                                                    ; we use DBRA since D1 contains a small number

StartLargeDec
                TST.L       D0                      ; is there anything left?
                BEQ.S       endOfMoveBytes          ; no, we're out of here
                LSR.L       #$6,D0                  ; turn byte count into loop count (divide by 64)

LargeLoopDec                                        ; copy 64 bytes while decrementing pointers
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                SUBQ.L      #$1,D0                  ; decrement count, set CC's
                BNE.S       LargeLoopDec            ; continue if more

endOfMoveBytes
                RTS
|
|
|
|
|
|
|
|
align 16
|
|
MACHINE MC68040
|
|
;
|
|
;
|
|
; MoveBytes040 - copies data from source to destination for 68040 machines
;
; The length is consumed in 16-byte units: first a remnant of 0-3 units
; (length mod 64), then whole 64-byte chunks.  The routine therefore
; assumes the length is a multiple of 16 (see the quad-longword alignment
; note at the top of this file); other lengths are not handled.
;
; Registers
;   A1      Destination Pointer
;   A0      Source Pointer
;   D0      Length, later large loop count
;   D1      Remnant size or count (also scratch for the overlap test)
;
; Trashes:  D0, D1, A0, A1 and the condition codes
;
; Notes:    Since Move16 autoincrements only, we only do the decrement if there is
;           overlap such that copying would mangle the source.  That is the case
;           when source <= dest < source + length; the decrementing path uses
;           MOVE.L instead of MOVE16.
;
MOVEBYTES040    Proc    Export
                CMPA.L      A1,A0                   ; is (dest < src) ?
                BHI.S       MoveBytesInc040         ; yes we can increment
                MOVE.L      D0,D1                   ; get size
                ADD.L       A0,D1                   ; get end of src
                CMPA.L      D1,A1                   ; is (src+size <= dest) ?
                BCC.S       MoveBytesInc040         ; yes we can increment
                BRA.S       MoveBytesDec040         ; we must decrement

MoveBytesInc040
                MOVEQ       #$3F,D1                 ; load 64 byte mask
                AND.L       D0,D1                   ; get remnant (length mod 64), do we have one?
                NOP                                 ; sync the pipeline for defective 68040s
                                                    ; note: there are no moves till after the move16s are done
                                                    ; Placed ahead of the branch so that both the remnant
                                                    ; path and the large-loop-only path execute it before
                                                    ; their first MOVE16.  NOP leaves the condition codes
                                                    ; alone, so BEQ still tests the AND.L result.
                BEQ.S       StartLargeInc040        ; if no remnant, jump to large loop
                SUB.L       D1,D0                   ; subtract remnant size from size
                LSR.L       #$4,D1                  ; turn byte count into a count of 16-byte units
                BRA.S       SmallLoopIncTest040     ; enter at the DBRA so the body runs D1 times

SmallLoopInc040
                MOVE16      (A0)+,(A1)+             ; copy 16 bytes

SmallLoopIncTest040
                DBRA        D1,SmallLoopInc040      ; decrement loop count, branch if more
                                                    ; we use DBRA since D1 contains a small number

StartLargeInc040
                TST.L       D0                      ; anything left after the remnant?
                BEQ.S       endOfMoveBytes040       ; no, all done
                LSR.L       #$6,D0                  ; divide length by 64 to get count

LargeLoopInc040                                     ; move 64 bytes while incrementing pointers
                MOVE16      (A0)+,(A1)+
                MOVE16      (A0)+,(A1)+
                MOVE16      (A0)+,(A1)+
                SUBQ.L      #$1,D0                  ; decrement count, set CC's
                MOVE16      (A0)+,(A1)+             ; last MOVE16 sits between SUBQ and BNE; the BNE
                                                    ; below still tests the SUBQ result
                BNE.S       LargeLoopInc040         ; continue if more
                BRA.S       endOfMoveBytes040       ; all done

MoveBytesDec040
                MOVEQ       #$3F,D1                 ; load 64 byte mask (D1 was used by the overlap test)
                ADDA.L      D0,A0                   ; source ptr starts at end of buffer
                ADDA.L      D0,A1                   ; destination ptr starts at end of buffer

                AND.L       D0,D1                   ; get remnant, do we have one?
                BEQ.S       StartLargeDec040        ; no remnant, we just do large chunks
                SUB.L       D1,D0                   ; subtract remnant size from size
                LSR.L       #$4,D1                  ; turn remnant size into a count of 16-byte units
                BRA.S       SmallLoopDecTest040     ; enter at the DBRA so the body runs D1 times

SmallLoopDec040
                MOVE.L      -(A0),-(A1)             ; copy 16 bytes, can't use move16
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)

SmallLoopDecTest040
                DBRA        D1,SmallLoopDec040      ; decrement loop count, branch if more
                                                    ; we use DBRA since D1 contains a small number

StartLargeDec040
                TST.L       D0                      ; is there anything left?
                BEQ.S       endOfMoveBytes040       ; no, we're out of here
                LSR.L       #$6,D0                  ; turn byte count into loop count (divide by 64)

LargeLoopDec040                                     ; copy 64 bytes while decrementing pointers
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                MOVE.L      -(A0),-(A1)
                SUBQ.L      #$1,D0                  ; decrement count, set CC's
                BNE.S       LargeLoopDec040         ; continue if more

endOfMoveBytes040
                RTS
|
|
|
|
|
|
END |