JPEGView/Source/68020/CopyReallyFastScaledFrom32.a
Aaron Giles 92bdb55672 JPEGView 3.3 for Macintosh
These are the sources for the final official release of JPEGView for the
Mac, back in 1994.
2015-02-05 00:18:10 -08:00

1 line
9.5 KiB
Plaintext

;*********************************************************/
;* This source code copyright (c) 1991-2001, Aaron Giles */
;* See the Read Me file for licensing information. */
;* Contact email: mac@aarongiles.com */
;*********************************************************/
;
; On entry here we expect the following values:
;
; srcBase = (long) pointer to the first source pixel
; srcRow = (long) rowBytes for the source pixmap
; dstBase = (long) pointer to the first destination pixel
; dstRow = (long) rowBytes for the destination pixmap
; itAddr = (long) pointer to the inverse color table
; ctAddr = (long) pointer to the color table
; theRgn = (long) handle to the destination region
; boxRect = (Rect) the bounding rectangle of this region
; height = (word) height of the region bounding box
; width = (word) width of the region bounding box
; srcWidth = (word) width of the source rectangle
; srcHeight = (word) height of the source rectangle
; dstWidth = (word) width of the destination rectangle
; dstHeight = (word) height of the destination rectangle
; xRemainder = (word) starting X remainder for the source counter
; yRemainder = (word) starting Y remainder for the source counter
;
; The following registers will be modified:
;
; d0,d1,d2
; a0,a1
;
; Internally, the register usage is as follows:
;
; d0 = accumulator / scratch
; d1 = scratch (buffer address arithmetic)
; d2 = scratch [holds the address of rgnBuffer+4 for the dither routine]
; d3 = scratch [box left for InitRegion]
; d4 = (low word) X contribution of one source pixel (inside the column loop)
; d4 = (high word) dstWidth/srcWidth, 10-bit fixed point, plus one
; d5 = (low word) remaining Y fraction of current source pixel
; d5 = (high word) not touched by this routine's own code
; d6 = (low word) remaining X needed for the current dest. pixel
; d6 = (high word) dstHeight/srcHeight, 10-bit fixed point, plus one
; d7 = (low word) remaining X fraction of current source pixel
; d7 = (high word) not touched by this routine's own code
;
; a0 = pointer into the source row (one long per pixel)
; a1 = pointer into the scale (output) buffer
; a2 = inverse color table address (loaded for the dither routine)
; a3 = color table address (loaded for the dither routine)
; a4 = scratch [region buffer address, then dither routine address]
; a5 = not touched by this routine's own code
; a6 = pointer to the CopyData structure
;
; Entry: preserve the non-scratch registers and fetch the CopyData pointer
;
movem.l d3-d7/a2-a6,-(sp) ;save registers on the stack (10 longs = 40 bytes)
move.l 48(sp),a6 ;get the address of the CopyData structure in a6
;(based off stack: 10 registers + 1 a6 link + 1 return addr.
; = 12 * 4 bytes = 48 bytes)
;
; Create some room in the stack for our buffers: an even and an odd buffer
; of 6 * (width + 2) bytes each, followed by the scale (output) buffer that
; the column loop fills with one long per pixel.
; NOTE(review): the 6 bytes per pixel presumably hold three word-sized terms
; for the dither routines -- confirm against those routines.
; On exit from this section d0 = size of one buffer in bytes, d1 = outputAddr.
;
clr.l d0 ;zero out d0
move.w width(a6),d0 ;get width of destination in d0.l (zero-extended)
addq.l #2,d0 ;plus two for overflows
lsl.l #4,d0 ;times 16 (2 * 6 + 4)
addq.l #4,d0 ;add 4 for alignment
sub.l d0,sp ;get it from the stack space
move.l d0,-(sp) ;and save the amount so the exit code can release it
lsr.l #3,d0 ;d0 = (width + 2) * 2 (the extra 4 shifts out)
move.l d0,d1 ;save that in d1
lsl.l #1,d0 ;d0 = (width + 2) * 4
add.l d1,d0 ; = (width + 2) * 6, the size of one buffer
moveq.l #13,d1 ;offset by 10, plus 3 for rounding up
add.l sp,d1 ;add in the stack pointer (sp points at the saved amount)
andi.l #-4,d1 ;make it longword-aligned (-4 == $fffffffc)
move.l d1,evenAddr(a6) ;that's the even buffer
add.l d0,d1 ;point to the odd buffer
move.l d1,oddAddr(a6) ;save that pointer
add.l d0,d1 ;point to the scale buffer
move.l d1,outputAddr(a6) ;now we've got them all
;
; Clear one of the two dithering buffers to zeros. On entry d0 holds the
; size in bytes of one buffer and d1 holds outputAddr.
;
move.l oddAddr(a6),a0 ;point to the odd buffer by default
subq.l #4,d1 ;NOTE(review): d1 is not read again by the visible code before it is reloaded; looks like a leftover -- confirm InitRegion ignores d1
lsr.l #1,d0 ;bytes -> words, since the loop clears one word at a time
btst.b #0,evodd+1(a6) ;check the even/odd flag
bne.s @ClearLoop ;if bit 0 is set, keep the odd buffer
move.l evenAddr(a6),a0 ;bit 0 clear: point to the even buffer
@ClearLoop:
clr.w (a0)+ ;clear this word
dbra.w d0,@ClearLoop ;dbra runs d0+1 times, i.e. one word past the buffer size (lands in the buffer that follows)
;
; Prime the region scanner: hand InitRegion the region handle, the region
; buffer and the bounding box, then keep the pointer it returns in a0
;
        lea.l   rgnBuffer(a6),a1        ;a1 -> region buffer
        move.l  theRgn(a6),a0           ;a0 = theRgn
        move.w  boxRect+2(a6),d3        ;d3 = box left
        move.w  boxRect+0(a6),d2        ;d2 = box top
        move.w  height(a6),d4           ;d4 = box height
        move.w  width(a6),d5            ;d5 = box width
        jsr     InitRegion              ;initialize the region
        move.l  a0,-(sp)                ;park the region pointer on the stack
;
; Set up the scale ratios in the high words of d4 (X) and d6 (Y).
; Each ratio is (dst << 10) / src, i.e. 10-bit fixed point where 1024 == 1.00.
; divu.w leaves the quotient in the low word and the remainder in the high
; word; after the swap the remainder sits in the low word, which is
; overwritten before it is ever used.
;
        moveq   #10,d0                  ;shift count for 10-bit fixed point
        moveq   #0,d4                   ;clear out d4
        move.w  dstWidth(a6),d4         ;d4.l = destination width
        lsl.l   d0,d4                   ;shift d4 up by 10 bits
        divu.w  srcWidth(a6),d4         ;divide by source width
        addq.w  #1,d4                   ;plus one to prevent overflows
        swap    d4                      ;X ratio into the high word of d4
        moveq   #0,d6                   ;clear out d6
        move.w  dstHeight(a6),d6        ;d6.l = destination height
        lsl.l   d0,d6                   ;shift d6 up by 10 bits
        divu.w  srcHeight(a6),d6        ;divide by source height
        addq.w  #1,d6                   ;plus one to prevent overflows
        swap    d6                      ;Y ratio into the high word of d6
;
; Reset the source Y remainder & set up the counter "rows"
;
        move.w  yRemainder(a6),d5       ;d5 = starting Y remainder
        move.w  height(a6),rows(a6)     ;rows = height of the bounding box
;
; The outermost (row) loop begins here; set up our pointers into the data
;
@ScaleRowLoop:
        move.l  srcBase(a6),a0          ;a0 -> current source row
        move.l  outputAddr(a6),a1       ;a1 -> scale (output) buffer
;
; Reset the "columns" counter and reset the source X remainder
;
        move.w  width(a6),columns(a6)   ;reset the column counter
        move.w  xRemainder(a6),d7       ;restore xRemainder
;
; Choose which source row feeds this destination row. d5 = Y still left in
; the current source row, d6 (high word) = Y contribution of one source row.
; If the current row cannot cover a whole destination row (1024), a0 may be
; moved to the next source row. srcRow is a long, so it must be added with
; add.l: a word-sized add would fetch only the upper 16 bits of the field
; (zero for any positive rowBytes below 64K) and a0 would never move.
;
        move.w  #1024,d0                ;get needed Y in d0
        cmp.w   d0,d5                   ;does the current row have more than that left?
        bgt.s   @ScaleCommon            ;if so, go straight to the common scaling
        sub.w   d5,d0                   ;d0 = Y still needed after the current row
        swap    d6                      ;get source Y contribution in d6
        cmp.w   d0,d6                   ;does the next row alone cover the rest?
        ble.s   @NotScaleY2             ;if not, skip ahead
        swap    d6                      ;swap d6 back
        cmp.w   d0,d5                   ;does the current row contribute more?
        bgt.s   @ScaleCommon            ;if so, stay on the current row
        add.l   srcRow(a6),a0           ;otherwise point to the next row
        bra.s   @ScaleCommon            ;and *then* go for it
@NotScaleY2:
        swap    d6                      ;swap d6 back
        add.l   srcRow(a6),a0           ;point to the next row
;
; The common scaling loop; here we copy pixels from the current row to the
; scale buffer, one long per destination column. Low-word roles in the loop:
; d4 = X contribution of one source pixel, d6 = X still needed by the
; current destination pixel, d7 = X still left in the current source pixel.
;
@ScaleCommon:
swap d4 ;get the X contribution into the low word
@ScaleColLoop:
move.w #1024,d6 ;a destination pixel needs 1.00 of X (1024 == $400)
cmp.w d6,d7 ;does the current source pixel have more than that left?
ble.s @ScaleX1 ;if not, skip this
move.l (a0),(a1)+ ;copy the current source pixel, staying on it
bra.s @ScaleEnd ;skip to the end
@ScaleX1:
sub.w d7,d6 ;d6 = X still needed after using up the current pixel
cmp.w d6,d4 ;does the next pixel alone cover the rest?
ble.s @ScaleX2 ;if not, skip
cmp.w d6,d7 ;which of the two pixels contributes more?
ble.s @ScaleX1Add2 ;if the second one (ties included), skip ahead
move.l (a0)+,(a1)+ ;copy the first pixel and increment
bra.s @ScaleEndReset ;skip to the end
@ScaleX1Add2:
addq.l #4,a0 ;increment source first
move.l (a0),(a1)+ ;copy second pixel now
bra.s @ScaleEndReset ;skip to the end
@ScaleX2:
addq.l #4,a0 ;increment source
move.l (a0)+,(a1)+ ;copy second pixel and increment
@ScaleX2Loop:
sub.w d4,d6 ;subtract one whole pixel's contribution from the need
cmp.w d6,d4 ;does the pixel at a0 cover what is left?
bgt.s @ScaleEndReset ;if so, stop skipping and reset the remainder
addq.l #4,a0 ;otherwise skip this source pixel too
bra.s @ScaleX2Loop ;loop until we exit
@ScaleEndReset:
move.w d4,d7 ;a fresh source pixel: remaining = full contribution
@ScaleEnd:
sub.w d6,d7 ;subtract needed from remaining
subq.w #1,columns(a6) ;decrement the columns count
bne.s @ScaleColLoop ;loop until done
;
; The scale buffer now holds one full row. Save the scaling state, load the
; registers the dither routine takes, and call the even or odd routine
; according to bit 0 of evodd (which is toggled for the next row).
;
        swap    d4                      ;put the X contribution back in the high word
        movem.l d4-d7,-(sp)             ;save the scaling state across the call
        lea.l   rgnBuffer+4(a6),a4      ;address of rgnBuffer+4
        move.l  a4,d2                   ;pass it to the dither routine in d2
        move.l  outputAddr(a6),a0       ;source is the scale buffer
        move.l  dstBase(a6),a1          ;destination row address
        move.l  itAddr(a6),a2           ;inverse color table
        move.l  ctAddr(a6),a3           ;color table
        move.l  evenAddr(a6),d0         ;even buffer address
        move.l  oddAddr(a6),d1          ;odd buffer address
        move.l  errTable(a6),d6         ;error table
        move.w  width(a6),d7            ;column count
        move.l  odd(a6),a4              ;assume the odd routine
        bchg.b  #0,evodd+1(a6)          ;test and flip the even/odd flag
        bne.s   @ScaleDither            ;flag was set: odd row it is
@ScaleEven:
        move.l  even(a6),a4             ;flag was clear: use the even routine
@ScaleDither:
        jsr     (a4)                    ;dither this row
@ScaleRowEnd:
        movem.l (sp)+,d4-d7             ;restore the scaling state
;
; Step to the next destination row, then consume 1.00 (1024) of source Y,
; moving srcBase down one source row each time the current row runs out
;
        subq.w  #1,rows(a6)             ;one fewer row to go
        beq.s   @ScaleExit              ;exit if done
        move.l  dstRow(a6),d0           ;destination row increment
        add.l   d0,dstBase(a6)          ;point to next destination row
        move.l  srcRow(a6),d0           ;source row increment
        move.w  #1024,d4                ;Y still to consume (1024 == $400)
        swap    d6                      ;Y contribution into the low word
        bra.s   @ScaleIncTest           ;test before the first step
@ScaleIncRow:
        add.l   d0,srcBase(a6)          ;point to next source row
        sub.w   d5,d4                   ;the old row's remainder is used up
        move.w  d6,d5                   ;a fresh row has its full contribution
@ScaleIncTest:
        cmp.w   d4,d5                   ;does the current row cover what is left?
        ble.s   @ScaleIncRow            ;if not, move down another row
@ScaleRowNext:
        swap    d6                      ;restore d6
        sub.w   d4,d5                   ;d5 = Y left in the current source row
;
; Count down the rows covered by the current region data; when they run
; out, have UpdateRegion refresh the region buffer
;
        subq.w  #1,rgnBuffer(a6)        ;decrement the Y region count
        bne.w   @ScaleRowLoop           ;still valid: do the next row
        lea.l   rgnBuffer+4(a6),a1      ;point a1 into the region buffer
        move.l  (sp)+,a0                ;restore region pointer
        move.w  boxRect+2(a6),d0        ;left end of the box in d0
        jsr     UpdateRegion            ;update our region
        move.l  a0,-(sp)                ;push new position again
        bra.w   @ScaleRowLoop           ;loop for more
@ScaleExit:
;
; Unwind the stack in the reverse order it was built: region position,
; saved buffer length, the buffers themselves, then the saved registers
;
        addq.l  #4,sp                   ;drop the region position
        move.l  (sp)+,d0                ;d0 = length of the buffer area
        adda.l  d0,sp                   ;release the buffers
        movem.l (sp)+,d3-d7/a2-a6       ;restore the registers saved on entry