Retro68/gcc/newlib/libc/machine/i960/strncat_ca.S

/*******************************************************************************
 * 
 * Copyright (c) 1993 Intel Corporation
 * 
 * Intel hereby grants you permission to copy, modify, and distribute this
 * software and its documentation.  Intel grants this permission provided
 * that the above copyright notice appears in all copies and that both the
 * copyright notice and this permission notice appear in supporting
 * documentation.  In addition, Intel grants this permission provided that
 * you prominently mark as "not part of the original" any modifications
 * made to this software or documentation, and that the name of Intel
 * Corporation not be used in advertising or publicity pertaining to
 * distribution of the software or the documentation without specific,
 * written prior permission.
 * 
 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
 * OR FITNESS FOR A PARTICULAR PURPOSE.  Intel makes no guarantee or
 * representations regarding the use of, or the results of the use of,
 * the software and documentation in terms of correctness, accuracy,
 * reliability, currentness, or otherwise; and you rely on the software,
 * documentation and results solely at your own risk.
 *
 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
 * OF ANY KIND.  IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
 * 
 ******************************************************************************/

	.file "sncat_ca.s"
#ifdef	__PIC
	.pic
#endif
#ifdef	__PID
	.pid
#endif
/*
 * (c) copyright 1988,1993 Intel Corp., all rights reserved
 */

/*
	procedure strncat  (optimized assembler version for the CA)

	dest_addr = strncat (dest_addr, src_addr, max_bytes)

	append the null terminated string pointed to by src_addr to the null 
	terminated string pointed to by dest_addr.  Return the original
	dest_addr.  If the source string is longer than max_bytes, then 
	append only max_bytes bytes, and tack on a null byte on the end

	This routine will fail if the source and destination string
	overlap (in particular, if the end of the source is overlapped
	by the beginning of the destination).  The behavior is undefined.
	This is acceptable according to the draft C standard.

	Undefined behavior will also occur if the end of the source string
	(i.e. the terminating null byte) is in the last word of the program's
	allocated memory space.  This is so because, in several cases, strncat
	will fetch ahead one word.  Disallowing the fetch ahead would impose
	a severe performance penalty.

	This program handles five cases:

	1) both arguments start on a word boundary
	2) neither are word aligned, but they are offset by the same amount
	3) source is word aligned, destination is not
	4) destination is word aligned, source is not
	5) neither is word aligned, and they are offset by differing amounts

	At the time of this writing, only g0 thru g7 and g13 are available 
	for use in this leafproc;  other registers would have to be saved and
	restored.  These nine registers, plus tricky use of g14 are sufficient
	to implement the routine.  The registers are used as follows:

	g0  original dest ptr;  not modified, so that it may be returned.
	g1  src ptr;  shift count
	g2  max_bytes
	g3  src ptr (word aligned)
	g4  dest ptr (word aligned)
	g5  0xff  --  byte extraction mask
	Little endian:
		g6  lsw of double word for extraction of 4 bytes
		g7  msw of double word for extraction of 4 bytes
	Big endian:
		g6  msw of double word for extraction of 4 bytes
		g7  lsw of double word for extraction of 4 bytes
	g13 return address
	g14 byte extracted.
*/

/*
 * MSW / LSW: endian-neutral names for the two halves of the g6/g7
 * register pair described in the register table above.  On a
 * little-endian build LSW is g6 and MSW is g7; on a big-endian build the
 * names are swapped (MSW is g6, LSW is g7).  The eshro instructions in
 * the routine name g6 directly rather than going through these macros.
 */
#if __i960_BIG_ENDIAN__
#define MSW g6
#define LSW g7
#else
#define LSW g6
#define MSW g7
#endif

	.globl	_strncat
	.globl	__strncat
	.leafproc	_strncat, __strncat
	.align	2
_strncat:
#ifndef __PIC
	lda 	Lrett,g14
#else
	lda 	Lrett-(.+8)(ip),g14
#endif
__strncat:
	notand	g0,3,g4		# extract word addr of start of dest
	 lda	(g14),g13	# preserve return address
	cmpibge.f 0,g2,Lexit_code # Lexit if number of bytes to move is <= zero.
	and	g0,3,LSW	# extract byte offset of dest
	 ld	(g4),MSW	# fetch word containing at least first byte
	shlo	3,LSW,g14	# get shift count for making mask for first word
	subi	1,0,LSW		# mask initially all ones
#if __i960_BIG_ENDIAN__
	shro	g14,LSW,LSW	# get mask for bytes needed from first word
#else
	shlo	g14,LSW,LSW	# get mask for bytes needed from first word
#endif
	notor	MSW,LSW,MSW	# set unneeded bytes to all ones
	 lda	0xff,g5		# byte extraction mask
Lsearch_for_word_with_null:
	scanbyte 0,MSW		# check for null byte
	 lda	4(g4),g4	# post-increment dest word pointer
	mov	MSW,LSW		# keep a copy of current word
	 ld	(g4),MSW	# fetch next word of dest
	 bno.t	Lsearch_for_word_with_null	# branch if null not found yet
#if __i960_BIG_ENDIAN__
	shro	24,LSW,g14	# extract byte
#else
	and	g5,LSW,g14	# extract byte
#endif
	cmpo	0,g14		# branch if null is first byte of word
	subo	4,g4,g4		# move dest word ptr to word with null
	notand	g1,3,g3		# extract word addr of start of src
	 bne.t	Lsearch_for_null

Lcase_14:
	cmpo	g1,g3		# check alignment of source
	 ld	(g3),LSW	# fetch first word of source
	shlo	3,g1,g14	# compute shift count
	 lda	4(g3),g3	# post-increment src addr
	 bne.f	Lcase_4		# branch if source is unaligned
Lcase_1:
Lcase_1_wloop:			# word copying loop
	cmpi	g2,4		# check for fewer than four bytes to move
	 lda	(LSW),g1	# keep a copy of the src word
	 bl.f	Lcase_1_cloop	# branch if fewer than four bytes to copy
	scanbyte 0,g1		# check for null byte in src word
	 ld	(g3),LSW	# pre-fetch next word of src
	addo	4,g3,g3		# post-increment src addr
	 bo.f	Lcase_1_cloop	# branch if word contains null byte
	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
	 st	g1,(g4)		# store word in dest string
	addo	4,g4,g4		# post-increment dest addr
	 b	Lcase_1_wloop

Lcase_3_cloop:
Lcase_1_cloop:			# character copying loop (max_bytes <= 3)
	cmpdeci	0,g2,g2		# is max_bytes exhausted?
#if __i960_BIG_ENDIAN__
	rotate	8,g1,g1		# move next byte into position for extraction
#endif
	and	g5,g1,g14	# extract next char
	be.f	Lstore_null	# if max_bytes is exhausted, store null and quit
	cmpo	0,g14		# check for null byte
	 stob	g14,(g4)	# store the byte in dest
#if ! __i960_BIG_ENDIAN__
	shro	8,g1,g1		# move next byte into position for extraction
#endif
	 lda	1(g4),g4	# post-increment dest byte addr
	 bne.t	Lcase_1_cloop	# branch if null not reached
	bx	(g13)		# Lexit (g14 == 0)

Lstore_null:
	mov	0,g14		# store null, and set g14 to zero
	stob	g14,(g4)
	bx	(g13)


Lsearch_for_null:
#if __i960_BIG_ENDIAN__
	shlo	8,LSW,LSW	# check next byte
	shro	24,LSW,g14
#else
	shlo	8,g5,g5		# move mask up to next byte
	and	g5,LSW,g14	# extract byte
#endif
	 lda	1(g4),g4	# move dest byte ptr to next byte
	cmpobne.t 0,g14,Lsearch_for_null	# branch if null is not yet found

Lcase_235:
	cmpo	g1,g3		# check alignment of src
	 ld	(g3),LSW	# pre-fetch word with start of src
	and	3,g1,g1		# compute shift count
	 lda	0xff,g5		# load mask for byte extraction
	shlo	3,g1,g14	
	 lda	4(g3),g3	# post-increment src word counter
	 be.t	Lcase_3		# branch if src is word aligned
	and	g4,3,MSW	# extract byte offset for dest string
	cmpo    MSW,g1		# < indicates first word of dest has more bytes
				/* than first word of source. */
	 ld	(g3),MSW	# fetch second word of src
#if __i960_BIG_ENDIAN__
	subo	g14,0,g14	# adjust shift count for big endian
#endif
	eshro	g14,g6,g5	# extract four bytes
#if __i960_BIG_ENDIAN__
	 bge.f	1f
#else
	 bg.f	1f
#endif
	mov	MSW,LSW
	 lda	4(g3),g3	# move src word addr to second word boundary
1:
	mov	g5,MSW
	 lda	0xff,g5
	 b	Lcase_25

Lcase_3:				# src is word aligned; dest is not
	mov	LSW,MSW		# make copy of first word of src
	 lda	32,g14		# initialize shift count to zero (mod 32)
Lcase_25:

Lcase_3_cloop_at_start:		# character copying loop for start of dest str
	cmpdeci	0,g2,g2		# is max_bytes exhausted?
#if __i960_BIG_ENDIAN__
	shro	24,MSW,g5	# extract next char
#else
	and	g5,MSW,g5	# extract next char
#endif
	 be.f	Lstore_null	# Lexit if max_bytes is exhausted
	cmpo	0,g5		# check for null byte
	 stob	g5,(g4)		# store the byte in dest
	addo	1,g4,g4		# post-increment dest ptr
	 lda	0xff,g5		# re-initialize byte extraction mask
	notand	g4,3,g1		# extract word address
	 be.t	Lexit_code	# Lexit if null byte reached
	cmpo	g1,g4		# have we reached word boundary in dest yet?
#if __i960_BIG_ENDIAN__
	 lda	-8(g14),g14	# augment the shift counter
	rotate	8,MSW,MSW	# move next byte into position for extraction
#else
	 lda	8(g14),g14	# augment the shift counter
	shro	8,MSW,MSW	# move next byte into position for extraction
#endif
	 bne.t	Lcase_3_cloop_at_start	# branch if reached word boundary?

#if __i960_BIG_ENDIAN__
	cmpo	0,g14
	 ld	(g3),MSW	# fetch msw of operand for double shift
	bne	Lcase_3_wloop	# branch if src is still unaligned.

Lcase_3_wloop2:
	cmpi	g2,4		# less than four bytes to move?
	mov	LSW,g1		# extract 4 bytes of src
	 lda	4(g3),g3	# post-increment src word addr
	 bl.f	Lcase_3_cloop	# branch if < four bytes left to move
	scanbyte 0,g1		# check for null byte
	mov	MSW,LSW		# move msw to lsw
	 ld	(g3),MSW	# pre-fetch msw of operand for double shift
	 bo.f	Lcase_3_cloop	# branch if word contains null byte
	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
	 st	g1,(g4)		# store 4 bytes to dest
	addo	4,g4,g4		# post-increment dest ptr
	 b	Lcase_3_wloop2
Lcase_4:
	subo	g14,0,g14	# adjust shift count for big endian
#else
Lcase_4:
#endif

	ld	(g3),MSW	# fetch msw of operand for double shift

Lcase_3_wloop:
	cmpi	g2,4		# less than four bytes to move?
	eshro	g14,g6,g1	# extract 4 bytes of src
	 lda	4(g3),g3	# post-increment src word addr
	 bl.f	Lcase_3_cloop	# branch if < four bytes left to move
	scanbyte 0,g1		# check for null byte
	mov	MSW,LSW		# move msw to lsw
	 ld	(g3),MSW	# pre-fetch msw of operand for double shift
	 bo.f	Lcase_3_cloop	# branch if word contains null byte
	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
	 st	g1,(g4)		# store 4 bytes to dest
	addo	4,g4,g4		# post-increment dest ptr
	 b	Lcase_3_wloop


Lexit_code:
	mov	0,g14		# conform to register conventions
	bx	(g13)		# g0 = addr of dest;  g14 = 0
Lrett:
	ret

/* end of strncat */
add libgloss and newlib 1.20.0 to gcc directory 2012-03-26 23:51:53 +00:00			`/*******************************************************************************`
			`*`
			`* Copyright (c) 1993 Intel Corporation`
			`*`
			`* Intel hereby grants you permission to copy, modify, and distribute this`
			`* software and its documentation. Intel grants this permission provided`
			`* that the above copyright notice appears in all copies and that both the`
			`* copyright notice and this permission notice appear in supporting`
			`* documentation. In addition, Intel grants this permission provided that`
			`* you prominently mark as "not part of the original" any modifications`
			`* made to this software or documentation, and that the name of Intel`
			`* Corporation not be used in advertising or publicity pertaining to`
			`* distribution of the software or the documentation without specific,`
			`* written prior permission.`
			`*`
			`* Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR`
			`* IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY`
			`* OR FITNESS FOR A PARTICULAR PURPOSE. Intel makes no guarantee or`
			`* representations regarding the use of, or the results of the use of,`
			`* the software and documentation in terms of correctness, accuracy,`
			`* reliability, currentness, or otherwise; and you rely on the software,`
			`* documentation and results solely at your own risk.`
			`*`
			`* IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,`
			`* LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES`
			`* OF ANY KIND. IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM`
			`* PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.`
			`*`
			`******************************************************************************/`

			`.file "sncat_ca.s"`
			`#ifdef __PIC`
			`.pic`
			`#endif`
			`#ifdef __PID`
			`.pid`
			`#endif`
			`/*`
			`* (c) copyright 1988,1993 Intel Corp., all rights reserved`
			`*/`

			`/*`
			`procedure strncat (optimized assembler version for the CA)`

			`dest_addr = strncat (dest_addr, src_addr, max_bytes)`

			`append the null terminated string pointed to by src_addr to the null`
			`terminated string pointed to by dest_addr. Return the original`
			`dest_addr. If the source string is longer than max_bytes, then`
			`append only max_bytes bytes, and tack on a null byte on the end`

			`This routine will fail if the source and destination string`
			`overlap (in particular, if the end of the source is overlapped`
			`by the beginning of the destination). The behavior is undefined.`
			`This is acceptable according to the draft C standard.`

			`Undefined behavior will also occur if the end of the source string`
			`(i.e. the terminating null byte) is in the last word of the program's`
			`allocated memory space. This is so because, in several cases, strncat`
			`will fetch ahead one word. Disallowing the fetch ahead would impose`
			`a severe performance penalty.`

			`This program handles five cases:`

			`1) both arguments start on a word boundary`
			`2) neither are word aligned, but they are offset by the same amount`
			`3) source is word aligned, destination is not`
			`4) destination is word aligned, source is not`
			`5) neither is word aligned, and they are offset by differing amounts`

			`At the time of this writing, only g0 thru g7 and g13 are available`
			`for use in this leafproc; other registers would have to be saved and`
			`restored. These nine registers, plus tricky use of g14 are sufficient`
			`to implement the routine. The registers are used as follows:`

			`g0 original dest ptr; not modified, so that it may be returned.`
			`g1 src ptr; shift count`
			`g2 max_bytes`
			`g3 src ptr (word aligned)`
			`g4 dest ptr (word aligned)`
			`g5 0xff -- byte extraction mask`
			`Little endian:`
			`g6 lsw of double word for extraction of 4 bytes`
			`g7 msw of double word for extraction of 4 bytes`
			`Big endian:`
			`g6 msw of double word for extraction of 4 bytes`
			`g7 lsw of double word for extraction of 4 bytes`
			`g13 return address`
			`g14 byte extracted.`
			`*/`

			`#if __i960_BIG_ENDIAN__`
			`#define MSW g6`
			`#define LSW g7`
			`#else`
			`#define LSW g6`
			`#define MSW g7`
			`#endif`

			`.globl _strncat`
			`.globl __strncat`
			`.leafproc _strncat, __strncat`
			`.align 2`
			`_strncat:`
			`#ifndef __PIC`
			`lda Lrett,g14`
			`#else`
			`lda Lrett-(.+8)(ip),g14`
			`#endif`
			`__strncat:`
			`notand g0,3,g4 # extract word addr of start of dest`
			`lda (g14),g13 # preserve return address`
			`cmpibge.f 0,g2,Lexit_code # Lexit if number of bytes to move is <= zero.`
			`and g0,3,LSW # extract byte offset of dest`
			`ld (g4),MSW # fetch word containing at least first byte`
			`shlo 3,LSW,g14 # get shift count for making mask for first word`
			`subi 1,0,LSW # mask initially all ones`
			`#if __i960_BIG_ENDIAN__`
			`shro g14,LSW,LSW # get mask for bytes needed from first word`
			`#else`
			`shlo g14,LSW,LSW # get mask for bytes needed from first word`
			`#endif`
			`notor MSW,LSW,MSW # set unneeded bytes to all ones`
			`lda 0xff,g5 # byte extraction mask`
			`Lsearch_for_word_with_null:`
			`scanbyte 0,MSW # check for null byte`
			`lda 4(g4),g4 # post-increment dest word pointer`
			`mov MSW,LSW # keep a copy of current word`
			`ld (g4),MSW # fetch next word of dest`
			`bno.t Lsearch_for_word_with_null # branch if null not found yet`
			`#if __i960_BIG_ENDIAN__`
			`shro 24,LSW,g14 # extract byte`
			`#else`
			`and g5,LSW,g14 # extract byte`
			`#endif`
			`cmpo 0,g14 # branch if null is first byte of word`
			`subo 4,g4,g4 # move dest word ptr to word with null`
			`notand g1,3,g3 # extract word addr of start of src`
			`bne.t Lsearch_for_null`

			`Lcase_14:`
			`cmpo g1,g3 # check alignment of source`
			`ld (g3),LSW # fetch first word of source`
			`shlo 3,g1,g14 # compute shift count`
			`lda 4(g3),g3 # post-increment src addr`
			`bne.f Lcase_4 # branch if source is unaligned`
			`Lcase_1:`
			`Lcase_1_wloop: # word copying loop`
			`cmpi g2,4 # check for fewer than four bytes to move`
			`lda (LSW),g1 # keep a copy of the src word`
			`bl.f Lcase_1_cloop # branch if fewer than four bytes to copy`
			`scanbyte 0,g1 # check for null byte in src word`
			`ld (g3),LSW # pre-fetch next word of src`
			`addo 4,g3,g3 # post-increment src addr`
			`bo.f Lcase_1_cloop # branch if word contains null byte`
			`subi 4,g2,g2 # decrease max_byte count by the 4 bytes moved`
			`st g1,(g4) # store word in dest string`
			`addo 4,g4,g4 # post-increment dest addr`
			`b Lcase_1_wloop`

			`Lcase_3_cloop:`
			`Lcase_1_cloop: # character copying loop (max_bytes <= 3)`
			`cmpdeci 0,g2,g2 # is max_bytes exhausted?`
			`#if __i960_BIG_ENDIAN__`
			`rotate 8,g1,g1 # move next byte into position for extraction`
			`#endif`
			`and g5,g1,g14 # extract next char`
			`be.f Lstore_null # if max_bytes is exhausted, store null and quit`
			`cmpo 0,g14 # check for null byte`
			`stob g14,(g4) # store the byte in dest`
			`#if ! __i960_BIG_ENDIAN__`
			`shro 8,g1,g1 # move next byte into position for extraction`
			`#endif`
			`lda 1(g4),g4 # post-increment dest byte addr`
			`bne.t Lcase_1_cloop # branch if null not reached`
			`bx (g13) # Lexit (g14 == 0)`

			`Lstore_null:`
			`mov 0,g14 # store null, and set g14 to zero`
			`stob g14,(g4)`
			`bx (g13)`


			`Lsearch_for_null:`
			`#if __i960_BIG_ENDIAN__`
			`shlo 8,LSW,LSW # check next byte`
			`shro 24,LSW,g14`
			`#else`
			`shlo 8,g5,g5 # move mask up to next byte`
			`and g5,LSW,g14 # extract byte`
			`#endif`
			`lda 1(g4),g4 # move dest byte ptr to next byte`
			`cmpobne.t 0,g14,Lsearch_for_null # branch if null is not yet found`

			`Lcase_235:`
			`cmpo g1,g3 # check alignment of src`
			`ld (g3),LSW # pre-fetch word with start of src`
			`and 3,g1,g1 # compute shift count`
			`lda 0xff,g5 # load mask for byte extraction`
			`shlo 3,g1,g14`
			`lda 4(g3),g3 # post-increment src word counter`
			`be.t Lcase_3 # branch if src is word aligned`
			`and g4,3,MSW # extract byte offset for dest string`
			`cmpo MSW,g1 # < indicates first word of dest has more bytes`
			`/* than first word of source. */`
			`ld (g3),MSW # fetch second word of src`
			`#if __i960_BIG_ENDIAN__`
			`subo g14,0,g14 # adjust shift count for big endian`
			`#endif`
			`eshro g14,g6,g5 # extract four bytes`
			`#if __i960_BIG_ENDIAN__`
			`bge.f 1f`
			`#else`
			`bg.f 1f`
			`#endif`
			`mov MSW,LSW`
			`lda 4(g3),g3 # move src word addr to second word boundary`
			`1:`
			`mov g5,MSW`
			`lda 0xff,g5`
			`b Lcase_25`

			`Lcase_3: # src is word aligned; dest is not`
			`mov LSW,MSW # make copy of first word of src`
			`lda 32,g14 # initialize shift count to zero (mod 32)`
			`Lcase_25:`

			`Lcase_3_cloop_at_start: # character copying loop for start of dest str`
			`cmpdeci 0,g2,g2 # is max_bytes exhausted?`
			`#if __i960_BIG_ENDIAN__`
			`shro 24,MSW,g5 # extract next char`
			`#else`
			`and g5,MSW,g5 # extract next char`
			`#endif`
			`be.f Lstore_null # Lexit if max_bytes is exhausted`
			`cmpo 0,g5 # check for null byte`
			`stob g5,(g4) # store the byte in dest`
			`addo 1,g4,g4 # post-increment dest ptr`
			`lda 0xff,g5 # re-initialize byte extraction mask`
			`notand g4,3,g1 # extract word address`
			`be.t Lexit_code # Lexit if null byte reached`
			`cmpo g1,g4 # have we reached word boundary in dest yet?`
			`#if __i960_BIG_ENDIAN__`
			`lda -8(g14),g14 # augment the shift counter`
			`rotate 8,MSW,MSW # move next byte into position for extraction`
			`#else`
			`lda 8(g14),g14 # augment the shift counter`
			`shro 8,MSW,MSW # move next byte into position for extraction`
			`#endif`
			`bne.t Lcase_3_cloop_at_start # branch if reached word boundary?`

			`#if __i960_BIG_ENDIAN__`
			`cmpo 0,g14`
			`ld (g3),MSW # fetch msw of operand for double shift`
			`bne Lcase_3_wloop # branch if src is still unaligned.`

			`Lcase_3_wloop2:`
			`cmpi g2,4 # less than four bytes to move?`
			`mov LSW,g1 # extract 4 bytes of src`
			`lda 4(g3),g3 # post-increment src word addr`
			`bl.f Lcase_3_cloop # branch if < four bytes left to move`
			`scanbyte 0,g1 # check for null byte`
			`mov MSW,LSW # move msw to lsw`
			`ld (g3),MSW # pre-fetch msw of operand for double shift`
			`bo.f Lcase_3_cloop # branch if word contains null byte`
			`subi 4,g2,g2 # decrease max_byte count by the 4 bytes moved`
			`st g1,(g4) # store 4 bytes to dest`
			`addo 4,g4,g4 # post-increment dest ptr`
			`b Lcase_3_wloop2`
			`Lcase_4:`
			`subo g14,0,g14 # adjust shift count for big endian`
			`#else`
			`Lcase_4:`
			`#endif`

			`ld (g3),MSW # fetch msw of operand for double shift`

			`Lcase_3_wloop:`
			`cmpi g2,4 # less than four bytes to move?`
			`eshro g14,g6,g1 # extract 4 bytes of src`
			`lda 4(g3),g3 # post-increment src word addr`
			`bl.f Lcase_3_cloop # branch if < four bytes left to move`
			`scanbyte 0,g1 # check for null byte`
			`mov MSW,LSW # move msw to lsw`
			`ld (g3),MSW # pre-fetch msw of operand for double shift`
			`bo.f Lcase_3_cloop # branch if word contains null byte`
			`subi 4,g2,g2 # decrease max_byte count by the 4 bytes moved`
			`st g1,(g4) # store 4 bytes to dest`
			`addo 4,g4,g4 # post-increment dest ptr`
			`b Lcase_3_wloop`


			`Lexit_code:`
			`mov 0,g14 # conform to register conventions`
			`bx (g13) # g0 = addr of dest; g14 = 0`
			`Lrett:`
			`ret`

			`/* end of strncat */`