Optimizing Microcontroller Performance
by James Flynn



Listing One
/*
 * memcpy - copy n bytes from the object at s2 into the object at s1.
 *
 * The copy proceeds one byte at a time from the lowest address upward.
 * Behavior is undefined if the two objects overlap.
 *
 * s1: destination buffer (at least n bytes)
 * s2: source buffer (at least n bytes)
 * n:  number of bytes to copy; 0 copies nothing
 *
 * Returns s1.
 */
void *memcpy(void *s1, const void *s2, size_t n)
{
    char *dst = s1;
    const char *src = s2;

    /* n is unsigned, so "n != 0" is the same test as "0 < n". */
    while (n != 0) {
        *dst++ = *src++;
        n--;
    }

    return s1;
}

Listing Two
;; op-codes needed to build program snippet on the software frame 

#define  MVN_INST	0x54     ;; MOVE INSTRUCTION (block move; followed by two bank bytes)
#define  PLD_INST 	0x2B     ;; POP FRAME REGISTER
#define  PLY_INST	0x7A     ;; POP Y REGISTER
#define  RTL_INST	0x6B     ;; RETURN LONG
#define  PLA_INST	0x68      ;; POP A REGISTER
;; PLB_INST is only the FIRST byte of the two-byte "pop B" op-code: memcpy
;; stores PLB_INST followed by PLA_INST to pop the B register.
;; NOTE(review): 0x42 is presumably the 7700 accumulator-B prefix byte --
;; confirm against the instruction set manual.
#define  PLB_INST	0x42      ;; POP B REGISTER (first byte; needs PLA_INST after it)
#define  PLT_INST	0xAB     ;; POP DATA BANK REGISTER

;*************************************************************** 
;	memcpy
;  	void *memcpy(void *s1, const void *s2, size_t n)
;	This routine copies n characters from the object pointed to by s2 
;	into the object pointed to by s1.  If copying takes place between 
;	objects that overlap, the behavior is undefined.
;	NOTE: This routine is written to conform to IAR M37735 interface
;		requirements for the banked memory model.
;Arguments:
;	s1 = destination pointer 
;	s2 = source pointer
;	n  = number of bytes to copy
;Returns:
;	Returns s1
;*************************************************************** 
;; On entry the following registers & frame locations are in use.
;;    Y - LSW of s1
;;    B - MSW of s1
;;  dp:2- LSW of s2
;;  dp:4- MSW of s2
;;  dp:0- Number of bytes to copy
;;
;; this routine uses memory locations dp:5 - dp:14 for code

	.public	memcpy
memcpy:
;; Overview: rather than looping, this routine assembles a short code
;; snippet in the software frame (dp:5 - dp:14), loads the registers the
;; block-move instruction needs, and "returns" into the snippet with rtl.
;; The snippet performs the move, reloads s1 into A/B as the return value,
;; restores the saved registers and executes the real return to the caller.
;;
;; Snippet layout built below:
;;    dp:5   MVN_INST            block move
;;    dp:6   destination bank    (from B)
;;    dp:7   source bank         (from dp:4)
;;    dp:8   PLA_INST            A = LSW of s1   (pushed by phy)
;;    dp:9   PLB_INST \          B = MSW of s1   (pushed by phb)
;;    dp:10  PLA_INST /          two-byte op-code
;;    dp:11  PLT_INST            restore DT      (pushed by pht)
;;    dp:12  PLY_INST            discard frame location (pushed by pha)
;;    dp:13  PLD_INST            restore frame register (pushed by phd)
;;    dp:14  RTL_INST            return to the original caller
;;
;; The pops in the snippet are the exact reverse of the pushes below; do
;; not reorder either sequence without updating the other.

;; --- Establish the frame and save state ---------------------------------
;; NOTE(review): the value at stack offset 4 is used as the new frame
;; pointer; presumably the IAR banked-model convention places it there.
;; The dp:N argument locations are read only after tad, i.e. relative to
;; this new frame -- confirm against the IAR interface documentation.
	lda	a, 4, s			;; A = word at stack offset 4 (new frame pointer)
	phd				     ;; save the old frame register first
	tad				     ;; frame register = A (new frame)
	clc				     ;; A = frame + 15: this routine
	adc	a, #15			;; claims 15 bytes (dp:0 - dp:14)
	pha				     ;; push next free frame location (for future callers)
	pht                      ;; save the DT register
	phb				     ;; save MSW of s1 (popped into B by the snippet)
	phy				     ;; save LSW of s1 (popped into A by the snippet)
	ldx	dp:2			     ;; X=source offset,Y=destination offset

;; --- Build the code snippet, one byte at a time -------------------------
	.data	8			;; tell the assembler data is 8 bits wide
	sem				     ;; switch to 8-bit data mode and
	ldm	#MVN_INST, dp:5	;; setup op-codes on the frame (dp:5 = move op-code)
	sta	b, dp:6			;; store destination bank
	lda	a, dp:4			;; get & store source bank
	sta	a, dp:7			;; (taken from the MSW of s2)
	ldm	#PLA_INST, dp:8	;; restore LSW of s1 to the A register
	ldm	#PLB_INST, dp:9	;; restore MSW of s1 to the B register
	ldm	#PLA_INST, dp:10	;; (whose op-code is two bytes: dp:9 + dp:10)
	ldm	#PLT_INST, dp:11    ;; restore DT register
	ldm	#PLY_INST, dp:12	;; clean up stack (drops the value pushed by pha)
	ldm	#PLD_INST, dp:13	;; restore dptr register
	ldm	#RTL_INST, dp:14	;; then return to caller

;; --- Push the snippet address and jump to it via rtl --------------------
	.data	16			;; tell the assembler data is 16 bits wide
	clm				     ;; now, return to 16-bit mode and
	tda				     ;; get the address of the current frame
	clc				     ;; reposition it so it points to start
	adc	a, #5			;; of newly constructed code snippet (dp:5)
	.data	8			;; switch to 8-bit data mode so that only
	sem				     ;; a single bank byte is pushed
	lda	b, #0		     ;; setup the h/w stack with the frame 
	phb				     ;; address (stacks are in bank 0): bank byte first
	.data	16		     ;; back to 16-bit data mode
	clm				     ;; for the 16-bit offset
	pha				     ;; push snippet offset; rtl below will pop it
	lda	a, dp:0		     ;; A = number of bytes to move
	clp	m,x			     ;; data & index both 16-bit mode.
	rtl				     ;; now we can jump to our snippet

1


