; -*-MIDAS-*-
subttl Fast String Copy

comment |
Herewith are all the routines ever consed up for fast string copying.
Rather than keep them in separate files, the idea is to keep them all
together, and use assembly-time switches to determine which features
you really want.
|

; Implementation selectors.  Exactly one is normally set non-zero by
; the includer; if none is set, EAK's version is chosen below.
IFNDEF $$FSCS,$$FSCS==0         ; non-zero: slow byte-copy version
IFNDEF $$FSCE,$$FSCE==0         ; non-zero: EAK's version
IFNDEF $$FSCK,$$FSCK==0         ; non-zero: KLH's version
IFNDEF $$FSC.,$$FSC.==0         ; non-zero: KL string-copy instruction
IFE $$FSCS\$$FSC.\$$FSCE\$$FSCK,$$FSCE==1       ; nothing picked => EAK's

; Feature switches.
IFNDEF $$FSCZ,$$FSCZ==1         ; 1 = zero last bit of word.
IFNDEF $$FSCX,$$FSCX==0         ; Optimize at runtime for CPU. (not yet)
IFNDEF $$FSIX,$$FSIX==1         ; 0 = assume no I,X in BP's.
IFNDEF $$FSCU,$$FSCU==0         ; 1 = Update AC's properly (both BP's)
IFNDEF $$FSCP,$$FSCP==0         ; 1 = Preserve AC's (BP's and count)
                                ; If neither set, AC results undefined.

; Temporaries.  When the includer has not defined T1 we pick T1/T2 here
; and set $$FSV2 so that the routines save and restore them.
IFNDEF T1, T1==:D+2 ? T2==:T1+1 ? $$FSV2==1     ; Ensure T1, T2 defined
IFNDEF $$FSV2,$$FSV2==0         ; If user defined T1, assume can clobber.

;;; NOTE: for all routines, the following symbols are defined:
;;;     FSCOPY - Entry point for all-purpose copying.
;;;     FSCPY  - Entry point for fast copying when length past breakeven.
;;;     FSBRKE - Constant, # chars above which FSCPY is faster than byte loop.
;;;     FSKLCV - Entry point to configure routine for KL.  (KA assumed)
;;; NOTE(review): no definition of FSBRKE is visible in this file - confirm.

subttl SLOW version

IFN $$FSCS,[
; Comparison baseline: every copy goes through a plain ILDB/IDPB loop.
;       A = source BP, B = destination BP, C = byte count.

fscopy: caig c,                 ; count greater than zero?
fsklcv:  popj p,                ; no - nothing to do.  (Also serves as a
                                ; do-nothing FSKLCV.)
fscpy:
IFN $$FSCP, push p,a ? push p,b ? push p,c
IFN $$FSV2,push p,t1
        ildb t1,a               ; fetch next source byte
        idpb t1,b               ; store it in the destination
        sojg c,.-2              ; loop until the count runs out
IFN $$FSV2,pop p,t1
IFN $$FSCP,pop p,c ? pop p,b ? pop p,a
        popj p,
] ;IFN $$FSCS
IFN $$FSCS,.INEOF               ; Halt here if done.

subttl EAK's version

ifn $$fsce,[

; FSCOPY copies N 7 bit bytes from a source to a destination.  Both are
; specified by BPs which will increment to point to the first byte to
; transfer or store into.
; Arguments:
;       A       source BP
;       B       destination BP
;       C       no.
;               of bytes
; Results:
;       A       updated source BP
;       B       updated destination BP
; Assumes C,D and T1,T2 are contiguous.  Clobbers T1 and T2.

fscopy: caile c,18.             ; compare N to breakeven point
         jrst fscpy             ; above it, the hairy copy is faster
        jumple c,[popj p,]      ; N <= 0 does no moving

; N is less than breakeven point - use ILDB/IDPB loop.
; The loop branches back with .-2 rather than to FSCPY1: the label sits
; in front of the AC saves, so looping to it would push the ACs again on
; every byte and leave the stack unbalanced at the POPJ.
fscpy1:
IFN $$FSCP,push p,a ? push p,b ? push p,c
IFN $$FSV2,push p,t1
        ildb t1,a               ; get byte of source
        idpb t1,b               ; deposit in destination
        sojg c,.-2              ; do N bytes
IFN $$FSV2,pop p,t1
IFN $$FSCP,pop p,c ? pop p,b ? pop p,a
        popj p,

; FSKLCV - convert to KL version by overwriting the first instruction
; at FSCVEC with a JRST FSCKL.  This modifies code in place.
fsklcv: push p,[jrst fsckl]
        pop p,fscvec
        popj p,

; N greater than breakeven point, start doing hair.
fscpy:
IFN $$FSCP,push p,a ? push p,b ? push p,c
        push p,d                ; save AC
IFN $$FSV2,push p,t1
        jumpge b,fsc2           ; if not 440700 then enter byte copy loop
        sub b,[430000,,1]       ; 440700, convert to 10700
        jrst fsc3               ; skip byte copy loop, we're already there

; First deposit in destination until dest BP will increment to point
; to the first byte of a word.
fsc1:   ildb t1,a               ; load byte from source
        idpb t1,b               ; deposit in destination
fsc2:   tlne b,320000           ; ready to increment to new word?
         soja c,fsc1            ; no - decrement count, keep going

; B+1 is now address of next destination word
fsc3:   idivi c,5               ; no. of words in C, leftover chars in D
        tlnn a,320000           ; source BP P = 44 or 01?
         jrst fscblt            ; yes, use BLT!

; KA version: the copy loop is built and run in the ACs.
; Loop layout, as assembled by the code at FSCVEC:
;       FLAC            MOVE FLACT2,src(C)
;       FLAC1           first LSHC
;       FLAC1+1         LSH FLACT1,1            (only when $$FSCZ)
;       FLAC2           MOVEM FLACT1,dst(C)
;       FLAC3           second LSHC
;       FLAC3+1         AOJLE C,FLAC
;       FLAC3+2         JRST FSCKAE
; NOTE(review): the operands of NFLAC, FLAC2 and the inner IFLE below were
; missing in the copy this was edited from; the values here are rebuilt
; from the layout above (6 ACs, 7 with the LSH) - confirm against master.
ifndef flac,flac==d+1           ; Default start of AC loop.
ifn $$fscz,nflac==7             ; Length of loop, with the LSH
.else nflac==6                  ; and without it.
ifge t2-flac,[ifle t1-<flac+nflac-1>,[  ; T1, T2 avail?
        nflac==nflac+2          ; Ugh, no.
        flact1==flac+nflac-2]]  ; Must make room for 2 more acs.
ifndef flact1,flact1==t1        ; Define temp ACs to use
flact2==flact1+1                ; for holding chars in transit.
flac1==flac+1                   ; Place for first LSHC
ifn $$fscz,flac2==flac+3        ; Place for MOVEM (after the LSH)
.else flac2==flac+2             ; or directly after the first LSHC.
flac3==flac2+1                  ; Place for second LSHC
flace==flac+nflac-1             ; Last FLAC AC.

fscvec: push p,flac             ; Store victimized ACs on PDL.
movei flac,1(p)                 ; BLT destination: first free PDL word
        hrli flac,flac+1        ; BLT source: the AC after FLAC
        add p,[nflac-1,,nflac-1]        ; claim the PDL space
        blt flac,(p)            ; and save the rest of the loop ACs there
ifn $$fsix,move flact1,@a       ; read word source BP points to
.else move flact1,(a)
        lsh flact1,-1           ; put into low 35 bits
        addi a,(c)              ; add word count to source BP
        addi b,(c)              ; add word count to destination BP
ifn $$fsix,movei flac,@a ? movei flac2,@b       ; Find source & dest addrs
.else movei flac,(a) ? movei flac2,(b)          ; one way or the other.
        hrli flac,(MOVE FLACT2,(C))     ; make MOVE T2,SOURCE+COUNT(C)
        hrli flac2,(MOVEM FLACT1,(C))   ; make MOVEM T1,DESTINATION+COUNT(C)
        movn c,c                ; negate count
        ldb flac3,[360600,,a]   ; get bit position from source BP
IFE $$FSCZ,movei flac3,-2(flac3)
.else movei flac3,-1(flac3)     ; Shift one more bit to compensate for LSH 1.
        hrli flac3,(LSHC FLACT1,)       ; Set up second LSH
        movni flac1,-35.(flac3)
        hrli flac1,(LSHC FLACT1,)       ; Set up first LSH
IFN $$FSCZ,move flac1+1,[LSH FLACT1,1]
        move flac3+1,[AOJLE C,FLAC]     ; loop-closing jump
        move flac3+2,[JRST FSCKAE]      ; exit from the AC loop
        aojle c,flac            ; start loop, unless (unlikely!!!)
                                ; words already gone, so do last bytes.

; AC loop is finished: put the borrowed ACs back and go copy the
; leftover bytes.
fsckae: movei flace,flac        ; BLT destination: first loop AC
        hrli flace,1-nflac(p)   ; BLT source: where they were saved
        blt flace,flace         ; Restore ACs from PDL.
        sub p,[nflac,,nflac]
        jrst fscb2              ; go wrap up.

;-------------------------------------
; Non-BLT copy, run from memory instead of the ACs.  FSKLCV patches
; FSCVEC to jump here.  The address and shift fields of the loop at
; FSCLP are filled in below.

fsckl:
IFN $$FSV2,push p,t2            ; Will need to use 2nd temp AC.
ifn $$fsix,move t1,@a           ; read word source BP points to
.else move t1,(a)
        lsh t1,-1               ; put into low 35 bits
        addi a,(c)              ; add word count to source BP
        addi b,(c)              ; add word count to destination BP
ifn $$fsix,[
        movei t2,@a             ; Find true source addr
        hrrm t2,fsclp           ; make MOVE T2,SOURCE+COUNT(C)
        movei t2,@b             ; Find true dest addr
        hrrm t2,fsclp2          ; make MOVEM T2,DESTINATION+COUNT(C)
]
.else hrrm a,fsclp ? hrrm b,fsclp2
        movn c,c                ; negate count
        ldb t2,[360600,,a]      ; get bit position from source BP
IFE $$FSCZ,movei t2,-2(t2)
.else movei t2,-1(t2)           ; Shift one more bit to compensate for LSH 1.
        hrrm t2,fsclp3          ; Set up second LSH
        movni t2,-35.(t2)
        hrrm t2,fsclp1          ; Set up first LSH
        aojle c,fsclp           ; start loop, unless (unlikely!!!)
IFN $$FSV2,pop p,t2             ; words already gone, so do last bytes.
        jrst fscb2

; String copy loop.  The address fields of FSCLP/FSCLP2 and the shift
; counts of FSCLP1/FSCLP3 are stored into by the setup code at FSCKL, so
; this code is impure.  It is entered and left only by jumps, which lets
; a BVAR/EVAR pair relocate it.  BVAR and EVAR are invoked when the
; includer has defined them, the same way the KLH version does it.
; NOTE(review): BVAR/EVAR are not defined in this file; presumably they
; open and close an impure storage area - confirm.
ifdef bvar,bvar                 ; this code is impure!!
fsclp:  move t2,0(c)            ; next source word
fsclp1: lshc t1,0               ; shift into place
ifn $$fscz,lsh t1,1             ; If zeroing, clear without zapping next char.
fsclp2: movem t1,0(c)           ; store completed destination word
fsclp3: lshc t1,0
        aojle c,fsclp           ; increment count, keep going until gone
IFN $$FSV2,pop p,t2
        jrst fscb2
ifdef evar,evar

; Use BLT!  Source and destination are aligned the same way, so whole
; words can be moved.
fscblt: jumpge a,.+2            ; 10700 or 440700?
         sub a,[430000,,1]      ; it's 440700, convert to 10700
ifn $$fsix,[
        movsi t1,@a             ; BLT AC: source address in LH
        hrri t1,@b              ; and destination address in RH
]
.else movsi t1,(a) ? hrri t1,(b)
        ; Both BPs are now in the 10700 form, which addresses the word
        ; before the next one to move, so bump both halves.  This must
        ; be done whether or not the BPs can be indexed or indirect.
        add t1,[1,,1]
        addi a,(c)              ; bump up BP to last word of source
        addi b,(c)              ; get BLT stop address
ifn $$fsix,blt t1,@b            ; move words from source to destination
.else blt t1,(b)
        sojl d,fscpy9           ; return immediately if no more characters

; Copy remaining bytes to last destination word.
fscb1:  ildb t1,a               ; load byte from source
        idpb t1,b               ; deposit byte in destination
fscb2:  sojge d,fscb1           ; finish off last word

; Common exit: undo the saves made at FSCPY.
fscpy9:
IFN $$FSV2,pop p,t1
        pop p,d                 ; restore AC
IFN $$FSCP,pop p,c ? pop p,b ? pop p,a
        popj p,
] ; IFN $$FSCE
IFN $$FSCE, .INEOF              ; Halt here if done.

subttl KLH's version

; Main screw with this algorithm is that it doesn't yet try to
; return updated BP's right.

IFN $$FSCK,[

; FSCOPY - Fast String Copy
;       A - Source BP
;       B - Dest BP
;       C - char count
; Note: Don't set KA/KLWINC less than 10 or things may screw up.
;       actually range is > 5 for lsh'ing and > 9 for blt'ing.
; If running on a KL, call FSKLCV once before using FSCOPY.

; Various declarations and storage

kawinc==12.     ; KA Breakeven - # chars after which hairy word move is
klwinc==12.
; KL "  faster than BP loop

; Offsets of the instructions within the 7-word copy loop.
$stent==1       ; offset from beg of loop for entry to STORE phase
$gent==4        ; offset from beg of loop for entry to GET phase
$fsi==0         ; SHIFT-IN LSHC
$fso==3         ; SHIFT-OUT LSHC
$fmi==4         ; MOVE-IN
$fmo==2         ; MOVE-OUT
$flj==5         ; LOOP-JUMP

; AC assignment for the KA in-AC loop.
; NOTE(review): the inner IFLE below has no right-hand operand in this
; copy of the source; it looks like an expression was lost - confirm
; against the master file before assembling.
ifndef flac,flac==e+1           ; # of first AC to use by KA fast loop.
nflac==7                        ; Loop always uses 7 ACs.
ifge t2-flac,[ifle t1-,[        ; T1, T2 avail?
        nflac==nflac+2          ; Ugh, no.
        flact1==flac+nflac-2]]  ; Must make room for 2 more acs.
ifndef flact1,flact1==t1        ; Define temp ACs to use
flact2==flact1+1                ; for holding chars in transit.
flace==flac+nflac-1             ; Last AC
irp ofs,,[si,so,mi,mo,lj]       ; Def various ACs as per offsets.
flac!ofs==flac+$f!ofs
termin

; KL loop hacked here.  This must be in impure storage.
ifdef bvar,bvar
fscpkl: lshc t1,                ; a LSHC T1, for SHIFT-IN goes here
        lsh t1,1
        movem t1,(c)            ; Address of dest stored in RH here
        lshc t1,                ; a LSHC T1, for SHIFT-OUT goes here
        move t2,(c)             ; Address of source stored in RH here
        aobjn c,fscpkl
        jrst @fentrm(d)         ; leave via the wrapup dispatch table
ifdef evar,evar

; Space to save ACs in.
savacs: block 14-1              ; accs 1 thru 13
sava14: 0

;---------------------------------------------------------------------
; Convert FSCOPY to speedier setup for KL (don't run in ACs)
; Do this before purifying.
; Walks FSKLTB, which holds address/instruction pairs, and stores each
; instruction at its address.  A and B are preserved.

fsklcv: push p,a ? push p,b
        movsi b,-fskltl         ; -number of pairs in LH, index 0 in RH
        move a,fskltb+1(b)      ; second word of pair: the new instruction
        movem a,@fskltb(b)      ; first word of pair: where it goes
        addi b,1                ; together with the AOBJN this steps the
        aobjn b,.-3             ; index by 2 and the pair count by 1
        pop p,b ? pop p,a
        popj p,

; Patch table: location to patch ? replacement instruction.
fskltb: fscopy ? cail c,klwinc
        fscpy3 ? jrst fscpy4
        fscptl ? lshc t1,@fscpkl
ifn $$fsix,%%off==2
.else %%off==0
        fscpy+%%off+0 ? push p,d
        fscpy+%%off+1 ? push p,e
ifn $$fsv2,[
        fscpy+%%off+2 ? push p,t1
        fscpy+%%off+3 ? push p,t2
%%off==%%off+2
]
        fscpy+%%off+2 ? jrst fscpy0
ifn $$fsv2,fscpy9+0 ? pop p,t2 ? fscpy9+1 ? pop p,t1 ? %%off==2
.else %%off==0
        fscpy9+%%off+0 ? pop p,e
        fscpy9+%%off+1 ? pop p,d
        fscpy9+%%off+2 ? popj p,
expunge %%off
fskltl==<.-fskltb>/2

;-----------------------------------------------------------------------
; Here we go....

fscopy: cail c,kawinc           ; Less than break-even point?
jrst fscpy                      ; No, use hairy word copy.
        jumple c,[popj p,]      ; Count <= 0: move nothing.  Without this
                                ; the loop below copies one byte anyway.
IFN $$FSCP,pushae p,[a,b,c]
IFN $$FSV2,push p,t1            ; Less than break-even, faster to use
        ildb t1,a               ; simple byte-by-byte copying.
        idpb t1,b               ; (actually, it would have been better to
        sojg c,.-2              ; not have called FSCOPY at all,
IFN $$FSV2,pop p,t1             ; mainly due to this save-ac overhead!)
IFN $$FSCP,popae p,[c,b,a]
        popj p,

; Wheee, using hairy word copying!

ifle flace-a,.err ACs conflict!
ifn $$fscp,acsv1==a             ; Preserve all ACs
.else acsv1==d                  ; else from D onwards.

; NOTE(review): the MOVEI FLACE below reads 1-(p) in this copy of the
; source; an expression between the minus sign and (p) appears to have
; been lost - confirm against the master file before assembling.
; The first words after FSCPY are overwritten by FSKLCV, so their order
; and count must stay in step with the table at FSKLTB.
fscpy:
ifn $$fsix,hrri a,@a ? hrri b,@b
        add p,[flace-acsv1,,flace-acsv1]
        movem flace,(p)
        movei flace,1-(p)
        hrli flace,acsv1
        blt flace,-1(p)
fscpy0: ldb e,[360300,,a]       ; get low 3 bits of P field for source
        skipge e,fschtb(e)      ; Get resulting # chars, skip if addr ok
         addi a,1               ; P= 01, must bump address.
        ldb d,[360300,,b]       ; Repeat procedure for dest
        skipge d,fscht2(d)      ; using slightly different table
         addi b,1

; Now get index for shift values, and count for words
        subi c,(e)              ; Get # chars minus those in 1st src wd.
        addi e,-6(d)            ; Get E index - d*5+s, zero based.
        idivi c,5               ; find # words to loop through, rem in d.
        move t2,(a)             ; and get 1st word of source.
        aoja a,@fpath(e)        ; Must now pick a path...

; BLT possible!  Jump to fsblt0 if no shifting needed for setup.
fsblt0: movem t2,(b)            ; Store source word directly
        jrst fsblt4
fsblt:  lsh t2,@shasl(e)        ; Shift source up against left
        move t1,(b)             ; Get 1st wd of dest.
        lsh t1,@shadr(e)        ; right-adjust it
        lshc t1,@shfix(e)       ; and get everything into T1
        lsh t1,1                ; need one more bit's worth.
        movem t1,(b)            ; Store 1st wd of dest...

; Now settle down to serious BLT'ing.
fsblt4: movei t1,1(b)           ; BLT destination in RH
        hrli t1,(a)             ; BLT source in LH
        addi b,(c)              ; Find addr of last dest word
        blt t1,(b)              ; Zoom!!
        jumpe d,fscpy9          ; If no remainder, super win - done!
        addi a,(c)              ; Hmm, must get last source word.
        move t1,(a)             ; like so.
        move c,fbmsk(d)         ; and a word mask for chars
        and t1,c                ; clear unused bits from source,
        andcam c,1(b)           ; clear the same chars in the dest word
        iorm t1,1(b)            ; and merge the source chars in.
        aoja b,fscpy9

; Can't do BLT.
; Well, get A and B set up for magical shift loop.

shskp2: lsh t2,@shasl(e)        ; Here, only need to adjust source,
        jrst shskp5             ; since dest will be totally clobbered.
fsshft: lsh t2,@shasl(e)        ; Here, both src and dest must be integrated.
shskp1:                         ; Here, only need adjust dest; src wd is full.
        move t1,(b)             ; get dest word
        lsh t1,@shadr(e)
shskp5: lshc t1,@shfix(e)       ; Stuff as many chars as possible into T1.
        caie d,0                ; If any remainder,
         movei c,1(c)           ; add 1 more word.
        add d,ffindx(e)         ; Make new index using # chs left in last wd.

; Now set things up for loop, and enter it.
; KA path: the loop instructions are assembled into the FLACxx ACs and
; executed there.  FSKLCV replaces the first instruction at FSCPY3 with
; a jump to FSCPY4.
fscpy3:
ifn flact1-t1,move flact1,t1 ? move flact2,t2
        move flacsi,fshint(e)   ; Get shift amount for shift-in
        move flacsi+1,[lsh flact1,1]    ; Clear last bit, don't zap next char
        movei flacmo,(b)
        hrli flacmo,(movem flact1,(c))  ; This stores word into dest
        move flacso,fshout(e)   ; Get shift amount for shift-out
        movei flacmi,(a)
        hrli flacmi,(move flact2,(c))   ; Get new word from source
        move flaclj,[aobjn c,flac]      ; loop for count of words.
ifn flact1-t1,move flaclj+1,[jrst fsckae]
.else move flaclj+1,[jrst @fentrm(d)]   ; Use right exit jump.
        addi b,(c)              ; Update dest cnt
        movni c,(c)             ; Make AOBJN pointer.
        movsi c,(c)
        jumpge d,flac+$stent    ; Now enter loop at either the STORE
        soj b,
        soja flacmo,flac+$gent  ; or the GET phase.

; KL fast loop, in core.
; Same idea, but the loop at FSCPKL in memory is patched and run.
fscpy4: move e,fshint(e)        ; Get LSH for shift-in
        hrrm e,fscpkl+$fsi
        movni e,-35.(e)         ; and shift-out
        hrrm e,fscpkl+$fso
        hrrm a,fscpkl+$fmi      ; Address for MOVE T2,
        hrrm b,fscpkl+$fmo      ; Address for MOVEM T1,
        addi b,(c)              ; Update dest addr to point at last wd.
        movni c,(c)
        movsi c,(c)             ; Make AOBJN count.
        jumpge d,fscpkl+$stent  ; Depending on D flag, enter at STORE
        sos fscpkl+$fmo         ; or at GET phase (latter must update
        soja b,fscpkl+$gent     ; both AC and instr.)

;---------------------------------------------------------------------------
; Come here when loop finished.  The last word of the source string
; will be in B.  It may have 1 to 5 chars left for moving, but will
; never have 0.
fsckae: move t1,flact1          ; Entry pt to restore final wds to right place
        move t2,flact2
        jrst @fentrm(d)         ; dispatch to the wrapup chosen by D

; Long wrapup.
fscptl: lshc t1,(flacsi)        ; Perform a shift-in
        lsh t1,1
        movem t1,(b)            ; Store full word.
        addi b,1                ; increment address
                                ; and drop through to Medium wrapup.
; Medium wrapup.
fscptm: lshc t1,@flsout(d)      ; Shift rest of source word into A
        move t2,(b)             ; Get dest word it will be stored into
        lsh t2,@fladj(d)        ; left-adjust chars to preserve.
                                ; and drop thru to Short wrapup.
; Short wrapup.
fscpts: lshc t1,@fflout(d)      ; Do final, last, shift-out.
        andcmi t1,1             ; clear the low-order bit
        movem t1,(b)            ; and store last dest word.

; Done!!  Just restore regs and return.
; NOTE(review): the HRLI FLACE below reads 1-(p) in this copy of the
; source; an expression between the minus sign and (p) appears to have
; been lost - confirm against the master file before assembling.
; The first words at FSCPY9 are overwritten by FSKLCV.
fscpy9: movei flace,acsv1
        hrli flace,1-(p)
        blt flace,flace
        sub p,[flace-acsv1,,flace-acsv1]
IFN $$FSCU,[
        ptskip c,a              ; Adjust BP in A
        ptskip c,b              ; Adjust BP in B
ifn $$fsix,[
fscp9x: tlne a,17               ; Was source BP originally indexed?
         jrst fscp9y            ; Yes, go hack it.
        tlne b,17               ; Was dest BP indexed?
         jrst fscp9z            ; Yep, fix up.
        popj p,
fscp9y: ldb c,[220400,,a]       ; Get X field of BP in A
        subi a,@c               ; Adjust BP properly.
        tlnn b,17               ; Now check dest BP...
         popj p,
        ; This label was spelled FSCPZ, which left the JRST FSCP9Z
        ; above with no target.
fscp9z: ldb c,[220400,,b]       ; Get X field of BP in B
        subi b,@c               ; Adjust.
]]
        popj p,

; Indexed by low 3 bits of P field, returns # chars
; existing to right of loc BP points to.  Hence value
; ranges from 5 to 1; if P = 01, SETZ indicates that
; bp address needs incrementing.
fschtb: 1               ; P=10
        setz 5          ; P=01, increment addr
        0 ? 0           ; randomness
        5               ; P=44, full word
        4               ; P=35, 4 chars to go
        3               ; P=26
        2               ; P=17

; This table is just like FSCHTB except values are pre-multiplied
; by 5 for easy addition into E.
fscht2: 1*5             ; P=10
        setz 5*5        ; P=01, increment addr
        0 ? 0           ; random
        5*5 ? 4*5 ? 3*5 ? 2*5

; This table is indexed by D when it has # chars remaining from
; dividing # chars (in C) by 5.  Provides mask for these chars.
fbmsk:  0               ; Nothing here.
        .byte 7
        177 ? 0 ? 0 ? 0 ? 0
        177 ? 177 ? 0 ? 0 ? 0
        177 ? 177 ? 177 ? 0 ? 0
        177 ? 177 ? 177 ? 177 ?
0
        .byte

; FPATH table vectors off to BLT and other minor stuff as
; soon as all the basic computations are made.
; Indexed by E.
fpath:  fsblt  ? fsshft ? fsshft ? fsshft ? shskp1
        fsshft ? fsblt  ? fsshft ? fsshft ? shskp1
        fsshft ? fsshft ? fsblt  ? fsshft ? shskp1
        fsshft ? fsshft ? fsshft ? fsblt  ? shskp1
        shskp2 ? shskp2 ? shskp2 ? shskp2 ? fsblt0

; SHASL table, contains # bits to shift first source wd left so
; as to left-adjust it in B.  Indexed by E.
shasl:  repeat 5, repeat 5,<4-.rpcnt>*7
        ; ent 4,3,2,1,0
        ; ent 4,3,2,1,0
        ; ent 4,3,2,1,0
        ; ent 4,3,2,1,0
        ; ent 4,3,2,1,0

; SHADR table, contains # bits to shift first dest wd right so
; as to right-adjust it in A.  Indexed by E.
shadr:  repeat 5, %%%cnt==.rpcnt+1 ? repeat 5,[ ? 0,,-<%%%cnt*7+1>]
        ; ent -1,-1,-1,-1,-1
        ; ent -2,-2,-2,-2,-2
        ; ent -3,-3,-3,-3,-3
        ; ent -4,-4,-4,-4,-4
        ; ent -5,-5,-5,-5,-5

; macro to make randomness more bearable.
; Each argument is a character count; it is assembled as a bit count.
define ent a,b,c,d,e
a*7 ? b*7 ? c*7 ? d*7 ? e*7
termin

; SHFIX table, contains # bits to left-shift A and B combined so
; as to move as many characters out of B as possible.  Indexed
; by E.  MIN(d,e) (d and e after fschtb)
shfix:  repeat 5,[%%%cnt==.rpcnt
        repeat 5,[ifle .rpcnt-%%%cnt, <1+.rpcnt>*7
                .else <1+%%%cnt>*7
        ]]
        ; ent 1,1,1,1,1
        ; ent 1,2,2,2,2
        ; ent 1,2,3,3,3
        ; ent 1,2,3,4,4
        ; ent 1,2,3,4,5

; FSHINT table, containing appropriate LSHC instructions for shifting
; in the first chars of a fresh source word.  Indexed by E.
fshint: repeat 5,[%%%cnt==.rpcnt
        repeat 5,[ifle %%%cnt, %%%cnt==5
                LSHC FLACT1,%%%cnt*7
                %%%cnt==%%%cnt-1
        ]]
        ; ent 5,4,3,2,1
        ; ent 1,5,4,3,2
        ; ent 2,1,5,4,3
        ; ent 3,2,1,5,4
        ; ent 4,3,2,1,5

; FSHOUT table, containing appropriate LSHC instructions for shifting
; out the last chars of an old source word, to make room for a
; new one.  Indexed by E.
fshout: repeat 5,[%%%cnt==5-.rpcnt
        repeat 5,[ifge %%%cnt-5, %%%cnt==0
                LSHC FLACT1,%%%cnt*7
                %%%cnt==%%%cnt+1
        ]]
        ; ent 0,1,2,3,4
        ; ent 4,0,1,2,3
        ; ent 3,4,0,1,2
        ; ent 2,3,4,0,1
        ; ent 1,2,3,4,0

; FFINDX table, contains part of D index for fast add-in.
; Indexed by E.  Similar to FSHOUT.  Sign bit also indicates
; whether entry point is $STENT (pos) or $GENT (neg).
ffindx: repeat 5,[%%%cnt==5-.rpcnt ? %%%cn2==.rpcnt
        repeat 5,[ifge %%%cnt-5, %%%cnt==0
                ifle %%%cn2-.rpcnt,%%%cnt*5
                .else setz %%%cnt*5
                %%%cnt==%%%cnt+1
        ]]
        ; ent5 0,1,2,3,4
        ; ent5 4,0,1,2,3
        ; ent5 3,4,0,1,2
        ; ent5 2,3,4,0,1
        ; ent5 1,2,3,4,0
        ; ent s,s,s,s,s         ; entry point flag (sign bit)
        ; ent g,s,s,s,s
        ; ent g,g,s,s,s
        ; ent g,g,g,s,s
        ; ent g,g,g,g,s

; ENTX assembles five bit counts from five character counts, rotated
; so that the last argument comes out first.
define entx a,b,c,d,e   ; Last item (5) is actually first (0)
7*e ? 7*a ? 7*b ? 7*c ? 7*d
termin

; FENTRM table, dispatching to appropriate wrapup routine when fast AC
; loop is finished.  Indexed by D.
; ENTXJ takes five letters out of s, m, l and assembles the addresses
; FSCPTS, FSCPTM, FSCPTL in the same rotated order ENTX uses.
fentrm:
define entxj a,b,c,d,e
irp l,,[e,a,b,c,d]
fscpt!l
termin
termin
        entxj m,m,m,m,s
        entxj m,m,m,s,l
        entxj m,m,s,l,l
        entxj m,s,l,l,l
        entxj s,l,l,l,l

; FLSOUT table, for use by Medium wrapup routine; pushes out remaining
; source chars in B, making room for incoming dest word.
; Indexed by D.
flsout: entx 1,2,3,4,0
        entx 1,2,3,0,1
        entx 1,2,0,1,2
        entx 1,0,1,2,3
        entx 0,1,2,3,4

; FLADJ table, also for Medium wrapup routine; adjusts dest word in
; B to left-adjust chars to be preserved.
fladj:  entx 1,2,3,4,5
        entx 2,3,4,5,1
        entx 3,4,5,1,2
        entx 4,5,1,2,3
        entx 5,1,2,3,4

; FFLOUT table, for Short wrapup routine.  Final Last shift-out of
; chars in B, so that the last dest word can be stored from A.
; Indexed by D.  Adds 1 extra bit since MOVEM A, done right after it,
; and nothing to preserve in B.
fflout:
define entx1 a,b,c,d,e
e*7+1 ? a*7+1 ? b*7+1 ? c*7+1 ? d*7+1
termin
        entx1 4,3,2,1,5
        entx1 3,2,1,4,4
        entx1 2,1,3,4,3
        entx1 1,2,4,3,2
        entx1 1,4,3,2,1

] ;IFN $$FSCK
IFN $$FSCK,.INEOF

IFN $$FSC.,[
IFE OS%20X, .ERR FSCOPY will lose - MOVSLJ instr non-existent on this machine!!
ifndef extend,extend=:(123000)  ; EXTEND 20X instruction
ifndef movslj,movslj==:(016000) ; Subinstr

; FSCOPY, string-instruction version.
;       A = source BP, B = destination BP, C = byte count.
; Short strings go through a byte loop; longer ones are handed to
; EXTEND/MOVSLJ at FSCPY.  FSKLCV is a no-op here.

fscopy: caile c,10              ; Breakeven??
         jrst fscpy             ; Go hack special mover.
        jumple c,[popj p,]      ; Count <= 0: move nothing.  Without this
                                ; the loop below copies one byte anyway.
IFN $$FSCP,pushae p,[a,b,c]
        push p,d                ; Less than break-even, faster to use
        ildb d,a                ; simple byte-by-byte copying.
        idpb d,b                ; (actually, it would have been better to
        sojg c,.-2              ; not have called FSCOPY at all,
        pop p,d                 ; mainly due to this save-ac overhead!)
IFN $$FSCP,popae p,[c,b,a]
fsklcv: popj p,

.scalar acsava(6)               ; save area for ACs A through A+5

fscpy:  movem a,acsava          ; save A by hand so it can be the BLT AC
        move a,[b,,acsava+1]
        blt a,acsava+5          ; Must save 6 ACs
        move a,c                ; AC: source string length
        move a+4,b              ; AC+4: dest byte ptr
        move b,acsava           ; AC+1: source byte ptr
        move a+3,c              ; AC+3: dest string length
        EXTEND A,[MOVSLJ]       ; DO IT!
         nop                    ; either return lands on the next word
IFN $$FSCP,[
        move a+5,[acsava,,a]    ; Restore all 6 ACs.
        blt a+5,a+5
]
.ELSE [
IFN $$FSCU,[
        move a,b                ; Restore updated source ptr
        move b,a+4              ; and updated dest ptr
]
        move a+3,acsava+3       ; Restore remaining 3 ACs.
        move a+4,acsava+4
        move a+5,acsava+5
]
        popj p,
] ;IFN $$FSC.