; SMOVE - assembly setup, the FSCOPY variants under test, the test
; strings/buffers, and the per-version tables indexed in parallel
; (VERTBL, VERNAM, VERINI, VERLEN, VERRTB, VERRTC, VERRTP).

.symtab 4000.,6000.
p=:17
%%%asc==1		; For ASCNT /foo/ (not ascnt [foo])

.insrt macros		; for OS defs etc.
.insrt timer
.insrt nuuos		; For PTSKIP

ifndef e,e=d+1

; NOTE(review): TELMAC is not referenced anywhere else in this file, and
; its PRINTX text looks as if delimiters were lost - confirm before use.
define telmac a,b,c,d,e
printx a!b!c!d!e  
termin

; Assembly switches consulted by DOPASS when comparing result ACs.
IFNDEF $$FSCP,$$FSCP==0	; No preservation of ACs
IFNDEF $$FSCU,$$FSCU==0	; No update either!

T1==U3			; For fscopy refs
T2==U4

; Each variant of FSCOPY is inserted inside its own symbol block, with a
; different switch set.  %%RLN in each block records the number of
; locations assembled since the preceding %%SAV (see VERLEN).

%%sav==.
.begin slow
$$FSCS==1		; use std slow version
.insrt fscopy
%%rln==<.-%%sav>
.end slow

%%sav==.
.begin klh
$$FSCK==1		; use KLH's version
.insrt fscopy
%%rln==<.-%%sav>
.end klh

%%sav==.
.BEGIN eak
$$FSCE==1		; use EAK's version
.insrt fscopy
%%rln==<.-%%sav>
.end eak
constants

%%sav==.
.BEGIN NEW
$$FSCN==1		; use "new" version
$FSBLT==1		; Store PDL code with BLT
			; NOTE(review): single "$", unlike the $$FSCx
			; switches - confirm against fscopy.
.insrt fscopy
%%rln==<.-%%sav>
.end new
constants
block 100

%%sav==.
.BEGIN 20x
$$FSC.==1		; use ucode version
.insrt fscopy
%%rln==<.-%%sav>
.end 20x

; String parameters.  The xxxI words are the initial 7-bit byte pointers;
; the word after each is the working copy that the tests advance.
strcnt:	52.			; Initial (maximum) length used by VERIFY
strsi:	440700,,strbf1		; Initial source BP (buffer 1)
strs1:	0			; Current source BP
strdi2:	440700,,strbf2		; Initial dest BP for buffer 2
strd2:	0			; Current dest BP for buffer 2
strdi3:	440700,,strbf3		; Initial dest BP for buffer 3
strd3:	0			; Current dest BP for buffer 3

; Buffer 1: constant source text.
strbf1:	ascii |0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789|
	ascii | This will have to be a very long string, for purposes of testing the string copying routines. Goal is to first find out if the damn thing actually works, and at what string size it becomes faster to use FSCOPY instead of plain old byte copying. dum te dum...|

; Buffers 2 and 3 must stay adjacent and in this order: DOPASS fills both
; with one BLT running from STRBF2 up to STRBFE-1.
strbfl==500
strbf2:	block strbfl
strbf3:	block strbfl
strbfe:

; Table of copy routines.  Entry 0 is the canonical (control) routine.
vertbl:	slow"fscpy		; Canonical copy
	klh"fscpy		; Test 1
	eak"fscpy		; Test 2
	new"fscpy		; Test 3
	20x"fscpy		; Test 4
vertln==<.-vertbl>

; Printable name of each version.
vernam:	ascnt "BP"
	ascnt "KLH"
	ascnt "EAK"
	ascnt "NEW"
	ascnt "20X"

; Per-version setup routine, called by SMVSET.
verini:	slow"fsklcv
	klh"fsklcv
	eak"fsklcv
	new"fsklcv
	20x"fsklcv

; Per-version assembled length, printed by SUMSHO.
verlen:	slow"%%rln
	klh"%%rln
	eak"%%rln
	new"%%rln
	20x"%%rln

; Results of tests
.vector verrtb(vertln)	; Base setup time
.vector verrtc(vertln)	; Time per char
.vector verrtp(vertln)	; Break-even point
.scalar vertxs		; saved AOBJN thru tables.
.scalar stdrtn,tstrtn	; Vectors to routines for this pass.

sumflg:	-1		; Non-zero for summary only.

; SGO - program entry point.  Sets up the stack and output channel,
; announces the test, then runs setup, verification and timing in turn.
sgo:	move p,pdl
	call sysini		; Initialize timer system stuff
ifn os%its,[
	.open tyoc,[.uao,,'tty]
	 .lose
	out(tyoc,open(uc$iot))
]
ifn os%tnx,out(tyoc,open(uc$iot,[.priou]))
	out(,ch(tyoc))		; Make this be std output.

	; Announce system type etc.
	out(,("String-move timing test of "),tim(mdyt),eol,call(syssho),eol,eol)

	call smvset		; Set up routines as they like it.
	call verify		; Verify that routines actually work.
	call newtim		; Then determine timing parameters.

	out(tyoc,eol,("SMOVE done"),eol)
	.value
	jrst go			; NOTE(review): GO is not defined in this file
				; (the entry here is SGO); presumably it comes
				; from an .insrt'ed file - confirm.

subttl VERIFICATION - stuff for making sure that string copy works.

; VERIFY - check every non-canonical routine in VERTBL against the
;	canonical one (entry 0), over a grid of source offsets,
;	destination offsets and string lengths.
;	Each pass copies from buffer 1 into buffer 2 with the canonical
;	routine and into buffer 3 with the routine under test (see DOPASS).
;	C  - AOBJN index into VERTBL (kept in VERTXS across passes)
;	E  - source-offset loop counter
;	D  - destination-offset loop counter
;	CNT - LH loop count, RH current string length

verify:	setzm errcnt
	out(,("Verifying routine correctness..."))

	movsi c,-vertln		; AOBJN pointer over the routine table.

	; GLOBAL LOOP - one iteration per routine to test.
tstlp0:	aobjp c,tstdon		; Step to next entry (first time, this
				; steps past the canonical entry 0).
tstlp1:	movem c,vertxs		; Remember table index for this pass.
	move a,vertbl		; Entry 0 is always the canonical routine.
	skipn b,vertbl(c)	; Fetch routine to test this pass.
	 jrst tstlp0		; Null entry - nothing to test, go on.
	movem b,tstrtn		; Addr of routine under test
	movem a,stdrtn		; and addr of the standard routine.

	move a,strsi		; Start the source BP at the beginning
	movem a,strs1		; of the constant string.
	movsi e,-6		; Subloop 0 - 6 different source-BP's.

tstlp2:	move b,strdi3		; Reset both destination BP's:
	movem b,strd3		; buffer 3
	move b,strdi2
	movem b,strd2		; and buffer 2.
	movsi d,-6		; Subloop 1 - 6 different destination BP's.

tstlp3:	movsi cnt,-5		; Subloop 2 - 5 different string lengths,
	hrr cnt,strcnt		; starting at the max and working down.

tstlp4:	pushj p,dopass		; One pass, including check/report.
	subi cnt,2		; -2 here, +1 from the AOBJN below:
	aobjn cnt,tstlp4	; net length decrement of 1 per pass.

	ibp strd3		; Subloop 1 step: bump both destination
	ibp strd2		; BP's by one byte.
	aobjn d,tstlp3

	ibp strs1		; Subloop 0 step: bump the source BP.
	aobjn e,tstlp2

	move c,vertxs		; Back to the Global Loop with the saved
	aobjn c,tstlp1		; routine-table index.

tstdon:	skipn errcnt
	 jrst [	out(,(" done."),eol)
		ret]
	out(,eol,(" --- VERIFY DONE - "),d(errcnt),(" errors ---"),eol)
	ret

; DOPASS - do one pass for current parameters, including check/report.
;	STRS1 - source BP from buffer 1
;	STRD2 - dest BP for buffer 2
;	STRD3 - dest BP for buffer 3
;	rh(CNT) - string length
;	rh(D), rh(E) - char offsets for dest and source strings respectively.
;	Uses A, B, C.  The result-AC comparison is assembled only when
;	$$FSCP and/or $$FSCU are non-zero.

dopass:	move a,[ascii /xxxxx/]	; Pre-fill buffers 2 and 3 with x's:
	movem a,strbf2		; seed the first word, then let an
	move a,[strbf2,,strbf2+1] ; overlapping BLT propagate it through
	blt a,strbfe-1		; to the end of buffer 3.
	setom repflg		; Enable report-loc 1st time

	move a,strs1		; Canonical copy:
	move b,strd2		; buffer 1 -> buffer 2
	movei c,(cnt)
	pushj p,@stdrtn
	movem c,resacc		; Remember the AC's it returned.
	movem b,resacb
	movem a,resaca

	move a,strs1		; Test copy:
	move b,strd3		; buffer 1 -> buffer 3
	movei c,(cnt)
	pushj p,@tstrtn
	subi b,strbf3-strbf2	; Adjust for dest diff, then compare AC's.
IFN $$FSCP, came c,resacc ? jrst dopas4
IFN $$FSCP\$$FSCU,camn a,resaca ? came b,resacb ? jrst dopas4
	jrst dopas5

dopas4:	pushj p,reploc		; Say where in the test we are,
	out(,("Result-AC difference: Std: "),hv(resaca),tab,hv(resacb))
IFN $$FSCP, out(,tab,hv(resacc))
	out(,(" Test: "),hv(a),tab,hv(b))
IFN $$FSCP,out(,tab,hv(c))
	out(,eol)

dopas5:	pushj p,check		; Skips if buffers 2 and 3 agree.
	 jrst [	pushj p,reploc	; Mismatch: report location in test
		pushj p,report	; and show both strings.
		jrst .+1]
	popj p,

.scalar resaca,resacb,resacc	; Save ACs here

; REPLOC - Report current location within overall test.
;	Prints at most once per DOPASS (gated by REPFLG) and bumps ERRCNT
;	each time it does print.  Preserves A and B.

.scalar repflg		; Set -1 to enable reporting loc 1st time.
.scalar errcnt		; # errors seen.

reploc:	skipn errcnt		; Very first report of the run?
	 outcal(,eol)		; Yes, start on a fresh line.
	aose repflg		; Already reported for this pass?
	 ret			; Yes, stay quiet.
	aos errcnt
	push p,a
	push p,b
	movei a,(cnt)		; Current string length
	move b,vertxs		; Get index to rtn being used
	out(,("Arrgh! Routine "),tc(vernam(b)),(", Src offset "),rhv(e),(", Dst offset "),rhv(d),(", Length "),d(a),("."),eol)
	pop p,b
	pop p,a
	RET

; CHECK - compare string buffers 2 and 3 to see if copied identically.
;	Skips if true.  The low-order bit of each word is ignored.
;	Preserves A, B, C.

check:	pushae p,[a,b,c]
	movsi c,-strbfl
check2:	move a,strbf2(c)
	andcmi a,1		; Drop low bit of each word
	move b,strbf3(c)
	andcmi b,1		; before comparing.
	came a,b
	 jrst check4		; Differ - take the non-skip return.
	aobjn c,check2
	aos -3(p)		; All equal: bump the return address, which
				; sits below the three saved AC's.
check4:	popae p,[c,b,a]
	popj p,

; REPORT - Called when a check fails; string buffers 2 and 3 differ.
;	Prints out both for human comparison.  Preserves A and B.
;	NOTE(review): A = (STRCNT+9)/5 looks like a word count; confirm
;	what unit the first argument of s(...) is expected to be in.

report:	push p,a
	push p,b		; IDIVI leaves its remainder in B.
	move a,strcnt
	addi a,9.
	idivi a,5
	out(,("Good: "),s(a,[440700,,strbf2]))
	out(,("Bad: "),s(a,[440700,,strbf3]))
	pop p,b
	pop p,a
	popj p,

subttl TIMING - new timing routines.

; NEWTIM - run the timing experiments.  Does a control experiment first,
;	then for each routine in VERTBL calls VERTIM to collect timings
;	and TABSHO to analyze/print them.

newtim:	movei a,10.		; Use this as more reasonable for long rtns.
	movei b,100.		; A/ # checks, B/ # xcts per check
	setom outmod		; Tell TIMER not to output during experiments.
ifn os%20x,setom hptim		; If on 20X, try using HPTIM instead of RUNTM.
	out(,("Starting timing test, "),d(a),(" timechecks per exper, "),d(b),(" executions per check."),eol)
	pushae p,[a,b]		; Hold these until after the control run.
	call setup		; Basic TIMER setup, w/default vals
	setz xpr,
	call doxper		; Do control experiment first.
	call scnsho		; Show results for it.
	popae p,[loops,ncheks]	; B -> LOOPS, A -> NCHEKS
	call setup		; and set up again for faster checking.
	out(,eol)
	skipn sumflg
	 call tabdes		; Output sample table as description.

	; Now loop through table of string-move routines, testing all
	; combinations and producing output page for each.
	movsi d,-vertln		; Use 1st thing in rtn table for control...
ntiml2:	skipn sumflg
	 outcal(,c(14))		; Page break between each routine, if lots of output.
	out(,("Routine "),tc(vernam(d)),(": "))
	call vertim		; Do test for this routine
	trnn d,-1		; Is this the control routine?
	 jrst [	move a,[rtbas,,ctbas]	; Yes - copy results to control tables.
		blt a,ctbas+rtbln-1
		move a,[rtpc,,ctpc]
		blt a,ctpc+rtbln-1
		jrst .+1]
	call tabsho		; Analyze results, print table.
ntiml3:	aobjn d,ntiml2		; On to the next routine in the table.
	out(tyoc,("End of timing tests"),eol)
	call sumsho		; Print out summary for all versions
	ret

subttl New TIMING - Result collection

xper xfscpy,"FSCOPY",|call rtnvec|,0 ; Define an exper to use.

; RTNVEC - the code actually timed: load the copy args and jump to the
;	routine whose address VERTIM has stored in the RH of RTNJ.
ifdef bvar,bvar
rtnvec:	move a,strs1
	move b,strd2
	move c,strct
rtnj:	pjrst 0			; RH has addr of real routine.
ifdef evar,evar

lenbas:	10.			; Base length to use (plus "loff")
flnbas:	10.0			; Floating-point version.
lentpc:	5*10.			; Additional length for deriving time-per-char.
				; Must be multiple of 5 to preserve word alignment!!
flntpc:	0			; Floating version, filled in by VERTIM.

;;; RESULT TABLES - Each FSCOPY routine is timed in 125 different
;;;	configurations (because there are 5 different possible P-field
;;;	values for both the source and destination BP, and 5 possible
;;;	values for the string length, modulo word boundaries).
;;;	The index for each 3D array is derived as:
;;;		<<soff*5>+doff>*5+loff
;;; RTBAS holds the total time in usec used by a particular config.
;;; RTPC holds the time-per-char in usec for that config, calculated by
;;;	re-doing the timing test with LENTPC more chars and dividing the
;;;	resulting increment in time by LENTPC.
;;; CTBAS, CTPC hold a copy of the result table for the 1st FSCOPY
;;;	routine, which is assumed to be the "control" routine (BP copy).

rtbln==125.		; Things depend on this exact number, don't change.
.vector rtbas(rtbln)	; Result Base time - floating usec per call
.vector rtpc(rtbln)	; Result Time Per Char - floating usec per add'l char
.vector rtlen(rtbln)	; Length of string - floating # chars used.

			; Control tables (results for ctl)
.vector ctbas(rtbln),ctpc(rtbln),ctlen(rtbln)

; VERTIM - Test a routine, gathering info in RTBAS, RTPC, RTLEN.
;	D/ index of the version to test (preserved)
;	Runs the XFSCPY experiment twice per configuration: once at the
;	base length and once with LENTPC more chars.
;	Relies on B = A+1 and C = B+1 for the IDIVI remainders.

vertim:	push p,d		; Save version index

	move a,lenbas		; Make floating copy of LENBAS
	fsc a,233
	movem a,flnbas
	move a,lentpc		; Ditto for LENTPC
	fsc a,233
	movem a,flntpc

	movei xpr,xfscpy	; Set experiment index
	move a,vertbl(d)
	hrrm a,rtnj		; Set addr of rtn to test.

	movsi e,-rtbln		; Index = (<soff*5+doff>*5+loff) 3D array index
vertm3:	movei a,(e)		; Get current index
	idivi a,25.		; A/ soff, B/ remainder
	idivi b,5.		; B/ doff, C/ loff
	add c,lenbas		; Find total str length to use
	push p,e		; Save idx
	call cpyset		; Set up (takes args in A,B,C)
	call doxper		; Do the experiment!!
	move e,(p)		; Restore index (still left on stack)
	move a,xprtpi(xpr)	; Get resulting avg time
	movem a,rtbas(e)	; Store as "base" for this string length.
	move a,strct
	fsc a,233		; Save string length as float number.
	movem a,rtlen(e)

	move a,lentpc		; Lengthen the string by LENTPC chars
	addm a,strct		; in order to find time-per-char.
	call doxper		; Do experiment again!!
	pop p,e
	move a,xprtpi(xpr)	; Now get avg time for this length.
	fsbr a,rtbas(e)		; Find additional time required.
	caige a,		; Cruddy OS timing can produce negatives!!
	 setz a,
	fdvr a,flntpc		; Find time-per-char.
	movem a,rtpc(e)		; Store it.

	out(tyoc,("."))		; For human reassurance of progress.
	aobjn e,vertm3		; Loop til all configs tested.

	pop p,d			; Restore version index.
	ret

subttl New TIMING - Result table printout.

; TABDES - prints sample table as description.
;	Does nothing when SUMFLG is non-zero.

tabdes:	skipe sumflg
	 ret
	out(,tc(desctx),eol,eol,c(14))
	ret

desctx:	ascnt | Routine SAMPLE Dest 0 Dest 1 Dest 2 Dest 3 Dest 4 Src 0 Src 1 Src 2 Src 3 Src 4 (this is "Grand Average") Each entry has 6 lines: length offset 0: , ; is time per char, in usec. 1: , ; is break-even point in chars. 2: , ; For string lengths above , 3: , ; the routine will be faster than 4: , ; standard BP loop. AVG of all loffs: , |

; TABSHO - Prints out results of timing test for a FSCOPY routine,
;	using data collected in the RTBAS/RTPC array.
;	Calculates breakeven points by comparing with the results of the
;	control routine, gathered in CTBAS/CTPC.

; xx1DOF - totals over all doffs (innermost loop)
; xx2LOF - 5 running totals, indexed by doff
; xx3SOF - 25. running totals, indexed by loff*5+doff
.scalar tb1dof,tb2lof(5),tb3sof(25.)	; Base time totals
.scalar tc1dof,tc2lof(5),tc3sof(25.)	; Time-per-char totals
.scalar tp1dof,tp2lof(5),tp3sof(25.)
; Break-even point totals

ind:	3,,[ascii / /]		; indentation for line

; TABSHO (main part) - print the break-even / time-per-char table for the
; routine just timed, then the section of averages over all soffs.
;	D/ version index (pushed here; popped by the wrap-up code that
;	   follows this section)
;	C/ doff, D/ loff, E/ soff inside the loops; XPR/ result-table index.
;	All printing is suppressed when SUMFLG is non-zero, but the
;	totals are still accumulated.
;	NOTE(review): every OUTCAL that follows a skip, and the one
;	addressed by ".-1" below, is presumably a single instruction -
;	confirm against the macro's definition.

tabsho:	push p,d
	skipe sumflg		; Summary only?
	 jrst tbsh05		; Yes, no header.
	out(,eol,eol,tc(ind))	; First print header.
	movsi a,-5
	outcal(,(" Dest "),rhv(a),(" "))
	aobjn a,.-1		; One column heading per dest offset.
	out(,(" Avg doffs"),eol)

tbsh05:	movsi e,-25.		; Clear the over-all-soffs totals.
tbsh06:	setzm tb3sof(e)
	setzm tc3sof(e)
	setzm tp3sof(e)
	aobjn e,tbsh06

	; Now loop over source offset.
	movsi e,-5
tbsho2:	skipn sumflg
	 outcal(,("Source offset "),rhv(e),eol) ; Output source offset #

	movsi d,-5		; Clear the 5-entry totals for this soff.
tbsh31:	setzm tb2lof(d)
	setzm tc2lof(d)
	setzm tp2lof(d)
	aobjn d,tbsh31

	; Loop over length offset.
	movsi d,-5
tbsho3:	skipn sumflg
	 outcal(,tc(ind))
	setzm tb1dof		; Clear total over all doffs
	setzm tc1dof
	setzm tp1dof

	; Loop over dest offset, actually outputting stuff.
	movsi c,-5
		; C/ doff
		; D/ loff
		; E/ soff
		; Remember index into array is <<soff*5>+doff>*5 + loff
tbsho4:	movei xpr,(e)		; Get current soff
	imuli xpr,5
	addi xpr,(c)		; add doff
	imuli xpr,5
	addi xpr,(d)		; add loff to finally get index.
	call brkevn		; Break-even point for this config -> A
				; (clobbers B)
	push p,a		; Save for later printout
	movei b,(d)
	imuli b,5
	addi b,(c)		; B/ loff*5 + doff, index for xx3SOF.

	fadrm a,tp1dof		; Break-even: add to total over all doffs,
	fadrm a,tp2lof(c)	; to the per-doff total for this soff,
	fadrm a,tp3sof(b)	; and to the total over all soffs.
	move a,rtpc(xpr)	; Do same for RTPC value
	fadrm a,tc1dof
	fadrm a,tc2lof(c)
	fadrm a,tc3sof(b)
	move a,rtbas(xpr)	; Ditto for RTBAS value
	fadrm a,tb1dof
	fadrm a,tb2lof(c)
	fadrm a,tb3sof(b)

	pop p,a			; Restore breakeven for this config
	skipn sumflg
	 outcal(,(" "),f(a,5,2),(" "),f(rtpc(xpr),4,2),("|")) ; Output value.
	aobjn c,tbsho4		; Loop over all doffs.

	; One line of doffs done, output averages for the line.
	move a,tp1dof
	fdvri a,(5.0)
	move b,tc1dof
	fdvri b,(5.0)
	skipn sumflg
	 outcal(,(" "),f(a,5,2),(" "),f(b,4,2),eol)
	aobjn d,tbsho3		; Loop over all loffs.

	; 1 section of loffs done for this soff, output avgs line.
	skipn sumflg
	 outcal(,tc(ind))
	setzm tb1dof
	setzm tc1dof
	setzm tp1dof
	movsi c,-5
tbsh35:	move a,tp2lof(c)	; Get per-doff total for this soff
	fdvri a,(5.0)
	move b,tc2lof(c)
	fdvri b,(5.0)
	skipn sumflg
	 outcal(,(" "),f(a,5,2),(" "),f(b,4,2),("|"))
	fadrm a,tp1dof		; Accumulate the averages for the
	fadrm b,tc1dof		; end-of-line figure.
	aobjn c,tbsh35
	move a,tp1dof
	fdvri a,(5.0)
	move b,tc1dof
	fdvri b,(5.0)
	skipn sumflg
	 outcal(,(" "),f(a,5,2),(" "),f(b,4,2),eol,eol)

	; One soff section done, on to next.
	aobjn e,tbsho2		; Loop over all soffs.

	; Now output section averaging all soffs.
	skipn sumflg
	 outcal(,("Avgs all soffs"),eol)
	movsi c,-5
tbsh21:	setzm tb2lof(c)
	setzm tc2lof(c)
	setzm tp2lof(c)
	aobjn c,tbsh21

	movsi d,-5
tbsh23:	setzm tb1dof
	setzm tc1dof
	setzm tp1dof
	skipn sumflg
	 outcal(,tc(ind))
	movsi c,-5
tbsh24:	movei xpr,(d)
	imuli xpr,5
	addi xpr,(c)		; Get index into table (loff*5 + doff)
	move a,tp3sof(xpr)	; Retrieve value
	fdvri a,(5.0)
	move b,tc3sof(xpr)
	fdvri b,(5.0)
	skipn sumflg
	 outcal(,(" "),f(a,5,2),(" "),f(b,4,2),("|"))
	fadrm a,tp2lof(c)
	fadrm b,tc2lof(c)
	fadrm a,tp1dof
	fadrm b,tc1dof
	aobjn c,tbsh24		; Loop over all dof avgs.
	move a,tp1dof
	fdvri a,(5.0)
	move b,tc1dof
	fdvri b,(5.0)
	skipn sumflg
	 outcal(,(" "),f(a,5,2),(" "),f(b,4,2),eol)
	aobjn d,tbsh23		; Loop over all lof avgs.

	; avgs section done, output avgs line for it.
	skipn sumflg
	 outcal(,tc(ind))
	setzm tb1dof
	setzm tc1dof
	setzm tp1dof
	movsi c,-5
tbsh25:	move a,tp2lof(c)
	fdvri a,(5.0)
	move b,tc2lof(c)
	fdvri b,(5.0)
	skipn sumflg
	 outcal(,(" "),f(a,5,2),(" "),f(b,4,2),("|"))
	fadrm a,tp1dof
	fadrm b,tc1dof
	aobjn c,tbsh25
	move a,tp1dof
	fdvri a,(5.0)
	move b,tc1dof
	fdvri b,(5.0)
	skipn sumflg
	 outcal(,(" "),f(a,5,2),(" "),f(b,4,2),eol)

	; All done with table output, wrap up.
; TABSHO wrap-up: average time-per-char, setup time and break-even point
; over all RTBLN configurations, store them in the per-version result
; tables, and print them.
;	E/ total RTPC
;	C/ total setup time
;	TP1DOF/ total breakeven time
;	The version index pushed at TABSHO entry is popped into D here.

	movsi xpr,-rtbln	; Loop over all configs impartially
	setzb c,tp1dof		; Clear BOTH the setup-time accumulator C and
				; TP1DOF.  (This was SETZM, which ignores its
				; AC field and so left the previous loop's
				; AOBJN counter in C.)
	setz e,
tbsh91:	fadr e,rtpc(xpr)	; Add to total RTPC
	move b,rtpc(xpr)	; Setup time is rtbas - rtlen*rtpc
	fmpr b,rtlen(xpr)
	fadr c,rtbas(xpr)	; Add to total of all setup times.
	fsbr c,b
	call brkevn		; Now find breakeven point for config
	fadrm a,tp1dof		; Add to total.
	aobjn xpr,tbsh91

	move a,tp1dof
	fdvr a,[125.0]		; Get average breakeven point!
	fdvr c,[125.0]		; Get average setup time!
	fdvr e,[125.0]		; Get average TPC!

	pop p,d			; Restore version index
	movem a,verrtp(d)	; Store results for this version
	movem c,verrtb(d)
	movem e,verrtc(d)

	out(,eol,("Routine "),tc(vernam(d)),(": Avg over all configs"))
	out(,eol,tab,f(e,6,2),(" usec/char"))
	out(,eol,tab,f(c,6,2),(" usec setup time"))
	out(,eol,tab,f(a,6,2))
	trnn d,-1		; Version 0 is the control routine,
	 outcal(,(" chars avg test string length (No breakeven point!)"))
	trne d,-1		; anything else gets a real breakeven point.
	 outcal(,(" chars breakeven point"))
	out(,eol,eol,eol)
	ret

; SUMSHO - print one summary line per version from VERNAM, VERLEN,
;	VERRTB, VERRTC and VERRTP.  Uses E as the table index.

sumsho:	out(,("Summary of all versions: (time in usec) Routine # wds Setup time time/char Breakeven point "))
	movsi e,-vertln		; Loop thru all versions
sumsh1:	out(,(" "),fmt(tc(vernam(e)),-6),d(verlen(e),6),f(verrtb(e),11.,2),f(verrtc(e),11.,2),f(verrtp(e),11.,2),(" chars"),eol)
	aobjn e,sumsh1
	ret

subttl Miscellaneous

; BRKEVN - Compute break-even point for this configuration (RT) as
;	compared with control config (CT).
;	XPR/ index into the result tables
;	Returns A/ break-even point (floating), forced to 0 if negative.
;	Clobbers B.
; Want breakeven point B such that
;	<ctsetup> + B*<ctpc> = <rtsetup> + B*<rtpc>
; So we can solve for B as:
;	B = (<ctsetup> - <rtsetup>)/(<rtpc> - <ctpc>)
; The setup time can be derived in theory as
;	setup = base - len*tpc
; So we insert that and derive:
;	B = (ctbas - len*ctpc - rtbas + len*rtpc)/(rtpc - ctpc)
;	B = (ctbas - rtbas + len*(rtpc - ctpc))/(rtpc - ctpc)
;	B = len + (ctbas - rtbas)/(rtpc - ctpc)
; Note that ctlen and rtlen should be identical.
; NOTE(review): when rtpc equals ctpc (always true for the control
; routine itself) the FDVR divides by zero; the caller's "avg test string
; length" message for the control routine suggests A is then just the
; length - confirm the no-divide behaviour on the target CPU.

brkevn:	move a,ctbas(xpr)
	fsbr a,rtbas(xpr)	; A/ ctbas - rtbas
	move b,rtpc(xpr)
	fsbr b,ctpc(xpr)	; B/ rtpc - ctpc
	fdvr a,b
	fadr a,rtlen(xpr)	; Add length of string used
	caige a,		; Cruddy OS timing can produce negatives!!!
	 setz a,
	ret

; SMVSET - Do various setups.  Currently just converts rtns to KL if nec.
;	Probes the CPU with a BLT on a zeroed AC; if A is still zero
;	afterwards the machine is taken not to be a KL/KS and nothing
;	more is done.  Otherwise calls every non-null VERINI entry.
;	Preserves A.

smvset:	push p,a
	setz a,
	blt a,
	cain a,
	 jrst [pop p,a ? ret]	; Not a KL/KS
	movsi a,-vertln		; a KL or KS.  Run through table.
	skipe verini(a)		; so undef sym or null entry is OK.
	 call @verini(a)	; Set up for KL-ness.
	aobjn a,.-2
	pop p,a
	ret

strct:	0			; current # chars

; CPYZAP / CPYSET - Set up STRCT, STRS1, and STRD2 for copy args.
;	CPYSET takes A/ source char offset, B/ dest char offset,
;	C/ string length.  CPYZAP first unpacks those from C: A from the
;	high 9 bits, B from the next 9 bits, length from the right half.
;	Clobbers C.

cpyzap:	ldb a,[331100,,c]
	ldb b,[221100,,c]
	movei c,(c)
cpyset:	movem c,strct
	move c,strsi		; Source BP, stepped by PTSKIP
	caie a,			; unless the offset is zero.
	 ptskip a,c
	movem c,strs1
	move c,strdi2		; Likewise for the destination BP.
	caie b,
	 ptskip b,c
	movem c,strd2
apopj:	popj p,

; CPYTST - call the routine whose address is in B with the initial
;	source BP, the initial buffer-2 dest BP and STRCNT as arguments.
;	Preserves A-D.

cpytst:	pushae p,[a,b,c,d]
	move d,b		; Routine address out of the way of the args
	move a,strsi
	move b,strdi2
	move c,strcnt
	pushj p,(d)
	popae p,[d,c,b,a]
cpyctl:	popj p,

bytctl:	popj p,

; BYTCPY - plain byte-at-a-time copy.
;	A/ source BP, B/ dest BP, C/ count.  Returns at once if C is
;	zero.  Preserves A-D.

bytcpy:	jumpe c,[popj p,]
	pushae p,[a,b,c,d]
	ildb d,a
	idpb d,b
	sojg c,.-2
	popae p,[d,c,b,a]
	popj p,

end sgo