;****************************************************************************** ; 96/192-bit sync pattern finder. Computes running correlations over 4 frames. ;****************************************************************************** ;Algorithm: ; Clear correlation buffer (corbuf) ; Clear running sum buffer (sumbuf) ; Receive F+1 values via DMA in Y buffer ; Repeat forever { ; Clear running max (max, maxsq) ; Repeat for i=1,...,B/A ; Receive A values from DMA in Y buffer (ybuf) ; Receive A values from DMA in correlation buffer ; Receive A values from DMA in sum buffer ; Repeat for ct=A-1,...,0 { ; Compute Fx1 correlation ; Update running sum, running max, correlation buffer ; } ; Send A values from correlation buffer via DMA ; Send A values from sum buffer via DMA ; } ; Report max-2B to control DSP ; } ; ;The "Update running sum, ..." line really means: ; m=s[ct]+sum(yf) ; max=(m^2>=max^2)?m:max ; s[ct]=m-c[j][ct] ; c[j][ct]=sum(yf) ; ;F=frame sync length (96), B=block length (1788*3+96), A<4096 (B/2) ; ;Internal RAM: ; y: correlation: sums: ; <----- power of 2 ------> ; <-- A+F --><-- A --><---> <-- A --><-- A --> <-- A --><-- A --> ; compute DMA compute DMA compute DMA ; ;SBSRAM: ; correlation: sums: ; i-2 <---- B ----> <---- B ----> ; i-1 <---- B ----> ; i <---- B ----> ;Global RAM: ; y: ; -- from control DSP -- ;The pipelined multiply-add's of the correlation computation are arranged as ;shown in the following sketch. Time is horizontal; computation for each pair ;of correlations begins at the top and finishes at the bottom of the sketch. ;The top and bottom edges are numbered to show when each pair of computations ;begins and ends, respectively. Each correlation computation requires 54 ;clock cycles, (numbered 2 through 55 for historical reasons). A few of ;these key cycle numbers are shown on the left edge of the sketch. In the ;assembly code, most lines are commented with this cycle number as ;";NN: comment". For each pair of correlations computed, the input symbols ;are fetched in 22 sets of four (or occasionally two), numbered 0 through 21. ;About half are processed by the A-side registers, and half by the B-side ;registers, and are sorted by whether they are fetched from even or odd word ;addresses (to avoid bank hits). For example, the 7'th step of a correlation ;computation involves (among other things) reading the 5'th set of four ;symbols from memory. Those lines in the assembly language bear the comments ;";7: a5" and ";7: b5", and these comments can be used as search keys. ; ; 0 2 4 6 8 10 12 14 16 18 20 22 24 26 ... ; 2 |\ \ \ \ | |\ \ \ | |\ \ \ | |\ \ \ | |\ ; | \ \ \ \ | | \ \ \ | | \ \ \ | | \ \ \ | | \ ; | \ \ \ \ | | \ \ \ | | \ \ \ | | \ \ \ | | \ ; | \ \ \ \|_| \ \ \|__| \ \ \|_| \ \ \|__| \ ;25 |\ \ \ \ | |\ \ \ | |\ \ \ | |\ \ \ | |\ ; | \ \ \ \ | | \ \ \ | | \ \ \ | | \ \ \ | | \ ; | \ \ \ \ | | \ \ \ | | \ \ \ | | \ \ \ | | \ ; | \ \ \ \|_| \ \ \| | \ \ \|_| \ \ \| | \ ;48 |\ \ \ \ | |\ \ \ | |\ \ \ | |\ \ \ | |\ ;55 | \ \ \ \ | | \ \ \ |\ | \ \ \ | | \ \ \ |\ | \ ; x -2 0 2 4 6 8 10 x 12 14 16 18 20 22 x ... ; |__________________________| |_________________________| ; result=max(these 12) result=max(these 12) ... ; ; Sketched for A=6, B=A*2=12. ; ;Note that the very first pass, the loop is repeated (A+2)/2 times; all ;subsequent passes execute the loop A/2 times. This means the block of B ;symbols fetched are offset from the max function by 2 more than one might ;expect. .include "gram.asm" ; .include "gramsync.asm" ;## for testing as a single DSP YSIZE .equ 16384 ;must be >= 2(2A+F+2), and a power of 2 ;It turns out 32768 is optimal for all F ;but 16384's OK, and avoids an assembler error for now .if F=96 CONST1 .equ 122 ;Magical constants to correct various maxi offsets CONST2 .equ -124 CONST3 .equ -126 .else ;F=192 ;CONST1 .equ 34 ;I thought this was right before 10/30/00. CONST1 .equ 82 ;As of 10/30/00, I now think maybe it's this. ## CONST2 .equ -36 CONST3 .equ -38 .endif .bss yblk,YSIZE,YSIZE ybuf .equ yblk ybufe .equ yblk+YSIZE ;just past end of ybuf .bss corblk,A*4+4,8 ;1 word overwritten, 2A shorts corbuf .equ corblk+4 ;odd word aligned .bss sumblk,A*4+8,8 ;1 word unused, 1 word overwritten, 2A shorts sumbuf .equ sumblk+8 ;even word aligned .bss marktbl,0x100 ;for debug and timing only corbufx .usect "sbsram",(B-LASTA+A)*6,4 ;might be slightly bigger than corbufxe .equ corbufx+(B-LASTA+A)*6 ;B, since A>=LASTA. sumbufx .usect "sbsram",(B-LASTA+A)*2,4 ;not used when A=B/2 sumbufxe .equ sumbufx+(B-LASTA+A)*2 timer0 .set 0x01940000 timerC .set 0 timerT .set 1 timerD .set 2 DMA .set 0x01840000 DMA0 .set 0 DMA2 .set 1 DMA1 .set 16 DMA3 .set 17 DMApri .set 0 DMAsec .set 2 DMAsrc .set 4 DMAdst .set 6 DMAcnt .set 8 DMActA .set 10 DMActB .set 11 DMAixA .set 12 DMAixB .set 13 DMAadA .set 14 DMAadB .set 15 DMAadC .set 26 DMAadD .set 27 VINTA .set 0x016D8004 ;DSP A's interrupt register VINTB .set 0x016D8008 ;DSP B's interrupt register ;*** Name some registers *** ;Shuffle these assignments at will, within the following constraints: ; Preserve a-side/b-side for all but DMA registers ; ct,s2,dmac,cnd are condition regs ; ada, adb, and dmady require circular addressing. maxsq .equ a0 ;max squared s0 .equ b0 ;sum of values with +1 in correlations 1 and 2 s2 .equ a1 ;sum of values with +1 in correlation 2 only ct .equ b1 ;loop counter cnd .equ a2 ;a condition register sum .equ b2 ;sum of all F values dlta .equ a3 ;second inductive correction to sum dltb .equ b3 ;first inductive correction to sum ada .equ a4 ;circular address (side a/b) (in ybuf) adb .equ b4 sbuf .equ a5 ;sum buffer pointer cbuf .equ b5 ;correlation buffer pointer timer .equ a6 ofs .equ b6 ;offset to actual y values in SBSRAM rcor .equ a7 ;running correlation s1 .equ b7 ;sum of values with +1 in correlation 1 only lowa .equ a8 ;first (low half) of fetched pair lowb .equ b8 higha .equ a9 ;second (high half) of fetched pair highb .equ b9 onea .equ a10 ;const 10000h, low half is also outer loop counter oneb .equ b10 ;const 10000h for splitting high/low halves s0a .equ a11 ;running sum0 (s0=s0a+s0b) s0b .equ b11 s1a .equ a12 ;running sum1 (s1=s1a+s1b) s1b .equ b12 s2a .equ a13 ;running sum2 (s2=s2a+s2b) s2b .equ b13 mark .equ a14 ;where the final results are stored cor .equ b14 ;old correlation max .equ a15 ;max value maxi .equ b15 ;offset of max value ;DANGEROUS! Double-named registers are: ;s0, s1, s2, a6, b6, and dltb are available during spin-down code ;dmad .equ s0 ;DMA simulator circular dest address (in ybuf) dmac .equ s2 ;DMA simulator counter ;dmasy .equ a6 ;DMA simulator source address ;dmady .equ b6 ;DMA simulator non-circular dest address dmas .equ dltb ;DMA simulator source address for y dmav .equ s0 ;DMA simulator data value .sect ".vectors" b.s1 start ;IST entry 0 (RESET) || nop 5 tstamp .macro ; ldw *timer[timerD],dmav ; nop 4 ; stw dmav,*mark++ .endm .sect .text start: ;*********** Initialization *********** ;Initialize some CPU control registers for safety mvk 0x00000100,ada ;Disable interrupts, little-endian. mvc ada,csr zero s0a ;Make all registers linear mvc s0a,amr mvk -1,dmav ;Clear any pending interrupts mvc dmav,icr mvk timer0,timer ;Fire up the timer mvkh timer0,timer mvk 0x204,dmav ;cpuclk(0x200)+enable global SRAM(0x4) stw dmav,*timer[timerC] ;hold timer mvk -1,dmav stw dmav,*timer[timerT] ;max count=0xffffffff mvk 0x2c4,dmav ;cpuclk(0x200)+run(0xc0)+global SRAM (0x4) stw dmav,*timer[timerC] ;start timing ;Clear corbuf in local memory mvk corbuf-4,ada mvkh corbuf-4,ada zero s0a mvk A,ct ;number of words to clear, minus 1 clrcor: [ct] b clrcor sub ct,1,ct stw s0a,*ada++ nop 3 ;Clear sumbuf in local memory, including the preceding half word! mvk sumbuf-4,ada mvkh sumbuf-4,ada zero s0a mvk A,ct ;number of words to clear, minus 1 clrsum: [ct] b clrsum sub ct,1,ct stw s0a,*ada++ nop 3 ;Clear corbuf in external memory mvk corbufx,ada mvkh corbufx,ada mvk (corbufxe-corbufx)/4-1,ct ;number of words to clear, minus 1 mvkh (corbufxe-corbufx)/4-1,ct clrcrx: [ct] b clrcrx sub ct,1,ct stw s0a,*ada++ nop 3 ;Clear sumbuf in external memory (sumbufx not needed when B=2A, or B=A) .if B>A+LASTA mvk sumbufx,ada mvkh sumbufx,ada mvk (sumbufxe-sumbufx)/4-1,ct ;number of words to clear, minus 1 mvkh (sumbufxe-sumbufx)/4-1,ct clrsmx: [ct] b clrsmx sub ct,1,ct stw s0a,*ada++ nop 3 .endif ;Make addresses in ybuf circular mvk 0x1101,s0a ;Make A4, B4, B6 circular, size 16K mvklh 0x000d,s0a mvc s0a,amr ;Initialize lots of DMA registers mvk DMA,dmas mvkh DMA,dmas mvk 0x0000a000,dmav ;clear block IE for DMA 0,2 mvkh 0x0000a000,dmav stw dmav,*dmas[DMA0+DMAsec] stw dmav,*dmas[DMA2+DMAsec] mvk 0x0000a080,dmav ;set block IE for DMA 1 mvkh 0x0000a080,dmav stw dmav,*dmas[DMA1+DMAsec] mvk sumbufx,dmav ;store sumbufx ptr in secret reg mvkh sumbufx,dmav stw dmav,*dmas[DMAadA] mvk corbufx,dmav ;store corbufx ptr in secret reg mvkh corbufx,dmav stw dmav,*dmas[DMAadD] ;Announce to Control DSP that we're alive mvk gsync,s0a mvkh gsync,s0a mvk -1,dmav stw dmav,*s0a ;Wait for B+A+F+2 values to arrive in gybuf iwait: mvc ifr,ct ;What interrupts are pending? extu ct,27,31,ct ;mask for inter-DSP interrupt shl ct,4,ct [!ct] b iwait nop 5 mvc ct,icr ;Found. Now clear it at the CPU mvk VINTB,dmav ;and in the Monaco registers mvkh VINTB,dmav stw ct,*dmav ;Receive A+F+2 values for ybuf with DMA0 mvk gybuf0,dmav ;source (next block is B-short aligned) mvkh gybuf0,dmav stw dmav,*dmas[DMA0+DMAsrc] mvk ybuf,dmav ;destination mvkh ybuf,dmav stw dmav,*dmas[DMA0+DMAdst] mvk (A+F+2)/2,dmav ;count stw dmav,*dmas[DMA0+DMAcnt] mvk 0x01000051,dmav ;vanilla DMA, DMA priority mvkh 0x01000051,dmav stw dmav,*dmas[DMA0+DMApri] ;start DMA nop 9 ;how fast does the DMA respond? dwait: ldw *dmas[DMA0+DMApri],cnd ;wait for it to finish nop 4 and 0xc,cnd,cnd [cnd] b dwait nop 5 ;Compute sum for correlation mvk ybuf,ada ;Compute sum of first F elements mvkh ybuf,ada mvk F-1,ct ;number of elements to sum, minus 1 zero sum isum: ldh *ada++,lowa nop [ct] b isum nop 2 add lowa,sum,sum sub ct,1,ct nop ;Read A values for ybuf with DMA0 .if B=A+LASTA ;If this is the last fragment, mvk LASTA/2,dmav ;then count is LASTA shorts .else mvk A/2,dmav ;else, count is A shorts. .endif stw dmav,*dmas[DMA0+DMAcnt] mvk 0x01000051,dmav ;basic DMA, DMA priority mvkh 0x01000051,dmav stw dmav,*dmas[DMA0+DMApri] ;start DMA ;Initialize all registers mvk marktbl,mark ;table of identified ASM locations mvkh marktbl,mark mvk gybuf+(B-A-F-2)*2,ofs ;actual address of external y data mvkh gybuf+(B-A-F-2)*2,ofs mvk corbuf-4,cbuf ;first good data goes at corbuf mvkh corbuf-4,cbuf mvk sumbuf-4,sbuf ;first good data goes at sumbuf mvkh sumbuf-4,sbuf mvk (B-LASTA)/A,onea ;low half is outer loop counter mvklh 1,onea ;high half=1 for multiply/copy mvk 0,oneb ;low half is unused mvklh 1,oneb ;high half=1 for multiply/copy mvk ybuf+4,ada mvkh ybuf+4,ada mvk ybuf+24,adb mvkh ybuf+24,adb zero max ;clear all these so running sums are correct zero maxsq ;despite the loop fragments executed zero cor zero rcor zero s1 zero s2 shr sum,1,s1a ;set s1a, s2a to sum/2, so corr's come out shr sum,1,s2a ;about zero. Not exact for first 4 results! zero s1b zero s2b zero s0a zero s0b zero dlta zero dltb zero higha zero highb zero lowa zero lowb mvk A+2,ct ;Number of correlations to compute, plus 2 ;Ready to go! tstamp b clk2 ;Start loop at clock 2 nop 5 ;Fill remaining branch delay slots ;*********** The loop *********** .if F=192 ;Clock 44' (22) loop: ldw.d1 *ada++[-24],lowa ;44': a42 || ldw.d2 *adb++[-22],lowb ;44': b42 || mpyh.m1 lowa,onea,higha ;44': a37 || mpyh.m2 lowb,oneb,highb ;44': b37 || add.s2 s1b,lowb,s1b ;44': b37 || add.l1 s0a,higha,s0a ;44': a35 || add.l2 s0b,highb,s0b ;44': b35 ;Clock 45' (23) ldw.d1 *ada++[-21],lowa ;45': a43 || ldw.d2 *adb++[7],dltb ;45': b43 || mpyh.m2 lowb,oneb,highb ;45': b38 || add.s2 s0b,lowb,s0b ;45': b38 || add.l1 s0a,higha,s0a ;45': a36 || add.l2 s1b,highb,s1b ;45': b36 || mpyh.m1 rcor,rcor,cnd ;47: square cor ;Clock 46' (24) mpyh.m2 lowb,oneb,highb ;46': a39 || add.s2 s0b,lowb,s0b ;46': a39 || add.l1 s2a,higha,s2a ;46': a37 || add.l2 s2b,highb,s2b ;46': b37 || stw.d1 s2,*sbuf++ ;47: store running correlation || stw.d2 s0,*cbuf++ ;47: store new correlation ;Clock 2' (2) clk2: ldw.d1 *ada++[2],s1a ;2': a0, clear s1a || ldw.d2 *adb++[-4],s2b ;2': b0, clear s2b || mpyh.m2 lowb,oneb,highb ;47': b40 || add.s1 s1a,lowa,s1a ;47': a40 || add.s2 s2b,lowb,s2b ;47': b40 || add.l2 s0b,highb,s0b ;47': b38 || mpy.m1 rcor,rcor,cnd ;48: square rcor ;Clock 3' (3) clk3: ldw.d1 *ada++[2],s2a ;3': a1, clear s2a || ldw.d2 *adb++[8],lowb ;3': b1 || mpyh.m1 lowa,onea,higha ;48': a41 || mpyh.m2 lowb,oneb,highb ;48': b41 || add.s1 s0a,lowa,s0a ;48': a41 || add.l2 s0b,highb,s0b ;48': a39 || cmplt.l1 maxsq,cnd,s2 ;49: s2=max^2A+LASTA ;Write A values from sumbuf with DMA1, then ;read A values from sumbuf with DMA2 when DMA1 is done stw sbuf,*dmas[DMA1+DMAsrc] ;source1 stw sbuf,*dmas[DMA2+DMAdst] ;destination2 ldw *dmas[DMAadA],dmav ;retrieve sumbufx ptr from secret reg mvk sumbufxe,dmac mvkh sumbufxe,dmac nop 2 stw dmav,*dmas[DMA1+DMAdst] ;destination 1 addk A*2,dmav cmpgt dmac,dmav,cnd [!cnd] mvk sumbufx,dmav [!cnd] mvkh sumbufx,dmav stw dmav,*dmas[DMAadA] ;update secret reg addk A*2,dmav ;source 2 cmpgt dmac,dmav,cnd [!cnd] mvk sumbufx,dmav [!cnd] mvkh sumbufx,dmav stw dmav,*dmas[DMA2+DMAsrc] mvk A/2,dmav ;count1, count2 stw dmav,*dmas[DMA1+DMAcnt] stw dmav,*dmas[DMA2+DMAcnt] mvk 0x00000080,dmav stw dmav,*dmas[DMA1+DMAsec] ;DMA1 interrupts when done mvk 0x03000051,dmav mvkh 0x03000051,dmav stw dmav,*dmas[DMA1+DMApri] ;start DMA1 mvk 0x05024051,dmav ;DMA2 starts after DMA1 finishes mvkh 0x05024051,dmav stw dmav,*dmas[DMA2+DMApri] ;enable DMA2 dmalp2: ldw *dmas[DMA2+DMApri],cnd ;wait for DMA2 to finish nop 4 and 0xc,cnd,cnd [cnd] b dmalp2 nop 5 tstamp .endif ;Write A values from corbuf with DMA1 ;Read A values for corbuf with DMA2 when DMA1 is done stw cbuf,*dmas[DMA1+DMAsrc] ;source1 stw cbuf,*dmas[DMA2+DMAdst] ;destination2 ldw *dmas[DMAadD],dmav ;retrieve corbufx ptr from secret reg mvk corbufxe,dmac mvkh corbufxe,dmac nop 2 stw dmav,*dmas[DMA1+DMAdst] ;destination1 addk A*2,dmav cmpgt dmac,dmav,cnd [!cnd] mvk corbufx,dmav [!cnd] mvkh corbufx,dmav stw dmav,*dmas[DMAadD] ;update secret reg addk A*2,dmav ;source2 cmpgt dmac,dmav,cnd [!cnd] mvk corbufx,dmav [!cnd] mvkh corbufx,dmav stw dmav,*dmas[DMA2+DMAsrc] mvk A/2,dmav ;count1, count2 stw dmav,*dmas[DMA1+DMAcnt] stw dmav,*dmas[DMA2+DMAcnt] mvk 0x00000080,dmav stw dmav,*dmas[DMA1+DMAsec] ;DMA1 interrupts when done mvk 0x03000051,dmav mvkh 0x03000051,dmav stw dmav,*dmas[DMA1+DMApri] ;start DMA1 mvk 0x05024051,dmav ;DMA2 starts after DMA1 finishes mvkh 0x05024051,dmav stw dmav,*dmas[DMA2+DMApri] ;enable DMA2 ;Read A (or LASTA) values for ybuf with DMA0 ext onea,16,16,cnd ;extract outer loop counter again sub cnd,1,cnd ;cnd=0 ? fetch LASTA values : fetch A values [cnd] mvk A,ct ;Number of correlations to compute [!cnd] mvk LASTA,ct ;Maybe fewer correlations last time. ldw *dmas[DMA0+DMAsrc],dmav ;wrap around source pointer if needed mvk gybufe,cnd mvkh gybufe,cnd nop 2 cmpeq dmav,cnd,cnd [cnd] mvk gybuf,dmav [cnd] mvkh gybuf,dmav [cnd] stw dmav,*dmas[DMA0+DMAsrc] [cnd] mvk gybuf-gybufe,dmav ;future max's will assume the wrapped [cnd] mvkh gybuf-gybufe,dmav ;(smaller) offset. [cnd] add ofs,dmav,ofs ldw *dmas[DMA0+DMAdst],dmav ;if 2 frames last time, must fix dst nop 4 shr dmav,31,cnd ;if high bit clear, no good [!cnd] addk ybufe-ybuf,dmav ;undo erroneous end-of-frame indexing [!cnd] stw dmav,*dmas[DMA0+DMAdst] ;store fixed version mvk ybufe,cnd ;are we exactly at the end of ybuf? mvkh ybufe,cnd cmpeq cnd,dmav,cnd [cnd] b wrap0 ;wrap around, no fractional part first nop 5 mvk ybufe,cnd ;are at least A words left in ybuf? mvkh ybufe,cnd sub cnd,ct,cnd ;ct is in shorts, so subtract it twice sub cnd,ct,cnd cmplt cnd,dmav,cnd ;if cnd=0, we don't wrap around [!cnd] b wrap1 nop 5 mvk ybufe,cnd ;move two partial blocks mvkh ybufe,cnd sub cnd,dmav,dmav ;count for first (in bytes) shr dmav,2,dmav ;convert to words mvklh 2,dmav ;two frames stw dmav,*dmas[DMA0+DMAcnt] shr ct,1,cnd ;count for second sub cnd,dmav,dmav mvkh 0,dmav ;clear high half (one frame) stw dmav,*dmas[DMActA] mvk 4,dmav ;index reg gives offset to second dest mvklh ybuf-ybufe+4,dmav stw dmav,*dmas[DMAixA] mvk 0x010000d1,dmav ;2-frame DMA, ixA for dest mvkh 0x010000d1,dmav b wrap2 ;done. nop 5 wrap0: mvk ybuf,cnd ;destination mvkh ybuf,cnd stw cnd,*dmas[DMA0+DMAdst] wrap1: shr ct,1,dmav ;count stw dmav,*dmas[DMA0+DMAcnt] mvk 0x01000051,dmav ;basic DMA mvkh 0x01000051,dmav wrap2: stw dmav,*dmas[DMA0+DMApri] ;start DMA ;Bizarre fact: if the above DMA is not given priority, the system occasionally ;deadlocks, apparently due to a bug internal to the '6201 DMA priority logic. ;Channel 0 finishes except for the last write to DMEM, and channel 1 fills ;the DMA FIFO and then stalls. Apparently this also ties up the DMEM bus, ;since any subsequent HPI access to DMEM fails (the A24 VME chip times out?), ;though all other memory and periperals are accessible. ;Reset count, and ping-pong cbuf and sbuf to other buffer half mvk corbuf+A,cnd ;cbuf=(cbuf==corbuf+2A)?corbuf:corbuf+2A mvkh corbuf+A,cnd sub cnd,cbuf,cbuf add cnd,cbuf,cbuf .if B>A ;if B>A, ping-pong sbuf to other half mvk sumbuf+A,cnd ;sbuf=(sbuf==sumbuf+2A)?sumbuf:sumbuf+2A mvkh sumbuf+A,cnd sub cnd,sbuf,sbuf add cnd,sbuf,sbuf .else mvk sumbuf,sbuf ;if B=A, we use the same data immediately mvkh sumbuf,sbuf ;sbuf=sumbuf .endif ext onea,16,16,cnd ;extract outer loop counter again [cnd] mvk A,ct ;Number of correlations to compute [!cnd] mvk LASTA,ct ;Maybe fewer correlations last time. tstamp b clk3 ;Spin up pipeline again .if F=96 ;Clock 21 ldw.d1 *ada++[2],lowa ;21: a19 || ldw.d2 *adb++[-22],lowb ;21: b19 ;Clock 22 ldw.d1 *ada++[-24],lowa ;22: a20 || ldw.d2 *adb++[-22],lowb ;22: b20 ;Clock 23 ldw.d1 *ada++[-21],lowa ;23: a21 || ldw.d2 *adb++[7],dltb ;23: b21 ;Clock 24 mpyh.m1 rcor,rcor,cnd ;47: square cor ;Clock 2 ldw.d1 *ada++[2],s1a ;2: a0 || ldw.d2 *adb++[-4],s2b ;2: b0 || mpy.m1 rcor,rcor,cnd ;48: square rcor .endif .if F=192 ;Clock 43' (21) nop ;Clock 44' (22) ldw.d1 *ada++[-24],lowa ;44': a42 || ldw.d2 *adb++[-22],lowb ;44': b42 ;Clock 45' (23) ldw.d1 *ada++[-21],lowa ;45': a43 || ldw.d2 *adb++[7],dltb ;45': b43 ;Clock 46' (24) mpyh.m1 rcor,rcor,cnd ;47: square cor ;Clock 2' (2) ldw.d1 *ada++[2],s1a ;2': a0, clear s1a || ldw.d2 *adb++[-4],s2b ;2': b0, clear s2b || mpy.m1 rcor,rcor,cnd ;48: square rcor .endif ; branch to clock 3 occurs here.