*============================================================================
* etchi_loop -- hand-scheduled, software-pipelined TMS320C6x loop
*
* Layout of this section
*   1. Register aliases (.set).  Many symbolic names share one physical
*      register, so a name is only valid between the instruction that
*      writes it and the last instruction that reads it.
*   2. Prolog  : markers 0:15 .. 1:22 (fills the pipeline)
*   3. Kernel  : markers 1:23 .. 2:22 (label etchi_loop, 26 execute packets)
*   4. Epilog  : markers 2:23 .. 4:19 (one more branch-free pass of the
*                body, then the drain of the last results)
*
* A "; s:c" comment precedes the execute packet it labels; c runs 0..25.
* Instructions joined with || issue in the same cycle.
*
* What one pass of the body does, as far as this section shows:
*   - loads five sample words s0..s4 through d (d advances by one word per
*     pass; the other four use the OFF_SAMP_1..OFF_SAMP_4 word offsets,
*     which are defined outside this section),
*   - loads coefficient words relative to ga and gb (names ending in a / b;
*     the prolog comments call them "channel 0" and "channel 32"),
*   - stores fa, fb, ra, rb at dd + 0x00, 0x20, 0x40, 0x60 words and then
*     advances dd by 4 bytes.
* g advances by 3 per pass.  The gain word gg is reloaded from *g whenever
* (g << 29) == 0 and is shifted right by 2 otherwise.  The loop branches
* back while EXT(g, 25, 27) != -9.
*
* Requirements visible here: d, dd, g and t must be initialised by the
* caller; gg must already be valid if (g << 29) != 0 on entry.
*============================================================================

*----------------------------------------------------------------------------
* Register aliases
*----------------------------------------------------------------------------
ya4     .set    B2
yb3     .set    A2
tb2     .set    B2
ta3     .set    A2
ea0     .set    B2
ba      .set    B10
ta4     .set    B2
yb4     .set    A2
tb3     .set    A2
ea1     .set    A9
eb0     .set    B2
ya5     .set    A1
eea     .set    B1
ss4     .set    A0
bb      .set    A10
tb4     .set    A2
ea2     .set    B2
eb1     .set    A2
ta5     .set    B1
eeb     .set    A1
yb5     .set    B1
da      .set    B8
eb2     .set    B2
ea3     .set    A2
ca0     .set    B1
tb5     .set    A1
db      .set    A8
ra      .set    B8
ea      .set    B3
ea4     .set    B2
eb3     .set    A2
xa0     .set    B2
ca1     .set    A1
cb0     .set    B1
fa      .set    B9
rb      .set    A8
gxa     .set    B7
eb      .set    A3
eb4     .set    A2
xb0     .set    A3
xa1     .set    B10
ea5     .set    A1
ca2     .set    B7
cb1     .set    A1
sa0     .set    B5
fb      .set    A9
gxb     .set    A7
ta      .set    B4
ga      .set    A14
dd      .set    B14
xa2     .set    B2
xb1     .set    A2
cb2     .set    B7
ca3     .set    A1
eb5     .set    B1
sb0     .set    A1
ggg     .set    B9
sea     .set    B8
tb      .set    A4
gb      .set    A13
xb2     .set    A2
xa3     .set    B2
ca4     .set    B1
cb3     .set    A11
sa2     .set    B1
s0      .set    B0
gc      .set    A1
ia      .set    B6
gaa     .set    B8
seb     .set    A8
xa4     .set    B0
xb3     .set    A2
cb4     .set    A1
sb2     .set    A1
s1      .set    A0
ja      .set    B8
ib      .set    A12
xb4     .set    A0
ya0     .set    B2
sa4     .set    B1
xa5     .set    B1
s2      .set    B0
jb      .set    B1
gbb     .set    A8
xa      .set    B5
d       .set    B15
xa6     .set    B2
ta0     .set    B4
ya1     .set    A2
yb0     .set    B2
sb4     .set    A1
xb5     .set    A2
s3      .set    A0
zz      .set    B10
ya      .set    B6
xb      .set    A5
xb6     .set    A0
ya2     .set    B2
ta1     .set    A2
tb0     .set    B10
yb1     .set    A2
s4      .set    B0
za      .set    B11
xxa     .set    B8
yb      .set    A6
gg      .set    A15
ta2     .set    B2
ya3     .set    A2
yb2     .set    B2
tb1     .set    A2
lc      .set    B1
zb      .set    A11
xxb     .set    A8
g       .set    B12
t       .set    B13

*----------------------------------------------------------------------------
* Prolog: first gain word, first address set-up, first pass of loads/MPYs
*----------------------------------------------------------------------------
; 0:15
        SHL.S1X g, 29, gc
; 0:16
* NOTE(review): this first gain-word load goes through d (post-incremented),
* whereas every later reload uses *g -- confirm this is intended.
  [!gc] LDW.D2 *d++[1], gg
; 0:17
        NOP
; 0:18
        NOP
; 0:19
        NOP
; 0:20
        NOP
; 0:21
        MV.L2X gg, gaa
||      EXTU.S1 gg, 30, 30, gbb         ; Extracting 2-bit gain from gain word
; 0:22
        ADD.S1X g, gbb, gbb
||      EXTU.S2 gaa, 14, 30, gaa
; 0:23
        SHL.S1 gbb, 7, gb
||      ADD.S2 g, gaa, gaa
||      LDW.D2 *d++[1], s0
; 0:24
        SHL.S1X gaa, 7, ga
||      LDW.D1 *-gb[1], yb5             ; 5p^2
||      LDW.D2 *d[OFF_SAMP_1], s1       ; 2nd sample is after 64 words (128 ch)
; 0:25
        LDW.D1 *-ga[24], sa0            ; h1|h2 for channel 0
; 1:0
        LDW.D1 *-gb[8], sb0             ; h1|h2 for channel 32
; 1:1
        LDW.D1 *-gb[13], cb0            ; a1|b1 for channel 32
; 1:2
        LDW.D1 *-gb[12], cb1            ; a2|b2 for channel 32
||      MPYU.M2 s0, s0, yb0
; 1:3
        LDW.D1 *-gb[4], eb5             ; -p*sum(a) for channel 32
||      MPYU.M1 s1, s1, yb1
; 1:4
        ADD.L2 yb5, yb0, yb0
||      LDW.D1 *-gb[3], tb5             ; -p*sum(b) for channel 32
||      MPYHUS.M2 s0, sa0, xa0
; 1:5
        ADD.L1X yb0, yb1, yb
||      LDW.D1 *-ga[29], ca0
||      MPYUS.M1 s1, sb0, xb1
||      MPYHULS.M2X s1, sa0, xa1
; 1:6
        LDW.D1 *-ga[28], ca1
||      MPYLUHS.M1X s0, sb0, xb0
||      MPYLUHS.M2 s0, cb0, eb0
; 1:7
        ADD.L2 xa0, xa1, xa
||      LDW.D1 *-gb[2], xb5             ; 2p*sum(h)
||      LDW.D2 *d[OFF_SAMP_2], s2       ; 3rd sample is after 2*64 words
||      MPYLUHS.M1 s1, cb1, eb1
||      MPYUS.M2 s0, cb0, tb0
; 1:8
        ADD.L1 xb0, xb1, xb
||      ADD.L2 eb5, eb0, eb0
||      LDW.D1 *-ga[23], sa2            ; h3|h4
||      LDW.D2 *d[OFF_SAMP_3], s3       ; 4th sample is after 3*64 words
||      MPYUS.M1 s1, cb1, tb1
||      MPYHU.M2 s0, s0, ya0
; 1:9
        ADD.L1X eb0, eb1, eb
||      ADD.L2X tb5, tb0, tb0
||      LDW.D1 *-ga[26], ca3            ; a4|b4
||      MPYHU.M1 s1, s1, ya1
; 1:10
        ADD.L1X tb0, tb1, tb
||      ADD2.S2X s1, s0, zz
||      LDW.D1 *-gb[11], cb2            ; a3|b3
||      MPYHUS.M2 s0, ca0, ea0
; 1:11
        ADD.L2X ya1, ya0, ya
||      LDW.D1 *-gb[10], cb3            ; a4|b4
||      MPYHUS.M1 s1, ca1, ea1
||      MPYHULS.M2 s0, ca0, ta0
; 1:12
        ADD.L1 xb5, xb, xb
||      MV.L2X gg, ggg
||      LDW.D1 *-ga[27], ca2            ; a3|b3
||      MPYHULS.M1 s1, ca1, ta1
||      MPYU.M2 s2, s2, yb2
; 1:13
        ADD.L1X ea0, ea1, ea1
||      ADD.L2 g, 3, g
||      LDW.D1 *-gb[7], sb2             ; h3|h4
||      MPYHU.M1 s3, s3, ya3
||      MPYHULS.M2X s3, sa2, xa3
; 1:14
        ADD.L1X yb2, yb, yb
||      ADD.L2X ta1, ta0, ta
||      EXTU.S1 gg, 30, 30, gxb
||      EXT.S2 g, 25, 27, lc
||      LDW.D1 *-ga[18], xa5            ; 2p*sum(h)
||      MPYHUS.M1 s3, ca3, ea3
||      MPYHUS.M2 s2, sa2, xa2
; 1:15
        ADD.L2 xa3, xa, xa
||      SHL.S1X g, 29, gc
||      ADD.S2X ya3, ya, ya
||      LDW.D1 *-ga[20], ea5            ; -p*sum(a)
||      LDW.D2 *d[OFF_SAMP_4], s4       ; 5th sample is after 4*64 words
||      MPYHULS.M1 s3, ca3, ta3
||      MPYLUHS.M2 s2, cb2, eb2
; 1:16
        ADD.L1 ea3, ea1, ea1
||      CMPEQ.L2 -9, lc, lc
||      SHL.S1 gxb, 14, gxb
||      ADD.S2 xa2, xa, xa
||      LDW.D1 *-ga[25], ca4            ; a5|b5
||[!gc] LDW.D2 *g, gg                   ; reading gain word from cc section
||      MPYLUHS.M1 s3, cb3, eb3
||      MPYUS.M2 s2, cb2, tb2
; 1:17
        ADD.L1X eb2, eb, eb
||      ADD.L2X ta3, ta, ta
||      ADD2.S2 s2, zz, zz
||      LDW.D1 *-gb[9], cb4             ; a5|b5
||      MPYUS.M1 s3, cb3, tb3
||      MPYHUS.M2 s2, ca2, ea2
; 1:18
        ADD.L1X tb2, tb, tb
||      ADD.S1 eb3, eb, eb
||      ADD2.S2X s3, zz, zz
||      LDW.D1 *-ga[22], sa4            ; h5|-2p
||      MPYUS.M1 s3, sb2, xb3
||      MPYHULS.M2 s2, ca2, ta2
; 1:19
        ADD.L1 tb3, tb, tb
||      ADD.L2X ea1, ea2, ea
||      SHRU.S1 gg, 2, gg
||      EXTU.S2 ggg, 14, 30, gxa
||      LDW.D1 *-gb[6], sb4             ; h5|-2p
||      ADD.D2 xa5, xa, xa
||      MPYLUHS.M1X s2, sb2, xb2
||      MPYHU.M2 s2, s2, ya2
; 1:20
        ADD.L1 xb3, xb, xb
||      ADD.L2X ea5, ea, ea
||      MV.S1X s4, ss4
||      ADD2.S2 s4, zz, zz
||      LDW.D1 *-ga[21], eea            ; sum(h)|S_E-n
||      ADD.D2 ta2, ta, ta
||      MPYU.M1 s3, s3, yb3
||      MPYHU.M2 s4, s4, ya4
; 1:21
        ADD.L1 xb2, xb, xb
||      MV.L2X gg, gaa
||      EXTU.S1 gg, 30, 30, gbb
||      SHL.S2 gxa, 14, gxa
||      LDW.D1 *-gb[5], eeb             ; sum(h)|S_E-n
||      ADD.D2 ya2, ya, ya
||      MPYU.M1 ss4, ss4, yb4
||      MPYHUS.M2 s4, ca4, ea4
; 1:22
        ADD.L1 yb3, yb, yb
||      ADD.L2 ya4, ya, ya
||      ADD.S1X g, gbb, gbb
||      EXTU.S2 gaa, 14, 30, gaa
||      LDW.D1 *-ga[19], ta5            ; -p*sum(b)
||      ADD.D2 dd, 4, dd
||      MPYLUHS.M1 ss4, cb4, eb4
||      MPYHULS.M2 s4, ca4, ta4

*----------------------------------------------------------------------------
* Kernel: 26 execute packets.  The branch at 2:17 has five delay slots, so
* control returns to etchi_loop right after packet 2:22.
*----------------------------------------------------------------------------
etchi_loop:
; 1:23
        ADD.L1 yb4, yb, yb
||      ADD.L2 ea4, ea, ea
||      SHL.S1 gbb, 7, gb
||      ADD.S2 g, gaa, gaa
||      LDW.D1 *-ga[17], ya5
||      LDW.D2 *d++[1], s0
||      MPYUS.M1 ss4, cb4, tb4
||      MPYHUS.M2 s4, sa4, xa4
; 1:24
        ADD.L1 eb4, eb, eb
||      ABS.L2 ea, fa
||      SHL.S1X gaa, 7, ga
||      SSHL.S2 ea, 2, sea
||      LDW.D1 *-gb[1], yb5
||      LDW.D2 *d[OFF_SAMP_1], s1       ; 2nd sample after 64 words
||      MPYLUHS.M1 ss4, sb4, xb4
||      MPYHL.M2 zz, sa4, za
; 1:25
        ABS.L1 eb, fb
||      NORM.L2 fa, ba
||      SSHL.S1 eb, 2, seb
||      ADD.S2 ta4, ta, ta
||      LDW.D1 *-ga[24], sa0
||      ADD.D2 xa4, xa, xa
||      MPY.M1X zz, sb4, zb
||      MPYH.M2 eea, sea, xa6
; 2:0
        NORM.L1 fb, bb
||      ADD.L2 ba, 2, ba
||      ADD.S1 xb4, xb, xb
||      SUB.S2 eea, ba, ia
||      LDW.D1 *-gb[8], sb0
||      ADD.D2 ya, za, za
||      MPYH.M1 eeb, seb, xb6
; 2:1
        ADD.L1 bb, 2, bb
||      ADD.L2 xa6, xa, xa
||      SUB.S1 eeb, bb, ib
||      SHL.S2 fa, ba, fa
||      LDW.D1 *-gb[13], cb0
||      ADD.D2 ta5, ta, ta
; 2:2
        ADD.L1 xb6, xb, xb
||      ADD.L2X ya5, za, za
||      SHL.S1 fb, bb, fb
||      SSHL.S2 xa, 4, xa
||      LDW.D1 *-gb[12], cb1
||      MPYU.M2 s0, s0, yb0
; 2:3
        ADD.L1 tb4, tb, tb
||      SSHL.S1 xb, 4, xb
||      SHRU.S2 fa, 20, ja
||      LDW.D1 *-gb[4], eb5
||      MPYU.M1 s1, s1, yb1
||      MPYH.M2 sea, xa, xxa
; 2:4
        ADD.L1 yb, zb, zb
||      ADD.L2 yb5, yb0, yb0
||      SSHL.S1 tb, bb, tb
||      SHRU.S2X fb, 20, jb
||      LDW.D1 *-gb[3], tb5
||      LDH.D2 *+t[ja], da
||      MPYH.M1 seb, xb, xxb
||      MPYHUS.M2 s0, sa0, xa0
; 2:5
        ADD.L1X yb0, yb1, yb
||      ADD.L2 xxa, za, za
||      CLR.S1 eb, 0, 30, eb
||      SSHL.S2 ta, ba, ta
||      LDW.D1 *-ga[29], ca0
||      LDH.D2 *+t[jb], db
||      MPYUS.M1 s1, sb0, xb1
||      MPYHULS.M2X s1, sa0, xa1
; 2:6
        ADD.L1 xxb, zb, zb
||      SHRU.S1 fb, 9, fb
||      SHL.S2 ia, 23, ia
||      LDW.D1 *-ga[28], ca1
||      MPYLUHS.M1X s0, sb0, xb0
||      MPYLUHS.M2 s0, cb0, eb0
; 2:7
        OR.L1 eb, fb, fb
||      ADD.L2 xa0, xa1, xa
||      SHL.S1 ib, 23, ib
||      SHRU.S2 fa, 9, fa
||      LDW.D1 *-gb[2], xb5
||      LDW.D2 *d[OFF_SAMP_2], s2       ; 3rd sample
||      MPYLUHS.M1 s1, cb1, eb1
||      MPYUS.M2 s0, cb0, tb0
; 2:8
        ADD.L1 xb0, xb1, xb
||      ADD.L2 eb5, eb0, eb0
||      SSHL.S1 zb, 16, zb
||      CLR.S2 ea, 0, 30, ea
||      LDW.D1 *-ga[23], sa2
||      LDW.D2 *d[OFF_SAMP_3], s3       ; 4th sample
||      MPYUS.M1 s1, cb1, tb1
||      MPYHU.M2 s0, s0, ya0
; 2:9
        ADD.L1X eb0, eb1, eb
||      ADD.L2X tb5, tb0, tb0
||      SSHL.S2 za, 16, za
||      LDW.D1 *-ga[26], ca3
||      ADD.D2 ia, fa, fa
||      MPYHU.M1 s1, s1, ya1
||      MPYLUHS.M2 da, ta, ra
; 2:10
        ADD.L1X tb0, tb1, tb
||      OR.L2 ea, fa, fa
||      CLR.S1 zb, 0, 15, zb
||      ADD2.S2X s1, s0, zz
||      LDW.D1 *-gb[11], cb2
||      MPYLUHS.M1 db, tb, rb
||      MPYHUS.M2 s0, ca0, ea0
; 2:11
        OR.L1 gxb, zb, zb
||      ADD.L2X ya1, ya0, ya
||      OR.S1 ib, fb, fb
||      CLR.S2 za, 0, 15, za
||      LDW.D1 *-gb[10], cb3
||      STW.D2 fa, *+dd[0x00]
||      MPYHUS.M1 s1, ca1, ea1
||      MPYHULS.M2 s0, ca0, ta0
; 2:12
        ADD.L1 xb5, xb, xb
||      MV.L2X gg, ggg
||      SSHL.S1 rb, 1, rb
||      SSHL.S2 ra, 1, ra
||      LDW.D1 *-ga[27], ca2
||      STW.D2 fb, *+dd[0x20]
||      MPYHULS.M1 s1, ca1, ta1
||      MPYU.M2 s2, s2, yb2
; 2:13
        ADD.L1X ea0, ea1, ea1
||      ADD.L2 g, 3, g
||      SHRU.S1 rb, 18, rb
||      SHRU.S2 ra, 18, ra
||      LDW.D1 *-gb[7], sb2
||      ADD.D2 gxa, za, za
||      MPYHU.M1 s3, s3, ya3
||      MPYHULS.M2X s3, sa2, xa3
; 2:14
        ADD.L1X yb2, yb, yb
||      ADD.L2X ta1, ta0, ta
||      EXTU.S1 gg, 30, 30, gxb
||      EXT.S2 g, 25, 27, lc
||      LDW.D1 *-ga[18], xa5
||      ADD.D2 za, ra, ra
||      MPYHUS.M1 s3, ca3, ea3
||      MPYHUS.M2 s2, sa2, xa2
; 2:15
        OR.L1 zb, rb, rb
||      ADD.L2 xa3, xa, xa
||      SHL.S1X g, 29, gc
||      ADD.S2X ya3, ya, ya
||      LDW.D1 *-ga[20], ea5
||      LDW.D2 *d[OFF_SAMP_4], s4       ; 5th sample
||      MPYHULS.M1 s3, ca3, ta3
||      MPYLUHS.M2 s2, cb2, eb2
; 2:16
        ADD.L1 ea3, ea1, ea1
||      CMPEQ.L2 -9, lc, lc
||      SHL.S1 gxb, 14, gxb
||      ADD.S2 xa2, xa, xa
||      LDW.D1 *-ga[25], ca4
||[!gc] LDW.D2 *g, gg                   ; reading gain word from cc section
||      MPYLUHS.M1 s3, cb3, eb3
||      MPYUS.M2 s2, cb2, tb2
; 2:17
        ADD.L1X eb2, eb, eb
||      ADD.L2X ta3, ta, ta
||[!lc] B.S1 etchi_loop                 ; loop while the EXT field of g != -9
||      ADD2.S2 s2, zz, zz
||      LDW.D1 *-gb[9], cb4
||      STW.D2 ra, *+dd[0x40]
||      MPYUS.M1 s3, cb3, tb3
||      MPYHUS.M2 s2, ca2, ea2
; 2:18
        ADD.L1X tb2, tb, tb
||      ADD.S1 eb3, eb, eb
||      ADD2.S2X s3, zz, zz
||      LDW.D1 *-ga[22], sa4
||      STW.D2 rb, *+dd[0x60]
||      MPYUS.M1 s3, sb2, xb3
||      MPYHULS.M2 s2, ca2, ta2
; 2:19
        ADD.L1 tb3, tb, tb
||      ADD.L2X ea1, ea2, ea
||      SHRU.S1 gg, 2, gg
||      EXTU.S2 ggg, 14, 30, gxa
||      LDW.D1 *-gb[6], sb4
||      ADD.D2 xa5, xa, xa
||      MPYLUHS.M1X s2, sb2, xb2
||      MPYHU.M2 s2, s2, ya2
; 2:20
        ADD.L1 xb3, xb, xb
||      ADD.L2X ea5, ea, ea
||      MV.S1X s4, ss4
||      ADD2.S2 s4, zz, zz
||      LDW.D1 *-ga[21], eea
||      ADD.D2 ta2, ta, ta
||      MPYU.M1 s3, s3, yb3
||      MPYHU.M2 s4, s4, ya4
; 2:21
        ADD.L1 xb2, xb, xb
||      MV.L2X gg, gaa
||      EXTU.S1 gg, 30, 30, gbb
||      SHL.S2 gxa, 14, gxa
||      LDW.D1 *-gb[5], eeb
||      ADD.D2 ya2, ya, ya
||      MPYU.M1 ss4, ss4, yb4
||      MPYHUS.M2 s4, ca4, ea4
; 2:22
        ADD.L1 yb3, yb, yb
||      ADD.L2 ya4, ya, ya
||      ADD.S1X g, gbb, gbb
||      EXTU.S2 gaa, 14, 30, gaa
||      LDW.D1 *-ga[19], ta5
||      ADD.D2 dd, 4, dd
||      MPYLUHS.M1 ss4, cb4, eb4
||      MPYHULS.M2 s4, ca4, ta4
* end of etchi_loop

*----------------------------------------------------------------------------
* Epilog, part 1 (2:23 .. 3:22): one more pass of the loop body without the
* branch.  From 3:15 on, the gain-word reload and the next-pass address
* set-up are omitted.
*----------------------------------------------------------------------------
; 2:23
        ADD.L1 yb4, yb, yb
||      ADD.L2 ea4, ea, ea
||      SHL.S1 gbb, 7, gb
||      ADD.S2 g, gaa, gaa
||      LDW.D1 *-ga[17], ya5
||      LDW.D2 *d++[1], s0
||      MPYUS.M1 ss4, cb4, tb4
||      MPYHUS.M2 s4, sa4, xa4
; 2:24
        ADD.L1 eb4, eb, eb
||      ABS.L2 ea, fa
||      SHL.S1X gaa, 7, ga
||      SSHL.S2 ea, 2, sea
||      LDW.D1 *-gb[1], yb5
||      LDW.D2 *d[OFF_SAMP_1], s1       ; 2nd sample (same offset as prolog/kernel)
||      MPYLUHS.M1 ss4, sb4, xb4
||      MPYHL.M2 zz, sa4, za
; 2:25
        ABS.L1 eb, fb
||      NORM.L2 fa, ba
||      SSHL.S1 eb, 2, seb
||      ADD.S2 ta4, ta, ta
||      LDW.D1 *-ga[24], sa0
||      ADD.D2 xa4, xa, xa
||      MPY.M1X zz, sb4, zb
||      MPYH.M2 eea, sea, xa6
; 3:0
        NORM.L1 fb, bb
||      ADD.L2 ba, 2, ba
||      ADD.S1 xb4, xb, xb
||      SUB.S2 eea, ba, ia
||      LDW.D1 *-gb[8], sb0
||      ADD.D2 ya, za, za
||      MPYH.M1 eeb, seb, xb6
; 3:1
        ADD.L1 bb, 2, bb
||      ADD.L2 xa6, xa, xa
||      SUB.S1 eeb, bb, ib
||      SHL.S2 fa, ba, fa
||      LDW.D1 *-gb[13], cb0
||      ADD.D2 ta5, ta, ta
; 3:2
        ADD.L1 xb6, xb, xb
||      ADD.L2X ya5, za, za
||      SHL.S1 fb, bb, fb
||      SSHL.S2 xa, 4, xa
||      LDW.D1 *-gb[12], cb1
||      MPYU.M2 s0, s0, yb0
; 3:3
        ADD.L1 tb4, tb, tb
||      SSHL.S1 xb, 4, xb
||      SHRU.S2 fa, 20, ja
||      LDW.D1 *-gb[4], eb5
||      MPYU.M1 s1, s1, yb1
||      MPYH.M2 sea, xa, xxa
; 3:4
        ADD.L1 yb, zb, zb
||      ADD.L2 yb5, yb0, yb0
||      SSHL.S1 tb, bb, tb
||      SHRU.S2X fb, 20, jb
||      LDW.D1 *-gb[3], tb5
||      LDH.D2 *+t[ja], da
||      MPYH.M1 seb, xb, xxb
||      MPYHUS.M2 s0, sa0, xa0
; 3:5
        ADD.L1X yb0, yb1, yb
||      ADD.L2 xxa, za, za
||      CLR.S1 eb, 0, 30, eb
||      SSHL.S2 ta, ba, ta
||      LDW.D1 *-ga[29], ca0
||      LDH.D2 *+t[jb], db
||      MPYUS.M1 s1, sb0, xb1
||      MPYHULS.M2X s1, sa0, xa1
; 3:6
        ADD.L1 xxb, zb, zb
||      SHRU.S1 fb, 9, fb
||      SHL.S2 ia, 23, ia
||      LDW.D1 *-ga[28], ca1
||      MPYLUHS.M1X s0, sb0, xb0
||      MPYLUHS.M2 s0, cb0, eb0
; 3:7
        OR.L1 eb, fb, fb
||      ADD.L2 xa0, xa1, xa
||      SHL.S1 ib, 23, ib
||      SHRU.S2 fa, 9, fa
||      LDW.D1 *-gb[2], xb5
||      LDW.D2 *d[OFF_SAMP_2], s2       ; 3rd sample
||      MPYLUHS.M1 s1, cb1, eb1
||      MPYUS.M2 s0, cb0, tb0
; 3:8
        ADD.L1 xb0, xb1, xb
||      ADD.L2 eb5, eb0, eb0
||      SSHL.S1 zb, 16, zb
||      CLR.S2 ea, 0, 30, ea
||      LDW.D1 *-ga[23], sa2
||      LDW.D2 *d[OFF_SAMP_3], s3       ; 4th sample
||      MPYUS.M1 s1, cb1, tb1
||      MPYHU.M2 s0, s0, ya0
; 3:9
        ADD.L1X eb0, eb1, eb
||      ADD.L2X tb5, tb0, tb0
||      SSHL.S2 za, 16, za
||      LDW.D1 *-ga[26], ca3
||      ADD.D2 ia, fa, fa
||      MPYHU.M1 s1, s1, ya1
||      MPYLUHS.M2 da, ta, ra
; 3:10
        ADD.L1X tb0, tb1, tb
||      OR.L2 ea, fa, fa
||      CLR.S1 zb, 0, 15, zb
||      ADD2.S2X s1, s0, zz
||      LDW.D1 *-gb[11], cb2
||      MPYLUHS.M1 db, tb, rb
||      MPYHUS.M2 s0, ca0, ea0
; 3:11
        OR.L1 gxb, zb, zb
||      ADD.L2X ya1, ya0, ya
||      OR.S1 ib, fb, fb
||      CLR.S2 za, 0, 15, za
||      LDW.D1 *-gb[10], cb3
||      STW.D2 fa, *+dd[0x00]
||      MPYHUS.M1 s1, ca1, ea1
||      MPYHULS.M2 s0, ca0, ta0
; 3:12
        ADD.L1 xb5, xb, xb
||      MV.L2X gg, ggg
||      SSHL.S1 rb, 1, rb
||      SSHL.S2 ra, 1, ra
||      LDW.D1 *-ga[27], ca2
||      STW.D2 fb, *+dd[0x20]
||      MPYHULS.M1 s1, ca1, ta1
||      MPYU.M2 s2, s2, yb2
; 3:13
        ADD.L1X ea0, ea1, ea1
||      ADD.L2 g, 3, g
||      SHRU.S1 rb, 18, rb
||      SHRU.S2 ra, 18, ra
||      LDW.D1 *-gb[7], sb2
||      ADD.D2 gxa, za, za
||      MPYHU.M1 s3, s3, ya3
||      MPYHULS.M2X s3, sa2, xa3
; 3:14
        ADD.L1X yb2, yb, yb
||      ADD.L2X ta1, ta0, ta
||      EXTU.S1 gg, 30, 30, gxb
||      EXT.S2 g, 25, 27, lc
||      LDW.D1 *-ga[18], xa5
||      ADD.D2 za, ra, ra
||      MPYHUS.M1 s3, ca3, ea3
||      MPYHUS.M2 s2, sa2, xa2
; 3:15
        OR.L1 zb, rb, rb
||      ADD.L2 xa3, xa, xa
||      ADD.S2X ya3, ya, ya
||      LDW.D1 *-ga[20], ea5
||      LDW.D2 *d[OFF_SAMP_4], s4       ; 5th sample
||      MPYHULS.M1 s3, ca3, ta3
||      MPYLUHS.M2 s2, cb2, eb2
; 3:16
        ADD.L1 ea3, ea1, ea1
||      CMPEQ.L2 -9, lc, lc
||      SHL.S1 gxb, 14, gxb
||      ADD.S2 xa2, xa, xa
||      LDW.D1 *-ga[25], ca4
||      MPYLUHS.M1 s3, cb3, eb3
||      MPYUS.M2 s2, cb2, tb2
; 3:17
        ADD.L1X eb2, eb, eb
||      ADD.L2X ta3, ta, ta
||      ADD2.S2 s2, zz, zz
||      LDW.D1 *-gb[9], cb4
||      STW.D2 ra, *+dd[0x40]
||      MPYUS.M1 s3, cb3, tb3
||      MPYHUS.M2 s2, ca2, ea2
; 3:18
        ADD.L1X tb2, tb, tb
||      ADD.S1 eb3, eb, eb
||      ADD2.S2X s3, zz, zz
||      LDW.D1 *-ga[22], sa4
||      STW.D2 rb, *+dd[0x60]
||      MPYUS.M1 s3, sb2, xb3
||      MPYHULS.M2 s2, ca2, ta2
; 3:19
        ADD.L1 tb3, tb, tb
||      ADD.L2X ea1, ea2, ea
||      SHRU.S1 gg, 2, gg
||      EXTU.S2 ggg, 14, 30, gxa
||      LDW.D1 *-gb[6], sb4
||      ADD.D2 xa5, xa, xa
||      MPYLUHS.M1X s2, sb2, xb2
||      MPYHU.M2 s2, s2, ya2
; 3:20
        ADD.L1 xb3, xb, xb
||      ADD.L2X ea5, ea, ea
||      MV.S1X s4, ss4
||      ADD2.S2 s4, zz, zz
||      LDW.D1 *-ga[21], eea
||      ADD.D2 ta2, ta, ta
||      MPYU.M1 s3, s3, yb3
||      MPYHU.M2 s4, s4, ya4
; 3:21
        ADD.L1 xb2, xb, xb
||      SHL.S2 gxa, 14, gxa
||      LDW.D1 *-gb[5], eeb
||      ADD.D2 ya2, ya, ya
||      MPYU.M1 ss4, ss4, yb4
||      MPYHUS.M2 s4, ca4, ea4
; 3:22
        ADD.L1 yb3, yb, yb
||      ADD.L2 ya4, ya, ya
||      LDW.D1 *-ga[19], ta5
||      ADD.D2 dd, 4, dd
||      MPYLUHS.M1 ss4, cb4, eb4
||      MPYHULS.M2 s4, ca4, ta4
* end of the branch-free copy of the loop body

*----------------------------------------------------------------------------
* Epilog, part 2 (3:23 .. 4:19): drain -- finish and store the last results.
* No new sample loads are issued here.
*----------------------------------------------------------------------------
; 3:23
        ADD.L1 yb4, yb, yb
||      ADD.L2 ea4, ea, ea
||      LDW.D1 *-ga[17], ya5
||      MPYUS.M1 ss4, cb4, tb4
||      MPYHUS.M2 s4, sa4, xa4
; 3:24
        ADD.L1 eb4, eb, eb
||      ABS.L2 ea, fa
||      SSHL.S2 ea, 2, sea
||      MPYLUHS.M1 ss4, sb4, xb4
||      MPYHL.M2 zz, sa4, za
; 3:25
        ABS.L1 eb, fb
||      NORM.L2 fa, ba
||      SSHL.S1 eb, 2, seb
||      ADD.S2 ta4, ta, ta
||      ADD.D2 xa4, xa, xa
||      MPY.M1X zz, sb4, zb
||      MPYH.M2 eea, sea, xa6
; 4:0
        NORM.L1 fb, bb
||      ADD.L2 ba, 2, ba
||      ADD.S1 xb4, xb, xb
||      SUB.S2 eea, ba, ia
||      ADD.D2 ya, za, za
||      MPYH.M1 eeb, seb, xb6
; 4:1
        ADD.L1 bb, 2, bb
||      ADD.L2 xa6, xa, xa
||      SUB.S1 eeb, bb, ib
||      SHL.S2 fa, ba, fa
||      ADD.D2 ta5, ta, ta
; 4:2
        ADD.L1 xb6, xb, xb
||      ADD.L2X ya5, za, za
||      SHL.S1 fb, bb, fb
||      SSHL.S2 xa, 4, xa
; 4:3
        ADD.L1 tb4, tb, tb
||      SSHL.S1 xb, 4, xb
||      SHRU.S2 fa, 20, ja
||      MPYH.M2 sea, xa, xxa
; 4:4
        ADD.L1 yb, zb, zb
||      SSHL.S1 tb, bb, tb
||      SHRU.S2X fb, 20, jb
||      LDH.D2 *+t[ja], da
||      MPYH.M1 seb, xb, xxb
; 4:5
        ADD.L2 xxa, za, za
||      CLR.S1 eb, 0, 30, eb
||      SSHL.S2 ta, ba, ta
||      LDH.D2 *+t[jb], db
; 4:6
        ADD.L1 xxb, zb, zb
||      SHRU.S1 fb, 9, fb
||      SHL.S2 ia, 23, ia
; 4:7
        OR.L1 eb, fb, fb
||      SHL.S1 ib, 23, ib
||      SHRU.S2 fa, 9, fa
; 4:8
        SSHL.S1 zb, 16, zb
||      CLR.S2 ea, 0, 30, ea
; 4:9
        SSHL.S2 za, 16, za
||      ADD.D2 ia, fa, fa
||      MPYLUHS.M2 da, ta, ra
; 4:10
        OR.L2 ea, fa, fa
||      CLR.S1 zb, 0, 15, zb
||      MPYLUHS.M1 db, tb, rb
; 4:11
        OR.L1 gxb, zb, zb
||      OR.S1 ib, fb, fb
||      CLR.S2 za, 0, 15, za
||      STW.D2 fa, *+dd[0x00]
; 4:12
        SSHL.S1 rb, 1, rb
||      SSHL.S2 ra, 1, ra
||      STW.D2 fb, *+dd[0x20]
; 4:13
        SHRU.S1 rb, 18, rb
||      SHRU.S2 ra, 18, ra
||      ADD.D2 gxa, za, za
; 4:14
        ADD.D2 za, ra, ra
; 4:15
        OR.L1 zb, rb, rb
; 4:16
        NOP
; 4:17
        STW.D2 ra, *+dd[0x40]
; 4:18
        STW.D2 rb, *+dd[0x60]
; 4:19