blob: 6fb9ad0c3fa228eab0a1edd540c9744500b71191 [file] [log] [blame]
# mach: bfin
// The assembly program uses two instructions to speed the decoder inner loop:
// R6= VMAX/VMAX (R5, R4) A0>>2;
// R2 =H+L (SGN(R0)*R1);
// VMAX is a 2-way parallel comparison of four updated path metrics, resulting
// in 2 new path metrics as well as a 2 bit field indicating the selection
// results. This 2 bit field is shifted into accumulator A0. This instruction
// implements the selections of a complete butterfly for a rate 1/n system.
// The H+L(SGN) instruction is used to compute the branch metric used by each
// butterfly. It takes as input a pair of values representing the received
// symbol, and another pair of values which are +1 or -1. The latter come
// from a pre-computed table that holds all the branch metric information for
// a specific set of polynomials. As all symbols are assumed to be binary,
// distance metrics between a received symbol and a branch metric are computed
// by adding and subtracting the values of the symbol according to the
// transition of a branch.
.include "testutils.inc"
start
// 16 in bytes for M2
// A few pointer initializations
// P2 points to decision history, where outputs are stored
loadsym P2, DecisionHistory
// P4 holds address of APMFrom
loadsym P4, APMFrom;
// P5 holds address of APMTo
loadsym P5, APMTo;
// I0 points to precomputed d's
loadsym I0, BranchStorage;
M2.L = 32;
loadsym P0, InputData;
// storage for all precomputed branch metrics
loadsym P1, BranchStorage;
R6 = 0; R0 = 0; // inits
R0.L = 0x0001;
R0.H = 0x0001;
[ P1 + 0 ] = R0;
R0.L = 0xffff;
R0.H = 0xffff;
[ P1 + 4 ] = R0;
R0.L = 0xffff;
R0.H = 0x0001;
[ P1 + 8 ] = R0;
R0.L = 0x0001;
R0.H = 0xffff;
[ P1 + 12 ] = R0;
R0.L = 0xffff;
R0.H = 0x0001;
[ P1 + 16 ] = R0;
R0.L = 0x0001;
R0.H = 0xffff;
[ P1 + 20 ] = R0;
R0.L = 0x0001;
R0.H = 0x0001;
[ P1 + 24 ] = R0;
R0.L = 0xffff;
R0.H = 0xffff;
[ P1 + 28 ] = R0;
R0.L = 0x0001;
R0.H = 0xffff;
[ P1 + 32 ] = R0;
R0.L = 0xffff;
R0.H = 0x0001;
[ P1 + 36 ] = R0;
R0.L = 0xffff;
R0.H = 0xffff;
[ P1 + 40 ] = R0;
R0.L = 0x0001;
R0.H = 0x0001;
[ P1 + 44 ] = R0;
R0.L = 0xffff;
R0.H = 0xffff;
[ P1 + 48 ] = R0;
R0.L = 0x0001;
R0.H = 0x0001;
[ P1 + 52 ] = R0;
R0.L = 0x0001;
R0.H = 0xffff;
[ P1 + 56 ] = R0;
R0.L = 0xffff;
R0.H = 0x0001;
[ P1 + 60 ] = R0;
P1 = 18;
LSETUP ( L$0 , L$0end ) LC0 = P1; // SymNo loop start
L$0:
// Get a symbol and leave it resident in R1
R1 = [ P0 ]; // R1=(InputData[SymNo*2+1] InputData[SymNo*2])
P0 += 4;
A0 = 0;
// I0 points to precomputed D1, D0
loadsym I0, BranchStorage;
I1 = P4; // I1 points to APM[From]
I2 = P4;
I2 += M2; // I2 points to APM[From+16]
I3 = P5; // I3 points to APM[To]
P1 = 16;
P1 += -1;
LSETUP ( L$1 , L$1end ) LC1 = P1;
// APMFrom and APMTo are in alternate
// memory banks.
R0 = [ I0 ++ ]; // load R0 = (D1 D0)
R3.L = W [ I1 ++ ]; // load RL3 = PM0
// (R1 holds current symbol)
R2.H = R2.L = SIGN(R0.H) * R1.H + SIGN(R0.L) * R1.L; // apply sum-on-sign instruction
R3.H = W [ I2 ++ ]; // now, R3 = (PM1 PM0)
L$1:
R5 = R3 +|- R2 , R4 = R3 -|+ R2 || R0 = [ I0 ++ ] || NOP;
// R5 = (PM11 PM01) R4 = (PM10 PM00)
// and load next (D1 D0)
R6 = VIT_MAX( R5 , R4 ) (ASR) || R3.L = W [ I1 ++ ] || NOP;
// do 2 ACS in parallel
// R6 = (nPM1 nPM0) and update to A0
L$1end:
R2.H = R2.L = SIGN(R0.H) * R1.H + SIGN(R0.L) * R1.L || R3.H = W [ I2 ++ ] || [ I3 ++ ] = R6;
// store new path metrics in
// two consecutive locations
R5 = R3 +|- R2 , R4 = R3 -|+ R2;
R6 = VIT_MAX( R5 , R4 ) (ASR);
[ I3 ++ ] = R6;
R7 = A0.w;
[ P2 ] = R7;
P2 += 4; // store history
FP = P4; // swap pointers From <--> To
P4 = P5;
L$0end:
P5 = FP;
// check results
loadsym I0, DecisionHistory
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x6ff2 );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0xf99f );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x9909 );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x6666 );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x0096 );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x6996 );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x9309 );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x0000 );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0xffff );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0xffff );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0xf0ff );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0xcf00 );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x9009 );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x07f6 );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x6004 );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x6996 );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x8338 );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x3443 );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x6bd6 );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x6197 );
R0.L = W [ I0 ++ ]; DBGA ( R0.L , 0x6c26 );
R0.H = W [ I0 ++ ]; DBGA ( R0.H , 0x0990 );
pass
.data
.align 8
InputData:
.dw 0x0001
.dw 0x0001
.dw 0xffff
.dw 0xfffb
.dw 0x0005
.dw 0x0001
.dw 0xfffd
.dw 0xfffd
.dw 0x0005
.dw 0x0001
.dw 0x0001
.dw 0x0001
.dw 0xffff
.dw 0xfffb
.dw 0x0005
.dw 0x0001
.dw 0xfffd
.dw 0xfffd
.dw 0x0005
.dw 0x0001
.align 8
APMFrom:
.dw 0xc000
.dw 0x0
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.dw 0xc000
.align 8
APMTo:
.space (32*8)
.align 8
BranchStorage:
.space (32*8)
.align 8
DecisionHistory:
.space (18*4)