/* DCT testbed */ /* testing 12 bit TRACE type data, stored in (nx,ny) image, assume nx and ny are multiples of 8 */ /* various code fragments taken from jpeg-5a */ #include "ana_structures.h" #include #include #include #include "defs.h" #if __sgi #include #include #endif extern struct sym_desc sym[]; extern struct sym_list *subr_sym_list[]; extern int temp_base; extern int edb_context; extern int ana_float(); extern char *find_sym_name(); extern int ana_type_size[]; extern int vfix_top, num_ana_subr, next_user_subr_num; extern float float_arg(); extern double double_arg(); /* zag and zig are used in the re-ordering of the DCT coefficients, they are inverses of one another, zag is used in the compress side */ /* zag[i] is the natural-order position of the i'th element of zigzag order. */ static const int zag[64] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 }; /* zig[i] is the zigzag-order position of the i'th element of a DCT block */ /* read in natural order (left to right, top to bottom). */ static const int zig[64] = { 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42, 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53, 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63 }; /* zigtrans is the transpose of zig and is used as a destination pointer in some of the forward dct's */ static int zigtrans[64] = { 0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36, 5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49, 14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58, 27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63, }; static float aansf[8] = { 1.0, 1.387039845, 1.306562965, 1.175875602, 1.0, 0.785694958, 0.541196100, 0.275899379}; #define huff_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x)) static const int extend_test[16] = /* entry n is 2**(n-1) */ { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 }; static void dct_cell(), rdct_cell(), int_dct_cell(), exp_dct_cell(), exp2_dct_cell(); /* #ifdef __alpha */ void exp3_dct_cell(), exp4_dct_cell(), exp5_dct_cell(); /* #endif */ /* workspace (ws) for the dct calcs, and a place for the modified q tables */ /* should malloc these only if we need them ... */ static float ws[64], fqtbl[64], jpeg_bias = 2048., bias = 2048.5; static int wsint[64], intqtb[64]; static short inverseqt[64]; static short wshort[64], wshort2[64]; static short dctw[64] = { 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 11363, 9633, 6436, 2260, -2260, -6436, -9633, -11363, 10703, 4433, -4433,-10703,-10703, -4433, 4433, 10703, 9633, -2260,-11363, -6436, 6436, 11363, 2260, -9633, 8192, -8192, -8192, 8192, 8192, -8192, -8192, 8192, 6436,-11363, 2260, 9633, -9633, -2260, 11363, -6436, 4433,-10703, 10703, -4433, -4433, 10703, -10703, 4433, 2260, -6436, 9633,-11363, 11363, -9633, 6436, -2260 }; /*------------------------------------------------------------------------*/ int fdct_exp(image, nx, ny, qtable, dct_array) /* int dct, quant, and re-order */ /* exp. code for testing trace methods out */ /* nx and ny must be multiples of 8 (check in calling routine) */ short *qtable, *image, *dct_array; int nx, ny; { int n, i, ncx, iq, ystride, icx, icy, row, col; short *dctstart; /* condition the q table for the output of the dct, taken from jcdctmgr.c */ /* for this method, we used to multiply by 8, but now we do the extra 8 divide in the dct step to be able to store a 16 bit result right away */ for (i = 0; i < 64; i++) { intqtb[i] = (int) qtable[i]; /* just copy into I*4 */ } /* break out the cells and call dct_cell for each */ n = nx*ny/64; /* number of cells */ ncx = nx/8; ystride = nx; dctstart = dct_array; for (i=0; i < n; i++) { /* note that we access image by 8x8 blocks but dct is stored seq. */ icx = i%ncx; icy = i/ncx; iq = icx*8 + icy*8*ystride; exp_dct_cell(&image[iq], ystride, dctstart); dctstart += 64; } return 1; /* success return */ } /*--------------------------------------------------------------------------*/ int fdct_exp2(image, nx, ny, qtable, dct_array,test_flag) /* int dct, quant, and re-order */ /* exp. code for testing trace methods out */ /* nx and ny must be multiples of 8 (check in calling routine) */ short *qtable, *image, *dct_array; int nx, ny, test_flag; { int n, i, ncx, iq, ystride, icx, icy, row, col; short *dctstart; /* condition the q table for the output of the dct, taken from jcdctmgr.c */ /* for this method, we used to multiply by 8, but now we do the extra 8 divide in the dct step to be able to store a 16 bit result right away */ for (i = 0; i < 64; i++) { intqtb[i] = (int) qtable[i]; /* just copy into I*4 */ } /* break out the cells and call dct_cell for each */ n = nx*ny/64; /* number of cells */ ncx = nx/8; ystride = nx; dctstart = dct_array; for (i=0; i < n; i++) { /* note that we access image by 8x8 blocks but dct is stored seq. */ icx = i%ncx; icy = i/ncx; iq = icx*8 + icy*8*ystride; exp2_dct_cell(&image[iq], ystride, dctstart, test_flag); dctstart += 64; } return 1; /* success return */ } /*--------------------------------------------------------------------------*/ /* the exp3 routines are only defined for the alpha, they use 64 bit shifts and such */ /* #ifdef __alpha */ int ana_trace_expdct3(narg,ps) /* function for exp dct tables */ /* dct_array = ana_trace_expdct( image, qtable) */ int narg, ps[]; /* calls trace_dct with mode = 5 for int dct */ { return trace_dct(narg,ps, 5); } /*---------------------------------------------------------------------------*/ int fdct_exp3(image, nx, ny, qtable, dct_array,test_flag) /* int dct, quant, and re-order */ /* exp. code for testing trace methods out */ /* nx and ny must be multiples of 8 (check in calling routine) */ short *qtable, *image, *dct_array; int nx, ny, test_flag; { int n, i, ncx, iq, ystride, icx, icy, row, col; short *dctstart; printf("in fdct_exp3\n"); /* condition the q table for the output of the dct, taken from jcdctmgr.c */ /* the exp3 version uses multiplies for the QT */ for (i = 0; i < 64; i++) { inverseqt[i] = (short) ((0x00004000)/(int) qtable[i]); /* (2^14)/qtable */ } /* break out the cells and call dct_cell for each */ n = nx*ny/64; /* number of cells */ ncx = nx/8; ystride = nx; dctstart = dct_array; for (i=0; i < n; i++) { /* note that we access image by 8x8 blocks but dct is stored seq. */ icx = i%ncx; icy = i/ncx; iq = icx*8 + icy*8*ystride; exp3_dct_cell(&image[iq], ystride, dctstart, test_flag); dctstart += 64; } return 1; /* success return */ } /*--------------------------------------------------------------------------*/ int fdct_exp5(image, nx, ny, qtable, dct_array,test_flag) /* int dct, quant, and re-order */ /* exp. code for testing trace methods out */ /* nx and ny must be multiples of 8 (check in calling routine) */ short *qtable, *image, *dct_array; int nx, ny, test_flag; { int n, i, ncx, iq, ystride, icx, icy, row, col; short *dctstart; printf("in fdct_exp3\n"); /* condition the q table for the output of the dct, taken from jcdctmgr.c */ /* the exp5 version uses multiplies for the QT */ for (i = 0; i < 64; i++) { inverseqt[i] = (short) ((0x00004000)/(int) qtable[i]); /* (2^14)/qtable */ } /* break out the cells and call dct_cell for each */ n = nx*ny/64; /* number of cells */ ncx = nx/8; ystride = nx; dctstart = dct_array; for (i=0; i < n; i++) { /* note that we access image by 8x8 blocks but dct is stored seq. */ icx = i%ncx; icy = i/ncx; iq = icx*8 + icy*8*ystride; exp5_dct_cell(&image[iq], ystride, dctstart, test_flag); dctstart += 64; } return 1; /* success return */ } /*--------------------------------------------------------------------------*/ void exp3_dct_cell(start, ystride, dctstart, test_flag) /* does dct and quant. for one cell */ /* simple 1-D DCT's done as sum, sort of emulating the 16 bit operands and 48 bit accumulator in the IP */ /* differs from exp2_dct_cell in treatment of quantization, test multiply rather than divide */ short *start, *dctstart; int ystride, test_flag; { int i, j, n; short int_bias = 2048; /* note this is I*2 in this routine */ /* remove bias and stash in short workspace array wshort */ { /* no problem doing this in I*2 */ register short *wsptr; register short *elemptr; wsptr = wshort; elemptr = start; n = ystride - 8; for (i=0; i < 8; i++) { *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; elemptr += n; } } /* now the actual dct, use pre-generated weights, 8x8 array although we don't actually use the 0 row because it is constant, note that we take elements from wshort and put them in wshort2, row 4 is constant except for sign, we just use it so that it looks like all the others */ /* Pass 1: process rows. */ { int nq = 8; register short *wsptr = wshort; register short *wsptr2 = wshort2, *p, *pw; int acc; while (nq--) { /* actually do 8 sets of 8 multiply/sums for each result */ /* dc term, could use weights here and then it would look like the others; instead, just add and shift left by 1 */ p = wsptr; n = 8; acc = 0; while(n--) acc += (int) *p++; wsptr2[0] = (short) (acc << 1); /* start weight pointer at 8 because dc term was done different */ p = wsptr; n = 8; acc = 2048; pw = &dctw[8]; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[1*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[2*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[3*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[4*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[5*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[6*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[7*8] = (short) (acc >> 12); wsptr2++; wsptr += 8; /* advance pointer to next row */ } } /* output these results for a test if test_flag set */ if (test_flag) { register short *wsptr2 = wshort2; register short *dctptr = dctstart; for (i=0; i < 64; i++) { *dctptr++ = *wsptr2++; } return; } /* Pass 2: process columns. */ { /* here we go from wshort2 to wshort */ register short *wsptr = wshort; register short *wsptr2 = wshort2, *p, *pw, *qpt; int *z = zigtrans; long long acc; int nq = 8; qpt = inverseqt; while (nq--) { /* actually do 8 sets of 8 multiply/sums for each result */ /* almost the same as row pass, except for buffers switched and a different shift amount to get final results */ p = wsptr2; n = 8; acc = 0; while(n--) acc += (long long) *p++; dctstart[*z++] = (short) ( ((acc * (long long) qpt[0]) + 0x20000) >> 18); p = wsptr2; n = 8; acc = 0; pw = &dctw[8]; while(n--) acc += (long long)*p++ * (long long)*pw++; /* now include the quant. value (inverted) and round */ acc = acc * (long long) qpt[1*8]; acc = acc + 0x40000000; dctstart[*z++] = (short) ( acc >> 31); p = wsptr2; n = 8; acc = 0; while(n--) acc += (long long)*p++ * (long long)*pw++; acc = acc * (long long) qpt[2*8]; acc = acc + 0x40000000; dctstart[*z++] = (short) (acc >> 31); p = wsptr2; n = 8; acc = 0; while(n--) acc += (long long)*p++ * (long long)*pw++; acc = acc * (long long) qpt[3*8]; acc = acc + 0x40000000; dctstart[*z++] = (short) (acc >> 31); p = wsptr2; n = 8; acc = 0; while(n--) acc += (long long)*p++ * (long long)*pw++; acc = acc * (long long) qpt[4*8]; acc = acc + 0x40000000; dctstart[*z++] = (short) (acc >> 31); p = wsptr2; n = 8; acc = 0; while(n--) acc += (long long)*p++ * (long long)*pw++; acc = acc * (long long) qpt[5*8]; acc = acc + 0x40000000; dctstart[*z++] = (short) (acc >> 31); p = wsptr2; n = 8; acc = 0; while(n--) acc += (long long)*p++ * (long long)*pw++; acc = acc * (long long) qpt[6*8]; acc = acc + 0x40000000; dctstart[*z++] = (short) (acc >> 31); p = wsptr2; n = 8; acc = 0; while(n--) acc += (long long)*p++ * (long long)*pw++; acc = acc * (long long) qpt[7*8]; acc = acc + 0x40000000; dctstart[*z++] = (short) (acc >> 31); wsptr2 += 8; qpt++; /* advance pointer to next column */ } } /* the zigzag and quantization were included in the second pass above */ return; } /*---------------------------------------------------------------------------*/ void exp5_dct_cell(start, ystride, dctstart, test_flag) /* does dct and quant. for one cell */ /* simple 1-D DCT's done as sum, sort of emulating the 16 bit operands and 48 bit accumulator in the IP */ /* differs from exp2_dct_cell in treatment of quantization, test multiply rather than divide */ /* the exp5_dct is similar to exp3_dct_cell but it mimics the DEP more closely, the acc for the column gets a 16 bit result which is then multiplied by the inverted QT; exp3_dct_cell did not use an intermediate 16 bit result, the DEP has to */ short *start, *dctstart; int ystride, test_flag; { int i, j, n; short int_bias = 2048; /* note this is I*2 in this routine */ /* remove bias and stash in short workspace array wshort */ { /* no problem doing this in I*2 */ register short *wsptr; register short *elemptr; wsptr = wshort; elemptr = start; n = ystride - 8; for (i=0; i < 8; i++) { *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; elemptr += n; } } /* now the actual dct, use pre-generated weights, 8x8 array although we don't actually use the 0 row because it is constant, note that we take elements from wshort and put them in wshort2, row 4 is constant except for sign, we just use it so that it looks like all the others */ /* Pass 1: process rows. */ { int nq = 8; register short *wsptr = wshort; register short *wsptr2 = wshort2, *p, *pw; int acc; while (nq--) { /* actually do 8 sets of 8 multiply/sums for each result */ /* dc term, could use weights here and then it would look like the others; instead, just add and shift left by 1 */ p = wsptr; n = 8; acc = 0; while(n--) acc += (int) *p++; wsptr2[0] = (short) (acc << 1); /* start weight pointer at 8 because dc term was done different */ p = wsptr; n = 8; acc = 2048; pw = &dctw[8]; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[1*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[2*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[3*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[4*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[5*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[6*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[7*8] = (short) (acc >> 12); wsptr2++; wsptr += 8; /* advance pointer to next row */ } } /* output these results for a test if test_flag set */ if (test_flag) { register short *wsptr2 = wshort2; register short *dctptr = dctstart; for (i=0; i < 64; i++) { *dctptr++ = *wsptr2++; } return; } /* Pass 2: process columns. */ { /* here we go from wshort2 to wshort */ register short *wsptr = wshort; register short *wsptr2 = wshort2, *p, *pw, *qpt; short acc16; int *z = zigtrans; int acc; int nq = 8; qpt = inverseqt; while (nq--) { /* actually do 8 sets of 8 multiply/sums for each result */ /* almost the same as row pass, except for buffers switched and a different shift amount to get final results */ p = wsptr2; n = 8; acc = 8; while(n--) acc += (int) *p++; acc16 = (short) (acc >> 4); dctstart[*z++] = (short) ( (((int) acc16 * (int) qpt[0]) + 0x2000) >> 14); p = wsptr2; n = 8; acc = 65536; pw = &dctw[8]; while(n--) acc += (int)*p++ * (int)*pw++; acc16 = (short) (acc >> 17); dctstart[*z++] = (short) ( (((int) acc16 * (int) qpt[1*8]) + 0x2000) >> 14); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; acc16 = (short) (acc >> 17); dctstart[*z++] = (short) ( (((int) acc16 * (int) qpt[2*8]) + 0x2000) >> 14); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; acc16 = (short) (acc >> 17); dctstart[*z++] = (short) ( (((int) acc16 * (int) qpt[3*8]) + 0x2000) >> 14); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; acc16 = (short) (acc >> 17); dctstart[*z++] = (short) ( (((int) acc16 * (int) qpt[4*8]) + 0x2000) >> 14); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; acc16 = (short) (acc >> 17); dctstart[*z++] = (short) ( (((int) acc16 * (int) qpt[5*8]) + 0x2000) >> 14); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; acc16 = (short) (acc >> 17); dctstart[*z++] = (short) ( (((int) acc16 * (int) qpt[6*8]) + 0x2000) >> 14); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; acc16 = (short) (acc >> 17); dctstart[*z++] = (short) ( (((int) acc16 * (int) qpt[7*8]) + 0x2000) >> 14); wsptr2 += 8; qpt++; /* advance pointer to next column */ } } /* the zigzag and quantization were included in the second pass above */ return; } /*---------------------------------------------------------------------------*/ int ana_trace_expdct4(narg,ps) /* function for exp dct tables */ /* dct_array = ana_trace_expdct( image, qtable) */ int narg, ps[]; /* calls trace_dct with mode = 6 for int dct */ { return trace_dct(narg,ps, 6); } /*---------------------------------------------------------------------------*/ int ana_trace_expdct5(narg,ps) /* function for exp dct tables */ /* dct_array = ana_trace_expdct( image, qtable) */ int narg, ps[]; /* calls trace_dct with mode = 7 for int dct */ { return trace_dct(narg,ps, 7); } /*---------------------------------------------------------------------------*/ static short dctwqt[512]; int fdct_exp4(image, nx, ny, qtable, dct_array,test_flag) /* int dct, quant, and re-order */ /* exp. code for testing trace methods out */ /* nx and ny must be multiples of 8 (check in calling routine) */ short *qtable, *image, *dct_array; int nx, ny, test_flag; { int n, i, j, ncx, iq, ystride, icx, icy, row, col, acc; short *dctstart, *pwqt = dctwqt, *pw = dctw, *qpt; printf("in fdct_exp4\n"); /* first get an inverse Q table */ for (i = 0; i < 64; i++) { inverseqt[i] = (short) ((0x00004000)/(int) qtable[i]); /* (2^14)/qtable */ } /* the exp4 version combines QT and weights into a 512 table for second pass */ for (j = 0; j < 8; j++) { pw = dctw; qpt = inverseqt + j; for (i = 0; i < 8; i++) { n = 8; while (n--) { acc = (int) *pw++ * (int) *qpt; acc = (acc + 0x2000) >> 14; *pwqt++ = (short) acc; } qpt = qpt + 8; } } /* break out the cells and call dct_cell for each */ n = nx*ny/64; /* number of cells */ ncx = nx/8; ystride = nx; dctstart = dct_array; for (i=0; i < n; i++) { /* note that we access image by 8x8 blocks but dct is stored seq. */ icx = i%ncx; icy = i/ncx; iq = icx*8 + icy*8*ystride; exp4_dct_cell(&image[iq], ystride, dctstart, test_flag); dctstart += 64; } return 1; /* success return */ } /*--------------------------------------------------------------------------*/ void exp4_dct_cell(start, ystride, dctstart, test_flag) /* does dct and quant. for one cell */ /* simple 1-D DCT's done as sum, sort of emulating the 16 bit operands and 48 bit accumulator in the IP */ /* differs from exp2_dct_cell in treatment of quantization, test multiply rather than divide */ short *start, *dctstart; int ystride, test_flag; { int i, j, n; short int_bias = 2048; /* note this is I*2 in this routine */ /* remove bias and stash in short workspace array wshort */ { /* no problem doing this in I*2 */ register short *wsptr; register short *elemptr; wsptr = wshort; elemptr = start; n = ystride - 8; for (i=0; i < 8; i++) { *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; elemptr += n; } } /* now the actual dct, use pre-generated weights, 8x8 array although we don't actually use the 0 row because it is constant, note that we take elements from wshort and put them in wshort2, row 4 is constant except for sign, we just use it so that it looks like all the others */ /* Pass 1: process rows. */ { int nq = 8; register short *wsptr = wshort; register short *wsptr2 = wshort2, *p, *pw; int acc; while (nq--) { /* actually do 8 sets of 8 multiply/sums for each result */ /* dc term, could use weights here and then it would look like the others; instead, just add and shift left by 1 */ p = wsptr; n = 8; acc = 0; while(n--) acc += (int) *p++; wsptr2[0] = (short) (acc << 1); /* start weight pointer at 8 because dc term was done different */ p = wsptr; n = 8; acc = 2048; pw = &dctw[8]; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[1*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[2*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[3*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[4*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[5*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[6*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[7*8] = (short) (acc >> 12); wsptr2++; wsptr += 8; /* advance pointer to next row */ } } /* output these results for a test if test_flag set */ if (test_flag) { register short *wsptr2 = wshort2; register short *dctptr = dctstart; for (i=0; i < 64; i++) { *dctptr++ = *wsptr2++; } return; } /* Pass 2: process columns. */ { /* here we go from wshort2 to wshort */ register short *wsptr = wshort; register short *wsptr2 = wshort2, *p, *pwqt = dctwqt; int *z = zigtrans; int acc; int nq = 8, mq; while (nq--) { /* actually do 8 sets of 8 multiply/sums for each result */ /* use a 512 weight table which combines weights and QT */ mq = 8; while (mq--) { p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int) *p++ * (int) *pwqt++; dctstart[*z++] = (short) (acc >> 17); } wsptr2 += 8; } } /* the zigzag and quantization were included in the second pass above */ return; } /*---------------------------------------------------------------------------*/ /* #endif */ int fdct_int(image, nx, ny, qtable, dct_array) /* int dct, quant, and re-order */ /* this uses the slow int method for 12 bit pixels */ /* nx and ny must be multiples of 8 (check in calling routine) */ short *qtable, *image, *dct_array; int nx, ny; { int n, i, ncx, iq, ystride, icx, icy, row, col; short *dctstart; /* condition the q table for the output of the dct, taken from jcdctmgr.c */ /* for this method, we just multiply by 8 */ for (i = 0; i < 64; i++) { intqtb[i] = 8 * (int) qtable[i]; } /* break out the cells and call dct_cell for each */ n = nx*ny/64; /* number of cells */ ncx = nx/8; ystride = nx; dctstart = dct_array; for (i=0; i < n; i++) { /* note that we access image by 8x8 blocks but dct is stored seq. */ icx = i%ncx; icy = i/ncx; iq = icx*8 + icy*8*ystride; int_dct_cell(&image[iq], ystride, dctstart); dctstart += 64; } return 1; /* success return */ } /*-------------------------------------------------------------------------*/ int fdct_fp(image, nx, ny, qtable, dct_array) /* fp dct, quant, and re-order */ /* nx and ny must be multiples of 8 (check in calling routine) */ float *dct_array; short *qtable, *image; int nx, ny; { int n, i, ncx, iq, ystride, icx, icy, row, col; float *dctstart; /* condition the q table for the output of the dct, taken from jcdctmgr.c */ /* this is set up inverted so we multiply rather than divide later */ for (i = 0; i < 64; i++) { row = zag[i] >> 3; col = zag[i] & 7; fqtbl[i] = (0.125 / (((float) qtable[i] * aansf[row] * aansf[col]))); } /* break out the cells and call dct_cell for each */ n = nx*ny/64; /* number of cells */ ncx = nx/8; ystride = nx; dctstart = dct_array; for (i=0; i < n; i++) { /* note that we access image by 8x8 blocks but dct is stored seq. */ icx = i%ncx; icy = i/ncx; iq = icx*8 + icy*8*ystride; /* printf("iq = %d, dctstart - dct_array = %d\n", iq, dctstart - dct_array);*/ dct_cell(&image[iq], ystride, dctstart); dctstart += 64; } return 1; /* success return */ } /*---------------------------------------------------------------------------*/ int rdct_fp(image, nx, ny, qtable, dct_array) /* reverse fp dct, quant, and re-order included, result is I*2 */ /* nx and ny must be multiples of 8 (check in calling routine) */ float *dct_array; short *qtable, *image; int nx, ny; { int n, i, ncx, iq, ystride, icx, icy, row, col; float *dctstart; /* condition the q table for the dct's we created, must be same input qtable */ i = 0; for (i = 0; i < 64; i++) { row = zag[i] >> 3; col = zag[i] & 7; fqtbl[i] = (float) qtable[i] * aansf[row] * aansf[col] * 0.125; } /* break out the cells and call dct_cell for each */ n = nx*ny/64; /* number of cells */ ncx = nx/8; ystride = nx; dctstart = dct_array; for (i=0; i < n; i++) { /* note that we access image by 8x8 blocks but dct is stored seq. */ icx = i%ncx; icy = i/ncx; iq = icx*8 + icy*8*ystride; /*printf("iq = %d, dctstart - dct_array = %d\n", iq, dctstart - dct_array);*/ rdct_cell(&image[iq], ystride, dctstart); dctstart += 64; } return 1; /* success return */ } /*---------------------------------------------------------------------------*/ void dct_cell(start, ystride, dctstart) /* does dct and quant. for one cell */ short *start; float *dctstart; int ystride; { int i, j, n; float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; float tmp10, tmp11, tmp12, tmp13; float z1, z2, z3, z4, z5, z11, z13; /* remove bias and stash in workspace array ws */ { register float *wsptr; register short *elemptr; wsptr = ws; elemptr = start; n = ystride - 8; for (i=0; i < 8; i++) { *wsptr++ = (float) *elemptr++ - jpeg_bias; *wsptr++ = (float) *elemptr++ - jpeg_bias; *wsptr++ = (float) *elemptr++ - jpeg_bias; *wsptr++ = (float) *elemptr++ - jpeg_bias; *wsptr++ = (float) *elemptr++ - jpeg_bias; *wsptr++ = (float) *elemptr++ - jpeg_bias; *wsptr++ = (float) *elemptr++ - jpeg_bias; *wsptr++ = (float) *elemptr++ - jpeg_bias; elemptr += n; } } /* now the actual dct */ /* Pass 1: process rows. */ { register float *wsptr = ws; int nq = 8; while (nq--) { tmp0 = wsptr[0] + wsptr[7]; tmp7 = wsptr[0] - wsptr[7]; tmp1 = wsptr[1] + wsptr[6]; tmp6 = wsptr[1] - wsptr[6]; tmp2 = wsptr[2] + wsptr[5]; tmp5 = wsptr[2] - wsptr[5]; tmp3 = wsptr[3] + wsptr[4]; tmp4 = wsptr[3] - wsptr[4]; /* Even part */ tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; wsptr[0] = tmp10 + tmp11; /* phase 3 */ wsptr[4] = tmp10 - tmp11; z1 = (tmp12 + tmp13) * ( 0.707106781); /* c4 */ wsptr[2] = tmp13 + z1; /* phase 5 */ wsptr[6] = tmp13 - z1; /* Odd part */ tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; /* The rotator is modified from fig 4-8 to avoid extra negations. */ z5 = (tmp10 - tmp12) * ( 0.382683433); /* c6 */ z2 = ( 0.541196100) * tmp10 + z5; /* c2-c6 */ z4 = ( 1.306562965) * tmp12 + z5; /* c2+c6 */ z3 = tmp11 * ( 0.707106781); /* c4 */ z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; wsptr[5] = z13 + z2; /* phase 6 */ wsptr[3] = z13 - z2; wsptr[1] = z11 + z4; wsptr[7] = z11 - z4; wsptr += 8; /* advance pointer to next row */ } } /* Pass 2: process columns. */ { register float *wsptr = ws; int nq = 8; while (nq--) { tmp0 = wsptr[0] + wsptr[8*7]; tmp7 = wsptr[0] - wsptr[8*7]; tmp1 = wsptr[8] + wsptr[8*6]; tmp6 = wsptr[8] - wsptr[8*6]; tmp2 = wsptr[8*2] + wsptr[8*5]; tmp5 = wsptr[8*2] - wsptr[8*5]; tmp3 = wsptr[8*3] + wsptr[8*4]; tmp4 = wsptr[8*3] - wsptr[8*4]; /* Even part */ tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; wsptr[0] = tmp10 + tmp11; /* phase 3 */ wsptr[8*4] = tmp10 - tmp11; z1 = (tmp12 + tmp13) * ( 0.707106781); /* c4 */ wsptr[8*2] = tmp13 + z1; /* phase 5 */ wsptr[8*6] = tmp13 - z1; /* Odd part */ tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; /* The rotator is modified from fig 4-8 to avoid extra negations. */ z5 = (tmp10 - tmp12) * ( 0.382683433); /* c6 */ z2 = ( 0.541196100) * tmp10 + z5; /* c2-c6 */ z4 = ( 1.306562965) * tmp12 + z5; /* c2+c6 */ z3 = tmp11 * ( 0.707106781); /* c4 */ z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; wsptr[8*5] = z13 + z2; /* phase 6 */ wsptr[8*3] = z13 - z2; wsptr[8*1] = z11 + z4; wsptr[8*7] = z11 - z4; wsptr++; /* advance pointer to next column */ } } /* quantize and re-order */ /* for this fp test, we do not convert to int but save the "quantized" fp DCT for subsequent scrutiny */ { register float *wsptr, *dctptr; wsptr = ws; dctptr = dctstart; for (i=0; i < 64; i++) { *dctptr++ = wsptr[zag[i]] * fqtbl[i]; } } } /*---------------------------------------------------------------------------*/ void int_dct_cell(start, ystride, dctstart) /* does dct and quant. for one cell */ short *start, *dctstart; int ystride; { int i, j, n; int int_bias = 2048; int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int tmp10, tmp11, tmp12, tmp13; int z1, z2, z3, z4, z5; #define FIX_0_298631336 ((int) 2446) #define FIX_0_390180644 ((int) 3196) #define FIX_0_541196100 ((int) 4433) #define FIX_0_765366865 ((int) 6270) #define FIX_0_899976223 ((int) 7373) #define FIX_1_175875602 ((int) 9633) #define FIX_1_501321110 ((int) 12299) #define FIX_1_847759065 ((int) 15137) #define FIX_1_961570560 ((int) 16069) #define FIX_2_053119869 ((int) 16819) #define FIX_2_562915447 ((int) 20995) #define FIX_3_072711026 ((int) 25172) #define CONST_BITS 13 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ /* some of the jpeg-5a macros are handy, note SHIFT_TEMPS is blank */ #define SHIFT_TEMPS #define RIGHT_SHIFT(x,shft) ((x) >> (shft)) #define ONE ((int) 1) #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) /* remove bias and stash in workspace array ws */ { register int *wsptr; register short *elemptr; wsptr = wsint; elemptr = start; n = ystride - 8; for (i=0; i < 8; i++) { *wsptr++ = (int) *elemptr++ - int_bias; *wsptr++ = (int) *elemptr++ - int_bias; *wsptr++ = (int) *elemptr++ - int_bias; *wsptr++ = (int) *elemptr++ - int_bias; *wsptr++ = (int) *elemptr++ - int_bias; *wsptr++ = (int) *elemptr++ - int_bias; *wsptr++ = (int) *elemptr++ - int_bias; *wsptr++ = (int) *elemptr++ - int_bias; elemptr += n; } } /* now the actual dct */ /* Pass 1: process rows. */ { register int *wsptr = wsint; int nq = 8; while (nq--) { tmp0 = wsptr[0] + wsptr[7]; tmp7 = wsptr[0] - wsptr[7]; tmp1 = wsptr[1] + wsptr[6]; tmp6 = wsptr[1] - wsptr[6]; tmp2 = wsptr[2] + wsptr[5]; tmp5 = wsptr[2] - wsptr[5]; tmp3 = wsptr[3] + wsptr[4]; tmp4 = wsptr[3] - wsptr[4]; /* Even part */ tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; /* we can only shift by 1 bit in each coeff. if the constants use 13, or overflows in 32 bit regs, this may lose a little */ wsptr[0] = (tmp10 + tmp11) << PASS1_BITS; wsptr[4] = (tmp10 - tmp11) << PASS1_BITS; z1 = (tmp12 + tmp13) * FIX_0_541196100; wsptr[2] = DESCALE( z1 + tmp13 * FIX_0_765366865, CONST_BITS-PASS1_BITS); wsptr[6] = DESCALE( z1 - tmp12 * FIX_1_847759065, CONST_BITS-PASS1_BITS); /* Odd part */ z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = (z3 + z4) * FIX_1_175875602; tmp4 = tmp4 * FIX_0_298631336; tmp5 = tmp5 * FIX_2_053119869; tmp6 = tmp6 * FIX_3_072711026; tmp7 = tmp7 * FIX_1_501321110; z1 = z1 * - FIX_0_899976223; z2 = z2 * - FIX_2_562915447; z3 = z3 * - FIX_1_961570560; z4 = z4 * - FIX_0_390180644; z3 += z5; z4 += z5; wsptr[7] = DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); wsptr[5] = DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); wsptr[3] = DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); wsptr[1] = DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); wsptr += 8; /* advance pointer to next row */ } } /* Pass 2: process columns. */ { register int *wsptr = wsint; int nq = 8; while (nq--) { tmp0 = wsptr[0] + wsptr[8*7]; tmp7 = wsptr[0] - wsptr[8*7]; tmp1 = wsptr[8] + wsptr[8*6]; tmp6 = wsptr[8] - wsptr[8*6]; tmp2 = wsptr[8*2] + wsptr[8*5]; tmp5 = wsptr[8*2] - wsptr[8*5]; tmp3 = wsptr[8*3] + wsptr[8*4]; tmp4 = wsptr[8*3] - wsptr[8*4]; /* Even part */ tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; wsptr[0] = DESCALE(tmp10 + tmp11, PASS1_BITS); wsptr[8*4] = DESCALE(tmp10 - tmp11, PASS1_BITS); z1 = (tmp12 + tmp13) * FIX_0_541196100; wsptr[8*2] = DESCALE( z1 + tmp13 * FIX_0_765366865, CONST_BITS+PASS1_BITS); wsptr[8*6] = DESCALE( z1 - tmp12 * FIX_1_847759065, CONST_BITS+PASS1_BITS); z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = (z3 + z4) * FIX_1_175875602; tmp4 = tmp4 * FIX_0_298631336; tmp5 = tmp5 * FIX_2_053119869; tmp6 = tmp6 * FIX_3_072711026; tmp7 = tmp7 * FIX_1_501321110; z1 = z1 * - FIX_0_899976223; z2 = z2 * - FIX_2_562915447; z3 = z3 * - FIX_1_961570560; z4 = z4 * - FIX_0_390180644; z3 += z5; z4 += z5; wsptr[8*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS); wsptr[8*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS); wsptr[8*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS); wsptr[8*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS); wsptr++; /* advance pointer to next column */ } } /* quantize and re-order */ { register int *wsptr; register short *dctptr; int qval, temp; wsptr = wsint; dctptr = dctstart; for (i=0; i < 64; i++) { /* this coded similar to the jpeg-5a code but with the zero checks */ /* a lot done for rounding */ qval = intqtb[i]; temp = wsptr[zag[i]]; if (temp < 0) { temp = -temp; temp += qval>>1; /* for rounding */ temp = temp/qval; temp = -temp; } else { temp += qval>>1; /* for rounding */ temp = temp/qval; } *dctptr++ = (short) temp; } } } /*---------------------------------------------------------------------------*/ void exp_dct_cell(start, ystride, dctstart) /* does dct and quant. for one cell */ short *start, *dctstart; int ystride; { int i, j, n; short int_bias = 2048; /* note this is I*2 in this routine */ int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int tmp10, tmp11, tmp12, tmp13; int z1, z2, z3, z4, z5; #define FIX_0_298631336 ((int) 2446) #define FIX_0_390180644 ((int) 3196) #define FIX_0_541196100 ((int) 4433) #define FIX_0_765366865 ((int) 6270) #define FIX_0_899976223 ((int) 7373) #define FIX_1_175875602 ((int) 9633) #define FIX_1_501321110 ((int) 12299) #define FIX_1_847759065 ((int) 15137) #define FIX_1_961570560 ((int) 16069) #define FIX_2_053119869 ((int) 16819) #define FIX_2_562915447 ((int) 20995) #define FIX_3_072711026 ((int) 25172) #define CONST_BITS 13 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ /* some of the jpeg-5a macros are handy, note SHIFT_TEMPS is blank */ #define SHIFT_TEMPS #define RIGHT_SHIFT(x,shft) ((x) >> (shft)) #define ONE ((int) 1) #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) /* this is experimental version, uses some I*2 rather than all I*4 */ /* remove bias and stash in short workspace array wshort */ { /* no problem doing this in I*2 */ register short *wsptr; register short *elemptr; wsptr = wshort; elemptr = start; n = ystride - 8; for (i=0; i < 8; i++) { *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; elemptr += n; } } /* now the actual dct */ /* Pass 1: process rows. */ { /* do as many as possible in I*2, results get stored in I*2 for column pass */ /* avoid 32x32 multiplies, use 16x16 to 32 (sort of emulated here by doing (int)'s on I*2's before the multiply */ register short *wsptr = wshort; register int *wsptr2 = wsint; short tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; short tmp10, tmp11, tmp12, tmp13, y1, y2, y3, y4; /* note that the z temps are I*4 while the tmp's and y's are I*2 */ int z1, z2, z3, z4, z5; int nq = 8; while (nq--) { tmp0 = wsptr[0] + wsptr[7]; /* need 13 bits here */ tmp7 = wsptr[0] - wsptr[7]; tmp1 = wsptr[1] + wsptr[6]; tmp6 = wsptr[1] - wsptr[6]; tmp2 = wsptr[2] + wsptr[5]; tmp5 = wsptr[2] - wsptr[5]; tmp3 = wsptr[3] + wsptr[4]; tmp4 = wsptr[3] - wsptr[4]; /* Even part */ tmp10 = tmp0 + tmp3; /* up to 14 bits */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; /* we can only shift by 1 bit in each coeff. if the constants use 13, or overflows in 32 bit regs, this may lose a little */ /* these 2 could are stored as 16 bits */ wsptr[0] = ((tmp10 + tmp11)) << PASS1_BITS; wsptr[4] = ((tmp10 - tmp11)) << PASS1_BITS; /* a 16 add and then a 16x16->32, but here we have to convert first */ z1 = (int) (tmp12 + tmp13) * FIX_0_541196100; /* keep z1 in accum, and add products below */ wsptr[2] = (short) DESCALE( z1 + (int) tmp13 * FIX_0_765366865, CONST_BITS-PASS1_BITS); wsptr[6] = (short) DESCALE( z1 - (int) tmp12 * FIX_1_847759065, CONST_BITS-PASS1_BITS); /* Odd part */ y1 = tmp4 + tmp7; /* more 16 bit adds, use 14 bits */ y2 = tmp5 + tmp6; y3 = tmp4 + tmp6; y4 = tmp5 + tmp7; z5 = (int) ((y3 + y4)) * FIX_1_175875602; z1 = (int) y1 * - FIX_0_899976223; z2 = (int) y2 * - FIX_2_562915447; z3 = (int) y3 * - FIX_1_961570560; z4 = (int) y4 * - FIX_0_390180644; z3 += z5; z4 += z5; wsptr[7] = (short) DESCALE((int) tmp4 * FIX_0_298631336 + z1 + z3, CONST_BITS-PASS1_BITS); wsptr[5] = (short) DESCALE((int) tmp5 * FIX_2_053119869 + z2 + z4, CONST_BITS-PASS1_BITS); wsptr[3] = (short) DESCALE((int) tmp6 * FIX_3_072711026 + z2 + z3, CONST_BITS-PASS1_BITS); wsptr[1] = (short) DESCALE((int) tmp7 * FIX_1_501321110 + z1 + z4, CONST_BITS-PASS1_BITS); wsptr2 += 8; wsptr += 8; /* advance pointer to next row */ } } /* Pass 2: process columns. */ { register short *wsptr = wshort; /* register int *wsptr2 = wsint; */ int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int tmp10, tmp11, tmp12, tmp13; int z1, z2, z3, z4, z5; int nq = 8; while (nq--) { tmp0 = (int) wsptr[0] + (int) wsptr[8*7]; tmp7 = (int) wsptr[0] - (int) wsptr[8*7]; tmp1 = (int) wsptr[8] + (int) wsptr[8*6]; tmp6 = (int) wsptr[8] - (int) wsptr[8*6]; tmp2 = (int) wsptr[8*2] + (int) wsptr[8*5]; tmp5 = (int) wsptr[8*2] - (int) wsptr[8*5]; tmp3 = (int) wsptr[8*3] + (int) wsptr[8*4]; tmp4 = (int) wsptr[8*3] - (int) wsptr[8*4]; /* Even part */ tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; /* to store in a 16 bit result, we do the x8 descale here instead of multiplying the q table by 8 */ wsptr[0] = DESCALE(tmp10 + tmp11, PASS1_BITS+3); wsptr[8*4] = DESCALE(tmp10 - tmp11, PASS1_BITS+3); z1 = (tmp12 + tmp13) * FIX_0_541196100; wsptr[8*2] = DESCALE(z1 + tmp13 * FIX_0_765366865, CONST_BITS+PASS1_BITS+3); wsptr[8*6] = DESCALE(z1 - tmp12 * FIX_1_847759065, CONST_BITS+PASS1_BITS+3); z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; z3 = tmp4 + tmp6; z4 = tmp5 + tmp7; z5 = (z3 + z4) * FIX_1_175875602; tmp4 = tmp4 * FIX_0_298631336; tmp5 = tmp5 * FIX_2_053119869; tmp6 = tmp6 * FIX_3_072711026; tmp7 = tmp7 * FIX_1_501321110; z1 = z1 * - FIX_0_899976223; z2 = z2 * - FIX_2_562915447; z3 = z3 * - FIX_1_961570560; z4 = z4 * - FIX_0_390180644; z3 += z5; z4 += z5; wsptr[8*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS+3); wsptr[8*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS+3); wsptr[8*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS+3); wsptr[8*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS+3); wsptr++; /* advance pointer to next column */ } } /* quantize and re-order */ { register short *wsptr; register short *dctptr; int qval, temp; wsptr = wshort; dctptr = dctstart; for (i=0; i < 64; i++) { /* this coded similar to the jpeg-5a code but without the zero checks */ /* a lot done for rounding */ qval = intqtb[i]; temp = (int) wsptr[zag[i]]; if (temp < 0) { temp = -temp; temp += qval>>1; /* for rounding */ temp = temp/qval; temp = -temp; } else { temp += qval>>1; /* for rounding */ temp = temp/qval; } *dctptr++ = (short) temp; } } } void exp2_dct_cell(start, ystride, dctstart, test_flag) /* does dct and quant. for one cell */ /* simple 1-D DCT's done as sum, sort of emulating the 16 bit operands and 48 bit accumulator in the IP */ short *start, *dctstart; int ystride, test_flag; { int i, j, n; short int_bias = 2048; /* note this is I*2 in this routine */ /* remove bias and stash in short workspace array wshort */ { /* no problem doing this in I*2 */ register short *wsptr; register short *elemptr; wsptr = wshort; elemptr = start; n = ystride - 8; for (i=0; i < 8; i++) { *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; *wsptr++ = *elemptr++ - int_bias; elemptr += n; } } /* now the actual dct, use pre-generated weights, 8x8 array although we don't actually use the 0 row because it is constant, note that we take elements from wshort and put them in wshort2, row 4 is constant except for sign, we just use it so that it looks like all the others */ /* Pass 1: process rows. */ { int nq = 8; register short *wsptr = wshort; register short *wsptr2 = wshort2, *p, *pw; int acc; while (nq--) { /* actually do 8 sets of 8 multiply/sums for each result */ /* dc term, could use weights here and then it would look like the others; instead, just add and shift left by 1 */ p = wsptr; n = 8; acc = 0; while(n--) acc += (int) *p++; wsptr2[0] = (short) (acc << 1); /* start weight pointer at 8 because dc term was done different */ p = wsptr; n = 8; acc = 2048; pw = &dctw[8]; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[1*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[2*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[3*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[4*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[5*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[6*8] = (short) (acc >> 12); p = wsptr; n = 8; acc = 2048; while(n--) acc += (int)*p++ * (int)*pw++; wsptr2[7*8] = (short) (acc >> 12); wsptr2++; wsptr += 8; /* advance pointer to next row */ } } /* output these results for a test if test_flag set */ if (test_flag) { register short *wsptr2 = wshort2; register short *dctptr = dctstart; for (i=0; i < 64; i++) { *dctptr++ = *wsptr2++; } return; } /* Pass 2: process columns. */ { /* here we go from wshort2 to wshort */ register short *wsptr = wshort; register short *wsptr2 = wshort2, *p, *pw; int acc; int nq = 8; while (nq--) { /* actually do 8 sets of 8 multiply/sums for each result */ /* almost the same as row pass, except for buffers switched and a different shift amount to get final results */ p = wsptr2; n = 8; acc = 8; while(n--) acc += (int) *p++; wsptr[0] = (short) (acc >> 4); p = wsptr2; n = 8; acc = 65536; pw = &dctw[8]; while(n--) acc += (int)*p++ * (int)*pw++; wsptr[1*8] = (short) (acc >> 17); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; wsptr[2*8] = (short) (acc >> 17); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; wsptr[3*8] = (short) (acc >> 17); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; wsptr[4*8] = (short) (acc >> 17); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; wsptr[5*8] = (short) (acc >> 17); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; wsptr[6*8] = (short) (acc >> 17); p = wsptr2; n = 8; acc = 65536; while(n--) acc += (int)*p++ * (int)*pw++; wsptr[7*8] = (short) (acc >> 17); wsptr2 += 8; wsptr++; /* advance pointer to next column */ } } /* quantize and re-order */ { register short *wsptr; register short *dctptr; int qval, temp; wsptr = wshort; dctptr = dctstart; for (i=0; i < 64; i++) { /* this coded similar to the jpeg-5a code but without the zero checks */ /* the coeffs are put into zig/zag order */ /* a lot done for rounding */ qval = intqtb[i]; temp = (int) wsptr[zag[i]]; if (temp < 0) { temp = -temp; temp += qval>>1; /* for rounding */ temp = temp/qval; temp = -temp; } else { temp += qval>>1; /* for rounding */ temp = temp/qval; } *dctptr++ = (short) temp; } } return; } /*---------------------------------------------------------------------------*/ void rdct_cell(start, ystride, dctstart) /* does reverse quant. and dct for one cell */ short *start; float *dctstart; int ystride; { int i, j, n; float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; float tmp10, tmp11, tmp12, tmp13; float z1, z2, z3, z4, z5, z11, z13, z10, z12; /* first de-zag and de-quantize */ { register float *wsptr, *dctptr; wsptr = ws; dctptr = dctstart; for (i=0; i < 64; i++) { wsptr[zag[i]] = *dctptr++ * fqtbl[i]; } } /* Pass 1: process columns. */ /* we don't check for columns of zeroes since this usually uses full precision */ { register float *wsptr = ws; register float *fqtptr = fqtbl; int nq = 8; while (nq--) { tmp0 = wsptr[0]; tmp1 = wsptr[8*2]; tmp2 = wsptr[8*4]; tmp3 = wsptr[8*6]; tmp10 = tmp0 + tmp2; /* phase 3 */ tmp11 = tmp0 - tmp2; tmp13 = tmp1 + tmp3; /* phases 5-3 */ tmp12 = (tmp1 - tmp3) * 1.414213562 - tmp13; /* 2*c4 */ tmp0 = tmp10 + tmp13; /* phase 2 */ tmp3 = tmp10 - tmp13; tmp1 = tmp11 + tmp12; tmp2 = tmp11 - tmp12; /* Odd part */ tmp4 = wsptr[8]; tmp5 = wsptr[8*3]; tmp6 = wsptr[8*5]; tmp7 = wsptr[8*7]; z13 = tmp6 + tmp5; /* phase 6 */ z10 = tmp6 - tmp5; z11 = tmp4 + tmp7; z12 = tmp4 - tmp7; tmp7 = z11 + z13; /* phase 5 */ tmp11 = (z11 - z13) * ( 1.414213562); /* 2*c4 */ z5 = (z10 + z12) * ( 1.847759065); /* 2*c2 */ tmp10 = ( 1.082392200) * z12 - z5; /* 2*(c2-c6) */ tmp12 = ( -2.613125930) * z10 + z5; /* -2*(c2+c6) */ tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; wsptr[0] = tmp0 + tmp7; wsptr[8*7] = tmp0 - tmp7; wsptr[8] = tmp1 + tmp6; wsptr[8*6] = tmp1 - tmp6; wsptr[8*2] = tmp2 + tmp5; wsptr[8*5] = tmp2 - tmp5; wsptr[8*4] = tmp3 + tmp4; wsptr[8*3] = tmp3 - tmp4; fqtptr++; wsptr++; /* advance pointers to next column */ } } /* Pass 2: process rows. */ { register float *wsptr; register short *elemptr; int nq = 8; wsptr = ws; elemptr = start; while (nq--) { /* Even part */ tmp10 = wsptr[0] + wsptr[4] + bias; tmp11 = wsptr[0] - wsptr[4] + bias; tmp13 = wsptr[2] + wsptr[6]; tmp12 = (wsptr[2] - wsptr[6]) * ( 1.414213562) - tmp13; tmp0 = tmp10 + tmp13; tmp3 = tmp10 - tmp13; tmp1 = tmp11 + tmp12; tmp2 = tmp11 - tmp12; /* Odd part */ z13 = wsptr[5] + wsptr[3]; z10 = wsptr[5] - wsptr[3]; z11 = wsptr[1] + wsptr[7]; z12 = wsptr[1] - wsptr[7]; tmp7 = z11 + z13; tmp11 = (z11 - z13) * ( 1.414213562); z5 = (z10 + z12) * ( 1.847759065); /* 2*c2 */ tmp10 = ( 1.082392200) * z12 - z5; /* 2*(c2-c6) */ tmp12 = ( -2.613125930) * z10 + z5; /* -2*(c2+c6) */ tmp6 = tmp12 - tmp7; tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; /* Final output stage, note bias was added in above */ /* we don't range limit since results should be near exact */ elemptr[0] = (short) (tmp0 + tmp7); elemptr[7] = (short) (tmp0 - tmp7); elemptr[1] = (short) (tmp1 + tmp6); elemptr[6] = (short) (tmp1 - tmp6); elemptr[2] = (short) (tmp2 + tmp5); elemptr[5] = (short) (tmp2 - tmp5); elemptr[4] = (short) (tmp3 + tmp4); elemptr[3] = (short) (tmp3 - tmp4); wsptr += 8; elemptr += ystride; /* to next row */ } } } /*---------------------------------------------------------------------------*/ int ana_trace_fpdct(narg,ps) /* function for fp dct tables */ /* dct_array = ana_trace_fpdct( image, qtable) */ int narg, ps[]; /* calls trace_dct with mode = 1 for fp dct */ { return trace_dct(narg,ps, 1); } /*---------------------------------------------------------------------------*/ int ana_trace_intdct(narg,ps) /* function for int dct tables */ /* dct_array = ana_trace_fpdct( image, qtable) */ int narg, ps[]; /* calls trace_dct with mode = 2 for int dct */ { return trace_dct(narg,ps, 2); } /*---------------------------------------------------------------------------*/ int ana_trace_expdct(narg,ps) /* function for exp dct tables */ /* dct_array = ana_trace_expdct( image, qtable) */ int narg, ps[]; /* calls trace_dct with mode = 3 for int dct */ { return trace_dct(narg,ps, 3); } /*---------------------------------------------------------------------------*/ int ana_trace_expdct2(narg,ps) /* function for exp dct tables */ /* dct_array = ana_trace_expdct( image, qtable) */ int narg, ps[]; /* calls trace_dct with mode = 4 for int dct */ { return trace_dct(narg,ps, 4); } /*---------------------------------------------------------------------------*/ int trace_dct(narg,ps,mode) /* internal routine for dct tables */ /* mode = 1 for fp dct, 2 for I*2 dct, 3 for experimental version */ int narg, ps[]; { short *image, *qtable, *dct_intarray; float *dct_array; int nd, nx, ny, i, j, n, dim[8], iq, result_sym, test_flag=0; struct ahead *h; /* first arg must be a 2-D I*2 array */ iq = ps[0]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 1 ) return execute_error(111); h = (struct ahead *) sym[iq].spec.array.ptr; image = (short *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; if (nd != 2) return execute_error(101); nx = h->dims[0]; ny = h->dims[1]; if ( nx%8 != 0 || nx%8 != 0) { printf("image dimensions must be a multiple of 8, nx, ny = %d %d\n",nx,ny); return 0; } /* second arg must also be a I*2 array of length 64 */ iq = ps[1]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 1 ) return execute_error(111); h = (struct ahead *) sym[iq].spec.array.ptr; qtable = (short *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; n = 1; for (j=0;jdims[j]; /*# of elements for nsym */ if ( n != 64) { printf("quantization table must be 64 long, n = %d\n", n); return -1; } /* an optional 3rd argument can be a test flag */ if (narg>2) if (int_arg_stat(ps[2], &test_flag) != 1) return -1; /* we return an array with all the dct's, (64, nx*ny/64) */ dim[0] = 64; dim[1] = nx*ny/64; switch (mode) { case 1: result_sym = array_scratch(3, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; dct_array = (float *) ((char *)h + sizeof(struct ahead)); /* call the routine that does the work */ fdct_fp(image, nx, ny, qtable, dct_array); break; case 2: result_sym = array_scratch(1, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; dct_intarray = (short *) ((char *)h + sizeof(struct ahead)); /* call the routine that does the work */ fdct_int(image, nx, ny, qtable, dct_intarray); break; case 3: result_sym = array_scratch(1, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; dct_intarray = (short *) ((char *)h + sizeof(struct ahead)); /* call the routine that does the work */ fdct_exp(image, nx, ny, qtable, dct_intarray); break; case 4: result_sym = array_scratch(1, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; dct_intarray = (short *) ((char *)h + sizeof(struct ahead)); /* call the routine that does the work */ fdct_exp2(image, nx, ny, qtable, dct_intarray,test_flag); break; /* #ifdef __alpha */ case 5: result_sym = array_scratch(1, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; dct_intarray = (short *) ((char *)h + sizeof(struct ahead)); /* call the routine that does the work */ fdct_exp3(image, nx, ny, qtable, dct_intarray,test_flag); break; case 6: result_sym = array_scratch(1, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; dct_intarray = (short *) ((char *)h + sizeof(struct ahead)); /* call the routine that does the work */ fdct_exp4(image, nx, ny, qtable, dct_intarray,test_flag); break; case 7: result_sym = array_scratch(1, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; dct_intarray = (short *) ((char *)h + sizeof(struct ahead)); /* call the routine that does the work */ fdct_exp5(image, nx, ny, qtable, dct_intarray,test_flag); break; /* #endif */ } return result_sym; } /*end of ana_trace_fpdct */ /*------------------------------------------------------------------------- */ int ana_trace_rdct(narg,ps) /* function for restoring image from dct */ /* image = ana_trace_rdct(dct, nx, qtable) */ int narg, ps[]; { short *image, *qtable; float *dct_array; int nd, nx, ny, i, j, n, dim[8], iq, result_sym; struct ahead *h; /* first arg must be a F*4 array */ iq = ps[0]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 3 ) iq = ana_float(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; dct_array = (float *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; n = 1; for (j=0;jdims[j]; /*# of elements for nsym */ /* second arg is number of columns in result array */ if (int_arg_stat(ps[1], &nx) != 1) return -1; /* some restrictions, n%64 must = 0, and (n/nx)%8 must = 0 */ if ( n%64 != 0 || (n/nx)%8 != 0) { printf("dct dimensions invalid, n, nx = %d %d\n",n, nx); return 0; } ny = n/nx; /* third arg must be a I*2 array of length 64 */ iq = ps[2]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 1 ) return execute_error(111); h = (struct ahead *) sym[iq].spec.array.ptr; qtable = (short *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; n = 1; for (j=0;jdims[j]; /*# of elements for nsym */ if ( n != 64) { printf("quantization table must be 64 long, n = %d\n", n); return -1; } /* we return a short array (nx, ny) */ dim[0] = nx; dim[1] = ny; result_sym = array_scratch(1, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; image = (short*) ((char *)h + sizeof(struct ahead)); ana_zero(1, &result_sym); /* call the routine that does the work */ rdct_fp(image, nx, ny, qtable, dct_array); return result_sym; } /*end of ana_trace_fpdct */ /*------------------------------------------------------------------------- */ int ana_huff_table_vert(narg,ps) /* test routine for tables */ int narg, ps[]; { /* expect 4 args, first 2 input */ /* huff_table_vert, bits, values, ehufco, ehufsi */ int nd, nx, ny, dim[8], iq, maxval; short *ehufco, *ehufsi; struct ahead *h; byte *bits, *huffval, *p; /* first arg must be a byte array of length 16 */ /*printf("in ana_huff_table_vert\n");*/ iq = ps[0]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; bits = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nx = h->dims[0]; if (nd != 1 || nx != 16) { printf("HUFF_TABLE_VERT, bits must be a vector of 16\n"); return -1; } /* now the values, the length of this determines the sizes of the outputs */ iq = ps[1]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; huffval = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nx = h->dims[0]; if (nd != 1) { printf("HUFF_TABLE_VERT, values must be a vector\n"); return -1; } /* create the output arrays, since these are indexed by value, we need to find the max value to get sizes */ maxval = 0; iq = nx; p = huffval; while(iq--) { maxval = MAX(maxval, (int) *p); p++; } maxval++; /* printf("maxval in huff_table_vert = %d\n", maxval); */ nd = 1; dim[0] = maxval; if (redef_array(ps[2], 1, nd, dim) != 1 || redef_array(ps[3], 1, nd, dim) != 1) return -1; h = (struct ahead *) sym[ps[2]].spec.array.ptr; ehufco = (short *) ((char *)h + sizeof(struct ahead)); bzero( (char *) ehufco, maxval * sizeof(short) ); h = (struct ahead *) sym[ps[3]].spec.array.ptr; ehufsi = (short *) ((char *)h + sizeof(struct ahead)); bzero( (char *) ehufsi, maxval * sizeof(short) ); /* make the useful tables */ iq = huff_table_vert(bits, huffval, ehufco, ehufsi, nx); /* printf("return status from huff_table_vert = %d\n", iq); */ if (iq != 1) printf("error in huff_table_vert\n"); return iq; } /*------------------------------------------------------------------------- */ int ana_huff_table_gen(narg,ps) /* test routine for tables */ int narg, ps[]; /* calls huff_table_gen(freqin, nf, bits, huffval, nval) */ { /* expect 3 args, first is input, others are computed here */ int nd, nx, ny, dim[8], iq, *freq, nval, j; struct ahead *h, *h2; byte *bits, *huffval; /* first arg is a histogram (or frequency distribution) */ /* we make it an I*4 array */ iq = ps[0]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 2 ) iq = ana_long(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; freq = (int *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nx = 1; for (j=0;jdims[j]; /* # of elements */ /* accept multi-dimensional histograms (16x16 common) but output vector array of same (or smaller) size */ if (nx > 256) { printf("HUFF_TABLE_GEN, histogram must be <= 256 long\n"); return -1; } /* create the output arrays */ /* redefine the second arg to be a byte array 32 long */ iq = ps[1]; dim[0] = 32; nd = 1; if (redef_array(iq, 0, nd, dim) != 1) return -1; h2 = (struct ahead *) sym[iq].spec.array.ptr; bits = (byte *) ((char *)h2 + sizeof(struct ahead)); iq = ps[2]; dim[0] = nx; nd = 1; if (redef_array(iq, 0, nd, dim) != 1) return -1; h = (struct ahead *) sym[iq].spec.array.ptr; huffval = (byte *) ((char *)h + sizeof(struct ahead)); /* done with setup, call huff_table_gen to do the work */ iq = huff_table_gen(freq, nx, bits, huffval, &nval); if (iq != 1) return iq; /* reduce the size of bits to 16 via buggering the dimension */ h2->dims[0] = 16; /* now bugger the dimension of huffval if nval is smaller than nx */ /* note that we still have the header for huffval in h */ if (nval < nx) h->dims[0] = nval; return 1; } /*------------------------------------------------------------------------- */ int huff_table_gen(freqin, nf, bits, huffval, nval) unsigned char bits[], huffval[]; int freqin[], *nval, nf; /* nf (<=256) should be the size of freqin; bits must be of length 32, the freqin counts are assumed to represent values starting with 0 and incrementing by 1, huffval will be a sorted set of values that had non-zero frequencies, the length will be output in nval and will be <= nf */ { int freq[257], codesize[257], others[257], *pq, *pf; int c1, c2; int i, j, v, n; byte *p; /* this code adapted from the Independent JPEG code */ n = 32; p = bits; while (n--) *p++ = 0; /* zero bits */ n = nf+1; pq = codesize; while (n--) *pq++ = 0; /* zero codesize */ n = nf+1; pq = others; while (n--) *pq++ = -1; /* init others */ /* make a local copy of freqin and add a ringer to the end to avoid a symbol with all 1's */ n = nf; pq = freq; pf = freqin; while (n--) *pq++ = *pf++; *pq = 1; /* this is the ringer */ for (;;) { /* Find the smallest nonzero frequency, set c1 = its symbol */ /* In case of ties, take the larger symbol number */ c1 = -1; v = 1000000000; for (i = 0; i <= nf; i++) { if (freq[i] && freq[i] <= v) { v = freq[i]; c1 = i; } } /* Find the next smallest nonzero frequency, set c2 = its symbol */ /* In case of ties, take the larger symbol number */ c2 = -1; v = 1000000000; for (i = 0; i <= nf; i++) { if (freq[i] && freq[i] <= v && i != c1) { v = freq[i]; c2 = i; } } /* Done if we've merged everything into one frequency */ if (c2 < 0) break; /* Else merge the two counts/trees */ freq[c1] += freq[c2]; freq[c2] = 0; /* Increment the codesize of everything in c1's tree branch */ codesize[c1]++; while (others[c1] >= 0) { c1 = others[c1]; codesize[c1]++; } others[c1] = c2; /* chain c2 onto c1's tree branch */ /* Increment the codesize of everything in c2's tree branch */ codesize[c2]++; while (others[c2] >= 0) { c2 = others[c2]; codesize[c2]++; } } /* Now count the number of symbols of each code length */ for (i = 0; i <= nf; i++) { if (codesize[i]) { /* The JPEG standard seems to think that this can't happen, */ /* but I'm paranoid... */ if (codesize[i] > 32) { printf("huff_table_gen, huffman code > 32 bits long! (%d)\n", codesize[i]); return -1; } bits[codesize[i]-1]++; } } /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure * Huffman procedure assigned any such lengths, we must adjust the coding. * Here is what the JPEG spec says about how this next bit works: * Since symbols are paired for the longest Huffman code, the symbols are * removed from this length category two at a time. The prefix for the pair * (which is one bit shorter) is allocated to one of the pair; then, * skipping the BITS entry for that prefix length, a code word from the next * shortest nonzero BITS entry is converted into a prefix for two code words * one bit longer. */ for (i = 31; i > 15; i--) { while (bits[i] > 0) { /* printf("caught one at i = %d, bits[i] = %d\n", i, bits[i]); */ j = i - 2; /* find length of new prefix to be used */ while (bits[j] == 0) j--; bits[i] -= 2; /* remove two symbols */ bits[i-1]++; /* one goes in this length */ bits[j+1] += 2; /* two new symbols in this length */ bits[j]--; /* symbol of this length is now a prefix */ } } /* Remove the count for the pseudo-symbol 256 from the largest codelength */ while (bits[i] == 0) /* find largest codelength still in use */ i--; bits[i]--; /* Return final symbol counts (only for lengths 0..16) */ /* Return a list of the symbols sorted by code length */ /* It's not real clear to me why we don't need to consider the codelength * changes made above, but the JPEG spec seems to think this works. */ p = huffval; for (i = 1; i <= 32; i++) { for (j = 0; j <= nf -1; j++) if (codesize[j] == i) *p++ = (byte) j; } *nval = p - huffval; /* return number of meaningful values */ return 1; } /*------------------------------------------------------------------------- */ /* EMITCODE macros */ /* code that inserts up to 16 bits in the output buffer, used for a single Huffman code in 2 places, the "inputs" are code and sq */ #define EMITCODE \ i=r1>>3; j=r1%8; /* our byte and bit addresses */ \ px = x + i; \ y = (int) (code & mask[sq]); \ r1 = r1 + sq; /* future r1 (after this entry) */ \ if ( r1 > limit ) return dct_buffer_err(limit, size); \ if ( j == 0 ) { /* this is the lucky case, 1 in 8, less work */ \ /* check for spillover to next bytes (only 1 in this case)*/ \ /* messy but works on little or big endians */ \ ks = sq - 8; \ if ( ks > 0 ) { *px++ = (byte) (y >> ks); ks = ks - 8; } \ *px = (byte) (y << (-ks) ); \ } else { /* more common, need to shift before merging */ \ ks = sq + j - 8; \ if ( ks > 0 ) { *px |= (y >> ks); ks = ks - 8; px++; \ if ( ks > 0 ) { *px++ = (byte) ( y >> ks); ks = ks - 8; } \ *px = (byte) (y << (-ks) ); \ } else *px |= (byte) (y << (-ks) ); \ } /* code that inserts up to 31 bits in the output buffer, used for the dc and ac coefficients, the "inputs" are code, sq, nbits, and temp2 */ #define EMITCODE2 \ i=r1>>3; j=r1%8; /* our byte and bit addresses */ \ px = x + i; \ z = (int) (temp2 & mask[nbits]); \ y = (int) (code & mask[sq]); \ y = y << nbits; \ y = y | z; /* entire dc or ac result */ \ size = sq + nbits; \ r1 = r1 + size; /* future r1 (after this entry) */ \ if ( r1 > limit ) return dct_buffer_err(limit, size); \ \ if ( j == 0 ) { /* this is the lucky case, 1 in 8, less work */ \ /* check for spillover to next bytes */ \ /* messy but works on little or big endians */ \ ks = size - 8; \ if ( ks > 0 ) { *px++ = (byte) (y >> ks); ks = ks - 8; \ if ( ks > 0 ) { *px++ = (byte) (y >> ks); ks = ks - 8; \ if ( ks > 0 ) { *px++ = (byte) (y >> ks); ks = ks - 8;}}} \ *px = (byte) (y << (-ks) ); \ } else { /* more common, need to shift before merging */ \ /* up to 5 bytes (including the portion of a previous one) */ \ ks = size + j - 8; \ if ( ks > 0 ) { *px |= (y >> ks); ks = ks - 8; px++; \ if ( ks > 0 ) { *px++ = (byte) ( y >> ks); ks = ks - 8; \ if ( ks > 0 ) { *px++ = (byte) ( y >> ks); ks = ks - 8; \ if ( ks > 0 ) { *px++ = (byte) (y >> ks); ks = ks - 8;}}} \ *px = (byte) (y << (-ks) ); \ } else *px |= (byte) (y << (-ks) ); \ } /*------------------------------------------------------------------------- */ int dct_buffer_err(limit,size) int limit, size; { printf("Huffman compression exceeded buffer space, limit =%d\n", limit); return -1; } /*------------------------------------------------------------------------- */ int dct_buffer_decode_err(limit) int limit; { printf("Exceeded data size during Huffman decompression , limit =%d\n", limit); return -1; } /*------------------------------------------------------------------------- */ int ana_huff_encode_dct(narg,ps) /* test routine for encoding dct's */ /* this is an ana function, it returns the # of bytes required for the compression */ int narg, ps[]; /* calls huff_encode_dct */ { /* expect 6 args n = huff_encode_dct(dct, buffer, dc_ehufco, dc_ehufsi, ac_ehufco, ac_ehufsi)*/ int nd, nx, ny, dim[8], iq, *freq, nval, n, lenx, i, j, limit, nblocks; int result_sym, blimit; struct ahead *h; unsigned short *dc_ehufco, *dc_ehufsi, *ac_ehufco, *ac_ehufsi; short last_dc = 0, *dct; byte *xbuffer, *tbuffer; byte *px, *pt; /* first arg is the dct array, usually a (64,nblocks) array but we accept any thing that is a multiple of 64 */ /* we make it an I*2 array */ iq = ps[0]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 1 ) iq = ana_word(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; dct = (short *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; n = 1; for (j=0;jdims[j]; /* # of elements */ if ( (n%64) != 0 || n == 0) { printf("HUFF_ENCODE_DCT, dct array must be 0 mod 64\n"); return -1; } nblocks = n/64; /* the second arg is a buffer to hold the result, don't worry about data type, but must be an array. We figure out total bytes available for limit */ /* this will be the buffer for the result but we also use another buffer to hold the result before checking for accidental markers (0xff's), the marker removal is done by replacing each ff with an ff followed by a zero which causes a slight expansion of the data. This could be implemented by checking within the Huffman coding but we allocate a second buffer and do it afterwards, transferring every byte from a temp. buffer to the final one. The temp buffer is freed before any other mallocs so it shouldn't cause frag. problems at least. */ iq = ps[1]; if ( sym[iq].class != 4 ) return execute_error(66); h = (struct ahead *) sym[iq].spec.array.ptr; nd = h->ndim; limit = ana_type_size[sym[iq].type]; for (j=0;jdims[j]; /* # of bytes */ xbuffer = (byte *) ((char *)h + sizeof(struct ahead)); blimit = limit * 8; /* make a bit limit also */ printf("limit/blimit = %d, %d\n", limit, blimit); /* now malloc a temp buffer of same size */ tbuffer = (byte *) malloc(limit); bzero( tbuffer, limit); /* arg 3 is the dc Huffman code table, must be 16 long */ iq = ps[2]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 1 ) iq = ana_word(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; nd = h->ndim; nx = h->dims[0]; if (nd != 1 || nx != 16) { printf("HUFF_ENCODE_DCT, dc_ehufco array must be 16 long, it is %d\n",nx); return -1; } dc_ehufco = (unsigned short *) ((char *)h + sizeof(struct ahead)); /* arg 4 is the dc Huffman length table, must also be 16 long */ iq = ps[3]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; nd = h->ndim; nx = h->dims[0]; if (nd != 1 || nx != 16) { printf("HUFF_ENCODE_DCT, dc_ehufsi array must be 16 long\n"); return -1; } dc_ehufsi = (unsigned short *) ((char *)h + sizeof(struct ahead)); /* arg 5 is the ac Huffman code table, must be 256 long */ iq = ps[4]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 1 ) iq = ana_word(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; nd = h->ndim; n = 1; for (j=0;jdims[j]; /*# of elements for nsym */ if (n != 256) { printf("HUFF_ENCODE_DCT, ac_ehufco array must be 256 long\n"); return -1; } ac_ehufco = (unsigned short *) ((char *)h + sizeof(struct ahead)); /* arg 6 is the ac Huffman length table, must also be 256 long */ iq = ps[5]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; nd = h->ndim; n = 1; for (j=0;jdims[j]; /*# of elements for nsym */ if (n != 256) { printf("HUFF_ENCODE_DCT, ac_ehufsi array must be 256 long\n"); return -1; } ac_ehufsi = (unsigned short *) ((char *)h + sizeof(struct ahead)); /* at present, we always have last_dc set to 0 before this call */ lenx = huff_encode_dct(dct, nblocks, tbuffer, blimit, dc_ehufco, dc_ehufsi, ac_ehufco, ac_ehufsi, last_dc); if (lenx < 0) { free(tbuffer); return lenx; } /* now transfer to xbuffer and zero pad any accidental markers */ n = i = lenx; px = xbuffer; pt = tbuffer; while (n--) { if ( (*px++ = *pt++) == 0xff ) { *px++ = 0; lenx++; if (lenx > limit) { /* trouble */ free(tbuffer); printf("huff_encode_dct, exceeded buffer limit during padding\n"); return -1; }} } /* printf("padded %d markers\n", lenx - i); */ free(tbuffer); /* put the byte count into a scalar and return with it */ result_sym = scalar_scratch(2); sym[result_sym].spec.scalar.l = lenx; return result_sym; } /*------------------------------------------------------------------------- */ int huff_encode_dct(dct, nblocks, x, limit, dc_ehufco, dc_ehufsi, ac_ehufco, ac_ehufsi, last_dc) /* takes a 12 bit DCT array organized as a 64 x nblocks array and puts the Huffman encoded result in a buffer x. The 64 coefficients for each block are in zig/zag order. The total buffer size available is limit which is only occassionally checked so there should be enough margin for a worse case coefficient (about 32 bits?) to avoid possible clobbering. The Huffman codes and their lengths for the dc terms are in dc_ehufco and dc_ehufsi (this is JPEGese) and the ac codes/lengths are in ac_ehufco and ac_ehufsi. These are tables which are 16 long for the dc case and 256 for the ac (although 14 are not used for the ac). Tables for 8 bit JPEG would be a bit shorter. The last_dc is just 0 if this is an entire image, could be the previous dc value if this routine is used to encode just part of an image. An attempt has been made to use 16 bit or 8 bit quantities (except for an accumulator) to make the relationship with an IP code more likely. But there are still many 32 bit counters in the code at present. */ /* returns # of bytes used in x */ short dct[], last_dc; /* short is 16 bits */ unsigned short dc_ehufco[], ac_ehufco[]; byte x[], dc_ehufsi[], ac_ehufsi[]; int limit, nblocks; /* but could be short */ { int i, j, k, idct, r1, size, iq, lenx; int ks; short temp, temp2, nbits, rs, sq, code; unsigned int y, z; unsigned short mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff,0x3ff,0x7ff,0xfff,0x1fff,0x3fff,0x7fff,0xffff}; byte *px; px = x; /* where we put the results */ r1 = 0; /* a bit count */ while (nblocks--) { /* loop over input blocks */ /* output the dc and ac components of a block */ /* parts modified from the jpeg-5a code */ /* the dct array contains the coeffs. in zig/zag order, we write to a buffer x[] which has to be big enough (we check), x is not pre-zeroed which accounts for some of the "j==0" testing done */ /* Encode the DC coefficient difference per section F.1.2.1 */ /* this involves doing a first difference and then then finding the nagnitude range of the result (i.e., log2). There are 16 ranges which use a Huffman code tabe, the remaining bits (up to 15) then follow the Huffman code for a total of up to 31 bits for a dc term */ temp = temp2 = dct[0] - last_dc; last_dc = dct[0]; /* note that dct is bumped by 64 at end of loop */ if (temp < 0) { temp = -temp; temp2--; } /* temp is abs value of input */ /* For a negative input, want temp2 = bitwise complement of abs(input) */ /* This code assumes we are on a two's complement machine */ /* Find the number of bits needed for the magnitude of the coefficient */ nbits = 0; while (temp) { nbits++; temp >>= 1; } /* this zaps temp, we use temp2 */ /* the number of bits (nbits) will be 0 to 14 for the dc term with 12 JPEG, a Huffman code for nbits is inserted into the bit stream followed by temp2. The Huffman code can be up to 16 bits long and temp2 can be up to 15 bits for a total of 31 bits. We could emit these out separately but instead combine them first in a 32 bit word. */ sq = dc_ehufsi[nbits]; code = dc_ehufco[nbits]; /* size and code */ if (sq == 0) return huff_encode_err(nbits); EMITCODE2 /* a macro, used here and for ac terms */ /* now up to 63 ac terms, dtc[1 to 63] */ /* Encode the AC coefficients per section F.1.2.2 */ rs = 0; /* rs = run length of zeros */ for (idct = 1; idct < 64; idct++) { /* change back to 64 */ if ((temp =dct[idct]) == 0) rs++; else { while (rs > 15) { /* if run length > 15, must emit special run-length-16 codes (0xF0), this is usually a long code, typically 10 or 11 bits, fairly rare */ sq = ac_ehufsi[0xF0]; code = ac_ehufco[0xF0]; /* size and code */ if (sq == 0) return huff_encode_err(nbits); EMITCODE /* defined above, good for 16 bits or less */ rs -= 16; } /* as with the dc term, get temp2 and nbits */ temp2 = temp; if (temp < 0) { temp = -temp; temp2--; } nbits = 1; /* there must be at least one 1 bit for the ac's */ while ((temp >>= 1)) nbits++; /* the JPEG symbol is 8 bits using 4 bits for the run length and 4 for the magnitude range of the next non-zero ac term, this 8 bit symbol is Huffman coded and we now emit the Huffman code plus the extra bits */ iq = (rs << 4) + nbits; sq = ac_ehufsi[iq]; code = ac_ehufco[iq]; /* size and code */ if (sq == 0) return huff_encode_err(iq); EMITCODE2 /* a macro, used here and for dc term */ rs = 0; } } /* If the last coef(s) were zero, emit an end-of-block code (#0) */ if (rs > 0) { sq = ac_ehufsi[0]; code = ac_ehufco[0]; /* size and code */ if (sq == 0) return huff_encode_err(nbits); EMITCODE /* defined above, good for 16 bits or less */ } dct = dct +64; /* point to next block */ } lenx = px - x + 1; if ( (r1%8) != 0 ) lenx++; return lenx; } /*------------------------------------------------------------------------- */ int huff_decode_err(n) int n; { printf("Huffman decoding error # %d\n", n); return -1; } /*------------------------------------------------------------------------- */ int huff_encode_err(n) int n; { printf("illegal symbol, no Huffman code for value %d\n", n); return -1; } /*------------------------------------------------------------------------- */ int ana_jpeg12_scan(narg,ps) /* test routine for decoding epic dct's */ /* an ana function, returns dct array */ int narg, ps[]; /* calls huff_decode_dct */ { /* expect 7 args dct = jpeg12_scan(nblocks, x, dc_bits, dc_huffval, ac_bits, ac_huffval, restart_interval) note that x is modified */ int nd, nx, ny, dim[8], iq, *freq, n, lenx, i, j, limit, nblocks; int nval_ac, nval_dc, blimit, restart_interval, icount; int result_sym, nb, n_restarts_found, code, iblock, stat, n_unexpected; struct ahead *h; byte *dc_bits, *dc_huffval, *ac_bits, *ac_huffval; short last_dc = 0, *dct; byte *xbuffer, *px, *pt, *pb; /* first arg is nblocks */ if (int_arg_stat(ps[0], &nblocks) != 1) return -1; if (int_arg_stat(ps[6], &restart_interval) != 1) return -1; /* setup the dct array, a (64,nblocks) I*2 array */ dim[0] = 64; dim[1] = nblocks; result_sym = array_scratch(1, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; dct = (short *) ((char *)h + sizeof(struct ahead)); /* the second arg is the encoded data, don't worry about data type, but must be an array. We figure out total bytes available for limit */ iq = ps[1]; if ( sym[iq].class != 4 ) return execute_error(66); h = (struct ahead *) sym[iq].spec.array.ptr; nd = h->ndim; limit = ana_type_size[sym[iq].type]; for (j=0;jdims[j]; /* # of bytes */ xbuffer = (byte *) ((char *)h + sizeof(struct ahead)); /* arg 3 is the dc bit length table, must be 16 long */ iq = ps[2]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; dc_bits = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nx = h->dims[0]; if (nd != 1 || nx != 16) { printf("JPEG12_SCAN, bits must be a vector of 16\n"); return -1; } /* arg 4, now the dc values, the size determines nval_dc */ iq = ps[3]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; dc_huffval = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nval_dc = h->dims[0]; if (nd != 1) { printf("JPEG12_SCAN, values must be a vector\n"); return -1; } /* arg 5 is the ac bit length table, must be 16 long */ iq = ps[4]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; ac_bits = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nx = h->dims[0]; if (nd != 1 || nx != 16) { printf("JPEG12_SCAN, bits must be a vector of 16\n"); return -1; } /* arg 6, now the ac values, the size determines nval_ac */ iq = ps[5]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; ac_huffval = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nval_ac = h->dims[0]; if (nd != 1) { printf("JPEG12_SCAN, values must be a vector\n"); return -1; } /* scan for padded markers and messages, decode each restart section after we finish the scan of it */ px = xbuffer; pt = pb = px; iblock = 0; icount = 0; n = 0; /* n will be the bytes found between restarts */ /* nblocks is the total for the whole image, nb is the # for each restart interval, if that is 0 then nb = nblocks (i.e., no restarts) */ if (restart_interval) nb = restart_interval; else nb = nblocks; printf("nb = %d\n", nb); for (;;) { /* scan until we hit limit or decode nblocks or error */ if ( (*pt++ = *px++) == 0xff) { /* a message or pad? */ code = *px++ & 0x00ff; if (code == 0) /* just a padded ff, ff in stream already, just bump n */ n++; else { if (code == 0xff) { /* this is 2 ff's */ *pt++ = 0xff; n +=2; } else { if ( (code == 0xd9) || ((code & 0xf8) == 0xd0) ) { printf("message code = %#x, icount = %d\n", code, icount); --pt; n_restarts_found += 1; /* either the end of image or a restart, process what we have */ /* we assume that we only process a number of blocks equal to the restart_interval or whatever is left in image, note that normal TRACE images will always have an integer number of restart intervals */ if ( (nblocks - iblock) < nb ) nb = nblocks - iblock; if (nb > 0) { //printf("decoding, n, nb = %d, %d at %d, iblock = %d, ic = %d\n", // n, nb, pb, iblock,icount); icount++; stat = huff_decode_dct(&dct[iblock*64], nb, pb, n, dc_bits, dc_huffval, last_dc, ac_bits, ac_huffval, nval_dc, nval_ac); if (stat != 1) printf("error at iblock = %d\n", iblock); } else { /* got a restart or EOI and no data left, OK if EOI, in any case we have to stop */ if (code == 0xd9) { printf("got the EOI\n"); } else { printf("abnormal end of image, beware!\n"); } break; } /* if the code was the end of image, we should be done */ n = 0; pb = pt = px; iblock += nb; if (code == 0xd9) { printf("got the EOI\n"); break; } } else { printf("unexpected message %#x at iblock = %d\n", code, iblock); n_unexpected += 1; /* check ahead to see if there is a restart message next */ printf("%#x %#x %#x %#x\n", *px, *(px+1), *(px+2), *(px+3) ); /* treat as data and see if we choke */ *pt++ = code; n += 2; /*return scan_err(1);*/ } } } /* all of the cases with a code use up 2 bytes, so */ limit = limit -2; } else { limit--; n ++; } /* if not a ff, only one byte consumed */ if ( limit <= 0) { printf("jpeg12_scan - limit error\n"); printf("px-xbuffer = %d, n = %d\n", px-xbuffer, n); printf("pt-xbuffer = %d, n = %d\n", pt-xbuffer, n); break;} } return result_sym; } /*------------------------------------------------------------------------- */ int ana_huff_decode_dct(narg,ps) /* test routine for decoding dct's */ /* an ana function, returns dct array */ int narg, ps[]; /* calls huff_decode_dct */ { /* expect 6 args optional 7th added 5/29/96, pflag set to 0 disables padding removal dct = huff_decode_dct(nblocks, x, dc_bits, dc_huffval, ac_bits, ac_huffval, [pflag]) */ int nd, nx, ny, dim[8], iq, *freq, n, lenx, i, j, limit, nblocks; int nval_ac, nval_dc, blimit, pflag=1; int result_sym; struct ahead *h; byte *dc_bits, *dc_huffval, *ac_bits, *ac_huffval; short last_dc = 0, *dct; byte *xbuffer, *px, *pt; /* first arg is nblocks */ if (int_arg_stat(ps[0], &nblocks) != 1) return -1; if (narg > 6) { if (int_arg_stat(ps[6], &pflag) != 1) return -1; } /* setup the dct array, a (64,nblocks) I*2 array */ dim[0] = 64; dim[1] = nblocks; result_sym = array_scratch(1, 2, dim); h = (struct ahead *) sym[result_sym].spec.array.ptr; dct = (short *) ((char *)h + sizeof(struct ahead)); /* the second arg is the encoded data, don't worry about data type, but must be an array. We figure out total bytes available for limit */ /* we also do a pass to remove zero pads after false markers, this modifies the buffer array */ iq = ps[1]; if ( sym[iq].class != 4 ) return execute_error(66); h = (struct ahead *) sym[iq].spec.array.ptr; nd = h->ndim; limit = ana_type_size[sym[iq].type]; for (j=0;jdims[j]; /* # of bytes */ xbuffer = (byte *) ((char *)h + sizeof(struct ahead)); /* scan for padded markers and remove the zero pads, if markers without zero pads are found, emit a warning */ if (pflag == 1) { n = i = limit; px = xbuffer; while (n--) { /* skip to first ff */ if (*px++ == 0xff) break; } /* found the first one, back up one */ px--; n++; pt = px; while ( (n--) > 0 ) { if ( (*pt++ = *px++) == 0xff) { if (*px++ != 0 ) { /* expected a zero?, mention this */ printf("huff_decode_dct, unpadded marker encountered, beware\n"); px--; i = px - xbuffer; printf("mark %#x followed by %#x, at i = %d\n", *(px-1), *px, i); *pt++ = *px; px++; /* pass it on but probably hopeless */ } else { limit--; n--;} }} /* printf("removed %d pads from encoded data\n", i - limit); */ } /* end of pad removal section */ /* arg 3 is the dc bit length table, must be 16 long */ iq = ps[2]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; dc_bits = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nx = h->dims[0]; if (nd != 1 || nx != 16) { printf("HUFF_DECODE_DCT, bits must be a vector of 16\n"); return -1; } /* arg 4, now the dc values, the size determines nval_dc */ iq = ps[3]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; dc_huffval = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nval_dc = h->dims[0]; if (nd != 1) { printf("HUFF_DECODE_DCT, values must be a vector\n"); return -1; } /* arg 5 is the ac bit length table, must be 16 long */ iq = ps[4]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; ac_bits = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nx = h->dims[0]; if (nd != 1 || nx != 16) { printf("HUFF_DECODE_DCT, bits must be a vector of 16\n"); return -1; } /* arg 6, now the ac values, the size determines nval_ac */ iq = ps[5]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 0 ) iq = ana_byte(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; ac_huffval = (byte *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; nval_ac = h->dims[0]; if (nd != 1) { printf("HUFF_DECODE_DCT, values must be a vector\n"); return -1; } huff_decode_dct(dct, nblocks, xbuffer, limit, dc_bits, dc_huffval, last_dc, ac_bits, ac_huffval, nval_dc, nval_ac); return result_sym; } /*------------------------------------------------------------------------- */ int huff_decode_dct(dct, nblocks, x, limit, dc_bits, dc_huffval, last_dc, ac_bits, ac_huffval) /* returns decoded dct, the coded data is in x, huffman tables as in huff_encode_dct */ short dct[], last_dc; byte x[]; int limit, nblocks; byte dc_bits[], dc_huffval[], ac_bits[], ac_huffval[]; { int i, j, sq, code, k, idct, r1, size, iq, lenx, lastp, n, nval; int jq, result, look, ntable, lookbits, nb, lbits, ks, iblock; unsigned char huffsize[256], *p, *bits, *huffval; unsigned short huffcode[256], *pc; int dc_mincode[16], dc_maxcode[16], dc_valptr[16]; int ac_mincode[16], ac_maxcode[16], ac_valptr[16]; int *mincode, *maxcode, *valptr; byte dc_look_nbits[256],dc_look_sym[256],ac_look_nbits[256],ac_look_sym[256]; byte *look_nbits, *look_sym; unsigned int y, z, temp, temp2, nbits, rs; unsigned int mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff,0x3ff,0x7ff,0xfff,0x1fff,0x3fff,0x7fff,0xffff}; byte *px; /* use bits to generate the Huffman codes and sizes in code-length order */ /* for both dc and ac tables */ ntable = 2; bits = dc_bits; valptr = dc_valptr; mincode = dc_mincode; maxcode = dc_maxcode; look_nbits = dc_look_nbits; look_sym = dc_look_sym; huffval = dc_huffval; while (ntable--) { p = huffsize; pc = huffcode; code = 0; for (k = 0; k < 16; k++) { j = k + 1; n = (int) bits[k]; /* n is the number of this length */ while (n--) { /* the codes of a common length */ *p++ = (unsigned char) (j); /* just increase by 1 wrt previous */ *pc++ = code++; } code <<= 1; /* for new code size, left shift */ } *p = 0; lastp = p - huffsize; /* Figure F.15: generate decoding tables for bit-sequential decoding */ iq = 0; for (k = 0; k < 16; k++) { if (bits[k]) { valptr[k] = iq; /* huffval[] index of 1st symbol of code length k+1 */ mincode[k] = huffcode[iq]; /* minimum code of length k+1 */ iq += bits[k]; maxcode[k] = huffcode[iq-1]; /* maximum code of length k+1 */ } else { maxcode[k] = -1; /* -1 if no codes of this length */ } } /* generate lookup tables to speed up the process for Huffman codes of length 8 or less, one set for dc and ac, follows technique in jpeg-5a but not the same code so some variables may look similar but beware ! */ bzero(look_nbits, 256); iq = 0; for (k = 0; k < 8; k++) { for (i = 0; i < (int) bits[k]; i++, iq++) { /* k+1 = current code's length, iq = its index in huffcode[] & huffval[]. */ /* Generate left-justified code followed by all possible bit sequences */ lookbits = huffcode[iq] << (7 - k); jq = k+1; for (j= 1 << (7 - k);j > 0;j--) { look_nbits[lookbits] = jq; look_sym[lookbits] = huffval[iq]; lookbits++; } } } bits = ac_bits; valptr = ac_valptr; mincode = ac_mincode; maxcode = ac_maxcode; look_nbits = ac_look_nbits; look_sym = ac_look_sym; huffval = ac_huffval; } px = x; /* where we get the Huffman coded version */ r1 = 0; /* a bit count */ /*printf("in huff_decode_dct, mark 1 =========================\n");*/ /* decode the dc and ac components of a block */ /* parts modified from the jpeg-5a code */ /* iblock = 0; */ while (nblocks--) { /* printf("iblock = %d\n", iblock); iblock++; */ bzero(dct, 128); /* pre-zero this block */ /* start dc decode, grab 8 bits and use lookup shortcut to see if we got a short Huffman code */ i=r1>>3; j=r1%8; /* our byte and bit addresses */ if (i > limit) return dct_buffer_decode_err(limit); /*printf("start dc decode, r1, i, j = %d %d %d ------------\n", r1, i,j);*/ px = x + i; if (j == 0) { /* aligned on byte, lucky 1/8 */ look = *px; } else { /* need part of next byte as well */ jq = 8 - j; /* available bits in px */ look = *px++ << j; /* msb, make room for lsb */ look |= ((int) *px) >> jq; look = look & 0xff; } /*printf("look = %#x\n", look);*/ if ((nb = dc_look_nbits[look]) != 0) { nbits = dc_look_sym[look]; lbits = 8 -nb; r1 += nb; /*printf("successful lookup, size = %d, value(nbits) = %d\n", nb, nbits);*/ } else { /*printf("slow decode\n");*/ /* get here if the code is longer than 8 bits or some kind of disaster */ r1 += 8; /* point after look */ i=r1>>3; j=r1%8; px = x + i; /* get 16 bits in temp */ temp = ((int) look) << 8; /* look will be top 8 */ if (j == 0) { temp = temp | ( (int) *px ); } else { /* need 2 transfers, 8 bits total */ jq = 8 - j; /* available bits in px */ temp |= ( *px++ << j ); temp |= ( ((int) *px) >> jq); } /*printf("16 bit candidate in slow decode = %#x\n", temp);*/ k = 7; nb = 8; /* here nb is code size - 1 */ while ( (ks =(temp >> k)) > dc_maxcode[nb] ) { k--; nb++; if (k < 0) return huff_decode_err(0); } /* note error return if we couldn't find a code */ nbits = dc_huffval[ dc_valptr[nb] + ks - dc_mincode[nb] ]; /*printf("ks=%d, nb=%d, dc_maxcode[nb]=%d, dc_mincode[nb]=%d,dc_valptr[nb]=%d\n", ks,nb,dc_maxcode[nb],dc_maxcode[nb],dc_valptr[nb]);*/ nb++; /* actual size of code */ /*printf("size of code from slow_decode = %d\n", nb);*/ r1 += (nb -8); lbits = 0; } /*printf("nbits = %#x\n", nbits);*/ /* that defines the dc range interval, now get the rest of the bits */ /* if nbits is 0, don't need any */ if (nbits) { /* we have some still in look, lbits */ /* if so, nb is valid, so use it to shift old look */ /*printf("lbits = %d\n", lbits);*/ if (lbits >= nbits) { temp = (look>> (lbits-nbits)) & mask[nbits]; } else { /* harder, need "nbits" # of bits */ i=r1>>3; j=r1%8; px = x + i; /*printf("start appended bits, r1, i, j = %d %d %d\n", r1, i,j);*/ jq = 8 - j; temp = ( *px & mask[jq] ); /* gets us jq ls bits */ ks = nbits - jq; /* how much more needed (could be < 0) */ /*printf("jq, ks, temp =%d %d %#x\n", jq, ks, temp);*/ if (ks>0) { temp = temp << ks; /* shift over to make room */ ks -=8; temp2 = ((int) *++px); /*printf("second byte, ks, temp2 = %d %#x\n", ks, temp2);*/ if (ks>0) { temp |= ( temp2 << ks ); /* need a third ? */ ks -=8; temp2 = ((int) *++px); /*printf("third byte, ks, temp2 = %d %#x\n", ks, temp2);*/ } temp |= ( temp2 >> (-ks) ); /* last, #2 or #3 */ } else { temp = temp >> (-ks); } } r1 += nbits; /* count after this is done */ /* now extend the sign, uses the jpeg-5a macro defined above */ /* we use the lookup table version */ /*printf("temp = %#x ", temp);*/ nbits = huff_EXTEND(temp, nbits); /*printf("extended = %#x\n", nbits);*/ } dct[0] = last_dc = nbits + last_dc; /*printf("dc = %d, %#x\n", dct[0], dct[0]);*/ /* wraps the dc term, start the ac */ /*printf("start ac decode, r1, i, j = %d %d %d ***************\n", r1, i,j);*/ for (idct = 1; idct < 64; idct++) { /*printf("start ac term %d\n", idct);*/ i=r1>>3; j=r1%8; px = x + i; if (i > limit) return dct_buffer_decode_err(limit); /* decode the Huffman symbol, use ac tables */ if (j == 0) { /* aligned on byte, lucky 1/8 */ look = *px; } else { /* need part of next byte as well */ jq = 8 - j; /* available bits in px */ look = *px++ << j; /* msb, make room for lsb */ look |= ((int) *px) >> jq; look = look & 0xff; } /*printf("look = %#x\n", look);*/ if ((nb = ac_look_nbits[look]) != 0) { nbits = ac_look_sym[look]; lbits = 8 -nb; r1 += nb; /*printf("successful lookup, size = %d, value(nbits) = %d\n", nb, nbits);*/ } else { /*printf("slow decode\n");*/ /* get here if the code is longer than 8 bits or some kind of disaster */ r1 += 8; /* point after look */ i=r1>>3; j=r1%8; px = x + i; /* get 16 bits in temp */ temp = ((int) look) << 8; /* look will be top 8 */ if (j == 0) { temp = temp | ( (int) *px ); } else { /* need 2 transfers, 8 bits total */ jq = 8 - j; /* available bits in px */ temp |= ( *px++ << j ); temp |= ( ((int) *px) >> jq); } /*printf("16 bit candidate in slow decode = %#x\n", temp);*/ k = 7; nb = 8; /* here nb is code size - 1 */ while ( (ks =(temp >> k)) > ac_maxcode[nb] ) { k--; nb++; if (k < 0) return huff_decode_err(1); } /* note error return if we couldn't find a code */ nbits = ac_huffval[ ac_valptr[nb] + ks - ac_mincode[nb] ]; /*printf("ks=%d, nb=%d, ac_maxcode[nb]=%d, ac_mincode[nb]=%d,ac_valptr[nb]=%d\n", ks,nb,ac_maxcode[nb],ac_maxcode[nb],ac_valptr[nb]);*/ nb++; /* actual size of code */ /*printf("size of code from slow_decode = %d\n", nb);*/ r1 += (nb -8); lbits = 0; } /*printf("nbits = %#x\n", nbits);*/ /* that defines the ac symbol, contains a zero run length and bits in next non-zero coeff. */ rs = nbits >> 4; /* run length */ nbits = nbits & 0xf; /* bits in next one, unless 0, then a special code */ /*printf("rs = %d, nbits = %d\n", rs, nbits);*/ if (nbits == 0) { if (rs != 15) break; /* hit the end, rest are all zips */ idct += 15; } else { idct += rs; /* skip over the zeroes */ /* need the next nbits bits */ /* we have some still in look, lbits */ /* if so, nb is valid, so use it to shift old look */ /*printf("lbits = %d\n", lbits);*/ if (lbits >= nbits) { temp = (look>> (lbits-nbits)) & mask[nbits]; } else { /* harder, need "nbits" # of bits */ i=r1>>3; j=r1%8; px = x + i; /*printf("start appended bits, r1, i, j = %d %d %d\n", r1, i,j);*/ jq = 8 - j; temp = ( *px & mask[jq] ); /* gets us jq ls bits */ ks = nbits - jq; /* how much more needed (could be < 0) */ /*printf("jq, ks, temp =%d %d %#x\n", jq, ks, temp);*/ if (ks>0) { temp = temp << ks; /* shift over to make room */ ks -=8; temp2 = ((int) *++px); /*printf("second byte, ks, temp2 = %d %#x\n", ks, temp2);*/ if (ks>0) { temp |= ( temp2 << ks ); /* need a third ? */ ks -=8; temp2 = ((int) *++px); /*printf("third byte, ks, temp2 = %d %#x\n", ks, temp2);*/ } temp |= ( temp2 >> (-ks) ); /* last, #2 or #3 */ } else { temp = temp >> (-ks); } } r1 += nbits; /* count after this is done */ /* now extend the sign, uses the jpeg-5a macro defined above */ /* we use the lookup table version */ /*printf("temp = %#x ", temp);*/ nbits = huff_EXTEND(temp, nbits); /*printf("extended = %#x\n", nbits);*/ /* this nbits is now the ac term */ dct[idct] = (short) nbits; /*printf("ac term = %d\n", dct[idct]);*/ } } /* end of idct loop for ac terms */ dct += 64; } /* end of nblocks loop */ return 1; } /*------------------------------------------------------------------------- */ int ana_dcthist(narg,ps) /* test routine for encoding dct's */ /* computes histograms for the dc and ac symbols that get Huffman coded */ int narg, ps[]; /* calls huff_encode_dct */ { /* expect 3 args dcthist, dct, dcbins, acbins the first is input, the others are output */ int nd, nx, ny, dim[8], iq, *freq, nval, n, lenx, i, j, nblocks; int *dcbins, *acbins, rs, temp, temp2, nbits, idct; struct ahead *h; short last_dc = 0, *dct; byte *xbuffer, *tbuffer; byte *px, *pt; /* first arg is the dct array, usually a (64,nblocks) array but we accept any thing that is a multiple of 64 */ /* we make it an I*2 array */ iq = ps[0]; if ( sym[iq].class != 4 ) return execute_error(66); if ( sym[iq].type != 1 ) iq = ana_word(1, &iq); h = (struct ahead *) sym[iq].spec.array.ptr; dct = (short *) ((char *)h + sizeof(struct ahead)); nd = h->ndim; n = 1; for (j=0;jdims[j]; /* # of elements */ if ( (n%64) != 0 || n == 0) { printf("DCTHIST, dct array must be 0 mod 64\n"); return -1; } nblocks = n/64; /* create the output arrays */ /* redefine the second arg to be a long array 16 long */ iq = ps[1]; dim[0] = 16; nd = 1; if (redef_array(iq, 2, nd, dim) != 1) return -1; h = (struct ahead *) sym[iq].spec.array.ptr; dcbins = (int *) ((char *)h + sizeof(struct ahead)); /* redefine the third arg to be a long array 16 x 16 */ iq = ps[2]; dim[0] = 16; dim[1] = 16; nd = 2; if (redef_array(iq, 2, nd, dim) != 1) return -1; h = (struct ahead *) sym[iq].spec.array.ptr; acbins = (int *) ((char *)h + sizeof(struct ahead)); /* zero these */ bzero(dcbins, 16*4); bzero(acbins, 256*4); /* now run through the blocks, compute the strange values, and accumulate the histograms */ n = nblocks; while (n--) { temp = temp2 = dct[0] - last_dc; last_dc = dct[0]; if (temp < 0) { temp = -temp; temp2--; } nbits = 0; while (temp) { nbits++; temp >>= 1; } /* this zaps temp, we use temp2 */ dcbins[nbits] += 1; /* ac terms */ rs = 0; /* rs = run length of zeros */ for (idct = 1; idct < 64; idct++) { /* change back to 64 */ if ((temp =dct[idct]) == 0) rs++; else { while (rs > 15) { acbins[0xf0] += 1; rs -= 16; } temp2 = temp; if (temp < 0) { temp = -temp; temp2--; } nbits = 1; while ((temp >>= 1)) nbits++; iq = (rs << 4) + nbits; acbins[iq] += 1; rs = 0; } } /* If the last coef(s) were zero, emit an end-of-block code (#0) */ if (rs > 0) { acbins[0] += 1; } dct = dct +64; /* point to next block */ } return 1; } /*------------------------------------------------------------------------- */