#include /* Setup the IWRAM-loading definitions if this has been enabled. There are * three sections of this; the register definitions, the IWRAM end * determination, and the DMA copying functions. Register definitions are the * same as anywhere else, only with a JPEG_IWRAM prefix. The IWRAM end * determination uses a generated variable that DevKit Advance's linker script * creates. Because of this, other linker scripts might not work with this * code. Finally, the DMA copying uses DMA 3. * * Functions that are to be copied to IWRAM must obey certain restrictions. * They cannot refer to external constant data. They must be declared static. * They must have a JPEG_FUNCTION_END(NAME) macro after them; see how it is * used in the code ahead for an example. Finally, you should avoid external * references altogether because of how it limits your flexibility. Instead, * pass necessary variable and function pointers in the arguments. */ #if JPEG_USE_IWRAM /* The source address pointer for DMA 3. */ #define JPEG_IWRAM_REG_DM3SAD (*(volatile unsigned int *) 0x40000D4) /* The destination address pointer for DMA 3. */ #define JPEG_IWRAM_REG_DM3DAD (*(volatile unsigned int *) 0x40000D8) /* The number of words or halfwords to transfer for DMA 3. */ #define JPEG_IWRAM_REG_DM3CNT_L (*(volatile unsigned short *) 0x40000DC) /* DMA 3 control register. */ #define JPEG_IWRAM_REG_DM3CNT_H (*(volatile unsigned short *) 0x40000DE) /* The address of this is the end of the .bss (uninitialized variables) * segment, which DevKit Advance's linker script puts last. */ extern char __bss_end; /* Retrieve the pointer to the first free byte in the IWRAM segment. */ #define JPEG_IWRAM_USED_END (&__bss_end) /* This creates a simple stub function that can be used with JPEG_FUNCTION_SIZE * to determine the size of a function in bytes. 
If the function will be * IWRAM-loaded, this macro must be executed immediately after the * function with the name of the function in the NAME parameter, and the * function must be declared static. */ #define JPEG_FUNCTION_END(NAME) static void NAME##End () { } /* Retrieve the size in bytes of a function that has a JPEG_FUNCTION_END * ballast. */ #define JPEG_FUNCTION_SIZE(NAME) ((int) ((char *) &NAME##End - (char *) &NAME) & ~3) /* Start a loading function by defining the necessary variables. */ #define JPEG_IWRAM_LoadStart() char *iwramEnd = (char *) JPEG_IWRAM_USED_END /* Load the value named JPEG_NAME into the pointer named NAME, * adjusting the read pointer. This copies SIZE bytes through DMA 3. */ #define JPEG_IWRAM_LoadValue(NAME, SIZE) \ *(void **) &NAME = iwramEnd; \ while (JPEG_IWRAM_REG_DM3CNT_H & (1 << 15)) { } \ JPEG_Assert (iwramEnd + (SIZE) < (char *) &iwramEnd); \ JPEG_IWRAM_REG_DM3SAD = (unsigned int) &JPEG_##NAME; \ JPEG_IWRAM_REG_DM3DAD = (unsigned int) iwramEnd; \ JPEG_IWRAM_REG_DM3CNT_L = (SIZE + 3) >> 2; \ JPEG_IWRAM_REG_DM3CNT_H = (1 << 10) | (1 << 15); \ iwramEnd += (SIZE & ~3) #define JPEG_IWRAM_LoadFunction(NAME) JPEG_IWRAM_LoadValue (NAME, JPEG_FUNCTION_SIZE (JPEG_##NAME)) #define JPEG_IWRAM_LoadData(NAME) JPEG_IWRAM_LoadValue (NAME, sizeof (JPEG_##NAME)) /* Finish loading the IWRAM by waiting for the DMA transfers to finish and * making an assertion check that makes sure (with fairly good but not * perfect assurance) that we haven't written over the stack. */ #define JPEG_IWRAM_LoadDone() \ do { } while (JPEG_IWRAM_REG_DM3CNT_H & (1 << 15)) #else /* This stub does absolutely nothing. */ #define JPEG_FUNCTION_END(NAME) #endif /* JPEG_USE_IWRAM */ /* Converts left-to-right coefficient indices into zig-zagged indices. 
*/
const unsigned char JPEG_ToZigZag [JPEG_DCTSIZE2] =
{
    0, 1, 8, 16, 9, 2, 3, 10,
    17, 24, 32, 25, 18, 11, 4, 5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13, 6, 7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63,
};

/* These macros are so that we can generate the AA&N multipliers at
 * compile-time, allowing configuration control of fixed point precision. */

#define JPEG_AAN_0 1.0
#define JPEG_AAN_1 1.387039845
#define JPEG_AAN_2 1.306562965
#define JPEG_AAN_3 1.175875602
#define JPEG_AAN_4 1.0
#define JPEG_AAN_5 0.785694958
#define JPEG_AAN_6 0.541196100
#define JPEG_AAN_7 0.275899379

/* One row of eight scale factors: every JPEG_AAN_n multiplied by JPEG_AAN_B
 * and converted to fixed point with JPEG_FTOFIX. */
#define JPEG_AAN_LINE(B) \
    JPEG_FTOFIX (JPEG_AAN_0 * JPEG_AAN_##B), \
    JPEG_FTOFIX (JPEG_AAN_1 * JPEG_AAN_##B), \
    JPEG_FTOFIX (JPEG_AAN_2 * JPEG_AAN_##B), \
    JPEG_FTOFIX (JPEG_AAN_3 * JPEG_AAN_##B), \
    JPEG_FTOFIX (JPEG_AAN_4 * JPEG_AAN_##B), \
    JPEG_FTOFIX (JPEG_AAN_5 * JPEG_AAN_##B), \
    JPEG_FTOFIX (JPEG_AAN_6 * JPEG_AAN_##B), \
    JPEG_FTOFIX (JPEG_AAN_7 * JPEG_AAN_##B)

/* The AA&N scaling factors.  These should be multiplied against quantization
 * coefficients to determine their real value. */
const JPEG_FIXED_TYPE JPEG_AANScaleFactor [JPEG_DCTSIZE2] =
{
    JPEG_AAN_LINE (0),
    JPEG_AAN_LINE (1),
    JPEG_AAN_LINE (2),
    JPEG_AAN_LINE (3),
    JPEG_AAN_LINE (4),
    JPEG_AAN_LINE (5),
    JPEG_AAN_LINE (6),
    JPEG_AAN_LINE (7),
};

/* Width in pixels of the most recently parsed frame; JPEG_FrameHeader_Read
 * overwrites it, and JPEG_ConvertBlock reads it in its 2:1:1 path. */
int jpeg_width = 256;

/* This converts values in the range [-32 .. 63] to [0 .. 31] by clamping
 * values outside of [0 .. 31].  To use it, add 32 to your input. */
const unsigned char JPEG_ComponentRange [32 * 3] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
    31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31
};

/* Compute the columns half of the IDCT.
*/ void JPEG_IDCT_Columns (JPEG_FIXED_TYPE *zz) { JPEG_FIXED_TYPE tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmp10, tmp11; JPEG_FIXED_TYPE *ez = zz + JPEG_DCTSIZE; /* The first column will always have a non-zero coefficient, the DC. */ goto skipFirstCheckb; for ( ; zz < ez; zz ++) { /* A column containing only zeroes will output only zeroes. Since we * output in-place, we don't need to do anything in that case. */ if (!zz [0 * JPEG_DCTSIZE] && !zz [1 * JPEG_DCTSIZE] && !zz [2 * JPEG_DCTSIZE] && !zz [3 * JPEG_DCTSIZE] && !zz [4 * JPEG_DCTSIZE] && !zz [5 * JPEG_DCTSIZE] && !zz [6 * JPEG_DCTSIZE] && !zz [7 * JPEG_DCTSIZE]) continue; skipFirstCheckb: tmp0 = zz [0 * JPEG_DCTSIZE]; tmp1 = zz [2 * JPEG_DCTSIZE]; tmp2 = zz [4 * JPEG_DCTSIZE]; tmp3 = zz [6 * JPEG_DCTSIZE]; tmp6 = tmp1 + tmp3; tmp7 = JPEG_FIXMUL (tmp1 - tmp3, JPEG_FTOFIX (1.414213562)) - tmp6; tmp1 = tmp0 - tmp2 + tmp7; tmp0 = tmp0 + tmp2 + tmp6; tmp3 = tmp0 - (tmp6 << 1); tmp2 = tmp1 - (tmp7 << 1); tmp4 = zz [1 * JPEG_DCTSIZE]; tmp5 = zz [3 * JPEG_DCTSIZE]; tmp6 = zz [5 * JPEG_DCTSIZE]; tmp7 = zz [7 * JPEG_DCTSIZE]; tmp10 = tmp4 - tmp7; tmp8 = tmp6 + tmp5; tmp9 = tmp4 + tmp7; tmp7 = tmp9 + tmp8; tmp11 = JPEG_FIXMUL (tmp9 - tmp8, JPEG_FTOFIX (1.414213562)); tmp8 = tmp6 - tmp5; tmp9 = JPEG_FIXMUL (tmp8 + tmp10, JPEG_FTOFIX (1.847759065)); tmp6 = JPEG_FIXMUL (JPEG_FTOFIX (-2.613125930), tmp8) + tmp9 - tmp7; tmp5 = tmp11 - tmp6; tmp4 = JPEG_FIXMUL (JPEG_FTOFIX (1.082392200), tmp10) - tmp9 + tmp5; zz [0 * JPEG_DCTSIZE] = tmp0 + tmp7; zz [1 * JPEG_DCTSIZE] = tmp1 + tmp6; zz [2 * JPEG_DCTSIZE] = tmp2 + tmp5; zz [3 * JPEG_DCTSIZE] = tmp3 - tmp4; zz [4 * JPEG_DCTSIZE] = tmp3 + tmp4; zz [5 * JPEG_DCTSIZE] = tmp2 - tmp5; zz [6 * JPEG_DCTSIZE] = tmp1 - tmp6; zz [7 * JPEG_DCTSIZE] = tmp0 - tmp7; } } JPEG_FUNCTION_END (JPEG_IDCT_Columns) /* Compute the rows half of the IDCT, loading the component information into * chunk as values in the range -64 to 64, although it can go somewhat outside * of that 
range. chunkStride is the number of bytes in a row in chunk. */
void JPEG_IDCT_Rows (const JPEG_FIXED_TYPE *zz, signed char *chunk, int chunkStride)
{
    JPEG_FIXED_TYPE tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
    JPEG_FIXED_TYPE tmp4, tmp5, tmp6, tmp7, z5, z10, z11, z12, z13;
    int row;

    /* One iteration per row: zz advances by one row of coefficients and
     * chunk advances by chunkStride output bytes. */
    for (row = 0; row < JPEG_DCTSIZE; row ++, zz += JPEG_DCTSIZE, chunk += chunkStride)
    {
        /* Even-indexed inputs (columns 0, 2, 4, 6). */
        tmp10 = zz [0] + zz [4];
        tmp11 = zz [0] - zz [4];

        tmp13 = zz [2] + zz [6];
        tmp12 = JPEG_FIXMUL (zz [2] - zz [6], JPEG_FTOFIX (1.414213562)) - tmp13;

        tmp0 = tmp10 + tmp13;
        tmp3 = tmp10 - tmp13;
        tmp1 = tmp11 + tmp12;
        tmp2 = tmp11 - tmp12;

        /* Odd-indexed inputs (columns 1, 3, 5, 7). */
        z13 = zz [5] + zz [3];
        z10 = zz [5] - zz [3];
        z11 = zz [1] + zz [7];
        z12 = zz [1] - zz [7];

        tmp7 = z11 + z13;
        tmp11 = JPEG_FIXMUL (z11 - z13, JPEG_FTOFIX (1.414213562));

        z5 = JPEG_FIXMUL (z10 + z12, JPEG_FTOFIX (1.847759065));
        tmp10 = JPEG_FIXMUL (JPEG_FTOFIX (1.082392200), z12) - z5;
        tmp12 = JPEG_FIXMUL (JPEG_FTOFIX (-2.613125930), z10) + z5;

        tmp6 = tmp12 - tmp7;
        tmp5 = tmp11 - tmp6;
        tmp4 = tmp10 + tmp5;

        /* This shifts by an extra bit to remove the need for clamping at
         * this point.  Thus the normative samples are in the range -64 to 63.
         * This requires a later bit-shift, but that comes for free with the ARM
         * instruction set, and has an acceptable, likely imperceptible, loss
         * of quality. */
        chunk [0] = JPEG_FIXTOI (tmp0 + tmp7) >> 4;
        chunk [1] = JPEG_FIXTOI (tmp1 + tmp6) >> 4;
        chunk [2] = JPEG_FIXTOI (tmp2 + tmp5) >> 4;
        chunk [3] = JPEG_FIXTOI (tmp3 - tmp4) >> 4;
        chunk [4] = JPEG_FIXTOI (tmp3 + tmp4) >> 4;
        chunk [5] = JPEG_FIXTOI (tmp2 - tmp5) >> 4;
        chunk [6] = JPEG_FIXTOI (tmp1 - tmp6) >> 4;
        chunk [7] = JPEG_FIXTOI (tmp0 - tmp7) >> 4;
    }
}
JPEG_FUNCTION_END (JPEG_IDCT_Rows)

/* This function comes from jpeglib.  I feel all right about that since it
 * comes from AA&N anyway.
 *
 * Runs the full IDCT on one block: the column pass transforms zz in place,
 * then the row pass reads zz and writes the samples into chunk, advancing
 * chunkStride bytes per row. */
void JPEG_IDCT (JPEG_FIXED_TYPE *zz, signed char *chunk, int chunkStride)
{
    JPEG_IDCT_Columns (zz);
    JPEG_IDCT_Rows (zz, chunk, chunkStride);
}

/* Compute a signed value.
COUNT is the number of bits to read, and OUT is * where to store the result. */ #define JPEG_Value(COUNT, OUT) \ do { \ unsigned int value = JPEG_BITS_GET (COUNT); \ \ if (value < (unsigned int) (1 << ((unsigned int) (COUNT - 1)))) \ value += (-1 << COUNT) + 1; \ (OUT) = value; \ } while (0) /* Decode the coefficients from the input stream and do dequantization at the * same time. dcLast is the previous block's DC value and is updated. zz is * the output coefficients and will be all ready for an IDCT. quant is the * quantization table to use, dcTable and acTable are the Huffman tables for * the DC and AC coefficients respectively, dataBase, bitsLeftBase, and * bitsDataBase are for input stream state, and toZigZag is a pointer to * JPEG_ToZigZag or to its IWRAM copy. */ void JPEG_DecodeCoefficients ( JPEG_FIXED_TYPE *dcLast, JPEG_FIXED_TYPE *zz, JPEG_FIXED_TYPE *quant, JPEG_HuffmanTable *dcTable, JPEG_HuffmanTable *acTable, const unsigned char **dataBase, unsigned int *bitsLeftBase, unsigned long int *bitsDataBase, const unsigned char *toZigZag) { unsigned bits_left = *bitsLeftBase, bits_data = *bitsDataBase; /* Input stream state. */ const unsigned char *data = *dataBase; /* Input stream state. */ int r, s, diff; /* Various temporary data variables. */ int index = 1; /* The current zig-zagged index. */ /* Clear all coefficients to zero. */ { JPEG_FIXED_TYPE *ez = zz + JPEG_DCTSIZE2; do *-- ez = 0; while (ez > zz); } /* Read the DC coefficient. */ JPEG_BITS_CHECK (); JPEG_HuffmanTable_Decode (dcTable, s); JPEG_Value (s, diff); /* Store the DC coefficient. */ *dcLast += diff; zz [toZigZag [0]] = *dcLast * quant [0]; while (1) { /* Read a bits/run-length value. */ JPEG_BITS_CHECK (); JPEG_HuffmanTable_Decode (acTable, s); r = s >> 4; s &= 15; /* If there is a value at this cell +r, then read it. 
*/ if (s) { index += r; JPEG_Value (s, r); zz [toZigZag [index]] = r * quant [index]; if (index == JPEG_DCTSIZE2 - 1) break; index ++; } /* Otherwise we skip 16 cells or finish up. */ else { if (r != 15) break; index += 16; } } /* Restore state for the caller. */ *bitsDataBase = bits_data; *bitsLeftBase = bits_left; *dataBase = data; } JPEG_FUNCTION_END (JPEG_DecodeCoefficients) /* Convert a chunk of YCbCr data to the output format. YBlock, CbBlock, * and CrBlock are the pointers to the relevant chunks; each sample is * between -64 and 64, although out-of-range values are possible. * nHorzFactor and nVertFactor, where n is Y, Cb, and Cr, hold the * multipliers for each coordinate. Shift right by horzMax and vertMax to * get the actual point to sample data from. M211 is true if the * component factors satisfy a 2:1:1 relationship; this leads to a much * faster conversion if JPEG_FASTER_M211 is enabled. * out and outStride are the output pointers and the number of samples * in an output row. Finally, ComponentRange is a pointer to the * JPEG_ComponentRange array. */ void JPEG_ConvertBlock ( signed char *YBlock, signed char *CbBlock, signed char *CrBlock, int YHorzFactor, int YVertFactor, int CbHorzFactor, int CbVertFactor, int CrHorzFactor, int CrVertFactor, int horzMax, int vertMax, char M211, volatile JPEG_OUTPUT_TYPE *out, int outStride, const unsigned char *ComponentRange) { int px, py; /* Since we need to offset all indices into this anyway, we might as well do it once only. */ ComponentRange += 32; /* Do the faster 2:1:1 code if JPEG_FASTER_M211 is set and the image scan satisfies that relationship. */ #if JPEG_FASTER_M211 if (M211) { /* Nothing complex here. Because of its nature, we can do Cb and Cr * conversion only once for every four pixels. This optimization is * done implicitly, using GCC's optimizer for gleaning the actual * advantage. 
*/ for (py = 0; py < 2 * JPEG_DCTSIZE; py += 2) { volatile JPEG_OUTPUT_TYPE *row = &out [outStride * py]; volatile JPEG_OUTPUT_TYPE *rowEnd = row + JPEG_DCTSIZE * 2; for ( ; row < rowEnd; row += 2, YBlock += 2, CbBlock ++, CrBlock ++) { int Cb = *CbBlock, Cr = *CrBlock; JPEG_Convert (row [0], YBlock [0], Cb, Cr); JPEG_Convert (row [1], YBlock [1], Cb, Cr); JPEG_Convert (row [jpeg_width], YBlock [2 * JPEG_DCTSIZE + 0], Cb, Cr); // 240 JPEG_Convert (row [jpeg_width+1], YBlock [2 * JPEG_DCTSIZE + 1], Cb, Cr); // 241 } YBlock += JPEG_DCTSIZE * 2; } } #else if (0) { } #endif /* JPEG_FASTER_M211 */ /* Otherwise we fall back on generic code, if JPEG_HANDLE_ANY_FACTORS is set. * If it is not, then this function does nothing at all! */ #if JPEG_HANDLE_ANY_FACTORS else for (py = 0; py < vertMax; py ++) { signed char *YScan = YBlock + (py * YVertFactor >> 8) * (horzMax * YHorzFactor >> 8); signed char *CbScan = CbBlock + (py * CbVertFactor >> 8) * (horzMax * CbHorzFactor >> 8); signed char *CrScan = CrBlock + (py * CrVertFactor >> 8) * (horzMax * CrHorzFactor >> 8); volatile JPEG_OUTPUT_TYPE *row = &out [outStride * py]; for (px = 0; px < horzMax; px ++, row ++) { int Y = YScan [px * YHorzFactor >> 8]; int Cb = CbScan [px * CbHorzFactor >> 8]; int Cr = CrScan [px * CrHorzFactor >> 8]; JPEG_Convert (*row, Y, Cb, Cr); } } #endif /* JPEG_HANDLE_ANY_FACTORS */ /* Make sure all variables are referenced. */ (void) YHorzFactor; (void) YVertFactor; (void) CbHorzFactor; (void) CbVertFactor; (void) CrHorzFactor; (void) CrVertFactor; (void) horzMax; (void) vertMax; (void) px; (void) py; (void) YBlock; (void) CbBlock; (void) CrBlock; (void) M211; (void) out; (void) outStride; } JPEG_FUNCTION_END (JPEG_ConvertBlock) /* Decode a Huffman table and initialize its data. This expects to be called * after the DHT marker and the type/slot pair. 
*/
/* huffmanTable receives the decoded table; its huffval pointer refers
 * directly into the input data.  *dataBase must point at the sixteen
 * code-length counts and is advanced past the counts and the symbol values
 * on success.  Returns 1 on success, or 0 without advancing *dataBase if
 * the table declares more than 256 symbols or its codes do not fit the
 * eight-bit lookahead. */
int JPEG_HuffmanTable_Read (JPEG_HuffmanTable *huffmanTable, const unsigned char **dataBase)
{
    const unsigned char *data = *dataBase;
    const unsigned char *bits; /* The sixteen per-length symbol counts. */
    int huffcode [256];
    unsigned char huffsize [257]; /* One extra cell for the zero terminator. */
    int total = 0;
    int c;

    bits = data;
    for (c = 0; c < 16; c ++)
        total += *data ++;

    /* The work arrays hold at most 256 codes; anything larger cannot be a
     * table of byte-sized symbols and would overflow them. */
    if (total > 256)
        return 0;

    huffmanTable->huffval = data;
    data += total;

    /* Generate the size table: the code length of every symbol, in order,
     * terminated by a zero. */
    {
        int k = 0, i, j;

        for (i = 1; i <= 16; i ++)
            for (j = 0; j < bits [i - 1]; j ++)
                huffsize [k ++] = (unsigned char) i;

        huffsize [k] = 0;
    }

    /* Generate the code table: consecutive codes within one length, shifted
     * left by one bit for every step up in length.  An empty table produces
     * no codes at all. */
    {
        int k = 0, code = 0, si = huffsize [0];

        while (huffsize [k] != 0)
        {
            while (huffsize [k] == si)
                huffcode [k ++] = code ++;

            code <<= 1;
            si ++;
        }
    }

    /* Generate the decoder tables: for each code length, the largest code
     * of that length (or -1 if there is none) and a value pointer biased so
     * that indexing it with a code of that length yields the symbol. */
    {
        int i, j = 0;

        for (i = 0; i < 16; i ++)
        {
            if (bits [i] == 0)
                huffmanTable->maxcode [i] = -1;
            else
            {
                huffmanTable->valptr [i] = &huffmanTable->huffval [j - huffcode [j]];
                j += bits [i];
                huffmanTable->maxcode [i] = huffcode [j - 1];
            }
        }
    }

    /* Generate the lookahead tables: for every eight-bit prefix that starts
     * with a code of at most eight bits, record the code length and the
     * symbol.  A length of zero marks prefixes with no such code. */
    {
        int l, i, p, ctr;

        for (c = 0; c < 256; c ++)
            huffmanTable->look_nbits [c] = 0;

        p = 0;
        for (l = 1; l <= 8; l ++)
        {
            for (i = 1; i <= bits [l - 1]; i ++, p ++)
            {
                int lookbits = huffcode [p] << (8 - l);

                /* A well-formed code of length l is below 2^l, so its
                 * entries end at 256 or earlier; reject anything else. */
                if (lookbits + (1 << (8 - l)) > 256)
                    return 0;

                for (ctr = 1 << (8 - l); ctr > 0; ctr --)
                {
                    huffmanTable->look_nbits [lookbits] = l;
                    huffmanTable->look_sym [lookbits] = huffmanTable->huffval [p];
                    lookbits ++;
                }
            }
        }
    }

    *dataBase = data;
    return 1;
}

/* Skip past a Huffman table section.  This expects to be called after reading
 * the DHT marker and the type/slot pair.  *dataBase is advanced past the
 * sixteen counts and as many symbol bytes as the counts add up to.  Always
 * returns 1. */
int JPEG_HuffmanTable_Skip (const unsigned char **dataBase)
{
    const unsigned char *data = *dataBase;
    int c, total = 16; /* Start with the sixteen count bytes themselves. */

    for (c = 0; c < 16; c ++)
        total += *data ++;

    *dataBase += total;
    return 1;
}

/* Takes information discovered in JPEG_Decoder_ReadHeaders and loads the
 * image.  This is a public function; see gba-jpeg.h for more information on it.
*/ int JPEG_Decoder_ReadImage (JPEG_Decoder *decoder, const unsigned char **dataBase, volatile JPEG_OUTPUT_TYPE *out, int outWidth, int outHeight) { JPEG_FrameHeader *frame = &decoder->frame; /* Pointer to the image's frame. */ JPEG_ScanHeader *scan = &decoder->scan; /* Pointer to the image's scan. */ int YHorzFactor = 0, YVertFactor = 0; /* Scaling factors for the Y component. */ int CbHorzFactor = 1, CbVertFactor = 1; /* Scaling factors for the Cb component. The default is important because it is used for greyscale images. */ int CrHorzFactor = 1, CrVertFactor = 1; /* Scaling factors for the Cr component. The default is important because it is used for greyscale images. */ int horzMax = 0, vertMax = 0; /* The maximum horizontal and vertical scaling factors for the components. */ JPEG_FrameHeader_Component *frameComponents [JPEG_MAXIMUM_COMPONENTS]; /* Pointers translating scan header components to frame header components. */ JPEG_FrameHeader_Component *item, *itemEnd = frame->componentList + frame->componentCount; /* The frame header's components for loops. */ JPEG_FIXED_TYPE dcLast [JPEG_MAXIMUM_COMPONENTS]; /* The last DC coefficient computed. This is initialized to zeroes at the start and after a restart interval. */ int c, bx, by, cx, cy; /* Various loop parameters. */ int horzShift = 0; /* The right shift to use after multiplying by nHorzFactor to get the actual sample. */ int vertShift = 0; /* The right shift to use after multiplying by nVertFactor to get the actual sample. */ char M211 = 0; /* Whether this scan satisfies the 2:1:1 relationship, which leads to faster code. */ const unsigned char *data = *dataBase; /* The input data pointer; this must be right at the start of scan data. */ signed char blockBase [JPEG_DCTSIZE2 * JPEG_MAXIMUM_SCAN_COMPONENT_FACTORS]; /* Blocks that have been read and are alloted to YBlock, CbBlock, and CrBlock based on their scaling factors. 
*/ signed char *YBlock; /* Y component temporary block that holds samples for the MCU currently being decompressed. */ signed char *CbBlock; /* Cb component temporary block that holds samples for the MCU currently being decompressed. */ signed char *CrBlock; /* Cr component temporary block that holds samples for the MCU currently being decompressed. */ JPEG_HuffmanTable acTableList [2]; /* The decompressed AC Huffman tables. JPEG Baseline allows only two AC Huffman tables in a scan. */ int acTableUse [2] = { -1, -1 }; /* The indices of the decompressed AC Huffman tables, or -1 if this table hasn't been used. */ JPEG_HuffmanTable dcTableList [2]; /* The decompressed DC Huffman tables. JPEG Baseline allows only two DC Huffman tables in a scan. */ int dcTableUse [2] = { -1, -1 }; /* The indices of the decompressed DC Huffman tables, or -1 if this table hasn't been used. */ int restartInterval = decoder->restartInterval; /* Number of blocks until the next restart. */ /* Pointer to JPEG_ConvertBlock, which might be moved to IWRAM. */ void (*ConvertBlock) (signed char *, signed char *, signed char *, int, int, int, int, int, int, int, int, char, volatile JPEG_OUTPUT_TYPE *, int, const unsigned char *) = &JPEG_ConvertBlock; /* Pointer to JPEG_IDCT_Columns, which might be moved to IWRAM. */ void (*IDCT_Columns) (JPEG_FIXED_TYPE *) = &JPEG_IDCT_Columns; /* Pointer to JPEG_IDCT_Rows, which might be moved to IWRAM. */ void (*IDCT_Rows) (const JPEG_FIXED_TYPE *, signed char *, int) = &JPEG_IDCT_Rows; /* Pointer to JPEG_DecodeCoefficients, which might be moved to IWRAM. */ void (*DecodeCoefficients) (JPEG_FIXED_TYPE *, JPEG_FIXED_TYPE *, JPEG_FIXED_TYPE *, JPEG_HuffmanTable *, JPEG_HuffmanTable *, const unsigned char **, unsigned int *, unsigned long int *, const unsigned char *) = &JPEG_DecodeCoefficients; const unsigned char *ToZigZag = JPEG_ToZigZag; /* Pointer to JPEG_ToZigZag, which might be moved to IWRAM. 
*/ const unsigned char *ComponentRange = JPEG_ComponentRange; /* Pointer to JPEG_ComponentRange, which might be moved to IWRAM. */ /* Start decoding bits. */ JPEG_BITS_START (); /* The sum of all factors in the scan; this cannot be greater than 10 in JPEG Baseline. */ int factorSum = 0; /* Load the essential functions and data into IWRAM if this has been set. */ #if JPEG_USE_IWRAM JPEG_IWRAM_LoadStart (); /* Define variables. */ JPEG_IWRAM_LoadFunction (ConvertBlock); JPEG_IWRAM_LoadFunction (DecodeCoefficients); JPEG_IWRAM_LoadFunction (IDCT_Columns); JPEG_IWRAM_LoadFunction (IDCT_Rows); JPEG_IWRAM_LoadData (ToZigZag); JPEG_IWRAM_LoadData (ComponentRange); JPEG_IWRAM_LoadDone (); /* Finished; run down DMA and check that we haven't overwritten the stack. */ #endif /* JPEG_USE_IWRAM */ /* Find the maximum factors and the factors for each component. */ for (item = frame->componentList; item < itemEnd; item ++) { /* Find the opposing scan header component. */ for (c = 0; ; c ++) { JPEG_ScanHeader_Component *sc; JPEG_Assert (c < scan->componentCount); sc = &scan->componentList [c]; if (sc->selector != item->selector) continue; /* Decompress the DC table if necessary. */ if (sc->dcTable != dcTableUse [0] && sc->dcTable != dcTableUse [1]) { const unsigned char *tablePointer = decoder->dcTables [sc->dcTable]; if (dcTableUse [0] == -1) dcTableUse [0] = sc->dcTable, JPEG_HuffmanTable_Read (&dcTableList [0], &tablePointer); else if (dcTableUse [1] == -1) dcTableUse [1] = sc->dcTable, JPEG_HuffmanTable_Read (&dcTableList [1], &tablePointer); else JPEG_Assert (0); } /* Decompress the AC table if necessary. 
*/ if (sc->acTable != acTableUse [0] && sc->acTable != acTableUse [1]) { const unsigned char *tablePointer = decoder->acTables [sc->acTable]; if (acTableUse [0] == -1) acTableUse [0] = sc->acTable, JPEG_HuffmanTable_Read (&acTableList [0], &tablePointer); else if (acTableUse [1] == -1) acTableUse [1] = sc->acTable, JPEG_HuffmanTable_Read (&acTableList [1], &tablePointer); else JPEG_Assert (0); } frameComponents [c] = item; break; } /* Add the sum for a later assertion test. */ factorSum += item->horzFactor * item->vertFactor; /* Adjust the maximum horizontal and vertical scaling factors as necessary. */ if (item->horzFactor > horzMax) horzMax = item->horzFactor; if (item->vertFactor > vertMax) vertMax = item->vertFactor; /* Update the relevant component scaling factors if necessary. */ if (item->selector == 1) { YHorzFactor = item->horzFactor; YVertFactor = item->vertFactor; } else if (item->selector == 2) { CbHorzFactor = item->horzFactor; CbVertFactor = item->vertFactor; } else if (item->selector == 3) { CrHorzFactor = item->horzFactor; CrVertFactor = item->vertFactor; } } /* Ensure that we have enough memory for these factors. */ JPEG_Assert (factorSum < JPEG_MAXIMUM_SCAN_COMPONENT_FACTORS); /* Split up blockBase according to the components. */ YBlock = blockBase; CbBlock = YBlock + YHorzFactor * YVertFactor * JPEG_DCTSIZE2; CrBlock = CbBlock + CbHorzFactor * CbVertFactor * JPEG_DCTSIZE2; /* Compute the right shift to be done after multiplying against the scaling factor. */ if (horzMax == 1) horzShift = 8; else if (horzMax == 2) horzShift = 7; else if (horzMax == 4) horzShift = 6; /* Compute the right shift to be done after multiplying against the scaling factor. */ if (vertMax == 1) vertShift = 8; else if (vertMax == 2) vertShift = 7; else if (vertMax == 4) vertShift = 6; /* Adjust the scaling factors for our parameters. 
*/ YHorzFactor <<= horzShift; YVertFactor <<= vertShift; CbHorzFactor <<= horzShift; CbVertFactor <<= vertShift; CrHorzFactor <<= horzShift; CrVertFactor <<= vertShift; /* Clear the Cb channel for potential grayscale. */ { signed char *e = CbBlock + JPEG_DCTSIZE2; do *-- e = 0; while (e > CbBlock); } /* Clear the Cr channel for potential grayscale. */ { signed char *e = CrBlock + JPEG_DCTSIZE2; do *-- e = 0; while (e > CrBlock); } /* Compute whether this satisfies the sped up 2:1:1 relationship. */ #if JPEG_FASTER_M211 if (YHorzFactor == 256 && YVertFactor == 256 && CbHorzFactor == 128 && CbVertFactor == 128 && CrHorzFactor == 128 && CrVertFactor == 128) M211 = 1; #endif /* JPEG_FASTER_M211 */ /* Clear the DC parameters. */ for (c = 0; c < JPEG_MAXIMUM_COMPONENTS; c ++) dcLast [c] = 0; /* Now run over each MCU horizontally, then vertically. */ for (by = 0; by < frame->height; by += vertMax * JPEG_DCTSIZE) { for (bx = 0; bx < frame->width; bx += horzMax * JPEG_DCTSIZE) { /* Read the components for the MCU. */ for (c = 0; c < scan->componentCount; c ++) { JPEG_ScanHeader_Component *sc = &scan->componentList [c]; JPEG_FrameHeader_Component *fc = frameComponents [c]; JPEG_HuffmanTable *dcTable, *acTable; JPEG_FIXED_TYPE *quant = decoder->quantTables [fc->quantTable]; int stride = fc->horzFactor * JPEG_DCTSIZE; signed char *chunk = 0; dcTable = &dcTableList [sc->dcTable == dcTableUse [1] ? 1 : 0]; acTable = &acTableList [sc->acTable == acTableUse [1] ? 1 : 0]; /* Compute the output chunk. */ if (fc->selector == 1) chunk = YBlock; else if (fc->selector == 2) chunk = CbBlock; else if (fc->selector == 3) chunk = CrBlock; for (cy = 0; cy < fc->vertFactor * JPEG_DCTSIZE; cy += JPEG_DCTSIZE) { for (cx = 0; cx < fc->horzFactor * JPEG_DCTSIZE; cx += JPEG_DCTSIZE) { int start = cx + cy * stride; JPEG_FIXED_TYPE zz [JPEG_DCTSIZE2]; /* Decode coefficients. 
*/ DecodeCoefficients (&dcLast [c], zz, quant, dcTable, acTable, &data, &bits_left, &bits_data, ToZigZag); /* Perform an IDCT if this component will contribute to the image. */ if (chunk) { IDCT_Columns (zz); IDCT_Rows (zz, chunk + start, stride); } } } } /* Check that our block will be in-range; this should actually use clamping. */ if (bx + horzMax * JPEG_DCTSIZE > outWidth || by + vertMax * JPEG_DCTSIZE > outHeight) continue; /* Convert our block from YCbCr to the output. */ ConvertBlock (YBlock, CbBlock, CrBlock, YHorzFactor, YVertFactor, CbHorzFactor, CbVertFactor, CrHorzFactor, CrVertFactor, horzMax * JPEG_DCTSIZE, vertMax * JPEG_DCTSIZE, M211, out + bx + by * outWidth, outWidth, ComponentRange); /* Handle the restart interval. */ if (decoder->restartInterval && --restartInterval == 0) { restartInterval = decoder->restartInterval; JPEG_BITS_REWIND (); if (((data [0] << 8) | data [1]) == JPEG_Marker_EOI) goto finish; JPEG_Assert (data [0] == 0xFF && (data [1] >= 0xD0 && data [1] <= 0xD7)); for (c = 0; c < JPEG_MAXIMUM_COMPONENTS; c ++) dcLast [c] = 0; data += 2; } } } finish: /* Make sure we read an EOI marker. */ JPEG_BITS_REWIND (); JPEG_Assert (((data [0] << 8) | data [1]) == JPEG_Marker_EOI); data += 2; /* Clear up and return success. */ *dataBase = data; return 1; } /* Read an JPEG_Marker_SOFn marker into frame. This expects to start * processing immediately after the marker. */ int JPEG_FrameHeader_Read (JPEG_FrameHeader *frame, const unsigned char **dataBase, JPEG_Marker marker) { const unsigned char *data = *dataBase; unsigned short length = (data [0] << 8) | data [1]; int index; (void) length; JPEG_Assert (length >= 8); data += 2; /* Skip the length. */ frame->marker = marker; frame->encoding = (marker >= 0xFFC0 && marker <= 0xFFC7) ? 
0 : 1; frame->differential = !(marker >= 0xFFC0 && marker <= 0xFFC3 && marker >= 0xFFC8 && marker <= 0xFFCB); frame->precision = *data ++; frame->height = (data [0] << 8) | data [1]; data += 2; frame->width = (data [0] << 8) | data [1]; data += 2; jpeg_width = frame->width; frame->componentCount = *data ++; JPEG_Assert (frame->precision == 8); JPEG_Assert (frame->componentCount <= JPEG_MAXIMUM_COMPONENTS); JPEG_Assert (length == 8 + 3 * frame->componentCount); /* Read the frame components. */ for (index = 0; index < frame->componentCount; index ++) { JPEG_FrameHeader_Component *c = &frame->componentList [index]; unsigned char pair; c->selector = *data ++; pair = *data ++; c->horzFactor = pair >> 4; c->vertFactor = pair & 15; c->quantTable = *data ++; JPEG_Assert (c->horzFactor == 1 || c->horzFactor == 2 || c->horzFactor == 4); JPEG_Assert (c->vertFactor == 1 || c->vertFactor == 2 || c->vertFactor == 4); JPEG_Assert (c->quantTable <= 3); } *dataBase = data; return 1; } /* Read a JPEG_Marker_SOS marker into scan. This expects to start processing * immediately after the marker. */ int JPEG_ScanHeader_Read (JPEG_ScanHeader *scan, const unsigned char **dataBase) { const unsigned char *data = *dataBase; unsigned short length = (data [0] << 8) | data [1]; JPEG_ScanHeader_Component *c, *cEnd; unsigned char pair; (void) length; JPEG_Assert (length >= 6); data += 2; /* Skip the length. */ scan->componentCount = *data ++; JPEG_Assert (scan->componentCount <= JPEG_MAXIMUM_COMPONENTS); JPEG_Assert (length == 6 + 2 * scan->componentCount); /* Read the scan components. */ for (c = scan->componentList, cEnd = c + scan->componentCount; c < cEnd; c ++) { c->selector = *data ++; pair = *data ++; c->dcTable = pair >> 4; c->acTable = pair & 15; JPEG_Assert (c->dcTable < 4); JPEG_Assert (c->acTable < 4); } /* Read the spectral and approximation footers, which are used for * progressive. 
*/ scan->spectralStart = *data ++; scan->spectralEnd = *data ++; JPEG_Assert (scan->spectralStart <= 63); JPEG_Assert (scan->spectralEnd <= 63); pair = *data ++; scan->successiveApproximationBitPositionHigh = pair >> 4; scan->successiveApproximationBitPositionLow = pair & 15; JPEG_Assert (scan->successiveApproximationBitPositionHigh <= 13); JPEG_Assert (scan->successiveApproximationBitPositionLow <= 15); *dataBase = data; return 1; } /* Read all headers from the very start of the JFIF stream to right after the * SOS marker. */ int JPEG_Decoder_ReadHeaders (JPEG_Decoder *decoder, const unsigned char **dataBase) { const unsigned char *data = *dataBase; JPEG_Marker marker; int c; /* Initialize state and assure that this is a JFIF file. */ decoder->restartInterval = 0; JPEG_Assert (((data [0] << 8) | data [1]) == JPEG_Marker_SOI); data += 2; /* Start reading every marker as it comes in. */ while (1) { marker = (JPEG_Marker) ((data [0] << 8) | data [1]); data += 2; switch (marker) { /* This block is just skipped over. */ case JPEG_Marker_APP0: case JPEG_Marker_APP1: case JPEG_Marker_APP2: case JPEG_Marker_APP3: case JPEG_Marker_APP4: case JPEG_Marker_APP5: case JPEG_Marker_APP6: case JPEG_Marker_APP7: case JPEG_Marker_APP8: case JPEG_Marker_APP9: case JPEG_Marker_APP10: case JPEG_Marker_APP11: case JPEG_Marker_APP12: case JPEG_Marker_APP13: case JPEG_Marker_APP14: case JPEG_Marker_APP15: case JPEG_Marker_COM: data += (data [0] << 8) | data [1]; break; case JPEG_Marker_DHT: /* Define Huffman table. We just skip it for later decompression. 
*/ { unsigned short length = (data [0] << 8) | data [1]; const unsigned char *end = data + length; JPEG_Assert (length >= 2); data += 2; while (data < end) { unsigned char pair, type, slot; pair = *data ++; type = pair >> 4; slot = pair & 15; JPEG_Assert (type == 0 || type == 1); JPEG_Assert (slot <= 15); if (type == 0) decoder->dcTables [slot] = data; else decoder->acTables [slot] = data; if (!JPEG_HuffmanTable_Skip (&data)) return 0; } JPEG_Assert (data == end); break; } case JPEG_Marker_DQT: /* Define quantization table. */ { unsigned short length = (data [0] << 8) | data [1]; const unsigned char *end = data + length; int col, row; JPEG_FIXED_TYPE *s; JPEG_Assert (length >= 2); data += 2; while (data < end) { int pair, slot; pair = *data ++; slot = pair & 15; JPEG_Assert (precision == 0); /* Only allow 8-bit. */ JPEG_Assert (slot < 4); /* Ensure the slot is in-range. */ JPEG_Assert (data + 64 <= end); /* Ensure it's the right size. */ s = decoder->quantTables [slot]; for (c = 0; c < JPEG_DCTSIZE2; c ++) s [c] = JPEG_ITOFIX (*data ++); /* Multiply against the AAN factors. */ for (row = 0; row < JPEG_DCTSIZE; row ++) for (col = 0; col < JPEG_DCTSIZE; col ++) { JPEG_FIXED_TYPE *item = &s [col + row * JPEG_DCTSIZE]; *item = JPEG_FIXMUL (*item, JPEG_AANScaleFactor [JPEG_ToZigZag [row * JPEG_DCTSIZE + col]]); } } JPEG_Assert (data == end); /* Ensure we've finished it. */ break; } case JPEG_Marker_DRI: /* Define restart interval. */ JPEG_Assert (((data [0] << 8) | data [1]) == 4); /* Check the length. */ decoder->restartInterval = (data [2] << 8) | data [3]; data += 4; break; case JPEG_Marker_SOF0: /* Start of Frame: Baseline Sequential Huffman. */ if (!JPEG_FrameHeader_Read (&decoder->frame, &data, marker)) return 0; break; case JPEG_Marker_SOS: /* Start of scan, immediately followed by the image. */ if (!JPEG_ScanHeader_Read (&decoder->scan, &data)) return 0; *dataBase = data; return 1; default: /* No known marker of this type. 
*/ JPEG_Assert (0); break; } } } /* Perform the two steps necessary to decompress a JPEG image. * Nothing fancy about it. */ int JPEG_DecompressImage (const unsigned char *data, volatile JPEG_OUTPUT_TYPE *out, int outWidth, int outHeight) { JPEG_Decoder decoder; if (!JPEG_Decoder_ReadHeaders (&decoder, &data)) return 0; if (!JPEG_Decoder_ReadImage (&decoder, &data, out, outWidth, outHeight)) return 0; return 1; } /* Return whether this code is a JPEG file. Unfortunately it will incorrectly * match variants such as JPEG 2000 and JPEG-LS. A better function would * skip known markers until it reaches an unknown marker or a handled * SOFn. */ int JPEG_Match (const unsigned char *data, int length) { if (length == 0) return 0; if (data [0] != 0xFF) return 0; if (length == 1) return 1; if (data [1] != 0xD8) return 0; if (length == 2) return 1; return 1; if (data [2] != 0xFF) return 0; if (length == 3) return 1; if (data [3] < 0xC0 || data [3] > 0xCF) return 0; if (data [3] == 0xC0) return 1; return 0; }