001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020/*
021 * This package is based on the work done by Keiron Liddle, Aftex Software
022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
023 * great code.
024 */
025package org.apache.commons.compress.compressors.bzip2;
026
027import java.io.IOException;
028import java.io.InputStream;
029import java.nio.ByteOrder;
030import java.util.Arrays;
031
032import org.apache.commons.compress.compressors.CompressorInputStream;
033import org.apache.commons.compress.utils.BitInputStream;
034import org.apache.commons.compress.utils.CloseShieldFilterInputStream;
035
036/**
037 * An input stream that decompresses from the BZip2 format to be read as any other stream.
038 * 
039 * @NotThreadSafe
040 */
041public class BZip2CompressorInputStream extends CompressorInputStream implements
042                                                                          BZip2Constants {
043
044    /**
045     * Index of the last char in the block, so the block size == last + 1.
046     */
047    private int last;
048
049    /**
050     * Index in zptr[] of original string after sorting.
051     */
052    private int origPtr;
053
054    /**
055     * always: in the range 0 .. 9. The current block size is 100000 * this
056     * number.
057     */
058    private int blockSize100k;
059
060    private boolean blockRandomised;
061
062    private final CRC crc = new CRC();
063
064    private int nInUse;
065
066    private BitInputStream bin;
067    private final boolean decompressConcatenated;
068
069    private static final int EOF = 0;
070    private static final int START_BLOCK_STATE = 1;
071    private static final int RAND_PART_A_STATE = 2;
072    private static final int RAND_PART_B_STATE = 3;
073    private static final int RAND_PART_C_STATE = 4;
074    private static final int NO_RAND_PART_A_STATE = 5;
075    private static final int NO_RAND_PART_B_STATE = 6;
076    private static final int NO_RAND_PART_C_STATE = 7;
077
078    private int currentState = START_BLOCK_STATE;
079
080    private int storedBlockCRC, storedCombinedCRC;
081    private int computedBlockCRC, computedCombinedCRC;
082
083    // Variables used by setup* methods exclusively
084
085    private int su_count;
086    private int su_ch2;
087    private int su_chPrev;
088    private int su_i2;
089    private int su_j2;
090    private int su_rNToGo;
091    private int su_rTPos;
092    private int su_tPos;
093    private char su_z;
094
095    /**
096     * All memory intensive stuff. This field is initialized by initBlock().
097     */
098    private BZip2CompressorInputStream.Data data;
099
100    /**
101     * Constructs a new BZip2CompressorInputStream which decompresses bytes
102     * read from the specified stream. This doesn't suppprt decompressing
103     * concatenated .bz2 files.
104     * 
105     * @param in the InputStream from which this object should be created
106     * @throws IOException
107     *             if the stream content is malformed or an I/O error occurs.
108     * @throws NullPointerException
109     *             if {@code in == null}
110     */
111    public BZip2CompressorInputStream(final InputStream in) throws IOException {
112        this(in, false);
113    }
114
115    /**
116     * Constructs a new BZip2CompressorInputStream which decompresses bytes
117     * read from the specified stream.
118     *
119     * @param in the InputStream from which this object should be created
120     * @param decompressConcatenated
121     *                     if true, decompress until the end of the input;
122     *                     if false, stop after the first .bz2 stream and
123     *                     leave the input position to point to the next
124     *                     byte after the .bz2 stream
125     *
126     * @throws IOException
127     *             if {@code in == null}, the stream content is malformed, or an I/O error occurs.
128     */
129    public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
130        this.bin = new BitInputStream(in == System.in ? new CloseShieldFilterInputStream(in) : in,
131            ByteOrder.BIG_ENDIAN);
132        this.decompressConcatenated = decompressConcatenated;
133
134        init(true);
135        initBlock();
136    }
137
138    @Override
139    public int read() throws IOException {
140        if (this.bin != null) {
141            final int r = read0();
142            count(r < 0 ? -1 : 1);
143            return r;
144        }
145        throw new IOException("stream closed");
146    }
147
148    /*
149     * (non-Javadoc)
150     * 
151     * @see java.io.InputStream#read(byte[], int, int)
152     */
153    @Override
154    public int read(final byte[] dest, final int offs, final int len)
155        throws IOException {
156        if (offs < 0) {
157            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
158        }
159        if (len < 0) {
160            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
161        }
162        if (offs + len > dest.length) {
163            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
164                                                + len + ") > dest.length(" + dest.length + ").");
165        }
166        if (this.bin == null) {
167            throw new IOException("stream closed");
168        }
169        if (len == 0) {
170            return 0;
171        }
172
173        final int hi = offs + len;
174        int destOffs = offs;
175        int b;
176        while (destOffs < hi && ((b = read0()) >= 0)) {
177            dest[destOffs++] = (byte) b;
178            count(1);
179        }
180
181        final int c = (destOffs == offs) ? -1 : (destOffs - offs);
182        return c;
183    }
184
185    private void makeMaps() {
186        final boolean[] inUse = this.data.inUse;
187        final byte[] seqToUnseq = this.data.seqToUnseq;
188
189        int nInUseShadow = 0;
190
191        for (int i = 0; i < 256; i++) {
192            if (inUse[i]) {
193                seqToUnseq[nInUseShadow++] = (byte) i;
194            }
195        }
196
197        this.nInUse = nInUseShadow;
198    }
199
200    private int read0() throws IOException {
201        switch (currentState) {
202        case EOF:
203            return -1;
204
205        case START_BLOCK_STATE:
206            return setupBlock();
207
208        case RAND_PART_A_STATE:
209            throw new IllegalStateException();
210
211        case RAND_PART_B_STATE:
212            return setupRandPartB();
213
214        case RAND_PART_C_STATE:
215            return setupRandPartC();
216
217        case NO_RAND_PART_A_STATE:
218            throw new IllegalStateException();
219
220        case NO_RAND_PART_B_STATE:
221            return setupNoRandPartB();
222
223        case NO_RAND_PART_C_STATE:
224            return setupNoRandPartC();
225
226        default:
227            throw new IllegalStateException();
228        }
229    }
230
231    private int readNextByte(BitInputStream in) throws IOException {
232        long b = in.readBits(8);
233        return (int) b;
234    }
235
236    private boolean init(final boolean isFirstStream) throws IOException {
237        if (null == bin) {
238            throw new IOException("No InputStream");
239        }
240
241        if (!isFirstStream) {
242            bin.clearBitCache();
243        }
244
245        final int magic0 = readNextByte(this.bin);
246        if (magic0 == -1 && !isFirstStream) {
247            return false;
248        }
249        final int magic1 = readNextByte(this.bin);
250        final int magic2 = readNextByte(this.bin);
251
252        if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
253            throw new IOException(isFirstStream
254                    ? "Stream is not in the BZip2 format"
255                    : "Garbage after a valid BZip2 stream");
256        }
257
258        final int blockSize = readNextByte(this.bin);
259        if ((blockSize < '1') || (blockSize > '9')) {
260            throw new IOException("BZip2 block size is invalid");
261        }
262
263        this.blockSize100k = blockSize - '0';
264
265        this.computedCombinedCRC = 0;
266
267        return true;
268    }
269
270    private void initBlock() throws IOException {
271        BitInputStream bin = this.bin;
272        char magic0;
273        char magic1;
274        char magic2;
275        char magic3;
276        char magic4;
277        char magic5;
278
279        while (true) {
280            // Get the block magic bytes.
281            magic0 = bsGetUByte(bin);
282            magic1 = bsGetUByte(bin);
283            magic2 = bsGetUByte(bin);
284            magic3 = bsGetUByte(bin);
285            magic4 = bsGetUByte(bin);
286            magic5 = bsGetUByte(bin);
287
288            // If isn't end of stream magic, break out of the loop.
289            if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
290                    || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
291                break;
292            }
293
294            // End of stream was reached. Check the combined CRC and
295            // advance to the next .bz2 stream if decoding concatenated
296            // streams.
297            if (complete()) {
298                return;
299            }
300        }
301
302        if (magic0 != 0x31 || // '1'
303            magic1 != 0x41 || // ')'
304            magic2 != 0x59 || // 'Y'
305            magic3 != 0x26 || // '&'
306            magic4 != 0x53 || // 'S'
307            magic5 != 0x59 // 'Y'
308            ) {
309            this.currentState = EOF;
310            throw new IOException("bad block header");
311        }
312        this.storedBlockCRC = bsGetInt(bin);
313        this.blockRandomised = bsR(bin, 1) == 1;
314
315        /**
316         * Allocate data here instead in constructor, so we do not allocate
317         * it if the input file is empty.
318         */
319        if (this.data == null) {
320            this.data = new Data(this.blockSize100k);
321        }
322
323        // currBlockNo++;
324        getAndMoveToFrontDecode();
325
326        this.crc.initialiseCRC();
327        this.currentState = START_BLOCK_STATE;
328    }
329
330    private void endBlock() throws IOException {
331        this.computedBlockCRC = this.crc.getFinalCRC();
332
333        // A bad CRC is considered a fatal error.
334        if (this.storedBlockCRC != this.computedBlockCRC) {
335            // make next blocks readable without error
336            // (repair feature, not yet documented, not tested)
337            this.computedCombinedCRC = (this.storedCombinedCRC << 1)
338                | (this.storedCombinedCRC >>> 31);
339            this.computedCombinedCRC ^= this.storedBlockCRC;
340
341            throw new IOException("BZip2 CRC error");
342        }
343
344        this.computedCombinedCRC = (this.computedCombinedCRC << 1)
345            | (this.computedCombinedCRC >>> 31);
346        this.computedCombinedCRC ^= this.computedBlockCRC;
347    }
348
349    private boolean complete() throws IOException {
350        this.storedCombinedCRC = bsGetInt(bin);
351        this.currentState = EOF;
352        this.data = null;
353
354        if (this.storedCombinedCRC != this.computedCombinedCRC) {
355            throw new IOException("BZip2 CRC error");
356        }
357
358        // Look for the next .bz2 stream if decompressing
359        // concatenated files.
360        return !decompressConcatenated || !init(false);
361    }
362
363    @Override
364    public void close() throws IOException {
365        final BitInputStream inShadow = this.bin;
366        if (inShadow != null) {
367            try {
368                inShadow.close();
369            } finally {
370                this.data = null;
371                this.bin = null;
372            }
373        }
374    }
375
376    /**
377     * read bits from the input stream
378     * @param n the number of bits to read, must not exceed 32?
379     * @return
380     * @throws IOException
381     */
382    private static int bsR(BitInputStream bin, final int n) throws IOException {
383        long thech = bin.readBits(n);
384        if (thech < 0) {
385            throw new IOException("unexpected end of stream");
386        }
387        return (int) thech;
388    }
389
390    private static boolean bsGetBit(BitInputStream bin) throws IOException {
391        return bsR(bin, 1) != 0;
392    }
393
394    private static char bsGetUByte(BitInputStream bin) throws IOException {
395        return (char) bsR(bin, 8);
396    }
397
398    private static int bsGetInt(BitInputStream bin) throws IOException {
399        return bsR(bin, 32);
400    }
401
402    /**
403     * Called by createHuffmanDecodingTables() exclusively.
404     */
405    private static void hbCreateDecodeTables(final int[] limit,
406                                             final int[] base, final int[] perm, final char[] length,
407                                             final int minLen, final int maxLen, final int alphaSize) {
408        for (int i = minLen, pp = 0; i <= maxLen; i++) {
409            for (int j = 0; j < alphaSize; j++) {
410                if (length[j] == i) {
411                    perm[pp++] = j;
412                }
413            }
414        }
415
416        for (int i = MAX_CODE_LEN; --i > 0;) {
417            base[i] = 0;
418            limit[i] = 0;
419        }
420
421        for (int i = 0; i < alphaSize; i++) {
422            base[length[i] + 1]++;
423        }
424
425        for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
426            b += base[i];
427            base[i] = b;
428        }
429
430        for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
431            final int nb = base[i + 1];
432            vec += nb - b;
433            b = nb;
434            limit[i] = vec - 1;
435            vec <<= 1;
436        }
437
438        for (int i = minLen + 1; i <= maxLen; i++) {
439            base[i] = ((limit[i - 1] + 1) << 1) - base[i];
440        }
441    }
442
443    private void recvDecodingTables() throws IOException {
444        final BitInputStream bin = this.bin;
445        final Data dataShadow = this.data;
446        final boolean[] inUse = dataShadow.inUse;
447        final byte[] pos = dataShadow.recvDecodingTables_pos;
448        final byte[] selector = dataShadow.selector;
449        final byte[] selectorMtf = dataShadow.selectorMtf;
450
451        int inUse16 = 0;
452
453        /* Receive the mapping table */
454        for (int i = 0; i < 16; i++) {
455            if (bsGetBit(bin)) {
456                inUse16 |= 1 << i;
457            }
458        }
459
460        Arrays.fill(inUse, false);
461        for (int i = 0; i < 16; i++) {
462            if ((inUse16 & (1 << i)) != 0) {
463                final int i16 = i << 4;
464                for (int j = 0; j < 16; j++) {
465                    if (bsGetBit(bin)) {
466                        inUse[i16 + j] = true;
467                    }
468                }
469            }
470        }
471
472        makeMaps();
473        final int alphaSize = this.nInUse + 2;
474        /* Now the selectors */
475        final int nGroups = bsR(bin, 3);
476        final int nSelectors = bsR(bin, 15);
477
478        for (int i = 0; i < nSelectors; i++) {
479            int j = 0;
480            while (bsGetBit(bin)) {
481                j++;
482            }
483            selectorMtf[i] = (byte) j;
484        }
485
486        /* Undo the MTF values for the selectors. */
487        for (int v = nGroups; --v >= 0;) {
488            pos[v] = (byte) v;
489        }
490
491        for (int i = 0; i < nSelectors; i++) {
492            int v = selectorMtf[i] & 0xff;
493            final byte tmp = pos[v];
494            while (v > 0) {
495                // nearly all times v is zero, 4 in most other cases
496                pos[v] = pos[v - 1];
497                v--;
498            }
499            pos[0] = tmp;
500            selector[i] = tmp;
501        }
502
503        final char[][] len = dataShadow.temp_charArray2d;
504
505        /* Now the coding tables */
506        for (int t = 0; t < nGroups; t++) {
507            int curr = bsR(bin, 5);
508            final char[] len_t = len[t];
509            for (int i = 0; i < alphaSize; i++) {
510                while (bsGetBit(bin)) {
511                    curr += bsGetBit(bin) ? -1 : 1;
512                }
513                len_t[i] = (char) curr;
514            }
515        }
516
517        // finally create the Huffman tables
518        createHuffmanDecodingTables(alphaSize, nGroups);
519    }
520
521    /**
522     * Called by recvDecodingTables() exclusively.
523     */
524    private void createHuffmanDecodingTables(final int alphaSize,
525                                             final int nGroups) {
526        final Data dataShadow = this.data;
527        final char[][] len = dataShadow.temp_charArray2d;
528        final int[] minLens = dataShadow.minLens;
529        final int[][] limit = dataShadow.limit;
530        final int[][] base = dataShadow.base;
531        final int[][] perm = dataShadow.perm;
532
533        for (int t = 0; t < nGroups; t++) {
534            int minLen = 32;
535            int maxLen = 0;
536            final char[] len_t = len[t];
537            for (int i = alphaSize; --i >= 0;) {
538                final char lent = len_t[i];
539                if (lent > maxLen) {
540                    maxLen = lent;
541                }
542                if (lent < minLen) {
543                    minLen = lent;
544                }
545            }
546            hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
547                                 maxLen, alphaSize);
548            minLens[t] = minLen;
549        }
550    }
551
552    private void getAndMoveToFrontDecode() throws IOException {
553        final BitInputStream bin = this.bin;
554        this.origPtr = bsR(bin, 24);
555        recvDecodingTables();
556
557        final Data dataShadow = this.data;
558        final byte[] ll8 = dataShadow.ll8;
559        final int[] unzftab = dataShadow.unzftab;
560        final byte[] selector = dataShadow.selector;
561        final byte[] seqToUnseq = dataShadow.seqToUnseq;
562        final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
563        final int[] minLens = dataShadow.minLens;
564        final int[][] limit = dataShadow.limit;
565        final int[][] base = dataShadow.base;
566        final int[][] perm = dataShadow.perm;
567        final int limitLast = this.blockSize100k * 100000;
568
569        /*
570         * Setting up the unzftab entries here is not strictly necessary, but it
571         * does save having to do it later in a separate pass, and so saves a
572         * block's worth of cache misses.
573         */
574        for (int i = 256; --i >= 0;) {
575            yy[i] = (char) i;
576            unzftab[i] = 0;
577        }
578
579        int groupNo = 0;
580        int groupPos = G_SIZE - 1;
581        final int eob = this.nInUse + 1;
582        int nextSym = getAndMoveToFrontDecode0(0);
583        int lastShadow = -1;
584        int zt = selector[groupNo] & 0xff;
585        int[] base_zt = base[zt];
586        int[] limit_zt = limit[zt];
587        int[] perm_zt = perm[zt];
588        int minLens_zt = minLens[zt];
589
590        while (nextSym != eob) {
591            if ((nextSym == RUNA) || (nextSym == RUNB)) {
592                int s = -1;
593
594                for (int n = 1; true; n <<= 1) {
595                    if (nextSym == RUNA) {
596                        s += n;
597                    } else if (nextSym == RUNB) {
598                        s += n << 1;
599                    } else {
600                        break;
601                    }
602
603                    if (groupPos == 0) {
604                        groupPos = G_SIZE - 1;
605                        zt = selector[++groupNo] & 0xff;
606                        base_zt = base[zt];
607                        limit_zt = limit[zt];
608                        perm_zt = perm[zt];
609                        minLens_zt = minLens[zt];
610                    } else {
611                        groupPos--;
612                    }
613
614                    int zn = minLens_zt;
615                    int zvec = bsR(bin, zn);
616                    while(zvec > limit_zt[zn]) {
617                        zn++;
618                        zvec = (zvec << 1) | bsR(bin, 1);
619                    }
620                    nextSym = perm_zt[zvec - base_zt[zn]];
621                }
622
623                final byte ch = seqToUnseq[yy[0]];
624                unzftab[ch & 0xff] += s + 1;
625
626                while (s-- >= 0) {
627                    ll8[++lastShadow] = ch;
628                }
629
630                if (lastShadow >= limitLast) {
631                    throw new IOException("block overrun");
632                }
633            } else {
634                if (++lastShadow >= limitLast) {
635                    throw new IOException("block overrun");
636                }
637
638                final char tmp = yy[nextSym - 1];
639                unzftab[seqToUnseq[tmp] & 0xff]++;
640                ll8[lastShadow] = seqToUnseq[tmp];
641
642                /*
643                 * This loop is hammered during decompression, hence avoid
644                 * native method call overhead of System.arraycopy for very
645                 * small ranges to copy.
646                 */
647                if (nextSym <= 16) {
648                    for (int j = nextSym - 1; j > 0;) {
649                        yy[j] = yy[--j];
650                    }
651                } else {
652                    System.arraycopy(yy, 0, yy, 1, nextSym - 1);
653                }
654
655                yy[0] = tmp;
656
657                if (groupPos == 0) {
658                    groupPos = G_SIZE - 1;
659                    zt = selector[++groupNo] & 0xff;
660                    base_zt = base[zt];
661                    limit_zt = limit[zt];
662                    perm_zt = perm[zt];
663                    minLens_zt = minLens[zt];
664                } else {
665                    groupPos--;
666                }
667
668                int zn = minLens_zt;
669                int zvec = bsR(bin, zn);
670                while(zvec > limit_zt[zn]) {
671                    zn++;
672                    zvec = (zvec << 1) | bsR(bin, 1);
673                }
674                nextSym = perm_zt[zvec - base_zt[zn]];
675            }
676        }
677
678        this.last = lastShadow;
679    }
680
681    private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
682        final Data dataShadow = this.data;
683        final int zt = dataShadow.selector[groupNo] & 0xff;
684        final int[] limit_zt = dataShadow.limit[zt];
685        int zn = dataShadow.minLens[zt];
686        int zvec = bsR(bin, zn);
687        while (zvec > limit_zt[zn]) {
688            zn++;
689            zvec = (zvec << 1) | bsR(bin, 1);
690        }
691
692        return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
693    }
694
695    private int setupBlock() throws IOException {
696        if (currentState == EOF || this.data == null) {
697            return -1;
698        }
699
700        final int[] cftab = this.data.cftab;
701        final int[] tt = this.data.initTT(this.last + 1);
702        final byte[] ll8 = this.data.ll8;
703        cftab[0] = 0;
704        System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
705
706        for (int i = 1, c = cftab[0]; i <= 256; i++) {
707            c += cftab[i];
708            cftab[i] = c;
709        }
710
711        for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
712            tt[cftab[ll8[i] & 0xff]++] = i;
713        }
714
715        if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
716            throw new IOException("stream corrupted");
717        }
718
719        this.su_tPos = tt[this.origPtr];
720        this.su_count = 0;
721        this.su_i2 = 0;
722        this.su_ch2 = 256; /* not a char and not EOF */
723
724        if (this.blockRandomised) {
725            this.su_rNToGo = 0;
726            this.su_rTPos = 0;
727            return setupRandPartA();
728        }
729        return setupNoRandPartA();
730    }
731
732    private int setupRandPartA() throws IOException {
733        if (this.su_i2 <= this.last) {
734            this.su_chPrev = this.su_ch2;
735            int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
736            this.su_tPos = this.data.tt[this.su_tPos];
737            if (this.su_rNToGo == 0) {
738                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
739                if (++this.su_rTPos == 512) {
740                    this.su_rTPos = 0;
741                }
742            } else {
743                this.su_rNToGo--;
744            }
745            this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
746            this.su_i2++;
747            this.currentState = RAND_PART_B_STATE;
748            this.crc.updateCRC(su_ch2Shadow);
749            return su_ch2Shadow;
750        }
751        endBlock();
752        initBlock();
753        return setupBlock();
754    }
755
756    private int setupNoRandPartA() throws IOException {
757        if (this.su_i2 <= this.last) {
758            this.su_chPrev = this.su_ch2;
759            final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
760            this.su_ch2 = su_ch2Shadow;
761            this.su_tPos = this.data.tt[this.su_tPos];
762            this.su_i2++;
763            this.currentState = NO_RAND_PART_B_STATE;
764            this.crc.updateCRC(su_ch2Shadow);
765            return su_ch2Shadow;
766        }
767        this.currentState = NO_RAND_PART_A_STATE;
768        endBlock();
769        initBlock();
770        return setupBlock();
771    }
772
773    private int setupRandPartB() throws IOException {
774        if (this.su_ch2 != this.su_chPrev) {
775            this.currentState = RAND_PART_A_STATE;
776            this.su_count = 1;
777            return setupRandPartA();
778        } else if (++this.su_count >= 4) {
779            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
780            this.su_tPos = this.data.tt[this.su_tPos];
781            if (this.su_rNToGo == 0) {
782                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
783                if (++this.su_rTPos == 512) {
784                    this.su_rTPos = 0;
785                }
786            } else {
787                this.su_rNToGo--;
788            }
789            this.su_j2 = 0;
790            this.currentState = RAND_PART_C_STATE;
791            if (this.su_rNToGo == 1) {
792                this.su_z ^= 1;
793            }
794            return setupRandPartC();
795        } else {
796            this.currentState = RAND_PART_A_STATE;
797            return setupRandPartA();
798        }
799    }
800
801    private int setupRandPartC() throws IOException {
802        if (this.su_j2 < this.su_z) {
803            this.crc.updateCRC(this.su_ch2);
804            this.su_j2++;
805            return this.su_ch2;
806        }
807        this.currentState = RAND_PART_A_STATE;
808        this.su_i2++;
809        this.su_count = 0;
810        return setupRandPartA();
811    }
812
813    private int setupNoRandPartB() throws IOException {
814        if (this.su_ch2 != this.su_chPrev) {
815            this.su_count = 1;
816            return setupNoRandPartA();
817        } else if (++this.su_count >= 4) {
818            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
819            this.su_tPos = this.data.tt[this.su_tPos];
820            this.su_j2 = 0;
821            return setupNoRandPartC();
822        } else {
823            return setupNoRandPartA();
824        }
825    }
826
827    private int setupNoRandPartC() throws IOException {
828        if (this.su_j2 < this.su_z) {
829            final int su_ch2Shadow = this.su_ch2;
830            this.crc.updateCRC(su_ch2Shadow);
831            this.su_j2++;
832            this.currentState = NO_RAND_PART_C_STATE;
833            return su_ch2Shadow;
834        }
835        this.su_i2++;
836        this.su_count = 0;
837        return setupNoRandPartA();
838    }
839
840    private static final class Data {
841
842        // (with blockSize 900k)
843        final boolean[] inUse = new boolean[256]; // 256 byte
844
845        final byte[] seqToUnseq = new byte[256]; // 256 byte
846        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
847        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
848
849        /**
850         * Freq table collected to save a pass over the data during
851         * decompression.
852         */
853        final int[] unzftab = new int[256]; // 1024 byte
854
855        final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
856        final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
857        final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
858        final int[] minLens = new int[N_GROUPS]; // 24 byte
859
860        final int[] cftab = new int[257]; // 1028 byte
861        final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
862        final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
863        // byte
864        final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
865        // ---------------
866        // 60798 byte
867
868        int[] tt; // 3600000 byte
869        byte[] ll8; // 900000 byte
870
871        // ---------------
872        // 4560782 byte
873        // ===============
874
875        Data(final int blockSize100k) {
876            this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
877        }
878
879        /**
880         * Initializes the {@link #tt} array.
881         * 
882         * This method is called when the required length of the array is known.
883         * I don't initialize it at construction time to avoid unneccessary
884         * memory allocation when compressing small files.
885         */
886        int[] initTT(final int length) {
887            int[] ttShadow = this.tt;
888
889            // tt.length should always be >= length, but theoretically
890            // it can happen, if the compressor mixed small and large
891            // blocks. Normally only the last block will be smaller
892            // than others.
893            if ((ttShadow == null) || (ttShadow.length < length)) {
894                this.tt = ttShadow = new int[length];
895            }
896
897            return ttShadow;
898        }
899
900    }
901
902    /**
903     * Checks if the signature matches what is expected for a bzip2 file.
904     * 
905     * @param signature
906     *            the bytes to check
907     * @param length
908     *            the number of bytes to check
909     * @return true, if this stream is a bzip2 compressed stream, false otherwise
910     * 
911     * @since 1.1
912     */
913    public static boolean matches(final byte[] signature, final int length) {
914
915        if (length < 3) {
916            return false;
917        }
918
919        if (signature[0] != 'B') {
920            return false;
921        }
922
923        if (signature[1] != 'Z') {
924            return false;
925        }
926
927        if (signature[2] != 'h') {
928            return false;
929        }
930
931        return true;
932    }
933}