/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.IOUtils;

/**
 * The TarArchiveInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
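 *
 * <p>A minimal usage sketch (the archive name {@code archive.tar} is
 * illustrative only):</p>
 * <pre>{@code
 * try (TarArchiveInputStream in = new TarArchiveInputStream(
 *         java.nio.file.Files.newInputStream(java.nio.file.Paths.get("archive.tar")))) {
 *     TarArchiveEntry entry;
 *     while ((entry = in.getNextTarEntry()) != null) {
 *         // the stream is now positioned at the entry's data;
 *         // consume it with read() before fetching the next entry
 *     }
 * }
 * }</pre>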
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream {

    private static final int SMALL_BUFFER_SIZE = 256;

    /**
     * Checks if the signature matches what is expected for a tar file.
     *
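     * <p>A sketch of probing a buffer read from the start of a stream
     * (the buffer size and the stream {@code in} are illustrative; one full
     * 512-byte record is more than enough):</p>
     * <pre>{@code
     * byte[] signature = new byte[512];
     * int read = in.read(signature);
     * boolean looksLikeTar = TarArchiveInputStream.matches(signature, read);
     * }</pre>
     *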
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is a tar archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < TarConstants.VERSION_OFFSET + TarConstants.VERSIONLEN) {
            return false;
        }

        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
                ) {
            return true;
        }
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            &&
            (
             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            ||
            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
            )
                ) {
            return true;
        }
        // COMPRESS-107 - recognise Ant tar files
        return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
                &&
                ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
                        signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN);
    }

    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The size of the TAR header */
    private final int recordSize;

    /** The buffer to store the TAR header */
    private final byte[] recordBuffer;

    /** The size of a block */
    private final int blockSize;

    /** True if the stream has hit EOF */
    private boolean hasHitEOF;

    /** Size of the current entry */
    private long entrySize;

    /** How far into the entry the stream is */
    private long entryOffset;

    /** An input stream to read from */
    private final InputStream inputStream;

    /** Input streams for reading sparse entries */
    private List<InputStream> sparseInputStreams;

    /** The index of the current input stream being read when reading sparse entries */
    private int currentSparseInputStreamIndex;

    /** The meta-data about the current entry */
    private TarArchiveEntry currEntry;

    /** The encoding of the file */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // the global sparse headers, only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    private final boolean lenient;

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     */
    public TarArchiveInputStream(final InputStream is) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final boolean lenient) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize) {
        this(is, blockSize, recordSize, null);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding) {
        this(is, blockSize, recordSize, encoding, false);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding, final boolean lenient) {
        this.inputStream = is;
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordSize = recordSize;
        this.recordBuffer = new byte[recordSize];
        this.blockSize = blockSize;
        this.lenient = lenient;
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize,
                                 final String encoding) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final String encoding) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE,
             encoding);
    }

    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
        throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * Get the available data that can be read from the current
     * entry in the archive. This does not indicate how much data
     * is left in the entire archive, only in the current entry.
     * This value is determined from the entry's size header field
     * and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
     * bytes are left in the current entry in the archive.
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException if an I/O error has occurred
     */
    @Override
    public int available() throws IOException {
        if (isDirectory()) {
            return 0;
        }

        if (currEntry.getRealSize() - entryOffset > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) (currEntry.getRealSize() - entryOffset);
    }

    /**
     * Build the input streams consisting of all-zero input streams and non-zero input streams.
     * When reading from the non-zero input streams, the data is actually read from the original input stream.
     * The size of each input stream is determined by the sparse headers.
     *
     * NOTE : Both all-zero and non-zero input streams may have a size of 0;
     *        such 0-size input streams are NOT stored because they are meaningless.
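     *
     * For example (illustrative numbers): sparse headers
     * [(offset=0, numbytes=512), (offset=4096, numbytes=1024)] yield the
     * stream sequence data(512), zeros(3584), data(1024), where the zero
     * gap is 4096 - 512 = 3584 bytes.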
     */
    private void buildSparseInputStreams() throws IOException {
        currentSparseInputStreamIndex = -1;
        sparseInputStreams = new ArrayList<>();

        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();

        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }

            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                sparseInputStreams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
            }

            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                sparseInputStreams.add(new BoundedInputStream(inputStream, sparseHeader.getNumbytes()));
            }

            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }

        if (!sparseInputStreams.isEmpty()) {
            currentSparseInputStreamIndex = 0;
        }
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * @return The implementation will return true if the {@link ArchiveEntry} is an instance of {@link TarArchiveEntry}
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        return ae instanceof TarArchiveEntry;
    }

    /**
     * Closes this stream, along with any sparse input streams and the
     * underlying input stream.
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        // Close all the input streams in sparseInputStreams
        if (sparseInputStreams != null) {
            for (final InputStream inputStream : sparseInputStreams) {
                inputStream.close();
            }
        }

        inputStream.close();
    }

    /**
     * This method is invoked once the end of the archive is hit; it
     * tries to consume the remaining bytes under the assumption that
     * the tool creating this archive has padded the last block.
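     *
     * For example (illustrative numbers): with a block size of 10240 and
     * 5632 bytes read so far, the remaining 10240 - 5632 = 4608 bytes of
     * padding are skipped.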
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        final long bytesReadOfLastBlock = getBytesRead() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            final long skipped = IOUtils.skip(inputStream, blockSize - bytesReadOfLastBlock);
            count(skipped);
        }
    }

    /**
     * For FileInputStream, skip() always returns the requested number of
     * bytes, so we need the available bytes to determine how many bytes
     * were actually skipped
     *
     * @param available available bytes returned by inputStream.available()
     * @param skipped   skipped bytes returned by inputStream.skip()
     * @param expected  bytes expected to skip
     * @return number of bytes actually skipped
     * @throws IOException if a truncated tar archive is detected
     */
    private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
        long actuallySkipped = skipped;
        if (inputStream instanceof FileInputStream) {
            actuallySkipped = Math.min(skipped, available);
        }

        if (actuallySkipped != expected) {
            throw new IOException("Truncated TAR archive");
        }

        return actuallySkipped;
    }

    /**
     * Get the current TAR Archive Entry that this input stream is processing
     *
     * @return The current Archive Entry
     */
    public TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    /**
     * Get the next entry in this tar archive as longname data.
     *
     * @return The next entry in the archive as longname data, or null.
     * @throws IOException on error
     */
    protected byte[] getLongNameData() throws IOException {
        // read in the name
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length = 0;
        while ((length = read(smallBuf)) >= 0) {
            longName.write(smallBuf, 0, length);
        }
        getNextEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            longNameData = Arrays.copyOf(longNameData, length);
        }
        return longNameData;
    }

    /**
     * Returns the next Archive Entry in this Stream.
     *
     * @return the next entry,
     *         or {@code null} if there are no more entries
     * @throws IOException if the next entry could not be read
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Get the next entry in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, place the input stream at the header of the
     * next entry, read the header, instantiate a new TarEntry
     * from the header bytes and return that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * @return The next TarEntry in the archive, or null.
     * @throws IOException on error
     */
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            /* Skip will only go to the end of the current entry */
            IOUtils.skip(this, Long.MAX_VALUE);

            /* skip to the end of the last record */
            skipRecordPadding();
        }

        final byte[] headerBuf = getRecord();

        if (headerBuf == null) {
            /* hit EOF */
            currEntry = null;
            return null;
        }

        try {
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        entryOffset = 0;
        entrySize = currEntry.getSize();

        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        // If the size of the next element in the archive has changed
        // due to a new size being reported in the posix header
        // information, we update entrySize here so that it contains
        // the correct value.
        entrySize = currEntry.getSize();

        return currEntry;
    }

    /**
     * Get the next record in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, and place the input stream at the header of the
     * next entry.
     *
     * <p>If there are no more entries in the archive, null will be
     * returned to indicate that the end of the archive has been
     * reached.  At the same time the {@code hasHitEOF} marker will be
     * set to true.</p>
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        byte[] headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Get the record size being used by this stream's buffer.
     *
     * @return The record size.
     */
    public int getRecordSize() {
        return recordSize;
    }

    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /**
     * Determine if an archive record indicates End of Archive. End of
     * archive is indicated by a record that consists entirely of null bytes.
     *
     * @param record The record data to check.
     * @return true if the record data is an End of Archive
     */
    protected boolean isEOFRecord(final byte[] record) {
        return record == null || ArchiveUtils.isArrayZero(record, recordSize);
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     *
     * @param markLimit The limit to mark.
     */
    @Override
    public synchronized void mark(final int markLimit) {
    }

    /**
     * Since we do not support marking just yet, we return false.
     *
     * @return False.
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * For PAX Format 0.0, the sparse headers (GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
     *
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     *
     *
     * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
     *
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     *
     *
     * For PAX Format 1.X:
     * The sparse map itself is stored in the file data block, preceding the actual file data.
     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
     * giving the offset and size of the data block it describes.
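     *
     * For example (illustrative values), a PAX 0.1 map describing two data
     * chunks of 512 bytes each at offsets 0 and 10240 would be stored as
     * GNU.sparse.map=0,512,10240,512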
     * @throws IOException if an I/O error occurs
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);

        // for 0.1 PAX Headers
        if (headers.containsKey(TarGnuSparseKeys.MAP)) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
        }
        getNextEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            sparseHeaders = TarUtils.parsePAX1XSparseHeaders(inputStream, recordSize);
            currEntry.setSparseHeaders(sparseHeaders);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads bytes from the current tar archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
        if (numToRead == 0) {
            return 0;
        }
        int totalRead = 0;

        if (isAtEOF() || isDirectory()) {
            return -1;
        }

        if (currEntry == null) {
            throw new IllegalStateException("No current tar entry");
        }

        if (entryOffset >= currEntry.getRealSize()) {
            return -1;
        }

        numToRead = Math.min(numToRead, available());

        if (currEntry.isSparse()) {
            // for sparse entries, we need to read them in another way
            totalRead = readSparse(buf, offset, numToRead);
        } else {
            totalRead = inputStream.read(buf, offset, numToRead);
        }

        if (totalRead == -1) {
            if (numToRead > 0) {
                throw new IOException("Truncated TAR archive");
            }
            setAtEOF(true);
        } else {
            count(totalRead);
            entryOffset += totalRead;
        }

        return totalRead;
    }

    private void readGlobalPaxHeaders() throws IOException {
        globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
        getNextEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Adds the sparse chunks from the current entry to the list of sparse
     * chunks, including any additional sparse entries following the current entry.
     *
     * @throws IOException on error
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                final byte[] headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf);
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Read a record from the input stream and return the data.
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException on error
     */
    protected byte[] readRecord() throws IOException {
        final int readNow = IOUtils.readFully(inputStream, recordBuffer);
        count(readNow);
        if (readNow != recordSize) {
            return null;
        }

        return recordBuffer;
    }

    /**
     * For sparse tar entries, there are many "holes" (consisting of all zeros) in the file. Only the non-zero data is
     * stored in tar files, and it is stored separately. The structure of the non-zero data is described by the
     * sparse headers using the offset, where a block of non-zero data starts, and numbytes, the length of the
     * non-zero data block.
     * When reading sparse entries, the data is read back with the "holes" and the non-zero data combined
     * according to the sparse headers.
     *
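     * For example (illustrative numbers), an entry with sparse headers
     * [(offset=0, numbytes=512), (offset=4096, numbytes=1024)] reads back as
     * 512 data bytes, then 3584 zero bytes, then 1024 data bytes.
     *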
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
        // if there are no actual input streams, just read from the original input stream
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.read(buf, offset, numToRead);
        }

        if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
            return -1;
        }

        final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
        final int readLen = currentInputStream.read(buf, offset, numToRead);

        // if the current input stream is the last input stream,
        // just return the number of bytes read from current input stream
        if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
            return readLen;
        }

        // if EOF of the current input stream has been reached, open a new input stream and recursively call read
        if (readLen == -1) {
            currentSparseInputStreamIndex++;
            return readSparse(buf, offset, numToRead);
        }

        // if the remaining data of the current input stream is not long enough, open a new input stream
        // and recursively call read
        if (readLen < numToRead) {
            currentSparseInputStreamIndex++;
            final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
            if (readLenOfNext == -1) {
                return readLen;
            }

            return readLen + readLenOfNext;
        }

        // if the remaining data of the current input stream is enough (which means readLen == numToRead), just return readLen
        return readLen;
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
    }

    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }

    protected final void setCurrentEntry(final TarArchiveEntry e) {
        currEntry = e;
    }

    /**
     * Skips over and discards {@code n} bytes of data from this input
     * stream. The {@code skip} method may, for a variety of reasons, end
     * up skipping over some smaller number of bytes, possibly {@code 0}.
     * This may result from any of a number of conditions; reaching end of file
     * or end of entry before {@code n} bytes have been skipped are only
     * two possibilities. The actual number of bytes skipped is returned. If
     * {@code n} is negative, no bytes are skipped.
     *
     * @param n
     *            the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException if a truncated tar archive is detected
     *                     or some other I/O error occurs
     */
    @Override
    public long skip(final long n) throws IOException {
        if (n <= 0 || isDirectory()) {
            return 0;
        }

        final long availableOfInputStream = inputStream.available();
        final long available = currEntry.getRealSize() - entryOffset;
        final long numToSkip = Math.min(n, available);
        long skipped;

        if (!currEntry.isSparse()) {
            skipped = IOUtils.skip(inputStream, numToSkip);
            // for non-sparse entries, we should get the number of bytes actually skipped along with
            // inputStream.available() if inputStream is an instance of FileInputStream
            skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
        } else {
            skipped = skipSparse(numToSkip);
        }

        count(skipped);
        entryOffset += skipped;
        return skipped;
    }

    /**
     * The last record block should be written at the full size, so skip any
     * additional space used to fill a record after an entry.
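     *
     * For example (illustrative numbers): an entry of 600 bytes stored in
     * 512-byte records occupies two records, so 2 * 512 - 600 = 424 bytes
     * of padding are skipped here.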
     *
     * @throws IOException if a truncated tar archive is detected
     */
    private void skipRecordPadding() throws IOException {
        if (!isDirectory() && this.entrySize > 0 && this.entrySize % this.recordSize != 0) {
            final long available = inputStream.available();
            final long numRecords = (this.entrySize / this.recordSize) + 1;
            final long padding = (numRecords * this.recordSize) - this.entrySize;
            long skipped = IOUtils.skip(inputStream, padding);

            skipped = getActuallySkipped(available, skipped, padding);

            count(skipped);
        }
    }

    /**
     * Skip n bytes from the current input stream. If the current input stream doesn't have enough data to skip,
     * jump to the next input stream and skip the remaining bytes; keep doing this until n bytes in total are skipped
     * or all the input streams are exhausted
     *
     * @param n bytes of data to skip
     * @return actual bytes of data skipped
     * @throws IOException if an I/O error occurs
     */
    private long skipSparse(final long n) throws IOException {
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.skip(n);
        }

        long bytesSkipped = 0;

        while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
            final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
            bytesSkipped += currentInputStream.skip(n - bytesSkipped);

            if (bytesSkipped < n) {
                currentSparseInputStreamIndex++;
            }
        }

        return bytesSkipped;
    }

    /**
     * Tries to read the next record, rewinding the stream if it is not an EOF record.
     *
     * <p>This is meant to protect against cases where a tar
     * implementation has written only one EOF record when two are
     * expected.  Actually this won't help since a non-conforming
     * implementation likely won't fill full blocks consisting of - by
     * default - ten records either, so we probably have already read
     * beyond the archive anyway.</p>
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        final boolean marked = inputStream.markSupported();
        if (marked) {
            inputStream.mark(recordSize);
        }
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset && marked) {
                pushedBackBytes(recordSize);
                inputStream.reset();
            }
        }
    }
}