/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.IOUtils;

/**
 * The TarArchiveInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position the stream at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream {

    private static final int SMALL_BUFFER_SIZE = 256;

    /**
     * Checks if the signature matches what is expected for a tar file.
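     *
     * <p>A minimal probe sketch (the buffer size and file name here are
     * illustrative assumptions, not part of this API):</p>
     * <pre>{@code
     * byte[] signature = new byte[512];
     * try (InputStream in = java.nio.file.Files.newInputStream(
     *         java.nio.file.Paths.get("archive.tar"))) {
     *     int read = IOUtils.readFully(in, signature);
     *     if (TarArchiveInputStream.matches(signature, read)) {
     *         // treat the file as a tar archive
     *     }
     * }
     * }</pre>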
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true if this stream is a tar archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < TarConstants.VERSION_OFFSET + TarConstants.VERSIONLEN) {
            return false;
        }

        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)) {
            return true;
        }
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            && (ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
                || ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN))) {
            return true;
        }
        // COMPRESS-107 - recognise Ant tar files
        return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN);
    }

    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The size of the TAR header */
    private final int recordSize;

    /** The buffer to store the TAR header **/
    private final byte[] recordBuffer;

    /** The size of a block */
    private final int blockSize;

    /** True if the stream has hit EOF */
    private boolean hasHitEOF;

    /** Size of the current entry */
    private long entrySize;

    /** How far into the entry the stream is */
    private long entryOffset;

    /** An input stream to read from */
    private final InputStream inputStream;

    /** Input streams for reading sparse entries **/
    private List<InputStream> sparseInputStreams;

    /** The index of the current input stream being read when reading sparse entries */
    private int currentSparseInputStreamIndex;

    /** The meta-data about the current entry */
    private TarArchiveEntry currEntry;

    /** The encoding of the file */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    private final boolean lenient;

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     */
    public TarArchiveInputStream(final InputStream is) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
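     * For example, {@code new TarArchiveInputStream(in, true)} creates a
     * stream that tolerates such malformed numeric header fields
     * ({@code in} being any open InputStream).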
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final boolean lenient) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize) {
        this(is, blockSize, recordSize, null);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding) {
        this(is, blockSize, recordSize, encoding, false);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding, final boolean lenient) {
        this.inputStream = is;
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordSize = recordSize;
        this.recordBuffer = new byte[recordSize];
        this.blockSize = blockSize;
        this.lenient = lenient;
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize,
                                 final String encoding) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final String encoding) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE,
             encoding);
    }

    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
        throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * Get the available data that can be read from the current
     * entry in the archive. This does not indicate how much data
     * is left in the entire archive, only in the current entry.
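     * It may therefore exceed the number of bytes that can actually be
     * read from the underlying stream without blocking.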
     * This value is determined from the entry's size header field
     * and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
     * bytes are left in the current entry in the archive.
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException declared for signature compatibility, never
     * actually thrown here
     */
    @Override
    public int available() throws IOException {
        if (isDirectory()) {
            return 0;
        }

        if (currEntry.getRealSize() - entryOffset > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) (currEntry.getRealSize() - entryOffset);
    }

    /**
     * Build the input streams consisting of all-zero input streams and non-zero input streams.
     * When reading from the non-zero input streams, the data is actually read from the original input stream.
     * The size of each input stream is derived from the sparse headers.
     *
     * <p>NOTE: Some all-zero input streams and non-zero input streams have a size of 0. We do NOT store
     * these zero-size input streams because they are meaningless.</p>
     */
    private void buildSparseInputStreams() throws IOException {
        currentSparseInputStreamIndex = -1;
        sparseInputStreams = new ArrayList<>();

        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();

        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside of the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }

            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                sparseInputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset));
            }

            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                sparseInputStreams.add(new BoundedInputStream(inputStream, sparseHeader.getNumbytes()));
            }

            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }

        if (!sparseInputStreams.isEmpty()) {
            currentSparseInputStreamIndex = 0;
        }
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * @return The implementation will return true if the {@link ArchiveEntry} is an instance of {@link TarArchiveEntry}
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        return ae instanceof TarArchiveEntry;
    }

    /**
     * Closes this stream, the underlying input stream and any input
     * streams created for sparse entries.
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        // Close all the input streams in sparseInputStreams
        if (sparseInputStreams != null) {
            for (final InputStream inputStream : sparseInputStreams) {
                inputStream.close();
            }
        }

        inputStream.close();
    }

    /**
     * This method is invoked once the end of the archive is hit; it
     * tries to consume the remaining bytes under the assumption that
     * the tool creating this archive has padded the last block.
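     * For example, with the default block size of 10240 bytes, an archive
     * whose data ends 1536 bytes into a block is followed by 8704 bytes of
     * padding that this method skips.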
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        final long bytesReadOfLastBlock = getBytesRead() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            final long skipped = IOUtils.skip(inputStream, blockSize - bytesReadOfLastBlock);
            count(skipped);
        }
    }

    /**
     * For FileInputStream, skip() always returns the requested number of bytes, so we
     * need the available byte count to determine how many bytes were actually skipped.
     *
     * @param available available bytes returned by inputStream.available()
     * @param skipped skipped bytes returned by inputStream.skip()
     * @param expected bytes expected to skip
     * @return number of bytes actually skipped
     * @throws IOException if a truncated tar archive is detected
     */
    private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
        long actuallySkipped = skipped;
        if (inputStream instanceof FileInputStream) {
            actuallySkipped = Math.min(skipped, available);
        }

        if (actuallySkipped != expected) {
            throw new IOException("Truncated TAR archive");
        }

        return actuallySkipped;
    }

    /**
     * Get the current TAR Archive Entry that this input stream is processing
     *
     * @return The current Archive Entry
     */
    public TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    /**
     * Get the next entry in this tar archive as longname data.
     *
     * @return The next entry in the archive as longname data, or null.
     * @throws IOException on error
     */
    protected byte[] getLongNameData() throws IOException {
        // read in the name
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length = 0;
        while ((length = read(smallBuf)) >= 0) {
            longName.write(smallBuf, 0, length);
        }
        getNextEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            longNameData = Arrays.copyOf(longNameData, length);
        }
        return longNameData;
    }

    /**
     * Returns the next Archive Entry in this Stream.
     *
     * @return the next entry,
     *         or {@code null} if there are no more entries
     * @throws IOException if the next entry could not be read
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Get the next entry in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, place the input stream at the header of the
     * next entry, read the header, instantiate a new
     * TarEntry from the header bytes and return that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * @return The next TarEntry in the archive, or null.
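     *         A typical iteration sketch (the stream variable {@code tin}
     *         is assumed to be an open TarArchiveInputStream):
     *         <pre>{@code
     *         TarArchiveEntry entry;
     *         while ((entry = tin.getNextTarEntry()) != null) {
     *             // consume the entry's data via tin.read(...)
     *         }
     *         }</pre>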
     * @throws IOException on error
     */
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            /* Skip will only go to the end of the current entry */
            IOUtils.skip(this, Long.MAX_VALUE);

            /* skip to the end of the last record */
            skipRecordPadding();
        }

        final byte[] headerBuf = getRecord();

        if (headerBuf == null) {
            /* hit EOF */
            currEntry = null;
            return null;
        }

        try {
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        entryOffset = 0;
        entrySize = currEntry.getSize();

        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        // If the size of the next element in the archive has changed
        // due to a new size being reported in the posix header
        // information, we update entrySize here so that it contains
        // the correct value.
        entrySize = currEntry.getSize();

        return currEntry;
    }

    /**
     * Get the next record in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, and place the input stream at the header of the
     * next entry.
     *
     * <p>If there are no more entries in the archive, null will be
     * returned to indicate that the end of the archive has been
     * reached. At the same time the {@code hasHitEOF} marker will be
     * set to true.</p>
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        byte[] headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Get the record size being used by this stream's buffer.
     *
     * @return The record size.
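     *         (512 bytes unless a different record size was passed to a
     *         constructor)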
     */
    public int getRecordSize() {
        return recordSize;
    }

    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /**
     * Determine if an archive record indicates End of Archive. End of
     * archive is indicated by a record that consists entirely of null bytes.
     *
     * @param record The record data to check.
     * @return true if the record data is an End of Archive
     */
    protected boolean isEOFRecord(final byte[] record) {
        return record == null || ArchiveUtils.isArrayZero(record, recordSize);
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     *
     * @param markLimit The limit to mark.
     */
    @Override
    public synchronized void mark(final int markLimit) {
    }

    /**
     * Since we do not support marking just yet, we return false.
     *
     * @return False.
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * For PAX Format 0.0, the sparse headers (GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
     *
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     *
     * <p>For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map, a
     * map of non-null data chunks. It is a string consisting of comma-separated values
     * "offset,size[,offset-1,size-1...]".</p>
     *
     * <p>For PAX Format 1.X the sparse map itself is stored in the file data block, preceding the actual file data.
     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
     * giving the offset and size of the data block it describes.</p>
     * @throws IOException if an I/O error occurs or the headers are malformed
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);

        // for 0.1 PAX Headers
        if (headers.containsKey(TarGnuSparseKeys.MAP)) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
        }
        getNextEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            sparseHeaders = TarUtils.parsePAX1XSparseHeaders(inputStream, recordSize);
            currEntry.setSparseHeaders(sparseHeaders);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads bytes from the current tar archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
        if (numToRead == 0) {
            return 0;
        }
        int totalRead = 0;

        if (isAtEOF() || isDirectory()) {
            return -1;
        }

        if (currEntry == null) {
            throw new IllegalStateException("No current tar entry");
        }

        if (entryOffset >= currEntry.getRealSize()) {
            return -1;
        }

        numToRead = Math.min(numToRead, available());

        if (currEntry.isSparse()) {
            // for sparse entries, we need to read them in another way
            totalRead = readSparse(buf, offset, numToRead);
        } else {
            totalRead = inputStream.read(buf, offset, numToRead);
        }

        if (totalRead == -1) {
            if (numToRead > 0) {
                throw new IOException("Truncated TAR archive");
            }
            setAtEOF(true);
        } else {
            count(totalRead);
            entryOffset += totalRead;
        }

        return totalRead;
    }

    private void readGlobalPaxHeaders() throws IOException {
        globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
        getNextEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Adds the sparse chunks from the current entry to the sparse chunks,
     * including any additional sparse entries following the current entry.
     *
     * @throws IOException on error
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                final byte[] headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf);
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Read a record from the input stream and return the data.
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException on error
     */
    protected byte[] readRecord() throws IOException {
        final int readNow = IOUtils.readFully(inputStream, recordBuffer);
        count(readNow);
        if (readNow != recordSize) {
            return null;
        }

        return recordBuffer;
    }

    /**
     * For sparse tar entries, there are many "holes" (consisting of all 0) in the file. Only the non-zero data is
     * stored in tar files, and it is stored separately. The structure of the non-zero data is described by the
     * sparse headers: the offset, where a block of non-zero data starts, and numbytes, the length of the
     * non-zero data block.
     * When reading sparse entries, the "holes" and the non-zero data are combined
     * according to the sparse headers.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
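     *         <p>For example, a sparse map of {@code offset=0,numbytes=1024}
     *         followed by {@code offset=8192,numbytes=512} is read back as
     *         1024 bytes of data, 7168 bytes of zeros, and 512 bytes of
     *         data.</p>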
     * @throws IOException on error
     */
    private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
        // if there are no actual input streams, just read from the original input stream
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.read(buf, offset, numToRead);
        }

        if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
            return -1;
        }

        final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
        final int readLen = currentInputStream.read(buf, offset, numToRead);

        // if the current input stream is the last input stream,
        // just return the number of bytes read from current input stream
        if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
            return readLen;
        }

        // if EOF of the current input stream is met, open a new input stream and recursively call read
        if (readLen == -1) {
            currentSparseInputStreamIndex++;
            return readSparse(buf, offset, numToRead);
        }

        // if the remaining data of the current input stream is not long enough, open a new input stream
        // and recursively call read
        if (readLen < numToRead) {
            currentSparseInputStreamIndex++;
            final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
            if (readLenOfNext == -1) {
                return readLen;
            }

            return readLen + readLenOfNext;
        }

        // if the remaining data of the current input stream is enough (which means readLen == numToRead), just return readLen
        return readLen;
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
    }

    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }

    protected final void setCurrentEntry(final TarArchiveEntry e) {
        currEntry = e;
    }

    /**
     * Skips over and discards {@code n} bytes of data from this input
     * stream. The {@code skip} method may, for a variety of reasons, end
     * up skipping over some smaller number of bytes, possibly {@code 0}.
     * This may result from any of a number of conditions; reaching end of file
     * or end of entry before {@code n} bytes have been skipped are only
     * two possibilities. The actual number of bytes skipped is returned. If
     * {@code n} is negative, no bytes are skipped.
     *
     * @param n
     *            the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
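     *         For a sparse entry this counts logical bytes of the expanded
     *         entry, so it may exceed the number of physical archive bytes
     *         consumed.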
     * @throws IOException if a truncated tar archive is detected
     *                     or some other I/O error occurs
     */
    @Override
    public long skip(final long n) throws IOException {
        if (n <= 0 || isDirectory()) {
            return 0;
        }

        final long availableOfInputStream = inputStream.available();
        final long available = currEntry.getRealSize() - entryOffset;
        final long numToSkip = Math.min(n, available);
        long skipped;

        if (!currEntry.isSparse()) {
            skipped = IOUtils.skip(inputStream, numToSkip);
            // for a non-sparse entry, we need inputStream.available() to determine the
            // bytes actually skipped if inputStream is an instance of FileInputStream
            skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
        } else {
            skipped = skipSparse(numToSkip);
        }

        count(skipped);
        entryOffset += skipped;
        return skipped;
    }

    /**
     * The last record block should be written at the full size, so skip any
     * additional space used to fill a record after an entry.
     *
     * @throws IOException if a truncated tar archive is detected
     */
    private void skipRecordPadding() throws IOException {
        if (!isDirectory() && this.entrySize > 0 && this.entrySize % this.recordSize != 0) {
            final long available = inputStream.available();
            final long numRecords = (this.entrySize / this.recordSize) + 1;
            final long padding = (numRecords * this.recordSize) - this.entrySize;
            long skipped = IOUtils.skip(inputStream, padding);

            skipped = getActuallySkipped(available, skipped, padding);

            count(skipped);
        }
    }

    /**
     * Skip n bytes from the current input stream. If the current input stream doesn't have enough data to skip,
     * jump to the next input stream and skip the remaining bytes; keep doing this until n bytes in total are
     * skipped or all the input streams are exhausted.
     *
     * @param n bytes of data to skip
     * @return actual bytes of data skipped
     * @throws IOException if an I/O error occurs
     */
    private long skipSparse(final long n) throws IOException {
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.skip(n);
        }

        long bytesSkipped = 0;

        while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
            final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
            bytesSkipped += currentInputStream.skip(n - bytesSkipped);

            if (bytesSkipped < n) {
                currentSparseInputStreamIndex++;
            }
        }

        return bytesSkipped;
    }

    /**
     * Tries to read the next record, rewinding the stream if it is not an EOF record.
     *
     * <p>This is meant to protect against cases where a tar
     * implementation has written only one EOF record when two are
     * expected. Actually this won't help since a non-conforming
     * implementation likely won't fill full blocks consisting of - by
     * default - twenty records either so we probably have already read
     * beyond the archive anyway.</p>
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        final boolean marked = inputStream.markSupported();
        if (marked) {
            inputStream.mark(recordSize);
        }
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset && marked) {
                pushedBackBytes(recordSize);
                inputStream.reset();
            }
        }
    }
}