001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.sevenz; 019 020import static java.nio.charset.StandardCharsets.UTF_16LE; 021 022import java.io.BufferedInputStream; 023import java.io.ByteArrayInputStream; 024import java.io.Closeable; 025import java.io.DataInputStream; 026import java.io.EOFException; 027import java.io.File; 028import java.io.FilterInputStream; 029import java.io.IOException; 030import java.io.InputStream; 031import java.nio.ByteBuffer; 032import java.nio.ByteOrder; 033import java.nio.channels.Channels; 034import java.nio.channels.SeekableByteChannel; 035import java.nio.file.Files; 036import java.nio.file.StandardOpenOption; 037import java.util.ArrayList; 038import java.util.Arrays; 039import java.util.BitSet; 040import java.util.EnumSet; 041import java.util.LinkedHashMap; 042import java.util.LinkedList; 043import java.util.List; 044import java.util.Map; 045import java.util.Objects; 046import java.util.zip.CRC32; 047import java.util.zip.CheckedInputStream; 048 049import org.apache.commons.compress.MemoryLimitException; 050import org.apache.commons.compress.utils.BoundedInputStream; 051import 
org.apache.commons.compress.utils.ByteUtils; 052import org.apache.commons.compress.utils.CRC32VerifyingInputStream; 053import org.apache.commons.compress.utils.IOUtils; 054import org.apache.commons.compress.utils.InputStreamStatistics; 055 056/** 057 * Reads a 7z file, using SeekableByteChannel under 058 * the covers. 059 * <p> 060 * The 7z file format is a flexible container 061 * that can contain many compression and 062 * encryption types, but at the moment 063 * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256 064 * are supported. 065 * </p> 066 * <p> 067 * The format is very Windows/Intel specific, 068 * so it uses little-endian byte order, 069 * doesn't store user/group or permission bits, 070 * and represents times using NTFS timestamps 071 * (100 nanosecond units since 1 January 1601). 072 * Hence the official tools recommend against 073 * using it for backup purposes on *nix, and 074 * recommend .tar.7z or .tar.lzma or .tar.xz 075 * instead. 076 * </p> 077 * <p> 078 * Both the header and file contents may be 079 * compressed and/or encrypted. With both 080 * encrypted, neither file names nor file 081 * contents can be read, but the use of 082 * encryption isn't plausibly deniable. 
083 * </p> 084 * <p>Multi volume archives can be read by concatenating the parts in 085 * correct order - either manually or by using {link 086 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel} 087 * for example.</p> 088 * 089 * @NotThreadSafe 090 * @since 1.6 091 */ 092public class SevenZFile implements Closeable { 093 private static class ArchiveStatistics { 094 private int numberOfPackedStreams; 095 private long numberOfCoders; 096 private long numberOfOutStreams; 097 private long numberOfInStreams; 098 private long numberOfUnpackSubStreams; 099 private int numberOfFolders; 100 private BitSet folderHasCrc; 101 private int numberOfEntries; 102 private int numberOfEntriesWithStream; 103 104 void assertValidity(final int maxMemoryLimitInKb) throws IOException { 105 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 106 throw new IOException("archive with entries but no folders"); 107 } 108 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 109 throw new IOException("archive doesn't contain enough substreams for entries"); 110 } 111 112 final long memoryNeededInKb = estimateSize() / 1024; 113 if (maxMemoryLimitInKb < memoryNeededInKb) { 114 throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); 115 } 116 } 117 118 private long bindPairSize() { 119 return 16; 120 } 121 122 private long coderSize() { 123 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 124 + 16 125 + 4 /* properties, guess */ 126 ; 127 } 128 129 private long entrySize() { 130 return 100; /* real size depends on name length, everything without name is about 70 bytes */ 131 } 132 133 long estimateSize() { 134 final long lowerBound = 16L * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 135 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 136 + numberOfFolders * folderSize() /* folders in Archive */ 137 + numberOfCoders * coderSize() /* coders in Folder */ 138 + 
(numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 139 + 8L * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 140 + 8L * numberOfOutStreams /* unpackSizes in Folder */ 141 + numberOfEntries * entrySize() /* files in Archive */ 142 + streamMapSize() 143 ; 144 return 2 * lowerBound /* conservative guess */; 145 } 146 147 private long folderSize() { 148 return 30; /* nested arrays are accounted for separately */ 149 } 150 151 private long streamMapSize() { 152 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 153 + 8 * numberOfPackedStreams /* packStreamOffsets */ 154 + 4 * numberOfEntries /* fileFolderIndex */ 155 ; 156 } 157 158 @Override 159 public String toString() { 160 return "Archive with " + numberOfEntries + " entries in " + numberOfFolders 161 + " folders. Estimated size " + estimateSize()/ 1024L + " kB."; 162 } 163 } 164 165 static final int SIGNATURE_HEADER_SIZE = 32; 166 167 private static final String DEFAULT_FILE_NAME = "unknown archive"; 168 169 /** Shared with SevenZOutputFile and tests, neither mutates it. 
*/ 170 static final byte[] sevenZSignature = { //NOSONAR 171 (byte) '7', (byte) 'z', (byte) 0xBC, (byte) 0xAF, (byte) 0x27, (byte) 0x1C 172 }; 173 174 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 175 if (value > Integer.MAX_VALUE || value < 0) { 176 throw new IOException(String.format("Cannot handle % %,d", what, value)); 177 } 178 return (int) value; 179 } 180 181 private static ByteBuffer checkEndOfFile(final ByteBuffer buf, final int expectRemaining) throws EOFException { 182 final int remaining = buf.remaining(); 183 if (remaining < expectRemaining) { 184 throw new EOFException(String.format("remaining %,d < expectRemaining %,d", remaining, expectRemaining)); 185 } 186 return buf; 187 } 188 189 private static void get(final ByteBuffer buf, final byte[] to) throws EOFException { 190 checkEndOfFile(buf, to.length).get(to); 191 } 192 193 private static char getChar(final ByteBuffer buf) throws EOFException { 194 return checkEndOfFile(buf, Character.BYTES).getChar(); 195 } 196 197 private static int getInt(final ByteBuffer buf) throws EOFException { 198 return checkEndOfFile(buf, Integer.BYTES).getInt(); 199 } 200 201 private static long getLong(final ByteBuffer buf) throws EOFException { 202 return checkEndOfFile(buf, Long.BYTES).getLong(); 203 } 204 205 private static int getUnsignedByte(final ByteBuffer buf) throws EOFException { 206 if (!buf.hasRemaining()) { 207 throw new EOFException(); 208 } 209 return buf.get() & 0xff; 210 } 211 212 /** 213 * Checks if the signature matches what is expected for a 7z file. 214 * 215 * @param signature 216 * the bytes to check 217 * @param length 218 * the number of bytes to check 219 * @return true, if this is the signature of a 7z archive. 
220 * @since 1.8 221 */ 222 public static boolean matches(final byte[] signature, final int length) { 223 if (length < sevenZSignature.length) { 224 return false; 225 } 226 227 for (int i = 0; i < sevenZSignature.length; i++) { 228 if (signature[i] != sevenZSignature[i]) { 229 return false; 230 } 231 } 232 return true; 233 } 234 private static long readUint64(final ByteBuffer in) throws IOException { 235 // long rather than int as it might get shifted beyond the range of an int 236 final long firstByte = getUnsignedByte(in); 237 int mask = 0x80; 238 long value = 0; 239 for (int i = 0; i < 8; i++) { 240 if ((firstByte & mask) == 0) { 241 return value | (firstByte & mask - 1) << 8 * i; 242 } 243 final long nextByte = getUnsignedByte(in); 244 value |= nextByte << 8 * i; 245 mask >>>= 1; 246 } 247 return value; 248 } 249 250 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) { 251 if (bytesToSkip < 1) { 252 return 0; 253 } 254 final int current = input.position(); 255 final int maxSkip = input.remaining(); 256 if (maxSkip < bytesToSkip) { 257 bytesToSkip = maxSkip; 258 } 259 input.position(current + (int) bytesToSkip); 260 return bytesToSkip; 261 } 262 263 private final String fileName; 264 265 private SeekableByteChannel channel; 266 267 private final Archive archive; 268 269 private int currentEntryIndex = -1; 270 271 private int currentFolderIndex = -1; 272 273 private InputStream currentFolderInputStream; 274 275 private byte[] password; 276 277 private final SevenZFileOptions options; 278 279 private long compressedBytesReadFromCurrentEntry; 280 281 private long uncompressedBytesReadFromCurrentEntry; 282 283 private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>(); 284 285 /** 286 * Reads a file as unencrypted 7z archive 287 * 288 * @param fileName the file to read 289 * @throws IOException if reading the archive fails 290 */ 291 public SevenZFile(final File fileName) throws IOException { 292 this(fileName, 
SevenZFileOptions.DEFAULT); 293 } 294 295 /** 296 * Reads a file as 7z archive 297 * 298 * @param fileName the file to read 299 * @param password optional password if the archive is encrypted - 300 * the byte array is supposed to be the UTF16-LE encoded 301 * representation of the password. 302 * @throws IOException if reading the archive fails 303 * @deprecated use the char[]-arg version for the password instead 304 */ 305 @Deprecated 306 public SevenZFile(final File fileName, final byte[] password) throws IOException { 307 this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), 308 fileName.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT); 309 } 310 311 /** 312 * Reads a file as 7z archive 313 * 314 * @param fileName the file to read 315 * @param password optional password if the archive is encrypted 316 * @throws IOException if reading the archive fails 317 * @since 1.17 318 */ 319 public SevenZFile(final File fileName, final char[] password) throws IOException { 320 this(fileName, password, SevenZFileOptions.DEFAULT); 321 } 322 323 /** 324 * Reads a file as 7z archive with additional options. 
*
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final File fileName, final char[] password, final SevenZFileOptions options) throws IOException {
        this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), // NOSONAR
                fileName.getAbsolutePath(), AES256SHA256Decoder.utf16Decode(password), true, options);
    }

    /**
     * Reads a file as unencrypted 7z archive
     *
     * <p>Delegates to the password-taking constructor with a {@code null}
     * password.</p>
     *
     * @param fileName the file to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final File fileName, final SevenZFileOptions options) throws IOException {
        this(fileName, null, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive using the default options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @throws IOException if reading the archive fails
     * @since 1.13
     */
    public SevenZFile(final SeekableByteChannel channel) throws IOException {
        this(channel, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
* @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel,
                      final byte[] password) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive using the default options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel,
                      final char[] password) throws IOException {
        this(channel, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The placeholder archive name {@code "unknown archive"} is used
     * as file name.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options)
            throws IOException {
        this(channel, DEFAULT_FILE_NAME, password, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException {
        this(channel, DEFAULT_FILE_NAME, null, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive using the default options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName)
            throws IOException {
        this(channel, fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
465 * @throws IOException if reading the archive fails 466 * @since 1.13 467 * @deprecated use the char[]-arg version for the password instead 468 */ 469 @Deprecated 470 public SevenZFile(final SeekableByteChannel channel, final String fileName, 471 final byte[] password) throws IOException { 472 this(channel, fileName, password, false, SevenZFileOptions.DEFAULT); 473 } 474 475 private SevenZFile(final SeekableByteChannel channel, final String filename, 476 final byte[] password, final boolean closeOnError, final SevenZFileOptions options) throws IOException { 477 boolean succeeded = false; 478 this.channel = channel; 479 this.fileName = filename; 480 this.options = options; 481 try { 482 archive = readHeaders(password); 483 if (password != null) { 484 this.password = Arrays.copyOf(password, password.length); 485 } else { 486 this.password = null; 487 } 488 succeeded = true; 489 } finally { 490 if (!succeeded && closeOnError) { 491 this.channel.close(); 492 } 493 } 494 } 495 496 /** 497 * Reads a SeekableByteChannel as 7z archive 498 * 499 * <p>{@link 500 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 501 * allows you to read from an in-memory archive.</p> 502 * 503 * @param channel the channel to read 504 * @param fileName name of the archive - only used for error reporting 505 * @param password optional password if the archive is encrypted 506 * @throws IOException if reading the archive fails 507 * @since 1.17 508 */ 509 public SevenZFile(final SeekableByteChannel channel, final String fileName, 510 final char[] password) throws IOException { 511 this(channel, fileName, password, SevenZFileOptions.DEFAULT); 512 } 513 514 /** 515 * Reads a SeekableByteChannel as 7z archive with addtional options. 
516 * 517 * <p>{@link 518 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 519 * allows you to read from an in-memory archive.</p> 520 * 521 * @param channel the channel to read 522 * @param fileName name of the archive - only used for error reporting 523 * @param password optional password if the archive is encrypted 524 * @param options the options to apply 525 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 526 * @since 1.19 527 */ 528 public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password, 529 final SevenZFileOptions options) throws IOException { 530 this(channel, fileName, AES256SHA256Decoder.utf16Decode(password), false, options); 531 } 532 533 /** 534 * Reads a SeekableByteChannel as 7z archive with additional options. 535 * 536 * <p>{@link 537 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 538 * allows you to read from an in-memory archive.</p> 539 * 540 * @param channel the channel to read 541 * @param fileName name of the archive - only used for error reporting 542 * @param options the options to apply 543 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 544 * @since 1.19 545 */ 546 public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options) 547 throws IOException { 548 this(channel, fileName, null, false, options); 549 } 550 551 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, 552 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { 553 channel.position(folderOffset); 554 InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( 555 new BoundedSeekableByteChannelInputStream(channel, 556 archive.packSizes[firstPackStreamIndex]))) { 557 private void count(final int c) { 558 compressedBytesReadFromCurrentEntry += c; 559 } 560 @Override 561 public int read() 
throws IOException { 562 final int r = in.read(); 563 if (r >= 0) { 564 count(1); 565 } 566 return r; 567 } 568 @Override 569 public int read(final byte[] b) throws IOException { 570 return read(b, 0, b.length); 571 } 572 @Override 573 public int read(final byte[] b, final int off, final int len) throws IOException { 574 if (len == 0) { 575 return 0; 576 } 577 final int r = in.read(b, off, len); 578 if (r >= 0) { 579 count(r); 580 } 581 return r; 582 } 583 }; 584 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 585 for (final Coder coder : folder.getOrderedCoders()) { 586 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 587 throw new IOException("Multi input/output stream coders are not yet supported"); 588 } 589 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 590 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, 591 folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); 592 methods.addFirst(new SevenZMethodConfiguration(method, 593 Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 594 } 595 entry.setContentMethods(methods); 596 if (folder.hasCrc) { 597 return new CRC32VerifyingInputStream(inputStreamStack, 598 folder.getUnpackSize(), folder.crc); 599 } 600 return inputStreamStack; 601 } 602 603 /** 604 * Build the decoding stream for the entry to be read. 605 * This method may be called from a random access(getInputStream) or 606 * sequential access(getNextEntry). 
607 * If this method is called from a random access, some entries may 608 * need to be skipped(we put them to the deferredBlockStreams and 609 * skip them when actually needed to improve the performance) 610 * 611 * @param entryIndex the index of the entry to be read 612 * @param isRandomAccess is this called in a random access 613 * @throws IOException if there are exceptions when reading the file 614 */ 615 private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException { 616 if (archive.streamMap == null) { 617 throw new IOException("Archive doesn't contain stream information to read entries"); 618 } 619 final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 620 if (folderIndex < 0) { 621 deferredBlockStreams.clear(); 622 // TODO: previously it'd return an empty stream? 623 // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0); 624 return; 625 } 626 final SevenZArchiveEntry file = archive.files[entryIndex]; 627 boolean isInSameFolder = false; 628 if (currentFolderIndex == folderIndex) { 629 // (COMPRESS-320). 630 // The current entry is within the same (potentially opened) folder. The 631 // previous stream has to be fully decoded before we can start reading 632 // but don't do it eagerly -- if the user skips over the entire folder nothing 633 // is effectively decompressed. 
634 if (entryIndex > 0) { 635 file.setContentMethods(archive.files[entryIndex - 1].getContentMethods()); 636 } 637 638 // if this is called in a random access, then the content methods of previous entry may be null 639 // the content methods should be set to methods of the first entry as it must not be null, 640 // and the content methods would only be set if the content methods was not set 641 if (isRandomAccess && file.getContentMethods() == null) { 642 final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; 643 final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; 644 file.setContentMethods(folderFirstFile.getContentMethods()); 645 } 646 isInSameFolder = true; 647 } else { 648 currentFolderIndex = folderIndex; 649 // We're opening a new folder. Discard any queued streams/ folder stream. 650 reopenFolderInputStream(folderIndex, file); 651 } 652 653 boolean haveSkippedEntries = false; 654 if (isRandomAccess) { 655 // entries will only need to be skipped if it's a random access 656 haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex); 657 } 658 659 if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) { 660 // we don't need to add another entry to the deferredBlockStreams when : 661 // 1. If this method is called in a random access and the entry index 662 // to be read equals to the current entry index, the input stream 663 // has already been put in the deferredBlockStreams 664 // 2. 
If this entry has not been read(which means no entries are skipped) 665 return; 666 } 667 668 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 669 if (file.getHasCrc()) { 670 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 671 } 672 673 deferredBlockStreams.add(fileStream); 674 } 675 676 private void calculateStreamMap(final Archive archive) throws IOException { 677 final StreamMap streamMap = new StreamMap(); 678 679 int nextFolderPackStreamIndex = 0; 680 final int numFolders = archive.folders != null ? archive.folders.length : 0; 681 streamMap.folderFirstPackStreamIndex = new int[numFolders]; 682 for (int i = 0; i < numFolders; i++) { 683 streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 684 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 685 } 686 687 long nextPackStreamOffset = 0; 688 final int numPackSizes = archive.packSizes.length; 689 streamMap.packStreamOffsets = new long[numPackSizes]; 690 for (int i = 0; i < numPackSizes; i++) { 691 streamMap.packStreamOffsets[i] = nextPackStreamOffset; 692 nextPackStreamOffset += archive.packSizes[i]; 693 } 694 695 streamMap.folderFirstFileIndex = new int[numFolders]; 696 streamMap.fileFolderIndex = new int[archive.files.length]; 697 int nextFolderIndex = 0; 698 int nextFolderUnpackStreamIndex = 0; 699 for (int i = 0; i < archive.files.length; i++) { 700 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 701 streamMap.fileFolderIndex[i] = -1; 702 continue; 703 } 704 if (nextFolderUnpackStreamIndex == 0) { 705 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 706 streamMap.folderFirstFileIndex[nextFolderIndex] = i; 707 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 708 break; 709 } 710 } 711 if (nextFolderIndex >= archive.folders.length) { 712 throw new IOException("Too few folders in archive"); 713 } 714 } 715 
streamMap.fileFolderIndex[i] = nextFolderIndex; 716 if (!archive.files[i].hasStream()) { 717 continue; 718 } 719 ++nextFolderUnpackStreamIndex; 720 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 721 ++nextFolderIndex; 722 nextFolderUnpackStreamIndex = 0; 723 } 724 } 725 726 archive.streamMap = streamMap; 727 } 728 729 private void checkEntryIsInitialized(final Map<Integer, SevenZArchiveEntry> archiveEntries, final int index) { 730 if (archiveEntries.get(index) == null) { 731 archiveEntries.put(index, new SevenZArchiveEntry()); 732 } 733 } 734 735 /** 736 * Closes the archive. 737 * @throws IOException if closing the file fails 738 */ 739 @Override 740 public void close() throws IOException { 741 if (channel != null) { 742 try { 743 channel.close(); 744 } finally { 745 channel = null; 746 if (password != null) { 747 Arrays.fill(password, (byte) 0); 748 } 749 password = null; 750 } 751 } 752 } 753 754 private InputStream getCurrentStream() throws IOException { 755 if (archive.files[currentEntryIndex].getSize() == 0) { 756 return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); 757 } 758 if (deferredBlockStreams.isEmpty()) { 759 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 760 } 761 762 while (deferredBlockStreams.size() > 1) { 763 // In solid compression mode we need to decompress all leading folder' 764 // streams to get access to an entry. We defer this until really needed 765 // so that entire blocks can be skipped without wasting time for decompression. 766 try (final InputStream stream = deferredBlockStreams.remove(0)) { 767 IOUtils.skip(stream, Long.MAX_VALUE); 768 } 769 compressedBytesReadFromCurrentEntry = 0; 770 } 771 772 return deferredBlockStreams.get(0); 773 } 774 775 /** 776 * Derives a default file name from the archive name - if known. 777 * 778 * <p>This implements the same heuristics the 7z tools use. 
In 779 * 7z's case if an archive contains entries without a name - 780 * i.e. {@link SevenZArchiveEntry#getName} returns {@code null} - 781 * then its command line and GUI tools will use this default name 782 * when extracting the entries.</p> 783 * 784 * @return null if the name of the archive is unknown. Otherwise 785 * if the name of the archive has got any extension, it is 786 * stripped and the remainder returned. Finally if the name of the 787 * archive hasn't got any extension then a {@code ~} character is 788 * appended to the archive name. 789 * 790 * @since 1.19 791 */ 792 public String getDefaultName() { 793 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 794 return null; 795 } 796 797 final String lastSegment = new File(fileName).getName(); 798 final int dotPos = lastSegment.lastIndexOf("."); 799 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 800 return lastSegment.substring(0, dotPos); 801 } 802 return lastSegment + "~"; 803 } 804 805 /** 806 * Returns a copy of meta-data of all archive entries. 807 * 808 * <p>This method only provides meta-data, the entries can not be 809 * used to read the contents, you still need to process all 810 * entries in order using {@link #getNextEntry} for that.</p> 811 * 812 * <p>The content methods are only available for entries that have 813 * already been reached via {@link #getNextEntry}.</p> 814 * 815 * @return a copy of meta-data of all archive entries. 816 * @since 1.11 817 */ 818 public Iterable<SevenZArchiveEntry> getEntries() { 819 return new ArrayList<>(Arrays.asList(archive.files)); 820 } 821 822 /** 823 * Returns an InputStream for reading the contents of the given entry. 824 * 825 * <p>For archives using solid compression randomly accessing 826 * entries will be significantly slower than reading the archive 827 * sequentially.</p> 828 * 829 * @param entry the entry to get the stream for. 830 * @return a stream to read the entry from. 
831 * @throws IOException if unable to create an input stream from the entry 832 * @since 1.20 833 */ 834 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 835 int entryIndex = -1; 836 for (int i = 0; i < this.archive.files.length;i++) { 837 if (entry == this.archive.files[i]) { 838 entryIndex = i; 839 break; 840 } 841 } 842 843 if (entryIndex < 0) { 844 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); 845 } 846 847 buildDecodingStream(entryIndex, true); 848 currentEntryIndex = entryIndex; 849 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 850 return getCurrentStream(); 851 } 852 853 /** 854 * Returns the next Archive Entry in this archive. 855 * 856 * @return the next entry, 857 * or {@code null} if there are no more entries 858 * @throws IOException if the next entry could not be read 859 */ 860 public SevenZArchiveEntry getNextEntry() throws IOException { 861 if (currentEntryIndex >= archive.files.length - 1) { 862 return null; 863 } 864 ++currentEntryIndex; 865 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 866 if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) { 867 entry.setName(getDefaultName()); 868 } 869 buildDecodingStream(currentEntryIndex, false); 870 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 871 return entry; 872 } 873 874 /** 875 * Provides statistics for bytes read from the current entry. 
876 * 877 * @return statistics for bytes read from the current entry 878 * @since 1.17 879 */ 880 public InputStreamStatistics getStatisticsForCurrentEntry() { 881 return new InputStreamStatistics() { 882 @Override 883 public long getCompressedCount() { 884 return compressedBytesReadFromCurrentEntry; 885 } 886 @Override 887 public long getUncompressedCount() { 888 return uncompressedBytesReadFromCurrentEntry; 889 } 890 }; 891 } 892 893 /** 894 * Find out if any data of current entry has been read or not. 895 * This is achieved by comparing the bytes remaining to read 896 * and the size of the file. 897 * 898 * @return true if any data of current entry has been read 899 * @since 1.21 900 */ 901 private boolean hasCurrentEntryBeenRead() { 902 boolean hasCurrentEntryBeenRead = false; 903 if (!deferredBlockStreams.isEmpty()) { 904 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 905 // get the bytes remaining to read, and compare it with the size of 906 // the file to figure out if the file has been read 907 if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { 908 hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 909 } 910 911 if (currentEntryInputStream instanceof BoundedInputStream) { 912 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 913 } 914 } 915 return hasCurrentEntryBeenRead; 916 } 917 918 private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { 919 assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize); 920 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 921 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 922 if (verifyCrc) { 923 final long position = channel.position(); 
924 final CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32()); 925 if (cis.skip(nextHeaderSizeInt) != nextHeaderSizeInt) { 926 throw new IOException("Problem computing NextHeader CRC-32"); 927 } 928 if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) { 929 throw new IOException("NextHeader CRC-32 mismatch"); 930 } 931 channel.position(position); 932 } 933 Archive archive = new Archive(); 934 ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 935 readFully(buf); 936 int nid = getUnsignedByte(buf); 937 if (nid == NID.kEncodedHeader) { 938 buf = readEncodedHeader(buf, archive, password); 939 // Archive gets rebuilt with the new header 940 archive = new Archive(); 941 nid = getUnsignedByte(buf); 942 } 943 if (nid != NID.kHeader) { 944 throw new IOException("Broken or unsupported archive: no Header"); 945 } 946 readHeader(buf, archive); 947 archive.subStreamsInfo = null; 948 return archive; 949 } 950 951 /** 952 * Reads a byte of data. 953 * 954 * @return the byte read, or -1 if end of input is reached 955 * @throws IOException 956 * if an I/O error has occurred 957 */ 958 public int read() throws IOException { 959 final int b = getCurrentStream().read(); 960 if (b >= 0) { 961 uncompressedBytesReadFromCurrentEntry++; 962 } 963 return b; 964 } 965 966 /** 967 * Reads data into an array of bytes. 968 * 969 * @param b the array to write data to 970 * @return the number of bytes read, or -1 if end of input is reached 971 * @throws IOException 972 * if an I/O error has occurred 973 */ 974 public int read(final byte[] b) throws IOException { 975 return read(b, 0, b.length); 976 } 977 978 /** 979 * Reads data into an array of bytes. 
980 * 981 * @param b the array to write data to 982 * @param off offset into the buffer to start filling at 983 * @param len of bytes to read 984 * @return the number of bytes read, or -1 if end of input is reached 985 * @throws IOException 986 * if an I/O error has occurred 987 */ 988 public int read(final byte[] b, final int off, final int len) throws IOException { 989 if (len == 0) { 990 return 0; 991 } 992 final int cnt = getCurrentStream().read(b, off, len); 993 if (cnt > 0) { 994 uncompressedBytesReadFromCurrentEntry += cnt; 995 } 996 return cnt; 997 } 998 999 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 1000 final int areAllDefined = getUnsignedByte(header); 1001 final BitSet bits; 1002 if (areAllDefined != 0) { 1003 bits = new BitSet(size); 1004 for (int i = 0; i < size; i++) { 1005 bits.set(i, true); 1006 } 1007 } else { 1008 bits = readBits(header, size); 1009 } 1010 return bits; 1011 } 1012 1013 private void readArchiveProperties(final ByteBuffer input) throws IOException { 1014 // FIXME: the reference implementation just throws them away? 
1015 int nid = getUnsignedByte(input); 1016 while (nid != NID.kEnd) { 1017 final long propertySize = readUint64(input); 1018 final byte[] property = new byte[(int)propertySize]; 1019 get(input, property); 1020 nid = getUnsignedByte(input); 1021 } 1022 } 1023 1024 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1025 final BitSet bits = new BitSet(size); 1026 int mask = 0; 1027 int cache = 0; 1028 for (int i = 0; i < size; i++) { 1029 if (mask == 0) { 1030 mask = 0x80; 1031 cache = getUnsignedByte(header); 1032 } 1033 bits.set(i, (cache & mask) != 0); 1034 mask >>>= 1; 1035 } 1036 return bits; 1037 } 1038 1039 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, 1040 final byte[] password) throws IOException { 1041 final int pos = header.position(); 1042 final ArchiveStatistics stats = new ArchiveStatistics(); 1043 sanityCheckStreamsInfo(header, stats); 1044 stats.assertValidity(options.getMaxMemoryLimitInKb()); 1045 header.position(pos); 1046 1047 readStreamsInfo(header, archive); 1048 1049 if (archive.folders == null || archive.folders.length == 0) { 1050 throw new IOException("no folders, can't read encoded header"); 1051 } 1052 if (archive.packSizes == null || archive.packSizes.length == 0) { 1053 throw new IOException("no packed streams, can't read encoded header"); 1054 } 1055 1056 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 
1057 final Folder folder = archive.folders[0]; 1058 final int firstPackStreamIndex = 0; 1059 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 1060 0; 1061 1062 channel.position(folderOffset); 1063 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, 1064 archive.packSizes[firstPackStreamIndex]); 1065 for (final Coder coder : folder.getOrderedCoders()) { 1066 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1067 throw new IOException("Multi input/output stream coders are not yet supported"); 1068 } 1069 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR 1070 folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); 1071 } 1072 if (folder.hasCrc) { 1073 inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, 1074 folder.getUnpackSize(), folder.crc); 1075 } 1076 final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize()); 1077 final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize); 1078 if (nextHeader.length < unpackSize) { 1079 throw new IOException("premature end of stream"); 1080 } 1081 inputStreamStack.close(); 1082 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 1083 } 1084 1085 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1086 final int numFilesInt = (int) readUint64(header); 1087 final Map<Integer, SevenZArchiveEntry> fileMap = new LinkedHashMap<>(); 1088 BitSet isEmptyStream = null; 1089 BitSet isEmptyFile = null; 1090 BitSet isAnti = null; 1091 while (true) { 1092 final int propertyType = getUnsignedByte(header); 1093 if (propertyType == 0) { 1094 break; 1095 } 1096 final long size = readUint64(header); 1097 switch (propertyType) { 1098 case NID.kEmptyStream: { 1099 isEmptyStream = readBits(header, numFilesInt); 1100 break; 1101 } 1102 case NID.kEmptyFile: { 1103 isEmptyFile = readBits(header, 
isEmptyStream.cardinality()); 1104 break; 1105 } 1106 case NID.kAnti: { 1107 isAnti = readBits(header, isEmptyStream.cardinality()); 1108 break; 1109 } 1110 case NID.kName: { 1111 /* final int external = */ getUnsignedByte(header); 1112 final byte[] names = new byte[(int) (size - 1)]; 1113 final int namesLength = names.length; 1114 get(header, names); 1115 int nextFile = 0; 1116 int nextName = 0; 1117 for (int i = 0; i < namesLength; i += 2) { 1118 if (names[i] == 0 && names[i + 1] == 0) { 1119 checkEntryIsInitialized(fileMap, nextFile); 1120 fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, UTF_16LE)); 1121 nextName = i + 2; 1122 nextFile++; 1123 } 1124 } 1125 if (nextName != namesLength || nextFile != numFilesInt) { 1126 throw new IOException("Error parsing file names"); 1127 } 1128 break; 1129 } 1130 case NID.kCTime: { 1131 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1132 /* final int external = */ getUnsignedByte(header); 1133 for (int i = 0; i < numFilesInt; i++) { 1134 checkEntryIsInitialized(fileMap, i); 1135 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1136 entryAtIndex.setHasCreationDate(timesDefined.get(i)); 1137 if (entryAtIndex.getHasCreationDate()) { 1138 entryAtIndex.setCreationDate(getLong(header)); 1139 } 1140 } 1141 break; 1142 } 1143 case NID.kATime: { 1144 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1145 /* final int external = */ getUnsignedByte(header); 1146 for (int i = 0; i < numFilesInt; i++) { 1147 checkEntryIsInitialized(fileMap, i); 1148 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1149 entryAtIndex.setHasAccessDate(timesDefined.get(i)); 1150 if (entryAtIndex.getHasAccessDate()) { 1151 entryAtIndex.setAccessDate(getLong(header)); 1152 } 1153 } 1154 break; 1155 } 1156 case NID.kMTime: { 1157 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1158 /* final int external = */ getUnsignedByte(header); 1159 for (int i = 0; i < numFilesInt; i++) { 
1160 checkEntryIsInitialized(fileMap, i); 1161 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1162 entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); 1163 if (entryAtIndex.getHasLastModifiedDate()) { 1164 entryAtIndex.setLastModifiedDate(getLong(header)); 1165 } 1166 } 1167 break; 1168 } 1169 case NID.kWinAttributes: { 1170 final BitSet attributesDefined = readAllOrBits(header, numFilesInt); 1171 /* final int external = */ getUnsignedByte(header); 1172 for (int i = 0; i < numFilesInt; i++) { 1173 checkEntryIsInitialized(fileMap, i); 1174 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1175 entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); 1176 if (entryAtIndex.getHasWindowsAttributes()) { 1177 entryAtIndex.setWindowsAttributes(getInt(header)); 1178 } 1179 } 1180 break; 1181 } 1182 case NID.kDummy: { 1183 // 7z 9.20 asserts the content is all zeros and ignores the property 1184 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1185 1186 skipBytesFully(header, size); 1187 break; 1188 } 1189 1190 default: { 1191 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1192 skipBytesFully(header, size); 1193 break; 1194 } 1195 } 1196 } 1197 int nonEmptyFileCounter = 0; 1198 int emptyFileCounter = 0; 1199 for (int i = 0; i < numFilesInt; i++) { 1200 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1201 if (entryAtIndex == null) { 1202 continue; 1203 } 1204 entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 1205 if (entryAtIndex.hasStream()) { 1206 if (archive.subStreamsInfo == null) { 1207 throw new IOException("Archive contains file with streams but no subStreamsInfo"); 1208 } 1209 entryAtIndex.setDirectory(false); 1210 entryAtIndex.setAntiItem(false); 1211 entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 1212 entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 1213 
entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 1214 if (entryAtIndex.getSize() < 0) { 1215 throw new IOException("broken archive, entry with negative size"); 1216 } 1217 ++nonEmptyFileCounter; 1218 } else { 1219 entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 1220 entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 1221 entryAtIndex.setHasCrc(false); 1222 entryAtIndex.setSize(0); 1223 ++emptyFileCounter; 1224 } 1225 } 1226 archive.files = fileMap.values().stream().filter(Objects::nonNull).toArray(SevenZArchiveEntry[]::new); 1227 calculateStreamMap(archive); 1228 } 1229 1230 private Folder readFolder(final ByteBuffer header) throws IOException { 1231 final Folder folder = new Folder(); 1232 1233 final long numCoders = readUint64(header); 1234 final Coder[] coders = new Coder[(int)numCoders]; 1235 long totalInStreams = 0; 1236 long totalOutStreams = 0; 1237 for (int i = 0; i < coders.length; i++) { 1238 coders[i] = new Coder(); 1239 final int bits = getUnsignedByte(header); 1240 final int idSize = bits & 0xf; 1241 final boolean isSimple = (bits & 0x10) == 0; 1242 final boolean hasAttributes = (bits & 0x20) != 0; 1243 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1244 1245 coders[i].decompressionMethodId = new byte[idSize]; 1246 get(header, coders[i].decompressionMethodId); 1247 if (isSimple) { 1248 coders[i].numInStreams = 1; 1249 coders[i].numOutStreams = 1; 1250 } else { 1251 coders[i].numInStreams = readUint64(header); 1252 coders[i].numOutStreams = readUint64(header); 1253 } 1254 totalInStreams += coders[i].numInStreams; 1255 totalOutStreams += coders[i].numOutStreams; 1256 if (hasAttributes) { 1257 final long propertiesSize = readUint64(header); 1258 coders[i].properties = new byte[(int)propertiesSize]; 1259 get(header, coders[i].properties); 1260 } 1261 // would need to keep looping as above: 1262 if (moreAlternativeMethods) { 1263 throw new 
IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1264 "The reference implementation doesn't support them either."); 1265 } 1266 } 1267 folder.coders = coders; 1268 folder.totalInputStreams = totalInStreams; 1269 folder.totalOutputStreams = totalOutStreams; 1270 1271 final long numBindPairs = totalOutStreams - 1; 1272 final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; 1273 for (int i = 0; i < bindPairs.length; i++) { 1274 bindPairs[i] = new BindPair(); 1275 bindPairs[i].inIndex = readUint64(header); 1276 bindPairs[i].outIndex = readUint64(header); 1277 } 1278 folder.bindPairs = bindPairs; 1279 1280 final long numPackedStreams = totalInStreams - numBindPairs; 1281 final long[] packedStreams = new long[(int)numPackedStreams]; 1282 if (numPackedStreams == 1) { 1283 int i; 1284 for (i = 0; i < (int)totalInStreams; i++) { 1285 if (folder.findBindPairForInStream(i) < 0) { 1286 break; 1287 } 1288 } 1289 packedStreams[0] = i; 1290 } else { 1291 for (int i = 0; i < (int)numPackedStreams; i++) { 1292 packedStreams[i] = readUint64(header); 1293 } 1294 } 1295 folder.packedStreams = packedStreams; 1296 1297 return folder; 1298 } 1299 1300 private void readFully(final ByteBuffer buf) throws IOException { 1301 buf.rewind(); 1302 IOUtils.readFully(channel, buf); 1303 buf.flip(); 1304 } 1305 1306 private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { 1307 final int pos = header.position(); 1308 final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header); 1309 stats.assertValidity(options.getMaxMemoryLimitInKb()); 1310 header.position(pos); 1311 1312 int nid = getUnsignedByte(header); 1313 1314 if (nid == NID.kArchiveProperties) { 1315 readArchiveProperties(header); 1316 nid = getUnsignedByte(header); 1317 } 1318 1319 if (nid == NID.kAdditionalStreamsInfo) { 1320 throw new IOException("Additional streams unsupported"); 1321 //nid = getUnsignedByte(header); 1322 } 1323 1324 if (nid == 
NID.kMainStreamsInfo) { 1325 readStreamsInfo(header, archive); 1326 nid = getUnsignedByte(header); 1327 } 1328 1329 if (nid == NID.kFilesInfo) { 1330 readFilesInfo(header, archive); 1331 nid = getUnsignedByte(header); 1332 } 1333 } 1334 1335 private Archive readHeaders(final byte[] password) throws IOException { 1336 final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */) 1337 .order(ByteOrder.LITTLE_ENDIAN); 1338 readFully(buf); 1339 final byte[] signature = new byte[6]; 1340 buf.get(signature); 1341 if (!Arrays.equals(signature, sevenZSignature)) { 1342 throw new IOException("Bad 7z signature"); 1343 } 1344 // 7zFormat.txt has it wrong - it's first major then minor 1345 final byte archiveVersionMajor = buf.get(); 1346 final byte archiveVersionMinor = buf.get(); 1347 if (archiveVersionMajor != 0) { 1348 throw new IOException(String.format("Unsupported 7z version (%d,%d)", 1349 archiveVersionMajor, archiveVersionMinor)); 1350 } 1351 1352 boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive" 1353 final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); 1354 if (startHeaderCrc == 0) { 1355 // This is an indication of a corrupt header - peek the next 20 bytes 1356 final long currentPosition = channel.position(); 1357 final ByteBuffer peekBuf = ByteBuffer.allocate(20); 1358 readFully(peekBuf); 1359 channel.position(currentPosition); 1360 // Header invalid if all data is 0 1361 while (peekBuf.hasRemaining()) { 1362 if (peekBuf.get()!=0) { 1363 headerLooksValid = true; 1364 break; 1365 } 1366 } 1367 } else { 1368 headerLooksValid = true; 1369 } 1370 1371 if (headerLooksValid) { 1372 return initializeArchive(readStartHeader(startHeaderCrc), password, true); 1373 } 1374 // No valid header found - probably first file of multipart archive was removed too early. Scan for end header. 
1375 if (options.getTryToRecoverBrokenArchives()) { 1376 return tryToLocateEndHeader(password); 1377 } 1378 throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the" 1379 + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed" 1380 + " prematurely."); 1381 } 1382 1383 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1384 archive.packPos = readUint64(header); 1385 final int numPackStreamsInt = (int) readUint64(header); 1386 int nid = getUnsignedByte(header); 1387 if (nid == NID.kSize) { 1388 archive.packSizes = new long[numPackStreamsInt]; 1389 for (int i = 0; i < archive.packSizes.length; i++) { 1390 archive.packSizes[i] = readUint64(header); 1391 } 1392 nid = getUnsignedByte(header); 1393 } 1394 1395 if (nid == NID.kCRC) { 1396 archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt); 1397 archive.packCrcs = new long[numPackStreamsInt]; 1398 for (int i = 0; i < numPackStreamsInt; i++) { 1399 if (archive.packCrcsDefined.get(i)) { 1400 archive.packCrcs[i] = 0xffffFFFFL & getInt(header); 1401 } 1402 } 1403 1404 nid = getUnsignedByte(header); 1405 } 1406 } 1407 1408 private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { 1409 final StartHeader startHeader = new StartHeader(); 1410 // using Stream rather than ByteBuffer for the benefit of the 1411 // built-in CRC check 1412 try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( 1413 new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { 1414 startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); 1415 if (startHeader.nextHeaderOffset < 0 1416 || startHeader.nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) { 1417 throw new IOException("nextHeaderOffset is out of bounds"); 1418 } 1419 1420 startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); 1421 
final long nextHeaderEnd = startHeader.nextHeaderOffset + startHeader.nextHeaderSize; 1422 if (nextHeaderEnd < startHeader.nextHeaderOffset 1423 || nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) { 1424 throw new IOException("nextHeaderSize is out of bounds"); 1425 } 1426 1427 startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); 1428 1429 return startHeader; 1430 } 1431 } 1432 1433 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1434 int nid = getUnsignedByte(header); 1435 1436 if (nid == NID.kPackInfo) { 1437 readPackInfo(header, archive); 1438 nid = getUnsignedByte(header); 1439 } 1440 1441 if (nid == NID.kUnpackInfo) { 1442 readUnpackInfo(header, archive); 1443 nid = getUnsignedByte(header); 1444 } else { 1445 // archive without unpack/coders info 1446 archive.folders = Folder.EMPTY_FOLDER_ARRAY; 1447 } 1448 1449 if (nid == NID.kSubStreamsInfo) { 1450 readSubStreamsInfo(header, archive); 1451 nid = getUnsignedByte(header); 1452 } 1453 } 1454 1455 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1456 for (final Folder folder : archive.folders) { 1457 folder.numUnpackSubStreams = 1; 1458 } 1459 long unpackStreamsCount = archive.folders.length; 1460 1461 int nid = getUnsignedByte(header); 1462 if (nid == NID.kNumUnpackStream) { 1463 unpackStreamsCount = 0; 1464 for (final Folder folder : archive.folders) { 1465 final long numStreams = readUint64(header); 1466 folder.numUnpackSubStreams = (int)numStreams; 1467 unpackStreamsCount += numStreams; 1468 } 1469 nid = getUnsignedByte(header); 1470 } 1471 1472 final int totalUnpackStreams = (int) unpackStreamsCount; 1473 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); 1474 subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; 1475 subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); 1476 subStreamsInfo.crcs = new long[totalUnpackStreams]; 1477 1478 int 
nextUnpackStream = 0; 1479 for (final Folder folder : archive.folders) { 1480 if (folder.numUnpackSubStreams == 0) { 1481 continue; 1482 } 1483 long sum = 0; 1484 if (nid == NID.kSize) { 1485 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 1486 final long size = readUint64(header); 1487 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 1488 sum += size; 1489 } 1490 } 1491 if (sum > folder.getUnpackSize()) { 1492 throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); 1493 } 1494 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 1495 } 1496 if (nid == NID.kSize) { 1497 nid = getUnsignedByte(header); 1498 } 1499 1500 int numDigests = 0; 1501 for (final Folder folder : archive.folders) { 1502 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 1503 numDigests += folder.numUnpackSubStreams; 1504 } 1505 } 1506 1507 if (nid == NID.kCRC) { 1508 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 1509 final long[] missingCrcs = new long[numDigests]; 1510 for (int i = 0; i < numDigests; i++) { 1511 if (hasMissingCrc.get(i)) { 1512 missingCrcs[i] = 0xffffFFFFL & getInt(header); 1513 } 1514 } 1515 int nextCrc = 0; 1516 int nextMissingCrc = 0; 1517 for (final Folder folder: archive.folders) { 1518 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 1519 subStreamsInfo.hasCrc.set(nextCrc, true); 1520 subStreamsInfo.crcs[nextCrc] = folder.crc; 1521 ++nextCrc; 1522 } else { 1523 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 1524 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 1525 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 1526 ++nextCrc; 1527 ++nextMissingCrc; 1528 } 1529 } 1530 } 1531 1532 nid = getUnsignedByte(header); 1533 } 1534 1535 archive.subStreamsInfo = subStreamsInfo; 1536 } 1537 1538 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1539 int nid = getUnsignedByte(header); 1540 final 
int numFoldersInt = (int) readUint64(header); 1541 final Folder[] folders = new Folder[numFoldersInt]; 1542 archive.folders = folders; 1543 /* final int external = */ getUnsignedByte(header); 1544 for (int i = 0; i < numFoldersInt; i++) { 1545 folders[i] = readFolder(header); 1546 } 1547 1548 nid = getUnsignedByte(header); 1549 for (final Folder folder : folders) { 1550 assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); 1551 folder.unpackSizes = new long[(int)folder.totalOutputStreams]; 1552 for (int i = 0; i < folder.totalOutputStreams; i++) { 1553 folder.unpackSizes[i] = readUint64(header); 1554 } 1555 } 1556 1557 nid = getUnsignedByte(header); 1558 if (nid == NID.kCRC) { 1559 final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); 1560 for (int i = 0; i < numFoldersInt; i++) { 1561 if (crcsDefined.get(i)) { 1562 folders[i].hasCrc = true; 1563 folders[i].crc = 0xffffFFFFL & getInt(header); 1564 } else { 1565 folders[i].hasCrc = false; 1566 } 1567 } 1568 1569 nid = getUnsignedByte(header); 1570 } 1571 } 1572 1573 /** 1574 * Discard any queued streams/ folder stream, and reopen the current folder input stream. 
1575 * 1576 * @param folderIndex the index of the folder to reopen 1577 * @param file the 7z entry to read 1578 * @throws IOException if exceptions occur when reading the 7z file 1579 */ 1580 private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException { 1581 deferredBlockStreams.clear(); 1582 if (currentFolderInputStream != null) { 1583 currentFolderInputStream.close(); 1584 currentFolderInputStream = null; 1585 } 1586 final Folder folder = archive.folders[folderIndex]; 1587 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 1588 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 1589 archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 1590 1591 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 1592 } 1593 1594 private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header) 1595 throws IOException { 1596 final ArchiveStatistics stats = new ArchiveStatistics(); 1597 1598 int nid = getUnsignedByte(header); 1599 1600 if (nid == NID.kArchiveProperties) { 1601 sanityCheckArchiveProperties(header); 1602 nid = getUnsignedByte(header); 1603 } 1604 1605 if (nid == NID.kAdditionalStreamsInfo) { 1606 throw new IOException("Additional streams unsupported"); 1607 //nid = getUnsignedByte(header); 1608 } 1609 1610 if (nid == NID.kMainStreamsInfo) { 1611 sanityCheckStreamsInfo(header, stats); 1612 nid = getUnsignedByte(header); 1613 } 1614 1615 if (nid == NID.kFilesInfo) { 1616 sanityCheckFilesInfo(header, stats); 1617 nid = getUnsignedByte(header); 1618 } 1619 1620 if (nid != NID.kEnd) { 1621 throw new IOException("Badly terminated header, found " + nid); 1622 } 1623 1624 return stats; 1625 } 1626 1627 private void sanityCheckArchiveProperties(final ByteBuffer header) 1628 throws IOException { 1629 int nid = getUnsignedByte(header); 1630 while (nid != NID.kEnd) { 1631 final int propertySize = 1632 
assertFitsIntoNonNegativeInt("propertySize", readUint64(header)); 1633 if (skipBytesFully(header, propertySize) < propertySize) { 1634 throw new IOException("invalid property size"); 1635 } 1636 nid = getUnsignedByte(header); 1637 } 1638 } 1639 1640 private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1641 stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); 1642 1643 int emptyStreams = -1; 1644 while (true) { 1645 final int propertyType = getUnsignedByte(header); 1646 if (propertyType == 0) { 1647 break; 1648 } 1649 final long size = readUint64(header); 1650 switch (propertyType) { 1651 case NID.kEmptyStream: { 1652 emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); 1653 break; 1654 } 1655 case NID.kEmptyFile: { 1656 if (emptyStreams == -1) { 1657 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 1658 } 1659 readBits(header, emptyStreams); 1660 break; 1661 } 1662 case NID.kAnti: { 1663 if (emptyStreams == -1) { 1664 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 1665 } 1666 readBits(header, emptyStreams); 1667 break; 1668 } 1669 case NID.kName: { 1670 final int external = getUnsignedByte(header); 1671 if (external != 0) { 1672 throw new IOException("Not implemented"); 1673 } 1674 final int namesLength = 1675 assertFitsIntoNonNegativeInt("file names length", size - 1); 1676 if ((namesLength & 1) != 0) { 1677 throw new IOException("File names length invalid"); 1678 } 1679 1680 int filesSeen = 0; 1681 for (int i = 0; i < namesLength; i += 2) { 1682 final char c = getChar(header); 1683 if (c == 0) { 1684 filesSeen++; 1685 } 1686 } 1687 if (filesSeen != stats.numberOfEntries) { 1688 throw new IOException("Invalid number of file names (" + filesSeen + " instead of " 1689 + stats.numberOfEntries + ")"); 1690 } 1691 break; 1692 } 1693 case NID.kCTime: { 1694 final int timesDefined = 
readAllOrBits(header, stats.numberOfEntries) 1695 .cardinality(); 1696 final int external = getUnsignedByte(header); 1697 if (external != 0) { 1698 throw new IOException("Not implemented"); 1699 } 1700 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1701 throw new IOException("invalid creation dates size"); 1702 } 1703 break; 1704 } 1705 case NID.kATime: { 1706 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1707 .cardinality(); 1708 final int external = getUnsignedByte(header); 1709 if (external != 0) { 1710 throw new IOException("Not implemented"); 1711 } 1712 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1713 throw new IOException("invalid access dates size"); 1714 } 1715 break; 1716 } 1717 case NID.kMTime: { 1718 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1719 .cardinality(); 1720 final int external = getUnsignedByte(header); 1721 if (external != 0) { 1722 throw new IOException("Not implemented"); 1723 } 1724 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1725 throw new IOException("invalid modification dates size"); 1726 } 1727 break; 1728 } 1729 case NID.kWinAttributes: { 1730 final int attributesDefined = readAllOrBits(header, stats.numberOfEntries) 1731 .cardinality(); 1732 final int external = getUnsignedByte(header); 1733 if (external != 0) { 1734 throw new IOException("Not implemented"); 1735 } 1736 if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { 1737 throw new IOException("invalid windows attributes size"); 1738 } 1739 break; 1740 } 1741 case NID.kStartPos: { 1742 throw new IOException("kStartPos is unsupported, please report"); 1743 } 1744 case NID.kDummy: { 1745 // 7z 9.20 asserts the content is all zeros and ignores the property 1746 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1747 1748 if (skipBytesFully(header, size) < size) { 1749 throw new IOException("Incomplete kDummy 
property"); 1750 } 1751 break; 1752 } 1753 1754 default: { 1755 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1756 if (skipBytesFully(header, size) < size) { 1757 throw new IOException("Incomplete property of type " + propertyType); 1758 } 1759 break; 1760 } 1761 } 1762 } 1763 stats.numberOfEntriesWithStream = stats.numberOfEntries - Math.max(emptyStreams, 0); 1764 } 1765 1766 private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) 1767 throws IOException { 1768 1769 final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); 1770 if (numCoders == 0) { 1771 throw new IOException("Folder without coders"); 1772 } 1773 stats.numberOfCoders += numCoders; 1774 1775 long totalOutStreams = 0; 1776 long totalInStreams = 0; 1777 for (int i = 0; i < numCoders; i++) { 1778 final int bits = getUnsignedByte(header); 1779 final int idSize = bits & 0xf; 1780 get(header, new byte[idSize]); 1781 1782 final boolean isSimple = (bits & 0x10) == 0; 1783 final boolean hasAttributes = (bits & 0x20) != 0; 1784 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1785 if (moreAlternativeMethods) { 1786 throw new IOException("Alternative methods are unsupported, please report. 
" + // NOSONAR 1787 "The reference implementation doesn't support them either."); 1788 } 1789 1790 if (isSimple) { 1791 totalInStreams++; 1792 totalOutStreams++; 1793 } else { 1794 totalInStreams += 1795 assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); 1796 totalOutStreams += 1797 assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); 1798 } 1799 1800 if (hasAttributes) { 1801 final int propertiesSize = 1802 assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); 1803 if (skipBytesFully(header, propertiesSize) < propertiesSize) { 1804 throw new IOException("invalid propertiesSize in folder"); 1805 } 1806 } 1807 } 1808 assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); 1809 assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); 1810 stats.numberOfOutStreams += totalOutStreams; 1811 stats.numberOfInStreams += totalInStreams; 1812 1813 if (totalOutStreams == 0) { 1814 throw new IOException("Total output streams can't be 0"); 1815 } 1816 1817 final int numBindPairs = 1818 assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); 1819 if (totalInStreams < numBindPairs) { 1820 throw new IOException("Total input streams can't be less than the number of bind pairs"); 1821 } 1822 final BitSet inStreamsBound = new BitSet((int) totalInStreams); 1823 for (int i = 0; i < numBindPairs; i++) { 1824 final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); 1825 if (totalInStreams <= inIndex) { 1826 throw new IOException("inIndex is bigger than number of inStreams"); 1827 } 1828 inStreamsBound.set(inIndex); 1829 final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); 1830 if (totalOutStreams <= outIndex) { 1831 throw new IOException("outIndex is bigger than number of outStreams"); 1832 } 1833 } 1834 1835 final int numPackedStreams = 1836 assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); 1837 1838 if (numPackedStreams == 1) { 1839 
if (inStreamsBound.nextClearBit(0) == -1) { 1840 throw new IOException("Couldn't find stream's bind pair index"); 1841 } 1842 } else { 1843 for (int i = 0; i < numPackedStreams; i++) { 1844 final int packedStreamIndex = 1845 assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); 1846 if (packedStreamIndex >= totalInStreams) { 1847 throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); 1848 } 1849 } 1850 } 1851 1852 return (int) totalOutStreams; 1853 } 1854 1855 private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1856 final long packPos = readUint64(header); 1857 if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() 1858 || SIGNATURE_HEADER_SIZE + packPos < 0) { 1859 throw new IOException("packPos (" + packPos + ") is out of range"); 1860 } 1861 final long numPackStreams = readUint64(header); 1862 stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams); 1863 int nid = getUnsignedByte(header); 1864 if (nid == NID.kSize) { 1865 long totalPackSizes = 0; 1866 for (int i = 0; i < stats.numberOfPackedStreams; i++) { 1867 final long packSize = readUint64(header); 1868 totalPackSizes += packSize; 1869 final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes; 1870 if (packSize < 0 1871 || endOfPackStreams > channel.size() 1872 || endOfPackStreams < packPos) { 1873 throw new IOException("packSize (" + packSize + ") is out of range"); 1874 } 1875 } 1876 nid = getUnsignedByte(header); 1877 } 1878 1879 if (nid == NID.kCRC) { 1880 final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams) 1881 .cardinality(); 1882 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 1883 throw new IOException("invalid number of CRCs in PackInfo"); 1884 } 1885 nid = getUnsignedByte(header); 1886 } 1887 1888 if (nid != NID.kEnd) { 1889 throw new IOException("Badly terminated PackInfo (" + nid + 
")"); 1890 } 1891 } 1892 1893 private void sanityCheckStreamsInfo(final ByteBuffer header, 1894 final ArchiveStatistics stats) throws IOException { 1895 int nid = getUnsignedByte(header); 1896 1897 if (nid == NID.kPackInfo) { 1898 sanityCheckPackInfo(header, stats); 1899 nid = getUnsignedByte(header); 1900 } 1901 1902 if (nid == NID.kUnpackInfo) { 1903 sanityCheckUnpackInfo(header, stats); 1904 nid = getUnsignedByte(header); 1905 } 1906 1907 if (nid == NID.kSubStreamsInfo) { 1908 sanityCheckSubStreamsInfo(header, stats); 1909 nid = getUnsignedByte(header); 1910 } 1911 1912 if (nid != NID.kEnd) { 1913 throw new IOException("Badly terminated StreamsInfo"); 1914 } 1915 } 1916 1917 private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1918 1919 int nid = getUnsignedByte(header); 1920 final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>(); 1921 if (nid == NID.kNumUnpackStream) { 1922 for (int i = 0; i < stats.numberOfFolders; i++) { 1923 numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); 1924 } 1925 stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().mapToLong(Integer::longValue).sum(); 1926 nid = getUnsignedByte(header); 1927 } else { 1928 stats.numberOfUnpackSubStreams = stats.numberOfFolders; 1929 } 1930 1931 assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); 1932 1933 if (nid == NID.kSize) { 1934 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 1935 if (numUnpackSubStreams == 0) { 1936 continue; 1937 } 1938 for (int i = 0; i < numUnpackSubStreams - 1; i++) { 1939 final long size = readUint64(header); 1940 if (size < 0) { 1941 throw new IOException("negative unpackSize"); 1942 } 1943 } 1944 } 1945 nid = getUnsignedByte(header); 1946 } 1947 1948 int numDigests = 0; 1949 if (numUnpackSubStreamsPerFolder.isEmpty()) { 1950 numDigests = stats.folderHasCrc == null ? 
stats.numberOfFolders 1951 : stats.numberOfFolders - stats.folderHasCrc.cardinality(); 1952 } else { 1953 int folderIdx = 0; 1954 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 1955 if (numUnpackSubStreams != 1 || stats.folderHasCrc == null 1956 || !stats.folderHasCrc.get(folderIdx++)) { 1957 numDigests += numUnpackSubStreams; 1958 } 1959 } 1960 } 1961 1962 if (nid == NID.kCRC) { 1963 assertFitsIntoNonNegativeInt("numDigests", numDigests); 1964 final int missingCrcs = readAllOrBits(header, numDigests) 1965 .cardinality(); 1966 if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { 1967 throw new IOException("invalid number of missing CRCs in SubStreamInfo"); 1968 } 1969 nid = getUnsignedByte(header); 1970 } 1971 1972 if (nid != NID.kEnd) { 1973 throw new IOException("Badly terminated SubStreamsInfo"); 1974 } 1975 } 1976 1977 private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) 1978 throws IOException { 1979 int nid = getUnsignedByte(header); 1980 if (nid != NID.kFolder) { 1981 throw new IOException("Expected kFolder, got " + nid); 1982 } 1983 final long numFolders = readUint64(header); 1984 stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); 1985 final int external = getUnsignedByte(header); 1986 if (external != 0) { 1987 throw new IOException("External unsupported"); 1988 } 1989 1990 final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>(); 1991 for (int i = 0; i < stats.numberOfFolders; i++) { 1992 numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); 1993 } 1994 1995 final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; 1996 final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; 1997 if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { 1998 throw new IOException("archive doesn't contain enough packed streams"); 1999 } 2000 2001 nid = 
getUnsignedByte(header); 2002 if (nid != NID.kCodersUnpackSize) { 2003 throw new IOException("Expected kCodersUnpackSize, got " + nid); 2004 } 2005 2006 for (final int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { 2007 for (int i = 0; i < numberOfOutputStreams; i++) { 2008 final long unpackSize = readUint64(header); 2009 if (unpackSize < 0) { 2010 throw new IllegalArgumentException("negative unpackSize"); 2011 } 2012 } 2013 } 2014 2015 nid = getUnsignedByte(header); 2016 if (nid == NID.kCRC) { 2017 stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); 2018 final int crcsDefined = stats.folderHasCrc.cardinality(); 2019 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 2020 throw new IOException("invalid number of CRCs in UnpackInfo"); 2021 } 2022 nid = getUnsignedByte(header); 2023 } 2024 2025 if (nid != NID.kEnd) { 2026 throw new IOException("Badly terminated UnpackInfo"); 2027 } 2028 } 2029 2030 /** 2031 * Skip all the entries if needed. 2032 * Entries need to be skipped when: 2033 * <p> 2034 * 1. it's a random access 2035 * 2. 
one of these 2 condition is meet : 2036 * <p> 2037 * 2.1 currentEntryIndex != entryIndex : this means there are some entries 2038 * to be skipped(currentEntryIndex < entryIndex) or the entry has already 2039 * been read(currentEntryIndex > entryIndex) 2040 * <p> 2041 * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: 2042 * if the entry to be read is the current entry, but some data of it has 2043 * been read before, then we need to reopen the stream of the folder and 2044 * skip all the entries before the current entries 2045 * 2046 * @param entryIndex the entry to be read 2047 * @param isInSameFolder are the entry to be read and the current entry in the same folder 2048 * @param folderIndex the index of the folder which contains the entry 2049 * @return true if there are entries actually skipped 2050 * @throws IOException there are exceptions when skipping entries 2051 * @since 1.21 2052 */ 2053 private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException { 2054 final SevenZArchiveEntry file = archive.files[entryIndex]; 2055 // if the entry to be read is the current entry, and the entry has not 2056 // been read yet, then there's nothing we need to do 2057 if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) { 2058 return false; 2059 } 2060 2061 // 1. if currentEntryIndex < entryIndex : 2062 // this means there are some entries to be skipped(currentEntryIndex < entryIndex) 2063 // 2. 
if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : 2064 // this means the entry has already been read before, and we need to reopen the 2065 // stream of the folder and skip all the entries before the current entries 2066 int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; 2067 if (isInSameFolder) { 2068 if (currentEntryIndex < entryIndex) { 2069 // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped 2070 filesToSkipStartIndex = currentEntryIndex + 1; 2071 } else { 2072 // the entry is in the same folder of current entry, but it has already been read before, we need to reset 2073 // the position of the currentFolderInputStream to the beginning of folder, and then skip the files 2074 // from the start entry of the folder again 2075 reopenFolderInputStream(folderIndex, file); 2076 } 2077 } 2078 2079 for (int i = filesToSkipStartIndex; i < entryIndex; i++) { 2080 final SevenZArchiveEntry fileToSkip = archive.files[i]; 2081 InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); 2082 if (fileToSkip.getHasCrc()) { 2083 fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); 2084 } 2085 deferredBlockStreams.add(fileStreamToSkip); 2086 2087 // set the content methods as well, it equals to file.getContentMethods() because they are in same folder 2088 fileToSkip.setContentMethods(file.getContentMethods()); 2089 } 2090 return true; 2091 } 2092 2093 @Override 2094 public String toString() { 2095 return archive.toString(); 2096 } 2097 2098 private Archive tryToLocateEndHeader(final byte[] password) throws IOException { 2099 final ByteBuffer nidBuf = ByteBuffer.allocate(1); 2100 final long searchLimit = 1024L * 1024 * 1; 2101 // Main header, plus bytes that readStartHeader would read 2102 final long previousDataSize = channel.position() + 20; 2103 final 
long minPos; 2104 // Determine minimal position - can't start before current position 2105 if (channel.position() + searchLimit > channel.size()) { 2106 minPos = channel.position(); 2107 } else { 2108 minPos = channel.size() - searchLimit; 2109 } 2110 long pos = channel.size() - 1; 2111 // Loop: Try from end of archive 2112 while (pos > minPos) { 2113 pos--; 2114 channel.position(pos); 2115 nidBuf.rewind(); 2116 if (channel.read(nidBuf) < 1) { 2117 throw new EOFException(); 2118 } 2119 final int nid = nidBuf.array()[0]; 2120 // First indicator: Byte equals one of these header identifiers 2121 if (nid == NID.kEncodedHeader || nid == NID.kHeader) { 2122 try { 2123 // Try to initialize Archive structure from here 2124 final StartHeader startHeader = new StartHeader(); 2125 startHeader.nextHeaderOffset = pos - previousDataSize; 2126 startHeader.nextHeaderSize = channel.size() - pos; 2127 final Archive result = initializeArchive(startHeader, password, false); 2128 // Sanity check: There must be some data... 2129 if (result.packSizes.length > 0 && result.files.length > 0) { 2130 return result; 2131 } 2132 } catch (final Exception ignore) { 2133 // Wrong guess... 2134 } 2135 } 2136 } 2137 throw new IOException("Start header corrupt and unable to guess end header"); 2138 } 2139}