001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025import java.security.AccessController; 026import java.security.PrivilegedAction; 027import java.util.Collections; 028import java.util.Locale; 029import java.util.ServiceLoader; 030import java.util.Set; 031import java.util.SortedMap; 032import java.util.TreeMap; 033 034import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; 035import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; 036import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; 037import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; 038import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; 039import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; 040import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; 041import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; 042import org.apache.commons.compress.archivers.sevenz.SevenZFile; 043import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 044import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 045import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 046import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; 047import org.apache.commons.compress.utils.IOUtils; 048import org.apache.commons.compress.utils.Sets; 049 050/** 051 * Factory to create Archive[In|Out]putStreams from names or the first bytes of 052 * the InputStream. In order to add other implementations, you should extend 053 * ArchiveStreamFactory and override the appropriate methods (and call their 054 * implementation from super of course). 055 * 056 * Compressing a ZIP-File: 057 * 058 * <pre> 059 * final OutputStream out = Files.newOutputStream(output.toPath()); 060 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); 061 * 062 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); 063 * IOUtils.copy(Files.newInputStream(file1.toPath()), os); 064 * os.closeArchiveEntry(); 065 * 066 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); 067 * IOUtils.copy(Files.newInputStream(file2.toPath()), os); 068 * os.closeArchiveEntry(); 069 * os.close(); 070 * </pre> 071 * 072 * Decompressing a ZIP-File: 073 * 074 * <pre> 075 * final InputStream is = Files.newInputStream(input.toPath()); 076 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); 077 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry(); 078 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName())); 079 * IOUtils.copy(in, out); 080 * out.close(); 081 * in.close(); 082 * </pre> 083 * @Immutable provided that the deprecated method setEntryEncoding is not used. 084 * @ThreadSafe even if the deprecated method setEntryEncoding is used 085 */ 086public class ArchiveStreamFactory implements ArchiveStreamProvider { 087 088 private static final int TAR_HEADER_SIZE = 512; 089 090 private static final int DUMP_SIGNATURE_SIZE = 32; 091 092 private static final int SIGNATURE_SIZE = 12; 093 094 /** 095 * The singleton instance using the platform default encoding. 096 * @since 1.21 097 */ 098 public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory(); 099 100 /** 101 * Constant (value {@value}) used to identify the APK archive format. 102 * <p> 103 * APK file extensions are .apk, .xapk, .apks, .apkm 104 * </p> 105 * 106 * @since 1.22 107 */ 108 public static final String APK = "apk"; 109 110 /** 111 * Constant (value {@value}) used to identify the XAPK archive format. 112 * <p> 113 * APK file extensions are .apk, .xapk, .apks, .apkm 114 * </p> 115 * 116 * @since 1.22 117 */ 118 public static final String XAPK = "xapk"; 119 120 /** 121 * Constant (value {@value}) used to identify the APKS archive format. 122 * <p> 123 * APK file extensions are .apk, .xapk, .apks, .apkm 124 * </p> 125 * 126 * @since 1.22 127 */ 128 public static final String APKS = "apks"; 129 130 /** 131 * Constant (value {@value}) used to identify the APKM archive format. 132 * <p> 133 * APK file extensions are .apk, .xapk, .apks, .apkm 134 * </p> 135 * 136 * @since 1.22 137 */ 138 public static final String APKM = "apkm"; 139 140 /** 141 * Constant (value {@value}) used to identify the AR archive format. 142 * @since 1.1 143 */ 144 public static final String AR = "ar"; 145 146 /** 147 * Constant (value {@value}) used to identify the ARJ archive format. 148 * Not supported as an output stream type. 149 * @since 1.6 150 */ 151 public static final String ARJ = "arj"; 152 153 /** 154 * Constant (value {@value}) used to identify the CPIO archive format. 155 * @since 1.1 156 */ 157 public static final String CPIO = "cpio"; 158 159 /** 160 * Constant (value {@value}) used to identify the Unix DUMP archive format. 161 * Not supported as an output stream type. 162 * @since 1.3 163 */ 164 public static final String DUMP = "dump"; 165 166 /** 167 * Constant (value {@value}) used to identify the JAR archive format. 168 * @since 1.1 169 */ 170 public static final String JAR = "jar"; 171 172 /** 173 * Constant used to identify the TAR archive format. 174 * @since 1.1 175 */ 176 public static final String TAR = "tar"; 177 178 /** 179 * Constant (value {@value}) used to identify the ZIP archive format. 180 * @since 1.1 181 */ 182 public static final String ZIP = "zip"; 183 184 /** 185 * Constant (value {@value}) used to identify the 7z archive format. 186 * @since 1.8 187 */ 188 public static final String SEVEN_Z = "7z"; 189 190 private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() { 191 return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader()); 192 } 193 194 /** 195 * Try to determine the type of Archiver 196 * @param in input stream 197 * @return type of archiver if found 198 * @throws ArchiveException if an archiver cannot be detected in the stream 199 * @since 1.14 200 */ 201 public static String detect(final InputStream in) throws ArchiveException { 202 if (in == null) { 203 throw new IllegalArgumentException("Stream must not be null."); 204 } 205 206 if (!in.markSupported()) { 207 throw new IllegalArgumentException("Mark is not supported."); 208 } 209 210 final byte[] signature = new byte[SIGNATURE_SIZE]; 211 in.mark(signature.length); 212 int signatureLength = -1; 213 try { 214 signatureLength = IOUtils.readFully(in, signature); 215 in.reset(); 216 } catch (final IOException e) { 217 throw new ArchiveException("IOException while reading signature.", e); 218 } 219 220 if (ZipArchiveInputStream.matches(signature, signatureLength)) { 221 return ZIP; 222 } 223 if (JarArchiveInputStream.matches(signature, signatureLength)) { 224 return JAR; 225 } 226 if (ArArchiveInputStream.matches(signature, signatureLength)) { 227 return AR; 228 } 229 if (CpioArchiveInputStream.matches(signature, signatureLength)) { 230 return CPIO; 231 } 232 if (ArjArchiveInputStream.matches(signature, signatureLength)) { 233 return ARJ; 234 } 235 if (SevenZFile.matches(signature, signatureLength)) { 236 return SEVEN_Z; 237 } 238 239 // Dump needs a bigger buffer to check the signature; 240 final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE]; 241 in.mark(dumpsig.length); 242 try { 243 signatureLength = IOUtils.readFully(in, dumpsig); 244 in.reset(); 245 } catch (final IOException e) { 246 throw new ArchiveException("IOException while reading dump signature", e); 247 } 248 if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { 249 return DUMP; 250 } 251 252 // Tar needs an even bigger buffer to check the signature; read the first block 253 final byte[] tarHeader = new byte[TAR_HEADER_SIZE]; 254 in.mark(tarHeader.length); 255 try { 256 signatureLength = IOUtils.readFully(in, tarHeader); 257 in.reset(); 258 } catch (final IOException e) { 259 throw new ArchiveException("IOException while reading tar signature", e); 260 } 261 if (TarArchiveInputStream.matches(tarHeader, signatureLength)) { 262 return TAR; 263 } 264 265 // COMPRESS-117 - improve auto-recognition 266 if (signatureLength >= TAR_HEADER_SIZE) { 267 TarArchiveInputStream tais = null; 268 try { 269 tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader)); 270 // COMPRESS-191 - verify the header checksum 271 if (tais.getNextTarEntry().isCheckSumOK()) { 272 return TAR; 273 } 274 } catch (final Exception e) { // NOPMD NOSONAR 275 // can generate IllegalArgumentException as well 276 // as IOException 277 // autodetection, simply not a TAR 278 // ignored 279 } finally { 280 IOUtils.closeQuietly(tais); 281 } 282 } 283 throw new ArchiveException("No Archiver found for the stream signature"); 284 } 285 286 /** 287 * Constructs a new sorted map from input stream provider names to provider 288 * objects. 289 * 290 * <p> 291 * The map returned by this method will have one entry for each provider for 292 * which support is available in the current Java virtual machine. If two or 293 * more supported provider have the same name then the resulting map will 294 * contain just one of them; which one it will contain is not specified. 295 * </p> 296 * 297 * <p> 298 * The invocation of this method, and the subsequent use of the resulting 299 * map, may cause time-consuming disk or network I/O operations to occur. 300 * This method is provided for applications that need to enumerate all of 301 * the available providers, for example to allow user provider selection. 302 * </p> 303 * 304 * <p> 305 * This method may return different results at different times if new 306 * providers are dynamically made available to the current Java virtual 307 * machine. 308 * </p> 309 * 310 * @return An immutable, map from names to provider objects 311 * @since 1.13 312 */ 313 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() { 314 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 315 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 316 putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map); 317 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map)); 318 return map; 319 }); 320 } 321 322 /** 323 * Constructs a new sorted map from output stream provider names to provider 324 * objects. 325 * 326 * <p> 327 * The map returned by this method will have one entry for each provider for 328 * which support is available in the current Java virtual machine. If two or 329 * more supported provider have the same name then the resulting map will 330 * contain just one of them; which one it will contain is not specified. 331 * </p> 332 * 333 * <p> 334 * The invocation of this method, and the subsequent use of the resulting 335 * map, may cause time-consuming disk or network I/O operations to occur. 336 * This method is provided for applications that need to enumerate all of 337 * the available providers, for example to allow user provider selection. 338 * </p> 339 * 340 * <p> 341 * This method may return different results at different times if new 342 * providers are dynamically made available to the current Java virtual 343 * machine. 344 * </p> 345 * 346 * @return An immutable, map from names to provider objects 347 * @since 1.13 348 */ 349 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() { 350 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 351 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 352 putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map); 353 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map)); 354 return map; 355 }); 356 } 357 358 static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) { 359 names.forEach(name -> map.put(toKey(name), provider)); 360 } 361 362 private static String toKey(final String name) { 363 return name.toUpperCase(Locale.ROOT); 364 } 365 366 /** 367 * Entry encoding, null for the platform default. 368 */ 369 private final String encoding; 370 371 /** 372 * Entry encoding, null for the default. 373 */ 374 private volatile String entryEncoding; 375 376 private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders; 377 378 private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders; 379 380 /** 381 * Create an instance using the platform default encoding. 382 */ 383 public ArchiveStreamFactory() { 384 this(null); 385 } 386 387 /** 388 * Create an instance using the specified encoding. 389 * 390 * @param encoding the encoding to be used. 391 * 392 * @since 1.10 393 */ 394 public ArchiveStreamFactory(final String encoding) { 395 this.encoding = encoding; 396 // Also set the original field so can continue to use it. 397 this.entryEncoding = encoding; 398 } 399 400 /** 401 * Create an archive input stream from an input stream, autodetecting 402 * the archive type from the first few bytes of the stream. The InputStream 403 * must support marks, like BufferedInputStream. 404 * 405 * @param in the input stream 406 * @return the archive input stream 407 * @throws ArchiveException if the archiver name is not known 408 * @throws StreamingNotSupportedException if the format cannot be 409 * read from a stream 410 * @throws IllegalArgumentException if the stream is null or does not support mark 411 */ 412 public ArchiveInputStream createArchiveInputStream(final InputStream in) 413 throws ArchiveException { 414 return createArchiveInputStream(detect(in), in); 415 } 416 417 /** 418 * Creates an archive input stream from an archiver name and an input stream. 419 * 420 * @param archiverName the archive name, 421 * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z} 422 * @param in the input stream 423 * @return the archive input stream 424 * @throws ArchiveException if the archiver name is not known 425 * @throws StreamingNotSupportedException if the format cannot be 426 * read from a stream 427 * @throws IllegalArgumentException if the archiver name or stream is null 428 */ 429 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in) throws ArchiveException { 430 return createArchiveInputStream(archiverName, in, entryEncoding); 431 } 432 433 @Override 434 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in, 435 final String actualEncoding) throws ArchiveException { 436 437 if (archiverName == null) { 438 throw new IllegalArgumentException("Archivername must not be null."); 439 } 440 441 if (in == null) { 442 throw new IllegalArgumentException("InputStream must not be null."); 443 } 444 445 if (AR.equalsIgnoreCase(archiverName)) { 446 return new ArArchiveInputStream(in); 447 } 448 if (ARJ.equalsIgnoreCase(archiverName)) { 449 if (actualEncoding != null) { 450 return new ArjArchiveInputStream(in, actualEncoding); 451 } 452 return new ArjArchiveInputStream(in); 453 } 454 if (ZIP.equalsIgnoreCase(archiverName)) { 455 if (actualEncoding != null) { 456 return new ZipArchiveInputStream(in, actualEncoding); 457 } 458 return new ZipArchiveInputStream(in); 459 } 460 if (TAR.equalsIgnoreCase(archiverName)) { 461 if (actualEncoding != null) { 462 return new TarArchiveInputStream(in, actualEncoding); 463 } 464 return new TarArchiveInputStream(in); 465 } 466 if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) { 467 if (actualEncoding != null) { 468 return new JarArchiveInputStream(in, actualEncoding); 469 } 470 return new JarArchiveInputStream(in); 471 } 472 if (CPIO.equalsIgnoreCase(archiverName)) { 473 if (actualEncoding != null) { 474 return new CpioArchiveInputStream(in, actualEncoding); 475 } 476 return new CpioArchiveInputStream(in); 477 } 478 if (DUMP.equalsIgnoreCase(archiverName)) { 479 if (actualEncoding != null) { 480 return new DumpArchiveInputStream(in, actualEncoding); 481 } 482 return new DumpArchiveInputStream(in); 483 } 484 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 485 throw new StreamingNotSupportedException(SEVEN_Z); 486 } 487 488 final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName)); 489 if (archiveStreamProvider != null) { 490 return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding); 491 } 492 493 throw new ArchiveException("Archiver: " + archiverName + " not found."); 494 } 495 496 /** 497 * Creates an archive output stream from an archiver name and an output stream. 498 * 499 * @param archiverName the archive name, 500 * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 501 * @param out the output stream 502 * @return the archive output stream 503 * @throws ArchiveException if the archiver name is not known 504 * @throws StreamingNotSupportedException if the format cannot be 505 * written to a stream 506 * @throws IllegalArgumentException if the archiver name or stream is null 507 */ 508 public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out) 509 throws ArchiveException { 510 return createArchiveOutputStream(archiverName, out, entryEncoding); 511 } 512 513 @Override 514 public ArchiveOutputStream createArchiveOutputStream( 515 final String archiverName, final OutputStream out, final String actualEncoding) 516 throws ArchiveException { 517 if (archiverName == null) { 518 throw new IllegalArgumentException("Archivername must not be null."); 519 } 520 if (out == null) { 521 throw new IllegalArgumentException("OutputStream must not be null."); 522 } 523 524 if (AR.equalsIgnoreCase(archiverName)) { 525 return new ArArchiveOutputStream(out); 526 } 527 if (ZIP.equalsIgnoreCase(archiverName)) { 528 final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); 529 if (actualEncoding != null) { 530 zip.setEncoding(actualEncoding); 531 } 532 return zip; 533 } 534 if (TAR.equalsIgnoreCase(archiverName)) { 535 if (actualEncoding != null) { 536 return new TarArchiveOutputStream(out, actualEncoding); 537 } 538 return new TarArchiveOutputStream(out); 539 } 540 if (JAR.equalsIgnoreCase(archiverName)) { 541 if (actualEncoding != null) { 542 return new JarArchiveOutputStream(out, actualEncoding); 543 } 544 return new JarArchiveOutputStream(out); 545 } 546 if (CPIO.equalsIgnoreCase(archiverName)) { 547 if (actualEncoding != null) { 548 return new CpioArchiveOutputStream(out, actualEncoding); 549 } 550 return new CpioArchiveOutputStream(out); 551 } 552 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 553 throw new StreamingNotSupportedException(SEVEN_Z); 554 } 555 556 final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName)); 557 if (archiveStreamProvider != null) { 558 return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding); 559 } 560 561 throw new ArchiveException("Archiver: " + archiverName + " not found."); 562 } 563 564 public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() { 565 if (archiveInputStreamProviders == null) { 566 archiveInputStreamProviders = Collections 567 .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders()); 568 } 569 return archiveInputStreamProviders; 570 } 571 572 public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() { 573 if (archiveOutputStreamProviders == null) { 574 archiveOutputStreamProviders = Collections 575 .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders()); 576 } 577 return archiveOutputStreamProviders; 578 } 579 580 /** 581 * Gets the encoding to use for arj, jar, ZIP, dump, cpio and tar 582 * files, or null for the archiver default. 583 * 584 * @return entry encoding, or null for the archiver default 585 * @since 1.5 586 */ 587 public String getEntryEncoding() { 588 return entryEncoding; 589 } 590 591 @Override 592 public Set<String> getInputStreamArchiveNames() { 593 return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z); 594 } 595 596 @Override 597 public Set<String> getOutputStreamArchiveNames() { 598 return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z); 599 } 600 601 /** 602 * Sets the encoding to use for arj, jar, ZIP, dump, cpio and tar files. Use null for the archiver default. 603 * 604 * @param entryEncoding the entry encoding, null uses the archiver default. 605 * @since 1.5 606 * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding 607 * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} 608 * was used to specify the factory encoding. 609 */ 610 @Deprecated 611 public void setEntryEncoding(final String entryEncoding) { 612 // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway 613 if (encoding != null) { 614 throw new IllegalStateException("Cannot overide encoding set by the constructor"); 615 } 616 this.entryEncoding = entryEncoding; 617 } 618 619}