001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025import java.security.AccessController;
026import java.security.PrivilegedAction;
027import java.util.Collections;
028import java.util.Locale;
029import java.util.ServiceLoader;
030import java.util.Set;
031import java.util.SortedMap;
032import java.util.TreeMap;
033
034import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
035import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
036import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
037import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
038import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
039import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
040import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
041import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
042import org.apache.commons.compress.archivers.sevenz.SevenZFile;
043import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
044import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
045import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
046import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
047import org.apache.commons.compress.utils.IOUtils;
048import org.apache.commons.compress.utils.Sets;
049
050/**
051 * Factory to create Archive[In|Out]putStreams from names or the first bytes of
052 * the InputStream. In order to add other implementations, you should extend
053 * ArchiveStreamFactory and override the appropriate methods (and call their
054 * implementation from super of course).
055 *
056 * Compressing a ZIP-File:
057 *
058 * <pre>
059 * final OutputStream out = Files.newOutputStream(output.toPath());
060 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
061 *
062 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
063 * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
064 * os.closeArchiveEntry();
065 *
066 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
067 * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
068 * os.closeArchiveEntry();
069 * os.close();
070 * </pre>
071 *
072 * Decompressing a ZIP-File:
073 *
074 * <pre>
075 * final InputStream is = Files.newInputStream(input.toPath());
076 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
077 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
078 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
079 * IOUtils.copy(in, out);
080 * out.close();
081 * in.close();
082 * </pre>
083 * @Immutable provided that the deprecated method setEntryEncoding is not used.
084 * @ThreadSafe even if the deprecated method setEntryEncoding is used
085 */
086public class ArchiveStreamFactory implements ArchiveStreamProvider {
087
088    private static final int TAR_HEADER_SIZE = 512;
089
090    private static final int DUMP_SIGNATURE_SIZE = 32;
091
092    private static final int SIGNATURE_SIZE = 12;
093
094    /**
095     * The singleton instance using the platform default encoding.
096     * @since 1.21
097     */
098    public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory();
099
100    /**
101     * Constant (value {@value}) used to identify the APK archive format.
102     * <p>
103     * APK file extensions are .apk, .xapk, .apks, .apkm
104     * </p>
105     *
106     * @since 1.22
107     */
108    public static final String APK = "apk";
109
110    /**
111     * Constant (value {@value}) used to identify the XAPK archive format.
112     * <p>
113     * APK file extensions are .apk, .xapk, .apks, .apkm
114     * </p>
115     *
116     * @since 1.22
117     */
118    public static final String XAPK = "xapk";
119
120    /**
121     * Constant (value {@value}) used to identify the APKS archive format.
122     * <p>
123     * APK file extensions are .apk, .xapk, .apks, .apkm
124     * </p>
125     *
126     * @since 1.22
127     */
128    public static final String APKS = "apks";
129
130    /**
131     * Constant (value {@value}) used to identify the APKM archive format.
132     * <p>
133     * APK file extensions are .apk, .xapk, .apks, .apkm
134     * </p>
135     *
136     * @since 1.22
137     */
138    public static final String APKM = "apkm";
139
140    /**
141     * Constant (value {@value}) used to identify the AR archive format.
142     * @since 1.1
143     */
144    public static final String AR = "ar";
145
146    /**
147     * Constant (value {@value}) used to identify the ARJ archive format.
148     * Not supported as an output stream type.
149     * @since 1.6
150     */
151    public static final String ARJ = "arj";
152
153    /**
154     * Constant (value {@value}) used to identify the CPIO archive format.
155     * @since 1.1
156     */
157    public static final String CPIO = "cpio";
158
159    /**
160     * Constant (value {@value}) used to identify the Unix DUMP archive format.
161     * Not supported as an output stream type.
162     * @since 1.3
163     */
164    public static final String DUMP = "dump";
165
166    /**
167     * Constant (value {@value}) used to identify the JAR archive format.
168     * @since 1.1
169     */
170    public static final String JAR = "jar";
171
172    /**
173     * Constant used to identify the TAR archive format.
174     * @since 1.1
175     */
176    public static final String TAR = "tar";
177
178    /**
179     * Constant (value {@value}) used to identify the ZIP archive format.
180     * @since 1.1
181     */
182    public static final String ZIP = "zip";
183
184    /**
185     * Constant (value {@value}) used to identify the 7z archive format.
186     * @since 1.8
187     */
188    public static final String SEVEN_Z = "7z";
189
190    private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() {
191        return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader());
192    }
193
194    /**
195     * Try to determine the type of Archiver
196     * @param in input stream
197     * @return type of archiver if found
198     * @throws ArchiveException if an archiver cannot be detected in the stream
199     * @since 1.14
200     */
201    public static String detect(final InputStream in) throws ArchiveException {
202        if (in == null) {
203            throw new IllegalArgumentException("Stream must not be null.");
204        }
205
206        if (!in.markSupported()) {
207            throw new IllegalArgumentException("Mark is not supported.");
208        }
209
210        final byte[] signature = new byte[SIGNATURE_SIZE];
211        in.mark(signature.length);
212        int signatureLength = -1;
213        try {
214            signatureLength = IOUtils.readFully(in, signature);
215            in.reset();
216        } catch (final IOException e) {
217            throw new ArchiveException("IOException while reading signature.", e);
218        }
219
220        if (ZipArchiveInputStream.matches(signature, signatureLength)) {
221            return ZIP;
222        }
223        if (JarArchiveInputStream.matches(signature, signatureLength)) {
224            return JAR;
225        }
226        if (ArArchiveInputStream.matches(signature, signatureLength)) {
227            return AR;
228        }
229        if (CpioArchiveInputStream.matches(signature, signatureLength)) {
230            return CPIO;
231        }
232        if (ArjArchiveInputStream.matches(signature, signatureLength)) {
233            return ARJ;
234        }
235        if (SevenZFile.matches(signature, signatureLength)) {
236            return SEVEN_Z;
237        }
238
239        // Dump needs a bigger buffer to check the signature;
240        final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
241        in.mark(dumpsig.length);
242        try {
243            signatureLength = IOUtils.readFully(in, dumpsig);
244            in.reset();
245        } catch (final IOException e) {
246            throw new ArchiveException("IOException while reading dump signature", e);
247        }
248        if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
249            return DUMP;
250        }
251
252        // Tar needs an even bigger buffer to check the signature; read the first block
253        final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
254        in.mark(tarHeader.length);
255        try {
256            signatureLength = IOUtils.readFully(in, tarHeader);
257            in.reset();
258        } catch (final IOException e) {
259            throw new ArchiveException("IOException while reading tar signature", e);
260        }
261        if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
262            return TAR;
263        }
264
265        // COMPRESS-117 - improve auto-recognition
266        if (signatureLength >= TAR_HEADER_SIZE) {
267            TarArchiveInputStream tais = null;
268            try {
269                tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader));
270                // COMPRESS-191 - verify the header checksum
271                if (tais.getNextTarEntry().isCheckSumOK()) {
272                    return TAR;
273                }
274            } catch (final Exception e) { // NOPMD NOSONAR
275                // can generate IllegalArgumentException as well
276                // as IOException
277                // autodetection, simply not a TAR
278                // ignored
279            } finally {
280                IOUtils.closeQuietly(tais);
281            }
282        }
283        throw new ArchiveException("No Archiver found for the stream signature");
284    }
285
286    /**
287     * Constructs a new sorted map from input stream provider names to provider
288     * objects.
289     *
290     * <p>
291     * The map returned by this method will have one entry for each provider for
292     * which support is available in the current Java virtual machine. If two or
293     * more supported provider have the same name then the resulting map will
294     * contain just one of them; which one it will contain is not specified.
295     * </p>
296     *
297     * <p>
298     * The invocation of this method, and the subsequent use of the resulting
299     * map, may cause time-consuming disk or network I/O operations to occur.
300     * This method is provided for applications that need to enumerate all of
301     * the available providers, for example to allow user provider selection.
302     * </p>
303     *
304     * <p>
305     * This method may return different results at different times if new
306     * providers are dynamically made available to the current Java virtual
307     * machine.
308     * </p>
309     *
310     * @return An immutable, map from names to provider objects
311     * @since 1.13
312     */
313    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
314        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
315            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
316            putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map);
317            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map));
318            return map;
319        });
320    }
321
322    /**
323     * Constructs a new sorted map from output stream provider names to provider
324     * objects.
325     *
326     * <p>
327     * The map returned by this method will have one entry for each provider for
328     * which support is available in the current Java virtual machine. If two or
329     * more supported provider have the same name then the resulting map will
330     * contain just one of them; which one it will contain is not specified.
331     * </p>
332     *
333     * <p>
334     * The invocation of this method, and the subsequent use of the resulting
335     * map, may cause time-consuming disk or network I/O operations to occur.
336     * This method is provided for applications that need to enumerate all of
337     * the available providers, for example to allow user provider selection.
338     * </p>
339     *
340     * <p>
341     * This method may return different results at different times if new
342     * providers are dynamically made available to the current Java virtual
343     * machine.
344     * </p>
345     *
346     * @return An immutable, map from names to provider objects
347     * @since 1.13
348     */
349    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
350        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
351            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
352            putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map);
353            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map));
354            return map;
355        });
356    }
357
358    static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) {
359        names.forEach(name -> map.put(toKey(name), provider));
360    }
361
362    private static String toKey(final String name) {
363        return name.toUpperCase(Locale.ROOT);
364    }
365
366    /**
367     * Entry encoding, null for the platform default.
368     */
369    private final String encoding;
370
371    /**
372     * Entry encoding, null for the default.
373     */
374    private volatile String entryEncoding;
375
376    private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;
377
378    private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
379
380    /**
381     * Create an instance using the platform default encoding.
382     */
383    public ArchiveStreamFactory() {
384        this(null);
385    }
386
387    /**
388     * Create an instance using the specified encoding.
389     *
390     * @param encoding the encoding to be used.
391     *
392     * @since 1.10
393     */
394    public ArchiveStreamFactory(final String encoding) {
395        this.encoding = encoding;
396        // Also set the original field so can continue to use it.
397        this.entryEncoding = encoding;
398    }
399
400    /**
401     * Create an archive input stream from an input stream, autodetecting
402     * the archive type from the first few bytes of the stream. The InputStream
403     * must support marks, like BufferedInputStream.
404     *
405     * @param in the input stream
406     * @return the archive input stream
407     * @throws ArchiveException if the archiver name is not known
408     * @throws StreamingNotSupportedException if the format cannot be
409     * read from a stream
410     * @throws IllegalArgumentException if the stream is null or does not support mark
411     */
412    public ArchiveInputStream createArchiveInputStream(final InputStream in)
413            throws ArchiveException {
414        return createArchiveInputStream(detect(in), in);
415    }
416
417    /**
418     * Creates an archive input stream from an archiver name and an input stream.
419     *
420     * @param archiverName the archive name,
421     * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
422     * @param in the input stream
423     * @return the archive input stream
424     * @throws ArchiveException if the archiver name is not known
425     * @throws StreamingNotSupportedException if the format cannot be
426     * read from a stream
427     * @throws IllegalArgumentException if the archiver name or stream is null
428     */
429    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in) throws ArchiveException {
430        return createArchiveInputStream(archiverName, in, entryEncoding);
431    }
432
433    @Override
434    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in,
435            final String actualEncoding) throws ArchiveException {
436
437        if (archiverName == null) {
438            throw new IllegalArgumentException("Archivername must not be null.");
439        }
440
441        if (in == null) {
442            throw new IllegalArgumentException("InputStream must not be null.");
443        }
444
445        if (AR.equalsIgnoreCase(archiverName)) {
446            return new ArArchiveInputStream(in);
447        }
448        if (ARJ.equalsIgnoreCase(archiverName)) {
449            if (actualEncoding != null) {
450                return new ArjArchiveInputStream(in, actualEncoding);
451            }
452            return new ArjArchiveInputStream(in);
453        }
454        if (ZIP.equalsIgnoreCase(archiverName)) {
455            if (actualEncoding != null) {
456                return new ZipArchiveInputStream(in, actualEncoding);
457            }
458            return new ZipArchiveInputStream(in);
459        }
460        if (TAR.equalsIgnoreCase(archiverName)) {
461            if (actualEncoding != null) {
462                return new TarArchiveInputStream(in, actualEncoding);
463            }
464            return new TarArchiveInputStream(in);
465        }
466        if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) {
467            if (actualEncoding != null) {
468                return new JarArchiveInputStream(in, actualEncoding);
469            }
470            return new JarArchiveInputStream(in);
471        }
472        if (CPIO.equalsIgnoreCase(archiverName)) {
473            if (actualEncoding != null) {
474                return new CpioArchiveInputStream(in, actualEncoding);
475            }
476            return new CpioArchiveInputStream(in);
477        }
478        if (DUMP.equalsIgnoreCase(archiverName)) {
479            if (actualEncoding != null) {
480                return new DumpArchiveInputStream(in, actualEncoding);
481            }
482            return new DumpArchiveInputStream(in);
483        }
484        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
485            throw new StreamingNotSupportedException(SEVEN_Z);
486        }
487
488        final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
489        if (archiveStreamProvider != null) {
490            return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
491        }
492
493        throw new ArchiveException("Archiver: " + archiverName + " not found.");
494    }
495
496    /**
497     * Creates an archive output stream from an archiver name and an output stream.
498     *
499     * @param archiverName the archive name,
500     * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
501     * @param out the output stream
502     * @return the archive output stream
503     * @throws ArchiveException if the archiver name is not known
504     * @throws StreamingNotSupportedException if the format cannot be
505     * written to a stream
506     * @throws IllegalArgumentException if the archiver name or stream is null
507     */
508    public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out)
509            throws ArchiveException {
510        return createArchiveOutputStream(archiverName, out, entryEncoding);
511    }
512
513    @Override
514    public ArchiveOutputStream createArchiveOutputStream(
515            final String archiverName, final OutputStream out, final String actualEncoding)
516            throws ArchiveException {
517        if (archiverName == null) {
518            throw new IllegalArgumentException("Archivername must not be null.");
519        }
520        if (out == null) {
521            throw new IllegalArgumentException("OutputStream must not be null.");
522        }
523
524        if (AR.equalsIgnoreCase(archiverName)) {
525            return new ArArchiveOutputStream(out);
526        }
527        if (ZIP.equalsIgnoreCase(archiverName)) {
528            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
529            if (actualEncoding != null) {
530                zip.setEncoding(actualEncoding);
531            }
532            return zip;
533        }
534        if (TAR.equalsIgnoreCase(archiverName)) {
535            if (actualEncoding != null) {
536                return new TarArchiveOutputStream(out, actualEncoding);
537            }
538            return new TarArchiveOutputStream(out);
539        }
540        if (JAR.equalsIgnoreCase(archiverName)) {
541            if (actualEncoding != null) {
542                return new JarArchiveOutputStream(out, actualEncoding);
543            }
544            return new JarArchiveOutputStream(out);
545        }
546        if (CPIO.equalsIgnoreCase(archiverName)) {
547            if (actualEncoding != null) {
548                return new CpioArchiveOutputStream(out, actualEncoding);
549            }
550            return new CpioArchiveOutputStream(out);
551        }
552        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
553            throw new StreamingNotSupportedException(SEVEN_Z);
554        }
555
556        final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
557        if (archiveStreamProvider != null) {
558            return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
559        }
560
561        throw new ArchiveException("Archiver: " + archiverName + " not found.");
562    }
563
564    public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
565        if (archiveInputStreamProviders == null) {
566            archiveInputStreamProviders = Collections
567                    .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
568        }
569        return archiveInputStreamProviders;
570    }
571
572    public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
573        if (archiveOutputStreamProviders == null) {
574            archiveOutputStreamProviders = Collections
575                    .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
576        }
577        return archiveOutputStreamProviders;
578    }
579
580    /**
581     * Gets the encoding to use for arj, jar, ZIP, dump, cpio and tar
582     * files, or null for the archiver default.
583     *
584     * @return entry encoding, or null for the archiver default
585     * @since 1.5
586     */
587    public String getEntryEncoding() {
588        return entryEncoding;
589    }
590
591    @Override
592    public Set<String> getInputStreamArchiveNames() {
593        return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
594    }
595
596    @Override
597    public Set<String> getOutputStreamArchiveNames() {
598        return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
599    }
600
601    /**
602     * Sets the encoding to use for arj, jar, ZIP, dump, cpio and tar files. Use null for the archiver default.
603     *
604     * @param entryEncoding the entry encoding, null uses the archiver default.
605     * @since 1.5
606     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
607     * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)}
608     * was used to specify the factory encoding.
609     */
610    @Deprecated
611    public void setEntryEncoding(final String entryEncoding) {
612        // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
613        if (encoding != null) {
614            throw new IllegalStateException("Cannot overide encoding set by the constructor");
615        }
616        this.entryEncoding = entryEncoding;
617    }
618
619}