001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.File;
022import java.io.IOException;
023import java.io.Serializable;
024import java.nio.ByteBuffer;
025import java.nio.channels.SeekableByteChannel;
026import java.nio.file.Files;
027import java.nio.file.Path;
028import java.nio.file.StandardOpenOption;
029import java.util.ArrayList;
030import java.util.Arrays;
031import java.util.Comparator;
032import java.util.List;
033import java.util.Objects;
034import java.util.regex.Pattern;
035import java.util.stream.Collectors;
036import java.util.stream.Stream;
037
038import org.apache.commons.compress.archivers.ArchiveStreamFactory;
039import org.apache.commons.compress.utils.FileNameUtils;
040import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
041
042/**
043 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
044 *
045 * <p>If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of
046 * the archive.</p>
047 *
048 * @since 1.20
049 */
050public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
051
052    private static class ZipSplitSegmentComparator implements Comparator<Path>, Serializable {
053        private static final long serialVersionUID = 20200123L;
054
055        @Override
056        public int compare(final Path file1, final Path file2) {
057            final String extension1 = FileNameUtils.getExtension(file1);
058            final String extension2 = FileNameUtils.getExtension(file2);
059
060            if (!extension1.startsWith("z")) {
061                return -1;
062            }
063
064            if (!extension2.startsWith("z")) {
065                return 1;
066            }
067
068            final Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
069            final Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
070
071            return splitSegmentNumber1.compareTo(splitSegmentNumber2);
072        }
073    }
074    private static final Path[] EMPTY_PATH_ARRAY = {};
075    private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
076
077    /**
078     * Concatenates ZIP split files from the last segment(the extension SHOULD be .zip)
079     *
080     * @param lastSegmentFile the last segment of ZIP split files, note that the extension SHOULD be .zip
081     * @return SeekableByteChannel that concatenates all ZIP split files
082     * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
083     * @throws IOException if the first channel doesn't seem to hold
084     * the beginning of a split archive
085     */
086    public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException {
087        return buildFromLastSplitSegment(lastSegmentFile.toPath());
088    }
089
090    /**
091     * Concatenates ZIP split files from the last segment (the extension MUST be .zip)
092     * @param lastSegmentPath the last segment of ZIP split files, note that the extension MUST be .zip
093     * @return SeekableByteChannel that concatenates all ZIP split files
094     * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip
095     * @throws IOException if the first channel doesn't seem to hold
096     * the beginning of a split archive
097     * @since 1.22
098     */
099    public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException {
100        final String extension = FileNameUtils.getExtension(lastSegmentPath);
101        if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
102            throw new IllegalArgumentException("The extension of last ZIP split segment should be .zip");
103        }
104
105        final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? lastSegmentPath.getParent()
106                : lastSegmentPath.getFileSystem().getPath(".");
107        final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath);
108        final ArrayList<Path> splitZipSegments;
109
110        // ZIP split segments should be like z01,z02....z(n-1) based on the ZIP specification
111        final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
112        try (Stream<Path> walk = Files.walk(parent, 1)) {
113            splitZipSegments = walk
114                    .filter(Files::isRegularFile)
115                    .filter(path -> pattern.matcher(path.getFileName().toString()).matches())
116                    .sorted(new ZipSplitSegmentComparator())
117                    .collect(Collectors.toCollection(ArrayList::new));
118        }
119
120        return forPaths(lastSegmentPath, splitZipSegments);
121    }
122
123    /**
124     * Concatenates the given files.
125     *
126     * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip)
127     *              and theses files should be added in correct order (e.g. .z01, .z02... .z99, .zip)
128     * @return SeekableByteChannel that concatenates all provided files
129     * @throws NullPointerException if files is null
130     * @throws IOException          if opening a channel for one of the files fails
131     * @throws IOException if the first channel doesn't seem to hold
132     * the beginning of a split archive
133     */
134    public static SeekableByteChannel forFiles(final File... files) throws IOException {
135        final List<Path> paths = new ArrayList<>();
136        for (final File f : Objects.requireNonNull(files, "files must not be null")) {
137            paths.add(f.toPath());
138        }
139
140        return forPaths(paths.toArray(EMPTY_PATH_ARRAY));
141    }
142
143    /**
144     * Concatenates the given files.
145     *
146     * @param lastSegmentFile the last segment of split ZIP segments, its extension should be .zip
147     * @param files           the files to concatenate except for the last segment,
148     *                        note theses files should be added in correct order (e.g. .z01, .z02... .z99)
149     * @return SeekableByteChannel that concatenates all provided files
150     * @throws IOException if the first channel doesn't seem to hold
151     * the beginning of a split archive
152     * @throws NullPointerException if files or lastSegmentFile is null
153     */
154    public static SeekableByteChannel forFiles(final File lastSegmentFile, final Iterable<File> files) throws IOException {
155        Objects.requireNonNull(files, "files");
156        Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
157
158        final List<Path> filesList = new ArrayList<>();
159        files.forEach(f -> filesList.add(f.toPath()));
160
161        return forPaths(lastSegmentFile.toPath(), filesList);
162    }
163
164    /**
165     * Concatenates the given channels.
166     *
167     * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip)
168     *                 and theses channels should be added in correct order (e.g. .z01, .z02... .z99, .zip)
169     * @return SeekableByteChannel that concatenates all provided channels
170     * @throws NullPointerException if channels is null
171     * @throws IOException if reading channels fails
172     */
173    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel... channels) throws IOException {
174        if (Objects.requireNonNull(channels, "channels must not be null").length == 1) {
175            return channels[0];
176        }
177        return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
178    }
179
180    /**
181     * Concatenates the given channels.
182     *
183     * @param lastSegmentChannel channel of the last segment of split ZIP segments, its extension should be .zip
184     * @param channels           the channels to concatenate except for the last segment,
185     *                           note theses channels should be added in correct order (e.g. .z01, .z02... .z99)
186     * @return SeekableByteChannel that concatenates all provided channels
187     * @throws NullPointerException if lastSegmentChannel or channels is null
188     * @throws IOException if the first channel doesn't seem to hold
189     * the beginning of a split archive
190     */
191    public static SeekableByteChannel forOrderedSeekableByteChannels(final SeekableByteChannel lastSegmentChannel,
192        final Iterable<SeekableByteChannel> channels) throws IOException {
193        Objects.requireNonNull(channels, "channels");
194        Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
195
196        final List<SeekableByteChannel> channelsList = new ArrayList<>();
197        channels.forEach(channelsList::add);
198        channelsList.add(lastSegmentChannel);
199
200        return forOrderedSeekableByteChannels(channelsList.toArray(new SeekableByteChannel[0]));
201    }
202
203    /**
204     * Concatenates the given file paths.
205     * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip)
206     * and these files should be added in correct order (e.g.: .z01, .z02... .z99, .zip)
207     * @return SeekableByteChannel that concatenates all provided files
208     * @throws NullPointerException if files is null
209     * @throws IOException if opening a channel for one of the files fails
210     * @throws IOException if the first channel doesn't seem to hold
211     * the beginning of a split archive
212     * @since 1.22
213     */
214    public static SeekableByteChannel forPaths(final Path... paths) throws IOException {
215        final List<SeekableByteChannel> channels = new ArrayList<>();
216        for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) {
217            channels.add(Files.newByteChannel(path, StandardOpenOption.READ));
218        }
219        if (channels.size() == 1) {
220            return channels.get(0);
221        }
222        return new ZipSplitReadOnlySeekableByteChannel(channels);
223    }
224
225    /**
226     * Concatenates the given file paths.
227     * @param lastSegmentPath the last segment path of split ZIP segments, its extension must be .zip
228     * @param paths the file paths to concatenate except for the last segment,
229     * note these files should be added in correct order (e.g.: .z01, .z02... .z99)
230     * @return SeekableByteChannel that concatenates all provided files
231     * @throws IOException if the first channel doesn't seem to hold
232     * the beginning of a split archive
233     * @throws NullPointerException if files or lastSegmentPath is null
234     * @since 1.22
235     */
236    public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException {
237        Objects.requireNonNull(paths, "paths");
238        Objects.requireNonNull(lastSegmentPath, "lastSegmentPath");
239
240        final List<Path> filesList = new ArrayList<>();
241        paths.forEach(filesList::add);
242        filesList.add(lastSegmentPath);
243
244        return forPaths(filesList.toArray(EMPTY_PATH_ARRAY));
245    }
246
247    private final ByteBuffer zipSplitSignatureByteBuffer =
248        ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
249
250    /**
251     * Concatenates the given channels.
252     *
253     * <p>The channels should be add in ascending order, e.g. z01,
254     * z02, ... z99, ZIP please note that the .zip file is the last
255     * segment and should be added as the last one in the channels</p>
256     *
257     * @param channels the channels to concatenate
258     * @throws NullPointerException if channels is null
259     * @throws IOException if the first channel doesn't seem to hold
260     * the beginning of a split archive
261     */
262    public ZipSplitReadOnlySeekableByteChannel(final List<SeekableByteChannel> channels)
263        throws IOException {
264        super(channels);
265
266        // the first split ZIP segment should begin with ZIP split signature
267        assertSplitSignature(channels);
268    }
269
270    /**
271     * Based on the ZIP specification:
272     *
273     * <p>
274     * 8.5.3 Spanned/Split archives created using PKZIP for Windows
275     * (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
276     * or PKZIP Explorer will include a special spanning
277     * signature as the first 4 bytes of the first segment of
278     * the archive.  This signature (0x08074b50) will be
279     * followed immediately by the local header signature for
280     * the first file in the archive.
281     *
282     * <p>
283     * the first 4 bytes of the first ZIP split segment should be the ZIP split signature(0x08074B50)
284     *
285     * @param channels channels to be validated
286     * @throws IOException
287     */
288    private void assertSplitSignature(final List<SeekableByteChannel> channels)
289        throws IOException {
290        final SeekableByteChannel channel = channels.get(0);
291        // the ZIP split file signature is at the beginning of the first split segment
292        channel.position(0L);
293
294        zipSplitSignatureByteBuffer.rewind();
295        channel.read(zipSplitSignatureByteBuffer);
296        final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
297        if (!signature.equals(ZipLong.DD_SIG)) {
298            channel.position(0L);
299            throw new IOException("The first ZIP split segment does not begin with split ZIP file signature");
300        }
301
302        channel.position(0L);
303    }
304}