001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.ar;
020
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.regex.Pattern;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;
030
031/**
032 * Implements the "ar" archive format as an input stream.
033 *
034 * @NotThreadSafe
035 *
036 */
037public class ArArchiveInputStream extends ArchiveInputStream {
038
039    // offsets and length of meta data parts
040    private static final int NAME_OFFSET = 0;
041    private static final int NAME_LEN = 16;
042    private static final int LAST_MODIFIED_OFFSET = NAME_LEN;
043
044    private static final int LAST_MODIFIED_LEN = 12;
045
046    private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN;
047
048    private static final int USER_ID_LEN = 6;
049
050    private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN;
051    private static final int GROUP_ID_LEN = 6;
052    private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN;
053    private static final int FILE_MODE_LEN = 8;
054    private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN;
055    private static final int LENGTH_LEN = 10;
056    static final String BSD_LONGNAME_PREFIX = "#1/";
057    private static final int BSD_LONGNAME_PREFIX_LEN =
058        BSD_LONGNAME_PREFIX.length();
059    private static final String BSD_LONGNAME_PATTERN =
060        "^" + BSD_LONGNAME_PREFIX + "\\d+";
061    private static final String GNU_STRING_TABLE_NAME = "//";
062    private static final String GNU_LONGNAME_PATTERN = "^/\\d+";
063    /**
064     * Does the name look like it is a long name (or a name containing
065     * spaces) as encoded by BSD ar?
066     *
067     * <p>From the FreeBSD ar(5) man page:</p>
068     * <pre>
069     * BSD   In the BSD variant, names that are shorter than 16
070     *       characters and without embedded spaces are stored
071     *       directly in this field.  If a name has an embedded
072     *       space, or if it is longer than 16 characters, then
073     *       the string "#1/" followed by the decimal represen-
074     *       tation of the length of the file name is placed in
075     *       this field. The actual file name is stored immedi-
076     *       ately after the archive header.  The content of the
077     *       archive member follows the file name.  The ar_size
078     *       field of the header (see below) will then hold the
079     *       sum of the size of the file name and the size of
080     *       the member.
081     * </pre>
082     *
083     * @since 1.3
084     */
085    private static boolean isBSDLongName(final String name) {
086        return name != null && name.matches(BSD_LONGNAME_PATTERN);
087    }
088
089    /**
090     * Is this the name of the "Archive String Table" as used by
091     * SVR4/GNU to store long file names?
092     *
093     * <p>GNU ar stores multiple extended file names in the data section
094     * of a file with the name "//", this record is referred to by
095     * future headers.</p>
096     *
097     * <p>A header references an extended file name by storing a "/"
098     * followed by a decimal offset to the start of the file name in
099     * the extended file name data section.</p>
100     *
101     * <p>The format of the "//" file itself is simply a list of the
102     * long file names, each separated by one or more LF
103     * characters. Note that the decimal offsets are number of
104     * characters, not line or string number within the "//" file.</p>
105     */
106    private static boolean isGNUStringTable(final String name) {
107        return GNU_STRING_TABLE_NAME.equals(name);
108    }
109
110    /**
111     * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF
112     * control character
113     *
114     * @param signature
115     *            the bytes to check
116     * @param length
117     *            the number of bytes to check
118     * @return true, if this stream is an Ar archive stream, false otherwise
119     */
120    public static boolean matches(final byte[] signature, final int length) {
121        // 3c21 7261 6863 0a3e
122
123        return length >= 8 && signature[0] == 0x21 &&
124                signature[1] == 0x3c && signature[2] == 0x61 &&
125                signature[3] == 0x72 && signature[4] == 0x63 &&
126                signature[5] == 0x68 && signature[6] == 0x3e &&
127                signature[7] == 0x0a;
128    }
129
130    private final InputStream input;
131
132    private long offset;
133
134    private boolean closed;
135
136    /*
137     * If getNextEntry has been called, the entry metadata is stored in
138     * currentEntry.
139     */
140    private ArArchiveEntry currentEntry;
141
142    // Storage area for extra long names (GNU ar)
143    private byte[] namebuffer;
144
145    /*
146     * The offset where the current entry started. -1 if no entry has been
147     * called
148     */
149    private long entryOffset = -1;
150
151    // cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
152    private final byte[] metaData =
153        new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN];
154
155    /**
156     * Constructs an Ar input stream with the referenced stream
157     *
158     * @param pInput
159     *            the ar input stream
160     */
161    public ArArchiveInputStream(final InputStream pInput) {
162        input = pInput;
163        closed = false;
164    }
165
166    private int asInt(final byte[] byteArray, final int offset, final int len) {
167        return asInt(byteArray, offset, len, 10, false);
168    }
169
170    private int asInt(final byte[] byteArray, final int offset, final int len, final boolean treatBlankAsZero) {
171        return asInt(byteArray, offset, len, 10, treatBlankAsZero);
172    }
173
174    private int asInt(final byte[] byteArray, final int offset, final int len, final int base) {
175        return asInt(byteArray, offset, len, base, false);
176    }
177
178    private int asInt(final byte[] byteArray, final int offset, final int len, final int base, final boolean treatBlankAsZero) {
179        final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim();
180        if (string.isEmpty() && treatBlankAsZero) {
181            return 0;
182        }
183        return Integer.parseInt(string, base);
184    }
185    private long asLong(final byte[] byteArray, final int offset, final int len) {
186        return Long.parseLong(ArchiveUtils.toAsciiString(byteArray, offset, len).trim());
187    }
188    /*
189     * (non-Javadoc)
190     *
191     * @see java.io.InputStream#close()
192     */
193    @Override
194    public void close() throws IOException {
195        if (!closed) {
196            closed = true;
197            input.close();
198        }
199        currentEntry = null;
200    }
201
202    /**
203     * Reads the real name from the current stream assuming the very
204     * first bytes to be read are the real file name.
205     *
206     * @see #isBSDLongName
207     *
208     * @since 1.3
209     */
210    private String getBSDLongName(final String bsdLongName) throws IOException {
211        final int nameLen =
212            Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
213        final byte[] name = IOUtils.readRange(input, nameLen);
214        final int read = name.length;
215        trackReadBytes(read);
216        if (read != nameLen) {
217            throw new EOFException();
218        }
219        return ArchiveUtils.toAsciiString(name);
220    }
221
222    /**
223     * Get an extended name from the GNU extended name buffer.
224     *
225     * @param offset pointer to entry within the buffer
226     * @return the extended file name; without trailing "/" if present.
227     * @throws IOException if name not found or buffer not set up
228     */
229    private String getExtendedName(final int offset) throws IOException {
230        if (namebuffer == null) {
231            throw new IOException("Cannot process GNU long filename as no // record was found");
232        }
233        for (int i = offset; i < namebuffer.length; i++) {
234            if (namebuffer[i] == '\012' || namebuffer[i] == 0) {
235                if (namebuffer[i - 1] == '/') {
236                    i--; // drop trailing /
237                }
238                return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset);
239            }
240        }
241        throw new IOException("Failed to read entry: " + offset);
242    }
243
244    /**
245     * Returns the next AR entry in this stream.
246     *
247     * @return the next AR entry.
248     * @throws IOException
249     *             if the entry could not be read
250     */
251    public ArArchiveEntry getNextArEntry() throws IOException {
252        if (currentEntry != null) {
253            final long entryEnd = entryOffset + currentEntry.getLength();
254            final long skipped = IOUtils.skip(input, entryEnd - offset);
255            trackReadBytes(skipped);
256            currentEntry = null;
257        }
258
259        if (offset == 0) {
260            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
261            final byte[] realized = IOUtils.readRange(input, expected.length);
262            final int read = realized.length;
263            trackReadBytes(read);
264            if (read != expected.length) {
265                throw new IOException("Failed to read header. Occurred at byte: " + getBytesRead());
266            }
267            if (!Arrays.equals(expected, realized)) {
268                throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized));
269            }
270        }
271
272        if (offset % 2 != 0) {
273            if (input.read() < 0) {
274                // hit eof
275                return null;
276            }
277            trackReadBytes(1);
278        }
279
280        {
281            final int read = IOUtils.readFully(input, metaData);
282            trackReadBytes(read);
283            if (read == 0) {
284                return null;
285            }
286            if (read < metaData.length) {
287                throw new IOException("Truncated ar archive");
288            }
289        }
290
291        {
292            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
293            final byte[] realized = IOUtils.readRange(input, expected.length);
294            final int read = realized.length;
295            trackReadBytes(read);
296            if (read != expected.length) {
297                throw new IOException("Failed to read entry trailer. Occurred at byte: " + getBytesRead());
298            }
299            if (!Arrays.equals(expected, realized)) {
300                throw new IOException("Invalid entry trailer. not read the content? Occurred at byte: " + getBytesRead());
301            }
302        }
303
304        entryOffset = offset;
305
306//        GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename.
307
308        // entry name is stored as ASCII string
309        String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim();
310        if (isGNUStringTable(temp)) { // GNU extended filenames entry
311            currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN);
312            return getNextArEntry();
313        }
314
315        long len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN);
316        if (temp.endsWith("/")) { // GNU terminator
317            temp = temp.substring(0, temp.length() - 1);
318        } else if (isGNULongName(temp)) {
319            final int off = Integer.parseInt(temp.substring(1));// get the offset
320            temp = getExtendedName(off); // convert to the long name
321        } else if (isBSDLongName(temp)) {
322            temp = getBSDLongName(temp);
323            // entry length contained the length of the file name in
324            // addition to the real length of the entry.
325            // assume file name was ASCII, there is no "standard" otherwise
326            final int nameLen = temp.length();
327            len -= nameLen;
328            entryOffset += nameLen;
329        }
330
331        if (len < 0) {
332            throw new IOException("broken archive, entry with negative size");
333        }
334
335        currentEntry = new ArArchiveEntry(temp, len,
336                                          asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true),
337                                          asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true),
338                                          asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8),
339                                          asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN));
340        return currentEntry;
341    }
342
343    /*
344     * (non-Javadoc)
345     *
346     * @see
347     * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
348     */
349    @Override
350    public ArchiveEntry getNextEntry() throws IOException {
351        return getNextArEntry();
352    }
353
354    /**
355     * Does the name look like it is a long name (or a name containing
356     * spaces) as encoded by SVR4/GNU ar?
357     *
358     * @see #isGNUStringTable
359     */
360    private boolean isGNULongName(final String name) {
361        return name != null && name.matches(GNU_LONGNAME_PATTERN);
362    }
363
364    /*
365     * (non-Javadoc)
366     *
367     * @see java.io.InputStream#read(byte[], int, int)
368     */
369    @Override
370    public int read(final byte[] b, final int off, final int len) throws IOException {
371        if (len == 0) {
372            return 0;
373        }
374        if (currentEntry == null) {
375            throw new IllegalStateException("No current ar entry");
376        }
377        final long entryEnd = entryOffset + currentEntry.getLength();
378        if (len < 0 || offset >= entryEnd) {
379            return -1;
380        }
381        final int toRead = (int) Math.min(len, entryEnd - offset);
382        final int ret = this.input.read(b, off, toRead);
383        trackReadBytes(ret);
384        return ret;
385    }
386
387    /**
388     * Reads the GNU archive String Table.
389     *
390     * @see #isGNUStringTable
391     */
392    private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException {
393        final int bufflen = asInt(length, offset, len); // Assume length will fit in an int
394        namebuffer = IOUtils.readRange(input, bufflen);
395        final int read = namebuffer.length;
396        trackReadBytes(read);
397        if (read != bufflen){
398            throw new IOException("Failed to read complete // record: expected="
399                                  + bufflen + " read=" + read);
400        }
401        return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
402    }
403
404    private void trackReadBytes(final long read) {
405        count(read);
406        if (read > 0) {
407            offset += read;
408        }
409    }
410}