001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.ar; 020 021import java.io.EOFException; 022import java.io.IOException; 023import java.io.InputStream; 024import java.util.Arrays; 025 026import org.apache.commons.compress.archivers.ArchiveEntry; 027import org.apache.commons.compress.archivers.ArchiveInputStream; 028import org.apache.commons.compress.utils.ArchiveUtils; 029import org.apache.commons.compress.utils.IOUtils; 030 031/** 032 * Implements the "ar" archive format as an input stream. 033 * 034 * @NotThreadSafe 035 * 036 */ 037public class ArArchiveInputStream extends ArchiveInputStream { 038 039 // offsets and length of meta data parts 040 private static final int NAME_OFFSET = 0; 041 private static final int NAME_LEN = 16; 042 private static final int LAST_MODIFIED_OFFSET = NAME_LEN; 043 044 private static final int LAST_MODIFIED_LEN = 12; 045 046 private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN; 047 048 private static final int USER_ID_LEN = 6; 049 050 private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN; 051 private static final int GROUP_ID_LEN = 6; 052 private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN; 053 private static final int FILE_MODE_LEN = 8; 054 private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN; 055 private static final int LENGTH_LEN = 10; 056 static final String BSD_LONGNAME_PREFIX = "#1/"; 057 private static final int BSD_LONGNAME_PREFIX_LEN = 058 BSD_LONGNAME_PREFIX.length(); 059 private static final String BSD_LONGNAME_PATTERN = 060 "^" + BSD_LONGNAME_PREFIX + "\\d+"; 061 private static final String GNU_STRING_TABLE_NAME = "//"; 062 private static final String GNU_LONGNAME_PATTERN = "^/\\d+"; 063 /** 064 * Does the name look like it is a long name (or a name containing 065 * spaces) as encoded by BSD ar? 066 * 067 * <p>From the FreeBSD ar(5) man page:</p> 068 * <pre> 069 * BSD In the BSD variant, names that are shorter than 16 070 * characters and without embedded spaces are stored 071 * directly in this field. If a name has an embedded 072 * space, or if it is longer than 16 characters, then 073 * the string "#1/" followed by the decimal represen- 074 * tation of the length of the file name is placed in 075 * this field. The actual file name is stored immedi- 076 * ately after the archive header. The content of the 077 * archive member follows the file name. The ar_size 078 * field of the header (see below) will then hold the 079 * sum of the size of the file name and the size of 080 * the member. 081 * </pre> 082 * 083 * @since 1.3 084 */ 085 private static boolean isBSDLongName(final String name) { 086 return name != null && name.matches(BSD_LONGNAME_PATTERN); 087 } 088 089 /** 090 * Is this the name of the "Archive String Table" as used by 091 * SVR4/GNU to store long file names? 092 * 093 * <p>GNU ar stores multiple extended file names in the data section 094 * of a file with the name "//", this record is referred to by 095 * future headers.</p> 096 * 097 * <p>A header references an extended file name by storing a "/" 098 * followed by a decimal offset to the start of the file name in 099 * the extended file name data section.</p> 100 * 101 * <p>The format of the "//" file itself is simply a list of the 102 * long file names, each separated by one or more LF 103 * characters. Note that the decimal offsets are number of 104 * characters, not line or string number within the "//" file.</p> 105 */ 106 private static boolean isGNUStringTable(final String name) { 107 return GNU_STRING_TABLE_NAME.equals(name); 108 } 109 110 /** 111 * Checks if the signature matches ASCII "!<arch>" followed by a single LF 112 * control character 113 * 114 * @param signature 115 * the bytes to check 116 * @param length 117 * the number of bytes to check 118 * @return true, if this stream is an Ar archive stream, false otherwise 119 */ 120 public static boolean matches(final byte[] signature, final int length) { 121 // 3c21 7261 6863 0a3e 122 123 return length >= 8 && signature[0] == 0x21 && 124 signature[1] == 0x3c && signature[2] == 0x61 && 125 signature[3] == 0x72 && signature[4] == 0x63 && 126 signature[5] == 0x68 && signature[6] == 0x3e && 127 signature[7] == 0x0a; 128 } 129 130 private final InputStream input; 131 132 private long offset; 133 134 private boolean closed; 135 136 /* 137 * If getNextEntry has been called, the entry metadata is stored in 138 * currentEntry. 139 */ 140 private ArArchiveEntry currentEntry; 141 142 // Storage area for extra long names (GNU ar) 143 private byte[] namebuffer; 144 145 /* 146 * The offset where the current entry started. -1 if no entry has been 147 * called 148 */ 149 private long entryOffset = -1; 150 151 // cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 152 private final byte[] metaData = 153 new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN]; 154 155 /** 156 * Constructs an Ar input stream with the referenced stream 157 * 158 * @param pInput 159 * the ar input stream 160 */ 161 public ArArchiveInputStream(final InputStream pInput) { 162 input = pInput; 163 closed = false; 164 } 165 166 private int asInt(final byte[] byteArray, final int offset, final int len) { 167 return asInt(byteArray, offset, len, 10, false); 168 } 169 170 private int asInt(final byte[] byteArray, final int offset, final int len, final boolean treatBlankAsZero) { 171 return asInt(byteArray, offset, len, 10, treatBlankAsZero); 172 } 173 174 private int asInt(final byte[] byteArray, final int offset, final int len, final int base) { 175 return asInt(byteArray, offset, len, base, false); 176 } 177 178 private int asInt(final byte[] byteArray, final int offset, final int len, final int base, final boolean treatBlankAsZero) { 179 final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim(); 180 if (string.isEmpty() && treatBlankAsZero) { 181 return 0; 182 } 183 return Integer.parseInt(string, base); 184 } 185 private long asLong(final byte[] byteArray, final int offset, final int len) { 186 return Long.parseLong(ArchiveUtils.toAsciiString(byteArray, offset, len).trim()); 187 } 188 /* 189 * (non-Javadoc) 190 * 191 * @see java.io.InputStream#close() 192 */ 193 @Override 194 public void close() throws IOException { 195 if (!closed) { 196 closed = true; 197 input.close(); 198 } 199 currentEntry = null; 200 } 201 202 /** 203 * Reads the real name from the current stream assuming the very 204 * first bytes to be read are the real file name. 205 * 206 * @see #isBSDLongName 207 * 208 * @since 1.3 209 */ 210 private String getBSDLongName(final String bsdLongName) throws IOException { 211 final int nameLen = 212 Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN)); 213 final byte[] name = IOUtils.readRange(input, nameLen); 214 final int read = name.length; 215 trackReadBytes(read); 216 if (read != nameLen) { 217 throw new EOFException(); 218 } 219 return ArchiveUtils.toAsciiString(name); 220 } 221 222 /** 223 * Get an extended name from the GNU extended name buffer. 224 * 225 * @param offset pointer to entry within the buffer 226 * @return the extended file name; without trailing "/" if present. 227 * @throws IOException if name not found or buffer not set up 228 */ 229 private String getExtendedName(final int offset) throws IOException { 230 if (namebuffer == null) { 231 throw new IOException("Cannot process GNU long filename as no // record was found"); 232 } 233 for (int i = offset; i < namebuffer.length; i++) { 234 if (namebuffer[i] == '\012' || namebuffer[i] == 0) { 235 if (namebuffer[i - 1] == '/') { 236 i--; // drop trailing / 237 } 238 return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset); 239 } 240 } 241 throw new IOException("Failed to read entry: " + offset); 242 } 243 244 /** 245 * Returns the next AR entry in this stream. 246 * 247 * @return the next AR entry. 248 * @throws IOException 249 * if the entry could not be read 250 */ 251 public ArArchiveEntry getNextArEntry() throws IOException { 252 if (currentEntry != null) { 253 final long entryEnd = entryOffset + currentEntry.getLength(); 254 final long skipped = IOUtils.skip(input, entryEnd - offset); 255 trackReadBytes(skipped); 256 currentEntry = null; 257 } 258 259 if (offset == 0) { 260 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER); 261 final byte[] realized = IOUtils.readRange(input, expected.length); 262 final int read = realized.length; 263 trackReadBytes(read); 264 if (read != expected.length) { 265 throw new IOException("Failed to read header. Occurred at byte: " + getBytesRead()); 266 } 267 if (!Arrays.equals(expected, realized)) { 268 throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized)); 269 } 270 } 271 272 if (offset % 2 != 0) { 273 if (input.read() < 0) { 274 // hit eof 275 return null; 276 } 277 trackReadBytes(1); 278 } 279 280 { 281 final int read = IOUtils.readFully(input, metaData); 282 trackReadBytes(read); 283 if (read == 0) { 284 return null; 285 } 286 if (read < metaData.length) { 287 throw new IOException("Truncated ar archive"); 288 } 289 } 290 291 { 292 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER); 293 final byte[] realized = IOUtils.readRange(input, expected.length); 294 final int read = realized.length; 295 trackReadBytes(read); 296 if (read != expected.length) { 297 throw new IOException("Failed to read entry trailer. Occurred at byte: " + getBytesRead()); 298 } 299 if (!Arrays.equals(expected, realized)) { 300 throw new IOException("Invalid entry trailer. not read the content? Occurred at byte: " + getBytesRead()); 301 } 302 } 303 304 entryOffset = offset; 305 306// GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename. 307 308 // entry name is stored as ASCII string 309 String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim(); 310 if (isGNUStringTable(temp)) { // GNU extended filenames entry 311 currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN); 312 return getNextArEntry(); 313 } 314 315 long len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN); 316 if (temp.endsWith("/")) { // GNU terminator 317 temp = temp.substring(0, temp.length() - 1); 318 } else if (isGNULongName(temp)) { 319 final int off = Integer.parseInt(temp.substring(1));// get the offset 320 temp = getExtendedName(off); // convert to the long name 321 } else if (isBSDLongName(temp)) { 322 temp = getBSDLongName(temp); 323 // entry length contained the length of the file name in 324 // addition to the real length of the entry. 325 // assume file name was ASCII, there is no "standard" otherwise 326 final int nameLen = temp.length(); 327 len -= nameLen; 328 entryOffset += nameLen; 329 } 330 331 if (len < 0) { 332 throw new IOException("broken archive, entry with negative size"); 333 } 334 335 currentEntry = new ArArchiveEntry(temp, len, 336 asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true), 337 asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true), 338 asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8), 339 asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN)); 340 return currentEntry; 341 } 342 343 /* 344 * (non-Javadoc) 345 * 346 * @see 347 * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry() 348 */ 349 @Override 350 public ArchiveEntry getNextEntry() throws IOException { 351 return getNextArEntry(); 352 } 353 354 /** 355 * Does the name look like it is a long name (or a name containing 356 * spaces) as encoded by SVR4/GNU ar? 357 * 358 * @see #isGNUStringTable 359 */ 360 private boolean isGNULongName(final String name) { 361 return name != null && name.matches(GNU_LONGNAME_PATTERN); 362 } 363 364 /* 365 * (non-Javadoc) 366 * 367 * @see java.io.InputStream#read(byte[], int, int) 368 */ 369 @Override 370 public int read(final byte[] b, final int off, final int len) throws IOException { 371 if (len == 0) { 372 return 0; 373 } 374 if (currentEntry == null) { 375 throw new IllegalStateException("No current ar entry"); 376 } 377 final long entryEnd = entryOffset + currentEntry.getLength(); 378 if (len < 0 || offset >= entryEnd) { 379 return -1; 380 } 381 final int toRead = (int) Math.min(len, entryEnd - offset); 382 final int ret = this.input.read(b, off, toRead); 383 trackReadBytes(ret); 384 return ret; 385 } 386 387 /** 388 * Reads the GNU archive String Table. 389 * 390 * @see #isGNUStringTable 391 */ 392 private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException { 393 final int bufflen = asInt(length, offset, len); // Assume length will fit in an int 394 namebuffer = IOUtils.readRange(input, bufflen); 395 final int read = namebuffer.length; 396 trackReadBytes(read); 397 if (read != bufflen){ 398 throw new IOException("Failed to read complete // record: expected=" 399 + bufflen + " read=" + read); 400 } 401 return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen); 402 } 403 404 private void trackReadBytes(final long read) { 405 count(read); 406 if (read > 0) { 407 offset += read; 408 } 409 } 410}