001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.harmony.unpack200;
018
019import java.io.BufferedInputStream;
020import java.io.ByteArrayInputStream;
021import java.io.ByteArrayOutputStream;
022import java.io.DataOutputStream;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.OutputStream;
026import java.io.OutputStreamWriter;
027import java.io.PrintWriter;
028import java.nio.charset.Charset;
029import java.util.ArrayList;
030import java.util.HashSet;
031import java.util.List;
032import java.util.Set;
033import java.util.TimeZone;
034import java.util.jar.JarEntry;
035import java.util.jar.JarOutputStream;
036import java.util.zip.CRC32;
037import java.util.zip.GZIPInputStream;
038import java.util.zip.ZipEntry;
039
040import org.apache.commons.compress.harmony.pack200.Codec;
041import org.apache.commons.compress.harmony.pack200.Pack200Exception;
042import org.apache.commons.compress.harmony.unpack200.bytecode.Attribute;
043import org.apache.commons.compress.harmony.unpack200.bytecode.CPClass;
044import org.apache.commons.compress.harmony.unpack200.bytecode.CPField;
045import org.apache.commons.compress.harmony.unpack200.bytecode.CPMethod;
046import org.apache.commons.compress.harmony.unpack200.bytecode.CPUTF8;
047import org.apache.commons.compress.harmony.unpack200.bytecode.ClassConstantPool;
048import org.apache.commons.compress.harmony.unpack200.bytecode.ClassFile;
049import org.apache.commons.compress.harmony.unpack200.bytecode.ClassFileEntry;
050import org.apache.commons.compress.harmony.unpack200.bytecode.InnerClassesAttribute;
051import org.apache.commons.compress.harmony.unpack200.bytecode.SourceFileAttribute;
052
053/**
054 * A Pack200 archive consists of one or more segments. Each segment is stand-alone, in the sense that every segment has
055 * the magic number header; thus, every segment is also a valid archive. However, it is possible to combine
056 * (non-GZipped) archives into a single large archive by concatenation alone. Thus all the hard work in unpacking an
057 * archive falls to understanding a segment.
058 *
059 * The first component of a segment is the header; this contains (amongst other things) the expected counts of constant
060 * pool entries, which in turn defines how many values need to be read from the stream. Because values are variable
061 * width (see {@link Codec}), it is not possible to calculate the start of the next segment, although one of the header
062 * values does hint at the size of the segment if non-zero, which can be used for buffering purposes.
063 *
064 * Note that this does not perform any buffering of the input stream; each value will be read on a byte-by-byte basis.
065 * It does not perform GZip decompression automatically; both of these are expected to be done by the caller if the
066 * stream has the magic header for GZip streams ({@link GZIPInputStream#GZIP_MAGIC}). In any case, if GZip decompression
067 * is being performed the input stream will be buffered at a higher level, and thus this can read on a byte-oriented
068 * basis.
069 */
070public class Segment {
071
072    public static final int LOG_LEVEL_VERBOSE = 2;
073
074    public static final int LOG_LEVEL_STANDARD = 1;
075
076    public static final int LOG_LEVEL_QUIET = 0;
077
078    private SegmentHeader header;
079
080    private CpBands cpBands;
081
082    private AttrDefinitionBands attrDefinitionBands;
083
084    private IcBands icBands;
085
086    private ClassBands classBands;
087
088    private BcBands bcBands;
089
090    private FileBands fileBands;
091
092    private boolean overrideDeflateHint;
093
094    private boolean deflateHint;
095
096    private boolean doPreRead;
097
098    private int logLevel;
099
100    private PrintWriter logStream;
101
102    private byte[][] classFilesContents;
103
104    private boolean[] fileDeflate;
105
106    private boolean[] fileIsClass;
107
108    private InputStream internalBuffer;
109
110    private ClassFile buildClassFile(final int classNum) {
111        final ClassFile classFile = new ClassFile();
112        final int[] major = classBands.getClassVersionMajor();
113        final int[] minor = classBands.getClassVersionMinor();
114        if (major != null) {
115            classFile.major = major[classNum];
116            classFile.minor = minor[classNum];
117        } else {
118            classFile.major = header.getDefaultClassMajorVersion();
119            classFile.minor = header.getDefaultClassMinorVersion();
120        }
121        // build constant pool
122        final ClassConstantPool cp = classFile.pool;
123        final int fullNameIndexInCpClass = classBands.getClassThisInts()[classNum];
124        final String fullName = cpBands.getCpClass()[fullNameIndexInCpClass];
125        // SourceFile attribute
126        int i = fullName.lastIndexOf("/") + 1; // if lastIndexOf==-1, then
127        // -1+1=0, so str.substring(0)
128        // == str
129
130        // Get the source file attribute
131        final List<Attribute> classAttributes = classBands.getClassAttributes()[classNum];
132        SourceFileAttribute sourceFileAttribute = null;
133        for (final Attribute classAttribute : classAttributes) {
134            if (classAttribute.isSourceFileAttribute()) {
135                sourceFileAttribute = ((SourceFileAttribute) classAttribute);
136            }
137        }
138
139        if (sourceFileAttribute == null) {
140            // If we don't have a source file attribute yet, we need
141            // to infer it from the class.
142            final AttributeLayout SOURCE_FILE = attrDefinitionBands.getAttributeDefinitionMap()
143                .getAttributeLayout(AttributeLayout.ATTRIBUTE_SOURCE_FILE, AttributeLayout.CONTEXT_CLASS);
144            if (SOURCE_FILE.matches(classBands.getRawClassFlags()[classNum])) {
145                int firstDollar = -1;
146                for (int index = 0; index < fullName.length(); index++) {
147                    if (fullName.charAt(index) <= '$') {
148                        firstDollar = index;
149                    }
150                }
151                String fileName = null;
152
153                if (firstDollar > -1 && (i <= firstDollar)) {
154                    fileName = fullName.substring(i, firstDollar) + ".java";
155                } else {
156                    fileName = fullName.substring(i) + ".java";
157                }
158                sourceFileAttribute = new SourceFileAttribute(cpBands.cpUTF8Value(fileName, false));
159                classFile.attributes = new Attribute[] {(Attribute) cp.add(sourceFileAttribute)};
160            } else {
161                classFile.attributes = new Attribute[] {};
162            }
163        } else {
164            classFile.attributes = new Attribute[] {(Attribute) cp.add(sourceFileAttribute)};
165        }
166
167        // If we see any class attributes, add them to the class's attributes
168        // that will
169        // be written out. Keep SourceFileAttributes out since we just
170        // did them above.
171        final List<Attribute> classAttributesWithoutSourceFileAttribute = new ArrayList<>(classAttributes.size());
172        for (int index = 0; index < classAttributes.size(); index++) {
173            final Attribute attrib = classAttributes.get(index);
174            if (!attrib.isSourceFileAttribute()) {
175                classAttributesWithoutSourceFileAttribute.add(attrib);
176            }
177        }
178        final Attribute[] originalAttributes = classFile.attributes;
179        classFile.attributes = new Attribute[originalAttributes.length
180            + classAttributesWithoutSourceFileAttribute.size()];
181        System.arraycopy(originalAttributes, 0, classFile.attributes, 0, originalAttributes.length);
182        for (int index = 0; index < classAttributesWithoutSourceFileAttribute.size(); index++) {
183            final Attribute attrib = (classAttributesWithoutSourceFileAttribute.get(index));
184            cp.add(attrib);
185            classFile.attributes[originalAttributes.length + index] = attrib;
186        }
187
188        // this/superclass
189        final ClassFileEntry cfThis = cp.add(cpBands.cpClassValue(fullNameIndexInCpClass));
190        final ClassFileEntry cfSuper = cp.add(cpBands.cpClassValue(classBands.getClassSuperInts()[classNum]));
191        // add interfaces
192        final ClassFileEntry[] cfInterfaces = new ClassFileEntry[classBands.getClassInterfacesInts()[classNum].length];
193        for (i = 0; i < cfInterfaces.length; i++) {
194            cfInterfaces[i] = cp.add(cpBands.cpClassValue(classBands.getClassInterfacesInts()[classNum][i]));
195        }
196        // add fields
197        final ClassFileEntry[] cfFields = new ClassFileEntry[classBands.getClassFieldCount()[classNum]];
198        // fieldDescr and fieldFlags used to create this
199        for (i = 0; i < cfFields.length; i++) {
200            final int descriptorIndex = classBands.getFieldDescrInts()[classNum][i];
201            final int nameIndex = cpBands.getCpDescriptorNameInts()[descriptorIndex];
202            final int typeIndex = cpBands.getCpDescriptorTypeInts()[descriptorIndex];
203            final CPUTF8 name = cpBands.cpUTF8Value(nameIndex);
204            final CPUTF8 descriptor = cpBands.cpSignatureValue(typeIndex);
205            cfFields[i] = cp.add(new CPField(name, descriptor, classBands.getFieldFlags()[classNum][i],
206                classBands.getFieldAttributes()[classNum][i]));
207        }
208        // add methods
209        final ClassFileEntry[] cfMethods = new ClassFileEntry[classBands.getClassMethodCount()[classNum]];
210        // methodDescr and methodFlags used to create this
211        for (i = 0; i < cfMethods.length; i++) {
212            final int descriptorIndex = classBands.getMethodDescrInts()[classNum][i];
213            final int nameIndex = cpBands.getCpDescriptorNameInts()[descriptorIndex];
214            final int typeIndex = cpBands.getCpDescriptorTypeInts()[descriptorIndex];
215            final CPUTF8 name = cpBands.cpUTF8Value(nameIndex);
216            final CPUTF8 descriptor = cpBands.cpSignatureValue(typeIndex);
217            cfMethods[i] = cp.add(new CPMethod(name, descriptor, classBands.getMethodFlags()[classNum][i],
218                classBands.getMethodAttributes()[classNum][i]));
219        }
220        cp.addNestedEntries();
221
222        // add inner class attribute (if required)
223        boolean addInnerClassesAttr = false;
224        final IcTuple[] icLocal = getClassBands().getIcLocal()[classNum];
225        final boolean icLocalSent = icLocal != null;
226        final InnerClassesAttribute innerClassesAttribute = new InnerClassesAttribute("InnerClasses");
227        final IcTuple[] icRelevant = getIcBands().getRelevantIcTuples(fullName, cp);
228        final List<IcTuple> ic_stored = computeIcStored(icLocal, icRelevant);
229        for (final IcTuple icStored : ic_stored) {
230            final int innerClassIndex = icStored.thisClassIndex();
231            final int outerClassIndex = icStored.outerClassIndex();
232            final int simpleClassNameIndex = icStored.simpleClassNameIndex();
233
234            final String innerClassString = icStored.thisClassString();
235            final String outerClassString = icStored.outerClassString();
236            final String simpleClassName = icStored.simpleClassName();
237
238            CPClass innerClass = null;
239            CPUTF8 innerName = null;
240            CPClass outerClass = null;
241
242            innerClass = innerClassIndex != -1 ? cpBands.cpClassValue(innerClassIndex)
243                : cpBands.cpClassValue(innerClassString);
244            if (!icStored.isAnonymous()) {
245                innerName = simpleClassNameIndex != -1 ? cpBands.cpUTF8Value(simpleClassNameIndex)
246                    : cpBands.cpUTF8Value(simpleClassName);
247            }
248
249            if (icStored.isMember()) {
250                outerClass = outerClassIndex != -1 ? cpBands.cpClassValue(outerClassIndex)
251                    : cpBands.cpClassValue(outerClassString);
252            }
253            final int flags = icStored.F;
254            innerClassesAttribute.addInnerClassesEntry(innerClass, outerClass, innerName, flags);
255            addInnerClassesAttr = true;
256        }
257        // If ic_local is sent and it's empty, don't add
258        // the inner classes attribute.
259        if (icLocalSent && (icLocal.length == 0)) {
260            addInnerClassesAttr = false;
261        }
262
263        // If ic_local is not sent and ic_relevant is empty,
264        // don't add the inner class attribute.
265        if (!icLocalSent && (icRelevant.length == 0)) {
266            addInnerClassesAttr = false;
267        }
268
269        if (addInnerClassesAttr) {
270            // Need to add the InnerClasses attribute to the
271            // existing classFile attributes.
272            final Attribute[] originalAttrs = classFile.attributes;
273            final Attribute[] newAttrs = new Attribute[originalAttrs.length + 1];
274            System.arraycopy(originalAttrs, 0, newAttrs, 0, originalAttrs.length);
275            newAttrs[newAttrs.length - 1] = innerClassesAttribute;
276            classFile.attributes = newAttrs;
277            cp.addWithNestedEntries(innerClassesAttribute);
278        }
279        // sort CP according to cp_All
280        cp.resolve(this);
281        // NOTE the indexOf is only valid after the cp.resolve()
282        // build up remainder of file
283        classFile.accessFlags = (int) classBands.getClassFlags()[classNum];
284        classFile.thisClass = cp.indexOf(cfThis);
285        classFile.superClass = cp.indexOf(cfSuper);
286        // TODO placate format of file for writing purposes
287        classFile.interfaces = new int[cfInterfaces.length];
288        for (i = 0; i < cfInterfaces.length; i++) {
289            classFile.interfaces[i] = cp.indexOf(cfInterfaces[i]);
290        }
291        classFile.fields = cfFields;
292        classFile.methods = cfMethods;
293        return classFile;
294    }
295
296    /**
297     * Given an ic_local and an ic_relevant, use them to calculate what should be added as ic_stored.
298     *
299     * @param icLocal IcTuple[] array of local transmitted tuples
300     * @param icRelevant IcTuple[] array of relevant tuples
301     * @return List of tuples to be stored. If ic_local is null or empty, the values returned may not be correct. The
302     *         caller will have to determine if this is the case.
303     */
304    private List<IcTuple> computeIcStored(final IcTuple[] icLocal, final IcTuple[] icRelevant) {
305        final List<IcTuple> result = new ArrayList<>(icRelevant.length);
306        final List<IcTuple> duplicates = new ArrayList<>(icRelevant.length);
307        final Set<IcTuple> isInResult = new HashSet<>(icRelevant.length);
308
309        // need to compute:
310        // result = ic_local XOR ic_relevant
311
312        // add ic_local
313        if (icLocal != null) {
314            for (final IcTuple element : icLocal) {
315                if (isInResult.add(element)) {
316                    result.add(element);
317                }
318            }
319        }
320
321        // add ic_relevant
322        for (final IcTuple element : icRelevant) {
323            if (isInResult.add(element)) {
324                result.add(element);
325            } else {
326                duplicates.add(element);
327            }
328        }
329
330        // eliminate "duplicates"
331        duplicates.forEach(result::remove);
332
333        return result;
334    }
335
336    protected AttrDefinitionBands getAttrDefinitionBands() {
337        return attrDefinitionBands;
338    }
339
340    protected ClassBands getClassBands() {
341        return classBands;
342    }
343
344    public SegmentConstantPool getConstantPool() {
345        return cpBands.getConstantPool();
346    }
347
348    protected CpBands getCpBands() {
349        return cpBands;
350    }
351
352    protected IcBands getIcBands() {
353        return icBands;
354    }
355
356    public SegmentHeader getSegmentHeader() {
357        return header;
358    }
359
360    public void log(final int logLevel, final String message) {
361        if (this.logLevel >= logLevel) {
362            logStream.println(message);
363        }
364    }
365
366    /**
367     * Override the archive's deflate hint with the given boolean
368     *
369     * @param deflateHint - the deflate hint to use
370     */
371    public void overrideDeflateHint(final boolean deflateHint) {
372        this.overrideDeflateHint = true;
373        this.deflateHint = deflateHint;
374    }
375
376    /**
377     * This performs the actual work of parsing against a non-static instance of Segment. This method is intended to run
378     * concurrently for multiple segments.
379     *
380     * @throws IOException if a problem occurs during reading from the underlying stream
381     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
382     */
383    private void parseSegment() throws IOException, Pack200Exception {
384
385        header.unpack();
386        cpBands.unpack();
387        attrDefinitionBands.unpack();
388        icBands.unpack();
389        classBands.unpack();
390        bcBands.unpack();
391        fileBands.unpack();
392
393        int classNum = 0;
394        final int numberOfFiles = header.getNumberOfFiles();
395        final String[] fileName = fileBands.getFileName();
396        final int[] fileOptions = fileBands.getFileOptions();
397        final SegmentOptions options = header.getOptions();
398
399        classFilesContents = new byte[numberOfFiles][];
400        fileDeflate = new boolean[numberOfFiles];
401        fileIsClass = new boolean[numberOfFiles];
402
403        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
404        final DataOutputStream dos = new DataOutputStream(bos);
405
406        for (int i = 0; i < numberOfFiles; i++) {
407            String name = fileName[i];
408
409            final boolean nameIsEmpty = (name == null) || name.equals("");
410            final boolean isClass = (fileOptions[i] & 2) == 2 || nameIsEmpty;
411            if (isClass && nameIsEmpty) {
412                name = cpBands.getCpClass()[classBands.getClassThisInts()[classNum]] + ".class";
413                fileName[i] = name;
414            }
415
416            if (!overrideDeflateHint) {
417                fileDeflate[i] = (fileOptions[i] & 1) == 1 || options.shouldDeflate();
418            } else {
419                fileDeflate[i] = deflateHint;
420            }
421
422            fileIsClass[i] = isClass;
423
424            if (isClass) {
425                final ClassFile classFile = buildClassFile(classNum);
426                classFile.write(dos);
427                dos.flush();
428
429                classFilesContents[classNum] = bos.toByteArray();
430                bos.reset();
431
432                classNum++;
433            }
434        }
435    }
436
437    /**
438     * This performs reading the data from the stream into non-static instance of Segment. After the completion of this
439     * method stream can be freed.
440     *
441     * @param in the input stream to read from
442     * @throws IOException if a problem occurs during reading from the underlying stream
443     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
444     */
445    private void readSegment(final InputStream in) throws IOException, Pack200Exception {
446        log(LOG_LEVEL_VERBOSE, "-------");
447        cpBands = new CpBands(this);
448        cpBands.read(in);
449        attrDefinitionBands = new AttrDefinitionBands(this);
450        attrDefinitionBands.read(in);
451        icBands = new IcBands(this);
452        icBands.read(in);
453        classBands = new ClassBands(this);
454        classBands.read(in);
455        bcBands = new BcBands(this);
456        bcBands.read(in);
457        fileBands = new FileBands(this);
458        fileBands.read(in);
459
460        fileBands.processFileBits();
461    }
462
463    public void setLogLevel(final int logLevel) {
464        this.logLevel = logLevel;
465    }
466
467    public void setLogStream(final OutputStream logStream) {
468        this.logStream = new PrintWriter(new OutputStreamWriter(logStream, Charset.defaultCharset()), false);
469    }
470
471    public void setPreRead(final boolean value) {
472        doPreRead = value;
473    }
474
475    /**
476     * Unpacks a packed stream (either .pack. or .pack.gz) into a corresponding JarOuputStream.
477     *
478     * @param in a packed stream.
479     * @param out output stream.
480     * @throws Pack200Exception if there is a problem unpacking
481     * @throws IOException if there is a problem with I/O during unpacking
482     */
483    public void unpack(final InputStream in, final JarOutputStream out) throws IOException, Pack200Exception {
484        unpackRead(in);
485        unpackProcess();
486        unpackWrite(out);
487    }
488
489    void unpackProcess() throws IOException, Pack200Exception {
490        if (internalBuffer != null) {
491            readSegment(internalBuffer);
492        }
493        parseSegment();
494    }
495
496    /*
497     * Package-private accessors for unpacking stages
498     */
499    void unpackRead(InputStream in) throws IOException, Pack200Exception {
500        if (!in.markSupported()) {
501            in = new BufferedInputStream(in);
502        }
503
504        header = new SegmentHeader(this);
505        header.read(in);
506
507        final int size = (int) header.getArchiveSize() - header.getArchiveSizeOffset();
508
509        if (doPreRead && header.getArchiveSize() != 0) {
510            final byte[] data = new byte[size];
511            in.read(data);
512            internalBuffer = new BufferedInputStream(new ByteArrayInputStream(data));
513        } else {
514            readSegment(in);
515        }
516    }
517
518    void unpackWrite(final JarOutputStream out) throws IOException {
519        writeJar(out);
520        if (logStream != null) {
521            logStream.close();
522        }
523    }
524
525    /**
526     * Writes the segment to an output stream. The output stream should be pre-buffered for efficiency. Also takes the
527     * same input stream for reading, since the file bits may not be loaded and thus just copied from one stream to
528     * another. Doesn't close the output stream when finished, in case there are more entries (e.g. further segments) to
529     * be written.
530     *
531     * @param out the JarOutputStream to write data to
532     * @throws IOException if an error occurs while reading or writing to the streams
533     */
534    public void writeJar(final JarOutputStream out) throws IOException {
535        final String[] fileName = fileBands.getFileName();
536        final int[] fileModtime = fileBands.getFileModtime();
537        final long[] fileSize = fileBands.getFileSize();
538        final byte[][] fileBits = fileBands.getFileBits();
539
540        // now write the files out
541        int classNum = 0;
542        final int numberOfFiles = header.getNumberOfFiles();
543        final long archiveModtime = header.getArchiveModtime();
544
545        for (int i = 0; i < numberOfFiles; i++) {
546            final String name = fileName[i];
547            // For Pack200 archives, modtime is in seconds
548            // from the epoch. JarEntries need it to be in
549            // milliseconds from the epoch.
550            // Even though we're adding two longs and multiplying
551            // by 1000, we won't overflow because both longs are
552            // always under 2^32.
553            final long modtime = 1000 * (archiveModtime + fileModtime[i]);
554            final boolean deflate = fileDeflate[i];
555
556            final JarEntry entry = new JarEntry(name);
557            if (deflate) {
558                entry.setMethod(ZipEntry.DEFLATED);
559            } else {
560                entry.setMethod(ZipEntry.STORED);
561                final CRC32 crc = new CRC32();
562                if (fileIsClass[i]) {
563                    crc.update(classFilesContents[classNum]);
564                    entry.setSize(classFilesContents[classNum].length);
565                } else {
566                    crc.update(fileBits[i]);
567                    entry.setSize(fileSize[i]);
568                }
569                entry.setCrc(crc.getValue());
570            }
571            // On Windows at least, need to correct for timezone
572            entry.setTime(modtime - TimeZone.getDefault().getRawOffset());
573            out.putNextEntry(entry);
574
575            // write to output stream
576            if (fileIsClass[i]) {
577                entry.setSize(classFilesContents[classNum].length);
578                out.write(classFilesContents[classNum]);
579                classNum++;
580            } else {
581                entry.setSize(fileSize[i]);
582                out.write(fileBits[i]);
583            }
584        }
585    }
586
587}