001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.harmony.pack200;
018
019import java.io.EOFException;
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.List;
025
026import org.apache.commons.compress.utils.ExactMath;
027
028/**
029 * A BHSD codec is a means of encoding integer values as a sequence of bytes or vice versa using a specified "BHSD"
030 * encoding mechanism. It uses a variable-length encoding and a modified sign representation such that small numbers are
031 * represented as a single byte, whilst larger numbers take more bytes to encode. The number may be signed or unsigned;
032 * if it is unsigned, it can be weighted towards positive numbers or equally distributed using a one's complement. The
033 * Codec also supports delta coding, where a sequence of numbers is represented as a series of first-order differences.
034 * So a delta encoding of the integers [1..10] would be represented as a sequence of 10x1s. This allows the absolute
035 * value of a coded integer to fall outside of the 'small number' range, whilst still being encoded as a single byte.
036 *
037 * A BHSD codec is configured with four parameters:
038 * <dl>
039 * <dt>B</dt>
040 * <dd>The maximum number of bytes that each value is encoded as. B must be a value between [1..5]. For a pass-through
041 * coding (where each byte is encoded as itself, aka {@link #BYTE1}, B is 1 (each byte takes a maximum of 1 byte).</dd>
042 * <dt>H</dt>
043 * <dd>The radix of the integer. Values are defined as a sequence of values, where value {@code n} is multiplied by
044 * {@code H^<sup>n</sup>}. So the number 1234 may be represented as the sequence 4 3 2 1 with a radix (H) of 10.
045 * Note that other permutations are also possible; 43 2 1 will also encode 1234. The co-parameter L is defined as 256-H.
046 * This is important because only the last value in a sequence may be &lt; L; all prior values must be &gt; L.</dd>
047 * <dt>S</dt>
048 * <dd>Whether the codec represents signed values (or not). This may have 3 values; 0 (unsigned), 1 (signed, one's
049 * complement) or 2 (signed, two's complement)</dd>
050 * <dt>D</dt>
051 * <dd>Whether the codec represents a delta encoding. This may be 0 (no delta) or 1 (delta encoding). A delta encoding
052 * of 1 indicates that values are cumulative; a sequence of {@code 1 1 1 1 1} will represent the sequence
053 * {@code 1 2 3 4 5}. For this reason, the codec supports two variants of decode; one
054 * {@link #decode(InputStream, long) with} and one {@link #decode(InputStream) without} a {@code last} parameter.
055 * If the codec is a non-delta encoding, then the value is ignored if passed. If the codec is a delta encoding, it is a
056 * run-time error to call the value without the extra parameter, and the previous value should be returned. (It was
057 * designed this way to support multi-threaded access without requiring a new instance of the Codec to be cloned for
058 * each use.)
059 * </dd>
060 * </dl>
061 *
062 * Codecs are notated as (B,H,S,D) and either D or S,D may be omitted if zero. Thus {@link #BYTE1} is denoted
063 * (1,256,0,0) or (1,256). The {@link #toString()} method prints out the condensed form of the encoding. Often, the last
064 * character in the name ({@link #BYTE1}, {@link #UNSIGNED5}) gives a clue as to the B value. Those that start with U
065 * ({@link #UDELTA5}, {@link #UNSIGNED5}) are unsigned; otherwise, in most cases, they are signed. The presence of the
066 * word Delta ({@link #DELTA5}, {@link #UDELTA5}) indicates a delta encoding is used.
067 *
068 */
069public final class BHSDCodec extends Codec {
070
071    /**
072     * The maximum number of bytes in each coding word
073     */
074    private final int b;
075
076    /**
077     * Whether delta encoding is used (0=false,1=true)
078     */
079    private final int d;
080
081    /**
082     * The radix of the encoding
083     */
084    private final int h;
085
086    /**
087     * The co-parameter of h; 256-h
088     */
089    private final int l;
090
091    /**
092     * Represents signed numbers or not (0=unsigned,1/2=signed)
093     */
094    private final int s;
095
096    private long cardinality;
097
098    private final long smallest;
099
100    private final long largest;
101
102    /**
103     * radix^i powers
104     */
105    private final long[] powers;
106
107    /**
108     * Constructs an unsigned, non-delta Codec with the given B and H values.
109     *
110     * @param b the maximum number of bytes that a value can be encoded as [1..5]
111     * @param h the radix of the encoding [1..256]
112     */
113    public BHSDCodec(final int b, final int h) {
114        this(b, h, 0, 0);
115    }
116
117    /**
118     * Constructs a non-delta Codec with the given B, H and S values.
119     *
120     * @param b the maximum number of bytes that a value can be encoded as [1..5]
121     * @param h the radix of the encoding [1..256]
122     * @param s whether the encoding represents signed numbers (s=0 is unsigned; s=1 is signed with 1s complement; s=2
123     *        is signed with ?)
124     */
125    public BHSDCodec(final int b, final int h, final int s) {
126        this(b, h, s, 0);
127    }
128
129    /**
130     * Constructs a Codec with the given B, H, S and D values.
131     *
132     * @param b the maximum number of bytes that a value can be encoded as [1..5]
133     * @param h the radix of the encoding [1..256]
134     * @param s whether the encoding represents signed numbers (s=0 is unsigned; s=1 is signed with 1s complement; s=2
135     *        is signed with ?)
136     * @param d whether this is a delta encoding (d=0 is non-delta; d=1 is delta)
137     */
138    public BHSDCodec(final int b, final int h, final int s, final int d) {
139        if (b < 1 || b > 5) {
140            throw new IllegalArgumentException("1<=b<=5");
141        }
142        if (h < 1 || h > 256) {
143            throw new IllegalArgumentException("1<=h<=256");
144        }
145        if (s < 0 || s > 2) {
146            throw new IllegalArgumentException("0<=s<=2");
147        }
148        if (d < 0 || d > 1) {
149            throw new IllegalArgumentException("0<=d<=1");
150        }
151        if (b == 1 && h != 256) {
152            throw new IllegalArgumentException("b=1 -> h=256");
153        }
154        if (h == 256 && b == 5) {
155            throw new IllegalArgumentException("h=256 -> b!=5");
156        }
157        this.b = b;
158        this.h = h;
159        this.s = s;
160        this.d = d;
161        this.l = 256 - h;
162        if (h == 1) {
163            cardinality = b * 255 + 1;
164        } else {
165            cardinality = (long) ((long) (l * (1 - Math.pow(h, b)) / (1 - h)) + Math.pow(h, b));
166        }
167        smallest = calculateSmallest();
168        largest = calculateLargest();
169
170        powers = new long[b];
171        Arrays.setAll(powers, c -> (long) Math.pow(h, c));
172    }
173
174    private long calculateLargest() {
175        long result;
176        // TODO This can probably be optimized into a better mathematical
177        // statement
178        if (d == 1) {
179            final BHSDCodec bh0 = new BHSDCodec(b, h);
180            return bh0.largest();
181        }
182        switch (s) {
183        case 0:
184            result = cardinality() - 1;
185            break;
186        case 1:
187            result = cardinality() / 2 - 1;
188            break;
189        case 2:
190            result = (3L * cardinality()) / 4 - 1;
191            break;
192        default:
193            throw new Error("Unknown s value");
194        }
195        return Math.min((s == 0 ? ((long) Integer.MAX_VALUE) << 1 : Integer.MAX_VALUE) - 1, result);
196    }
197
198    private long calculateSmallest() {
199        long result;
200        if (d == 1 || !isSigned()) {
201            if (cardinality >= 4294967296L) { // 2^32
202                result = Integer.MIN_VALUE;
203            } else {
204                result = 0;
205            }
206        } else {
207            result = Math.max(Integer.MIN_VALUE, -cardinality() / (1 << s));
208        }
209        return result;
210    }
211
212    /**
213     * Returns the cardinality of this codec; that is, the number of distinct values that it can contain.
214     *
215     * @return the cardinality of this codec
216     */
217    public long cardinality() {
218        return cardinality;
219    }
220
221    @Override
222    public int decode(final InputStream in) throws IOException, Pack200Exception {
223        if (d != 0) {
224            throw new Pack200Exception("Delta encoding used without passing in last value; this is a coding error");
225        }
226        return decode(in, 0);
227    }
228
229    @Override
230    public int decode(final InputStream in, final long last) throws IOException, Pack200Exception {
231        int n = 0;
232        long z = 0;
233        long x = 0;
234
235        do {
236            x = in.read();
237            lastBandLength++;
238            z += x * powers[n];
239            n++;
240        } while (x >= l && n < b);
241
242        if (x == -1) {
243            throw new EOFException("End of stream reached whilst decoding");
244        }
245
246        if (isSigned()) {
247            final int u = ((1 << s) - 1);
248            if ((z & u) == u) {
249                z = z >>> s ^ -1L;
250            } else {
251                z = z - (z >>> s);
252            }
253        }
254        // This algorithm does the same thing, but is probably slower. Leaving
255        // in for now for readability
256        // if (isSigned()) {
257        // long u = z;
258        // long twoPowS = (long)Math.pow(2, s);
259        // double twoPowSMinusOne = twoPowS-1;
260        // if (u % twoPowS < twoPowSMinusOne) {
261        // if (cardinality < Math.pow(2, 32)) {
262        // z = (long) (u - (Math.floor(u/ twoPowS)));
263        // } else {
264        // z = cast32((long) (u - (Math.floor(u/ twoPowS))));
265        // }
266        // } else {
267        // z = (long) (-Math.floor(u/ twoPowS) - 1);
268        // }
269        // }
270        if (isDelta()) {
271            z += last;
272        }
273        return (int) z;
274    }
275
276    // private long cast32(long u) {
277    // u = (long) ((long) ((u + Math.pow(2, 31)) % Math.pow(2, 32)) -
278    // Math.pow(2, 31));
279    // return u;
280    // }
281
282    @Override
283    public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
284        final int[] band = super.decodeInts(n, in);
285        if (isDelta()) {
286            for (int i = 0; i < band.length; i++) {
287                while (band[i] > largest) {
288                    band[i] -= cardinality;
289                }
290                while (band[i] < smallest) {
291                    band[i] = ExactMath.add(band[i], cardinality);
292                }
293            }
294        }
295        return band;
296    }
297
298    @Override
299    public int[] decodeInts(final int n, final InputStream in, final int firstValue)
300        throws IOException, Pack200Exception {
301        final int[] band = super.decodeInts(n, in, firstValue);
302        if (isDelta()) {
303            for (int i = 0; i < band.length; i++) {
304                while (band[i] > largest) {
305                    band[i] -= cardinality;
306                }
307                while (band[i] < smallest) {
308                    band[i] = ExactMath.add(band[i], cardinality);
309                }
310            }
311        }
312        return band;
313    }
314
315    @Override
316    public byte[] encode(final int value) throws Pack200Exception {
317        return encode(value, 0);
318    }
319
320    @Override
321    public byte[] encode(final int value, final int last) throws Pack200Exception {
322        if (!encodes(value)) {
323            throw new Pack200Exception("The codec " + this + " does not encode the value " + value);
324        }
325
326        long z = value;
327        if (isDelta()) {
328            z -= last;
329        }
330        if (isSigned()) {
331            if (z < Integer.MIN_VALUE) {
332                z += 4294967296L;
333            } else if (z > Integer.MAX_VALUE) {
334                z -= 4294967296L;
335            }
336            if (z < 0) {
337                z = (-z << s) - 1;
338            } else if (s == 1) {
339                z = z << s;
340            } else {
341                z += (z - z % 3) / 3;
342            }
343        } else if (z < 0) {
344            // Need to use integer overflow here to represent negatives.
345            // 4294967296L is the 1 << 32.
346            z += Math.min(cardinality, 4294967296L);
347        }
348        if (z < 0) {
349            throw new Pack200Exception("unable to encode");
350        }
351
352        final List<Byte> byteList = new ArrayList<>();
353        for (int n = 0; n < b; n++) {
354            long byteN;
355            if (z < l) {
356                byteN = z;
357            } else {
358                byteN = z % h;
359                while (byteN < l) {
360                    byteN += h;
361                }
362            }
363            byteList.add(Byte.valueOf((byte) byteN));
364            if (byteN < l) {
365                break;
366            }
367            z -= byteN;
368            z /= h;
369        }
370        final byte[] bytes = new byte[byteList.size()];
371        for (int i = 0; i < bytes.length; i++) {
372            bytes[i] = byteList.get(i).byteValue();
373        }
374        return bytes;
375    }
376
377    /**
378     * True if this encoding can code the given value
379     *
380     * @param value the value to check
381     * @return {@code true} if the encoding can encode this value
382     */
383    public boolean encodes(final long value) {
384        return value >= smallest && value <= largest;
385    }
386
387    @Override
388    public boolean equals(final Object o) {
389        if (o instanceof BHSDCodec) {
390            final BHSDCodec codec = (BHSDCodec) o;
391            return codec.b == b && codec.h == h && codec.s == s && codec.d == d;
392        }
393        return false;
394    }
395
396    /**
397     * @return the b
398     */
399    public int getB() {
400        return b;
401    }
402
403    /**
404     * @return the h
405     */
406    public int getH() {
407        return h;
408    }
409
410    /**
411     * @return the l
412     */
413    public int getL() {
414        return l;
415    }
416
417    /**
418     * @return the s
419     */
420    public int getS() {
421        return s;
422    }
423
424    @Override
425    public int hashCode() {
426        return ((b * 37 + h) * 37 + s) * 37 + d;
427    }
428
429    /**
430     * Returns true if this codec is a delta codec
431     *
432     * @return true if this codec is a delta codec
433     */
434    public boolean isDelta() {
435        return d != 0;
436    }
437
438    /**
439     * Returns true if this codec is a signed codec
440     *
441     * @return true if this codec is a signed codec
442     */
443    public boolean isSigned() {
444        return s != 0;
445    }
446
447    /**
448     * Returns the largest value that this codec can represent.
449     *
450     * @return the largest value that this codec can represent.
451     */
452    public long largest() {
453        return largest;
454    }
455
456    /**
457     * Returns the smallest value that this codec can represent.
458     *
459     * @return the smallest value that this codec can represent.
460     */
461    public long smallest() {
462        return smallest;
463    }
464
465    /**
466     * Returns the codec in the form (1,256) or (1,64,1,1). Note that trailing zero fields are not shown.
467     */
468    @Override
469    public String toString() {
470        final StringBuilder buffer = new StringBuilder(11);
471        buffer.append('(');
472        buffer.append(b);
473        buffer.append(',');
474        buffer.append(h);
475        if (s != 0 || d != 0) {
476            buffer.append(',');
477            buffer.append(s);
478        }
479        if (d != 0) {
480            buffer.append(',');
481            buffer.append(d);
482        }
483        buffer.append(')');
484        return buffer.toString();
485    }
486}