001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.UncheckedIOException;
025import java.math.BigInteger;
026import java.nio.ByteBuffer;
027import java.nio.charset.Charset;
028import java.nio.charset.StandardCharsets;
029import java.util.ArrayList;
030import java.util.Collections;
031import java.util.HashMap;
032import java.util.List;
033import java.util.Map;
034
035import org.apache.commons.compress.archivers.zip.ZipEncoding;
036import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
037import org.apache.commons.compress.utils.CharsetNames;
038import org.apache.commons.compress.utils.IOUtils;
039import org.apache.commons.compress.utils.ParsingUtils;
040
041/**
042 * This class provides static utility methods to work with byte streams.
043 *
044 * @Immutable
045 */
046// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
047public class TarUtils {
048
    /** Mask that turns a (signed) byte into its unsigned value when AND-ed with it; used by {@link #computeCheckSum}. */
    private static final int BYTE_MASK = 255;

    /**
     * Encoding obtained from {@link ZipEncodingHelper} for the JVM's default charset. The name helpers that take no explicit encoding try this one first.
     */
    static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset());
052
053    /**
054     * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding.
055     */
056    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
057        @Override
058        public boolean canEncode(final String name) {
059            return true;
060        }
061
062        @Override
063        public String decode(final byte[] buffer) {
064            final int length = buffer.length;
065            final StringBuilder result = new StringBuilder(length);
066
067            for (final byte b : buffer) {
068                if (b == 0) { // Trailing null
069                    break;
070                }
071                result.append((char) (b & 0xFF)); // Allow for sign-extension
072            }
073
074            return result.toString();
075        }
076
077        @Override
078        public ByteBuffer encode(final String name) {
079            final int length = name.length();
080            final byte[] buf = new byte[length];
081
082            // copy until end of input or output is reached.
083            for (int i = 0; i < length; ++i) {
084                buf[i] = (byte) name.charAt(i);
085            }
086            return ByteBuffer.wrap(buf);
087        }
088    };
089
090    /**
091     * Computes the checksum of a tar entry header.
092     *
093     * @param buf The tar entry's header buffer.
094     * @return The computed checksum.
095     */
096    public static long computeCheckSum(final byte[] buf) {
097        long sum = 0;
098
099        for (final byte element : buf) {
100            sum += BYTE_MASK & element;
101        }
102
103        return sum;
104    }
105
106    // Helper method to generate the exception message
107    private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) {
108        // default charset is good enough for an exception message,
109        //
110        // the alternative was to modify parseOctal and
111        // parseOctalOrBinary to receive the ZipEncoding of the
112        // archive (deprecating the existing public methods, of
113        // course) and dealing with the fact that ZipEncoding#decode
114        // can throw an IOException which parseOctal* doesn't declare
115        String string = new String(buffer, offset, length, Charset.defaultCharset());
116
117        string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
118        return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
119    }
120
121    private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
122        final BigInteger val = BigInteger.valueOf(value);
123        final byte[] b = val.toByteArray();
124        final int len = b.length;
125        if (len > length - 1) {
126            throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
127        }
128        final int off = offset + length - len;
129        System.arraycopy(b, 0, buf, off, len);
130        final byte fill = (byte) (negative ? 0xff : 0);
131        for (int i = offset + 1; i < off; i++) {
132            buf[i] = fill;
133        }
134    }
135
136    /**
137     * Writes an octal value into a buffer.
138     *
139     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then
140     * space.
141     *
142     * @param value  The value to convert
143     * @param buf    The destination buffer
144     * @param offset The starting offset into the buffer.
145     * @param length The size of the buffer.
146     * @return The updated value of offset, i.e. offset+length
147     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
148     */
149    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
150
151        int idx = length - 2; // for NUL and space
152        formatUnsignedOctalString(value, buf, offset, idx);
153
154        buf[offset + idx++] = 0; // Trailing null
155        buf[offset + idx] = (byte) ' '; // Trailing space
156
157        return offset + length;
158    }
159
160    private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
161        final int bits = (length - 1) * 8;
162        final long max = 1L << bits;
163        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
164        if (val < 0 || val >= max) {
165            throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
166        }
167        if (negative) {
168            val ^= max - 1;
169            val++;
170            val |= 0xffL << bits;
171        }
172        for (int i = offset + length - 1; i >= offset; i--) {
173            buf[i] = (byte) val;
174            val >>= 8;
175        }
176    }
177
178    /**
179     * Writes an octal long integer into a buffer.
180     *
181     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
182     *
183     * @param value  The value to write as octal
184     * @param buf    The destinationbuffer.
185     * @param offset The starting offset into the buffer.
186     * @param length The length of the buffer
187     * @return The updated offset
188     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
189     */
190    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
191
192        final int idx = length - 1; // For space
193
194        formatUnsignedOctalString(value, buf, offset, idx);
195        buf[offset + idx] = (byte) ' '; // Trailing space
196
197        return offset + length;
198    }
199
200    /**
201     * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise.
202     *
203     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
204     *
205     * @param value  The value to write into the buffer.
206     * @param buf    The destination buffer.
207     * @param offset The starting offset into the buffer.
208     * @param length The length of the buffer.
209     * @return The updated offset.
210     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer.
211     * @since 1.4
212     */
213    public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) {
214
215        // Check whether we are dealing with UID/GID or SIZE field
216        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
217
218        final boolean negative = value < 0;
219        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
220            return formatLongOctalBytes(value, buf, offset, length);
221        }
222
223        if (length < 9) {
224            formatLongBinary(value, buf, offset, length, negative);
225        } else {
226            formatBigIntegerBinary(value, buf, offset, length, negative);
227        }
228
229        buf[offset] = (byte) (negative ? 0xff : 0x80);
230        return offset + length;
231    }
232
233    /**
234     * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
235     * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
236     *
237     * @param name   The header name from which to copy the characters.
238     * @param buf    The buffer where the name is to be stored.
239     * @param offset The starting offset into the buffer
240     * @param length The maximum number of header bytes to copy.
241     * @return The updated offset, i.e. offset + length
242     */
243    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
244        try {
245            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
246        } catch (final IOException ex) { // NOSONAR
247            try {
248                return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING);
249            } catch (final IOException ex2) {
250                // impossible
251                throw new UncheckedIOException(ex2); // NOSONAR
252            }
253        }
254    }
255
256    /**
257     * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
258     * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
259     *
260     * @param name     The header name from which to copy the characters.
261     * @param buf      The buffer where the name is to be stored.
262     * @param offset   The starting offset into the buffer
263     * @param length   The maximum number of header bytes to copy.
264     * @param encoding name of the encoding to use for file names
265     * @since 1.4
266     * @return The updated offset, i.e. offset + length
267     * @throws IOException on error
268     */
269    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException {
270        int len = name.length();
271        ByteBuffer b = encoding.encode(name);
272        while (b.limit() > length && len > 0) {
273            b = encoding.encode(name.substring(0, --len));
274        }
275        final int limit = b.limit() - b.position();
276        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
277
278        // Pad any remaining output bytes with NUL
279        for (int i = limit; i < length; ++i) {
280            buf[offset + i] = 0;
281        }
282
283        return offset + length;
284    }
285
286    /**
287     * Writes an octal integer into a buffer.
288     *
289     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL
290     *
291     * @param value  The value to write
292     * @param buf    The buffer to receive the output
293     * @param offset The starting offset into the buffer
294     * @param length The size of the output buffer
295     * @return The updated offset, i.e. offset+length
296     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
297     */
298    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
299
300        int idx = length - 2; // For space and trailing null
301        formatUnsignedOctalString(value, buf, offset, idx);
302
303        buf[offset + idx++] = (byte) ' '; // Trailing space
304        buf[offset + idx] = 0; // Trailing null
305
306        return offset + length;
307    }
308
309    /**
310     * Fills a buffer with unsigned octal number, padded with leading zeroes.
311     *
312     * @param value  number to convert to octal - treated as unsigned
313     * @param buffer destination buffer
314     * @param offset starting offset in buffer
315     * @param length length of buffer to fill
316     * @throws IllegalArgumentException if the value will not fit in the buffer
317     */
318    public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) {
319        int remaining = length;
320        remaining--;
321        if (value == 0) {
322            buffer[offset + remaining--] = (byte) '0';
323        } else {
324            long val = value;
325            for (; remaining >= 0 && val != 0; --remaining) {
326                // CheckStyle:MagicNumber OFF
327                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
328                val = val >>> 3;
329                // CheckStyle:MagicNumber ON
330            }
331            if (val != 0) {
332                throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length);
333            }
334        }
335
336        for (; remaining >= 0; --remaining) { // leading zeros
337            buffer[offset + remaining] = (byte) '0';
338        }
339    }
340
341    private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) {
342        final byte[] remainder = new byte[length - 1];
343        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
344        BigInteger val = new BigInteger(remainder);
345        if (negative) {
346            // 2's complement
347            val = val.add(BigInteger.valueOf(-1)).not();
348        }
349        if (val.bitLength() > 63) {
350            throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number" + " exceeds maximum signed long" + " value");
351        }
352        return negative ? -val.longValue() : val.longValue();
353    }
354
355    private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) {
356        if (length >= 9) {
357            throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number" + " exceeds maximum signed long" + " value");
358        }
359        long val = 0;
360        for (int i = 1; i < length; i++) {
361            val = (val << 8) + (buffer[offset + i] & 0xff);
362        }
363        if (negative) {
364            // 2's complement
365            val--;
366            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
367        }
368        return negative ? -val : val;
369    }
370
371    /**
372     * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs.
373     *
374     * @param buffer The buffer from which to parse.
375     * @param offset The offset into the buffer from which to parse.
376     * @return The boolean value of the bytes.
377     * @throws IllegalArgumentException if an invalid byte is detected.
378     */
379    public static boolean parseBoolean(final byte[] buffer, final int offset) {
380        return buffer[offset] == 1;
381    }
382
383    /**
384     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string
385     * consisting of comma-separated values "offset,size[,offset-1,size-1...]"
386     *
387     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
388     * @return unmodifiable list of sparse headers parsed from sparse map
389     * @throws IOException Corrupted TAR archive.
390     * @since 1.21
391     */
392    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException {
393        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
394        final String[] sparseHeaderStrings = sparseMap.split(",");
395        if (sparseHeaderStrings.length % 2 == 1) {
396            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
397        }
398
399        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
400            final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]);
401            if (sparseOffset < 0) {
402                throw new IOException("Corrupted TAR archive." + " Sparse struct offset contains negative value");
403            }
404            final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]);
405            if (sparseNumbytes < 0) {
406                throw new IOException("Corrupted TAR archive." + " Sparse struct numbytes contains negative value");
407            }
408            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
409        }
410
411        return Collections.unmodifiableList(sparseHeaders);
412    }
413
414    /**
415     * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
416     *
417     * @param buffer The buffer from which to parse.
418     * @param offset The offset into the buffer from which to parse.
419     * @param length The maximum number of bytes to parse.
420     * @return The entry name.
421     */
422    public static String parseName(final byte[] buffer, final int offset, final int length) {
423        try {
424            return parseName(buffer, offset, length, DEFAULT_ENCODING);
425        } catch (final IOException ex) { // NOSONAR
426            try {
427                return parseName(buffer, offset, length, FALLBACK_ENCODING);
428            } catch (final IOException ex2) {
429                // impossible
430                throw new UncheckedIOException(ex2); // NOSONAR
431            }
432        }
433    }
434
435    /**
436     * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
437     *
438     * @param buffer   The buffer from which to parse.
439     * @param offset   The offset into the buffer from which to parse.
440     * @param length   The maximum number of bytes to parse.
441     * @param encoding name of the encoding to use for file names
442     * @since 1.4
443     * @return The entry name.
444     * @throws IOException on error
445     */
446    public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException {
447
448        int len = 0;
449        for (int i = offset; len < length && buffer[i] != 0; i++) {
450            len++;
451        }
452        if (len > 0) {
453            final byte[] b = new byte[len];
454            System.arraycopy(buffer, offset, b, 0, len);
455            return encoding.decode(b);
456        }
457        return "";
458    }
459
460    /**
461     * Parses an octal string from a buffer.
462     *
463     * <p>
464     * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL.
465     * </p>
466     *
467     * <p>
468     * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields).
469     * </p>
470     *
471     * <p>
472     * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4.
473     * </p>
474     *
475     * @param buffer The buffer from which to parse.
476     * @param offset The offset into the buffer from which to parse.
477     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
478     * @return The long value of the octal string.
479     * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected.
480     */
481    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
482        long result = 0;
483        int end = offset + length;
484        int start = offset;
485
486        if (length < 2) {
487            throw new IllegalArgumentException("Length " + length + " must be at least 2");
488        }
489
490        if (buffer[start] == 0) {
491            return 0L;
492        }
493
494        // Skip leading spaces
495        while (start < end) {
496            if (buffer[start] != ' ') {
497                break;
498            }
499            start++;
500        }
501
502        // Trim all trailing NULs and spaces.
503        // The ustar and POSIX tar specs require a trailing NUL or
504        // space but some implementations use the extra digit for big
505        // sizes/uids/gids ...
506        byte trailer = buffer[end - 1];
507        while (start < end && (trailer == 0 || trailer == ' ')) {
508            end--;
509            trailer = buffer[end - 1];
510        }
511
512        for (; start < end; start++) {
513            final byte currentByte = buffer[start];
514            // CheckStyle:MagicNumber OFF
515            if (currentByte < '0' || currentByte > '7') {
516                throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
517            }
518            result = (result << 3) + (currentByte - '0'); // convert from ASCII
519            // CheckStyle:MagicNumber ON
520        }
521
522        return result;
523    }
524
525    /**
526     * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of
527     * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above.
528     *
529     * @param buffer The buffer from which to parse.
530     * @param offset The offset into the buffer from which to parse.
531     * @param length The maximum number of bytes to parse.
532     * @return The long value of the octal or binary string.
533     * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would
534     *                                  exceed the size of a signed long 64-bit integer.
535     * @since 1.4
536     */
537    public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) {
538
539        if ((buffer[offset] & 0x80) == 0) {
540            return parseOctal(buffer, offset, length);
541        }
542        final boolean negative = buffer[offset] == (byte) 0xff;
543        if (length < 9) {
544            return parseBinaryLong(buffer, offset, length, negative);
545        }
546        return parseBinaryBigInteger(buffer, offset, length, negative);
547    }
548
549    /**
550     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
551     *
552     * <p>
553     * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
554     * </p>
555     * <p>
556     * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use
557     * {@link #parseFromPAX01SparseHeaders} directly instead.
558     * </p>
559     *
560     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
561     * @return sparse headers parsed from sparse map
562     * @deprecated use #parseFromPAX01SparseHeaders instead
563     */
564    @Deprecated
565    protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) {
566        try {
567            return parseFromPAX01SparseHeaders(sparseMap);
568        } catch (final IOException ex) {
569            throw new UncheckedIOException(ex.getMessage(), ex);
570        }
571    }
572
573    /**
574     * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
575     * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are
576     * map entries, each one consisting of two numbers giving the offset and size of the data block it describes.
577     *
578     * @param inputStream parsing source.
579     * @param recordSize  The size the TAR header
580     * @return sparse headers
581     * @throws IOException if an I/O error occurs.
582     */
583    protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
584        // for 1.X PAX Headers
585        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
586        long bytesRead = 0;
587
588        long[] readResult = readLineOfNumberForPax1X(inputStream);
589        long sparseHeadersCount = readResult[0];
590        if (sparseHeadersCount < 0) {
591            // overflow while reading number?
592            throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
593        }
594        bytesRead += readResult[1];
595        while (sparseHeadersCount-- > 0) {
596            readResult = readLineOfNumberForPax1X(inputStream);
597            final long sparseOffset = readResult[0];
598            if (sparseOffset < 0) {
599                throw new IOException("Corrupted TAR archive." + " Sparse header block offset contains negative value");
600            }
601            bytesRead += readResult[1];
602
603            readResult = readLineOfNumberForPax1X(inputStream);
604            final long sparseNumbytes = readResult[0];
605            if (sparseNumbytes < 0) {
606                throw new IOException("Corrupted TAR archive." + " Sparse header block numbytes contains negative value");
607            }
608            bytesRead += readResult[1];
609            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
610        }
611
612        // skip the rest of this record data
613        final long bytesToSkip = recordSize - bytesRead % recordSize;
614        org.apache.commons.io.IOUtils.skip(inputStream, bytesToSkip);
615        return sparseHeaders;
616    }
617
618    /**
619     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
620     *
621     * <pre>
622     * GNU.sparse.size=size
623     * GNU.sparse.numblocks=numblocks
624     * repeat numblocks times
625     *   GNU.sparse.offset=offset
626     *   GNU.sparse.numbytes=numbytes
627     * end repeat
628     * </pre>
629     * <p>
630     * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
631     * </p>
632     * <p>
633     * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
634     * </p>
635     *
636     * @param inputStream      input stream to read keys and values
637     * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
638     * @param globalPaxHeaders global PAX headers of the tar archive
639     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
640     * @throws IOException if an I/O error occurs.
641     * @deprecated use the four-arg version instead
642     */
643    @Deprecated
644    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
645            final Map<String, String> globalPaxHeaders) throws IOException {
646        return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
647    }
648
649    /**
650     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
651     *
652     * <pre>
653     * GNU.sparse.size=size
654     * GNU.sparse.numblocks=numblocks
655     * repeat numblocks times
656     *   GNU.sparse.offset=offset
657     *   GNU.sparse.numbytes=numbytes
658     * end repeat
659     * </pre>
660     * <p>
661     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
662     * </p>
663     * <p>
664     * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
665     * </p>
666     *
667     * @param inputStream      input stream to read keys and values
668     * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
669     * @param globalPaxHeaders global PAX headers of the tar archive
670     * @param headerSize       total size of the PAX header, will be ignored if negative
671     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
672     * @throws IOException if an I/O error occurs.
673     * @since 1.21
674     */
675    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
676            final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException {
677        final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
678        Long offset = null;
679        // Format is "length keyword=value\n";
680        int totalRead = 0;
681        while (true) { // get length
682            int ch;
683            int len = 0;
684            int read = 0;
685            while ((ch = inputStream.read()) != -1) {
686                read++;
687                totalRead++;
688                if (ch == '\n') { // blank line in header
689                    break;
690                }
691                if (ch == ' ') { // End of length string
692                    // Get keyword
693                    final ByteArrayOutputStream coll = new ByteArrayOutputStream();
694                    while ((ch = inputStream.read()) != -1) {
695                        read++;
696                        totalRead++;
697                        if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) {
698                            break;
699                        }
700                        if (ch == '=') { // end of keyword
701                            final String keyword = coll.toString(CharsetNames.UTF_8);
702                            // Get rest of entry
703                            final int restLen = len - read;
704                            if (restLen <= 1) { // only NL
705                                headers.remove(keyword);
706                            } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
707                                throw new IOException("Paxheader value size " + restLen + " exceeds size of header record");
708                            } else {
709                                final byte[] rest = IOUtils.readRange(inputStream, restLen);
710                                final int got = rest.length;
711                                if (got != restLen) {
712                                    throw new IOException("Failed to read " + "Paxheader. Expected " + restLen + " bytes, read " + got);
713                                }
714                                totalRead += restLen;
715                                // Drop trailing NL
716                                if (rest[restLen - 1] != '\n') {
717                                    throw new IOException("Failed to read Paxheader." + "Value should end with a newline");
718                                }
719                                final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8);
720                                headers.put(keyword, value);
721
722                                // for 0.0 PAX Headers
723                                if (keyword.equals(TarGnuSparseKeys.OFFSET)) {
724                                    if (offset != null) {
725                                        // previous GNU.sparse.offset header but no numBytes
726                                        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
727                                    }
728                                    try {
729                                        offset = Long.valueOf(value);
730                                    } catch (final NumberFormatException ex) {
731                                        throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value");
732                                    }
733                                    if (offset < 0) {
734                                        throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value");
735                                    }
736                                }
737
738                                // for 0.0 PAX Headers
739                                if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) {
740                                    if (offset == null) {
741                                        throw new IOException(
742                                                "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up.");
743                                    }
744                                    final long numbytes = ParsingUtils.parseLongValue(value);
745                                    if (numbytes < 0) {
746                                        throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value");
747                                    }
748                                    sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
749                                    offset = null;
750                                }
751                            }
752                            break;
753                        }
754                        coll.write((byte) ch);
755                    }
756                    break; // Processed single header
757                }
758
759                // COMPRESS-530 : throw if we encounter a non-number while reading length
760                if (ch < '0' || ch > '9') {
761                    throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
762                }
763
764                len *= 10;
765                len += ch - '0';
766            }
767            if (ch == -1) { // EOF
768                break;
769            }
770        }
771        if (offset != null) {
772            // offset but no numBytes
773            sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
774        }
775        return headers;
776    }
777
778    /**
779     * Parses the content of a PAX 1.0 sparse block.
780     *
781     * @since 1.20
782     * @param buffer The buffer from which to parse.
783     * @param offset The offset into the buffer from which to parse.
784     * @return a parsed sparse struct
785     */
786    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
787        final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN);
788        final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN);
789
790        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
791    }
792
793    /**
794     * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
795     * delimited by newlines.
796     *
797     * @param inputStream the input stream of the tar file
798     * @return the decimal number delimited by '\n', and the bytes read from input stream
799     * @throws IOException
800     */
801    private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
802        int number;
803        long result = 0;
804        long bytesRead = 0;
805
806        while ((number = inputStream.read()) != '\n') {
807            bytesRead += 1;
808            if (number == -1) {
809                throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
810            }
811            if (number < '0' || number > '9') {
812                throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
813            }
814            result = result * 10 + (number - '0');
815        }
816        bytesRead += 1;
817
818        return new long[] { result, bytesRead };
819    }
820
821    /**
822     * @since 1.21
823     */
824    static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException {
825        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
826        for (int i = 0; i < entries; i++) {
827            try {
828                final TarArchiveStructSparse sparseHeader = parseSparse(buffer,
829                        offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN));
830
831                if (sparseHeader.getOffset() < 0) {
832                    throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
833                }
834                if (sparseHeader.getNumbytes() < 0) {
835                    throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
836                }
837                sparseHeaders.add(sparseHeader);
838            } catch (final IllegalArgumentException ex) {
839                // thrown internally by parseOctalOrBinary
840                throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
841            }
842        }
843        return Collections.unmodifiableList(sparseHeaders);
844    }
845
846    /**
847     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the
848     * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal
849     * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore
850     * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations
851     * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote>
852     * <p>
853     * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may
854     * well evolve over time as more special cases are encountered.
855     * </p>
856     *
857     * @param header tar header
858     * @return whether the checksum is reasonably good
859     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
860     * @since 1.5
861     */
862    public static boolean verifyCheckSum(final byte[] header) {
863        final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
864        long unsignedSum = 0;
865        long signedSum = 0;
866
867        for (int i = 0; i < header.length; i++) {
868            byte b = header[i];
869            if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) {
870                b = ' ';
871            }
872            unsignedSum += 0xff & b;
873            signedSum += b;
874        }
875        return storedSum == unsignedSum || storedSum == signedSum;
876    }
877
    /** Prevents instantiation; this class only provides static utility methods. */
    private TarUtils() {
    }
881
882}