001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.bzip2; 020 021import java.io.IOException; 022import java.io.OutputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorOutputStream; 026 027/** 028 * An output stream that compresses into the BZip2 format into another stream. 029 * 030 * <p> 031 * The compression requires large amounts of memory. Thus you should call the {@link #close() close()} method as soon as possible, to force 032 * {@code BZip2CompressorOutputStream} to release the allocated memory. 033 * </p> 034 * 035 * <p> 036 * You can shrink the amount of allocated memory and maybe raise the compression speed by choosing a lower blocksize, which in turn may cause a lower 037 * compression ratio. You can avoid unnecessary memory allocation by avoiding using a blocksize which is bigger than the size of the input. 038 * </p> 039 * 040 * <p> 041 * You can compute the memory usage for compressing by the following formula: 042 * </p> 043 * 044 * <pre> 045 * <code>400k + (9 * blocksize)</code>. 046 * </pre> 047 * 048 * <p> 049 * To get the memory required for decompression by {@link BZip2CompressorInputStream} use 050 * </p> 051 * 052 * <pre> 053 * <code>65k + (5 * blocksize)</code>. 054 * </pre> 055 * 056 * <table style="width:100%" border="1"> 057 * <caption>Memory usage by blocksize</caption> 058 * <tr> 059 * <th colspan="3">Memory usage by blocksize</th> 060 * </tr> 061 * <tr> 062 * <th style="text-align: right">Blocksize</th> 063 * <th style="text-align: right">Compression<br> 064 * memory usage</th> 065 * <th style="text-align: right">Decompression<br> 066 * memory usage</th> 067 * </tr> 068 * <tr> 069 * <td style="text-align: right">100k</td> 070 * <td style="text-align: right">1300k</td> 071 * <td style="text-align: right">565k</td> 072 * </tr> 073 * <tr> 074 * <td style="text-align: right">200k</td> 075 * <td style="text-align: right">2200k</td> 076 * <td style="text-align: right">1065k</td> 077 * </tr> 078 * <tr> 079 * <td style="text-align: right">300k</td> 080 * <td style="text-align: right">3100k</td> 081 * <td style="text-align: right">1565k</td> 082 * </tr> 083 * <tr> 084 * <td style="text-align: right">400k</td> 085 * <td style="text-align: right">4000k</td> 086 * <td style="text-align: right">2065k</td> 087 * </tr> 088 * <tr> 089 * <td style="text-align: right">500k</td> 090 * <td style="text-align: right">4900k</td> 091 * <td style="text-align: right">2565k</td> 092 * </tr> 093 * <tr> 094 * <td style="text-align: right">600k</td> 095 * <td style="text-align: right">5800k</td> 096 * <td style="text-align: right">3065k</td> 097 * </tr> 098 * <tr> 099 * <td style="text-align: right">700k</td> 100 * <td style="text-align: right">6700k</td> 101 * <td style="text-align: right">3565k</td> 102 * </tr> 103 * <tr> 104 * <td style="text-align: right">800k</td> 105 * <td style="text-align: right">7600k</td> 106 * <td style="text-align: right">4065k</td> 107 * </tr> 108 * <tr> 109 * <td style="text-align: right">900k</td> 110 * <td style="text-align: right">8500k</td> 111 * <td style="text-align: right">4565k</td> 112 * </tr> 113 * </table> 114 * 115 * <p> 116 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the bzipped input is smaller than one block. 117 * </p> 118 * 119 * <p> 120 * Instances of this class are not threadsafe. 121 * </p> 122 * 123 * <p> 124 * TODO: Update to BZip2 1.0.1 125 * </p> 126 * 127 * @NotThreadSafe 128 */ 129public class BZip2CompressorOutputStream extends CompressorOutputStream implements BZip2Constants { 130 131 static final class Data { 132 133 // with blockSize 900k 134 /* maps unsigned byte => "does it occur in block" */ 135 final boolean[] inUse = new boolean[256]; // 256 byte 136 final byte[] unseqToSeq = new byte[256]; // 256 byte 137 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte 138 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 139 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 140 141 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte 142 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 143 // byte 144 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 145 // byte 146 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte 147 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte 148 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 149 // byte 150 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte 151 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte 152 153 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte 154 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 155 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 156 157 // ------------ 158 // 333408 byte 159 160 /* 161 * holds the RLEd block of original data starting at index 1. After sorting the last byte added to the buffer is at index 0. 162 */ 163 final byte[] block; // 900021 byte 164 /* 165 * maps index in Burrows-Wheeler transformed block => index of byte in original block 166 */ 167 final int[] fmap; // 3600000 byte 168 final char[] sfmap; // 3600000 byte 169 // ------------ 170 // 8433529 byte 171 // ============ 172 173 /** 174 * Index of original line in Burrows-Wheeler table. 175 * 176 * <p> 177 * This is the index in fmap that points to the last byte of the original data. 178 * </p> 179 */ 180 int origPtr; 181 182 Data(final int blockSize100k) { 183 final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE; 184 this.block = new byte[n + 1 + NUM_OVERSHOOT_BYTES]; 185 this.fmap = new int[n]; 186 this.sfmap = new char[2 * n]; 187 } 188 189 } 190 191 /** 192 * The minimum supported blocksize {@code == 1}. 193 */ 194 public static final int MIN_BLOCKSIZE = 1; 195 196 /** 197 * The maximum supported blocksize {@code == 9}. 198 */ 199 public static final int MAX_BLOCKSIZE = 9; 200 private static final int GREATER_ICOST = 15; 201 202 private static final int LESSER_ICOST = 0; 203 204 /** 205 * Chooses a blocksize based on the given length of the data to compress. 206 * 207 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} both inclusive. For a negative {@code inputLength} this method returns 208 * {@code MAX_BLOCKSIZE} always. 209 * 210 * @param inputLength The length of the data which will be compressed by {@code BZip2CompressorOutputStream}. 211 */ 212 public static int chooseBlockSize(final long inputLength) { 213 return inputLength > 0 ? (int) Math.min(inputLength / 132000 + 1, 9) : MAX_BLOCKSIZE; 214 } 215 216 private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) { 217 int vec = 0; 218 for (int n = minLen; n <= maxLen; n++) { 219 for (int i = 0; i < alphaSize; i++) { 220 if ((length[i] & 0xff) == n) { 221 code[i] = vec; 222 vec++; 223 } 224 } 225 vec <<= 1; 226 } 227 } 228 229 private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) { 230 /* 231 * Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel. 232 */ 233 final int[] heap = dat.heap; 234 final int[] weight = dat.weight; 235 final int[] parent = dat.parent; 236 237 for (int i = alphaSize; --i >= 0;) { 238 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 239 } 240 241 for (boolean tooLong = true; tooLong;) { 242 tooLong = false; 243 244 int nNodes = alphaSize; 245 int nHeap = 0; 246 heap[0] = 0; 247 weight[0] = 0; 248 parent[0] = -2; 249 250 for (int i = 1; i <= alphaSize; i++) { 251 parent[i] = -1; 252 nHeap++; 253 heap[nHeap] = i; 254 255 int zz = nHeap; 256 final int tmp = heap[zz]; 257 while (weight[tmp] < weight[heap[zz >> 1]]) { 258 heap[zz] = heap[zz >> 1]; 259 zz >>= 1; 260 } 261 heap[zz] = tmp; 262 } 263 264 while (nHeap > 1) { 265 final int n1 = heap[1]; 266 heap[1] = heap[nHeap]; 267 nHeap--; 268 269 int yy = 0; 270 int zz = 1; 271 int tmp = heap[1]; 272 273 while (true) { 274 yy = zz << 1; 275 276 if (yy > nHeap) { 277 break; 278 } 279 280 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 281 yy++; 282 } 283 284 if (weight[tmp] < weight[heap[yy]]) { 285 break; 286 } 287 288 heap[zz] = heap[yy]; 289 zz = yy; 290 } 291 292 heap[zz] = tmp; 293 294 final int n2 = heap[1]; 295 heap[1] = heap[nHeap]; 296 nHeap--; 297 298 yy = 0; 299 zz = 1; 300 tmp = heap[1]; 301 302 while (true) { 303 yy = zz << 1; 304 305 if (yy > nHeap) { 306 break; 307 } 308 309 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 310 yy++; 311 } 312 313 if (weight[tmp] < weight[heap[yy]]) { 314 break; 315 } 316 317 heap[zz] = heap[yy]; 318 zz = yy; 319 } 320 321 heap[zz] = tmp; 322 nNodes++; 323 parent[n1] = parent[n2] = nNodes; 324 325 final int weight_n1 = weight[n1]; 326 final int weight_n2 = weight[n2]; 327 weight[nNodes] = (weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00) | 1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff); 328 329 parent[nNodes] = -1; 330 nHeap++; 331 heap[nHeap] = nNodes; 332 333 tmp = 0; 334 zz = nHeap; 335 tmp = heap[zz]; 336 final int weight_tmp = weight[tmp]; 337 while (weight_tmp < weight[heap[zz >> 1]]) { 338 heap[zz] = heap[zz >> 1]; 339 zz >>= 1; 340 } 341 heap[zz] = tmp; 342 343 } 344 345 for (int i = 1; i <= alphaSize; i++) { 346 int j = 0; 347 int k = i; 348 349 for (int parent_k; (parent_k = parent[k]) >= 0;) { 350 k = parent_k; 351 j++; 352 } 353 354 len[i - 1] = (byte) j; 355 if (j > maxLen) { 356 tooLong = true; 357 } 358 } 359 360 if (tooLong) { 361 for (int i = 1; i < alphaSize; i++) { 362 int j = weight[i] >> 8; 363 j = 1 + (j >> 1); 364 weight[i] = j << 8; 365 } 366 } 367 } 368 } 369 370 /** 371 * Index of the last char in the block, so the block size == last + 1. 372 */ 373 private int last; 374 /** 375 * Always: in the range 0 .. 9. The current block size is 100000 * this number. 376 */ 377 private final int blockSize100k; 378 379 private int bsBuff; 380 381 private int bsLive; 382 383 private final CRC crc = new CRC(); 384 private int nInUse; 385 386 private int nMTF; 387 private int currentChar = -1; 388 private int runLength; 389 390 private int blockCRC; 391 private int combinedCRC; 392 393 private final int allowableBlockSize; 394 /** 395 * All memory intensive stuff. 396 */ 397 private Data data; 398 399 private BlockSort blockSorter; 400 401 private OutputStream out; 402 403 private volatile boolean closed; 404 405 /** 406 * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k. 407 * 408 * @param out the destination stream. 409 * 410 * @throws IOException if an I/O error occurs in the specified stream. 411 * @throws NullPointerException if {@code out == null}. 412 */ 413 public BZip2CompressorOutputStream(final OutputStream out) throws IOException { 414 this(out, MAX_BLOCKSIZE); 415 } 416 417 /** 418 * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize. 419 * 420 * @param out the destination stream. 421 * @param blockSize the blockSize as 100k units. 422 * 423 * @throws IOException if an I/O error occurs in the specified stream. 424 * @throws IllegalArgumentException if {@code (blockSize < 1) || (blockSize > 9)}. 425 * @throws NullPointerException if {@code out == null}. 426 * 427 * @see #MIN_BLOCKSIZE 428 * @see #MAX_BLOCKSIZE 429 */ 430 public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { 431 if (blockSize < 1) { 432 throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); 433 } 434 if (blockSize > 9) { 435 throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); 436 } 437 438 this.blockSize100k = blockSize; 439 this.out = out; 440 441 /* 20 is just a paranoia constant */ 442 this.allowableBlockSize = this.blockSize100k * BZip2Constants.BASEBLOCKSIZE - 20; 443 init(); 444 } 445 446 private void blockSort() { 447 blockSorter.blockSort(data, last); 448 } 449 450 private void bsFinishedWithStream() throws IOException { 451 while (this.bsLive > 0) { 452 final int ch = this.bsBuff >> 24; 453 this.out.write(ch); // write 8-bit 454 this.bsBuff <<= 8; 455 this.bsLive -= 8; 456 } 457 } 458 459 private void bsPutInt(final int u) throws IOException { 460 bsW(8, u >> 24 & 0xff); 461 bsW(8, u >> 16 & 0xff); 462 bsW(8, u >> 8 & 0xff); 463 bsW(8, u & 0xff); 464 } 465 466 private void bsPutUByte(final int c) throws IOException { 467 bsW(8, c); 468 } 469 470 private void bsW(final int n, final int v) throws IOException { 471 final OutputStream outShadow = this.out; 472 int bsLiveShadow = this.bsLive; 473 int bsBuffShadow = this.bsBuff; 474 475 while (bsLiveShadow >= 8) { 476 outShadow.write(bsBuffShadow >> 24); // write 8-bit 477 bsBuffShadow <<= 8; 478 bsLiveShadow -= 8; 479 } 480 481 this.bsBuff = bsBuffShadow | v << 32 - bsLiveShadow - n; 482 this.bsLive = bsLiveShadow + n; 483 } 484 485 @Override 486 public void close() throws IOException { 487 if (!closed) { 488 try (OutputStream outShadow = this.out) { 489 finish(); 490 } 491 } 492 } 493 494 private void endBlock() throws IOException { 495 this.blockCRC = this.crc.getValue(); 496 this.combinedCRC = this.combinedCRC << 1 | this.combinedCRC >>> 31; 497 this.combinedCRC ^= this.blockCRC; 498 499 // empty block at end of file 500 if (this.last == -1) { 501 return; 502 } 503 504 /* sort the block and establish posn of original string */ 505 blockSort(); 506 507 /* 508 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 :-). A 32 bit value does not really give a strong enough guarantee that the 509 * value will not appear by chance in the compressed data stream. Worst-case probability of this event, for a 900k block, is about 2.0e-3 for 32 bits, 510 * 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. For a compressed file of size 100Gb -- about 100000 blocks -- only a 48-bit marker will do. NB: normal 511 * compression/ decompression doesn't rely on these statistical properties. They are only important when trying to recover blocks from damaged files. 512 */ 513 bsPutUByte(0x31); 514 bsPutUByte(0x41); 515 bsPutUByte(0x59); 516 bsPutUByte(0x26); 517 bsPutUByte(0x53); 518 bsPutUByte(0x59); 519 520 /* Now the block's CRC, so it is in a known place. */ 521 bsPutInt(this.blockCRC); 522 523 /* Now a single bit indicating no randomisation. */ 524 bsW(1, 0); 525 526 /* Finally, block's contents proper. */ 527 moveToFrontCodeAndSend(); 528 } 529 530 private void endCompression() throws IOException { 531 /* 532 * Now another magic 48-bit number, 0x177245385090, to indicate the end of the last block. (sqrt(pi), if you want to know. I did want to use e, but it 533 * contains too much repetition -- 27 18 28 18 28 46 -- for me to feel statistically comfortable. Call me paranoid.) 534 */ 535 bsPutUByte(0x17); 536 bsPutUByte(0x72); 537 bsPutUByte(0x45); 538 bsPutUByte(0x38); 539 bsPutUByte(0x50); 540 bsPutUByte(0x90); 541 542 bsPutInt(this.combinedCRC); 543 bsFinishedWithStream(); 544 } 545 546 public void finish() throws IOException { 547 if (!closed) { 548 closed = true; 549 try { 550 if (this.runLength > 0) { 551 writeRun(); 552 } 553 this.currentChar = -1; 554 endBlock(); 555 endCompression(); 556 } finally { 557 this.out = null; 558 this.blockSorter = null; 559 this.data = null; 560 } 561 } 562 } 563 564 @Override 565 public void flush() throws IOException { 566 final OutputStream outShadow = this.out; 567 if (outShadow != null) { 568 outShadow.flush(); 569 } 570 } 571 572 /* 573 * Performs Move-To-Front on the Burrows-Wheeler transformed buffer, storing the MTFed data in data.sfmap in RUNA/RUNB run-length-encoded form. 574 * 575 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> 576 */ 577 private void generateMTFValues() { 578 final int lastShadow = this.last; 579 final Data dataShadow = this.data; 580 final boolean[] inUse = dataShadow.inUse; 581 final byte[] block = dataShadow.block; 582 final int[] fmap = dataShadow.fmap; 583 final char[] sfmap = dataShadow.sfmap; 584 final int[] mtfFreq = dataShadow.mtfFreq; 585 final byte[] unseqToSeq = dataShadow.unseqToSeq; 586 final byte[] yy = dataShadow.generateMTFValues_yy; 587 588 // make maps 589 int nInUseShadow = 0; 590 for (int i = 0; i < 256; i++) { 591 if (inUse[i]) { 592 unseqToSeq[i] = (byte) nInUseShadow; 593 nInUseShadow++; 594 } 595 } 596 this.nInUse = nInUseShadow; 597 598 final int eob = nInUseShadow + 1; 599 600 Arrays.fill(mtfFreq, 0, eob + 1, 0); 601 602 for (int i = nInUseShadow; --i >= 0;) { 603 yy[i] = (byte) i; 604 } 605 606 int wr = 0; 607 int zPend = 0; 608 609 for (int i = 0; i <= lastShadow; i++) { 610 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; 611 byte tmp = yy[0]; 612 int j = 0; 613 614 while (ll_i != tmp) { 615 j++; 616 final byte tmp2 = tmp; 617 tmp = yy[j]; 618 yy[j] = tmp2; 619 } 620 yy[0] = tmp; 621 622 if (j == 0) { 623 zPend++; 624 } else { 625 if (zPend > 0) { 626 zPend--; 627 while (true) { 628 if ((zPend & 1) == 0) { 629 sfmap[wr] = RUNA; 630 wr++; 631 mtfFreq[RUNA]++; 632 } else { 633 sfmap[wr] = RUNB; 634 wr++; 635 mtfFreq[RUNB]++; 636 } 637 638 if (zPend < 2) { 639 break; 640 } 641 zPend = zPend - 2 >> 1; 642 } 643 zPend = 0; 644 } 645 sfmap[wr] = (char) (j + 1); 646 wr++; 647 mtfFreq[j + 1]++; 648 } 649 } 650 651 if (zPend > 0) { 652 zPend--; 653 while (true) { 654 if ((zPend & 1) == 0) { 655 sfmap[wr] = RUNA; 656 wr++; 657 mtfFreq[RUNA]++; 658 } else { 659 sfmap[wr] = RUNB; 660 wr++; 661 mtfFreq[RUNB]++; 662 } 663 664 if (zPend < 2) { 665 break; 666 } 667 zPend = zPend - 2 >> 1; 668 } 669 } 670 671 sfmap[wr] = (char) eob; 672 mtfFreq[eob]++; 673 this.nMTF = wr + 1; 674 } 675 676 /** 677 * Returns the blocksize parameter specified at construction time. 678 * 679 * @return the blocksize parameter specified at construction time 680 */ 681 public final int getBlockSize() { 682 return this.blockSize100k; 683 } 684 685 /** 686 * Writes magic bytes like BZ on the first position of the stream and bytes indicating the file-format, which is huffmanized, followed by a digit indicating 687 * blockSize100k. 688 * 689 * @throws IOException if the magic bytes could not been written 690 */ 691 private void init() throws IOException { 692 bsPutUByte('B'); 693 bsPutUByte('Z'); 694 695 this.data = new Data(this.blockSize100k); 696 this.blockSorter = new BlockSort(this.data); 697 698 // huffmanized magic bytes 699 bsPutUByte('h'); 700 bsPutUByte('0' + this.blockSize100k); 701 702 this.combinedCRC = 0; 703 initBlock(); 704 } 705 706 private void initBlock() { 707 // blockNo++; 708 this.crc.reset(); 709 this.last = -1; 710 // ch = 0; 711 712 final boolean[] inUse = this.data.inUse; 713 for (int i = 256; --i >= 0;) { 714 inUse[i] = false; 715 } 716 717 } 718 719 private void moveToFrontCodeAndSend() throws IOException { 720 bsW(24, this.data.origPtr); 721 generateMTFValues(); 722 sendMTFValues(); 723 } 724 725 private void sendMTFValues() throws IOException { 726 final byte[][] len = this.data.sendMTFValues_len; 727 final int alphaSize = this.nInUse + 2; 728 729 for (int t = N_GROUPS; --t >= 0;) { 730 final byte[] len_t = len[t]; 731 for (int v = alphaSize; --v >= 0;) { 732 len_t[v] = GREATER_ICOST; 733 } 734 } 735 736 /* Decide how many coding tables to use */ 737 // assert (this.nMTF > 0) : this.nMTF; 738 final int nGroups = this.nMTF < 200 ? 2 : this.nMTF < 600 ? 3 : this.nMTF < 1200 ? 4 : this.nMTF < 2400 ? 5 : 6; 739 740 /* Generate an initial set of coding tables */ 741 sendMTFValues0(nGroups, alphaSize); 742 743 /* 744 * Iterate up to N_ITERS times to improve the tables. 745 */ 746 final int nSelectors = sendMTFValues1(nGroups, alphaSize); 747 748 /* Compute MTF values for the selectors. */ 749 sendMTFValues2(nGroups, nSelectors); 750 751 /* Assign actual codes for the tables. */ 752 sendMTFValues3(nGroups, alphaSize); 753 754 /* Transmit the mapping table. */ 755 sendMTFValues4(); 756 757 /* Now the selectors. */ 758 sendMTFValues5(nGroups, nSelectors); 759 760 /* Now the coding tables. */ 761 sendMTFValues6(nGroups, alphaSize); 762 763 /* And finally, the block data proper */ 764 sendMTFValues7(); 765 } 766 767 private void sendMTFValues0(final int nGroups, final int alphaSize) { 768 final byte[][] len = this.data.sendMTFValues_len; 769 final int[] mtfFreq = this.data.mtfFreq; 770 771 int remF = this.nMTF; 772 int gs = 0; 773 774 for (int nPart = nGroups; nPart > 0; nPart--) { 775 final int tFreq = remF / nPart; 776 int ge = gs - 1; 777 int aFreq = 0; 778 779 for (final int a = alphaSize - 1; aFreq < tFreq && ge < a;) { 780 aFreq += mtfFreq[++ge]; 781 } 782 783 if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) { 784 aFreq -= mtfFreq[ge--]; 785 } 786 787 final byte[] len_np = len[nPart - 1]; 788 for (int v = alphaSize; --v >= 0;) { 789 if (v >= gs && v <= ge) { 790 len_np[v] = LESSER_ICOST; 791 } else { 792 len_np[v] = GREATER_ICOST; 793 } 794 } 795 796 gs = ge + 1; 797 remF -= aFreq; 798 } 799 } 800 801 private int sendMTFValues1(final int nGroups, final int alphaSize) { 802 final Data dataShadow = this.data; 803 final int[][] rfreq = dataShadow.sendMTFValues_rfreq; 804 final int[] fave = dataShadow.sendMTFValues_fave; 805 final short[] cost = dataShadow.sendMTFValues_cost; 806 final char[] sfmap = dataShadow.sfmap; 807 final byte[] selector = dataShadow.selector; 808 final byte[][] len = dataShadow.sendMTFValues_len; 809 final byte[] len_0 = len[0]; 810 final byte[] len_1 = len[1]; 811 final byte[] len_2 = len[2]; 812 final byte[] len_3 = len[3]; 813 final byte[] len_4 = len[4]; 814 final byte[] len_5 = len[5]; 815 final int nMTFShadow = this.nMTF; 816 817 int nSelectors = 0; 818 819 for (int iter = 0; iter < N_ITERS; iter++) { 820 for (int t = nGroups; --t >= 0;) { 821 fave[t] = 0; 822 final int[] rfreqt = rfreq[t]; 823 for (int i = alphaSize; --i >= 0;) { 824 rfreqt[i] = 0; 825 } 826 } 827 828 nSelectors = 0; 829 830 for (int gs = 0; gs < this.nMTF;) { 831 // Set group start & end marks. 832 833 // Calculate the cost of this group as coded by each of the 834 // coding tables. 835 836 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 837 838 final byte mask = (byte) 0xff; 839 if (nGroups == N_GROUPS) { 840 // unrolled version of the else-block 841 842 short cost0 = 0; 843 short cost1 = 0; 844 short cost2 = 0; 845 short cost3 = 0; 846 short cost4 = 0; 847 short cost5 = 0; 848 849 for (int i = gs; i <= ge; i++) { 850 final int icv = sfmap[i]; 851 cost0 += (short) (len_0[icv] & mask); 852 cost1 += (short) (len_1[icv] & mask); 853 cost2 += (short) (len_2[icv] & mask); 854 cost3 += (short) (len_3[icv] & mask); 855 cost4 += (short) (len_4[icv] & mask); 856 cost5 += (short) (len_5[icv] & mask); 857 } 858 859 cost[0] = cost0; 860 cost[1] = cost1; 861 cost[2] = cost2; 862 cost[3] = cost3; 863 cost[4] = cost4; 864 cost[5] = cost5; 865 866 } else { 867 for (int t = nGroups; --t >= 0;) { 868 cost[t] = 0; 869 } 870 871 for (int i = gs; i <= ge; i++) { 872 final int icv = sfmap[i]; 873 for (int t = nGroups; --t >= 0;) { 874 cost[t] += (short) (len[t][icv] & mask); 875 } 876 } 877 } 878 879 /* 880 * Find the coding table which is best for this group, and record its identity in the selector table. 881 */ 882 int bt = -1; 883 for (int t = nGroups, bc = 999999999; --t >= 0;) { 884 final int cost_t = cost[t]; 885 if (cost_t < bc) { 886 bc = cost_t; 887 bt = t; 888 } 889 } 890 891 fave[bt]++; 892 selector[nSelectors] = (byte) bt; 893 nSelectors++; 894 895 /* 896 * Increment the symbol frequencies for the selected table. 897 */ 898 final int[] rfreq_bt = rfreq[bt]; 899 for (int i = gs; i <= ge; i++) { 900 rfreq_bt[sfmap[i]]++; 901 } 902 903 gs = ge + 1; 904 } 905 906 /* 907 * Recompute the tables based on the accumulated frequencies. 908 */ 909 for (int t = 0; t < nGroups; t++) { 910 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); 911 } 912 } 913 914 return nSelectors; 915 } 916 917 private void sendMTFValues2(final int nGroups, final int nSelectors) { 918 // assert (nGroups < 8) : nGroups; 919 920 final Data dataShadow = this.data; 921 final byte[] pos = dataShadow.sendMTFValues2_pos; 922 923 for (int i = nGroups; --i >= 0;) { 924 pos[i] = (byte) i; 925 } 926 927 for (int i = 0; i < nSelectors; i++) { 928 final byte ll_i = dataShadow.selector[i]; 929 byte tmp = pos[0]; 930 int j = 0; 931 932 while (ll_i != tmp) { 933 j++; 934 final byte tmp2 = tmp; 935 tmp = pos[j]; 936 pos[j] = tmp2; 937 } 938 939 pos[0] = tmp; 940 dataShadow.selectorMtf[i] = (byte) j; 941 } 942 } 943 944 private void sendMTFValues3(final int nGroups, final int alphaSize) { 945 final int[][] code = this.data.sendMTFValues_code; 946 final byte[][] len = this.data.sendMTFValues_len; 947 948 for (int t = 0; t < nGroups; t++) { 949 int minLen = 32; 950 int maxLen = 0; 951 final byte[] len_t = len[t]; 952 for (int i = alphaSize; --i >= 0;) { 953 final int l = len_t[i] & 0xff; 954 if (l > maxLen) { 955 maxLen = l; 956 } 957 if (l < minLen) { 958 minLen = l; 959 } 960 } 961 962 // assert (maxLen <= 20) : maxLen; 963 // assert (minLen >= 1) : minLen; 964 965 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); 966 } 967 } 968 969 private void sendMTFValues4() throws IOException { 970 final boolean[] inUse = this.data.inUse; 971 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; 972 973 for (int i = 16; --i >= 0;) { 974 inUse16[i] = false; 975 final int i16 = i * 16; 976 for (int j = 16; --j >= 0;) { 977 if (inUse[i16 + j]) { 978 inUse16[i] = true; 979 break; 980 } 981 } 982 } 983 984 for (int i = 0; i < 16; i++) { 985 bsW(1, inUse16[i] ? 1 : 0); 986 } 987 988 final OutputStream outShadow = this.out; 989 int bsLiveShadow = this.bsLive; 990 int bsBuffShadow = this.bsBuff; 991 992 for (int i = 0; i < 16; i++) { 993 if (inUse16[i]) { 994 final int i16 = i * 16; 995 for (int j = 0; j < 16; j++) { 996 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); 997 while (bsLiveShadow >= 8) { 998 outShadow.write(bsBuffShadow >> 24); // write 8-bit 999 bsBuffShadow <<= 8; 1000 bsLiveShadow -= 8; 1001 } 1002 if (inUse[i16 + j]) { 1003 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 1004 } 1005 bsLiveShadow++; 1006 } 1007 } 1008 } 1009 1010 this.bsBuff = bsBuffShadow; 1011 this.bsLive = bsLiveShadow; 1012 } 1013 1014 private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException { 1015 bsW(3, nGroups); 1016 bsW(15, nSelectors); 1017 1018 final OutputStream outShadow = this.out; 1019 final byte[] selectorMtf = this.data.selectorMtf; 1020 1021 int bsLiveShadow = this.bsLive; 1022 int bsBuffShadow = this.bsBuff; 1023 1024 for (int i = 0; i < nSelectors; i++) { 1025 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { 1026 // inlined: bsW(1, 1); 1027 while (bsLiveShadow >= 8) { 1028 outShadow.write(bsBuffShadow >> 24); 1029 bsBuffShadow <<= 8; 1030 bsLiveShadow -= 8; 1031 } 1032 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 1033 bsLiveShadow++; 1034 } 1035 1036 // inlined: bsW(1, 0); 1037 while (bsLiveShadow >= 8) { 1038 outShadow.write(bsBuffShadow >> 24); 1039 bsBuffShadow <<= 8; 1040 bsLiveShadow -= 8; 1041 } 1042 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1043 bsLiveShadow++; 1044 } 1045 1046 this.bsBuff = bsBuffShadow; 1047 this.bsLive = bsLiveShadow; 1048 } 1049 1050 private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException { 1051 final byte[][] len = this.data.sendMTFValues_len; 1052 final OutputStream outShadow = this.out; 1053 1054 int bsLiveShadow = this.bsLive; 1055 int bsBuffShadow = this.bsBuff; 1056 1057 for (int t = 0; t < nGroups; t++) { 1058 final byte[] len_t = len[t]; 1059 int curr = len_t[0] & 0xff; 1060 1061 // inlined: bsW(5, curr); 1062 while (bsLiveShadow >= 8) { 1063 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1064 bsBuffShadow <<= 8; 1065 bsLiveShadow -= 8; 1066 } 1067 bsBuffShadow |= curr << 32 - bsLiveShadow - 5; 1068 bsLiveShadow += 5; 1069 1070 for (int i = 0; i < alphaSize; i++) { 1071 final int lti = len_t[i] & 0xff; 1072 while (curr < lti) { 1073 // inlined: bsW(2, 2); 1074 while (bsLiveShadow >= 8) { 1075 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1076 bsBuffShadow <<= 8; 1077 bsLiveShadow -= 8; 1078 } 1079 bsBuffShadow |= 2 << 32 - bsLiveShadow - 2; 1080 bsLiveShadow += 2; 1081 1082 curr++; /* 10 */ 1083 } 1084 1085 while (curr > lti) { 1086 // inlined: bsW(2, 3); 1087 while (bsLiveShadow >= 8) { 1088 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1089 bsBuffShadow <<= 8; 1090 bsLiveShadow -= 8; 1091 } 1092 bsBuffShadow |= 3 << 32 - bsLiveShadow - 2; 1093 bsLiveShadow += 2; 1094 1095 curr--; /* 11 */ 1096 } 1097 1098 // inlined: bsW(1, 0); 1099 while (bsLiveShadow >= 8) { 1100 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1101 bsBuffShadow <<= 8; 1102 bsLiveShadow -= 8; 1103 } 1104 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1105 bsLiveShadow++; 1106 } 1107 } 1108 1109 this.bsBuff = bsBuffShadow; 1110 this.bsLive = bsLiveShadow; 1111 } 1112 1113 private void sendMTFValues7() throws IOException { 1114 final Data dataShadow = this.data; 1115 final byte[][] len = dataShadow.sendMTFValues_len; 1116 final int[][] code = dataShadow.sendMTFValues_code; 1117 final OutputStream outShadow = this.out; 1118 final byte[] selector = dataShadow.selector; 1119 final char[] sfmap = dataShadow.sfmap; 1120 final int nMTFShadow = this.nMTF; 1121 1122 int selCtr = 0; 1123 1124 int bsLiveShadow = this.bsLive; 1125 int bsBuffShadow = this.bsBuff; 1126 1127 for (int gs = 0; gs < nMTFShadow;) { 1128 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 1129 final int selector_selCtr = selector[selCtr] & 0xff; 1130 final int[] code_selCtr = code[selector_selCtr]; 1131 final byte[] len_selCtr = len[selector_selCtr]; 1132 1133 while (gs <= ge) { 1134 final int sfmap_i = sfmap[gs]; 1135 1136 // 1137 // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 1138 // code_selCtr[sfmap_i]); 1139 // 1140 while (bsLiveShadow >= 8) { 1141 outShadow.write(bsBuffShadow >> 24); 1142 bsBuffShadow <<= 8; 1143 bsLiveShadow -= 8; 1144 } 1145 final int n = len_selCtr[sfmap_i] & 0xFF; 1146 bsBuffShadow |= code_selCtr[sfmap_i] << 32 - bsLiveShadow - n; 1147 bsLiveShadow += n; 1148 1149 gs++; 1150 } 1151 1152 gs = ge + 1; 1153 selCtr++; 1154 } 1155 1156 this.bsBuff = bsBuffShadow; 1157 this.bsLive = bsLiveShadow; 1158 } 1159 1160 @Override 1161 public void write(final byte[] buf, int offs, final int len) throws IOException { 1162 if (offs < 0) { 1163 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 1164 } 1165 if (len < 0) { 1166 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 1167 } 1168 if (offs + len > buf.length) { 1169 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ")."); 1170 } 1171 if (closed) { 1172 throw new IOException("Stream closed"); 1173 } 1174 1175 for (final int hi = offs + len; offs < hi;) { 1176 write0(buf[offs++]); 1177 } 1178 } 1179 1180 @Override 1181 public void write(final int b) throws IOException { 1182 if (closed) { 1183 throw new IOException("Closed"); 1184 } 1185 write0(b); 1186 } 1187 1188 /** 1189 * Keeps track of the last bytes written and implicitly performs run-length encoding as the first step of the bzip2 algorithm. 1190 */ 1191 private void write0(int b) throws IOException { 1192 if (this.currentChar != -1) { 1193 b &= 0xff; 1194 if (this.currentChar == b) { 1195 if (++this.runLength > 254) { 1196 writeRun(); 1197 this.currentChar = -1; 1198 this.runLength = 0; 1199 } 1200 // else nothing to do 1201 } else { 1202 writeRun(); 1203 this.runLength = 1; 1204 this.currentChar = b; 1205 } 1206 } else { 1207 this.currentChar = b & 0xff; 1208 this.runLength++; 1209 } 1210 } 1211 1212 /** 1213 * Writes the current byte to the buffer, run-length encoding it if it has been repeated at least four times (the first step RLEs sequences of four 1214 * identical bytes). 1215 * 1216 * <p> 1217 * Flushes the current block before writing data if it is full. 1218 * </p> 1219 * 1220 * <p> 1221 * "write to the buffer" means adding to data.buffer starting two steps "after" this.last - initially starting at index 1 (not 0) - and updating this.last 1222 * to point to the last index written minus 1. 1223 * </p> 1224 */ 1225 private void writeRun() throws IOException { 1226 final int lastShadow = this.last; 1227 1228 if (lastShadow < this.allowableBlockSize) { 1229 final int currentCharShadow = this.currentChar; 1230 final Data dataShadow = this.data; 1231 dataShadow.inUse[currentCharShadow] = true; 1232 final byte ch = (byte) currentCharShadow; 1233 1234 int runLengthShadow = this.runLength; 1235 this.crc.update(currentCharShadow, runLengthShadow); 1236 1237 switch (runLengthShadow) { 1238 case 1: 1239 dataShadow.block[lastShadow + 2] = ch; 1240 this.last = lastShadow + 1; 1241 break; 1242 1243 case 2: 1244 dataShadow.block[lastShadow + 2] = ch; 1245 dataShadow.block[lastShadow + 3] = ch; 1246 this.last = lastShadow + 2; 1247 break; 1248 1249 case 3: { 1250 final byte[] block = dataShadow.block; 1251 block[lastShadow + 2] = ch; 1252 block[lastShadow + 3] = ch; 1253 block[lastShadow + 4] = ch; 1254 this.last = lastShadow + 3; 1255 } 1256 break; 1257 1258 default: { 1259 runLengthShadow -= 4; 1260 dataShadow.inUse[runLengthShadow] = true; 1261 final byte[] block = dataShadow.block; 1262 block[lastShadow + 2] = ch; 1263 block[lastShadow + 3] = ch; 1264 block[lastShadow + 4] = ch; 1265 block[lastShadow + 5] = ch; 1266 block[lastShadow + 6] = (byte) runLengthShadow; 1267 this.last = lastShadow + 5; 1268 } 1269 break; 1270 1271 } 1272 } else { 1273 endBlock(); 1274 initBlock(); 1275 writeRun(); 1276 } 1277 } 1278 1279}