| | 1 | | using System; |
| | 2 | | using System.IO; |
| | 3 | | using ICSharpCode.SharpZipLib.Checksum; |
| | 4 | |
|
| | 5 | | namespace ICSharpCode.SharpZipLib.BZip2 |
| | 6 | | { |
| | 7 | | /// <summary> |
| | 8 | | /// An output stream that compresses into the BZip2 format |
| | 9 | | /// including file header chars into another stream. |
| | 10 | | /// </summary> |
| | 11 | | public class BZip2OutputStream : Stream |
| | 12 | | { |
| | 13 | | #region Constants |
// Single-bit mask (bit 21) and its bitwise complement.
const int SETMASK = (1 << 21);
const int CLEARMASK = (~SETMASK);

// Code-length costs used by SendMTFValues when seeding the coding tables:
// symbols inside a table's initial range get LESSER_ICOST, all others GREATER_ICOST.
const int GREATER_ICOST = 15;
const int LESSER_ICOST = 0;

// QSort3 hands a range to SimpleSort when (hi - lo) is below SMALL_THRESH
// or when the comparison depth d exceeds DEPTH_THRESH.
const int SMALL_THRESH = 20;
const int DEPTH_THRESH = 10;

/*--
If you are ever unlucky/improbable enough
to get a stack overflow whilst sorting,
increase the following constant and try
again. In practice I have never seen the
stack go above 27 elems, so the following
limit seems very generous.
--*/
const int QSORT_STACK_SIZE = 1000;

/*--
Gap sequence for the shell sort in SimpleSort.
Knuth's increments seem to work better
than Incerpi-Sedgewick here. Possibly
because the number of elems to sort is
usually small, typically <= 20.

The table is never modified, so it is shared by all instances
instead of being allocated once per stream.
--*/
static readonly int[] increments = {
    1, 4, 13, 40, 121, 364, 1093, 3280,
    9841, 29524, 88573, 265720,
    797161, 2391484
};
| | 42 | | #endregion |
| | 43 | |
|
| | 44 | | #region Constructors |
/// <summary>
/// Create a compressing stream that uses the largest block size (9).
/// </summary>
/// <param name="stream">The stream that receives the BZip2 data.</param>
public BZip2OutputStream(Stream stream)
    : this(stream, 9)
{
}
| | 52 | |
|
/// <summary>
/// Initialise a new instance of the <see cref="BZip2OutputStream"></see>
/// for the specified stream, using the given block size, and write the
/// stream header to it.
/// </summary>
/// <param name="stream">The stream to write compressed data to.</param>
/// <param name="blockSize">The block size to use.</param>
/// <remarks>
/// Valid block sizes are in the range 1..9, with 1 giving
/// the lowest compression and 9 the highest. Values outside
/// that range are clamped to the nearest valid value.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public BZip2OutputStream(Stream stream, int blockSize)
{
    if (stream == null) {
        // Nothing has been set up yet; keep the finalizer (which would try to
        // emit a block and trailer) from running on a half-built instance.
        GC.SuppressFinalize(this);
        throw new ArgumentNullException(nameof(stream));
    }

    BsSetStream(stream);

    workFactor = 50;

    // Clamp into the supported 1..9 range.
    blockSize100k = Math.Max(1, Math.Min(9, blockSize));

    AllocateCompressStructures();
    Initialize();
    InitBlock();
}
| | 80 | | #endregion |
| | 81 | |
|
| | 82 | | #region Destructor |
/// <summary>
/// Finalizer. Runs the non-disposing cleanup path when the garbage
/// collector reclaims a BZip2OutputStream that was never closed.
/// </summary>
~BZip2OutputStream()
{
    Dispose(disposing: false);
}
| | 91 | | #endregion |
| | 92 | |
|
/// <summary>
/// Gets or sets whether this stream owns the underlying stream.
/// When true, <see cref="Close"></see> also closes the underlying stream.
/// </summary>
public bool IsStreamOwner
{
    get { return isStreamOwner; }
    set { isStreamOwner = value; }
}
| | 101 | |
|
| | 102 | |
|
| | 103 | | #region Stream overrides |
/// <summary>
/// Always false: this stream cannot be read from.
/// </summary>
public override bool CanRead => false;
| | 112 | |
|
/// <summary>
/// Always false: this stream cannot seek.
/// </summary>
public override bool CanSeek => false;
| | 121 | |
|
/// <summary>
/// Reports whether the underlying stream can be written to.
/// </summary>
public override bool CanWrite => baseStream.CanWrite;
| | 130 | |
|
/// <summary>
/// Gets the length in bytes of the underlying stream.
/// </summary>
public override long Length => baseStream.Length;
| | 139 | |
|
/// <summary>
/// Gets the current position of the underlying stream.
/// Setting the position is not supported.
/// </summary>
/// <exception cref="NotSupportedException">Thrown by the setter.</exception>
public override long Position
{
    get { return baseStream.Position; }
    set { throw new NotSupportedException("BZip2OutputStream position cannot be set"); }
}
| | 151 | |
|
/// <summary>
/// Not supported; this stream cannot seek.
/// </summary>
/// <param name="offset">Ignored.</param>
/// <param name="origin">Ignored.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public override long Seek(long offset, SeekOrigin origin)
{
    throw new NotSupportedException(
        "BZip2OutputStream Seek not supported");
}
| | 162 | |
|
/// <summary>
/// Not supported; the length of this stream cannot be set.
/// </summary>
/// <param name="value">Ignored.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public override void SetLength(long value)
{
    throw new NotSupportedException(
        "BZip2OutputStream SetLength not supported");
}
| | 171 | |
|
/// <summary>
/// Not supported; this is a write-only stream.
/// </summary>
/// <returns>Never returns.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public override int ReadByte()
{
    throw new NotSupportedException(
        "BZip2OutputStream ReadByte not supported");
}
| | 180 | |
|
/// <summary>
/// Not supported; this is a write-only stream.
/// </summary>
/// <param name="buffer">Ignored.</param>
/// <param name="offset">Ignored.</param>
/// <param name="count">Ignored.</param>
/// <returns>Never returns.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public override int Read(byte[] buffer, int offset, int count)
{
    throw new NotSupportedException(
        "BZip2OutputStream Read not supported");
}
| | 194 | |
|
/// <summary>
/// Compress a range of bytes by feeding each one to <see cref="WriteByte"/>.
/// </summary>
/// <param name="buffer">The buffer containing data to write.</param>
/// <param name="offset">Index in <paramref name="buffer"/> of the first byte to write.</param>
/// <param name="count">The number of bytes to write.</param>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
/// <exception cref="ArgumentException">The range runs past the end of <paramref name="buffer"/>.</exception>
public override void Write(byte[] buffer, int offset, int count)
{
    if (buffer == null) {
        throw new ArgumentNullException(nameof(buffer));
    }
    if (offset < 0) {
        throw new ArgumentOutOfRangeException(nameof(offset));
    }
    if (count < 0) {
        throw new ArgumentOutOfRangeException(nameof(count));
    }
    if (buffer.Length - offset < count) {
        throw new ArgumentException("Offset/count out of range");
    }

    // The checks above guarantee offset + count <= buffer.Length.
    int end = offset + count;
    for (int index = offset; index < end; index++) {
        WriteByte(buffer[index]);
    }
}
| | 223 | |
|
/// <summary>
/// Add one byte to the stream. Consecutive equal bytes are gathered into a
/// run that is emitted through WriteRun when the byte value changes or the
/// run reaches 255 bytes.
/// </summary>
/// <param name="value">The byte to write to the stream.</param>
public override void WriteByte(byte value)
{
    // A byte is already in 0..255, so this equals (256 + value) % 256.
    int symbol = value;

    // No run in progress: start one.
    if (currentChar == -1) {
        currentChar = symbol;
        runLength++;
        return;
    }

    // Different byte: emit the pending run and start a new one.
    if (currentChar != symbol) {
        WriteRun();
        runLength = 1;
        currentChar = symbol;
        return;
    }

    // Same byte: extend the run, emitting it once it exceeds 254.
    runLength++;
    if (runLength > 254) {
        WriteRun();
        currentChar = -1;
        runLength = 0;
    }
}
| | 249 | |
|
/// <summary>
/// Finish compression and release resources: runs the disposing cleanup
/// path and then tells the garbage collector the finalizer is no longer needed.
/// </summary>
public override void Close()
{
    Dispose(disposing: true);
    GC.SuppressFinalize(this);
}
| | 259 | |
|
| | 260 | | #endregion |
// Builds the two byte-value mappings from the inUse flags: every byte value
// flagged in inUse gets the next sequence number. seqToUnseq maps sequence
// number -> byte value, unseqToSeq maps byte value -> sequence number, and
// nInUse ends up holding the number of flagged values.
void MakeMaps()
{
    nInUse = 0;
    for (int value = 0; value < 256; value++) {
        if (!inUse[value]) {
            continue;
        }

        seqToUnseq[nInUse] = (char)value;
        unseqToSeq[value] = (char)nInUse;
        nInUse++;
    }
}
| | 272 | |
|
/// <summary>
/// Append the pending run (currentChar repeated runLength times) to the
/// block buffer and update the block CRC. Runs of 1 to 3 bytes are stored
/// literally; any other length is stored as four copies of the byte followed
/// by one byte holding (runLength - 4). If the current block is already full
/// it is ended and a new one started before the run is stored.
/// </summary>
void WriteRun()
{
    if (last >= allowableBlockSize) {
        // Block is full: flush it, start a fresh one and retry.
        EndBlock();
        InitBlock();
        WriteRun();
        return;
    }

    inUse[currentChar] = true;
    for (int k = 0; k < runLength; k++) {
        mCrc.Update(currentChar);
    }

    bool countedRun = runLength < 1 || runLength > 3;
    if (countedRun) {
        // The trailing count byte is itself a symbol of the block.
        inUse[runLength - 4] = true;
    }

    int literalCopies = countedRun ? 4 : runLength;
    for (int k = 0; k < literalCopies; k++) {
        last++;
        block[last + 1] = (byte)currentChar;
    }

    if (countedRun) {
        last++;
        block[last + 1] = (byte)(runLength - 4);
    }
}
| | 323 | |
|
/// <summary>
/// Gets the count of bytes this stream has written to the underlying stream so far.
/// </summary>
public int BytesWritten => bytesOut;
| | 330 | |
|
/// <summary>
/// Finishes the compressed stream and releases resources. On the first call
/// any pending run is written, the current block and the end-of-stream
/// trailer are emitted and the underlying stream is flushed. When
/// <paramref name="disposing"/> is true and <see cref="IsStreamOwner"/> is set,
/// the underlying stream is closed as well.
/// </summary>
/// <param name="disposing">
/// true when called from <see cref="Close"/>; false when called from the finalizer.
/// </param>
/// <remarks>
/// On the finalizer path, failures while writing the final data are swallowed:
/// an exception escaping a finalizer would terminate the process, and the
/// underlying stream may no longer be usable at that point. Failures on the
/// explicit <see cref="Close"/> path still propagate to the caller.
/// </remarks>
protected override void Dispose(bool disposing)
{
    try {
        base.Dispose(disposing);
        if (!disposed_) {
            // Mark first so a failure below cannot cause a second attempt.
            disposed_ = true;

            if (runLength > 0) {
                WriteRun();
            }

            currentChar = -1;
            EndBlock();
            EndCompression();
            Flush();
        }
    } catch (Exception) when (!disposing) {
        // Best effort only when finalizing; never throw from the finalizer thread.
    } finally {
        if (disposing && IsStreamOwner) {
            baseStream.Close();
        }
    }
}
| | 359 | |
|
/// <summary>
/// Flush the underlying stream. Only the underlying stream's own
/// <see cref="Stream.Flush"/> is invoked here.
/// </summary>
public override void Flush() => baseStream.Flush();
| | 367 | |
|
// Resets the output counters and queues the 4-byte stream header:
// 'B', 'Z', 'h' (huffmanised format) and a digit giving blockSize100k.
// Also clears the running combined CRC.
void Initialize()
{
    bytesOut = 0;
    nBlocksRandomised = 0;

    foreach (char magic in "BZh") {
        BsPutUChar(magic);
    }
    BsPutUChar('0' + blockSize100k);

    combinedCRC = 0;
}
| | 385 | |
|
// Prepares for a new block: resets the block CRC, marks the block buffer
// as empty (last = -1), clears all 256 inUse flags and recomputes the
// number of bytes the block may hold.
void InitBlock()
{
    mCrc.Reset();
    last = -1;

    int slot = 0;
    while (slot < 256) {
        inUse[slot] = false;
        slot++;
    }

    // The 20 subtracted here is just a safety margin ("paranoia constant").
    allowableBlockSize = BZip2Constants.BaseBlockSize * blockSize100k - 20;
}
| | 398 | |
|
// Finishes the current block: folds its CRC into the combined CRC, runs the
// reversible (sorting) transformation, then writes the block header, the
// block CRC, the randomisation bit and finally the coded block contents.
// Does nothing when the block is empty.
void EndBlock()
{
    // Nothing buffered: emit no block at all, which keeps empty files
    // compatible with the original bzip.
    if (last < 0) {
        return;
    }

    blockCRC = unchecked((uint)mCrc.Value);

    // Rotate the combined CRC left by one bit, then mix in this block's CRC.
    combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
    combinedCRC ^= blockCRC;

    /*-- sort the block and establish position of original string --*/
    DoReversibleTransformation();

    /*--
    48-bit block header, arbitrarily chosen as 0x314159265359.
    A 32-bit marker does not give a strong enough guarantee that the
    value will not appear by chance in the compressed data: worst-case
    probability for a 900k block is about 2.0e-3 for 32 bits, 1.0e-5 for
    40 bits and 4.0e-8 for 48 bits, so for a 100Gb file (about 100000
    blocks) only a 48-bit marker will do. Normal compression and
    decompression do *not* rely on this; it only matters when trying to
    recover blocks from damaged files.
    --*/
    const long blockMagic = 0x314159265359L;
    for (int shift = 40; shift >= 0; shift -= 8) {
        BsPutUChar((int)((blockMagic >> shift) & 0xFF));
    }

    /*-- Now the block's CRC, so it is in a known place. --*/
    BsPutint(unchecked((int)blockCRC));

    /*-- Now a single bit indicating randomisation. --*/
    BsW(1, blockRandomised ? 1 : 0);
    if (blockRandomised) {
        nBlocksRandomised++;
    }

    /*-- Finally, block's contents proper. --*/
    MoveToFrontCodeAndSend();
}
| | 448 | |
|
// Writes the end-of-stream trailer: a 48-bit marker, the combined CRC of
// all blocks, and then any bits still held in the bit buffer.
void EndCompression()
{
    /*--
    The magic 48-bit number 0x177245385090 marks the end of the last
    block. (It is sqrt(pi); e was rejected because its digits
    -- 27 18 28 18 28 46 -- contain too much repetition to feel
    statistically comfortable.)
    --*/
    const long endMagic = 0x177245385090L;
    for (int shift = 40; shift >= 0; shift -= 8) {
        BsPutUChar((int)((endMagic >> shift) & 0xFF));
    }

    BsPutint(unchecked((int)combinedCRC));

    BsFinishedWithStream();
}
| | 471 | |
|
// Attaches the bit writer to the given output stream and resets its state:
// empty bit buffer, no pending bits, zero bytes written.
void BsSetStream(Stream stream)
{
    baseStream = stream;

    bsBuff = 0;
    bsLive = 0;
    bytesOut = 0;
}
| | 479 | |
|
// Drains the bit buffer: while any bits are pending, writes the top byte of
// bsBuff to the underlying stream (the last byte is padded with whatever low
// bits the buffer holds) and counts it in bytesOut.
void BsFinishedWithStream()
{
    while (bsLive > 0) {
        int ch = (bsBuff >> 24);

        // The shift can yield a negative value when the top bit of bsBuff is set;
        // the narrowing cast must wrap rather than throw in overflow-checked
        // builds, exactly as the same cast does in BsW.
        unchecked { baseStream.WriteByte((byte)ch); } // write 8-bit

        bsBuff <<= 8;
        bsLive -= 8;
        bytesOut++;
    }
}
| | 490 | |
|
// Appends the low n bits of v to the output. Whole bytes already waiting in
// the bit buffer are written to the underlying stream first; the new bits
// are then placed directly below the bits still pending in bsBuff.
void BsW(int n, int v)
{
    while (bsLive >= 8) {
        int topByte = bsBuff >> 24;
        unchecked { baseStream.WriteByte((byte)topByte); } // write 8-bit
        bsBuff <<= 8;
        bsLive -= 8;
        ++bytesOut;
    }

    int shift = 32 - bsLive - n;
    bsBuff |= (v << shift);
    bsLive += n;
}
| | 503 | |
|
// Writes c to the bit stream as an 8-bit value.
void BsPutUChar(int c) => BsW(8, c);
| | 508 | |
|
// Writes the 32-bit value u to the bit stream as four bytes,
// most significant byte first.
void BsPutint(int u)
{
    for (int shift = 24; shift >= 0; shift -= 8) {
        BsW(8, (u >> shift) & 0xFF);
    }
}
| | 516 | |
|
// Writes c to the bit stream using numBits bits.
void BsPutIntVS(int numBits, int c) => BsW(numBits, c);
| | 521 | |
|
// Codes the nMTF values held in szptr and writes them to the bit stream.
// Steps, in order:
//   1. choose the number of coding tables (2..6) from nMTF;
//   2. seed the tables by splitting the symbol range by frequency (mtfFreq);
//   3. refine the tables for BZip2Constants.NumberOfIterations passes, each
//      pass assigning every group of BZip2Constants.GroupSize values to its
//      cheapest table and rebuilding code lengths from the resulting counts;
//   4. move-to-front code the per-group table selectors;
//   5. assign the actual codes;
//   6. transmit the in-use map, the selectors, the code lengths (delta
//      coded) and finally the block data.
// Panic() is called when an internal consistency check fails.
void SendMTFValues()
{
    char[][] len = new char[BZip2Constants.GroupCount][];
    for (int i = 0; i < BZip2Constants.GroupCount; ++i) {
        len[i] = new char[BZip2Constants.MaximumAlphaSize];
    }

    int gs, ge, bt, bc;
    int nSelectors = 0;
    int nGroups;

    int alphaSize = nInUse + 2;
    for (int t = 0; t < BZip2Constants.GroupCount; t++) {
        for (int v = 0; v < alphaSize; v++) {
            len[t][v] = (char)GREATER_ICOST;
        }
    }

    /*--- Decide how many coding tables to use ---*/
    if (nMTF <= 0) {
        Panic();
    }

    if (nMTF < 200) {
        nGroups = 2;
    } else if (nMTF < 600) {
        nGroups = 3;
    } else if (nMTF < 1200) {
        nGroups = 4;
    } else if (nMTF < 2400) {
        nGroups = 5;
    } else {
        nGroups = 6;
    }

    /*--- Generate an initial set of coding tables ---*/
    int nPart = nGroups;
    int remF = nMTF;
    gs = 0;
    while (nPart > 0) {
        // Aim for an equal share of the remaining frequency mass per table.
        int tFreq = remF / nPart;
        int aFreq = 0;
        ge = gs - 1;
        while (aFreq < tFreq && ge < alphaSize - 1) {
            ge++;
            aFreq += mtfFreq[ge];
        }

        // For alternate interior tables, give back the last symbol taken.
        if (ge > gs && nPart != nGroups && nPart != 1 && ((nGroups - nPart) % 2 == 1)) {
            aFreq -= mtfFreq[ge];
            ge--;
        }

        for (int v = 0; v < alphaSize; v++) {
            if (v >= gs && v <= ge) {
                len[nPart - 1][v] = (char)LESSER_ICOST;
            } else {
                len[nPart - 1][v] = (char)GREATER_ICOST;
            }
        }

        nPart--;
        gs = ge + 1;
        remF -= aFreq;
    }

    int[][] rfreq = new int[BZip2Constants.GroupCount][];
    for (int i = 0; i < BZip2Constants.GroupCount; ++i) {
        rfreq[i] = new int[BZip2Constants.MaximumAlphaSize];
    }

    short[] cost = new short[BZip2Constants.GroupCount];

    /*---
    Iterate up to N_ITERS times to improve the tables.
    ---*/
    for (int iter = 0; iter < BZip2Constants.NumberOfIterations; ++iter) {
        for (int t = 0; t < nGroups; ++t) {
            for (int v = 0; v < alphaSize; ++v) {
                rfreq[t][v] = 0;
            }
        }

        nSelectors = 0;
        gs = 0;
        while (gs < nMTF) {
            /*--- Set group start & end marks. --*/
            ge = gs + BZip2Constants.GroupSize - 1;
            if (ge >= nMTF) {
                ge = nMTF - 1;
            }

            /*--
            Calculate the cost of this group as coded
            by each of the coding tables.
            --*/
            for (int t = 0; t < nGroups; t++) {
                cost[t] = 0;
            }

            if (nGroups == 6) {
                // Hand-unrolled version of the general loop below for the common 6-table case.
                short cost0, cost1, cost2, cost3, cost4, cost5;
                cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
                for (int i = gs; i <= ge; ++i) {
                    short icv = szptr[i];
                    cost0 += (short)len[0][icv];
                    cost1 += (short)len[1][icv];
                    cost2 += (short)len[2][icv];
                    cost3 += (short)len[3][icv];
                    cost4 += (short)len[4][icv];
                    cost5 += (short)len[5][icv];
                }
                cost[0] = cost0;
                cost[1] = cost1;
                cost[2] = cost2;
                cost[3] = cost3;
                cost[4] = cost4;
                cost[5] = cost5;
            } else {
                for (int i = gs; i <= ge; ++i) {
                    short icv = szptr[i];
                    for (int t = 0; t < nGroups; t++) {
                        cost[t] += (short)len[t][icv];
                    }
                }
            }

            /*--
            Find the coding table which is best for this group,
            and record its identity in the selector table.
            --*/
            bc = 999999999;
            bt = -1;
            for (int t = 0; t < nGroups; ++t) {
                if (cost[t] < bc) {
                    bc = cost[t];
                    bt = t;
                }
            }
            selector[nSelectors] = (char)bt;
            nSelectors++;

            /*--
            Increment the symbol frequencies for the selected table.
            --*/
            for (int i = gs; i <= ge; ++i) {
                ++rfreq[bt][szptr[i]];
            }

            gs = ge + 1;
        }

        /*--
        Recompute the tables based on the accumulated frequencies.
        --*/
        for (int t = 0; t < nGroups; ++t) {
            HbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
        }
    }

    if (nGroups >= 8) {
        Panic();
    }

    if (nSelectors >= 32768 || nSelectors > (2 + (900000 / BZip2Constants.GroupSize))) {
        Panic();
    }

    /*--- Compute MTF values for the selectors. ---*/
    char[] pos = new char[BZip2Constants.GroupCount];
    char ll_i, tmp2, tmp;

    for (int i = 0; i < nGroups; i++) {
        pos[i] = (char)i;
    }

    for (int i = 0; i < nSelectors; i++) {
        ll_i = selector[i];
        int j = 0;
        tmp = pos[j];
        // Shift entries down until the selector is found, then move it to the front.
        while (ll_i != tmp) {
            j++;
            tmp2 = tmp;
            tmp = pos[j];
            pos[j] = tmp2;
        }
        pos[0] = tmp;
        selectorMtf[i] = (char)j;
    }

    int[][] code = new int[BZip2Constants.GroupCount][];
    for (int i = 0; i < BZip2Constants.GroupCount; ++i) {
        code[i] = new int[BZip2Constants.MaximumAlphaSize];
    }

    /*--- Assign actual codes for the tables. --*/
    for (int t = 0; t < nGroups; t++) {
        int minLen = 32;
        int maxLen = 0;
        for (int i = 0; i < alphaSize; i++) {
            if (len[t][i] > maxLen) {
                maxLen = len[t][i];
            }
            if (len[t][i] < minLen) {
                minLen = len[t][i];
            }
        }
        if (maxLen > 20) {
            Panic();
        }
        if (minLen < 1) {
            Panic();
        }
        HbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
    }

    /*--- Transmit the mapping table. ---*/
    // inUse16[i] is set when any of the 16 byte values in range i is in use.
    bool[] inUse16 = new bool[16];
    for (int i = 0; i < 16; ++i) {
        for (int j = 0; j < 16; ++j) {
            if (inUse[i * 16 + j]) {
                inUse16[i] = true;
            }
        }
    }

    for (int i = 0; i < 16; ++i) {
        BsW(1, inUse16[i] ? 1 : 0);
    }

    for (int i = 0; i < 16; ++i) {
        if (inUse16[i]) {
            for (int j = 0; j < 16; ++j) {
                BsW(1, inUse[i * 16 + j] ? 1 : 0);
            }
        }
    }

    /*--- Now the selectors. ---*/
    BsW(3, nGroups);
    BsW(15, nSelectors);
    for (int i = 0; i < nSelectors; ++i) {
        // Unary coding: selectorMtf[i] one-bits followed by a zero-bit.
        for (int j = 0; j < selectorMtf[i]; ++j) {
            BsW(1, 1);
        }
        BsW(1, 0);
    }

    /*--- Now the coding tables. ---*/
    for (int t = 0; t < nGroups; ++t) {
        int curr = len[t][0];
        BsW(5, curr);
        for (int i = 0; i < alphaSize; ++i) {
            while (curr < len[t][i]) {
                BsW(2, 2);
                curr++; /* 10 */
            }
            while (curr > len[t][i]) {
                BsW(2, 3);
                curr--; /* 11 */
            }
            BsW(1, 0);
        }
    }

    /*--- And finally, the block data proper ---*/
    int selCtr = 0;
    gs = 0;
    while (gs < nMTF) {
        ge = gs + BZip2Constants.GroupSize - 1;
        if (ge >= nMTF) {
            ge = nMTF - 1;
        }

        for (int i = gs; i <= ge; i++) {
            BsW(len[selector[selCtr]][szptr[i]], code[selector[selCtr]][szptr[i]]);
        }

        gs = ge + 1;
        ++selCtr;
    }
    if (selCtr != nSelectors) {
        Panic();
    }
}
| | 834 | |
|
// Writes origPtr as a 24-bit value, then generates the MTF values for the
// block and sends them.
void MoveToFrontCodeAndSend()
{
    BsW(24, origPtr);

    GenerateMTFValues();
    SendMTFValues();
}
| | 841 | |
|
// Shell sort of zptr[lo..hi] using the gap sequence in increments. Entries
// are compared with FullGtU on (entry + d). After every third insertion the
// work counter is checked: on the first attempt the sort is abandoned as
// soon as workDone exceeds workLimit.
void SimpleSort(int lo, int hi, int d)
{
    int count = hi - lo + 1;
    if (count < 2) {
        return;
    }

    // Find the first increment that is not smaller than the range size;
    // sorting starts from the increment just below it.
    int gapIndex = 0;
    while (increments[gapIndex] < count) {
        gapIndex++;
    }

    for (int g = gapIndex - 1; g >= 0; g--) {
        int gap = increments[g];
        int floor = lo + gap - 1;
        int next = lo + gap;

        while (true) {
            bool rangeDone = false;

            // Three insertions per work-limit check.
            for (int step = 0; step < 3; step++) {
                if (next > hi) {
                    rangeDone = true;
                    break;
                }

                int moving = zptr[next];
                int slot = next;
                while (FullGtU(zptr[slot - gap] + d, moving + d)) {
                    zptr[slot] = zptr[slot - gap];
                    slot -= gap;
                    if (slot <= floor) {
                        break;
                    }
                }
                zptr[slot] = moving;
                next++;
            }

            if (rangeDone) {
                break;
            }

            if (workDone > workLimit && firstAttempt) {
                return;
            }
        }
    }
}
| | 915 | |
|
// Exchanges the n entries of zptr starting at p1 with the n entries
// starting at p2, one pair at a time in ascending order.
void Vswap(int p1, int p2, int n)
{
    for (int k = 0; k < n; k++) {
        int held = zptr[p1 + k];
        zptr[p1 + k] = zptr[p2 + k];
        zptr[p2 + k] = held;
    }
}
| | 928 | |
|
/// <summary>
/// Sorts the <c>zptr</c> entries in the inclusive range [loSt, hiSt] using a
/// three-way partitioning quicksort that keeps its pending sub-ranges on an
/// explicit stack rather than recursing.
/// </summary>
/// <remarks>
/// Each range is partitioned on the byte <c>block[zptr[x] + d + 1]</c>.
/// Ranges shorter than <c>SMALL_THRESH</c>, or deeper than
/// <c>DEPTH_THRESH</c>, are handed to <c>SimpleSort</c>. The method returns
/// early, leaving the range only partly sorted, as soon as
/// <c>workDone</c> exceeds <c>workLimit</c> during the first attempt.
/// </remarks>
/// <param name="loSt">Lowest <c>zptr</c> index of the range to sort.</param>
/// <param name="hiSt">Highest <c>zptr</c> index of the range to sort.</param>
/// <param name="dSt">Initial comparison depth (byte offset) for the range.</param>
void QSort3(int loSt, int hiSt, int dSt)
{
    int unLo, unHi, ltLo, gtHi, med, n, m;
    int lo, hi, d;

    StackElement[] stack = new StackElement[QSORT_STACK_SIZE];
    int sp = 0;

    stack[sp].ll = loSt;
    stack[sp].hh = hiSt;
    stack[sp].dd = dSt;
    sp++;

    while (sp > 0) {
        if (sp >= QSORT_STACK_SIZE) {
            Panic();
        }

        sp--;
        lo = stack[sp].ll;
        hi = stack[sp].hh;
        d = stack[sp].dd;

        if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
            SimpleSort(lo, hi, d);
            if (workDone > workLimit && firstAttempt) {
                return;
            }
            continue;
        }

        // Pivot: median of the first, last and middle keys at this depth.
        med = Med3(block[zptr[lo] + d + 1],
                   block[zptr[hi] + d + 1],
                   block[zptr[(lo + hi) >> 1] + d + 1]);

        unLo = ltLo = lo;
        unHi = gtHi = hi;

        while (true) {
            // Scan upwards: keys equal to the pivot are parked at the low end,
            // smaller keys are skipped, a larger key stops the scan.
            while (true) {
                if (unLo > unHi) {
                    break;
                }
                n = ((int)block[zptr[unLo] + d + 1]) - med;
                if (n == 0) {
                    int temp = zptr[unLo];
                    zptr[unLo] = zptr[ltLo];
                    zptr[ltLo] = temp;
                    ltLo++;
                    unLo++;
                    continue;
                }
                if (n > 0) {
                    break;
                }
                unLo++;
            }

            // Scan downwards: keys equal to the pivot are parked at the high
            // end, larger keys are skipped, a smaller key stops the scan.
            while (true) {
                if (unLo > unHi) {
                    break;
                }
                n = ((int)block[zptr[unHi] + d + 1]) - med;
                if (n == 0) {
                    int temp = zptr[unHi];
                    zptr[unHi] = zptr[gtHi];
                    zptr[gtHi] = temp;
                    gtHi--;
                    unHi--;
                    continue;
                }
                if (n < 0) {
                    break;
                }
                unHi--;
            }

            if (unLo > unHi) {
                break;
            }

            {
                int temp = zptr[unLo];
                zptr[unLo] = zptr[unHi];
                zptr[unHi] = temp;
                unLo++;
                unHi--;
            }
        }

        // Every key equalled the pivot: retry the same range one byte deeper.
        if (gtHi < ltLo) {
            stack[sp].ll = lo;
            stack[sp].hh = hi;
            stack[sp].dd = d + 1;
            sp++;
            continue;
        }

        // Move the parked "equal" runs from both ends into the middle.
        n = Math.Min(ltLo - lo, unLo - ltLo);
        Vswap(lo, unLo - n, n);
        m = Math.Min(hi - gtHi, gtHi - unHi);
        Vswap(unLo, hi - m + 1, m);

        n = lo + unLo - ltLo - 1;
        m = hi - (gtHi - unHi) + 1;

        // Three entries are pushed below; make sure they all fit so that an
        // exhausted stack is reported through Panic() rather than surfacing
        // as an IndexOutOfRangeException on the last push.
        if (sp + 3 > QSORT_STACK_SIZE) {
            Panic();
        }

        stack[sp].ll = lo;
        stack[sp].hh = n;
        stack[sp].dd = d;
        sp++;

        stack[sp].ll = n + 1;
        stack[sp].hh = m - 1;
        stack[sp].dd = d + 1;
        sp++;

        stack[sp].ll = m;
        stack[sp].hh = hi;
        stack[sp].dd = d;
        sp++;
    }
}
| | 1052 | |
|
/// <summary>
/// Sorts the block positions held in <c>zptr</c>.
/// </summary>
/// <remarks>
/// Blocks with <c>last &lt; 4000</c> are passed straight to
/// <c>SimpleSort</c> with the work limit disabled. Larger blocks are first
/// distributed into 65536 two-byte buckets through <c>ftab</c>; the 256
/// "big" buckets are then processed from smallest to largest, quicksorting
/// the small buckets that are still unsorted and deriving the order of
/// others by scanning the completed big bucket. The method returns early
/// when <c>workDone</c> exceeds <c>workLimit</c> during the first attempt.
/// </remarks>
void MainSort()
{
    int i, j, ss, sb;
    int[] runningOrder = new int[256];
    int[] copy = new int[256];
    bool[] bigDone = new bool[256]; // freshly allocated, so every entry starts false
    int c1, c2;

    /*--
       In the various block-sized structures, live data runs
       from 0 to last+NUM_OVERSHOOT_BYTES inclusive.  First,
       set up the overshoot area for block.
    --*/
    for (i = 0; i < BZip2Constants.OvershootBytes; i++) {
        block[last + i + 2] = block[(i % (last + 1)) + 1];
    }
    for (i = 0; i <= last + BZip2Constants.OvershootBytes; i++) {
        quadrant[i] = 0;
    }

    block[0] = (byte)(block[last + 1]);

    if (last < 4000) {
        /*--
           Use simpleSort(), since the full sorting mechanism
           has quite a large constant overhead.
        --*/
        for (i = 0; i <= last; i++) {
            zptr[i] = i;
        }
        firstAttempt = false;
        workDone = workLimit = 0;
        SimpleSort(0, last, 0);
        return;
    }

    for (i = 0; i <= 65536; i++) {
        ftab[i] = 0;
    }

    // Count every two-byte pair (previous byte, current byte).
    c1 = block[0];
    for (i = 0; i <= last; i++) {
        c2 = block[i + 1];
        ftab[(c1 << 8) + c2]++;
        c1 = c2;
    }

    // Turn the counts into cumulative bucket end positions.
    for (i = 1; i <= 65536; i++) {
        ftab[i] += ftab[i - 1];
    }

    // Drop each position into its bucket, filling buckets from the top down.
    c1 = block[1];
    for (i = 0; i < last; i++) {
        c2 = block[i + 2];
        j = (c1 << 8) + c2;
        c1 = c2;
        ftab[j]--;
        zptr[ftab[j]] = i;
    }

    j = ((block[last + 1]) << 8) + (block[1]);
    ftab[j]--;
    zptr[ftab[j]] = last;

    /*--
       Now ftab contains the first loc of every small bucket.
       Calculate the running order, from smallest to largest
       big bucket.
    --*/
    for (i = 0; i <= 255; i++) {
        runningOrder[i] = i;
    }

    int vv;
    int h = 1;
    do {
        h = 3 * h + 1;
    } while (h <= 256);
    do {
        h = h / 3;
        for (i = h; i <= 255; i++) {
            vv = runningOrder[i];
            // Size of big bucket vv; ftab is not modified inside the inner loop.
            int vvSize = ftab[(vv + 1) << 8] - ftab[vv << 8];
            j = i;
            while ((ftab[(runningOrder[j - h] + 1) << 8] - ftab[runningOrder[j - h] << 8]) > vvSize) {
                runningOrder[j] = runningOrder[j - h];
                j = j - h;
                if (j <= (h - 1)) {
                    break;
                }
            }
            runningOrder[j] = vv;
        }
    } while (h != 1);

    /*--
       The main sorting loop.
    --*/
    for (i = 0; i <= 255; i++) {

        /*--
           Process big buckets, starting with the least full.
        --*/
        ss = runningOrder[i];

        /*--
           Complete the big bucket [ss] by quicksorting
           any unsorted small buckets [ss, j].  Hopefully
           previous pointer-scanning phases have already
           completed many of the small buckets [ss, j], so
           we don't have to sort them at all.
        --*/
        for (j = 0; j <= 255; j++) {
            sb = (ss << 8) + j;
            if (!((ftab[sb] & SETMASK) == SETMASK)) {
                int lo = ftab[sb] & CLEARMASK;
                int hi = (ftab[sb + 1] & CLEARMASK) - 1;
                if (hi > lo) {
                    QSort3(lo, hi, 2);
                    if (workDone > workLimit && firstAttempt) {
                        return;
                    }
                }
                ftab[sb] |= SETMASK;
            }
        }

        /*--
           The ss big bucket is now done.  Record this fact,
           and update the quadrant descriptors.  Remember to
           update quadrants in the overshoot area too, if
           necessary.  The "if (i < 255)" test merely skips
           this updating for the last bucket processed, since
           updating for the last bucket is pointless.
        --*/
        bigDone[ss] = true;

        if (i < 255) {
            int bbStart = ftab[ss << 8] & CLEARMASK;
            int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
            int shifts = 0;

            while ((bbSize >> shifts) > 65534) {
                shifts++;
            }

            for (j = 0; j < bbSize; j++) {
                int a2update = zptr[bbStart + j];
                int qVal = (j >> shifts);
                quadrant[a2update] = qVal;
                if (a2update < BZip2Constants.OvershootBytes) {
                    quadrant[a2update + last + 1] = qVal;
                }
            }

            if (!(((bbSize - 1) >> shifts) <= 65535)) {
                Panic();
            }
        }

        /*--
           Now scan this big bucket so as to synthesise the
           sorted order for small buckets [t, ss] for all t != ss.
        --*/
        for (j = 0; j <= 255; j++) {
            copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
        }

        for (j = ftab[ss << 8] & CLEARMASK; j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) {
            c1 = block[zptr[j]];
            if (!bigDone[c1]) {
                zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
                copy[c1]++;
            }
        }

        for (j = 0; j <= 255; j++) {
            ftab[(j << 8) + ss] |= SETMASK;
        }
    }
}
| | 1241 | |
|
/// <summary>
/// Flips the lowest bit of selected bytes in <c>block[1 .. last + 1]</c> and
/// rebuilds the <c>inUse</c> table from the resulting bytes.
/// </summary>
/// <remarks>
/// The distance between flipped bytes is taken from successive entries of
/// <c>BZip2Constants.RandomNumbers</c>, wrapping back to the first entry
/// after the 512th.
/// </remarks>
void RandomiseBlock()
{
    int countdown = 0;
    int tableIndex = 0;

    for (int symbol = 0; symbol < 256; symbol++) {
        inUse[symbol] = false;
    }

    for (int i = 0; i <= last; i++) {
        int pos = i + 1;

        if (countdown == 0) {
            countdown = (int)BZip2Constants.RandomNumbers[tableIndex];
            tableIndex++;
            if (tableIndex == 512) {
                tableIndex = 0;
            }
        }
        countdown--;

        // Only the byte reached when the countdown hits one is altered.
        if (countdown == 1) {
            block[pos] ^= 1;
        }

        inUse[block[pos]] = true;
    }
}
| | 1267 | |
|
/// <summary>
/// Sorts the current block and records in <c>origPtr</c> the index within
/// <c>zptr</c> at which position 0 ended up.
/// </summary>
/// <remarks>
/// The first sort runs with a work budget of <c>workFactor * last</c>. If
/// that budget is exceeded the block is randomised, flagged through
/// <c>blockRandomised</c>, and sorted again with the budget check disabled.
/// <c>Panic</c> is invoked when no <c>zptr</c> entry in [0, last] is zero.
/// </remarks>
void DoReversibleTransformation()
{
    firstAttempt = true;
    blockRandomised = false;
    workDone = 0;
    workLimit = workFactor * last;

    MainSort();

    bool budgetExceeded = workDone > workLimit && firstAttempt;
    if (budgetExceeded) {
        RandomiseBlock();
        workLimit = workDone = 0;
        blockRandomised = true;
        firstAttempt = false;
        MainSort();
    }

    // First index in [0, last] whose entry is 0, or -1 when there is none.
    origPtr = Array.IndexOf(zptr, 0, 0, last + 1);

    if (origPtr == -1) {
        Panic();
    }
}
| | 1297 | |
|
/// <summary>
/// Compares the block contents starting at position <paramref name="i1"/>
/// with those starting at <paramref name="i2"/>.
/// </summary>
/// <remarks>
/// The first six byte pairs are compared on their own. After that, each
/// step compares a byte pair and then the matching <c>quadrant</c> entries,
/// four steps per pass; both positions wrap around once they move beyond
/// <c>last</c>, and <c>workDone</c> is incremented once per pass. The
/// comparison stops after roughly <c>last + 1</c> further positions.
/// </remarks>
/// <param name="i1">Start position of the left-hand operand.</param>
/// <param name="i2">Start position of the right-hand operand.</param>
/// <returns>
/// <c>true</c> when the first differing byte or quadrant value is larger on
/// the <paramref name="i1"/> side; <c>false</c> when it is smaller or when no
/// difference is found.
/// </returns>
bool FullGtU(int i1, int i2)
{
    for (int step = 0; step < 6; step++) {
        byte left = block[i1 + 1];
        byte right = block[i2 + 1];
        if (left != right) {
            return left > right;
        }
        i1++;
        i2++;
    }

    int remaining = last + 1;

    do {
        for (int step = 0; step < 4; step++) {
            byte left = block[i1 + 1];
            byte right = block[i2 + 1];
            if (left != right) {
                return left > right;
            }

            int leftQuadrant = quadrant[i1];
            int rightQuadrant = quadrant[i2];
            if (leftQuadrant != rightQuadrant) {
                return leftQuadrant > rightQuadrant;
            }

            i1++;
            i2++;
        }

        // Wrap back to the start of the block.
        if (i1 > last) {
            i1 -= last + 1;
        }
        if (i2 > last) {
            i2 -= last + 1;
        }

        remaining -= 4;
        ++workDone;
    } while (remaining >= 0);

    return false;
}
| | 1422 | |
|
/// <summary>
/// Allocates the working arrays for one block, sized from
/// <c>blockSize100k</c>.
/// </summary>
/// <remarks>
/// With <c>n = BZip2Constants.BaseBlockSize * blockSize100k</c>:
/// <c>block</c> holds <c>n + 1 + OvershootBytes</c> bytes, <c>quadrant</c>
/// holds <c>n + OvershootBytes</c> entries, <c>zptr</c> holds <c>n</c>
/// entries, <c>ftab</c> holds 65537 entries and <c>szptr</c> holds
/// <c>2 * n</c> entries. Array creation reports failure by throwing, so no
/// null checks are needed afterwards.
/// </remarks>
void AllocateCompressStructures()
{
    int n = BZip2Constants.BaseBlockSize * blockSize100k;

    block = new byte[n + 1 + BZip2Constants.OvershootBytes];
    quadrant = new int[n + BZip2Constants.OvershootBytes];
    zptr = new int[n];
    ftab = new int[65537];

    /*
       The back end needs a place to store the MTF values
       whilst it calculates the coding tables.  These values
       fit in a short.  Rather than overlaying them on the
       start of zptr, a separate short array is allocated here.
    */
    szptr = new short[2 * n];
}
| | 1451 | |
|
/// <summary>
/// Converts the sorted block into move-to-front symbols written to
/// <c>szptr</c>, counting each emitted symbol in <c>mtfFreq</c> and storing
/// the number of symbols written in <c>nMTF</c>.
/// </summary>
/// <remarks>
/// Consecutive rank-zero results are accumulated and written as a sequence
/// of <c>RunA</c>/<c>RunB</c> symbols; every other rank <c>j</c> is written
/// as <c>j + 1</c>. The output is terminated with the symbol
/// <c>nInUse + 1</c>.
/// </remarks>
void GenerateMTFValues()
{
    char[] mtfList = new char[256];

    MakeMaps();
    int endOfBlock = nInUse + 1;

    for (int s = 0; s <= endOfBlock; s++) {
        mtfFreq[s] = 0;
    }

    for (int s = 0; s < nInUse; s++) {
        mtfList[s] = (char)s;
    }

    int writePos = 0;
    int zeroRun = 0;

    for (int i = 0; i <= last; i++) {
        char wanted = unseqToSeq[block[zptr[i]]];

        // Find the symbol in the list, shifting earlier entries down by one
        // so that it can be re-inserted at the front.
        int rank = 0;
        char carried = mtfList[rank];
        while (wanted != carried) {
            rank++;
            char displaced = carried;
            carried = mtfList[rank];
            mtfList[rank] = displaced;
        }
        mtfList[0] = carried;

        if (rank == 0) {
            zeroRun++;
            continue;
        }

        if (zeroRun > 0) {
            writePos = EmitZeroRun(zeroRun, writePos);
            zeroRun = 0;
        }

        szptr[writePos] = (short)(rank + 1);
        writePos++;
        mtfFreq[rank + 1]++;
    }

    if (zeroRun > 0) {
        writePos = EmitZeroRun(zeroRun, writePos);
    }

    szptr[writePos] = (short)endOfBlock;
    writePos++;
    mtfFreq[endOfBlock]++;

    nMTF = writePos;
}

/// <summary>
/// Writes a run of <paramref name="runLength"/> rank-zero results to
/// <c>szptr</c> as <c>RunA</c>/<c>RunB</c> symbols, updating <c>mtfFreq</c>.
/// </summary>
/// <param name="runLength">Length of the run; must be greater than zero.</param>
/// <param name="wr">Index in <c>szptr</c> at which to start writing.</param>
/// <returns>The index just past the last symbol written.</returns>
int EmitZeroRun(int runLength, int wr)
{
    int pending = runLength - 1;
    while (true) {
        if (pending % 2 == 0) {
            szptr[wr] = (short)BZip2Constants.RunA;
            wr++;
            mtfFreq[BZip2Constants.RunA]++;
        } else {
            szptr[wr] = (short)BZip2Constants.RunB;
            wr++;
            mtfFreq[BZip2Constants.RunB]++;
        }

        if (pending < 2) {
            break;
        }
        pending = (pending - 2) / 2;
    }
    return wr;
}
| | 1550 | |
|
/// <summary>
/// Signals an unrecoverable internal error.
/// </summary>
/// <exception cref="BZip2Exception">Always thrown.</exception>
static void Panic()
{
    const string message = "BZip2 output stream panic";
    throw new BZip2Exception(message);
}
| | 1555 | |
|
/// <summary>
/// Computes a code length for each of the first <paramref name="alphaSize"/>
/// symbols by repeatedly merging the two lightest nodes of a min-heap, and
/// retries with flattened frequencies until no length exceeds
/// <paramref name="maxLen"/>.
/// </summary>
/// <remarks>
/// Each weight stores the (scaled) frequency in its upper 24 bits and the
/// subtree depth in its lower 8 bits. A frequency of zero is treated as one.
/// <c>Panic</c> is invoked if the heap or node count reaches the capacity
/// derived from <c>BZip2Constants.MaximumAlphaSize</c>.
/// </remarks>
/// <param name="len">Receives the code length of symbol <c>i</c> at index <c>i</c>.</param>
/// <param name="freq">Frequency of each symbol.</param>
/// <param name="alphaSize">Number of symbols to process.</param>
/// <param name="maxLen">Largest code length allowed.</param>
static void HbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen)
{
    /*--
       Nodes and heap entries run from 1.  Entry 0
       for both the heap and nodes is a sentinel.
    --*/
    int[] heap = new int[BZip2Constants.MaximumAlphaSize + 2];
    int[] weight = new int[BZip2Constants.MaximumAlphaSize * 2];
    int[] parent = new int[BZip2Constants.MaximumAlphaSize * 2];

    for (int i = 0; i < alphaSize; ++i) {
        weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
    }

    while (true) {
        int nNodes = alphaSize;
        int nHeap = 0;

        heap[0] = 0;
        weight[0] = 0;
        parent[0] = -2;

        for (int i = 1; i <= alphaSize; ++i) {
            parent[i] = -1;
            nHeap++;
            heap[nHeap] = i;
            HbUpHeap(heap, weight, nHeap);
        }
        if (!(nHeap < (BZip2Constants.MaximumAlphaSize + 2))) {
            Panic();
        }

        while (nHeap > 1) {
            // Remove the two lightest nodes...
            int n1 = heap[1];
            heap[1] = heap[nHeap];
            nHeap--;
            HbDownHeap(heap, weight, nHeap);

            int n2 = heap[1];
            heap[1] = heap[nHeap];
            nHeap--;
            HbDownHeap(heap, weight, nHeap);

            // ...and replace them with a parent whose frequency is the sum
            // of theirs and whose depth is one more than the deeper child.
            nNodes++;
            parent[n1] = parent[n2] = nNodes;

            int combinedFrequency = (weight[n1] & ~0xff) + (weight[n2] & ~0xff);
            int deeperChild = Math.Max(weight[n1] & 0xff, weight[n2] & 0xff);
            weight[nNodes] = combinedFrequency | (1 + deeperChild);

            parent[nNodes] = -1;
            nHeap++;
            heap[nHeap] = nNodes;
            HbUpHeap(heap, weight, nHeap);
        }
        if (!(nNodes < (BZip2Constants.MaximumAlphaSize * 2))) {
            Panic();
        }

        // A symbol's code length is the number of parent links above it.
        bool tooLong = false;
        for (int i = 1; i <= alphaSize; ++i) {
            int depth = 0;
            int node = i;
            while (parent[node] >= 0) {
                node = parent[node];
                depth++;
            }
            len[i - 1] = (char)depth;
            tooLong |= depth > maxLen;
        }

        if (!tooLong) {
            break;
        }

        // Halve every symbol's frequency (keeping it at least one) and try
        // again. The bound is inclusive so the last symbol is rescaled too.
        for (int i = 1; i <= alphaSize; ++i) {
            int scaled = 1 + ((weight[i] >> 8) / 2);
            weight[i] = scaled << 8;
        }
    }
}

/// <summary>
/// Moves the heap entry at <paramref name="pos"/> towards the root until its
/// parent is no heavier; relies on the zero-weight sentinel in slot 0.
/// </summary>
static void HbUpHeap(int[] heap, int[] weight, int pos)
{
    int moving = heap[pos];
    while (weight[moving] < weight[heap[pos >> 1]]) {
        heap[pos] = heap[pos >> 1];
        pos >>= 1;
    }
    heap[pos] = moving;
}

/// <summary>
/// Moves the root entry of a heap holding <paramref name="nHeap"/> entries
/// downwards until it is lighter than its lightest child.
/// </summary>
static void HbDownHeap(int[] heap, int[] weight, int nHeap)
{
    int pos = 1;
    int moving = heap[pos];
    while (true) {
        int child = pos << 1;
        if (child > nHeap) {
            break;
        }
        if (child < nHeap && weight[heap[child + 1]] < weight[heap[child]]) {
            child++;
        }
        if (weight[moving] < weight[heap[child]]) {
            break;
        }
        heap[pos] = heap[child];
        pos = child;
    }
    heap[pos] = moving;
}
| | 1687 | |
|
/// <summary>
/// Assigns code values to symbols from their code lengths: lengths are
/// visited from <paramref name="minLen"/> to <paramref name="maxLen"/>,
/// symbols of the current length receive consecutive values in index order,
/// and the running value is doubled before moving to the next length.
/// </summary>
/// <param name="code">Receives the code value of each symbol.</param>
/// <param name="length">Code length of each symbol.</param>
/// <param name="minLen">First code length to process.</param>
/// <param name="maxLen">Last code length to process.</param>
/// <param name="alphaSize">Number of symbols to examine.</param>
static void HbAssignCodes(int[] code, char[] length, int minLen, int maxLen, int alphaSize)
{
    int nextCode = 0;
    int bits = minLen;

    while (bits <= maxLen) {
        int symbol = 0;
        while (symbol < alphaSize) {
            if (length[symbol] == bits) {
                code[symbol] = nextCode;
                nextCode++;
            }
            symbol++;
        }

        nextCode <<= 1;
        bits++;
    }
}
| | 1701 | |
|
/// <summary>
/// Returns the median of three byte values.
/// </summary>
/// <param name="a">First value.</param>
/// <param name="b">Second value.</param>
/// <param name="c">Third value.</param>
/// <returns>The value that lies between the other two.</returns>
static byte Med3(byte a, byte b, byte c)
{
    byte low = Math.Min(a, b);
    byte high = Math.Max(a, b);

    // c is at or above both: the larger of a and b is the middle value.
    if (high <= c) {
        return high;
    }

    // Otherwise c lies below the larger one, so the middle value is
    // whichever of c and the smaller one is greater.
    return Math.Max(low, c);
}
| | 1720 | |
|
/// <summary>
/// One pending sub-range on the explicit work stack used by QSort3.
/// </summary>
struct StackElement
{
    /// <summary>Lowest index of the pending range.</summary>
    public int ll;

    /// <summary>Highest index of the pending range.</summary>
    public int hh;

    /// <summary>Comparison depth at which the range is to be processed.</summary>
    public int dd;
}
| | 1727 | |
|
| | 1728 | | #region Instance Fields |
| 1 | 1729 | | bool isStreamOwner = true; |
| | 1730 | |
|
| | 1731 | | /*-- |
| | 1732 | | index of the last char in the block, so |
| | 1733 | | the block size == last + 1. |
| | 1734 | | --*/ |
| | 1735 | | int last; |
| | 1736 | |
|
| | 1737 | | /*-- |
| | 1738 | | index in zptr[] of original string after sorting. |
| | 1739 | | --*/ |
| | 1740 | | int origPtr; |
| | 1741 | |
|
| | 1742 | | /*-- |
| | 1743 | | always: in the range 0 .. 9. |
| | 1744 | | The current block size is 100000 * this number. |
| | 1745 | | --*/ |
| | 1746 | | int blockSize100k; |
| | 1747 | |
|
| | 1748 | | bool blockRandomised; |
| | 1749 | |
|
| | 1750 | | int bytesOut; |
| | 1751 | | int bsBuff; |
| | 1752 | | int bsLive; |
| 1 | 1753 | | IChecksum mCrc = new BZip2Crc(); |
| | 1754 | |
|
| 1 | 1755 | | bool[] inUse = new bool[256]; |
| | 1756 | | int nInUse; |
| | 1757 | |
|
| 1 | 1758 | | char[] seqToUnseq = new char[256]; |
| 1 | 1759 | | char[] unseqToSeq = new char[256]; |
| | 1760 | |
|
| 1 | 1761 | | char[] selector = new char[BZip2Constants.MaximumSelectors]; |
| 1 | 1762 | | char[] selectorMtf = new char[BZip2Constants.MaximumSelectors]; |
| | 1763 | |
|
| | 1764 | | byte[] block; |
| | 1765 | | int[] quadrant; |
| | 1766 | | int[] zptr; |
| | 1767 | | short[] szptr; |
| | 1768 | | int[] ftab; |
| | 1769 | |
|
| | 1770 | | int nMTF; |
| | 1771 | |
|
| 1 | 1772 | | int[] mtfFreq = new int[BZip2Constants.MaximumAlphaSize]; |
| | 1773 | |
|
| | 1774 | | /* |
| | 1775 | | * Used when sorting. If too many long comparisons |
| | 1776 | | * happen, we stop sorting, randomise the block |
| | 1777 | | * slightly, and try again. |
| | 1778 | | */ |
| | 1779 | | int workFactor; |
| | 1780 | | int workDone; |
| | 1781 | | int workLimit; |
| | 1782 | | bool firstAttempt; |
| | 1783 | | int nBlocksRandomised; |
| | 1784 | |
|
| 1 | 1785 | | int currentChar = -1; |
| | 1786 | | int runLength; |
| | 1787 | | uint blockCRC, combinedCRC; |
| | 1788 | | int allowableBlockSize; |
| | 1789 | | Stream baseStream; |
| | 1790 | | bool disposed_; |
| | 1791 | | #endregion |
| | 1792 | | } |
| | 1793 | | } |