////////////////////////////////////////////////////////////////////////////
//			     **** WAVPACK ****				  //
//		    Hybrid Lossless Wavefile Compressor			  //
//		Copyright (c) 1998 - 2002 Conifer Software.		  //
//			    All Rights Reserved.			  //
//      Distributed under the BSD Software License (see license.txt)      //
////////////////////////////////////////////////////////////////////////////

// pack.c

// This module actually handles the compression of the audio data, except for
// the entropy coding which is handled by the words? modules. For efficiency,
// the conversion is isolated to tight loops that handle an entire buffer. The
// actual bitstreams are "outbits" for the main WavPack file and "out2bits"
// for the .wvc file (if present) and these must be initialized with
// bs_open_write() before pack_samples() is called.
//
// The "COMPACT" define specifies an alternate version for the default lossless
// mode that uses less inline code but is somewhat slower. Use at your own
// discretion.

#include "wavpack.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>


//////////////////////////////// local macros /////////////////////////////////

// GET_24 assembles a signed 24-bit value from the three bytes at "ptr",
// taking the first byte as the least significant. The bytes are fetched one
// at a time, so the result does not depend on host byte order or alignment,
// and the sign extension is done arithmetically rather than by shifting a
// negative value. On little-endian hosts the result is the same as the
// previous byte + short formulation. "ptr" is evaluated more than once, so it
// must not have side effects.

#define GET_24(ptr) \
    ((((long) ((const unsigned char *)(ptr)) [0] | \
       ((long) ((const unsigned char *)(ptr)) [1] << 8) | \
       ((long) ((const unsigned char *)(ptr)) [2] << 16)) ^ 0x800000L) - 0x800000L)

// apply_weight scales "sample" by "weight" and removes "bits" fractional bits,
// adding half of the divisor first so that the result is rounded.

#define apply_weight(bits, weight, sample) \
    (((weight) * (sample) + (1 << ((bits) - 1))) >> (bits))

// apply_weight24 is the 8-bit-weight version used when samples may exceed 24
// bits of magnitude: those go through floating point instead of the integer
// multiply, the rest take the same integer path as apply_weight (8, ...).
// "sample" is evaluated more than once.

#define apply_weight24(weight, sample) \
    (((sample) >= 0x800000 || (sample) < -0x800000) ? \
	((long) floor (((double) (weight) * (sample) + 128.0) / 256.0)) : \
	(((weight) * (sample) + 128) >> 8))

// update_weight nudges "weight" (which must be a modifiable lvalue) by one
// step: up when "source" and "result" have the same sign, down when the signs
// differ, and not at all when either is zero. A weight sitting exactly at
// +(1 << bits) or -(1 << bits) is not pushed any further. The do/while
// wrapper makes the macro behave as a single statement, so it is safe in an
// unbraced if/else.

#define update_weight(bits, weight, source, result) \
    do { \
	if ((source) && (result)) { \
	    if (((source) ^ (result)) >= 0) { \
		if ((weight)++ == (1 << (bits))) (weight)--; \
	    } \
	    else if ((weight)-- == (-(1 << (bits)))) (weight)++; \
	} \
    } while (0)

// shorthand for the main output bitstream; requires a "wpc" in scope

#define outbits (&wpc->outbits)

//////////////////////////////// local tables ///////////////////////////////

// These two tables specify the characteristics of the decorrelation filters.
// Each term represents one layer of the sequential filter, where positive
// values indicate the relative sample involved from the same channel (1=prev)
// while -1 and -2 indicate cross channel decorrelation (in stereo only).
//
// The element type is explicitly "signed char" because the tables hold
// negative values and the signedness of plain "char" is implementation
// defined; with an unsigned "char" the -1 and -2 entries would read back as
// 255 and 254. The element size is still one byte, so sizeof (table) remains
// the number of entries.

static const signed char extreme_terms [] = { 1,1,1,2,4,-1,1,2,3,6,-2,8,5,7,4,1,2,3 };
static const signed char default_terms [] = { 1,1,1,-1,2,1,-2 };

//////////////////////////////////////////////////////////////////////////////
// This function initializes everything required to pack WavPack bitstreams //
// and must be called BEFORE any other function in this module.             //
//                                                                          //
// The "flags" and "bits" fields of the WavpackHeader structure control     //
// the exact compression mode employed. However, some bit combinations that //
// are valid for reading are not valid for writing because the "pack" code  //
// has been cleaned of obsoleted modes. Currently, there are 6 basic modes  //
// that are supported for both reading and writing and the corresponding    //
// "flags" and "bits" values (with allowable variations) are shown here.    //
// ------------------------------------------------------------------------ //
// 1. Lossless High:                                                        //
//                                                                          //
//    command line: -h                                                      //
//                                                                          //
//    flags: HIGH_FLAG | NEW_HIGH_FLAG | CROSS_DECORR | NEW_DECORR_FLAG |   //
//           EXTREME_DECORR | JOINT_STEREO                                  //
//                                                                          //
//    bits: 0                                                               //
//                                                                          //
//    options: JOINT_STEREO can be cleared and this sometimes (but not      //
//             usually) results in better compression                       //
// ------------------------------------------------------------------------ //
// 2. Lossless Standard:                                                    //
//                                                                          //
//    command line: default operation                                       //
//                                                                          //
//    flags: HIGH_FLAG | NEW_HIGH_FLAG | CROSS_DECORR | NEW_DECORR_FLAG |   //
//           JOINT_STEREO                                                   //
//                                                                          //
//    bits: 0                                                               //
//                                                                          //
//    options: JOINT_STEREO can be cleared and this sometimes (but not      //
//             usually) results in better compression                       //
//                                                                          //
//    note: in stereo we must additionally set EXTREME_DECORR and           //
//          CANCEL_EXTREME before final write of flags to file, BUT NOT in  //
//          calls to pack_init() or pack_samples().                         //
// ------------------------------------------------------------------------ //
// 3. Lossless Fast:                                                        //
//                                                                          //
//    command line: -f                                                      //
//    flags: 0                                                              //
//    bits: 0                                                               //
// ------------------------------------------------------------------------ //
// 4. Lossless Very Fast:                                                   //
//                                                                          //
//    command line: -ff                                                     //
//    flags: FAST_FLAG                                                      //
//    bits: 0                                                               //
// ------------------------------------------------------------------------ //
// 5. Hybrid High:                                                          //
//                                                                          //
//    command line: -hb                                                     //
//                                                                          //
//    flags: NEW_HIGH_FLAG | NEW_DECORR_FLAG | EXTREME_DECORR               //
//                                                                          //
//    bits: target bitrate (both channels) in bits/sample * 256             //
//          (the minimum is 3 bits/sample mono, 6 bits/sample stereo; the   //
//          maximum is 32 bits/sample mono, 64 bits/sample stereo)          //
//                                                                          //
//    options: JOINT_STEREO may be set to cut noise and improve compression //
//             on stereo files with high inter-channel correlation          //
// ------------------------------------------------------------------------ //
// 6. Hybrid Standard:                                                      //
//                                                                          //
//    command line: -b                                                      //
//                                                                          //
//    flags: NEW_HIGH_FLAG | NEW_DECORR_FLAG                                //
//                                                                          //
//    bits: target bitrate (both channels) in bits/sample * 256             //
//          (the minimum is 3 bits/sample mono, 6 bits/sample stereo; the   //
//          maximum is 32 bits/sample mono, 64 bits/sample stereo)          //
//                                                                          //
//    options: JOINT_STEREO may be set to cut noise and improve compression //
//             on stereo files with high inter-channel correlation          //
// ------------------------------------------------------------------------ //
// Other flag notes:                                                        //
//                                                                          //
// 1. If file is mono, then JOINT_STEREO and CROSS_DECORR are cleared and   //
//    MONO_FLAG is set.                                                     //
//                                                                          //
// 2. If hybrid mode is used and sampling rate is >= 64,000 Hz, then set    //
//    LOSSY_SHAPE.                                                          //
//////////////////////////////////////////////////////////////////////////////

void pack_init (WavpackContext *wpc)
{
    const int flags = wpc->wphdr->flags;
    const int allow_cross = (flags & CROSS_DECORR) != 0;
    struct decorr_pass *slot;
    unsigned int idx;

    // reset the per-pass filter state and the running compression state
    // before anything is filled in (CLEAR comes from the project headers and
    // presumably zero-fills its argument)

    CLEAR (wpc->decorr_passes);
    CLEAR (wpc->dc);

    // Copy the selected term table into consecutive decorrelation passes.
    // Negative (cross channel) terms are skipped entirely unless
    // CROSS_DECORR is set, so the passes that are used stay contiguous.

    slot = wpc->decorr_passes;

    if (flags & EXTREME_DECORR) {
	for (idx = 0; idx < sizeof (extreme_terms); ++idx) {
	    if (extreme_terms [idx] < 0 && !allow_cross)
		continue;

	    slot->term = extreme_terms [idx];
	    ++slot;
	}
    }
    else {
	for (idx = 0; idx < sizeof (default_terms); ++idx) {
	    if (default_terms [idx] < 0 && !allow_cross)
		continue;

	    slot->term = default_terms [idx];
	    ++slot;
	}
    }

    // finally let each of the entropy coder modules set itself up

    init_word1 (wpc);
    init_word3 (wpc);
    init_word4 (wpc);
}

///////////////////////////////////////////////////////////////////////////////
// This monster actually packs the 16-bit or 24-bit audio data and writes it //
// into the open bitstream(s). The function pack_init() must have been       //
// called and the bitstreams must have been opened with bs_open_write(). For //
// clarity, the function is broken up into segments that handle various      //
// modes. This results in a few extra flag checks, but makes the code easier //
// to follow because the nesting does not become so deep. For maximum        //
// efficiency, the conversion is isolated to tight loops that handle an      //
// entire buffer. The running CRC calculations are retrieved from and stored //
// back into the WavPack header each call. Note that two CRCs are required   //
// in hybrid mode because errors are detected with or without the correction //
// file. If noise calculations are desired, set the CALC_NOISE bit in the    //
// WavPack header and retrieve the results with the pack_noise() function at //
// the end of the pack operation. The decorrelation filters here provide     //
// efficient operation on values up to about 20-22 bits (using 32-bit ints), //
// which is not quite enough to handle 24-bit data. For this reason, special //
// provisions are made for this case. In lossless modes, the data values are //
// truncated to 20-bits before compression and the lower 4 bits are sent     //
// directly (or compressed with a very simple RLE). In lossy modes we simply //
// use floating point math in cases where there might be an overflow.        //
///////////////////////////////////////////////////////////////////////////////

void pack_samples (WavpackContext *wpc, void *buffer, uint sample_count)
{
    int flags = wpc->wphdr->flags, shift = wpc->wphdr->shift, m = wpc->dc.m;
    long crc = wpc->wphdr->crc, crc2 = wpc->wphdr->crc2;
    double noise_acc = 0.0, noise;
    struct decorr_pass *dpp;
    uchar *bptr;
    uint i;

#ifdef COMPACT
    long sample [2] [2];
    int weight [2] [1];
#else
    long sample [2] [5], dsample [2], csample [2];
    int weight [2] [5], cweight [4];
#endif

#ifdef COMPACT
    memcpy (sample, wpc->dc.sample, sizeof (sample));
    memcpy (weight, wpc->dc.weight, sizeof (weight));
#else
    memcpy (sample, wpc->dc.sample, sizeof (sample));
    memcpy (dsample, wpc->dc.dsample, sizeof (dsample));
    memcpy (csample, wpc->dc.csample, sizeof (csample));
    memcpy (weight, wpc->dc.weight, sizeof (weight));
    memcpy (cweight, wpc->dc.cweight, sizeof (cweight));
#endif

    ////////////////// handle lossless mono very fast mode ////////////////////
    ///////////////// (which used to be regular fast mode) ////////////////////

    if (!wpc->wphdr->bits && (flags & MONO_FLAG) && (flags & FAST_FLAG)) {
	if (flags & OVER_20)
	    for (bptr = buffer, i = 0; i < sample_count; bptr += 3, ++i) {
		long buf;

		crc = crc * 3 + (buf = GET_24 (bptr));
		sample [0] [0] += sample [0] [1] += send_word3 (wpc, (buf >> 4) - sample [0] [1] - sample [0] [0], 0);
		putbits (buf & 0xf, 4, outbits);
	    }
	else if (flags & BYTES_3)
	    for (bptr = buffer, i = 0; i < sample_count; bptr += 3, ++i) {
		sample [0] [1] += send_word3 (wpc, (GET_24 (bptr) >> shift) - sample [0] [1] - sample [0] [0], 0);
		crc = crc * 3 + (sample [0] [0] += sample [0] [1]);
	    }
	else
	    for (bptr = buffer, i = 0; i < sample_count; ++i) {
		sample [0] [1] += send_word3 (wpc, *((short*) bptr)++ - sample [0] [1] - sample [0] [0], 0);
		crc = crc * 3 + (sample [0] [0] += sample [0] [1]);
	    }
    }

    //////////////////// handle lossless mono high mode ///////////////////////

    else if (!wpc->wphdr->bits && (flags & MONO_FLAG) && (flags & HIGH_FLAG))
	for (bptr = buffer, i = 0; i < sample_count; ++i) {
	    long code, code2, buf;

	    if (flags & OVER_20) {
		crc = crc * 3 + (buf = GET_24 (bptr));
		code = buf >> 4;
		buf &= 0xf;
		bptr += 3;
	    }
	    else if (flags & BYTES_3) {
		crc = crc * 3 + (code = GET_24 (bptr) >> shift);
		bptr += 3;
	    }
	    else
		crc = crc * 3 + (code = *((short*) bptr)++);

	    if (flags & EXTREME_DECORR) {
		for (dpp = wpc->decorr_passes; dpp->term; dpp++) {
		    long sam = dpp->samples_A [m];

		    dpp->samples_A [(m + dpp->term) & (MAX_TERM - 1)] = code;
		    code -= apply_weight (9, dpp->weight_A, sam);
		    update_weight (9, dpp->weight_A, sam, code);
		}

		m = (m + 1) & (MAX_TERM - 1);
	    }
	else {
#ifdef COMPACT
	    for (dpp = wpc->decorr_passes; dpp->term; dpp++) {
		long sam = dpp->samples_A [m];

		dpp->samples_A [(m + dpp->term) & (MAX_TERM - 1)] = code;
		code -= apply_weight (8, dpp->weight_A, sam);
		update_weight (8, dpp->weight_A, sam, code);
	    }

	    m = (m + 1) & (MAX_TERM - 1);
#else
	    code2 = code - apply_weight (8, weight [0] [4], sample [0] [4]);
	    update_weight (8, weight [0] [4], sample [0] [4], code2);
	    sample [0] [4] = code;
	    code = code2 - apply_weight (8, weight [0] [3], sample [0] [3]);
	    update_weight (8, weight [0] [3], sample [0] [3], code);
	    sample [0] [3] = code2;
	    code2 = code - apply_weight (8, weight [0] [2], sample [0] [2]);
	    update_weight (8, weight [0] [2], sample [0] [2], code2);
	    sample [0] [2] = code;
	    code = code2 - apply_weight (8, weight [0] [1], sample [0] [1]);
	    update_weight (8, weight [0] [1], sample [0] [1], code);
	    sample [0] [1] = dsample [0];
	    dsample [0] = code2;
	    code2 = code - apply_weight (8, weight [0] [0], sample [0] [0]);
	    update_weight (8, weight [0] [0], sample [0] [0], code2);
	    sample [0] [0] = code;
	    code = code2;
#endif
	}

	send_word1 (wpc, code, 0);

	if (flags & OVER_20)
	    if (wpc->dc.extra_bits_count == 8) {
		if (wpc->dc.last_extra_bits == buf) {
		    putbit_1 (outbits);
		}
		else {
		    putbits (buf << 1, 5, outbits);
		    wpc->dc.last_extra_bits = buf;
		    wpc->dc.extra_bits_count = 0;
		}
	    }
	    else {
		putbits (buf, 4, outbits);

		if (wpc->dc.last_extra_bits != buf) {
		    wpc->dc.last_extra_bits = buf;
		    wpc->dc.extra_bits_count = 0;
		}
		else
		    ++wpc->dc.extra_bits_count;
	    }
	}

    /////////////////// handle lossless mono fast mode ////////////////////////
    ///////////////// (which used to be the default mode) /////////////////////

    else if (!wpc->wphdr->bits && (flags & MONO_FLAG) && !(flags & (FAST_FLAG | HIGH_FLAG)))
	for (bptr = buffer, i = 0; i < sample_count; ++i) {
	    long code, code2, buf;

	    if (flags & OVER_20) {
		crc = crc * 3 + (buf = GET_24 (bptr));
		code = buf >> 4;
		buf &= 0xf;
		bptr += 3;
	    }
	    else if (flags & BYTES_3) {
		crc = crc * 3 + (code = GET_24 (bptr) >> shift);
		bptr += 3;
	    }
	    else
		crc = crc * 3 + (code = *((short*) bptr)++);

	    code2 = code - ((sample [0] [1] * weight [0] [0] + 128) >> 8) - sample [0] [0];

	    if ((sample [0] [1] >= 0) == (code2 > 0)) {
		if (weight [0] [0]++ == 256)
		    weight [0] [0]--;
	    }
	    else if (weight [0] [0]-- == 0)
		weight [0] [0]++;

	    send_word3 (wpc, code2, 0);
	    sample [0] [0] += sample [0] [1] = code - sample [0] [0];

	    if (flags & OVER_20)
		if (wpc->dc.extra_bits_count == 8) {
		    if (wpc->dc.last_extra_bits == buf) {
			putbit_1 (outbits);
		    }
		    else {
			putbits (buf << 1, 5, outbits);
			wpc->dc.last_extra_bits = buf;
			wpc->dc.extra_bits_count = 0;
		    }
		}
		else {
		    putbits (buf, 4, outbits);

		    if (wpc->dc.last_extra_bits != buf) {
			wpc->dc.last_extra_bits = buf;
			wpc->dc.extra_bits_count = 0;
		    }
		    else
			++wpc->dc.extra_bits_count;
		}
	}

    /////////////// handle lossless stereo very fast mode ////////////////////
    ///////////////// (which was the regular fast mode) //////////////////////

    else if (!wpc->wphdr->bits && !(flags & MONO_FLAG) && (flags & FAST_FLAG)) {
	if (flags & OVER_20)
	    for (bptr = buffer, i = 0; i < sample_count; ++i) {
		long left, right, lbuf, rbuf;

		crc = crc * 3 + (lbuf = GET_24 (bptr));
		bptr += 3;
		crc = crc * 3 + (rbuf = GET_24 (bptr));
		bptr += 3;
		left = (lbuf >> 4) - sample [0] [1] - sample [0] [0];
		right = (rbuf >> 4) - sample [1] [1] - sample [1] [0];
		send_word3 (wpc, (left + right) >> 1, 0);
		send_word3 (wpc, left - right, 1);
		sample [0] [0] += sample [0] [1] += left;
		sample [1] [0] += sample [1] [1] += right;
		putbits (((lbuf & 0xf) << 4) + (rbuf & 0xf), 8, outbits);
	    }
	else if (flags & BYTES_3)
	    for (bptr = buffer, i = 0; i < sample_count; ++i) {
		long left, right;

		left = (GET_24 (bptr) >> shift) - sample [0] [1] - sample [0] [0];
		bptr += 3;
		right = (GET_24 (bptr) >> shift) - sample [1] [1] - sample [1] [0];
		bptr += 3;
		send_word3 (wpc, (left + right) >> 1, 0);
		send_word3 (wpc, left - right, 1);
		crc = crc * 3 + (sample [0] [0] += sample [0] [1] += left);
		crc = crc * 3 + (sample [1] [0] += sample [1] [1] += right);
	    }
	else
	    for (bptr = buffer, i = 0; i < sample_count; ++i) {
		long left, right;

		left = *((short*) bptr)++ - sample [0] [1] - sample [0] [0];
		right = *((short*) bptr)++ - sample [1] [1] - sample [1] [0];
		send_word3 (wpc, (left + right) >> 1, 0);
		send_word3 (wpc, left - right, 1);
		crc = crc * 3 + (sample [0] [0] += sample [0] [1] += left);
		crc = crc * 3 + (sample [1] [0] += sample [1] [1] += right);
	    }
    }

    ///////////////// handle the lossless stereo high mode ////////////////////

    else if (!wpc->wphdr->bits && !(flags & MONO_FLAG) && (flags & HIGH_FLAG))
	for (bptr = buffer, i = 0; i < sample_count; ++i) {
	    long left, right, left2, right2, sum, extra_bits;

	    if (flags & OVER_20) {
		crc = crc * 3 + (left = GET_24 (bptr));
		bptr += 3;
		crc = crc * 3 + (right = GET_24 (bptr));
		bptr += 3;
		extra_bits = ((left & 0xf) << 4) + (right & 0xf);
		left >>= 4;
		right >>= 4;
	    }
	    else if (flags & BYTES_3) {
		crc = crc * 3 + (left = GET_24 (bptr) >> shift);
		bptr += 3;
		crc = crc * 3 + (right = GET_24 (bptr) >> shift);
		bptr += 3;
	    }
	    else {
		crc = crc * 3 + (left = *((short*) bptr)++);
		crc = crc * 3 + (right = *((short*) bptr)++);
	    }

	    if (flags & JOINT_STEREO) {
		sum = (left + right) >> 1;
		left -= right;
		right = sum;
	    }

	    if (flags & EXTREME_DECORR) {
		for (dpp = wpc->decorr_passes;; dpp++)
		    if (dpp->term > 0) {
			long sam_A = dpp->samples_A [m], sam_B = dpp->samples_B [m];
			int k = (m + dpp->term) & (MAX_TERM - 1);

			dpp->samples_A [k] = left;
			dpp->samples_B [k] = right;
			left -= apply_weight (9, dpp->weight_A, sam_A);
			right -= apply_weight (9, dpp->weight_B, sam_B);
			update_weight (9, dpp->weight_A, sam_A, left);
			update_weight (9, dpp->weight_B, sam_B, right);
		    }
		    else if (dpp->term == -1) {
			long sam_A = dpp->samples_A [0];

			dpp->samples_A [0] = right;
			right -= apply_weight (9, dpp->weight_B, left);
			update_weight (9, dpp->weight_B, left, right);
			left -= apply_weight (9, dpp->weight_A, sam_A);
			update_weight (9, dpp->weight_A, sam_A, left);
		    }
		    else if (dpp->term == -2) {
			long sam_A = dpp->samples_A [0];

			dpp->samples_A [0] = left;
			left -= apply_weight (9, dpp->weight_B, right);
			update_weight (9, dpp->weight_B, right, left);
			right -= apply_weight (9, dpp->weight_A, sam_A);
			update_weight (9, dpp->weight_A, sam_A, right);
		    }
		    else
			break;

		m = (m + 1) & (MAX_TERM - 1);
	    }
	    else {
#ifdef COMPACT
		for (dpp = wpc->decorr_passes;; dpp++)
		    if (dpp->term > 0) {
			long sam_A = dpp->samples_A [m], sam_B = dpp->samples_B [m];
			int k = (m + dpp->term) & (MAX_TERM - 1);

			dpp->samples_A [k] = left;
			dpp->samples_B [k] = right;
			left -= apply_weight (8, dpp->weight_A, sam_A);
			right -= apply_weight (8, dpp->weight_B, sam_B);
			update_weight (8, dpp->weight_A, sam_A, left);
			update_weight (8, dpp->weight_B, sam_B, right);
		    }
		    else if (dpp->term == -1) {
			long sam_A = dpp->samples_A [0];

			dpp->samples_A [0] = right;
			right -= apply_weight (8, dpp->weight_B, left);
			update_weight (8, dpp->weight_B, left, right);
			left -= apply_weight (8, dpp->weight_A, sam_A);
			update_weight (8, dpp->weight_A, sam_A, left);
		    }
		    else if (dpp->term == -2) {
			long sam_A = dpp->samples_A [0];

			dpp->samples_A [0] = left;
			left -= apply_weight (8, dpp->weight_B, right);
			update_weight (8, dpp->weight_B, right, left);
			right -= apply_weight (8, dpp->weight_A, sam_A);
			update_weight (8, dpp->weight_A, sam_A, right);
		    }
		    else
			break;

		m = (m + 1) & (MAX_TERM - 1);
#else
		left2 = left - apply_weight (8, weight [0] [4], sample [0] [4]);
		right2 = right - apply_weight (8, weight [1] [4], sample [1] [4]);

		update_weight (8, weight [0] [4], sample [0] [4], left2);
		update_weight (8, weight [1] [4], sample [1] [4], right2);

		sample [0] [4] = left;
		sample [1] [4] = right;

		left = left2 - apply_weight (8, weight [0] [3], sample [0] [3]);
		right = right2 - apply_weight (8, weight [1] [3], sample [1] [3]);

		update_weight (8, weight [0] [3], sample [0] [3], left);
		update_weight (8, weight [1] [3], sample [1] [3], right);

		sample [0] [3] = left2;
		sample [1] [3] = right2;

		left2 = left - apply_weight (8, weight [0] [2], sample [0] [2]);
		right2 = right - apply_weight (8, weight [1] [2], sample [1] [2]);

		update_weight (8, weight [0] [2], sample [0] [2], left2);
		update_weight (8, weight [1] [2], sample [1] [2], right2);

		sample [0] [2] = left;
		sample [1] [2] = right;

		left = left2 - apply_weight (8, cweight [2], csample [1]);
		update_weight (8, cweight [2], csample [1], left);
		csample [1] = right2;
		right2 -= apply_weight (8, cweight [3], left2);
		update_weight (8, cweight [3], left2, right2);
		left2 = left;

		left = left2 - apply_weight (8, weight [0] [1], sample [0] [1]);
		right = right2 - apply_weight (8, weight [1] [1], sample [1] [1]);

		update_weight (8, weight [0] [1], sample [0] [1], left);
		update_weight (8, weight [1] [1], sample [1] [1], right);

		sample [0] [1] = dsample [0];
		sample [1] [1] = dsample [1];
		dsample [0] = left2;
		dsample [1] = right2;

		left2 = left - apply_weight (8, weight [0] [0], sample [0] [0]);
		right2 = right - apply_weight (8, weight [1] [0], sample [1] [0]);

		update_weight (8, weight [0] [0], sample [0] [0], left2);
		update_weight (8, weight [1] [0], sample [1] [0], right2);

		sample [0] [0] = left;
		sample [1] [0] = right;
		left = left2;
		right = right2;

		right2 = right - apply_weight (8, cweight [0], csample [0]);
		update_weight (8, cweight [0], csample [0], right2);
		csample [0] = left;
		left -= apply_weight (8, cweight [1], right);
		update_weight (8, cweight [1], right, left);
		right = right2;
#endif
	    }

	    send_word1 (wpc, left, 0);
	    send_word1 (wpc, right, 1);

	    if (flags & OVER_20) {
		if (wpc->dc.extra_bits_count == 8) {
		    if (wpc->dc.last_extra_bits == extra_bits) {
			putbit_1 (outbits);
		    }
		    else {
			putbits (extra_bits << 1, 9, outbits);
			wpc->dc.last_extra_bits = extra_bits;
			wpc->dc.extra_bits_count = 0;
		    }
		}
		else {
		    putbits (extra_bits, 8, outbits);

		    if (wpc->dc.last_extra_bits != extra_bits) {
			wpc->dc.last_extra_bits = extra_bits;
			wpc->dc.extra_bits_count = 0;
		    }
		    else
			++wpc->dc.extra_bits_count;
		}
	    }
	}

    ///////////////// handle the lossless stereo fast mode ////////////////////
    ///////////////// (which used to be the default mode) /////////////////////

    else if (!wpc->wphdr->bits && !(flags & MONO_FLAG) && !(flags & (FAST_FLAG | HIGH_FLAG))) {
	long left_level = wpc->dc.left_level, right_level = wpc->dc.right_level;
	long sum_level = wpc->dc.sum_level;

	for (bptr = buffer, i = 0; i < sample_count; ++i) {
	    long left, right, sum, lbuf, rbuf, extra_bits;

	    if (flags & OVER_20) {
		crc = crc * 3 + (lbuf = GET_24 (bptr));
		bptr += 3;
		crc = crc * 3 + (rbuf = GET_24 (bptr));
		bptr += 3;
		extra_bits = ((lbuf & 0xf) << 4) + (rbuf & 0xf);
		lbuf >>= 4;
		rbuf >>= 4;
	    }
	    else if (flags & BYTES_3) {
		crc = crc * 3 + (lbuf = GET_24 (bptr) >> shift);
		bptr += 3;
		crc = crc * 3 + (rbuf = GET_24 (bptr) >> shift);
		bptr += 3;
	    }
	    else {
		crc = crc * 3 + (lbuf = *((short*) bptr)++);
		crc = crc * 3 + (rbuf = *((short*) bptr)++);
	    }

	    left = lbuf - ((sample [0] [1] * weight [0] [0] + 128) >> 8) - sample [0] [0];
	    right = rbuf - ((sample [1] [1] * weight [1] [0] + 128) >> 8) - sample [1] [0];

	    if ((sample [0] [1] >= 0) == (left > 0)) {
		if (weight [0] [0]++ == 256)
		    weight [0] [0]--;
	    }
	    else if (weight [0] [0]-- == 0)
		weight [0] [0]++;

	    if ((sample [1] [1] >= 0) == (right > 0)) {
		if (weight [1] [0]++ == 256)
		    weight [1] [0]--;
	    }
	    else if (weight [1] [0]-- == 0)
		weight [1] [0]++;

	    sum = (left + right) >> 1;
	    send_word3 (wpc, left - right, 0);

	    if (sum_level <= right_level && sum_level <= left_level)
		send_word3 (wpc, sum, 1);
	    else
		send_word3 (wpc, left_level <= right_level ? left : right, 1);

	    sum_level = sum_level - (sum_level >> 8) + labs (sum);
	    left_level = left_level - (left_level >> 8) + labs (left);
	    right_level = right_level - (right_level >> 8) + labs (right);

	    sample [0] [0] += (sample [0] [1] = lbuf - sample [0] [0]);
	    sample [1] [0] += (sample [1] [1] = rbuf - sample [1] [0]);

	    if (flags & OVER_20) {
		if (wpc->dc.extra_bits_count == 8) {
		    if (wpc->dc.last_extra_bits == extra_bits) {
			putbit_1 (outbits);
		    }
		    else {
			putbits (extra_bits << 1, 9, outbits);
			wpc->dc.last_extra_bits = extra_bits;
			wpc->dc.extra_bits_count = 0;
		    }
		}
		else {
		    putbits (extra_bits, 8, outbits);

		    if (wpc->dc.last_extra_bits != extra_bits) {
			wpc->dc.last_extra_bits = extra_bits;
			wpc->dc.extra_bits_count = 0;
		    }
		    else
			++wpc->dc.extra_bits_count;
		}
	    }
	}

	wpc->dc.left_level = left_level;
	wpc->dc.right_level = right_level;
	wpc->dc.sum_level = sum_level;
    }

    /////////////////// handle the lossy/hybrid mono mode /////////////////////

    else if (wpc->wphdr->bits && (flags & MONO_FLAG))
	for (bptr = buffer, i = 0; i < sample_count; ++i) {
	    long code;

	    if (flags & BYTES_3) {
		code = GET_24 (bptr) >> shift;
		bptr += 3;
	    }
	    else
		code = *((short*) bptr)++;

	    crc2 = crc2 * 3 + code;

	    if (flags & LOSSY_SHAPE)
		wpc->dc.error [0] = -(code -= wpc->dc.error [0]);

	    if ((flags & BYTES_3) && shift < 4)
		for (dpp = wpc->decorr_passes; dpp->term; dpp++)
		    code -= (dpp->aweight_A = apply_weight24 (dpp->weight_A, dpp->samples_A [m]));
	    else
		for (dpp = wpc->decorr_passes; dpp->term; dpp++)
		    code -= (dpp->aweight_A = apply_weight (8, dpp->weight_A, dpp->samples_A [m]));

	    code = send_word4 (wpc, code, 0);

	    while (--dpp >= wpc->decorr_passes) {
		long sam = dpp->samples_A [m];

		update_weight (8, dpp->weight_A, sam, code);
		dpp->samples_A [(m + dpp->term) & (MAX_TERM - 1)] = (code += dpp->aweight_A);
	    }

	    m = (m + 1) & (MAX_TERM - 1);
	    crc = crc * 3 + code;
	    wpc->dc.error [0] += code;

	    if (flags & CALC_NOISE) {
		if (flags & BYTES_3)
		    noise = (code << shift) - GET_24 (bptr-3);
		else
		    noise = code - ((short*) bptr) [-1];

		noise_acc += noise *= noise;
		wpc->dc.noise_ave = (wpc->dc.noise_ave * 0.99) + (noise * 0.01);

		if (wpc->dc.noise_ave > wpc->dc.noise_max)
		    wpc->dc.noise_max = wpc->dc.noise_ave;
	    }
	}

    /////////////////// handle the lossy/hybrid stereo mode ///////////////////

    else if (wpc->wphdr->bits && !(flags & MONO_FLAG))
	for (bptr = buffer, i = 0; i < sample_count; ++i) {
	    long left, right, sum, diff;

	    if (flags & BYTES_3) {
		left = GET_24 (bptr) >> shift;
		bptr += 3;
		right = GET_24 (bptr) >> shift;
		bptr += 3;
	    }
	    else {
		left = *((short*) bptr)++;
		right = *((short*) bptr)++;
	    }

	    crc2 = crc2 * 3 + left;
	    crc2 = crc2 * 3 + right;

	    if (flags & JOINT_STEREO) {
		sum = (left + right) >> 1;
		left -= right;
		right = sum;
	    }

	    if (flags & LOSSY_SHAPE) {
		wpc->dc.error [0] = -(left -= wpc->dc.error [0]);
		wpc->dc.error [1] = -(right -= wpc->dc.error [1]);
	    }

	    if ((flags & BYTES_3) && shift < 4)
		for (dpp = wpc->decorr_passes; dpp->term; dpp++) {
		    left -= (dpp->aweight_A = apply_weight24 (dpp->weight_A, dpp->samples_A [m]));
		    right -= (dpp->aweight_B = apply_weight24 (dpp->weight_B, dpp->samples_B [m]));
		}
	    else
		for (dpp = wpc->decorr_passes; dpp->term; dpp++) {
		    left -= (dpp->aweight_A = apply_weight (8, dpp->weight_A, dpp->samples_A [m]));
		    right -= (dpp->aweight_B = apply_weight (8, dpp->weight_B, dpp->samples_B [m]));
		}

	    left = send_word4 (wpc, left, 0);
	    right = send_word4 (wpc, right, 1);

	    while (--dpp >= wpc->decorr_passes) {
		long sam_A = dpp->samples_A [m], sam_B = dpp->samples_B [m];
		int k = (m + dpp->term) & (MAX_TERM - 1);

		update_weight (8, dpp->weight_A, sam_A, left);
		dpp->samples_A [k] = (left += dpp->aweight_A);

		update_weight (8, dpp->weight_B, sam_B, right);
		dpp->samples_B [k] = (right += dpp->aweight_B);
	    }

	    m = (m + 1) & (MAX_TERM - 1);
	    wpc->dc.error [0] += left;
	    wpc->dc.error [1] += right;

	    if (flags & JOINT_STEREO) {
		right = ((sum = (right << 1) | (left & 1)) - (diff = left)) >> 1;
		left = (sum + diff) >> 1;
	    }

	    crc = crc * 3 + left;
	    crc = crc * 3 + right;

	    if (flags & CALC_NOISE) {
		if (flags & BYTES_3) {
		    noise = (double)((left << shift) - GET_24 (bptr-6)) * ((left << shift) - GET_24 (bptr-6));
		    noise += (double)((right << shift) - GET_24 (bptr-3)) * ((right << shift) - GET_24 (bptr-3));
		}
		else {
		    noise = (double)(left - ((short*) bptr) [-2]) * (left - ((short*) bptr) [-2]);
		    noise += (double)(right - ((short*) bptr) [-1]) * (right - ((short*) bptr) [-1]);
		}

		noise_acc += noise /= 2.0;
		wpc->dc.noise_ave = (wpc->dc.noise_ave * 0.99) + (noise * 0.01);

		if (wpc->dc.noise_ave > wpc->dc.noise_max)
		    wpc->dc.noise_max = wpc->dc.noise_ave;
	    }
	}

#ifdef COMPACT
    memcpy (wpc->dc.sample, sample, sizeof (sample));
    memcpy (wpc->dc.weight, weight, sizeof (weight));
#else
    memcpy (wpc->dc.sample, sample, sizeof (sample));
    memcpy (wpc->dc.dsample, dsample, sizeof (dsample));
    memcpy (wpc->dc.csample, csample, sizeof (csample));
    memcpy (wpc->dc.weight, weight, sizeof (weight));
    memcpy (wpc->dc.cweight, cweight, sizeof (cweight));
#endif

    wpc->dc.m = m;
    wpc->wphdr->crc = crc;
    wpc->wphdr->crc2 = crc2;

    if (flags & CALC_NOISE)
	wpc->dc.noise_sum += noise_acc;
}

//////////////////////////////////////////////////////////////////////////////
// Report the noise figures gathered by pack_samples() while the CALC_NOISE //
// bit was set in the WavPack header. The return value is the running total //
// kept in dc.noise_sum (pack_samples() adds the squared error of every     //
// sample to it). If "peak" is not NULL, it additionally receives the       //
// largest value reached by the smoothed noise average (dc.noise_max). See  //
// wavpack.c for the calculations required to convert these figures into    //
// decibels of noise below full scale.                                      //
//////////////////////////////////////////////////////////////////////////////

double pack_noise (WavpackContext *wpc, double *peak)
{
    // the peak is optional; callers that do not want it pass NULL

    if (peak != NULL) {
	*peak = wpc->dc.noise_max;
    }

    return wpc->dc.noise_sum;
}
