// phasevocoder.h

/******************************************************************************
 *
 *  MiXViews - an X window system based sound & data editor/processor
 *
 *  Copyright (c) 1993, 1994 Regents of the University of California
 *
 *  Author:     Douglas Scott
 *  Date:       December 13, 1994
 *
 *  Permission to use, copy and modify this software and its documentation
 *  for research and/or educational purposes and without fee is hereby granted,
 *  provided that the above copyright notice appear in all copies and that
 *  both that copyright notice and this permission notice appear in
 *  supporting documentation. The author reserves the right to distribute this
 *  software and its documentation.  The University of California and the author
 *  make no representations about the suitability of this software for any 
 *  purpose, and in no event shall University of California be liable for any
 *  damage, loss of data, or profits resulting from its use.
 *  It is provided "as is" without express or implied warranty.
 *
 ******************************************************************************/


// This class is the engine which performs the actual analysis and resynthesis
// of Phase Vocoder data and sound.  It is used as a member class for the
// PVAnalyzer and PVSynthesizer classes.

// The header comment from the original C source file is reproduced below:

/*------------------------------------------------------------------

PROGRAM:	Phase Vocoder 

AUTHOR: 	Mark Dolson
		Center for Music Experiment Q-037
		University of California, San Diego
		La Jolla, Ca. 92093

DATE:		November 1, 1984


	This is a second release of a phase vocoder being
developed at the Computer Audio Research Lab of the Center for 
Music Experiment at U.C.S.D.  It performs both analysis and synthesis
efficiently using a Weighted Overlap-Add algorithm.  Whenever
possible, the minimum mean-squared-error formulation of Griffin
and Lim is used ("Signal Estimation from Modified Short-Time
Fourier Transform", I.E.E.E. Trans. ASSP-32, No. 2, April, 1984);
otherwise, the technique is that described in "Non-Uniform
Time-Scale Modification of Speech" by Samuel Holtzman Dantus
(M.S. and E.E. Thesis, M.I.T., 1980) and in "A Weighted 
Overlap-Add Method of Short-Time Fourier Analysis/Synthesis" by
R. E. Crochiere (I.E.E.E. Trans. ASSP-28, No. 1, February, 1980).
The code is written entirely in the C programming language except
for standard FFT subroutines written in FORTRAN which are taken
from the I.E.E.E. Programs for Digital Signal Processing package.
This code runs at U.C.S.D. on a VAX 11-780 under Berkeley UNIX;
with some modification, it should run on nearly any machine
supporting FORTRAN and C.
------------------------------------------------------------------

		"R = input sample rate (automatically read from stdin)\n",
		"F = fundamental frequency (sampRate/256) DON'T USE -F AND -N\n",
		"N = # of bandpass filters (256 unless -F is specified)\n",
		"W = filter overlap factor: {1,2,(4),8} DON'T USE -W AND -M\n",
		(note -- these were {0, 1, (2), 3} originally)
		"M = analysis window length (fftSize unless -W is specified)\n",
		"L = synthesis window length (M) \n",
		"D = decimation factor (min((M/(8*timeScaleFactor)),(M/8))\n",
		"I = interpolation factor (=timeScaleFactor*inputFrameOffset) \n",
		"T = time-scale factor (1.)\n",
		"P = pitch-scale factor (1.) DON'T USE -T AND -P\n",
		"C = resynthesize odd (1) or even (2) channels only\n",
		"i = resynthesize bandpass filters i thru j only\n",
		"j = resynthesize bandpass filters i thru j only\n",
		"b = starting sample (0)\n",
		"e = final sample (end of input)\n",
		"w = warp factor for spectral envelope (1.)\n",
		"A:  analysis only: output will be analysis data\n",
		"E:  analysis only: output will be spectral envelope\n",
		"X:  analysis only: output will be magnitude values\n",
		"S:  synthesis only: input must be analysis data\n",
		"K:  use Kaiser filter instead of hamming\n",
		"V [filename]:  verbose (summarize on pvoc.stat or file)\n",
*/

// Include guard: everything up to the matching #endif at the end of this
// header is processed at most once per translation unit.
#ifndef PHASEVOCODER_H
// GNU C++ only: mark this header as a class interface (see the GCC manual
// entry for "#pragma interface" / "#pragma implementation").
#ifdef __GNUG__
#pragma interface
#endif
#define PHASEVOCODER_H

#include "localdefs.h"

// Forward declarations: this header only uses these types through pointers
// (InPipeAction * and Data * as method parameters, Envelope * as a data
// member), so their full definitions are not needed here.
class InPipeAction;
class Data;
class Envelope;

// PhaseVocoder -- analysis/resynthesis engine for phase vocoder data.
//
// The constructor, destructor and all operations are protected or private;
// only the friend classes named below (and derived classes) can create and
// drive an instance.  The public part of the class consists solely of the
// Mode enumeration and the Info parameter bundle.
//
// NOTE(review): the class holds raw buffer pointers and a FILE * and declares
// a destructor, but no copy constructor or assignment operator.  If the
// destructor releases those resources, copying an instance would be unsafe --
// confirm against the implementation file.
class PhaseVocoder {
	friend class PVAnalyzer;
	friend class PVSynthesizer;
	friend class PvocRequester;
public:
	// Operating mode selected through Info::runMode.
	enum Mode { Analysis, Synthesis, Resynthesis, Spectrum, Magnitudes };

	// User-settable parameters handed to the PhaseVocoder constructor.
	struct Info {
		// ctor for analysis
		// (presumably n/f/m/d/t/p/kais map onto fftSize, fundFreq,
		// inputFrameSize, inputFrameOffset, timeScaleFactor, pchScaleFactor
		// and K -- the definition is not in this header; confirm there)
		Info(double sr, float scale, Mode=PhaseVocoder::Analysis,
			 int n=0, int f=0, int m=0, int d=0,
		     float t=1, float p=1, boolean kais=false);
		// ctor for synthesis
		// (presumably l/i map onto outputFrameSize/outputFrameOffset and
		// ii/jj onto firstBand/lastBand -- confirm against the definition)
		Info(double sr, float scale, int n=0, int f=0, int d=0, int l=0,
		     int i=0, float t=1, int ii=0, int jj=0, float warp=0,
			 boolean kais=false);

		int	fftSize,		// number of phase vocoder channels (bands)
			fundFreq,		// fundamental frequency (determines fftSize)
			inputFrameSize,	// length of analWindow impulse response
			inputFrameOffset, // decimation factor (default inputFrameSize/8)
			outputFrameSize,	// length of synWindow impulse response
			outputFrameOffset,	// interpolation factor
								// (default is outputFrameOffset=inputFrameOffset)
			firstBand,		// first band (i) when resynthesizing bands i to j only
			lastBand;		// last band (j) when resynthesizing bands i to j only

		boolean K;			// flag for Kaiser window

		float
			timeScaleFactor,	// time scale factor ( >1 to expand)
			pchScaleFactor,		// pitch scale factor
			warp,				// spectral envelope warp factor
			inputScalingFactor;	// to scale short int samps between 1 and -1

		double samplingRate;		// sample rate of sound

		Mode runMode;			// which of the Mode operations to perform
	};
protected:
	PhaseVocoder(Info &);
	~PhaseVocoder();
	void initialize();
	void reset();
	boolean isGood();

	// True only when runMode is exactly Analysis (Spectrum and Magnitudes
	// do not count).
	boolean analyzing() const { return I.runMode == Analysis; }
	// True only when runMode is exactly Synthesis (Resynthesis does not
	// count).
	boolean synthesizing() const { return I.runMode == Synthesis; }

	int runAnalysis(double *, Data *);
	int runSynthesis(Data *, InPipeAction *);

	// Read-only accessors.  They are const so that they can also be called
	// through a const PhaseVocoder; none of them modifies the object.
	int analysisChannels() const { return I.fftSize + 2; }
	int freqBands() const { return fftPoints + 1; }
	int getStartingOffset() const { return startingOffset; }
	int calculateAnalysisLength(int inputLen);
	int getInputFrameOffset() const { return I.inputFrameOffset; }
	// Returns the scaled offset (scaledOutputOffset), not
	// I.outputFrameOffset.
	int getOutputFrameOffset() const { return scaledOutputOffset; }
	// inputFrameSize plus the inputFrameEven flag value.
	int roundedInputFrameSize() const {
		return I.inputFrameSize + inputFrameEven;
	}
	// True when a time scale envelope pointer has been set.
	boolean variableTimeScaling() const { return timeScaleEnvelope != nil; }
private:
	// Set-up helpers (defined in the implementation file).
	void zeroPointers();
	void setDefaults();
	void setUpLog();
	boolean checkAndSetValues();
	void createAnalysisWindow();
	void createSynthesisWindow();
	void createBuffers();
	static void hamming(float *win, int winLen, int even);
	void updateAmps();
	void printToLog();

	// Helpers that, judging by their names, belong to the analysis pass --
	// confirm against the implementation.
	void applyInputWindow(double *);
	void analyze();
	void convertToReal();
	void detectAndWarpEnvelope();
	void outputAnalysis(Data *);

	// Helpers that, judging by their names, belong to the synthesis pass --
	// confirm against the implementation.
	void loadAnalysis(Data *);
	void limitBands();
	void convertFromReal();
	void synthesize();
	void applyOutputWindow();
	int shiftOut(InPipeAction *);
	void getTimeScaleFactor();
	void calculateOffsetsAndIncrement();

	// ---- data members (declaration order is significant: do not reorder) ----
	Info I;					// parameter set, read by the accessors above

	int
		startingOffset,		// initial number of samps for starting frame
		analWinLen,			// half-length of analysis window
		synWinLen,			// half-length of synthesis window
		outCount,			// number of samples written to output
		obuflen,			// length of output buffer
		inSamp,				// current input (analysis) sample
		outSamp;			// current output (synthesis) sample

	float *output,		// pointer to start of output buffer
		*anal,			// pointer to start of analysis buffer
		*syn,			// pointer to start of synthesis buffer
		*nextOut,		// pointer to next empty word in output
		*analWindowBuf,	// analysis window buffer
		*analWindow,	// pointer to center of analysis window
		*synWindowBuf,	// synthesis window buffer
		*synWindow,		// pointer to center of synthesis window
		*maxAmp,		// pointer to start of max amp buffer
		*avgAmp,		// pointer to start of avg amp buffer
		*avgFrq,		// pointer to start of avg frq buffer
		*env,			// pointer to start of spectral envelope
		*oldInPhase,	// pointer to start of input phase buffer
		*oldOutPhase;	// pointer to start of output phase buffer

	FILE	*fp;			// auxiliary output file (-V option)

	static float Pi;
	static float TwoPi;
	static float HalfPi;
	static float beta;				// for Kaiser window

	float
		RoverTwoPi,			// sampRate/inputFrameOffset divided by 2*Pi
		TwoPioverR,			// 2*Pi divided by sampRate/outputFrameOffset
		sum,				// scale factor for renormalizing windows
		ftot;				// scale factor for calculating statistics

	int
		sampsIn,			// no. of new inputs to read
							// (sampsIn <= inputFrameOffset)
		fftPoints,			// fftSize / 2
		invFftSize,			// synthesis:  fftSize / pchScaleFactor
		invFftPoints,		// invFftSize / 2
		scaledOutputOffset,	// synthesis:  outputFrameOffset/pchScaleFactor
		sampsOut,			// synthesis:  number of new outputs to write
		inputFrameEven,		// flag for even inputFrameSize
		outputFrameEven,	// flag for even outputFrameSize
		C,					// flag for resynthesizing even or odd chans
		bandsLimited,		// flag for selected channel resynthesis
		verbose,			// verbose (summarize analysis) output flag
		X,					// flag for magnitude output
		E;					// flag for spectral envelope output
	boolean initialized;
	Envelope* timeScaleEnvelope;	// tested by variableTimeScaling()
};

#endif

