#line 2 "loop.c"
/*-
 * C-SaCzech
 * Copyright (c) 1996-2002 Jaromir Dolecek <dolecek@ics.muni.cz>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Jaromir Dolecek
 *	for the CSacek project.
 * 4. The name of Jaromir Dolecek may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JAROMIR DOLECEK ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL JAROMIR DOLECEK BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* $Id: loop.c,v 1.160.2.2 2002/03/09 09:00:04 dolecek Exp $ */

#include "csacek.h" 	
#include "csa_version.h"
#include "csa_cmds.h"

/* local constants */
/* space which must always be kept free at the end of the body buffer */
#define SAFE_SPACE		32

/*
 * CMD_* bits. They are used both in csa_cmd_t.flags (static properties of
 * a command in x_cmdtab[]) and in the ``cmd_flags'' value describing how
 * a command was found in the document.
 */
#define CMD_COMMENT		1  /* cmd has been in HTML comment <!-- .. -->*/
#define CMD_HTMLTAG		2  /* cmd has been HTML tag */
#define CMD_EXTENDED		4  /* cmd not existing in SaCzech */
#define CMD_EXEC_ALWAYS		8  /* execute even if we are in NOCODE block */
#define CMD_NOACCESSCHECK	16 /* execute always, regardless of */
				   /* PART or DOMAIN or CHARSET */
#define CMD_NOESCAPE		32 /* don't recognize '\' as escape char */
				   /* with this cmd */
#define CMD_CHANGEURL		64 /* command which changes URL of passed
				    * HTML tag */
#define CMD_HTMLALWAYS		128/* execute even if NOHTML flag is set */

/*
 * Description of one command known to CSacek; x_cmdtab[] below is an
 * array of these records.
 */
typedef struct {
	size_t		 name_len;	/* length of name */
	const char	*name;          /* name of command */
	int	type;           /* type of command (CSA_CMD0 for example); */
				/* csa_yylex() returns it as the token type */
	csa_cmdfunc_t	*func;	/* pointer to function implementing command */
				/* or NULL if explicit rule in cmds.y exists */
	void	*dta;		/* passed to func; usable to give additional */
				/* information to called cmd function */
	int	param_mask;     /* mask of parameter types accepted by cmd */
	int	flags;          /* additional information (CMD_* bits) */
} csa_cmd_t;

/* local functions */
/* find the x_cmdtab[] record for a command name of the given length */
static const csa_cmd_t * x_cmd_lookup __P((const char *, size_t len));
/* test whether a command may be executed in the current context */
static int x_cmd_canexecute __P((csa_params_t *, const csa_cmd_t *, int));

/*
 * Structure which holds the state while one command string is being
 * processed; it is set up by csa_run_cmd() and consumed by csa_yylex().
 */
struct csa_yy {
	csa_arglist_t	 *args;		/* argument for interpreted command */
	const csa_cmd_t	 *cmd;		/* pointer to cmd record in x_cmdtab[]*/
	int		  state;	/* state of csa_yylex(): 0 = expecting */
					/* command name, 1 = reading arguments */
	const char	 *str;		/* text to be interpreted */
	const char	 *name;		/* name of command as written in source */
	size_t		  str_len;	/* total number of chars to be read */
	size_t		  index;	/* index of next char to be read */
	char		 *buf;		/* used while parsing cmd arguments */
/* character at the current read position (valid only if YY_VALID()) */
#define YY_CURCHAR(YY)	((YY)->str[(YY)->index])
/* true while the read position is still inside the text */
#define YY_VALID(YY)	((YY)->index < (YY)->str_len)
/* number of characters not yet read */
#define YY_REMAINS(YY)	((YY)->str_len - (YY)->index)
	int		 cmd_flags;	/* command flags as passed from
					 * csa_process_body() or any other
					 * caller of csa_run_cmd() */
	int		 yy_valid;	/* set to 1 if contents valid */
	int		 igneq;		/* set to true if we encountered */
					/* name=value pair and are parsing */
					/* the value part */
};

/*
 * Array of commands supported by CSacek, sorted by length and, within one
 * length, by case-insensitive command name (x_cmd_lookup() relies on this
 * order). Don't forget to update the CSA_CMD_IDXLEN<n> values, and
 * CSA_CMD_MAX_LEN if needed, when you change x_cmdtab[].
 *
 * CSA_CMD_IDXLEN<n> is the index of the first command whose name is <n>
 * characters long, or -1 if there is no command of that length.
 *
 * Note that the table has fewer initializers than CSA_COMMANDS, so the
 * trailing slot is zero-filled (name_len 0, name NULL).
 */
#define CSA_COMMANDS	22
#define CSA_CMD_MAX_LEN	13
static const csa_cmd_t x_cmdtab[CSA_COMMANDS] =
{
#define CSA_CMD_IDXLEN1		0
	{  1, "A", CSA_CMDX, csa_ChangeURL, (void *)CSA_HREF,
		CSA_P_STRING|CSA_P_EQUATION,
		CMD_NOESCAPE|CMD_HTMLTAG|CMD_CHANGEURL },
#define CSA_CMD_IDXLEN2		-1
#define CSA_CMD_IDXLEN3		1
	{  3, "bar", CSA_CMDX, csa_Bar, NULL, CSA_P_STRING|CSA_P_EQUATION,
		CMD_EXTENDED },
	{  3, "set", CSA_CMDX, csa_Set, NULL, CSA_P_EQUATION,
		CMD_EXEC_ALWAYS|CMD_EXTENDED },
	{  3, "xml", CSA_CMDX, csa_Xml, NULL, CSA_P_STRING|CSA_P_EQUATION,
		CMD_EXTENDED },
#define CSA_CMD_IDXLEN4		4
	{  4, "AREA", CSA_CMDX, csa_ChangeURL, (void *)CSA_HREF,
		CSA_P_STRING|CSA_P_EQUATION,
		CMD_NOESCAPE|CMD_HTMLTAG|CMD_CHANGEURL },
	{  4, "BASE", CSA_CMDX, csa_ChangeURL, (void *)CSA_HREF,
		CSA_P_STRING|CSA_P_EQUATION,
		CMD_NOESCAPE|CMD_HTMLTAG|CMD_CHANGEURL },
	{  4, "code", CSA_CMD0, csa_Set, (void *)CSA_EXECCMDS, 0,
		CMD_EXEC_ALWAYS|CMD_EXTENDED|CMD_NOACCESSCHECK },
	{  4, "FONT", CSA_CMDX, csa_Font, NULL, CSA_P_EQUATION, CMD_HTMLTAG },
	{  4, "FORM", CSA_CMDX, csa_ChangeURL, (void *)CSA_ACTION,
		CSA_P_STRING|CSA_P_EQUATION,
		CMD_NOESCAPE|CMD_HTMLTAG|CMD_CHANGEURL },
	{  4, "META", CSA_CMDX, csa_Meta, NULL, CSA_P_STRING|CSA_P_EQUATION,
		CMD_HTMLTAG },
	{  4, "part", CSA_CMDX, csa_DocParts, (void *)CSA_PART,
		CSA_P_STRING|CSA_P_EQUATION, CMD_EXTENDED|CMD_NOACCESSCHECK },
#define CSA_CMD_IDXLEN5		11
	{  5, "FRAME", CSA_CMDX, csa_ChangeURL, (void *)CSA_SRC,
		CSA_P_STRING|CSA_P_EQUATION,
		CMD_NOESCAPE|CMD_HTMLTAG|CMD_CHANGEURL },
#define CSA_CMD_IDXLEN6		12
	{  6, "bardef", CSA_CMDX, csa_BarDef, NULL, CSA_P_STRING|CSA_P_EQUATION,
		CMD_EXTENDED },
	{  6, "domain", CSA_CMDX, csa_DocParts, (void *)CSA_DOMAIN,
		CSA_P_STRING, CMD_EXTENDED|CMD_NOACCESSCHECK },
	{  6, "nocode", CSA_CMD0, csa_Set, (void *)CSA_NOEXECCMDS, 0,
		CMD_EXEC_ALWAYS|CMD_EXTENDED },
	{  6, "SCRIPT", CSA_CMDX, csa_Script, (void *)CSA_SRC,
		CSA_P_STRING|CSA_P_EQUATION,
		CMD_NOESCAPE|CMD_HTMLTAG },
#define CSA_CMD_IDXLEN7		16
	{  7, "/SCRIPT", CSA_CMD0, csa_Script, (void *)0, 0,
		CMD_NOESCAPE|CMD_HTMLTAG|CMD_HTMLALWAYS },
	{  7, "charset", CSA_CMDX, csa_DocParts, (void *)CSA_CHARSET,
		CSA_P_STRING, CMD_EXTENDED|CMD_NOACCESSCHECK},
#define CSA_CMD_IDXLEN8		-1
#define CSA_CMD_IDXLEN9		18
	{  9, "mycharset", CSA_MYCHARSET, csa_MyCharset, NULL, CSA_P_STRING,
		CMD_EXTENDED },
#define CSA_CMD_IDXLEN10	-1
#define CSA_CMD_IDXLEN11	-1
#define CSA_CMD_IDXLEN12	19
	{ /* command csacekServer is obsolete and is provided for
	     * compatibility only */
	  12, "csacekServer", CSA_CMDX, csa_csacekServers, NULL, CSA_P_STRING,
		CMD_EXEC_ALWAYS|CMD_EXTENDED },
#define CSA_CMD_IDXLEN13	20
	{ 13, "csacekServers", CSA_CMDX, csa_csacekServers, NULL, CSA_P_STRING,
		CMD_EXEC_ALWAYS|CMD_EXTENDED },
};

/*
 * x_cmd_idxlen[n] is the index in x_cmdtab[] of the first command with
 * a name n characters long, or -1 if there is none. The array is indexed
 * by name length, so it has exactly CSA_CMD_MAX_LEN+1 elements
 * (lengths 0 .. CSA_CMD_MAX_LEN); slot 0 is unused and set to -1.
 */
static const int x_cmd_idxlen[CSA_CMD_MAX_LEN+1] = {
	-1,
	CSA_CMD_IDXLEN1, CSA_CMD_IDXLEN2, CSA_CMD_IDXLEN3, CSA_CMD_IDXLEN4,
	CSA_CMD_IDXLEN5, CSA_CMD_IDXLEN6, CSA_CMD_IDXLEN7, CSA_CMD_IDXLEN8,
	CSA_CMD_IDXLEN9, CSA_CMD_IDXLEN10, CSA_CMD_IDXLEN11, CSA_CMD_IDXLEN12,
	CSA_CMD_IDXLEN13,
};

/*
 * Looks up command ``name'' in x_cmdtab[] and returns the associated
 * record, or NULL if there is no such command in the table.
 *
 * Only the first ``len'' characters of ``name'' are used and the
 * comparison is case-insensitive. The search starts at the first entry
 * with the given length (taken from x_cmd_idxlen[]) and walks the
 * entries of that length; it stops early once a table name compares
 * greater than ``name'', which is valid because the table is sorted.
 */
static const csa_cmd_t *
x_cmd_lookup(name, len)
  const char *name; /* command name */
  size_t len;	    /* how many chars to take for comparison */
{
	int i, cmp;

	if (len == 0 || len > CSA_CMD_MAX_LEN)
		return NULL;

	/* special check for /SCRIPT */
	if (name[0] == '/' && len != 7)
		return NULL;

	/* get offset of first command with given length */
	i = x_cmd_idxlen[len];
	if (i < 0)
		return NULL;

	/*
	 * Do linear search for matching command. The bounds test has to
	 * come first, so that x_cmdtab[i] is never read past the end of
	 * the table.
	 */
	for(; i < CSA_COMMANDS && x_cmdtab[i].name_len == len; i++) {
		/* cheap rejection before the full comparison */
		if (CSA_UPPER(x_cmdtab[i].name[0]) != CSA_UPPER(name[0]))
			continue;

		cmp = strncasecmp(x_cmdtab[i].name, name, len);
		if (cmp == 0)
			return &x_cmdtab[i];

		/*
		 * If command name in table is lexicographically greater
		 * than the passed name, no following entry can match.
		 */
		if (cmp > 0)
			break;
	}

	return NULL;
}

/*
 * Decides whether a command may be executed in the current context.
 * Returns 1 if it may, 0 otherwise.
 *
 * While a command string is being parsed (p->yy is set and marked
 * valid), the command record and flags stored in p->yy are used and the
 * ``cmd'' and ``cmd_flags'' arguments are ignored.
 */
static int
x_cmd_canexecute(p, cmd, cmd_flags)
  csa_params_t *p;
  const csa_cmd_t *cmd;
  int cmd_flags;
{
	if (p->yy && p->yy->yy_valid) {
		cmd_flags = p->yy->cmd_flags;
		cmd = p->yy->cmd;
	}

	/* current PART/DOMAIN/CHARSET must be valid, unless cmd is exempt */
	if (!CSA_ISSET(p->flags_parts, CSA_VALID_MASK)
	    && !CSA_ISSET(cmd->flags, CMD_NOACCESSCHECK))
		return 0;

	/* code must be switched on, unless cmd is executed always */
	if (CSA_ISSET(p->flags, CSA_FL_NOEXECCMDS)
	    && !CSA_ISSET(cmd->flags, CMD_EXEC_ALWAYS))
		return 0;

	/* HTML tag commands need to be found as HTML tag in HTML document */
	if (CSA_ISSET(cmd->flags, CMD_HTMLTAG)) {
		if (!CSA_ISSET(cmd_flags, CMD_HTMLTAG))
			return 0;
		if (!CSA_ISSET(p->flags, CSA_FL_ISHTML))
			return 0;
		if (CSA_ISSET(p->flags, CSA_FL_NOHTML)
		    && !CSA_ISSET(cmd->flags, CMD_HTMLALWAYS))
			return 0;
	}

	/* URL changing commands only if URL changing is switched on */
	if (CSA_ISSET(cmd->flags, CMD_CHANGEURL)
	    && !CSA_ISSET(p->flags, CSA_FL_CHANGEURL))
		return 0;

	return 1;
}

/*
 * Stores into *startp and *endp the opening and closing tag which
 * enclosed the currently processed command in the original source:
 * "<" and ">" for HTML tag, "<!--" and "-->" for comment, or '<' and '>'
 * combined with CSA_CMD_TAG for extended command (these two strings are
 * allocated from the temporary pool).
 * Returns 1 on success, 0 if the kind of tag is not known.
 */
int
csa_yy_gettags(p, startp, endp)
  csa_params_t *p;
  const char **startp, ** endp;
{
	char *open_tag, *close_tag;

	if (CSA_ISSET(p->yy->cmd_flags, CMD_HTMLTAG)) {
		*startp = "<";
		*endp = ">";
		return 1;
	}

	if (CSA_ISSET(p->yy->cmd_flags, CMD_COMMENT)) {
		*startp = "<!--";
		*endp = "-->";
		return 1;
	}

	/* the tags are unknown, return error */
	if (!CSA_ISSET(p->yy->cmd_flags, CMD_EXTENDED))
		return 0;

	/* two characters plus terminating zero for each tag */
	open_tag = (char *)ap_palloc(p->pool_tmp, 3);
	close_tag = (char *)ap_palloc(p->pool_tmp, 3);

	open_tag[0] = '<';
	open_tag[1] = CSA_CMD_TAG;
	open_tag[2] = '\0';

	close_tag[0] = CSA_CMD_TAG;
	close_tag[1] = '>';
	close_tag[2] = '\0';

	*startp = open_tag;
	*endp = close_tag;

	return 1;
}

/*
 * Runs the function bound to the command stored in p->yy (i.e. the
 * command read previously by csa_yylex()), provided the command is
 * allowed to execute now.
 * Returns the value returned by the command function, or 1 if the
 * command was not executed.
 */
int
csa_cmd_execute(p)
  csa_params_t *p;
{
	int status;

#ifdef CSA_DEBUG
	csa_debug(p->dbg, "csa_cmd_execute: called for command ``%s''",
		p->yy->cmd->name);
#endif

	/* not permitted in this context - nothing is called */
	if (!x_cmd_canexecute(p, NULL, 0))
		return (1);

	status = p->yy->cmd->func(p, p->yy->cmd->dta);
#ifdef CSA_DEBUG
	csa_debug(p->dbg, "csa_cmd_execute: command ``%s'' %s",
		p->yy->cmd->name,
		(status == 0) ? "succeeded" : "FAILED");
#endif

	return (status);
}

/*
 * Tries to interpret first ``len'' characters from the string ``s''
 * as a command; ``cmd_flags'' says how the command was found in the
 * document (CMD_* bits).
 *
 * The parser state p->yy is allocated on first use and reset (keeping
 * its argument list, which is cleared) on every following call. The
 * state is marked valid only for the duration of csa_yyparse().
 *
 * Returns the value returned by csa_yyparse(); 0 if all is okay.
 */
int
csa_run_cmd(p, s, len, cmd_flags)
  csa_params_t *p;
  const char *s;
  size_t  len;
  int cmd_flags;
{
	int retval;

#ifdef CSA_DEBUG
	/* precision given by '*' has to be an int, not size_t */
	csa_debug(p->dbg, "csa_run_cmd: str=\"\"\"%.*s\"\"\"" , (int)len, s);
#endif

	if (!p->yy) {
		p->yy = (csa_yy_t *) ap_pcalloc(p->pool_req, sizeof(csa_yy_t));
		p->yy->args = csa_arg_newlist(p->pool_req);
	} else {
		/* wipe the old state, but keep (emptied) argument list */
		csa_arglist_t *args = p->yy->args;
		memset(p->yy, '\0', sizeof(csa_yy_t));
		csa_arg_clrlist(args);
		p->yy->args = args;
	}
	p->yy->yy_valid = 1;
	p->yy->str     = s;
	p->yy->str_len = len;
	p->yy->cmd_flags   = cmd_flags;
	
	retval = csa_yyparse(p);
	
	p->yy->yy_valid = 0;

#ifdef CSA_DEBUG
	csa_debug(p->dbg, "csa_run_cmd: command %s",
		(retval != 0) ? "failed" : "was succesfull" );
#endif

	/* We need the tmp pool cleared */
	p->flags |= CSA_FL_CLRTMPPOOL;

	return retval;
}

/*
 * Accessor: gives the list of arguments kept in the parser state ``yy''.
 */
csa_arglist_t *
csa_yy_getarglist(yy)
  csa_yy_t *yy;
{
	csa_arglist_t *list = yy->args;

	return (list);
}

/*
 * returns mask of parameters acceptable to currently executed command
 */
int
csa_yy_getcmdparammask(yy)
  csa_yy_t *yy;
{
	return yy->cmd->param_mask;
}

/*
 * Accessor: gives the command name kept in the parser state ``yy''
 * (the name as it was written in the source).
 */
const char *
csa_yy_getcmdname(yy)
  csa_yy_t *yy;
{
	const char *written_name = yy->name;

	return (written_name);
}

/*
 * Recodes ``len'' characters of ``str'' and adds the result to output.
 *
 * For non-Unicode output code set the recoding is done in place, so the
 * memory ``str'' points to is modified. For Unicode output a separate
 * buffer of 2*len bytes is used: ``buf'' if it is supplied and big
 * enough, otherwise memory allocated from the temporary pool (``buf''
 * may be NULL, a private buffer descriptor is allocated then).
 *
 * Returns 1 if ``len'' is zero (nothing is done), 0 otherwise.
 */
int
csa_add_recode_output(p, str, len, buf)
  csa_params_t *p;
  char *str;
  size_t len;
  csa_String_b *buf;
{
	/* [n][0] is used for HTML documents, [n][1] for anything else */
	static const char * const replacement[3][2] = {
		{ "&copy;",	"(c)" },
		{ "&reg;",	"(R)" },
		{ "&trade;",	"(TM)" } };
	size_t produced, wanted;
	char *dest;

	if (len == 0)
		return 1;

	if (CSTOOLS_ISUNICODE(p->outcharset)) {
		/* result can be longer than source, use separate buffer */
		wanted = 2*len;
		if (!buf || wanted > buf->maxlen) {
			if (!buf) {
				buf = (csa_String_b *) ap_palloc(p->pool_tmp,
					sizeof(csa_String_b));
				if (!wanted) wanted = len;
			}
			CSA_FILLSTRING(buf, 
				ap_palloc(p->pool_tmp, (int)wanted),
				0, wanted);
			p->flags |= CSA_FL_CLRTMPPOOL;
		}
		dest = buf->value;
	} else {
		/*
		 * 8bit target - result can't be longer than source, so
		 * the source string itself receives the result.
		 */
		dest = str;
	}

	/*
	 * windows-1250 has copyright, registered and trade mark sign,
	 * which have no equivalent in iso-8859-2; they are replaced by
	 * a textual substitute here.
	 */
	if (p->incharset == CSTOOLS_CP1250 && p->outcharset != CSTOOLS_CP1250) {
		char entity[2*sizeof("&trade;") + 1];
		const char *repl;
		size_t pos, segstart = 0, column;

		column = CSA_ISSET(p->flags, CSA_FL_ISHTML) ? 0 : 1;

		for (pos = 0; pos < len; pos++) {
			unsigned char ch = (unsigned char)str[pos];

			if (ch == 0xa9)		/* copyright sign */
				repl = replacement[0][column];
			else if (ch == 0xae)	/* registered sign */
				repl = replacement[1][column];
			else if (ch == 0x99)	/* trade mark sign */
				repl = replacement[2][column];
			else			/* ordinary character */
				continue;

			/* text collected since the last special char ... */
			produced = cstools_recode(&(p->mp), &(str[segstart]),
				dest, pos - segstart);
			csa_add_output(p, dest, produced, 0);

			/* ... and the substitute for the special char */
			produced = cstools_recode(&(p->mp), repl, entity,
				strlen(repl));
			csa_add_output(p, entity, produced, 0);

			segstart = pos + 1;
		}

		/* the string ended by special char, nothing more to do */
		if (segstart >= len)
			return 0;

		/* only the tail after last special char is left */
		str += segstart;
		len -= segstart;
	}

	produced = cstools_recode(&(p->mp), str, dest, len);
	csa_add_output(p, dest, produced, 0);

	return 0;
}

/*
 * Makes supported substitutions and recoding on the first ``out_len''
 * characters of ``text'' and adds the result to output. The processed
 * characters are then removed from ``text'' (the rest is moved to the
 * beginning of text->value and text->len is decreased).
 *
 * If ``add_all'' is non-zero, all ``out_len'' characters are processed.
 * Otherwise the end of the text (at most CSA_FINDSUBS_MAX characters)
 * is checked for a fragment which could be the start of a __FOO__
 * string; if one is found, the fragment is not added to output and
 * stays at the beginning of ``text''.
 *
 * If the current part flags are not valid, nothing is added to output
 * and the ``out_len'' characters are just discarded.
 *
 * Always returns 0.
 */
int
csa_add_subs_output(p, text, out_len, add_all)
  csa_params_t *p;
  csa_String_b *text;
  size_t out_len;
  int add_all;
{
	int num_processed=out_len;
	csa_String_b buf;
	const csa_String *subs;
	char *laststr, *subs_buf=NULL;
	size_t remains, skipped, subs_buf_len=0, where;

#ifdef CSA_DEBUG
	csa_debug(p->dbg, "csa_add_subs_output: called");
#endif

	if (!CSA_ISSET(p->flags_parts, CSA_VALID_MASK)) {
#ifdef CSA_DEBUG
		csa_debug(p->dbg, "csa_add_subs_output: part flags \
invalid (%d), string discarded",
			p->flags_parts & CSA_VALID_MASK);
#endif
		goto out;
	}

	/* note that buf won't be used at all if output code set is 8bit -
	 * see csa_add_recode_output() why */
	buf.value = NULL;
	buf.maxlen = buf.len = 0;

	laststr = text->value;
	remains = out_len;
	while((skipped = csa_find_subs(p, laststr, remains, &where, &subs))!=0)
	{	/* put all the text up to "__" to output */
		csa_add_recode_output(p, laststr, where, &buf);

		/* add to output appropriate substitute for __FOO__ */
		if (subs->len > 0) {
			/*
			 * The substitute is recoded from a private copy,
			 * as csa_add_recode_output() may modify the
			 * string it is given.
			 */
			if (!subs_buf || subs->len > subs_buf_len) {
				subs_buf = (char *)csa_alloca(subs->len,
						p->pool_tmp);
				subs_buf_len = subs->len;
			}
			memcpy(subs_buf, subs->value, subs->len);
			csa_add_recode_output(p, subs_buf, subs->len, &buf);
		}

		laststr += where + skipped;
		remains -= where + skipped;
	} /* while() */

	if (remains == 0)
		goto out;

	/*
	 * If there is anything left in the buffer, add it to output;
	 * if we are not flushing the recode buffer, check the string
	 * if it contains some "hopeful" piece (i.e. something
	 * what can be prefix of some string we substitute) and if yes,
	 * keep the hopeful piece.
	 */
	if (!add_all) {
		size_t checklen = CSA_FINDSUBS_MAX, len;
		char *temp, *endstr, *checkstart;

		if (checklen > remains)
			checklen = remains;

		endstr = &laststr[remains];
		checkstart = &laststr[remains - checklen];

		/*
		 * Walk backwards from the last processed character and
		 * stop on the first '_' which is either the very last
		 * character or is preceded by another '_'; the text
		 * from that '_' on is kept. The walk must not start at
		 * endstr itself - that byte is not part of the processed
		 * text (and may lie behind valid data). checkstart is
		 * not examined, so temp[-1] never reaches before
		 * laststr.
		 */
		for(temp = endstr - 1; temp > checkstart; temp--) {
			if (temp[0] != '_')
				continue;

			if (temp[-1] != '_' && (temp+1) != endstr)
				continue;

			len = temp - laststr;
			num_processed -= (remains - len);
			remains = len;
			break;
		}
	}

	if (remains > 0) 
	      csa_add_recode_output(p, laststr, remains, &buf);

    out:
	/* drop what was processed, keep the rest on start of the buffer */
	text->len -= num_processed;
	if (text->len > 0)
		memmove(text->value, &text->value[num_processed], text->len);

	return 0;
}

/* 
 * Reads tokens from the command string stored in p->yy and returns
 * their identification to cmds.c:yyparse(); the value of the token is
 * stored into the YYSTYPE ``parp'' points to.
 *
 * Returned values:
 *   0            - end of input, or unknown command name
 *   command type - (``type'' of the x_cmdtab[] record) for command name
 *   '=' or ';'   - for these two separators
 *   CSA_STRING   - for command argument; the text is in yy->buf
 *
 * State 0 expects name of command, state 1 reads its arguments; ';'
 * switches back to state 0.
 */
int 
csa_yylex(p, parp)
  csa_params_t *p;
  void *parp;
{
	const csa_cmd_t *cmditem;
	size_t sindex;
	YYSTYPE *lvalp=parp;
	int retval=0;
	csa_yy_t *yy = p->yy;

	/* ignore leading white space */
	while(YY_VALID(yy) && isspace((unsigned char)YY_CURCHAR(yy)) )
		yy->index++;

	/* if nothing more remains in the buffer, exit now */
	if (!YY_VALID(yy))
		goto out;

	switch(yy->state) {
	case 0: /* start state before we find out name of command */
		/* the name ends on white space, '=', ';' or end of input */
		sindex = yy->index;
		while(YY_VALID(yy)
			&& !isspace((unsigned char)YY_CURCHAR(yy))
			&& YY_CURCHAR(yy) != '='
			&& YY_CURCHAR(yy) != ';' ) yy->index++;
		cmditem = x_cmd_lookup(&(yy->str[sindex]), yy->index - sindex);

		/* if there was found command slot, fill in */
		/* p->yy with appropriate values and update */
		/* retval accordingly */
		if (cmditem) {
			lvalp->res.s = cmditem->name;
			yy->cmd = cmditem;			
			yy->state = 1;
			/* keep the name as it was written in the source */
			yy->name = ap_pstrndup(p->pool_tmp, &(yy->str[sindex]),
						(int)(yy->index - sindex));
			retval = cmditem->type;
		} else {
			/* nothing suitable found, return end-of-input */
			retval = 0;
		}

		break;

	case 1: /* reading arguments */
		if (YY_CURCHAR(yy) == '=' && !yy->igneq)
		{
			/* '=' of name=value pair; following '=' chars */
			/* belong to the value until it is read */
			lvalp->res.s = "=";
			yy->index++;
			yy->igneq = 1;
			retval = '=';
		}
		else if (YY_CURCHAR(yy) == ';')
		{
			/* end of command, name of next one may follow */
			lvalp->res.s = ";";
			yy->index++;
			yy->state = 0;
			yy->igneq = 0;/* just in case this follows eq sign */
			retval = ';';
		}
		else {
			/*
			 * quote - quote char we are currently inside of
			 * useq  - quote char the argument started with
			 */
			char *outp, quote = '\0', curchar, useq = '\0';
			size_t startidx = yy->index;
			int escapechar=0;

			/* The buffer is allocated when the first argument */
			/* is read and is big enough for all the text not */
			/* read yet; it is reused for following arguments. */
			if (!yy->buf) {
				yy->buf = (char *)
					ap_palloc(p->pool_tmp,
					(int)YY_REMAINS(yy) + 1);
			}
			outp = yy->buf;
			for(;YY_VALID(yy); yy->index++)
			{
				curchar = YY_CURCHAR(yy);

				if (escapechar) {
					/* this character has been escaped, */
					/* do not check if it's special */
					escapechar = 0;
					goto csa_yylex_addchar;
				}

				/* check quotes first */
				if (curchar == '\'' || curchar == '"') {
					/* If the handed string starts with
					 * quote char, remember it and
					 * strip it off.
					 */
					if (!useq && yy->index == startidx) {
						useq = quote = curchar;
						continue;
					}

					/* Switch the quote flag, otherwise
					 * leave the quotes intact.
					 */
					if (!quote)
						quote = curchar;
					else if (quote == curchar)
						quote = '\0';
				}

				/* break processing if the char is not */
				/* quoted and it's ';' or space or '=' */
				/* which is not to be ignored */
				if (!quote && 
					(curchar == ';' || isspace((unsigned char)curchar)
					 || (curchar == '=' && !yy->igneq)) )
				{
					break;
				}

				/* Special case \\ (escape char) - if it's
				 * encountered, next character won't be treated
				 * specially even if it is ", ', ; or similar.
				 * The \\ won't be passed, unless the
				 * command we are parsing wants it. */
				if (curchar == '\\') {
					escapechar = 1;
					if (!(yy->cmd->flags & CMD_NOESCAPE))
						continue;
				}

			    csa_yylex_addchar:
				/* add the character to output string */
				*(outp++) = curchar;
			} /* for */

			/*
			 * If the string started with quotes and ended too,
			 * strip the trailing quote (it's copied to the parsed
			 * string by the above code).
			 */
			if (!quote && useq && outp > yy->buf && useq==outp[-1])
				outp--;

			*outp = '\0'; /* end the string */

			/* If the string was partially quoted and ended before
			 * matching quote was found, we found an error in
			 * the input string, so pass the unparsed string
			 * instead of the parsed one */
			if (quote) {
				size_t len = yy->index - startidx;

				useq = '\0';
				strncpy(yy->buf, &yy->str[startidx], len);
				yy->buf[len] = '\0';
			}

			/* the parser gets the text and the quote char */
			/* which enclosed it (zero if there was none) */
			lvalp->res.s = yy->buf;
			lvalp->res.opaque = useq;
			yy->igneq = 0; /* clear igneq flag */

			retval = CSA_STRING;
		} /* else branch for state == 1 */
	}
			
    out:
	return retval;
}
	
/*
 * Reads input data, processes them and prepares them for output.
 *
 * Each pass of the main loop
 *   1. reads a chunk of response into p->body_buf, if needed,
 *   2. if the data are not to be converted, passes them to output as is,
 *   3. optionally tests the start of document for signs of JavaScript,
 *   4. searches the buffer for '<' starting a command which is known
 *      and which may be executed now,
 *   5. adds the text before that place to output (with substitutions
 *      and recoding done by csa_add_subs_output()),
 *   6. finds the '>' ending the command and runs the command; text of
 *      a command which failed is added to output unchanged.
 *
 * The loop ends when csa_read_response() returns no data. We have to be
 * able to re-enter this function in case response data are not yet
 * available, so whatever is left in p->body_buf is kept there.
 *
 * Returns 0.
 */
int 
csa_process_body(p)
  csa_params_t *p;
{
  csa_String_b *buf;
  char quote, *temp, *cmd, c;
  int cmd_flags, leave, escaped, add_all=0, need_read_more=1;
  size_t output_len, len, sindex, idx, cmd_index, cmd_start, remains;
  const csa_cmd_t *fcmd=NULL;

#ifdef CSA_DEBUG
   csa_debug(p->dbg, "process_body: begin");
#endif

  /* init process buffer */
  if (!p->body_buf) {
	p->body_buf = (csa_String_b *) ap_palloc(p->pool_req,
					sizeof(csa_String_b));
	CSA_FILLSTRING(p->body_buf, (char *) ap_palloc(p->pool_req, 8192),
		0, 8192);
  }
  buf = p->body_buf;

  for(len=1; len || buf->len; ) {
#ifdef CSA_DEBUG
	csa_debug(p->dbg, "process_body: start of cycle");
#endif

	/* free space allocated in temporary pool */
	if (p->flags & CSA_FL_CLRTMPPOOL) {
		ap_clear_pool(p->pool_tmp);
		p->flags &= ~CSA_FL_CLRTMPPOOL;
	}

	/* read a chunk, if needed */
	if (need_read_more || buf->len == 0) {
		len = csa_read_response(p, &buf->value[buf->len],
				buf->maxlen - buf->len);
		if (len == 0)
			break; /* no more data available, exit */
		buf->len += len;
		need_read_more = 0;
	}

	/* send data directly to output if we won't change it */
	if (!CSA_ISSET(p->flags, CSA_FL_CONVERT)) {
		csa_add_output(p, buf->value, buf->len, 0);
		buf->len = 0;
		continue;
	}

	/*
	 * If wanted, check for JavaScript indicies in first 200 bytes
	 * of the document or first three nonempty lines, whatever
	 * comes first.
	 */
	if (CSA_ISSET(p->flags, (CSA_FL_TESTJS|CSA_FL_ISHTML))) {
		/* Test for //, '/' + '*', ';\s*' */ 
		size_t jsidx, jslen = buf->len;
		const unsigned char *v = (unsigned char *) buf->value;
		int found=0, crs=0;

		/* Skip initial whitespace */
		for(jsidx=0; jsidx < jslen && isspace(v[jsidx]); jsidx++);

		for(; jsidx < jslen && jsidx < CSA_TESTJSBYTES && crs < 3;
		    jsidx++) {
			switch (v[jsidx]) {
			case ';':
				/* ';' followed only by white space up to */
				/* end of line */
				for(found=0;
				    ++jsidx < jslen
				    && jsidx < CSA_TESTJSBYTES; ) {
					if (!isspace(v[jsidx]))
						break;
					if (v[jsidx] == '\r'
					    || v[jsidx] == '\n') {
						found = 1;
						break;
					}
				}
				break;
			case '/':
				/* start of comment */
				if (jsidx+1 < jslen
				    && (v[jsidx+1] == '*'
					|| v[jsidx+1] == '/'))
					found = 1;
				break;
			case '\n':
				crs++;
				break;
			default:
				/* ignore */
				break;
			}

			if (found)
				break;
		}

		if (found) {
			/* Identified as JavaScript, unset 'ISHTML' flag */
			CSA_UNSET(p->flags, CSA_FL_ISHTML);
		}

		/*
		 * Stop testing if either the amount of space tested
		 * is more than TESTJSBYTES, or the above loop ended
		 * prematurely (i.e. either match was found, or there were
		 * too many lines without sign of JavaScript).
		 */
		if (jsidx != jslen
		    || p->content_length + jslen > CSA_TESTJSBYTES)
			CSA_UNSET(p->flags, CSA_FL_TESTJS);
	}

	/*
	 * Search for the start of a command. When the loop is left,
	 * cmd_index is non-zero only if an executable command has been
	 * found on ``temp''; it's the offset of the command text from
	 * the '<'.
	 */
	cmd_index = 0;
	cmd_flags = 0;
	temp = buf->value;
	remains = buf->len;
	for(;; temp++, remains--) {
		/*
		 * Forget the candidate examined in previous pass; without
		 * this, a '<' rejected via ``continue'' would leave
		 * cmd_index set, and if no other '<' followed, the rest
		 * of buffer would be parsed as a command.
		 */
		cmd_index = 0;

		temp = memchr(temp, '<', remains);
		if (!temp) break;

		remains = buf->len - (temp - buf->value);

		if (remains < 3) {
			need_read_more = 1;
			break;
		}
			
		cmd = &temp[1];

		if (cmd[0] == CSA_CMD_TAG) {
			cmd_index = 2;
			cmd_flags = CMD_EXTENDED;
		} else if (remains < 4 && cmd[0] == '!' && cmd[1] == '-') {
			/* this can be "<!--", but the last char */
			/* of it has not been read yet */
			need_read_more = 1;
			break;
		} else if (cmd[0] == '!' && cmd[1] == '-' && cmd[2] == '-') {
			cmd_index = 4;
			cmd_flags = CMD_COMMENT;
		} else if (CSA_ISSET(p->flags, CSA_FL_ISHTML)) {
			cmd_index = 1;
			cmd_flags = CMD_HTMLTAG;
		} else {
			/* no <FOO sequence we recognize, continue searching */
			continue;
		}

		/* skip leading whitespace */
		idx = cmd_index;
		for(; idx <remains && isspace((unsigned char)temp[idx]); idx++);

		cmd_start = idx;

		/* Allow / on the beginning for HTML tags. */
		if (cmd_flags == CMD_HTMLTAG && idx < remains
		    && temp[idx] == '/')
			idx++;

		/* find end of command */
		for(; idx < remains; idx++) {
			c = temp[idx];
			if (c<'A' || (c>'Z' && c<'a') || c>'z')
				break;
		}

		/* if we encountered an end of string while searching
		 * for end of command name, we need to read more data */
		if (idx == remains) {
			need_read_more = 1;
			break;
		}
			
		/* if there were no "good" characters, skip to next < */
		if (idx == cmd_start)
			continue;

		/* if it's correct command, process it, otherwise
		 * continue searching */
		if ((fcmd = x_cmd_lookup(&temp[cmd_start], idx - cmd_start))
			!= NULL)
		{
			/* check if we are allowed to execute this cmd
			 * in this context - if yes, jump off the loop */
			if (x_cmd_canexecute(p, fcmd, cmd_flags))
				break;
		}
	}
				
	/*
	 * Text before the found '<' is added to output; all of it if we
	 * are going to run a command, otherwise csa_add_subs_output() is
	 * allowed to keep a piece of it.
	 */
	add_all = 0;
	if (temp) {
		if (!need_read_more) add_all = 1;
		output_len = temp - buf->value;
	}
	else
		output_len = buf->len;
		
	/* add already read data into output */
	if (output_len)
		csa_add_subs_output(p, buf, output_len, add_all);

	/* if no command was found or need read more data to resolve one,
	 * read next chunk of data */
	if (!cmd_index || need_read_more)
		continue;

	/*
	 * The command starts on the beginning of the buffer now. Search
	 * for '>' ending it; data are read and the buffer is enlarged
	 * as needed.
	 */
	sindex = cmd_index;
	leave = escaped = 0;
	quote = '\0';
	for(; !leave; sindex++) {
		size_t tail_len;

		/* allocate bigger buffer if we need one */
		if (sindex + SAFE_SPACE >= buf->maxlen) {
			buf->maxlen *= 2;
			temp = (char *) ap_palloc(p->pool_req,(int)buf->maxlen);
			memcpy(temp, buf->value, buf->len);
			buf->value = temp;
		}

		/* read more data, if needed */
		if (sindex >= buf->len) {
			len = csa_read_response(p, &buf->value[buf->len],
					buf->maxlen - buf->len);

			/* end of input encountered before command */
			/* has been recognized */
			if (!len) {
				need_read_more = 1;
				break;
			}

			buf->len += len;
		}

		/* ignore char if it's escaped */
		if (escaped) {
			escaped = 0;
			continue;
		}

		switch (buf->value[sindex]) {
		case '\\':
			/* backslash escapes next char, unless we are */
			/* inside single quotes */
			/* any time we get here, ``escaped'' is 0 */
			if (quote != '\'') escaped = 1;
			break;

		case '\'':
		case '"':
			if (quote == '\0') quote = buf->value[sindex];
			else if (quote == buf->value[sindex]) quote ='\0';
			break;

		case '>':
			/* quoted '>' doesn't end extended command */
			if (quote && CSA_ISSET(cmd_flags, CMD_EXTENDED))
				break;

			/*
			 * If we encounter a '>' when quote is not null,
			 * it may be bug in document. Test whether
			 * it's correct end of currently parsed command
			 * and if yes, proceed anyway. csa_yyparse()
			 * will DTRT, too.
			 */ 
			/* tail_len is length of the closing tag */
			if (CSA_ISSET(cmd_flags, CMD_COMMENT)
				&& buf->value[sindex-1] == '-'
				&& buf->value[sindex-2] == '-')
					tail_len = 3;
			else if (CSA_ISSET(cmd_flags, CMD_EXTENDED)
				&& buf->value[sindex-1] == CSA_CMD_TAG)
					tail_len = 2;
			else if (CSA_ISSET(cmd_flags, CMD_HTMLTAG))
					tail_len = 1;
			else /* just a random '>' */
					break;

			/* run the text between opening and closing tag */
			if (csa_run_cmd(p, &buf->value[cmd_index],
				sindex - cmd_index - tail_len + 1,
				cmd_flags) == 0)
			{
				/* succesful completition - the command */
				/* is removed from the buffer */
				buf->len -= sindex + 1;
				memmove(buf->value, &buf->value[sindex + 1],
					buf->len);
			} else {
				/* failure - the command is added to output */
				csa_add_subs_output(p, buf,
					(size_t)sindex + 1, 1);
			}

			leave = 1;
			break;
		} /* switch */
	} /* cmd parsing for loop */

	/* we would need read some more data, but it's not available, so quit */
	if (need_read_more) break;

  } /* for */

  /* Note: remains in buffer are handled by x_finish_body() - we can't */
  /* flush it now as csa_process_body() can be called again when there are */
  /* more data available */

  /* free tmp pool */
  ap_clear_pool(p->pool_tmp);

#ifdef CSA_DEBUG
   csa_debug(p->dbg, "process_body: end");
#endif
 
   return 0;
} /* csa_process_body */
