// QWeb - An SGML Web Browser
// Copyright (C) 1997  Sean Vyain
// svyain@mail.tds.net
// smvyain@softart.com
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#ifndef _SGMLLexer_h_
#define _SGMLLexer_h_

#include <qobject.h>
#include "Dtd.h"

//: The SgmlLexer class tokenizes a data stream from a Request.
//. The SgmlLexer accepts raw data from a request, and converts it into SGML
//. tokens.  Each token is emitted one at a time, usually to an SgmlParser
//. or a DtdParser.  The SgmlLexer automatically destroys itself after the
//. last token has been emitted.
class SgmlLexer : public QObject {
public:
    //. The major modes of the lexer, selected with mode( Mode ).  The
    //. tokens and delimiters that are recognized differ slightly between
    //. modes.
    enum Mode {
        CdataMode,
        RcdataMode,
        PcdataMode,
        MarkupMode
    };

    //. The kinds of token that are reported through the token() signal.
    //. NullToken is the first enumerator, so it has the value 0; the
    //. remaining enumerators are listed alphabetically.
    // NOTE(review): the numeric values follow declaration order; code outside
    // this header may depend on them, so confirm before reordering.
    enum Token {
	NullToken,
        Ampersand, Any, Attlist,
        Cdata, Comma, Comment, Conref, Content, Current,
        DeclSubsetClose, DeclSubsetOpen, Doctype,
        Element, Empty, Endtag, EndTagOpen, Entities, Entity, EqualSign, ExclusionListStart,
        Fixed,
        GroupClose, GroupOpen,
        Id, Idref, Idrefs, Identifier, Ignore, Implied, Include, InclusionListStart, Integer,
        MarkupClose, MarkupDeclOpen, Md, MinusSign, Ms,
        Name, Names, Ndata, Nmtoken, Nmtokens, Notation, Number, Numbers, Nutoken, Nutokens,
        OptionalTag, OrOperator,
        Pcdata, Percent, Pi, PlusSign, ProcInstOpen, Public,
        QuestionMark,
        Rcdata, ReferenceClose, Required, ReservedNameIndicator,
        Sdata, Shortref, Simple, Star, Starttag, StartTagOpen, StringLiteral, Subdoc, System,
        Temp,
        Uselink, Usemap
    };

    //. Associates a piece of text, and a length, with a Token.
    // NOTE(review): presumably one row of a keyword/delimiter lookup table
    // used by the implementation.  No such table is declared in this header;
    // confirm against the implementation file.
    struct Table {
        int         length;   // presumably the length of text -- TODO confirm
        const char* text;
        Token       token;
    };
private:
    // Required by Qt's meta object compiler (moc) so that the signals and
    // slots declared below are generated and connectable.
    Q_OBJECT

    // The DTD assigned through dtd( Dtd* ).
    Dtd*    _dtd;

    // NOTE(review): the remaining fields are only declared in this header;
    // the roles described below are inferred from their names and from the
    // comments on data() and doToken().  Confirm against the implementation.
    char*   _buf;        // presumably the buffer holding stored input text
    int     _bufSize;    // presumably the allocated size of _buf
    int     _bufStart;   // presumably the index of the next unread character
    int     _bufEnd;     // presumably the index just past the stored data
    char*   _token;      // presumably scratch storage for the token text
    int     _tokenSize;  // presumably the allocated size of _token
    Mode    _mode;       // presumably the value reported by mode()
    bool    _done;       // presumably set once endOfData() has been called

    //. Emit the token, and move the input pointer to the next character.
    //. The token argument is the type of token to emit.
    // NOTE(review): length is presumably the number of input characters
    // that make up the token -- confirm against the implementation.
    void doToken( SgmlLexer::Token token, int length );
public:
    //. Create a new SgmlLexer.
    SgmlLexer();

    //. Destroy the lexer.
    ~SgmlLexer();

    //. Set the DTD used for resolving parameter entity references.
    //. Only the pointer is stored by this setter; the Dtd is not copied.
    // NOTE(review): whether the lexer ever deletes the Dtd is not visible in
    // this header -- confirm ownership before relying on it.
    void dtd( Dtd* dtd ) { _dtd = dtd; }

    //. Return the current lexer mode.
    Mode mode();
public slots:
    //. Store the incoming text, and tokenize as much of it as we
    //. can.  The bytes argument points at the incoming text.
    // NOTE(review): end is presumably the number of bytes available at
    // bytes -- confirm against the implementation and the sender.
    void data( const char* bytes, int end );

    //. Finish tokenizing the remaining data, and die.
    void endOfData();
    
    //. Sets the major mode for the lexer.  The mode can be CdataMode,
    //. RcdataMode, PcdataMode, or MarkupMode.  The tokens and delimiters
    //. are slightly different between modes.
    void mode( Mode m );
signals:
    //. This signal is emitted after the last token has been emitted.
    void done();

    //. This signal is emitted as each token is recognized.  The token is the
    //. type of token, and the text is the actual string that was recognized.
    // NOTE(review): how long text stays valid after the signal returns is not
    // visible in this header; receivers should presumably copy it if they
    // need to keep it.
    void token( SgmlLexer::Token token, const char* text );
};

#endif
