#! /usr/bin/env python

# Copyright (c) 2008, PediaPress GmbH
# See README.txt for additional licensing information.
"""
Generate DocBook from the DOM tree generated by the parser.

Currently this is just a proof of concept which is very incomplete

see also: 
http://www.docbook.org/tdg/en/html/docbook.html
http://www.docbook.org/tdg5/en/html/ch02.html

basic conversion to other formats using jade works:
docbook2pdf -l /usr/share/sgml/declaration/xml.dcl -e no-valid  t.xml  

"""

import sys
import StringIO
from lxml import etree
ET = etree
#DTD = etree.DTD(fn)


"""
note: lxml.etree.DTD was not very helpful at construction time
as it validates the unfinished subtree (which fails).

we extracted our own minimal grammar using a script (working on the RELAX NG
DocBook content model description) in tools/ .
This helps us to do basic checks on allowed children and attributes.

We have an exception for <math> (using the MathML NS).


IF YOU ADD NEW DOCBOOK ELEMENTS TO THE WRITER YOU NEED TO REGENERATE THIS MODULE.
"""
from mwlib.docbook44grammar import grammar # REGENERATE IF ADDING ELEMENTS


from mwlib import parser
from mwlib import advtree
#from mwlib import xmltreecleaner
from mwlib import writerbase
from mwlib.treecleaner import TreeCleaner
from mwlib.xhtmlwriter import showNode
from mwlib.xhtmlwriter import indent
from mwlib.xhtmlwriter import validate
from mwlib.xhtmlwriter import setVList
from mwlib.log import Log

version = "0.2"
"""
v0.1:
 * initial implementation
v0.2: 
 * switched to content model validation at construction time
 * added mathml support

"""


log = Log("docbookwriter")

'''
# fix iselement in ET
def _iselement(element):
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if None in element.attrib.values():
        raise TypeError("None not allowed in %r %r" %(element, element.attrib)) # enforce valid values for attributes
    return isinstance(element, ET._ElementInterface) or hasattr(element, "tag")
ET.iselement = _iselement
'''



# -------- customize lxml.etree ------------------
#     http://codespeak.net/lxml/element_classes.html
class XElement(etree.ElementBase):
    """
    Element class with an additional ``writeto`` property.

    ``writeto`` names another element that should receive content in place
    of this one. The targets are kept in ``wtlookup``, a dictionary defined
    on the class and therefore shared by all instances; an element without
    an entry reports ``None``.
    """
    wtlookup = {} # shared between all, if ever initiated

    def setwriteto(self, other=None):
        "remember ``other`` as the redirect target of this element"
        self.wtlookup[self] = other

    def getwriteto(self):
        "return the redirect target of this element, or None if none was set"
        return self.wtlookup.get(self)

    writeto = property(getwriteto, setwriteto)


# Parser configured so that the elements it creates are XElement instances;
# within this module it is only used as an element factory (see Element below).
MODULE_PARSER = etree.XMLParser()
MODULE_PARSER.set_element_class_lookup(
    etree.ElementDefaultClassLookup(element=XElement)
    )
# Element() creates a new root element through MODULE_PARSER.
Element = MODULE_PARSER.makeelement
# SubElement() is the plain lxml factory for creating a child of an element.
SubElement = etree.SubElement

# validation helpers ---------------------------------
class InvalidChildElement(Exception):
    "error type for a child element that its parent does not allow"

class InvalidAttribute(Exception):
    "error type for an attribute that its element does not allow"


# writer ---------------------------------------------

class SkipChildren(object):
    """
    Marker returned by a writer method to stop processing of a node's children.

    ``element`` optionally carries the element that was generated for the
    node itself; it defaults to None.
    """

    def __init__(self, element=None):
        self.element = element


class DocBookWriter(object):
    """
    Build a DocBook element tree from a parse tree.

    ``write`` dispatches every non-text node to the method named
    ``dbwrite<ClassName>`` and appends the element that method returns.
    ``asstring`` serializes the finished tree including the XML header.
    """
    # if true, nodes without a dbwrite* handler are logged instead of raising
    ignoreUnknownNodes = True
    # running number used as link text "[n]" for named URLs without children
    namedLinkCount = 1
    # stylesheet which uses the mozilla Extensible Binding Language
    # http://www.informatik.fh-wiesbaden.de/~werntges/home_t/proj/dbkcss102/wysiwygdocbook.xml
    # http://www.informatik.fh-wiesbaden.de/~werntges/home_t/proj/dbkcss102/wysiwygdocbook1.02.zip
    css = '<?xml-stylesheet href="http://www.informatik.fh-wiesbaden.de/~werntges/home_t/proj/dbkcss102/wysiwygdocbook1.02/driver.css" type="text/css"?>\n'
    # template with two %s slots: the document type and the stylesheet
    # processing instruction (filled in by getHeader)
    header='''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE %s PUBLIC "-//OASIS//DTD DocBook EBNF Module V1.1CR1//EN"
               "http://www.oasis-open.org/docbook/xml/mathml/1.1CR1/dbmathml.dtd">\n%s'''

    def __init__(self, env=None, status_callback=None, documenttype="article", language="en", imagesrcresolver=None, debug=False):
        """
        Store the configuration and reset the per-document state.

        documenttype must be "article" or "book". imagesrcresolver is an
        optional URL template such as "http://anyhost/redir?img=IMAGENAME"
        in which IMAGENAME is substituted by the image target.
        """
        assert documenttype in ("article", "book")

        # configuration
        self.environment = env
        self.status_callback = status_callback
        self.documenttype = documenttype
        self.language = language
        self.imagesrcresolver = imagesrcresolver
        self.debug = debug

        # state collected while writing
        self.root = None
        self.references = []
        self.errors = []
        self.languagelinks = []
        self.categorylinks = []
        
    def getHeader(self):
        "return the header template filled with the document type and the stylesheet"
        values = (self.documenttype, self.css)
        return self.header % values
        
    def getTree(self, debuginfo=""):
        """
        Indent and return the root element.

        In debug mode the serialized document is passed to validate(); a
        non-empty result is appended to the root as an XML comment.
        debuginfo is accepted but not used.
        """
        indent(self.root)
        if self.debug:
            # use the formatted header: the raw self.header template still
            # contains its two %s placeholders and is not a usable prolog
            r = validate(self.getHeader() + ET.tostring(self.root))
            if r:
                # "--" is not allowed inside an XML comment
                self.root.append(ET.Comment(r.replace("--", " - - ")))
        return self.root
    
    def asstring(self):
        "return the header followed by the serialized tree"
        prolog = self.getHeader()
        body = ET.tostring(self.getTree())
        return prolog + body

    
    def writeText(self, obj, parent):
        """
        Append obj.caption to the text content of parent.

        If parent already has child elements the text goes to the tail of
        the last child, otherwise to the text of parent itself.
        """
        kids = parent.getchildren()
        if kids:
            # text following the last child lives in that child's tail
            last = kids[-1]
            if last.tail:
                last.tail += obj.caption
            else:
                last.tail = obj.caption
            return
        if parent.text:
            parent.text += obj.caption
        else:
            parent.text = obj.caption

    def writedebug(self, obj, parent, comment=""):
        """
        Append an XML comment describing obj to parent (debug mode only).

        The comment lists the class name of obj and its non-empty instance
        attributes, except "_parentref" and "children", optionally followed
        by comment on a new line. Nothing happens if debug is off or parent
        is None.
        """
        if not self.debug or parent is None:
            return
        hidden = ("_parentref", "children")
        stuff = []
        for k in obj.__dict__.keys():
            if k in hidden:
                continue
            if getattr(obj, k):
                stuff.append("%s : %r" % (k, getattr(obj, k)))
        text = obj.__class__.__name__ + repr(stuff)
        if comment:
            text = text + "\n" + comment
        # "--" is not allowed inside an XML comment (hot fix)
        parent.append(ET.Comment(text.replace("--", " - - ")))


    def writeparsetree(self, tree):
        "append the output of parser.show for tree to the root as an XML comment"
        buf = StringIO.StringIO()
        parser.show(buf, tree)
        # "--" is not allowed inside an XML comment
        dump = buf.getvalue().replace("--", " - - ")
        self.root.append(ET.Comment(dump))


    def write(self, obj, parent=None):
        """
        Translate the parse tree node obj and attach the result to parent.

        parent is required. If parent (or its target) has a ``writeto``
        target, that target is used instead.

        * Text nodes are appended to the text content of the parent.
        * Other nodes are passed to the method ``dbwrite<ClassName>``:
          - an element is checked against the grammar and appended; the
            children of obj are then written into it. If the check fails
            the node and its children are dropped.
          - None means "no element of its own": the children are written
            into parent.
          - a SkipChildren instance has its element (if any) appended and
            the children of obj are not processed.
        * Nodes without a handler are logged and treated like None if
          ignoreUnknownNodes is set, otherwise an Exception is raised.
        """
        assert parent is not None

        def saveAddChild(p, c):
            "append c to p if the grammar allows it; return True on success"
            assert p is not None

            # check if subelement is allowed in parent
            assert c.tag in grammar
            # this has to be a tuple: the former ("math") was a plain string,
            # which turned the membership tests below into substring matches
            exceptions = ("math",)
            if not c.tag in grammar[p.tag]["children"] and c.tag not in exceptions:
                log("write:", c.tag, "not allowed in ", p.tag, " (%r)" % grammar[p.tag]["children"], ", dumping")
                return False

            # check for attributes
            vas = grammar[c.tag]["attributes"]
            for k in c.keys():
                if not k in vas and c.tag not in exceptions:
                    log("write: attrib",k, "not allowed in ", c.tag, " (%r)"%vas, ", failing")
                    return False
            p.append(c)
            return True

        # follow the writeto redirections to the element that takes content
        while parent.writeto is not None:
            parent = parent.writeto # SPECIAL HANDLING

        # if its text, append to last node
        if isinstance(obj, parser.Text):
            self.writeText(obj, parent)
            return

        # check for method
        m = getattr(self, "dbwrite" + obj.__class__.__name__, None)
        if m: # find handler
            e = m(obj)
        elif self.ignoreUnknownNodes:
            log("SKIPPED unknonw node %r" % obj)
            e = None
        else:
            raise Exception("unknown node:%r" % obj)

        if isinstance(e, SkipChildren): # do not process children of this node
            if e.element is not None:
                saveAddChild(parent, e.element)
            return
        if e is None:
            # no element for this node, its children go to the parent
            e = parent
        elif not saveAddChild(parent, e):
            return

        # iterate over a copy, handlers may modify obj.children
        for c in obj.children[:]:
            self.write(c, e)
            

    def writeChildren(self, obj, parent): # use this to avoid bugs!
        "write every item obtained by iterating obj into parent, not obj itself"
        for child in obj:
            self.write(child, parent)

    '''
    def write(self, obj, parent=None):
        """
        translates a parse tree object to element tree XML Element
        returns an element, which the caller has to add (or not)
        text is directly added to the parents last child or the parent itself.
        """
        # if its text, append to last node
        if isinstance(obj, parser.Text):
            self.writeText(obj, parent)
        else:
            self.writedebug(obj, parent)
            # check for method
            m = "dbwrite" + obj.__class__.__name__
            m=getattr(self, m, None)
            
            if m: # find handler
                e = m(obj)
            elif self.ignoreUnknownNodes:
                self.writedebug(obj, parent, "was skipped")
                log("SKIPPED")
                showNode(obj)
                e = None
            else:
                raise Exception("unknown node:%r" % obj)
            

            if isinstance(e, SkipChildren): # do not process children of this node
                return e.element
            elif e is None:
                e = parent

            p = e
            if hasattr(e, "writeto"):
                p = e.writeto # SPECIAL HANDLING 
            
            for c in obj.children[:]:
                ce = self.write(c,p)
                if ce is not None and ce is not p:                    
                    p.append(ce)
            return e




    def writeChildren(self, obj, parent): # use this to avoid bugs!
        "writes only the children of a node"
        for c in obj:                    
            res = self.write(c, parent)
            if res is not None and res is not parent:
                parent.append(res)
    '''

    def writeBook(self, book, output=None):
        """
        use this method to pass a parseTree
        will initialize the root node

        book has to be an advtree.Book or an advtree.Article.
        If output is given, the serialized document is written to the
        file of that name.
        """

        if isinstance(book, advtree.Book):
            r = self.dbwriteBook(book)
        else:
            assert isinstance(book, advtree.Article)
            r = self.dbwriteArticle(book)
        self.writeChildren(book, r)
        if output:
            # close the file explicitly instead of relying on garbage
            # collection, so the data is flushed even if writing fails
            f = open(output, "w")
            try:
                f.write(self.asstring())
            finally:
                f.close()
    writeTest = writeBook

    def dbwriteBook(self, obj):
        """
        Create the <book> element; it becomes the root if there is none yet.

        If the environment's metabook has a title, a <bookinfo><title> is added.
        """
        book = Element("book")
        if self.root is None:
            self.root = book
        env = self.environment
        if env and env.metabook.get('title'):
            info = SubElement(book, "bookinfo")
            title = SubElement(info, "title")
            title.text = env.metabook['title']
        return book


    def dbwriteArticle(self, a):
        """
        Create the <article> element; it becomes the root if there is none yet.

        The article gets an <articleinfo><title> and a <section> with its
        own <sectioninfo><title>, both titled with a.caption. Content
        written to the article is redirected into that section.
        """
        article = Element("article", lang=self.language)
        if self.root is None:
            self.root = article

        # head + title
        info = SubElement(article, "articleinfo")
        info_title = SubElement(info, "title")
        if a.caption:
            info_title.text = a.caption

        # section and heading for this article
        section = SubElement(article, "section")
        section_info = SubElement(section, "sectioninfo")
        section_title = SubElement(section_info, "title")
        section_title.text = a.caption

        article.writeto = section
        return article


    def dbwriteNode(self, obj):
        "generic nodes produce no element of their own"
        return None

    def dbwriteBreakingReturn(self, obj):
        "a line break is written as a <literallayout> containing a newline"
        br = Element("literallayout")
        br.text = "\n"
        return br
        


    def dbwriteChapter(self, obj):
        "create a <chapter> with a <chapterinfo><title> taken from obj.caption"
        chapter = Element("chapter")
        title = SubElement(SubElement(chapter, "chapterinfo"), "title")
        if obj.caption:
            title.text = obj.caption
        return chapter


    def dbwriteSection(self, obj):
        """
        Create a <section> with a <sectioninfo><title>.

        The first child of obj is written into the title and then removed
        from obj.children, so that it is not written again as content.
        A section without children gets an empty title.
        """
        e = Element("section")
        si = SubElement(e, "sectioninfo")
        h = SubElement(si,"title")
        # guard against sections without children (IndexError otherwise)
        if obj.children:
            self.write(obj.children[0], h)
            obj.children = obj.children[1:]
        return e

    def dbwritePreFormatted(self, n):
        "preformatted text is written as <programlisting>"
        listing = Element("programlisting")
        return listing

    def dbwriteParagraph(self, obj):
        "a paragraph is written as <para>"
        para = Element("para")
        return para

    def dbwriteEmphasized(self, obj):
        "emphasized text is written as <emphasis>"
        emphasis = Element("emphasis")
        return emphasis

    # strong text uses the same handler as emphasized text
    dbwriteStrong = dbwriteEmphasized
    

    
    def dbwriteBlockquote(self, s):
        "margin to the left & right; content is redirected into a nested <para>"
        quote = Element("blockquote")
        para = SubElement(quote, "para")
        quote.writeto = para
        return quote

    # indented blocks use the same handler as block quotes
    dbwriteIndented = dbwriteBlockquote


    def dbwriteItem(self, item):
        "create a <listitem>; content is redirected into a nested <para>"
        listitem = Element("listitem")
        para = SubElement(listitem, "para")
        listitem.writeto = para
        return listitem


    def dbwriteItemList(self, lst):
        "numbered lists become <orderedlist> (arabic), all others <itemizedlist>"
        if not lst.numbered:
            return Element("itemizedlist")
        return Element("orderedlist", numeration="arabic")


    def dbwriteDefinitionList(self, obj):
        """
        <variablelist>
          <varlistentry>
           <term>x</term>
           <listitem><para>y</para></listitem>
          </varlistentry>
        </variablelist>

        The children are expected to alternate between a DefinitionTerm
        and a DefinitionDescription; each such pair is written into one
        <varlistentry>. Other combinations are logged and left out.
        Returns SkipChildren as the children are written here already.
        """
        e = Element("variablelist")

        for i,o in enumerate(obj.children):  # group every two children into a varlistentry
            if i%2 == 1:
                # the second half of a pair was written together with the
                # first; continue (a return here ended the whole method
                # after the first pair and discarded the list)
                continue
            if isinstance(o, advtree.DefinitionTerm) and isinstance(o.next, advtree.DefinitionDescription):
                ve = Element("varlistentry")
                e.append(ve)
                self.write(o, ve)
                self.write(o.next, ve)
            else:
                log("broken %r %r" % (obj, obj.children))
        return SkipChildren(e)

    def dbwriteDefinitionTerm(self, obj):
        "a definition term is written as <term>"
        term = Element("term")
        return term

    def dbwriteDefinitionDescription(self, obj):
        "create a <listitem>; content is redirected into a nested <para>"
        listitem = Element("listitem") # FIXME
        para = SubElement(listitem, "para")
        listitem.writeto = para
        return listitem

    

    def dbwriteTable(self, t):
        """
        Create an <informaltable> and let setVList process it with the node.

        rowspan & colspan are supported
        nested tables not supported in DocBook V4.4
        """
        informaltable = Element("informaltable")
        setVList(informaltable, t)
        if t.caption:
            # TODO: the caption is not written yet; the earlier attempt
            # added a <caption> child and wrote t.caption into it
            pass
        return informaltable


    def dbwriteCell(self, cell):
        "create a <td> and let setVList process it with the cell node"
        cell_element = Element("td")
        setVList(cell_element, cell)
        return cell_element
            
    def dbwriteRow(self, row):
        "a table row is written as <tr>"
        row_element = Element("tr")
        return row_element

    def dbwriteCite(self, obj):
        "a cite node is written as <quote>"
        quote = Element("quote")
        return quote

    def dbwriteSup(self, obj):
        "a sup node is written as <superscript>"
        sup = Element("superscript")
        return sup

    def dbwriteSub(self, obj):
        "a sub node is written as <subscript>"
        sub = Element("subscript")
        return sub
    
 
    def dbwriteCode(self, n):
        'code is written as <programlisting format="linespecific">'
        listing = Element("programlisting", format="linespecific")
        return listing

    # source nodes use the same handler as code nodes
    dbwriteSource = dbwriteCode

    def dbwriteMath(self, obj):
        """
        Render the formula in obj.caption as MathML inside a <math> element.

        If the renderer returns nothing usable, the formula source is
        written into a <phrase role="texmath"> instead.
        """
        rendered = writerbase.renderMath(obj.caption, output_mode='mathml', render_engine='blahtexml')
        if not rendered:
            fallback = Element("phrase", role="texmath")
            fallback.text = obj.caption
            return fallback

        def _copy_children(source, target):
            # translate to lxml.Elements: tag and text of every descendant
            # are copied, nothing else
            for child in source.getchildren():
                clone = Element(str(child.tag))
                target.append(clone)
                if child.text:
                    clone.text = child.text
                _copy_children(child, clone)

        math = Element("math", xmlns="http://www.w3.org/1998/Math/MathML")
        _copy_children(rendered, math)
        return math


    def dbwriteImageLink(self, obj):
        """
        Create an <inlinemediaobject> or <mediaobject> for an image.

        Returns None if the image has no target. The image source comes
        from imagesrcresolver (IMAGENAME is replaced by the target), else
        from the environment's images, else the target itself is used.
        Content written to the element is redirected into <caption><para>.
        """
        if not obj.target:
            return

        if obj.isInline():
            media = Element("inlinemediaobject")
        else:
            media = Element("mediaobject")

        imageobject = SubElement(media, "imageobject")
        caption = SubElement(media, "caption")
        media.writeto = SubElement(caption, "para")

        if self.imagesrcresolver:
            # use a resolver which redirects to the real image
            # e.g. "http://anyhost/redir?img=IMAGENAME"
            source = self.imagesrcresolver.replace("IMAGENAME", obj.target)
        elif self.environment and self.environment.images:
            source = self.environment.images.getURL(obj.target, obj.width or None)
        else:
            source = None
        if source is None:
            source = obj.target

        imagedata = SubElement(imageobject, "imagedata", fileref=source, scalefit="1")
        if obj.width:
            width = "%dpx" % obj.width
            imagedata.set("contentwidth", width)
            imagedata.set("width", width)
        if obj.height:
            height = "%dpx" % obj.height
            imagedata.set("contentdepth", height)
            imagedata.set("depth", height)

        return media



    # Links ---------------------------------------------------------

    def dbwriteLink(self, obj):
        """
        Create a <ulink> for a link node.

        If the node has a target, the url attribute is set to obj.url, or
        "#" if that is empty. A node without children gets its target as text.
        """
        link = Element("ulink")
        if obj.target:
            link.set("url", obj.url or "#")
        if not obj.children:
            link.text = obj.target
        return link

    # These node types share the generic link handler.
    # NOTE: dbwriteLangLink and dbwriteSpecialLink are defined again as
    # methods further down in this class; those later definitions replace
    # the aliases made here.
    dbwriteArticleLink = dbwriteLink 
    dbwriteLangLink = dbwriteLink # FIXME
    dbwriteNamespaceLink = dbwriteLink# FIXME
    dbwriteInterwikiLink = dbwriteLink# FIXME
    dbwriteSpecialLink = dbwriteLink# FIXME

    def dbwriteURL(self, obj):
        "create a <ulink> to obj.caption; without children the URL is also the text"
        link = Element("ulink", url=obj.caption)
        if obj.children:
            return link
        link.text = obj.caption
        return link

    def dbwriteNamedURL(self, obj):
        """
        Create a <ulink> to obj.caption.

        A link without children gets the running number "[n]" as text and
        the counter namedLinkCount is increased.
        """
        link = Element("ulink", url=obj.caption)
        if not obj.children:
            link.text = "[%s]" % self.namedLinkCount
            self.namedLinkCount += 1
        return link

    def dbwriteSpecialLink(self, obj): # whats that?
        "create a <ulink> to obj.target; without children the target is also the text"
        link = Element("ulink", url=obj.target)
        if obj.children:
            return link
        link.text = obj.target
        return link

    def dbwriteCategoryLink(self, obj):
        "create a <ulink> to obj.target with the target as text"
        if not (obj.colon or obj.children):
            pass # FIXME: this case is not handled differently yet
        link = Element("ulink", url=obj.target)
        link.text = obj.target
        return link


    def dbwriteLangLink(self, obj): # FIXME no valid url (but uri)
        """
        Create a <ulink> to obj.target, or return None if there is no target.

        A link without children gets the target as text.
        """
        target = obj.target
        if target is None:
            return
        link = Element("ulink", url=target)
        if not obj.children:
            link.text = obj.target
        return link


    def dbwriteImageMap(self, obj): # FIXME!
        """
        Write an image map as the plain image it is based on.

        Returns the element created for obj.imagemap.imagelink, or None if
        the image map has no image link. The areas of the map are not
        written.
        """
        imagelink = obj.imagemap.imagelink
        if imagelink:
            # handlers return the element and write() appends it; calling
            # self.write() here without a parent always failed its assertion
            return self.dbwriteImageLink(imagelink)


    def dbwriteGallery(self, obj):
        "a gallery is written as a plain <para>"
        para = Element("para")
        return para


# ------------------------------------------------------------------------------

    def dbwriteDiv(self, obj):
        "a div is written as <para>"
        para = Element("para") # FIXME
        return para

    def dbwriteSpan(self, obj):
        "a span produces no element of its own" # FIXME
        return None

    def dbwriteHorizontalRule(self, obj):
        "no element is written: there is no equivalent in docbook"
        return None


    def dbwriteReference(self, t): # FIXME USE DOCBOOK FEATURES (needs parser support)
        """
        Collect the reference node and write a numbered marker in its place.

        The node is appended to self.references; a <superscript> with the
        text "[n]" is returned, n being the number of collected references.
        The children of the node are not written here.
        """
        self.references.append(t)
        number = len(self.references)
        marker = Element("superscript")
        marker.text = u"[%d]" % number
        return SkipChildren(marker)

    def dbwriteReferenceList(self, t): # FIXME USE DOCBOOK FEATURES
        """
        Write the collected references as an <orderedlist> (arabic).

        Every reference becomes a <listitem><para> holding its children.
        The collected references are cleared afterwards. Returns None if
        no references were collected.
        """
        if not self.references:
            return
        ol = Element("orderedlist", numeration="arabic")
        for ref in self.references:
            listitem = SubElement(ol, "listitem")
            para = SubElement(listitem, "para")
            self.writeChildren(ref, parent=para)
        self.references = []
        return ol








# ----------------------------------- old xhtml writer stuff --------------


    # Special Objects


    def xwriteTimeline(self, obj):
        "create an <object> carrying the timeline source, with an <em> as alternate text"
        timeline = Element("object")
        for name, value in (
            ("class", "mwx.timeline"),
            ("type", "application/mediawiki-timeline"),
            ("src", "data:text/plain;charset=utf-8,%s" % obj.caption),
        ):
            timeline.set(name, value)
        alternate = SubElement(timeline, "em")
        alternate.set("class", "mwx.timeline.alternate")
        alternate.text = u"Timeline"
        return timeline

    def xwriteHiero(self, obj): # FIXME parser support
        "create an <object> carrying the hiero source, with an <em> as alternate text"
        hiero = Element("object")
        for name, value in (
            ("class", "mwx.hiero"),
            ("type", "application/mediawiki-hiero"),
            ("src", "data:text/plain;charset=utf-8,%s" % obj.caption),
        ):
            hiero.set(name, value)
        alternate = SubElement(hiero, "em")
        alternate.set("class", "mwx.hiero.alternate")
        alternate.text = u"Hiero"
        return hiero



    # others: Index, Gallery, ImageMap  FIXME
    # see http://meta.wikimedia.org/wiki/Help:HTML_in_wikitext

    # ------- TAG nodes (deprecated) ----------------

    def xwriteOverline(self, s):
        'create a <span class="mwx.style.overline">'
        span = Element("span")
        span.set("class", "mwx.style.overline")
        return span

    def xwriteUnderline(self, s):
        'create a <span class="mwx.style.underline">'
        span = Element("span")
        span.set("class", "mwx.style.underline")
        return span
    
    def xwriteCenter(self, s):
        'create a <span class="mwx.style.center">'
        span = Element("span")
        span.set("class", "mwx.style.center")
        return span

    def xwriteStrike(self, s):
        'create a <span class="mwx.style.strike">'
        span = Element("span")
        span.set("class", "mwx.style.strike")
        return span

        
    def xwriteNode(self, n):
        "no element of its own: simply write children"
        return None






def preprocess(root):
    "build the advanced tree for root, then run all TreeCleaner steps on it"
    advtree.buildAdvancedTree(root)
    cleaner = TreeCleaner(root)
    cleaner.cleanAll()


# - func  ---------------------------------------------------


def writer(env, output, status_callback):
    """
    Build the book for env and write it as DocBook XML to the file output.

    status_callback may be None; otherwise it is called with status and
    progress before preprocessing and before rendering.
    """
    def scb(status, progress):
        # forward the status only if a callback was given
        return status_callback is not None and status_callback(status, progress)

    book = writerbase.build_book(env, status_callback=status_callback, progress_range=(10, 60))
    scb(status='preprocessing', progress=70)
    for child in book.children:
        preprocess(child)
    scb(status='rendering', progress=80)
    dbw = DocBookWriter(env, status_callback=scb, documenttype="book")
    dbw.writeBook(book, output=output)

writer.description = 'DocBook XML'
writer.content_type = 'text/xml'
writer.file_extension = 'xml'


def main():
    """
    Convert every file named on the command line to DocBook.

    Each file is read as UTF-8 wiki markup and parsed; the parse tree is
    printed before and after preprocessing, and the result is written to
    "<filename>.xml".
    """
    from mwlib.dummydb import DummyDB
    from mwlib.uparser import parseString

    for fn in sys.argv[1:]:
        db = DummyDB()
        # close the input file explicitly instead of leaking the handle
        infile = open(fn)
        try:
            raw = unicode(infile.read(), 'utf8')
        finally:
            infile.close()
        r = parseString(title=fn, raw=raw, wikidb=db)
        parser.show(sys.stdout, r)
        preprocess(r)
        parser.show(sys.stdout, r)
        dbw = DocBookWriter()
        dbw.writeBook(r)
        # close the output file so the document is flushed to disk
        nf = open("%s.xml" % fn, "w")
        try:
            nf.write(dbw.asstring())
        finally:
            nf.close()


if __name__=="__main__":
    main()
