package ca.ucalgary.seahawk.util;

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Converts a large subset of basic LaTeX markup into equivalent HTML.
 *
 * <p>The conversion is done in two phases. First an ordered list of regular
 * expression rewrite rules is applied to the whole input, one rule after the
 * other. Then a handful of procedural passes handle the constructs that need
 * counting or brace matching: the title block, small caps, tables, figures,
 * labels/references and footnotes.</p>
 *
 * <p>The rewrite rules are order-sensitive: the output of one rule is the input
 * of every later rule, so a rule that emits a character another rule consumes
 * must come after it (or emit an entity instead).</p>
 */
public class TEX2HTML{
    /**
     * Rewrite rules in application order (compiled pattern to replacement string).
     * Populated once by the static initializer and only read afterwards.
     */
    private static final Map<Pattern,String> SUBSTITUTIONS = new LinkedHashMap<Pattern,String>();

    /** Opening of a small-caps group, as it appears after the rewrite rules have run. */
    private static final String SMALL_CAPS_OPEN = "{\\sc";

    /** Appends one rewrite rule; the regex is compiled here so convert() never recompiles it. */
    private static void rule(String regex, String replacement){
        SUBSTITUTIONS.put(Pattern.compile(regex), replacement);
    }

    static{
        // Comments: at the very start of the input, then anywhere not preceded by a backslash
        rule("^\\%[^\\n]*", "");
        rule("([^\\\\])\\%[^\\n]*", "$1");
        // The escaped forms must be handled before the bare ones, otherwise "\<" becomes
        // "\&lt;" and the later "\&" rule turns that into "&amp;lt;"
        rule("\\\\<", "&lt;");
        rule("\\\\>", "&gt;");
        rule("<", "&lt;");
        rule(">", "&gt;");
        rule("\\n\\n+", "<p>\n\n");
        // Circumflex accent: must precede the superscript rules, which consume every '^'
        rule("\\\\\\^([AEIOUaeiou])\\s+", "&$1circ;");
        rule("\\^\\{(.*?)\\}", "<sup>$1</sup>");
        rule("\\_\\{(.*?)\\}", "<sub>$1</sub>");
        rule("\\$\\^(.*?)\\$", "<sup>$1</sup>");
        rule("\\$\\_(.*?)\\$", "<sub>$1</sub>");
        rule("\\^(.)", "<sup>$1</sup>");
        rule("\\\\frac\\{(.*?)\\}\\{(.*?)\\}", "<sup>$1</sup>/<sub>$2</sub>");
        rule("\\\\cal\\s*\\{(.*?)\\}", "<b><i>$1</i></b>");
        rule("\\\\cal\\s*(\\S)", "<b><i>$1</i></b>");
        rule("\\\\documentclass\\s*(?:\\[.*?\\])?\\s*\\{.*?\\}", "");
        rule("\\\\usepackage\\s*(?:\\[.*?\\])?\\s*\\{.*?\\}", "");
        rule("\\\\pagestyle\\{.*?\\}", "");
        rule("\\\\pagenumbering\\{.*?\\}", "");
        rule("\\\\bibliographystyle\\{.*?\\}", "");
        rule("\\\\hyphenation\\{.*?\\}", "");
        rule("\\\\cite\\{(.*?)\\}", "[$1]");
        rule("\\\\citealp\\{(.*?)\\}", "$1");
        rule("\\\\begin\\{document\\}", "");
        rule("\\\\end\\{document\\}", "");
        rule("\\\\begin\\{enumerate\\}", "<ol>");
        rule("\\\\end\\{enumerate\\}", "</ol>");
        rule("\\\\begin\\{itemize\\}", "<ul>");
        rule("\\\\end\\{itemize\\}", "</ul>");
        rule("\\\\begin\\{description\\}", "<dl>");
        rule("\\\\end\\{description\\}", "</dl>");
        rule("\\\\item\\s*\\[([^\\]]*)\\]", "<dt>$1<dd>");
        rule("\\\\item", "<li>");
        rule("\\\\begin\\{quotation\\}", "<blockquote>");
        rule("\\\\end\\{quotation\\}", "</blockquote>");
        rule("\\\\begin\\{quote\\}", "<blockquote>");
        rule("\\\\end\\{quote\\}", "</blockquote>");
        rule("\\\\begin\\{verbatim\\}", "<pre>");
        rule("\\\\end\\{verbatim\\}", "</pre>");
        rule("\\\\begin\\{tabbing\\}", "<pre>");
        rule("\\\\end\\{tabbing\\}", "</pre>");
        rule("\\\\begin\\{verse\\}", "<pre>");
        rule("\\\\end\\{verse\\}", "</pre>");
        // Inline/display math is simply rendered in a fixed-width font
        rule("([^\\\\])\\$\\$?(.*?[^\\\\])\\$\\$?", "$1<tt>$2</tt>");
        // Line break: must precede the single-character escapes below, otherwise the
        // backslash-space rule eats the second half of "\\ " and leaves a stray backslash
        rule("\\\\\\\\", "<br>\n");
        rule("\\\\\\$", "\\$");
        rule("\\\\&", "&amp;");
        rule("\\\\%", "%");
        rule("\\\\_", "_");
        rule("\\\\-", "");  // "hyphenation allowable" syntax
        rule("\\\\#", "#");
        rule("\\\\ ", " ");
        rule("\\\\'([AEIOUYaeiouy])\\s+", "&$1acute;");
        rule("\\\\\"([AEIOUaeiouy])\\s+", "&$1uml;");
        rule("\\\\`([AEIOUaeiou])\\s+", "&$1grave;");
        rule("\\\\~([ANOano])\\s+", "&$1tilde;");
        rule("\\\\,([Cc])\\s+", "&$1cedil;");
        rule("\\\\([aoAO][Ee])\\s+", "&$1lig;");
        rule("\\\\ss", "&szlig;");
        rule("\\\\aa", "&aring;");
        rule("\\\\AA", "&Aring;");
        rule("\\\\([Oo])\\s+", "&$1slash;");
        rule("\\\\l?dots", "...");
        rule("\\\\allowbreak\\s", "");
        // Emitted as numeric entities: a raw '~' would be turned into &nbsp; by the tilde
        // rule further down, and character 92 is the backslash, not the vertical bar
        rule("\\\\char126\\s", "&#126;");
        rule("\\\\char92\\s", "&#92;");
        rule("\\\\char95\\s", "_");
        rule("\\\\Alpha", "&#x391;");
        rule("\\\\Beta", "&#x392;");
        rule("\\\\Gamma", "&#x393;");
        rule("\\\\Delta", "&#x394;");
        rule("\\\\Epsilon", "&#x395;");
        rule("\\\\Zeta", "&#x396;");
        rule("\\\\Eta", "&#x397;");
        rule("\\\\Theta", "&#x398;");
        rule("\\\\Iota", "&#x399;");
        rule("\\\\Kappa", "&#x39A;");
        rule("\\\\Lambda", "&#x39B;");
        rule("\\\\Mu", "&#x39C;");
        rule("\\\\Nu", "&#x39D;");
        rule("\\\\Xi", "&#x39E;");
        rule("\\\\Omicron", "&#x39F;");
        rule("\\\\Pi", "&#x3A0;");
        rule("\\\\Rho", "&#x3A1;");
        rule("\\\\Sigma", "&#x3A3;");
        rule("\\\\Tau", "&#x3A4;");
        rule("\\\\Upsilon", "&#x3A5;");
        rule("\\\\Phi", "&#x3A6;");
        rule("\\\\Chi", "&#x3A7;");
        rule("\\\\Psi", "&#x3A8;");
        rule("\\\\Omega", "&#x3A9;");
        rule("\\\\alpha", "&#x3B1;");
        rule("\\\\beta", "&#x3B2;");
        rule("\\\\gamma", "&#x3B3;");
        rule("\\\\delta", "&#x3B4;");
        rule("\\\\epsilon", "&#x3B5;");
        rule("\\\\zeta", "&#x3B6;");
        rule("\\\\eta", "&#x3B7;");
        rule("\\\\theta", "&#x3B8;");
        rule("\\\\iota", "&#x3B9;");
        rule("\\\\kappa", "&#x3BA;");
        rule("\\\\lambda", "&#x3BB;");
        rule("\\\\mu", "&#x3BC;");
        rule("\\\\nu", "&#x3BD;");
        rule("\\\\xi", "&#x3BE;");
        rule("\\\\omicron", "&#x3BF;");
        rule("\\\\pi", "&#x3C0;");
        rule("\\\\rho", "&#x3C1;");
        rule("\\\\sigma", "&#x3C3;");
        rule("\\\\tau", "&#x3C4;");
        rule("\\\\upsilon", "&#x3C5;");
        rule("\\\\phi", "&#x3C6;");
        rule("\\\\chi", "&#x3C7;");
        rule("\\\\psi", "&#x3C8;");
        rule("\\\\omega", "&#x3C9;");
        rule("\\\\rightarrow", "&#8594;");
        rule("\\\\infty", "&#8734;");
        rule("\\\\forall", "&#8704;");
        rule("\\\\exists", "&#8707;");
        rule("\\\\in\\s", "&#8712; ");
        rule("\\\\prod", "&#8719;");
        rule("\\\\sum", "&#8721;");
        rule("\\\\cap\\s", "&#8745; ");
        rule("\\\\cup", "&#8746;");
        rule("\\\\int", "&#8747;");
        rule("\\\\left", "(");
        rule("\\\\right", ")");
        rule("\\\\mathbb\\{R\\}", "&#8476;");
        rule("\\\\therefore", "&#8756;");
        rule("\\\\lim", "lim");
        rule("\\\\leq?", "&#8804;");
        rule("\\\\geq?", "&#8805;");
        rule("\\\\neq?\\s", "&#8800; ");
        rule("\\\\equiv", "&#8801;");
        rule("\\\\subseteq", "&#8838;");
        rule("\\\\supseteq", "&#8839;");
        rule("\\\\subset", "&#8834;");
        rule("\\\\supset", "&#8835;");
        rule("\\\\neg\\s", "! ");
        rule("\\\\char36\\s", "\\$");
        rule("\\\\text\\{(.*?)\\}", "$1");
        rule("([^\\\\])\\~", "$1&nbsp;");
        rule("^\\~", "&nbsp;");
        rule("\\\\linespread\\s*\\{.*?\\}", "");
        rule("\\\\centering\\s*", "");
        rule("\\\"", "&quot;");
        rule("``", "&quot;");
        rule("''", "&quot;");
        // Placed after the quote rules: the notice carries a quoted HTML attribute that the
        // '"' rule above would otherwise rewrite into &quot;
        rule("\\\\bibliography\\{.*?\\}", "<br><font color=\"#FF0000\"><i>[Bibliography omitted by Seahawk]</i></font><br>");
        rule("---", "&mdash;");
        rule("--", "&ndash;");
        rule("\\\\verb(.)(.*?)\\1", "<tt>$2</tt>");
        rule("\\\\texttt\\{(.*?)\\}", "<tt>$1</tt>");
        rule("\\\\textbf\\{(.*?)\\}", "<b>$1</b>");
        rule("\\\\textit\\{(.*?)\\}", "<i>$1</i>");
        rule("\\\\emph\\{(.*?)\\}", "<em>$1</em>");
        rule("\\s*\\{\\\\em\\s*([^}]*)\\}", " <em>$1</em>");
        rule("\\s*\\{\\\\bf\\s*([^}]*)\\}", " <b>$1</b>");
        rule("\\s*\\{\\\\it\\s*([^}]*)\\}", " <i>$1</i>");
        rule("\\s*\\{\\\\tt\\s*([^}]*)\\}", " <tt>$1</tt>");
        rule("\\s*\\{\\\\rm\\s*([^}]*)\\}", " <font face=\"serif\">$1</font>");
        rule("\\\\begin\\{em\\}", "<em>");
        rule("\\\\end\\{em\\}", "</em>");
        rule("\\\\begin\\{bf\\}", "<b>");
        rule("\\\\end\\{bf\\}", "</b>");
        rule("\\\\begin\\{it\\}", "<i>");
        rule("\\\\end\\{it\\}", "</i>");
        rule("\\\\begin\\{tt\\}", "<tt>");
        rule("\\\\end\\{tt\\}", "</tt>");
        rule("\\\\\\/", "/");
        rule("\\\\clearpage", "<br>\n");
        rule("\\\\noindent", "");
        rule("\\\\begin\\{singlespace\\}", "");
        rule("\\\\end\\{singlespace\\}", "");
        rule("\\\\begin\\{doublespace\\}", "");
        rule("\\\\end\\{doublespace\\}", "");
        rule("\\\\bigskip(?:\\s|\n)", "<br><br><br>");
        rule("\\\\medskip(?:\\s|\n)", "<br><br>");
        rule("\\\\section\\s*\\{([^}]*)\\}", "<h1>$1</h1>");
        rule("\\\\subsection\\s*\\{([^}]*)\\}", "<h2>$1</h2>");
        rule("\\\\subsubsection\\s*\\{([^}]*)\\}", "<h3>$1</h3>");
        rule("\\\\(?:alt)?chapter\\s*(?:\\[.*?\\])?\\s*\\{([^}]*)\\}", "<hr><h1>$1</h1>");
        rule("(https?://[^ \\t\\r\\n<)\"]+)", "<a href=\"$1\">$1</a>");
        rule("\\\\\\{", "{");
        rule("\\\\\\}", "}");
        rule("\\\\E", "<b>E</b>");
        rule("\\\\P", "<b>P</b>");
        // Collapse adjacent paragraph tags produced by the rules above
        rule("([Pp])\\>\\s*\\<[Pp]\\>", "$1>");
    }

    /**
     * Converts LaTeX source text into an HTML document.
     *
     * <p>Malformed constructs (a missing closing brace or a missing
     * <code>\end{...}</code>) are left in the output as-is instead of aborting
     * the conversion.</p>
     *
     * @param texData the LaTeX source text; must not be null
     * @return the converted text wrapped in <code>&lt;html&gt;&lt;body&gt;</code>, followed
     *         by a numbered "Footnotes" section when the input contains footnotes
     * @throws NullPointerException if texData is null
     * @throws Exception declared for backward compatibility with existing callers
     */
    public static String convert(String texData) throws Exception {
        if(texData == null){
            throw new NullPointerException("texData must not be null");
        }
        String htmlData = texData;

        // TODO: process includes?

        // Do all the substitutions of tex commands for HTML equivalents
        for(Map.Entry<Pattern,String> substitution: SUBSTITUTIONS.entrySet()){
            htmlData = substitution.getKey().matcher(htmlData).replaceAll(substitution.getValue());
        }

        htmlData = expandTitleBlock(htmlData);
        htmlData = convertSmallCaps(htmlData);

        // Labels seen so far, mapped to the number a \ref to them should display
        Map<String,Integer> labels = new LinkedHashMap<String,Integer>();
        htmlData = convertFloats(htmlData, "table", "Table", labels);
        htmlData = convertFloats(htmlData, "figure", "Figure", labels);

        // TODO: process toc, list figure, table commands?

        // Procedural substitutions (references and footnotes) that require counting
        htmlData = resolveLabels(htmlData, labels);
        htmlData = convertFootnotes(htmlData);

        return "<html><body>\n" + htmlData + "</body></html>\n";
    }

    /**
     * Returns the index of the '}' that closes the '{' at openIndex, honouring nested
     * braces, or -1 when openIndex is negative or the brace is never closed.
     */
    private static int findClosingBrace(String text, int openIndex){
        if(openIndex < 0){
            return -1;
        }
        int depth = 0;
        for(int i = openIndex; i < text.length(); i++){
            char c = text.charAt(i);
            if(c == '{'){
                depth++;
            }
            else if(c == '}'){
                depth--;
                if(depth == 0){
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Locates the first occurrence of command and its brace-delimited argument.
     *
     * @return {commandStart, argumentStart, argumentEnd} with argumentEnd being the index
     *         of the closing brace, or null when the command or a balanced argument is missing
     */
    private static int[] locateArgument(String text, String command){
        int commandStart = text.indexOf(command);
        if(commandStart == -1){
            return null;
        }
        int open = text.indexOf('{', commandStart);
        int close = findClosingBrace(text, open);
        if(close == -1){
            return null;
        }
        return new int[]{commandStart, open + 1, close};
    }

    /**
     * Replaces \maketitle by a heading built from the \title and \author arguments, which
     * are removed from the text. Text without \maketitle is returned unchanged.
     */
    private static String expandTitleBlock(String text){
        if(text.indexOf("\\maketitle") == -1){
            return text;
        }
        String html = text;

        String titleText = "";
        int[] title = locateArgument(html, "\\title");
        if(title != null){
            titleText = html.substring(title[1], title[2]);
            html = html.substring(0, title[0]) + html.substring(title[2] + 1);
        }

        String authorsText = "";
        int[] author = locateArgument(html, "\\author");
        if(author != null){
            authorsText = html.substring(author[1], author[2]).trim();
            // Keep each name on one line, put each comma-separated author on its own line
            authorsText = authorsText.replaceAll("([^,])\\s+", "$1&nbsp;");
            authorsText = authorsText.replaceAll("([^\\\\]),", "$1<br>");
            authorsText = authorsText.replaceAll("\\\\,", ",");
            html = html.substring(0, author[0]) + html.substring(author[2] + 1);
        }

        return html.replace("\\maketitle", "<H1>"+titleText+"</H1><H2>"+authorsText+"</H2>");
    }

    /**
     * Rewrites every "{\sc text}" group as upper-cased text in a smaller font. Scanning
     * stops at the first group that has no closing brace; the rest is copied verbatim.
     */
    private static String convertSmallCaps(String html){
        StringBuilder out = new StringBuilder(html.length());
        int copiedTo = 0;     // everything before this index is already in out
        int searchFrom = 0;
        while(true){
            int open = html.indexOf(SMALL_CAPS_OPEN, searchFrom);
            if(open == -1){
                break;
            }
            int textStart = open + SMALL_CAPS_OPEN.length();
            if(textStart < html.length() && Character.isLetter(html.charAt(textStart))){
                // A longer command name that merely starts with "sc" (e.g. \scriptsize)
                searchFrom = textStart;
                continue;
            }
            int close = html.indexOf('}', textStart);
            if(close == -1){
                break;
            }
            out.append(html, copiedTo, open);
            out.append("<font size=\"-1\">");
            out.append(html.substring(textStart, close).toUpperCase());
            out.append("</font>");
            copiedTo = close + 1;
            searchFrom = copiedTo;
        }
        out.append(html, copiedTo, html.length());
        return out.toString();
    }

    /**
     * Turns each \begin{environment}...\end{environment} float into a bordered HTML table
     * with a numbered caption row. Floats are numbered in order of appearance whether or
     * not they carry a label; the first \label inside a float is recorded in labels with
     * that float's number. Processing stops at a float that is never closed.
     *
     * @param environment   float environment name, e.g. "table"
     * @param captionPrefix word placed before the number in the caption, e.g. "Table"
     * @param labels        receives label name to float number mappings
     */
    private static String convertFloats(String text, String environment, String captionPrefix,
                                        Map<String,Integer> labels){
        String beginTag = "\\begin{" + environment + "}";
        String endTag = "\\end{" + environment + "}";
        String html = text;
        int count = 1;
        for(int start = html.indexOf(beginTag); start != -1; start = html.indexOf(beginTag)){
            int end = html.indexOf(endTag, start);
            if(end == -1){
                break;
            }
            String contents = html.substring(start + beginTag.length(), end);
            contents = contents.replaceFirst("^\\[.*?\\]", "");  //placement info
            contents = contents.replaceAll("\\\\caption\\{(.*?)\\}",
                                           "</td></tr><tr><td>" + captionPrefix + " " + count + ": $1");
            int labelTag = contents.indexOf("\\label{");
            if(labelTag != -1){
                int labelStart = labelTag + 7;
                int labelEnd = contents.indexOf("}", labelStart);
                if(labelEnd != -1){
                    String labelText = contents.substring(labelStart, labelEnd);
                    contents = contents.replace("\\label{"+labelText+"}", "<a name=\"ref-"+labelText+"\"></a>");
                    labels.put(labelText, count);
                }
            }
            count++;
            contents = contents.replace("<p>", "");
            html = html.substring(0, start) + "<table width=\"100%\" border=\"1\"><tr><td>" +
                contents + "</td></tr></table>" + html.substring(end + endTag.length());
        }
        return html;
    }

    /**
     * Replaces the remaining \label commands by named anchors, numbering them in order of
     * appearance, then replaces every \ref to a known label by a link showing its number.
     */
    private static String resolveLabels(String text, Map<String,Integer> labels){
        String html = text;
        int refCount = 1;
        for(int tag = html.indexOf("\\label{"); tag != -1; tag = html.indexOf("\\label{")){
            int labelStart = tag + 7;
            int labelEnd = html.indexOf("}", labelStart);
            if(labelEnd == -1){
                break;
            }
            String labelText = html.substring(labelStart, labelEnd);
            html = html.replace("\\label{"+labelText+"}", "<a name=\"ref-"+labelText+"\"></a>");
            labels.put(labelText, refCount++);
        }
        for(Map.Entry<String,Integer> label: labels.entrySet()){
            html = html.replace("\\ref{"+label.getKey()+"}",
                                "<a href=\"#ref-"+label.getKey()+"\">"+label.getValue()+"</a>");
        }
        return html;
    }

    /**
     * Replaces each \footnote by a numbered superscript link and appends the collected
     * footnote texts as an ordered list. Text without footnotes is returned unchanged.
     */
    private static String convertFootnotes(String text){
        String html = text;
        StringBuilder footNotes = new StringBuilder();
        int footNoteNumber = 1;
        for(int tag = html.indexOf("\\footnote{"); tag != -1; tag = html.indexOf("\\footnote{")){
            int footNoteStart = tag + 10;
            int footNoteEnd = html.indexOf("}", footNoteStart);
            if(footNoteEnd == -1){
                break;
            }
            String footNoteText = html.substring(footNoteStart, footNoteEnd);
            html = html.replace("\\footnote{"+footNoteText+"}",
                                "<a href=\"#footnote-"+footNoteNumber+"\"><sup>"+footNoteNumber+"</sup></a>");
            footNotes.append("<a name=\"footnote-").append(footNoteNumber).append("\"></a><li>")
                     .append(footNoteText).append("</li>\n");
            footNoteNumber++;
        }
        if(footNotes.length() == 0){
            return html;
        }
        return html + "<hr>\n<h1>Footnotes</h1><ol>\n" + footNotes + "\n</ol>\n";
    }

}
