/*
 * Decompiled with CFR 0.152.
 */
package org.embl.ebi.escience.scuflui.workbench;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.StringTokenizer;
import java.util.Vector;
import javax.swing.tree.DefaultMutableTreeNode;
import javax.swing.tree.DefaultTreeModel;
import org.embl.ebi.escience.scuflui.workbench.Scavenger;
import org.embl.ebi.escience.scuflui.workbench.ScavengerCreationException;
import org.embl.ebi.escience.scuflworkers.talisman.TalismanProcessorFactory;
import org.embl.ebi.escience.scuflworkers.workflow.WorkflowScavenger;
import org.embl.ebi.escience.scuflworkers.wsdl.WSDLBasedScavenger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;

/**
 * Scavenger node that crawls HTTP pages starting from an initial URL and adds
 * a child node for every WSDL document, "scufl" workflow document and
 * "tscript" document it finds.
 *
 * The crawl runs on a background thread started by the constructor. While it
 * runs, a temporary child node shows the current activity; that node is
 * removed when the crawl ends, whether it succeeded or failed.
 *
 * NOTE(review): the tree and its model are updated from the crawl thread, not
 * from the Swing event dispatch thread - confirm whether these updates should
 * be marshalled onto the event thread.
 */
public class WebScavenger extends Scavenger {
    /** Keyword searched for in robots.txt content by {@link #robotSafeOld(URL)}. */
    public static final String DISALLOW = "Disallow:";
    private DefaultTreeModel treeModel = null;
    private DefaultMutableTreeNode progressDisplayNode = new DefaultMutableTreeNode("Searching...");

    /**
     * Creates the node and starts the background crawl.
     *
     * @param initialURL URL the crawl starts from; only links whose text starts
     *                   with this URL (ignoring case) are followed
     * @param model      tree model that is notified as this node changes
     * @throws ScavengerCreationException declared for callers; crawl failures
     *         occur on the background thread and are reported on standard error
     */
    public WebScavenger(String initialURL, DefaultTreeModel model) throws ScavengerCreationException {
        super("Web crawl @ " + initialURL);
        this.treeModel = model;
        this.add(this.progressDisplayNode);
        final String theURL = initialURL;
        Thread urlThread = new Thread() {

            public void run() {
                System.out.println("Created new thread...");
                try {
                    WebScavenger.this.getXScuflURLs(theURL);
                }
                catch (ScavengerCreationException sce) {
                    // Report instead of silently dropping the failure.
                    System.err.println("Web crawl from " + theURL + " failed : " + sce.getMessage());
                    return;
                }
                finally {
                    // Always take the progress node away and refresh the tree, so a
                    // failed crawl neither leaves a stale status entry behind nor
                    // hides the children that were added before the failure.
                    WebScavenger.this.remove(WebScavenger.this.progressDisplayNode);
                    WebScavenger.this.treeModel.nodeStructureChanged(WebScavenger.this);
                }
                System.out.println("Done searching.");
            }
        };
        urlThread.start();
    }

    /**
     * Crawls from the given URL and adds one child node per recognised
     * document: a WSDLBasedScavenger for links ending in "wsdl", a
     * WorkflowScavenger for XML documents whose root element is "scufl", and a
     * node wrapping a TalismanProcessorFactory for root element "tscript".
     *
     * A WSDL that cannot be turned into a scavenger is skipped. Any other
     * failure is printed and aborts the processing of the remaining URLs.
     *
     * @param initialURL URL the crawl starts from
     * @throws ScavengerCreationException if the initial URL is malformed or a
     *         candidate document cannot be read, parsed or wrapped
     */
    void getXScuflURLs(String initialURL) throws ScavengerCreationException {
        String[] allURLs;
        try {
            allURLs = this.search(initialURL);
        }
        catch (MalformedURLException mue) {
            throw new ScavengerCreationException("Cannot crawl from an invalid URL");
        }
        SAXBuilder sb = new SAXBuilder(false);
        for (int i = 0; i < allURLs.length; ++i) {
            String candidate = allURLs[i];
            try {
                if (candidate.toLowerCase().endsWith("wsdl")) {
                    this.showProgress("Parsing WSDL at : " + candidate);
                    try {
                        this.add(new WSDLBasedScavenger(candidate));
                    }
                    catch (ScavengerCreationException sce) {
                        // Best effort: an unusable WSDL is skipped, the crawl goes on.
                    }
                    continue;
                }
                this.showProgress("Reading : " + candidate);
                Document doc;
                InputStream in = new URL(candidate).openStream();
                try {
                    doc = sb.build((Reader)new InputStreamReader(in));
                }
                finally {
                    // Release the connection even when parsing fails.
                    in.close();
                }
                String rootName = doc.getRootElement().getName();
                if (rootName.equals("scufl")) {
                    this.add(new WorkflowScavenger(candidate));
                }
                else if (rootName.equals("tscript")) {
                    TalismanProcessorFactory tpf = new TalismanProcessorFactory(candidate);
                    this.add(new DefaultMutableTreeNode(tpf));
                }
            }
            catch (Exception e) {
                e.printStackTrace();
                throw new ScavengerCreationException(e.getMessage());
            }
        }
    }

    /**
     * Robots check used by the crawl. Currently accepts every URL.
     *
     * @param url URL about to be visited
     * @return always {@code true}
     */
    boolean robotSafe(URL url) {
        return true;
    }

    /**
     * Checks a URL against the "Disallow:" entries of the robots.txt file
     * served at "http://&lt;host&gt;/robots.txt". Not called from within this
     * class.
     *
     * @param url URL to check
     * @return {@code false} if the robots.txt URL cannot be formed or the file
     *         part of {@code url} starts with a disallowed path; {@code true}
     *         otherwise, including when robots.txt cannot be read or is empty
     */
    boolean robotSafeOld(URL url) {
        URL urlRobot;
        try {
            urlRobot = new URL("http://" + url.getHost() + "/robots.txt");
        }
        catch (MalformedURLException e) {
            return false;
        }
        String strCommands;
        try {
            strCommands = WebScavenger.readAll(urlRobot.openStream());
        }
        catch (IOException e) {
            // No readable robots.txt: nothing is disallowed.
            return true;
        }
        String strURL = url.getFile();
        int index = 0;
        while ((index = strCommands.indexOf(DISALLOW, index)) != -1) {
            index += DISALLOW.length();
            StringTokenizer st = new StringTokenizer(strCommands.substring(index));
            if (!st.hasMoreTokens()) {
                break;
            }
            String strBadPath = st.nextToken();
            if (strURL.startsWith(strBadPath)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Breadth-first crawl from the initial URL. Follows http links that start
     * with the initial URL, contain no query part and do not end in ".xml",
     * ".txt" or "wsdl"; collects every link that contains ".xml" or ends in
     * "wsdl" (ignoring case for the latter).
     *
     * The crawl stops early, returning what was collected so far, when a
     * queued URL is not http, is rejected by {@link #robotSafe(URL)}, or cannot
     * be read.
     *
     * @param initialURL URL to start from; an empty string yields no matches
     * @return the collected links in discovery order, without duplicates
     * @throws MalformedURLException if a queued URL cannot be parsed
     */
    private String[] search(String initialURL) throws MalformedURLException {
        Vector<String> vectorToSearch = new Vector<String>();
        Vector<String> vectorSearched = new Vector<String>();
        Vector<String> listMatches = new Vector<String>();
        if (initialURL.length() == 0) {
            return new String[0];
        }
        String lowerCaseRoot = initialURL.toLowerCase();
        vectorToSearch.addElement(initialURL);
        while (vectorToSearch.size() > 0) {
            String strURL = vectorToSearch.elementAt(0);
            this.showProgress("Examining : " + strURL);
            URL url = new URL(strURL);
            vectorToSearch.removeElementAt(0);
            vectorSearched.addElement(strURL);
            if (url.getProtocol().compareTo("http") != 0 || !this.robotSafe(url)) break;
            String content;
            try {
                URLConnection urlConnection = url.openConnection();
                urlConnection.setAllowUserInteraction(false);
                // Read through the configured connection; url.openStream() would
                // open a second connection that ignores the setting above.
                content = WebScavenger.readAll(urlConnection.getInputStream());
            }
            catch (IOException e) {
                break;
            }
            String lowerCaseContent = content.toLowerCase();
            int index = 0;
            while ((index = lowerCaseContent.indexOf("<a", index)) != -1 && (index = lowerCaseContent.indexOf("href", index)) != -1 && (index = lowerCaseContent.indexOf("=", index)) != -1) {
                ++index;
                // Lower-casing can change the length of the text, so an index found
                // in the lower-cased copy may lie beyond the end of the original.
                if (index > content.length()) break;
                StringTokenizer st = new StringTokenizer(content.substring(index), "\t\n\r\">#");
                // Nothing follows the '=' (e.g. a truncated page): no link here.
                if (!st.hasMoreTokens()) continue;
                URL urlLink;
                String strLink;
                try {
                    urlLink = new URL(url, st.nextToken());
                    strLink = urlLink.toString();
                }
                catch (MalformedURLException e) {
                    continue;
                }
                String lowerCaseLink = strLink.toLowerCase();
                boolean isWsdl = lowerCaseLink.endsWith("wsdl");
                boolean validURLToSearch = urlLink.getProtocol().compareTo("http") == 0
                        && strLink.indexOf("?") <= 0
                        && lowerCaseLink.startsWith(lowerCaseRoot)
                        && !lowerCaseLink.endsWith(".xml")
                        && !lowerCaseLink.endsWith(".txt")
                        && !isWsdl;
                if (!vectorSearched.contains(strLink) && !vectorToSearch.contains(strLink) && this.robotSafe(urlLink) && validURLToSearch) {
                    vectorToSearch.addElement(strLink);
                }
                boolean candidate = strLink.indexOf(".xml") > -1 || isWsdl;
                if (candidate && !listMatches.contains(strLink)) {
                    listMatches.addElement(strLink);
                }
            }
        }
        return listMatches.toArray(new String[0]);
    }

    /** Shows the given message on the progress node and notifies the tree model. */
    private void showProgress(String message) {
        this.progressDisplayNode.setUserObject(message);
        this.treeModel.nodeChanged(this.progressDisplayNode);
    }

    /**
     * Reads the whole stream as text in the platform default charset and closes
     * it, also when reading fails. An empty stream yields an empty string.
     */
    private static String readAll(InputStream stream) throws IOException {
        try {
            // Decoding through a Reader keeps multi-byte characters intact when
            // they straddle a buffer boundary.
            Reader reader = new InputStreamReader(stream);
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[1000];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                text.append(buffer, 0, count);
            }
            return text.toString();
        }
        finally {
            stream.close();
        }
    }
}

