package com.ontologycentral.ldspider;

import com.ontologycentral.ldspider.frontier.RankedFrontier;
import com.ontologycentral.ldspider.hooks.error.ErrorHandler;
import com.ontologycentral.ldspider.hooks.error.ErrorHandlerLogger;
import com.ontologycentral.ldspider.hooks.error.ObjectThrowable;
import com.ontologycentral.ldspider.hooks.fetch.FetchFilterRdfXml;
import com.ontologycentral.ldspider.hooks.links.LinkFilter;
import com.ontologycentral.ldspider.hooks.links.LinkFilterDefault;
import com.ontologycentral.ldspider.hooks.links.LinkFilterDomain;
import com.ontologycentral.ldspider.hooks.links.LinkFilterDummy;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.logging.Logger;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.semanticweb.yars.util.CallbackNQOutputStream;

/* loaded from: input_file:com/ontologycentral/ldspider/Main.class */
/**
 * Command-line entry point of the crawler.
 *
 * <p>Parses the command line, loads the seed URIs (from a seed file via {@code -s}
 * or a single URI via {@code -u}), wires up a {@link RankedFrontier}, a link filter,
 * a fetch filter and an error handler, and then runs the crawl strategy selected
 * with {@code -b}, {@code -c} or {@code -d}.
 */
public class Main {
    private static final Logger _log = Logger.getLogger(Main.class.getSimpleName());

    /** Footer printed below the usage text whenever the command line is rejected. */
    private static final String ERROR_FOOTER = "\nError occured! Please see the error message above";

    /**
     * Parses {@code strArr}, prints the help or a usage error where appropriate,
     * and otherwise starts the crawl.
     *
     * <p>Exits the JVM with status 0 after printing help and with status -1 on any
     * usage error (missing seed option, unparsable number, missing file, parse error).
     *
     * @param strArr the raw command-line arguments
     */
    public static void main(String[] strArr) {
        Options options = buildOptions();
        try {
            CommandLine parsed = new BasicParser().parse(options, strArr, true);
            if (parsed.hasOption("h") || parsed.hasOption("help")) {
                new HelpFormatter().printHelp(80, " ", "Crawling and lookups on the linked data web\n", options,
                        "\nFeedback and comments are welcome", true);
                System.exit(0);
            } else if (!parsed.hasOption("s") && !parsed.hasOption("u")) {
                usageError(options, "Missing required option: s or u ");
            }
            run(parsed);
        } catch (FileNotFoundException e) {
            usageError(options, e.getMessage());
        } catch (NumberFormatException e) {
            usageError(options, e.getMessage());
        } catch (ParseException e) {
            usageError(options, e.getMessage());
        }
    }

    /**
     * Builds the set of command-line options understood by the crawler.
     *
     * @return the option definitions: seed source ({@code s}/{@code u}), crawl strategy
     *         ({@code d}/{@code b}/{@code c}) and the remaining switches
     */
    private static Options buildOptions() {
        Options options = new Options();

        // Seed source: either a seed file or a single URI.
        OptionGroup seedGroup = new OptionGroup();
        OptionBuilder.withArgName("file");
        OptionBuilder.hasArgs(1);
        OptionBuilder.withDescription("location of seed list");
        Option seedFile = OptionBuilder.create("s");
        seedFile.setRequired(true);
        seedGroup.addOption(seedFile);
        OptionBuilder.withArgName("uri");
        OptionBuilder.hasArgs(1);
        OptionBuilder.withDescription("uri of an instance");
        Option seedUri = OptionBuilder.create("u");
        seedUri.setRequired(true);
        seedGroup.addOption(seedUri);
        options.addOptionGroup(seedGroup);

        // Crawl strategy: on-disk queue, strict breadth-first, or load balanced.
        OptionGroup strategyGroup = new OptionGroup();
        OptionBuilder.withArgName("directory max-uris");
        // Two values are consumed by run(): the queue directory and the URI limit.
        OptionBuilder.hasArgs(2);
        OptionBuilder.withDescription("use on-disk queue with URI selection based on frequency");
        strategyGroup.addOption(OptionBuilder.create("d"));
        Option breadthFirst = new Option("b", false, "do strict breadth-first");
        breadthFirst.setArgs(2);
        breadthFirst.setArgName("depth uri-limit");
        strategyGroup.addOption(breadthFirst);
        Option loadBalanced = new Option("c", false, "use load balanced crawling strategy");
        loadBalanced.setArgs(1);
        loadBalanced.setArgName("max-uris");
        strategyGroup.addOption(loadBalanced);
        options.addOptionGroup(strategyGroup);

        OptionBuilder.withArgName("threads");
        OptionBuilder.hasArgs(1);
        OptionBuilder.withDescription("number of threads (default 2)");
        options.addOption(OptionBuilder.create("t"));
        OptionBuilder.withArgName("stay");
        OptionBuilder.hasArgs(0);
        OptionBuilder.withDescription("stay on domains of seed uris");
        options.addOption(OptionBuilder.create("y"));
        OptionBuilder.withArgName("redirects");
        OptionBuilder.hasArgs(1);
        OptionBuilder.withDescription("write redirects.nx file");
        options.addOption(OptionBuilder.create("r"));
        Option followOnly = new Option("n", false, "do not extract links - just follow redirects");
        followOnly.setArgs(0);
        options.addOption(followOnly);
        OptionBuilder.withArgName("file");
        OptionBuilder.hasArgs(1);
        OptionBuilder.withDescription("name of NQuad file with output");
        options.addOption(OptionBuilder.create("o"));
        OptionBuilder.withArgName("file");
        OptionBuilder.hasArgs(1);
        OptionBuilder.withDescription("name of access log file");
        options.addOption(OptionBuilder.create("l"));
        options.addOption(new Option("h", "help", false, "print help"));
        return options;
    }

    /**
     * Prints the usage text headed by an error message and terminates the JVM
     * with exit status -1.
     *
     * @param options the option definitions to render
     * @param message the error text shown after the {@code ERROR: } prefix
     */
    private static void usageError(Options options, String message) {
        new HelpFormatter().printHelp(80, " ", "ERROR: " + message + "\n", options, ERROR_FOOTER, true);
        System.exit(-1);
    }

    /**
     * Returns the values given for an option, insisting on a minimum count.
     *
     * @param commandLine the parsed command line
     * @param opt         the short option name
     * @param count       the number of values the caller is going to read
     * @param argName     human-readable names of the expected values, for the error text
     * @return the option values, with at least {@code count} entries
     * @throws ParseException if fewer than {@code count} values were supplied
     */
    private static String[] requireValues(CommandLine commandLine, String opt, int count, String argName)
            throws ParseException {
        String[] values = commandLine.getOptionValues(opt);
        if (values == null || values.length < count) {
            throw new ParseException("Option " + opt + " requires " + count + " argument(s): " + argName);
        }
        return values;
    }

    /**
     * Logs an unusable {@code -u} seed URI and terminates the JVM with exit status -1.
     *
     * @param uri   the text that could not be converted to a URI
     * @param cause the conversion failure
     */
    private static void exitOnInvalidUri(String uri, Exception cause) {
        _log.warning("Discard invalid uri " + cause.getMessage() + " for " + uri);
        cause.printStackTrace();
        System.exit(-1);
    }

    /**
     * Sets up the crawler from the parsed command line and runs the selected strategy.
     *
     * <p>After the crawl, the collected errors and the error handler itself are printed
     * to {@code System.err}, followed by the elapsed time and the lookup rate.
     *
     * @param commandLine the parsed command line
     * @throws FileNotFoundException if the seed file does not exist or an output file
     *                               cannot be opened
     * @throws ParseException        if a strategy option lacks one of its values
     * @throws NumberFormatException if a numeric option value is not an integer
     */
    private static void run(CommandLine commandLine) throws FileNotFoundException, ParseException {
        // Start from an empty set so that a missing -s/-u cannot lead to a null dereference.
        Set<URI> seeds = new HashSet<URI>();
        if (commandLine.hasOption("s")) {
            File seedFile = new File(commandLine.getOptionValue("s"));
            if (!seedFile.exists()) {
                throw new FileNotFoundException("No file found at " + seedFile.getAbsolutePath());
            }
            seeds = readSeeds(seedFile);
        } else if (commandLine.hasOption("u")) {
            String uri = commandLine.getOptionValue("u").trim();
            try {
                seeds.add(new URL(uri).toURI());
            } catch (MalformedURLException e) {
                exitOnInvalidUri(uri, e);
            } catch (URISyntaxException e) {
                exitOnInvalidUri(uri, e);
            }
        }
        _log.info("no of seed uris " + seeds.size());

        // Crawl output, access log and redirects default to stdout / stdout / disabled.
        OutputStream outputStream = System.out;
        if (commandLine.hasOption("o")) {
            outputStream = new FileOutputStream(commandLine.getOptionValue("o"));
        }
        PrintStream accessLog = System.out;
        if (commandLine.hasOption("l")) {
            accessLog = new PrintStream(new FileOutputStream(commandLine.getOptionValue("l")));
        }
        OutputStream redirectsFile = null;
        CallbackNQOutputStream redirects = null;
        if (commandLine.hasOption("r")) {
            redirectsFile = new FileOutputStream(commandLine.getOptionValue("r"));
            redirects = new CallbackNQOutputStream(redirectsFile);
            redirects.startDocument();
        }

        ErrorHandler errorHandler = new ErrorHandlerLogger(accessLog, redirects);
        RankedFrontier frontier = new RankedFrontier();
        frontier.setErrorHandler(errorHandler);
        frontier.setBlacklist(CrawlerConstants.BLACKLIST);
        frontier.addAll(seeds);
        _log.info("frontier done");

        LinkFilter linkFilter;
        if (commandLine.hasOption("y")) {
            // Stay on the hosts of the seed URIs.
            LinkFilterDomain domainFilter = new LinkFilterDomain(frontier);
            for (URI seed : seeds) {
                domainFilter.addHost(seed.getHost());
            }
            linkFilter = domainFilter;
        } else if (commandLine.hasOption("n")) {
            linkFilter = new LinkFilterDummy();
        } else {
            linkFilter = new LinkFilterDefault(frontier);
        }
        linkFilter.setErrorHandler(errorHandler);

        int threads = commandLine.hasOption("t") ? Integer.parseInt(commandLine.getOptionValue("t")) : 2;
        long started = System.currentTimeMillis();
        FetchFilterRdfXml fetchFilter = new FetchFilterRdfXml();
        fetchFilter.setErrorHandler(errorHandler);
        _log.info("init crawler");
        Crawler crawler = new Crawler(threads);
        crawler.setErrorHandler(errorHandler);
        crawler.setOutputCallback(new CallbackNQOutputStream(outputStream));
        crawler.setLinkFilter(linkFilter);
        crawler.setFetchFilter(fetchFilter);

        if (commandLine.hasOption("b")) {
            String[] values = requireValues(commandLine, "b", 2, "depth uri-limit");
            int depth = Integer.parseInt(values[0]);
            int maxUris = Integer.parseInt(values[1]);
            _log.info("breadth-first crawl with " + threads + " threads, depth " + depth + " maxuris " + maxUris);
            crawler.evaluateBreadthFirst(frontier, depth, maxUris);
        } else if (commandLine.hasOption("c")) {
            String[] values = requireValues(commandLine, "c", 1, "max-uris");
            int maxUris = Integer.parseInt(values[0]);
            _log.info("load balanced crawl with " + threads + " threads, maxuris " + maxUris);
            crawler.evaluateLoadBalanced(frontier, maxUris);
        } else if (commandLine.hasOption("d")) {
            String[] values = requireValues(commandLine, "d", 2, "directory max-uris");
            String directory = values[0];
            int maxUris = Integer.parseInt(values[1]);
            _log.info("on-disk optimised crawl with " + threads + " threads, maxuris " + maxUris);
            crawler.evaluateLoadBalanced(frontier, maxUris, directory);
        }

        for (ObjectThrowable failure : errorHandler) {
            System.err.println(failure.getThrowable().getMessage() + " " + failure.getObject());
        }
        System.err.println(errorHandler);
        crawler.close();
        long finished = System.currentTimeMillis();

        try {
            outputStream.close();
        } catch (IOException e) {
            _log.warning("could not close output stream: " + e.getMessage());
        }
        if (redirects != null) {
            redirects.endDocument();
            try {
                redirectsFile.close();
            } catch (IOException e) {
                _log.warning("could not close redirects file: " + e.getMessage());
            }
        }
        long elapsed = finished - started;
        System.err.println("time elapsed " + elapsed + " ms "
                + (((float) errorHandler.lookups()) / (elapsed / 1000.0d)) + " lookups/sec");
        // Close the access log last, and only when it is a file opened by this method.
        if (accessLog != System.out) {
            accessLog.close();
        }
    }

    /**
     * Reads seed URIs from a text file, one per line.
     *
     * <p>Each line is trimmed and converted with {@code new URL(line).toURI()}; lines
     * that fail the conversion are logged at {@code FINE} and skipped. If reading
     * fails part-way through, the failure is logged and the URIs collected so far
     * are returned. The reader is always closed.
     *
     * @param file the seed file
     * @return the distinct URIs that could be parsed; never {@code null}
     * @throws FileNotFoundException if {@code file} cannot be opened for reading
     */
    static Set<URI> readSeeds(File file) throws FileNotFoundException {
        Set<URI> seeds = new HashSet<URI>();
        BufferedReader reader = new BufferedReader(new FileReader(file));
        int lineCount = 0;
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                lineCount++;
                String candidate = line.trim();
                try {
                    seeds.add(new URL(candidate).toURI());
                } catch (MalformedURLException e) {
                    _log.fine("Discard invalid uri " + e.getMessage() + " for " + candidate);
                } catch (URISyntaxException e) {
                    _log.fine("Discard invalid uri " + e.getMessage() + " for " + candidate);
                }
            }
        } catch (IOException e) {
            // Stop here: retrying readLine() after a failure could loop forever.
            _log.warning("Stopped reading seed file " + file.getAbsolutePath() + ": " + e.getMessage());
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                _log.fine("could not close seed file: " + e.getMessage());
            }
        }
        _log.info("read " + lineCount + " lines from seed file");
        return seeds;
    }
}
