package com.ontologycentral.ldspider.frontier;

import com.ontologycentral.ldspider.CrawlerConstants;
import com.ontologycentral.ldspider.hooks.error.ErrorHandler;
import com.ontologycentral.ldspider.hooks.error.ErrorHandlerDummy;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.Iterator;
import java.util.Locale;
import java.util.logging.Logger;
import org.apache.commons.httpclient.cookie.CookieSpec;
import org.apache.http.HttpHost;

/* loaded from: input_file:com/ontologycentral/ldspider/frontier/Frontier.class */
/**
 * Base class for crawl frontiers: collections of URIs that can be added to,
 * removed from and iterated over.
 *
 * <p>Besides the abstract storage operations, this class offers
 * {@link #process(URI)}, which filters and normalises a candidate URI
 * (http/https only, suffix blacklist), and the static helper
 * {@link #normalise(URI)}.
 */
public abstract class Frontier {
    /** Path suffixes denoting a directory index document; stripped during normalisation. */
    private static final String[] INDEX_SUFFIXES = {
        "/index.html", "/index.htm", "/index.php", "/index.asp"
    };

    /** Logger named after the simple name of the concrete subclass. */
    Logger _log = Logger.getLogger(getClass().getSimpleName());

    /** Blacklisted path suffixes; a URI whose path ends with one of them is skipped. */
    String[] _suffixes = new String[0];

    /** Receives a status notification whenever a URI is skipped because of its suffix. */
    ErrorHandler _eh = new ErrorHandlerDummy();

    /**
     * Sets the handler notified about skipped URIs.
     *
     * @param errorHandler the handler to use; {@code null} installs a fresh
     *                     {@link ErrorHandlerDummy} so that {@link #process(URI)}
     *                     never dereferences a null handler
     */
    public void setErrorHandler(ErrorHandler errorHandler) {
        this._eh = errorHandler != null ? errorHandler : new ErrorHandlerDummy();
    }

    /**
     * Sets the path suffixes that cause a URI to be rejected by {@link #process(URI)}.
     *
     * @param strArr the blacklisted suffixes; {@code null} is treated as "no blacklist"
     */
    public void setBlacklist(String[] strArr) {
        this._suffixes = strArr != null ? strArr : new String[0];
    }

    /**
     * Filters and normalises a candidate URI.
     *
     * @param uri the candidate, may be {@code null}
     * @return the normalised URI, or {@code null} if the candidate is {@code null},
     *         has no scheme, has a scheme other than http/https, cannot be
     *         normalised, or has a path ending in a blacklisted suffix
     */
    public URI process(URI uri) {
        if (uri == null || uri.getScheme() == null) {
            return null;
        }
        String scheme = uri.getScheme();
        // URI schemes are case-insensitive (RFC 3986, section 3.1); normalise()
        // lowercases the scheme afterwards, so "HTTP://..." must not be rejected here.
        if (!"http".equalsIgnoreCase(scheme) && !"https".equalsIgnoreCase(scheme)) {
            this._log.fine("skipping " + uri + ", " + scheme + " != http(s)");
            return null;
        }
        try {
            uri = normalise(uri);
            String path = uri.getPath();
            for (String suffix : this._suffixes) {
                // Null entries are ignored rather than crashing endsWith().
                if (suffix != null && path.endsWith(suffix)) {
                    this._log.fine("skipping " + uri + ", suffix " + suffix + " blacklisted");
                    this._eh.handleStatus(uri, CrawlerConstants.SKIP_SUFFIX, null, 0L, -1L);
                    return null;
                }
            }
            return uri;
        } catch (URISyntaxException e) {
            // uri still refers to the un-normalised input here.
            this._log.fine("skipping " + uri + ", not parsable");
            return null;
        }
    }

    /**
     * Brings a URI into the canonical form used by the frontier.
     *
     * <ul>
     *   <li>an empty or missing path becomes {@code "/"};</li>
     *   <li>a trailing {@code index.html}, {@code index.htm}, {@code index.php}
     *       or {@code index.asp} path segment is removed (the slash is kept);</li>
     *   <li>scheme and host are lowercased (locale-independently);</li>
     *   <li>the fragment is dropped; user info, port and query are kept;</li>
     *   <li>the result is passed through {@link URI#normalize()}.</li>
     * </ul>
     *
     * <p>NOTE(review): the URI is rebuilt from the decoded path and query, so
     * percent-escaped reserved characters (e.g. {@code %2F}) may not round-trip
     * unchanged. Confirm whether that matters for crawled URIs.
     *
     * @param uri the URI to normalise, must not be {@code null}
     * @return the normalised URI
     * @throws URISyntaxException if the URI has no scheme or no host, or the
     *                            rebuilt URI is not syntactically valid
     */
    public static URI normalise(URI uri) throws URISyntaxException {
        String scheme = uri.getScheme();
        if (scheme == null) {
            throw new URISyntaxException(uri.toString(), "no scheme");
        }
        String host = uri.getHost();
        if (host == null) {
            // Argument order is (input, reason).
            throw new URISyntaxException(uri.toString(), "no host");
        }

        String path = uri.getPath();
        if (path == null || path.isEmpty()) {
            path = "/";
        } else {
            for (String indexSuffix : INDEX_SUFFIXES) {
                if (path.endsWith(indexSuffix)) {
                    // Keep everything up to and including the last slash.
                    path = path.substring(0, path.lastIndexOf('/') + 1);
                    break;
                }
            }
        }

        // Locale.ROOT avoids locale-specific case mappings (e.g. Turkish dotless i).
        return new URI(
                scheme.toLowerCase(Locale.ROOT),
                uri.getUserInfo(),
                host.toLowerCase(Locale.ROOT),
                uri.getPort(),
                path,
                uri.getQuery(),
                null).normalize();
    }

    /**
     * Adds a URI to the frontier.
     *
     * @param uri the URI to add
     */
    public abstract void add(URI uri);

    /**
     * Adds every URI of the collection by calling {@link #add(URI)} for each
     * element in iteration order.
     *
     * @param collection the URIs to add, must not be {@code null}
     */
    public void addAll(Collection<URI> collection) {
        for (URI uri : collection) {
            add(uri);
        }
    }

    /**
     * Removes a URI from the frontier.
     *
     * @param uri the URI to remove
     */
    public abstract void remove(URI uri);

    /**
     * Removes all given URIs from the frontier.
     *
     * @param collection the URIs to remove
     */
    public abstract void removeAll(Collection<URI> collection);

    /**
     * Returns an iterator over the URIs held by the frontier.
     *
     * @return an iterator over the frontier's URIs
     */
    public abstract Iterator<URI> iterator();
}
