package com.ontologycentral.ldspider.http;

import com.ontologycentral.ldspider.CrawlerConstants;
import com.ontologycentral.ldspider.hooks.error.ErrorHandler;
import com.ontologycentral.ldspider.hooks.fetch.FetchFilter;
import com.ontologycentral.ldspider.http.robot.Robots;
import com.ontologycentral.ldspider.queue.SpiderQueue;
import java.net.URI;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.semanticweb.yars.nx.Resource;
import org.semanticweb.yars.nx.parser.Callback;
import org.semanticweb.yars.util.Callbacks;
import org.semanticweb.yars2.rdfxml.RDFXMLParser;

/* loaded from: input_file:com/ontologycentral/ldspider/http/LookupThread.class */
/**
 * Worker that repeatedly polls a {@link SpiderQueue} for URIs and performs an
 * HTTP GET on each one until the queue returns {@code null}.
 *
 * <p>For every polled URI the worker:
 * <ol>
 *   <li>asks {@link Robots#accessOk} whether the lookup is permitted;</li>
 *   <li>issues the request through the {@link ConnectionManager};</li>
 *   <li>passes the response headers to {@code Headers.processHeaders} with the
 *       content callback;</li>
 *   <li>on status 200, hands the body to an {@link RDFXMLParser} wired to both
 *       callbacks, provided the {@link FetchFilter} accepts the response;</li>
 *   <li>on status 301/302/303, registers the redirect target with the queue
 *       and the {@link ErrorHandler};</li>
 *   <li>reports the final status (or any thrown exception) to the
 *       {@link ErrorHandler}.</li>
 * </ol>
 */
public class LookupThread implements Runnable {
    Logger _log = Logger.getLogger(getClass().getSimpleName());
    SpiderQueue _q;
    Callback _content;
    Callback _links;
    FetchFilter _ff;
    Robots _robots;
    ErrorHandler _eh;
    ConnectionManager _hclient;

    /**
     * Creates a lookup worker.
     *
     * @param connectionManager used to execute the HTTP requests
     * @param spiderQueue       source of URIs to look up; also told about redirects
     * @param callback          content callback; receives the processed response
     *                          headers and is passed to the RDF/XML parser
     * @param callback2         links callback; passed to the RDF/XML parser
     *                          alongside {@code callback}
     * @param robots            robots.txt gate consulted before every lookup
     * @param errorHandler      receives status codes, redirects and exceptions
     * @param fetchFilter       decides whether a 200 response body is parsed
     */
    public LookupThread(ConnectionManager connectionManager, SpiderQueue spiderQueue, Callback callback, Callback callback2, Robots robots, ErrorHandler errorHandler, FetchFilter fetchFilter) {
        this._hclient = connectionManager;
        this._q = spiderQueue;
        this._content = callback;
        this._links = callback2;
        this._robots = robots;
        this._ff = fetchFilter;
        this._eh = errorHandler;
    }

    /**
     * Drains the queue: looks up every URI returned by {@code poll()} and stops
     * as soon as {@code poll()} returns {@code null}.
     */
    @Override // java.lang.Runnable
    public void run() {
        this._log.info("starting thread ...");
        int processed = 0;
        for (URI uri = this._q.poll(); uri != null; uri = this._q.poll()) {
            // Counted even when robots.txt or the fetch filter rejects the URI.
            processed++;
            lookup(uri);
        }
        this._log.info("finished thread after fetching " + processed + " uris");
    }

    /**
     * Performs the robots check and, if permitted, the HTTP lookup for one URI.
     * Any {@link Throwable} raised while talking to the server or handling the
     * response is logged and forwarded to the error handler, not rethrown.
     */
    private void lookup(URI uri) {
        // Nothing happens between these two reads in this class, so the
        // "ms before lookup" figure logged below is effectively zero; both are
        // kept so the log line retains its established shape.
        long started = System.currentTimeMillis();
        long robotsCheckStarted = System.currentTimeMillis();

        if (!this._robots.accessOk(uri)) {
            this._log.info("access denied per robots.txt for " + uri);
            this._eh.handleStatus(uri, CrawlerConstants.SKIP_ROBOTS, null, 0L, -1L);
            return;
        }

        long lookupStarted = System.currentTimeMillis();
        long contentLength = -1;
        int status = 0; // 0 means "nothing to report via handleStatus"
        String contentType = null;

        HttpGet httpGet = new HttpGet(uri);
        httpGet.setHeaders(CrawlerConstants.HEADERS);
        try {
            HttpResponse response = this._hclient.connect(httpGet);
            HttpEntity entity = response.getEntity();
            status = response.getStatusLine().getStatusCode();

            Header contentTypeHeader = response.getFirstHeader("Content-Type");
            if (contentTypeHeader != null) {
                contentType = contentTypeHeader.getValue();
            }

            this._log.info("lookup on " + uri + " status " + status);
            Headers.processHeaders(uri, status, response.getAllHeaders(), this._content);

            if (status == 200) {
                if (entity == null) {
                    this._log.info("HttpEntity for " + uri + " is null");
                } else if (this._ff.fetchOk(uri, status, entity)) {
                    // The parser instance is not retained; presumably the
                    // constructor drives parsing and callback dispatch --
                    // NOTE(review): confirm against RDFXMLParser.
                    new RDFXMLParser(entity.getContent(), true, true, uri.toString(), (Callback) new Callbacks(new Callback[]{this._content, this._links}), new Resource(uri.toString()));
                } else {
                    this._log.info("disallowed via fetch filter " + uri);
                    this._eh.handleStatus(uri, CrawlerConstants.SKIP_MIMETYPE, null, 0L, -1L);
                    httpGet.abort();
                    // The skip has already been reported; clear the state so
                    // no second status and no content length are recorded.
                    entity = null;
                    status = 0;
                }
            } else if (status == 301 || status == 302 || status == 303) {
                Header location = response.getFirstHeader("location");
                if (location == null) {
                    // Takes the same error path as any other lookup failure,
                    // but with a cause that names the actual problem.
                    throw new IllegalStateException("redirect (" + status + ") from " + uri + " without Location header");
                }
                this._log.info("redirecting (" + status + ") to " + location.getValue());
                URI target = new URI(location.getValue());
                if (!target.isAbsolute()) {
                    // Location may be a relative reference; resolve it against
                    // the requested URI so the queue gets a fetchable target.
                    target = uri.resolve(target);
                }
                this._q.setRedirect(uri, target, status);
                this._eh.handleRedirect(uri, target, status);
            }

            if (entity != null) {
                contentLength = entity.getContentLength();
            }
            httpGet.abort();
        } catch (Throwable th) {
            httpGet.abort();
            // Attach the throwable to the log record instead of printing the
            // stack trace straight to stderr.
            this._log.log(Level.INFO, "Exception " + th.getClass().getName(), th);
            this._eh.handleError(uri, th);
        }

        long lookupFinished = System.currentTimeMillis();
        if (status != 0) {
            this._eh.handleStatus(uri, status, contentType, lookupFinished - lookupStarted, contentLength);
        }
        this._log.info(uri + " " + (robotsCheckStarted - started) + " ms before lookup, " + (lookupStarted - robotsCheckStarted) + " ms to check if lookup is ok, " + (lookupFinished - lookupStarted) + " ms for lookup");
    }
}
