package org.semanticweb.yars.nx.clean;

import java.io.PrintStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.cookie.CookieSpec;
import org.apache.http.HttpHost;
import org.semanticweb.yars.nx.DateTimeLiteral;
import org.semanticweb.yars.nx.Literal;
import org.semanticweb.yars.nx.Node;
import org.semanticweb.yars.nx.Resource;
import org.semanticweb.yars.nx.parser.ParseException;

/* loaded from: input_file:org/semanticweb/yars/nx/clean/Cleaner.class */
/**
 * Static helpers that clean parsed statements: URIs of {@link Resource} nodes are
 * normalised, {@link Literal} nodes have control characters and HTML-like tags removed,
 * and (optionally) plain literals that look like dates are turned into
 * {@link DateTimeLiteral}s.
 *
 * <p>NOTE: the shared {@link SimpleDateFormat} instances below are not thread-safe
 * (see the JDK documentation), so the date-normalising path must not be used from
 * several threads concurrently without external synchronisation.
 */
public class Cleaner {
    /** Output format used when a date was recognised by one of {@link #_formats}. */
    static SimpleDateFormat _iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
    /** Input date formats tried, in order, on plain literals when date normalisation is on. */
    static SimpleDateFormat[] _formats = {new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"), new SimpleDateFormat("dd-MMM-yy"), new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss"), new SimpleDateFormat("EEE', 'dd' 'MMM' 'yyyy' 'HH:mm:ss' 'Z"), new SimpleDateFormat("MM/dd/yy")};

    /** Matches anything that looks like an HTML/XML tag (non-greedy, single line). */
    private static final Pattern HTML_TAG = Pattern.compile("\\<.*?\\>");
    /** Schemes starting with this prefix are additionally validated as URLs. */
    private static final String HTTP_SCHEME_PREFIX = "http";
    /** Path used when a hierarchical URI has an empty path. */
    private static final String ROOT_PATH = "/";

    /**
     * Cleans every statement delivered by {@code it} and prints the surviving ones,
     * one per line, as the N3 form of each node followed by {@code " "} and a final
     * {@code "."}. Problems and a final summary are reported on {@code System.err}.
     *
     * @param it          source of statements (arrays of nodes)
     * @param printStream destination for the cleaned statements
     * @param i           required number of nodes per statement, or {@code -1} to accept any length
     * @param z           passed through to {@link #clean(Node, boolean)}; enables date normalisation
     */
    public static void clean(Iterator<Node[]> it, PrintStream printStream, int i, boolean z) {
        int dropped = 0;
        long processed = 0;
        while (it.hasNext()) {
            Node[] statement = it.next();
            processed++;
            if (i != -1 && i != statement.length) {
                System.err.println(processed + ": doesn't have " + i + " elements but " + statement.length);
                dropped++;
                continue;
            }
            Node[] cleaned = new Node[statement.length];
            boolean ok = true;
            for (int k = 0; k < statement.length; k++) {
                try {
                    Node result = clean(statement[k], z);
                    if (result == null) {
                        // A null node would otherwise blow up in toN3() below.
                        throw new IllegalArgumentException("null node");
                    }
                    cleaned[k] = result;
                } catch (Exception e) {
                    // Keep going so that every bad entry of the statement is reported.
                    ok = false;
                    System.err.println(processed + ": cannot parse entry " + statement[k] + " " + e.getMessage());
                }
            }
            if (!ok) {
                // Count the statement once, however many of its entries failed.
                dropped++;
                continue;
            }
            for (Node node : cleaned) {
                printStream.print(node.toN3() + " ");
            }
            printStream.println(".");
        }
        System.err.println("Processed  " + processed + " statements");
        System.err.println("Dropped " + dropped + " statements");
    }

    /**
     * Cleans a single node.
     *
     * <ul>
     *   <li>{@link Resource}: its string form is run through {@link #normaliseURI(String)}.</li>
     *   <li>{@link Literal}: control characters become spaces, tag-like substrings are removed,
     *       the text is trimmed and escaped with {@code Literal.escapeForNx}; language tag and
     *       datatype are carried over. A literal with neither is additionally passed to
     *       {@link #normaliseLiteral(Literal, boolean)}.</li>
     *   <li>Anything else is returned unchanged.</li>
     * </ul>
     *
     * @param node the node to clean
     * @param z    whether plain literals should be checked for dates
     * @return the cleaned node
     * @throws URISyntaxException    if a URI cannot be parsed or has no scheme, or if a literal
     *                               is empty after cleaning
     * @throws MalformedURLException if an http(s) URI is not a valid URL
     */
    public static Node clean(Node node, boolean z) throws URISyntaxException, MalformedURLException {
        if (node instanceof Resource) {
            return new Resource(normaliseURI(node.toString()));
        }
        if (!(node instanceof Literal)) {
            return node;
        }
        Literal literal = (Literal) node;
        String text = stripHTML(removeControlChars(literal.toString())).trim();
        if (text.length() == 0) {
            throw new URISyntaxException("", "dropping emtpy literals");
        }
        String escaped = Literal.escapeForNx(text);
        boolean hasDatatype = literal.getDatatype() != null;
        boolean hasLanguage = literal.getLanguageTag() != null;
        if (!hasDatatype && !hasLanguage) {
            return normaliseLiteral(new Literal(escaped), z);
        }
        if (hasDatatype && hasLanguage) {
            return new Literal(escaped, literal.getLanguageTag(), literal.getDatatype());
        }
        if (hasDatatype) {
            return new Literal(escaped, literal.getDatatype());
        }
        return new Literal(escaped, literal.getLanguageTag());
    }

    /**
     * Trims a literal's data and, for plain literals when {@code z} is set, tries to
     * recognise a date: first via {@code DateTimeLiteral.parseISO8601}, then via each
     * entry of {@link #_formats} (re-formatted with {@link #_iso}).
     *
     * @param literal the literal to normalise
     * @param z       whether to attempt date recognition on plain literals
     * @return a trimmed copy for tagged/typed literals or when {@code z} is false; a
     *         {@link DateTimeLiteral} when a date was recognised; otherwise {@code literal} itself
     * @throws URISyntaxException declared for interface compatibility
     */
    public static Literal normaliseLiteral(Literal literal, boolean z) throws URISyntaxException {
        String data = literal.getData().trim();
        if (literal.getLanguageTag() != null) {
            return new Literal(data, literal.getLanguageTag());
        }
        if (literal.getDatatype() != null) {
            return new Literal(data, literal.getDatatype());
        }
        if (!z) {
            // Datatype is known to be null here; the typed call keeps the original constructor choice.
            return new Literal(data, literal.getDatatype());
        }
        try {
            // Called for validation only: throws when data is not ISO 8601.
            DateTimeLiteral.parseISO8601(data);
            return new DateTimeLiteral(data);
        } catch (Exception ignored) {
            // Not an ISO 8601 date; fall through to the looser formats.
        }
        for (SimpleDateFormat format : _formats) {
            try {
                return new DateTimeLiteral(_iso.format(format.parse(data)));
            } catch (Exception ignored) {
                // This format does not match; try the next one.
            }
        }
        return literal;
    }

    /**
     * Normalises a URI string.
     *
     * <p>Spaces are percent-encoded before parsing and the URI is syntactically normalised.
     * Opaque URIs (e.g. {@code mailto:}) are returned exactly as passed in. For hierarchical
     * URIs the scheme and host are lower-cased, an empty path becomes {@code "/"}, a trailing
     * index document ({@code index.html}, {@code index.htm}, {@code index.asp},
     * {@code default.asp}, {@code default.aspx}, {@code index.jsp}, {@code index.php}) is cut
     * off, and the port is removed when it is the default for the scheme (80 for http, 443
     * for https).
     *
     * <p>NOTE(review): the result is rebuilt from the decoded path/query/fragment, so
     * percent-escapes of reserved characters may not round-trip — confirm whether callers
     * rely on that.
     *
     * @param str the URI to normalise
     * @return the normalised URI string
     * @throws URISyntaxException    if {@code str} is not a valid URI or has no scheme
     * @throws MalformedURLException if an http(s) URI cannot be converted to a URL
     */
    public static String normaliseURI(String str) throws URISyntaxException, MalformedURLException {
        URI uri = new URI(str.replace(" ", "%20")).normalize();
        String rawScheme = uri.getScheme();
        if (rawScheme == null) {
            throw new URISyntaxException(str, "dropping uris without scheme");
        }
        if (uri.isOpaque()) {
            return str;
        }
        // Locale.ROOT: the default locale must not influence scheme/host casing (e.g. Turkish I).
        String scheme = rawScheme.toLowerCase(java.util.Locale.ROOT);
        if (scheme.startsWith(HTTP_SCHEME_PREFIX)) {
            // Called only for its validation side effect.
            uri.toURL();
        }
        String path = uri.getPath();
        if (path != null) {
            if (path.equals("")) {
                path = ROOT_PATH;
            } else if (endsWithIndexDocument(path)) {
                path = path.substring(0, path.lastIndexOf('/') + 1);
            }
        }
        String host = uri.getHost();
        if (host != null) {
            host = host.toLowerCase(java.util.Locale.ROOT);
        }
        int port = uri.getPort();
        if (isDefaultPort(scheme, port)) {
            port = -1;
        }
        return new URI(scheme, uri.getUserInfo(), host, port, path, uri.getQuery(), uri.getFragment()).toString();
    }

    /** Tells whether {@code path} ends in one of the recognised directory index documents. */
    private static boolean endsWithIndexDocument(String path) {
        String lowerPath = path.toLowerCase(java.util.Locale.ROOT);
        return path.endsWith("/index.html")
                || path.endsWith("/index.htm")
                || path.endsWith("/index.asp")
                || lowerPath.endsWith("/default.asp")
                || lowerPath.endsWith("/default.aspx")
                || path.endsWith("/index.jsp")
                || path.endsWith("/index.php");
    }

    /** Tells whether {@code port} is the well-known default for the (lower-case) {@code scheme}. */
    private static boolean isDefaultPort(String scheme, int port) {
        return (port == 80 && "http".equals(scheme)) || (port == 443 && "https".equals(scheme));
    }

    /** Replaces every character in the range U+0000..U+001F with a single space. */
    private static String removeControlChars(String str) {
        StringBuilder out = new StringBuilder(str.length());
        for (int k = 0; k < str.length(); k++) {
            char c = str.charAt(k);
            out.append(c > 31 ? c : ' ');
        }
        return out.toString();
    }

    /** Removes every {@code <...>} substring (shortest match, not spanning line terminators). */
    private static String stripHTML(String str) {
        return HTML_TAG.matcher(str).replaceAll("");
    }
}
