package org.jpedal.examples.text;

import java.io.File;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.jpedal.PdfDecoder;
import org.jpedal.exception.PdfException;
import org.jpedal.exception.PdfSecurityException;
import org.jpedal.utils.LogWriter;
import org.w3c.dom.Document;

/* loaded from: input_file:org/jpedal/examples/text/ExtractStructuredText.class */
/**
 * Example that writes the marked (structured) content of one PDF, or of every
 * PDF in a directory, to XML files in an output directory.
 *
 * <p>For each PDF the marked-content DOM returned by
 * {@link PdfDecoder#getMarkedContent()} is run through the bundled
 * {@code xmlstyle.xslt} stylesheet and written to
 * {@code <output>/<pdf name without extension>.xml}.
 *
 * <p>Note: the output directory is held in a static field, so it is shared by
 * all instances of this class.
 */
public class ExtractStructuredText {
    /** Debug flag; it is not referenced anywhere inside this class. */
    public static final boolean debug = true;

    /** Classpath location of the stylesheet applied to the marked-content DOM. */
    private static final String STYLESHEET_RESOURCE = "/org/jpedal/examples/text/xmlstyle.xslt";

    /** File extension (matched case-insensitively) that identifies a PDF. */
    private static final String PDF_EXTENSION = ".pdf";

    /** Platform file separator used when building paths. */
    protected String separator = System.getProperty("file.separator");

    /** Decoder for the PDF currently being processed; replaced on every {@link #decodeFile(String)} call. */
    protected PdfDecoder decodePdf = null;

    /** Path of the XML file written for the PDF most recently passed to {@link #decodeFile(String)}. */
    protected String outputFile = "";

    /** When true, {@link #main(String[])} does not print the elapsed time. */
    public static boolean isTest = false;

    /** Output directory; defaults to an {@code xml} directory under the working directory. */
    protected static String output = System.getProperty("user.dir") + File.separator + "xml";

    /** When true, progress messages are written to standard output. */
    public static boolean showMessages = true;

    /** Creates an instance without processing anything. */
    public ExtractStructuredText() {
    }

    /**
     * Extracts structured text from a single PDF or from every PDF directly
     * inside a directory.
     *
     * @param str  path of a file ending in {@code .pdf} (any case), otherwise
     *             treated as the path of a directory whose {@code .pdf}
     *             entries are processed
     * @param str2 output directory; it is created if it does not exist and is
     *             stored in the shared static {@link #output} field
     */
    public ExtractStructuredText(String str, String str2) {
        output = str2;
        if (!output.endsWith(this.separator)) {
            output += this.separator;
        }
        File outputDir = new File(output);
        if (!outputDir.exists() && !outputDir.mkdirs()) {
            System.err.println("Unable to create output directory " + output);
        }

        if (hasPdfExtension(str)) {
            decodeFile(str);
            return;
        }

        String dirPath = str.endsWith(this.separator) ? str : str + this.separator;
        String[] entries = null;
        try {
            File dir = new File(dirPath);
            if (!dir.isDirectory()) {
                System.err.println(dirPath + " is not a directory. Exiting program");
                // Stop here: File.list() returns null for a non-directory.
                return;
            }
            entries = dir.list();
        } catch (Exception e) {
            LogWriter.writeLog("Exception trying to access file " + e.getMessage());
        }
        if (entries == null) {
            // Either the listing threw above or File.list() reported an I/O error.
            System.err.println("Unable to list files in " + dirPath);
            return;
        }

        int count = entries.length;
        for (int i = 0; i < count; i++) {
            if (showMessages) {
                System.out.println(i + "/ " + count + ' ' + entries[i]);
            }
            if (hasPdfExtension(entries[i])) {
                if (showMessages) {
                    System.out.println(dirPath + entries[i]);
                }
                decodeFile(dirPath + entries[i]);
            }
        }
    }

    /**
     * Opens one PDF, transforms its marked content with the bundled stylesheet
     * and writes the result to {@link #outputFile}. Problems are reported on
     * the console rather than thrown; the decoder is always flushed and closed
     * once the file has been opened.
     *
     * <p>If the stylesheet cannot be loaded the JVM is terminated with exit
     * status 1.
     *
     * @param str path of the PDF file to process
     */
    protected void decodeFile(String str) {
        System.out.println("Processing " + str);
        this.outputFile = buildOutputPath(str);

        if (!openPdf(str)) {
            // The decoder may not even have been constructed.
            if (this.decodePdf != null) {
                this.decodePdf.closePdfFile();
            }
            return;
        }

        boolean completed = false;
        try {
            if (this.decodePdf.isEncrypted() && !this.decodePdf.isPasswordSupplied() && !this.decodePdf.isExtractionAllowed()) {
                if (showMessages) {
                    System.out.println("Encrypted settings");
                    System.out.println("Please look at Viewer for code sample to handle such files");
                    System.out.println("Or get support/consultancy");
                }
                return;
            }

            Document markedContent = this.decodePdf.getMarkedContent();
            if (markedContent == null) {
                System.out.println("No text found");
            } else {
                // Created before the tree check so a missing stylesheet is still fatal, as before.
                Transformer transformer = newStylesheetTransformer();
                if (!markedContent.hasChildNodes()) {
                    System.out.println("No tree data " + markedContent);
                    return;
                }
                if (!markedContent.getDocumentElement().hasChildNodes()) {
                    // Empty root element: explain in the output why there is nothing to see.
                    markedContent.appendChild(markedContent.createComment("There is NO Structured text in the file to extract!!"));
                    markedContent.appendChild(markedContent.createComment("JPedal can only extract it if it has been added when PDF created"));
                    markedContent.appendChild(markedContent.createComment("Please read our blog post at http://www.jpedal.org/PDFblog/2010/09/the-easy-way-to-discover-if-a-pdf-file-contains-structured-content/ "));
                }
                transformer.transform(new DOMSource(markedContent), new StreamResult(this.outputFile));
                if (showMessages) {
                    System.out.println("Writing to " + this.outputFile);
                }
            }
            if (showMessages) {
                System.out.println("\n----------done--------------");
            }
            this.decodePdf.flushObjectValues(false);
            completed = true;
        } catch (Exception e) {
            System.err.println("Exception " + e.getMessage());
            e.printStackTrace();
            System.out.println(str);
        } finally {
            // Single clean-up point for every exit path (success, early return, failure).
            this.decodePdf.flushObjectValues(true);
            this.decodePdf.closePdfFile();
        }

        if (completed && showMessages) {
            System.out.println("Text read");
        }
    }

    /**
     * Creates a fresh decoder in {@link #decodePdf} and opens the given file.
     *
     * @param pdfPath path of the PDF to open
     * @return true if the file was opened; false if an error was reported
     */
    private boolean openPdf(String pdfPath) {
        // Drop any decoder left from a previous file so a failed construction is detectable.
        this.decodePdf = null;
        try {
            this.decodePdf = new PdfDecoder(false);
            if (showMessages) {
                System.out.println("\n----------------------------");
                System.out.println("Opening file :" + pdfPath);
            }
            this.decodePdf.openPdfFile(pdfPath);
            return true;
        } catch (PdfSecurityException e) {
            reportOpenFailure("Security Exception ", e, pdfPath);
        } catch (PdfException e) {
            reportOpenFailure("Pdf Exception ", e, pdfPath);
        } catch (Exception e) {
            reportOpenFailure("Exception ", e, pdfPath);
        }
        return false;
    }

    /** Prints an open failure to standard error, naming the file that was being opened. */
    private static void reportOpenFailure(String label, Exception e, String pdfPath) {
        System.err.println(label + e + " in pdf code for text extraction on file " + pdfPath);
    }

    /**
     * Builds a transformer from the bundled stylesheet. Any failure is treated
     * as fatal: the stack trace is printed and the JVM exits with status 1.
     *
     * @return a transformer for {@link #STYLESHEET_RESOURCE}
     */
    private Transformer newStylesheetTransformer() {
        java.io.InputStream stylesheet = getClass().getResourceAsStream(STYLESHEET_RESOURCE);
        try {
            if (stylesheet == null) {
                throw new IllegalStateException("Stylesheet " + STYLESHEET_RESOURCE + " not found on classpath");
            }
            return TransformerFactory.newInstance().newTransformer(new StreamSource(stylesheet));
        } catch (Exception e) {
            e.printStackTrace();
        } catch (Error e) {
            // TransformerFactory.newInstance() signals misconfiguration with an Error subclass.
            e.printStackTrace();
        } finally {
            if (stylesheet != null) {
                try {
                    stylesheet.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing a classpath resource fails.
                }
            }
        }
        System.exit(1);
        // Only reached if System.exit returns, which it never does normally; satisfies the compiler.
        throw new IllegalStateException("System.exit returned");
    }

    /**
     * Derives the XML output path for a PDF: the output directory plus the
     * PDF's file name with a trailing {@code .pdf} (any case) replaced by
     * {@code .xml}. Names without that extension simply get {@code .xml}
     * appended.
     */
    private String buildOutputPath(String pdfPath) {
        // Accept both separator styles regardless of platform.
        int nameStart = Math.max(pdfPath.lastIndexOf('/'), pdfPath.lastIndexOf('\\')) + 1;
        String baseName = pdfPath.substring(nameStart);
        if (hasPdfExtension(baseName)) {
            baseName = baseName.substring(0, baseName.length() - PDF_EXTENSION.length());
        }
        String dir = output.endsWith(this.separator) ? output : output + this.separator;
        return dir + baseName + ".xml";
    }

    /** Returns true if the name ends with {@code .pdf}, ignoring case. */
    private static boolean hasPdfExtension(String name) {
        int extLength = PDF_EXTENSION.length();
        // regionMatches returns false (rather than throwing) when the name is shorter than the extension.
        return name.regionMatches(true, name.length() - extLength, PDF_EXTENSION, 0, extLength);
    }

    /**
     * Command-line entry point.
     *
     * @param strArr exactly two arguments: the PDF file or directory to
     *               process, and the output directory
     */
    public static void main(String[] strArr) {
        if (showMessages) {
            System.out.println("Simple demo to extract text objects");
        }
        if (strArr.length != 2) {
            System.out.println("Please call with parameters :-");
            System.out.println("FileName");
            System.out.println("outputDir");
            return;
        }
        String input = strArr[0];
        output = strArr[1];
        System.out.println("File :" + input);
        if (!new File(input).exists()) {
            System.out.println("File " + input + " not found");
            // Nothing to process; avoid a second, less clear failure downstream.
            return;
        }
        long start = System.currentTimeMillis();
        new ExtractStructuredText(input, output);
        long end = System.currentTimeMillis();
        if (isTest) {
            return;
        }
        System.out.println("Time taken=" + ((end - start) / 1000));
    }
}
