package org.greenstone.LuceneWrapper;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.Stack;
import java.util.Vector;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.BufferedIndexInput;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/greenstone/LuceneWrapper/GS2LuceneIndexer.class */
public class GS2LuceneIndexer {
    /**
     * When true, {@link #debug(String)} writes its messages to stderr.
     * Switched on in main() by "-debug" or by "-verbosity" with a value of 5 or more.
     */
    protected static boolean debug = false;

    /* loaded from: input_file:org/greenstone/LuceneWrapper/GS2LuceneIndexer$Indexer.class */
    public static class Indexer extends DefaultHandler {
        /** Lucene index writer; created in the constructor and closed by finish(). */
        IndexWriter writer_;
        /** Analyzer handed to the index writer and to updateDocument(); a GS2Analyzer built from stop_words. */
        Analyzer analyzer_;
        /** SAX parser used by index()/delete(); the validation feature is switched off in the constructor. */
        SAXParser sax_parser_;
        /** Qualified element name that marks the start and end of one Lucene document. */
        String doc_tag_level_;
        /** Saved MyDocument snapshots, pushed by pushOnStack() and restored by popOffStack(). */
        Stack stack_;
        /** Slash-separated path of the element being parsed; maintained by appendPathLink()/removePathLink(). */
        String path_ = "";
        /** Lucene document currently being assembled, or null before the first document element is seen. */
        Document current_doc_ = null;
        /** Element name recorded when the current document element was opened. */
        String current_node_ = "";
        /** Value of the gs2:docOID attribute of the current document element. */
        String current_doc_oid_ = "";
        /** Name of the most recently opened element if it carried index="1", otherwise "". */
        String indexable_current_node_ = "";
        /** Character data accumulated (chunk by chunk, each chunk trimmed) by characters(). */
        String current_contents_ = "";
        /** Current edit mode: set by index()/delete() and taken from the gs2:mode attribute in startElement(). */
        String mode_ = "";
        /** Label for the input being parsed; printed by startDocument(). */
        protected String file_id_ = null;
        /** Stop-word list passed to the GS2Analyzer constructor. */
        private static String[] stop_words = GS2Analyzer.STOP_WORDS;

        /* JADX INFO: Access modifiers changed from: private */
        /* loaded from: input_file:org/greenstone/LuceneWrapper/GS2LuceneIndexer$Indexer$MyDocument.class */
        /**
         * Holder for the per-document parsing state that pushOnStack() saves and
         * popOffStack() restores: the Lucene document, the text collected so far
         * and the name of the element that opened the document.
         */
        public class MyDocument {
            /** Saved Lucene document. */
            public Document doc = null;
            /** Saved accumulated character content. */
            public String contents = null;
            /** Saved element name. */
            public String tagname = "";

            /** Instances are created only by the enclosing Indexer. */
            private MyDocument() {
            }
        }

        public Indexer(String str, File file, boolean z) {
            this.writer_ = null;
            this.analyzer_ = null;
            this.sax_parser_ = null;
            this.doc_tag_level_ = null;
            this.stack_ = null;
            this.doc_tag_level_ = str;
            try {
                this.stack_ = new Stack();
                this.sax_parser_ = SAXParserFactory.newInstance().newSAXParser();
                this.sax_parser_.getXMLReader().setFeature("http://xml.org/sax/features/validation", false);
                this.analyzer_ = new GS2Analyzer(stop_words);
                this.writer_ = new IndexWriter(file.getPath(), this.analyzer_, z);
                this.writer_.setMaxFieldLength(Integer.MAX_VALUE);
                if (z) {
                    this.writer_.optimize();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        /**
         * Parses an XML file and applies its contents to the index.
         *
         * The mode defaults to "add" and the path is reset before parsing. Any
         * failure while opening or parsing the file is reported on stderr and
         * does not propagate to the caller.
         *
         * @param str  label for this input, printed when parsing starts
         * @param file XML file to parse
         */
        public void index(String str, File file) {
            this.mode_ = "add";
            this.file_id_ = str;
            this.path_ = "";
            // The former parent-directory substring was computed and thrown away, and it
            // raised StringIndexOutOfBoundsException for paths without a separator, so it
            // has been removed.
            try {
                FileInputStream input = new FileInputStream(file);
                try {
                    this.sax_parser_.parse(new InputSource(input), this);
                } finally {
                    // Release the file handle whether or not parsing succeeded.
                    input.close();
                }
            } catch (Exception e) {
                println("parse error:");
                e.printStackTrace();
            }
        }

        /**
         * Parses an in-memory XML string and applies its contents to the index.
         *
         * Resets the path, labels the input "&lt;xml doc on stdin&gt;" and sets the
         * mode to "add" before parsing. Parse failures are reported on stderr and
         * do not propagate.
         *
         * @param str XML text to parse
         */
        public void index(String str) {
            this.path_ = "";
            this.file_id_ = "<xml doc on stdin>";
            this.mode_ = "add";
            try {
                StringReader xmlReader = new StringReader(str);
                InputSource xmlSource = new InputSource(xmlReader);
                this.sax_parser_.parse(xmlSource, this);
            } catch (Exception parseFailure) {
                println("parse error:");
                parseFailure.printStackTrace();
            }
        }

        /**
         * Parses an in-memory XML string with the mode preset to "delete".
         *
         * Resets the path and labels the input "&lt;delete doc&gt;" before parsing.
         * Parse failures are reported on stderr and do not propagate.
         *
         * @param str XML text describing what to delete
         */
        public void delete(String str) {
            this.path_ = "";
            this.file_id_ = "<delete doc>";
            this.mode_ = "delete";
            try {
                StringReader xmlReader = new StringReader(str);
                InputSource xmlSource = new InputSource(xmlReader);
                this.sax_parser_.parse(xmlSource, this);
            } catch (Exception parseFailure) {
                println("parse error:");
                parseFailure.printStackTrace();
            }
        }

        /**
         * Optimizes and closes the index writer.
         *
         * The writer is closed even when optimizing fails. Failures are reported
         * on stderr rather than thrown, so this remains a best-effort call.
         */
        public void finish() {
            if (this.writer_ == null) {
                // The constructor reports and swallows setup failures, so the writer may never have been created.
                println("finish: no index writer is open, nothing to close.");
                return;
            }
            try {
                try {
                    this.writer_.optimize();
                } finally {
                    // Always attempt the close, even if optimize() threw.
                    this.writer_.close();
                }
            } catch (Exception e) {
                println("error while finishing index:");
                e.printStackTrace();
            }
        }

        /**
         * Writes a progress message to stderr without a trailing newline.
         *
         * @param str text to write
         */
        protected void print(String str) {
            System.err.print(str);
        }

        /**
         * Writes a progress message to stderr followed by a newline.
         *
         * @param str text to write
         */
        protected void println(String str) {
            System.err.println(str);
        }

        /**
         * SAX callback at the start of a parse: announces the input label and
         * opens the bracketed progress list that endDocument() closes.
         */
        @Override
        public void startDocument() throws SAXException {
            String banner = "Starting to process " + this.file_id_;
            println(banner);
            print("[");
        }

        /**
         * SAX callback at the end of a parse: closes the bracketed progress list
         * opened by startDocument() and prints a completion message.
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void endDocument() throws SAXException {
            println("]");
            println("... processing finished.");
        }

        /**
         * SAX callback for an opening tag.
         *
         * Extends the element path, and when the tag is the document-level tag
         * starts a new Lucene document keyed by its gs2:docOID attribute. It also
         * records whether this element is flagged for indexing (index="1").
         *
         * @param str        namespace URI (unused)
         * @param str2       local name (unused)
         * @param str3       qualified element name
         * @param attributes attributes of the element
         * @throws SAXException if a document-level element has no gs2:docOID attribute
         */
        @Override
        public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
            this.path_ = appendPathLink(this.path_, str3, attributes);
            if (str3.equals(this.doc_tag_level_)) {
                // Attributes.getValue returns null for an absent attribute. A null OID cannot be
                // stored in a Field or used as a delete/update key, so fail with a clear message.
                String docOid = attributes.getValue("gs2:docOID");
                if (docOid == null) {
                    throw new SAXException("<" + str3 + "> element in " + this.file_id_
                            + " has no gs2:docOID attribute");
                }
                // Only override the mode chosen by index()/delete() when the element names one;
                // storing null here made endElement() fail with a NullPointerException.
                String requestedMode = attributes.getValue("gs2:mode");
                if (requestedMode != null) {
                    this.mode_ = requestedMode;
                }
                pushOnStack();
                this.current_node_ = str3;
                this.current_doc_oid_ = docOid;
                print(" " + str3 + ": " + this.current_doc_oid_ + " (" + this.mode_ + ")");
                this.current_doc_.add(new Field("docOID", this.current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED));
            }
            if (isIndexable(attributes)) {
                this.indexable_current_node_ = str3;
            } else {
                this.indexable_current_node_ = "";
            }
        }

        /**
         * Tells whether an element is flagged for indexing.
         *
         * @param attributes attributes of the element
         * @return true only when the "index" attribute is present with the value "1"
         */
        public static boolean isIndexable(Attributes attributes) {
            return "1".equals(attributes.getValue("index"));
        }

        /**
         * SAX callback for a closing tag; acts according to the current edit mode.
         *
         * In "delete" mode it deletes the current document OID from the index and
         * returns without unwinding the element path. In "add" or "update" mode it
         * stores the collected text as fields when the closing element is the one
         * flagged for indexing, writes the document when the document-level tag
         * closes, and then unwinds the path. Any other mode, including null, is
         * ignored.
         *
         * @param str  namespace URI (unused)
         * @param str2 local name (unused)
         * @param str3 qualified element name
         */
        @Override
        public void endElement(String str, String str2, String str3) throws SAXException {
            // Constant-first comparisons: mode_ can be null when it was read from an absent
            // attribute, and calling equals() on it would abort the parse with an NPE.
            if ("delete".equals(this.mode_)) {
                try {
                    deleteDocument(this.current_doc_oid_);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                return;
            }
            if (!"add".equals(this.mode_) && !"update".equals(this.mode_)) {
                return;
            }
            if (str3.equals(this.indexable_current_node_)) {
                // Searchable, tokenized copy of the element text.
                this.current_doc_.add(new Field(str3, this.current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
                // Every field except TX and ZZ also gets an untokenized "by<name>" copy.
                if (!str3.equals("TX") && !str3.equals("ZZ")) {
                    this.current_doc_.add(new Field("by" + str3, this.current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
                }
                this.current_contents_ = "";
            }
            if (str3.equals(this.doc_tag_level_)) {
                try {
                    // Replaces any existing document with the same docOID.
                    this.writer_.updateDocument(new Term("docOID", this.current_doc_oid_), this.current_doc_, this.analyzer_);
                } catch (IOException e2) {
                    e2.printStackTrace();
                }
                popOffStack();
            }
            this.path_ = removePathLink(this.path_);
        }

        /**
         * SAX callback for character data: trims the delivered chunk and, when
         * anything is left, appends it to the accumulated content. Each chunk is
         * trimmed on its own and no separator is inserted between chunks.
         *
         * @param cArr buffer holding the characters
         * @param i    offset of the first character
         * @param i2   number of characters
         */
        @Override
        public void characters(char[] cArr, int i, int i2) throws SAXException {
            String chunk = String.valueOf(cArr, i, i2).trim();
            if (chunk.length() == 0) {
                return;
            }
            this.current_contents_ = this.current_contents_ + chunk;
        }

        /**
         * Builds the path of a newly opened element.
         *
         * The result is the parent path, a slash and the element name, followed by
         * a predicate on gs2:docOID if that attribute is present, or else on
         * gs3:id if that one is present.
         *
         * @param str        path of the parent element
         * @param str2       name of the element being opened
         * @param attributes attributes of the element being opened
         * @return the extended path
         */
        protected String appendPathLink(String str, String str2, Attributes attributes) {
            String link = str + "/" + str2;
            if (attributes.getLength() <= 0) {
                return link;
            }
            String docOid = attributes.getValue("gs2:docOID");
            if (docOid != null) {
                return link + "[@gs2:docOID='" + docOid + "']";
            }
            String gs3Id = attributes.getValue("gs3:id");
            if (gs3Id != null) {
                return link + "[@gs3:id='" + gs3Id + "']";
            }
            return link;
        }

        /**
         * Drops the last segment of a slash-separated path.
         *
         * @param str path to shorten
         * @return everything before the last '/', or "" when the path has no '/'
         */
        protected String removePathLink(String str) {
            int slashAt = str.lastIndexOf('/');
            if (slashAt < 0) {
                return "";
            }
            return str.substring(0, slashAt);
        }

        /**
         * Starts a fresh Lucene document. If a document is already in progress,
         * it is saved on the stack together with its collected text and element
         * name; the current text and element name are then cleared.
         */
        protected void pushOnStack() {
            Document inProgress = this.current_doc_;
            if (inProgress != null) {
                MyDocument saved = new MyDocument();
                saved.tagname = this.current_node_;
                saved.contents = this.current_contents_;
                saved.doc = inProgress;
                this.stack_.push(saved);
            }
            this.current_node_ = "";
            this.current_contents_ = "";
            this.current_doc_ = new Document();
        }

        /**
         * Ends the current Lucene document. Restores the most recently saved
         * document state from the stack, or resets to an empty document with no
         * text and no element name when the stack is empty.
         */
        protected void popOffStack() {
            if (!this.stack_.empty()) {
                MyDocument saved = (MyDocument) this.stack_.pop();
                this.current_node_ = saved.tagname;
                this.current_contents_ = saved.contents;
                this.current_doc_ = saved.doc;
                return;
            }
            this.current_node_ = "";
            this.current_contents_ = "";
            this.current_doc_ = new Document();
        }

        /**
         * Deletes every indexed document whose "docOID" term equals the given
         * value, passing the document count before and after to the debug log.
         *
         * @param str document OID to delete
         * @throws IOException if the index writer fails
         */
        protected void deleteDocument(String str) throws IOException {
            GS2LuceneIndexer.debug("GS2LuceneDelete.deleteDocument(" + str + ")");

            int countBefore = this.writer_.docCount();
            GS2LuceneIndexer.debug("- Initial number of documents in index: " + countBefore);

            Term oidTerm = new Term("docOID", str);
            this.writer_.deleteDocuments(oidTerm);

            int countAfter = this.writer_.docCount();
            GS2LuceneIndexer.debug("- Final number of documents in index: " + countAfter);
        }
    }

    /**
     * Writes a diagnostic line to stderr when the debug flag is set; otherwise does nothing.
     *
     * @param str message to write
     */
    protected static void debug(String str) {
        if (!debug) {
            return;
        }
        System.err.println(str);
    }

    /**
     * Command-line entry point.
     *
     * Usage: {@code java GS2LuceneIndexer [-removeold|-verbosity [num]] doc-tag-level building_dir index}
     *
     * Reads XML from stdin (UTF-8) line by line. The accumulated text is handed
     * to the indexer for deletion when a line ends with {@code </Delete>}, or for
     * indexing when a line starts with {@code </Doc>}. The index is finished
     * (optimized and closed) when stdin is exhausted or reading fails.
     *
     * @param strArr command-line arguments
     * @throws Exception declared for compatibility with existing callers
     */
    public static void main(String[] strArr) throws Exception {
        boolean removeOld = false;
        Vector<String> positional = new Vector<String>();
        for (int i = 0; i < strArr.length; i++) {
            String arg = strArr[i];
            if (!arg.startsWith("-")) {
                positional.add(arg);
            } else if (arg.equals("-removeold")) {
                removeOld = true;
            } else if (arg.equals("-verbosity")) {
                // The token after -verbosity is always consumed as its value.
                i++;
                if (i < strArr.length) {
                    try {
                        if (Integer.parseInt(strArr[i]) >= 5) {
                            debug = true;
                        }
                    } catch (NumberFormatException e) {
                        // A malformed level used to abort the whole run; warn and carry on instead.
                        System.err.println("Ignoring non-numeric -verbosity value: " + strArr[i]);
                    }
                }
            } else if (arg.equals("-debug")) {
                debug = true;
            } else {
                System.err.println("Unrecognised option: " + arg);
            }
        }
        if (positional.size() != 3) {
            System.err.println("Usage: java GS2LuceneIndexer [-removeold|-verbosity [num]] doc-tag-level building_dir index");
            return;
        }
        String docTagLevel = positional.get(0);
        String buildingDirName = positional.get(1);
        String indexName = positional.get(2);

        String importDirName = buildingDirName + File.separator + "text";
        File importDir = new File(importDirName);
        File buildingDir = new File(buildingDirName);
        if (!importDir.exists()) {
            System.err.println("Couldn't find import directory: " + importDirName);
            return;
        }

        File indexDir = new File(buildingDir.getPath() + File.separator + indexName + File.separator);
        // mkdir() returns false both on failure and when the directory already exists,
        // so only give up when there is still no directory afterwards.
        if (!indexDir.mkdir() && !indexDir.isDirectory()) {
            System.err.println("Couldn't create index directory: " + indexDir.getPath());
            return;
        }

        Indexer indexer = new Indexer(docTagLevel, indexDir, removeOld);
        try {
            BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            StringBuilder pending = new StringBuilder(BufferedIndexInput.BUFFER_SIZE);
            String line;
            while ((line = stdin.readLine()) != null) {
                pending.append(line);
                pending.append(" ");
                debug("Got line " + line);
                if (line.endsWith("</Delete>")) {
                    indexer.delete(pending.toString());
                    pending = new StringBuilder(BufferedIndexInput.BUFFER_SIZE);
                } else if (line.startsWith("</Doc>")) {
                    indexer.index(pending.toString());
                    pending = new StringBuilder(BufferedIndexInput.BUFFER_SIZE);
                }
            }
            // Closing the buffered reader also closes the wrapped stream reader.
            stdin.close();
        } catch (IOException e) {
            System.err.println("Error: unable to read from stdin");
            e.printStackTrace();
        } finally {
            // Always finish the index, even when an unexpected exception escapes the loop.
            indexer.finish();
        }
    }
}
