package ims.tiger.importfilter.imsutils;

import ims.tiger.importfilter.ImportFilter;
import ims.tiger.importfilter.ImportFilterException;
import ims.tiger.importfilter.TestImportFilterHandler;
import ims.tiger.system.Constants;
import ims.tiger.util.UtilitiesCollection;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipInputStream;
import org.apache.batik.dom.svg.SVGPathSegConstants;
import org.apache.batik.util.XMLConstants;
import org.apache.log4j.BasicConfigurator;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/* loaded from: input_file:ims/tiger/importfilter/imsutils/TreeTaggerFilter.class */
public class TreeTaggerFilter extends ImportFilter implements ContentHandler, ErrorHandler {
    protected static final int MAX_LIST_SIZE = 250;
    protected static final String ROOT_ELEMENT = "corpus";
    protected static final String SENTENCE = "sentence";
    protected static final String SENTENCE_CAT = "S";
    protected static final String PHRASE = "phrase";
    protected static final String CAT = "cat";
    protected static final String TOKEN = "token";
    protected static final String WORD = "word";
    protected static final String POS = "pos";
    protected BufferedWriter g;
    protected int line_count;
    protected long f_size;
    protected long f_count;
    protected long f_last;
    protected Locator locator;
    protected int guessed;
    protected String sentence_id;
    protected int nt_count;
    protected int t_count;
    protected String root;
    protected String token_id;
    protected String token_xml;
    protected boolean collect;
    protected String collect_file;
    protected HashMap feature_values;
    protected int progress;
    protected int old_progress;
    protected String local_att;
    protected String[] t_features_list;
    protected String[] nt_features_list;
    protected String[] t_features_node;
    protected String[] nt_features_node;
    protected int t_feature_max;
    protected int t_feature_number = 0;
    protected int sentence_count = 0;
    protected HashMap nt_nodes = new HashMap();
    protected HashMap t_nodes = new HashMap();
    protected HashMap edges = new HashMap();
    protected List parent_stack = new LinkedList();
    protected boolean handling_characters = false;
    protected StringBuffer lastvalue = new StringBuffer();
    protected String source_encoding = "ISO-8859-1";

    public TreeTaggerFilter() {
        this.header_file = "";
        this.progress = 0;
        this.old_progress = 0;
        this.maximum = 0;
        this.t_features_node = new String[2];
        this.t_features_list = new String[2];
        this.t_features_list[0] = WORD;
        this.t_features_list[1] = POS;
        this.t_feature_max = 2;
        this.nt_features_node = new String[1];
        this.nt_features_list = new String[1];
        this.nt_features_list[0] = CAT;
    }

    @Override // org.xml.sax.ContentHandler
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    @Override // org.xml.sax.ContentHandler
    public void startDocument() throws SAXException {
        try {
            printDocumentHeader();
            if (this.header_file.length() > 0) {
                this.collect = false;
                this.g.write(new StringBuffer("<head external=\"file:").append(this.header_file).append("\" />\n\n").toString());
            } else {
                this.collect = true;
                File file = new File(this.target);
                String absolutePath = file.getAbsolutePath();
                this.collect_file = new StringBuffer(String.valueOf(absolutePath.substring(0, absolutePath.indexOf(file.getName())))).append(File.separator).append(this.corpus_id).append("_generated_header.xml").toString();
                this.g.write(new StringBuffer("<head external=\"").append(new StringBuffer("file:").append(this.corpus_id).append("_generated_header.xml").toString()).append("\" />\n\n").toString());
                this.feature_values = new HashMap();
            }
            this.g.write("<body>\n\n");
        } catch (Exception e) {
            throw new SAXException(e.getMessage());
        }
    }

    @Override // org.xml.sax.ContentHandler
    public void endDocument() throws SAXException {
        try {
            printDocumentFooter();
            if (this.collect) {
                saveExternalHeaderFile();
            }
        } catch (IOException e) {
            throw new SAXException(e.getMessage());
        }
    }

    @Override // org.xml.sax.ContentHandler
    public void processingInstruction(String str, String str2) throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void startPrefixMapping(String str, String str2) {
    }

    @Override // org.xml.sax.ContentHandler
    public void endPrefixMapping(String str) {
    }

    private void saveNTNode(String str) {
        String stringBuffer = new StringBuffer("<nt id=\"").append(str).append(XMLConstants.XML_DOUBLE_QUOTE).toString();
        for (int i = 0; i < this.nt_features_list.length; i++) {
            String str2 = this.nt_features_list[i];
            String str3 = this.nt_features_node[i];
            if (str3 != null) {
                if (this.collect) {
                    addFeatureValue(str2, str3);
                }
            } else if (this.collect) {
                addFeatureValue(str2, Constants.UNDEF);
            }
            String trimContent = UtilitiesCollection.trimContent(str2);
            stringBuffer = str3 != null ? new StringBuffer(String.valueOf(stringBuffer)).append(" ").append(trimContent).append(XMLConstants.XML_EQUAL_QUOT).append(UtilitiesCollection.trimContent(str3)).append(XMLConstants.XML_DOUBLE_QUOTE).toString() : new StringBuffer(String.valueOf(stringBuffer)).append(" ").append(trimContent).append("=\"--\"").toString();
        }
        this.nt_nodes.put(str, stringBuffer);
    }

    @Override // org.xml.sax.ContentHandler
    public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        if (str2.equals(ROOT_ELEMENT)) {
            return;
        }
        if (str2.equals(SENTENCE)) {
            this.sentence_count++;
            this.sentence_id = new StringBuffer(SVGPathSegConstants.PATHSEG_CURVETO_CUBIC_SMOOTH_REL_LETTER).append(new Integer(this.sentence_count).toString()).toString();
            this.handler.setMessage(new StringBuffer("Converting sentence: ").append(this.sentence_id).toString());
            if (this.maximum > 0) {
                this.progress = (this.sentence_count * 100) / this.maximum;
            } else {
                this.progress = (this.sentence_count * 100) / this.guessed;
            }
            if (this.progress < 0) {
                this.progress = 0;
            }
            if (this.progress > 100) {
                this.progress = 100;
            }
            if (this.progress > this.old_progress) {
                this.handler.setProgress(this.progress);
                this.old_progress = this.progress;
            }
            this.nt_count = 0;
            this.t_count = 0;
            this.nt_nodes.clear();
            this.t_nodes.clear();
            this.edges.clear();
            this.parent_stack.clear();
            this.nt_count++;
            String num = new Integer(this.nt_count).toString();
            if (num.length() == 1) {
                num = new StringBuffer("0").append(num).toString();
            }
            String stringBuffer = new StringBuffer(String.valueOf(this.sentence_id)).append("_nt").append(num).toString();
            this.root = stringBuffer;
            this.nt_features_node[0] = "S";
            saveNTNode(stringBuffer);
            push(stringBuffer);
            return;
        }
        if (str2.equals(TOKEN)) {
            this.t_count++;
            String num2 = new Integer(this.t_count).toString();
            if (num2.length() == 1) {
                num2 = new StringBuffer("0").append(num2).toString();
            }
            this.token_id = new StringBuffer(String.valueOf(this.sentence_id)).append("_t").append(num2).toString();
            this.token_xml = new StringBuffer("<t id=\"").append(this.token_id).append(XMLConstants.XML_DOUBLE_QUOTE).toString();
            put_Parent2ID_Edge(this.token_id);
            for (int i = 0; i < this.t_features_node.length; i++) {
                this.t_features_node[i] = attributes.getValue(this.t_features_list[i]);
            }
            this.t_feature_number = 0;
            return;
        }
        this.nt_count++;
        String num3 = new Integer(this.nt_count).toString();
        if (num3.length() == 1) {
            num3 = new StringBuffer("0").append(num3).toString();
        }
        String stringBuffer2 = new StringBuffer(String.valueOf(this.sentence_id)).append("_nt").append(num3).toString();
        for (int i2 = 0; i2 < this.nt_features_list.length; i2++) {
            this.nt_features_node[i2] = null;
        }
        this.nt_features_node[0] = attributes.getValue(CAT);
        saveNTNode(stringBuffer2);
        put_Parent2ID_Edge(stringBuffer2);
        push(stringBuffer2);
    }

    protected void push(String str) {
        this.parent_stack.add(str);
    }

    protected String pop() {
        String str = (String) this.parent_stack.get(this.parent_stack.size() - 1);
        this.parent_stack.remove(this.parent_stack.size() - 1);
        return str;
    }

    protected void put_Parent2ID_Edge(String str) {
        if (this.parent_stack.isEmpty()) {
            return;
        }
        String str2 = (String) this.parent_stack.get(this.parent_stack.size() - 1);
        if (this.edges.containsKey(str2)) {
            ((List) this.edges.get(str2)).add(str);
            return;
        }
        LinkedList linkedList = new LinkedList();
        linkedList.add(str);
        this.edges.put(str2, linkedList);
    }

    /* JADX WARN: Unreachable blocks removed: 1, instructions: 1 */
    @Override // org.xml.sax.ContentHandler
    public void endElement(String str, String str2, String str3) throws SAXException {
        if (str2.equals(ROOT_ELEMENT)) {
            return;
        }
        if (str2.equals(TOKEN)) {
            for (int i = 0; i < this.t_features_list.length; i++) {
                String str4 = this.t_features_list[i];
                String str5 = this.t_features_node[i];
                if (str5 != null) {
                    if (this.collect) {
                        addFeatureValue(str4, str5);
                    }
                } else if (this.collect) {
                    addFeatureValue(str4, Constants.UNDEF);
                }
                String trimContent = UtilitiesCollection.trimContent(str4);
                if (str5 != null) {
                    this.token_xml = new StringBuffer(String.valueOf(this.token_xml)).append(" ").append(trimContent).append(XMLConstants.XML_EQUAL_QUOT).append(UtilitiesCollection.trimContent(str5)).append(XMLConstants.XML_DOUBLE_QUOTE).toString();
                } else {
                    this.token_xml = new StringBuffer(String.valueOf(this.token_xml)).append(" ").append(trimContent).append("=\"--\"").toString();
                }
            }
            this.t_nodes.put(this.token_id, this.token_xml);
            return;
        }
        if (!str2.equals(SENTENCE)) {
            pop();
            return;
        }
        try {
            this.g.write(new StringBuffer("<s id=\"").append(this.sentence_id).append("\">\n").toString());
            this.g.write(new StringBuffer("<graph root=\"").append(this.root).append("\">\n").toString());
            this.g.write(" <terminals>\n");
            Object[] array = this.t_nodes.keySet().toArray();
            Arrays.sort(array);
            for (Object obj : array) {
                this.g.write(new StringBuffer("  ").append(this.t_nodes.get((String) obj)).append(" />\n").toString());
            }
            this.g.write(" </terminals>\n");
            this.g.write(" <nonterminals>\n");
            Object[] array2 = this.nt_nodes.keySet().toArray();
            Arrays.sort(array2);
            for (Object obj2 : array2) {
                String str6 = (String) obj2;
                this.g.write(new StringBuffer("  ").append(this.nt_nodes.get(str6)).toString());
                if (this.edges.containsKey(str6)) {
                    this.g.write(">\n");
                    List list = (List) this.edges.get(str6);
                    for (int i2 = 0; i2 < list.size(); i2++) {
                        this.g.write(new StringBuffer("    <edge idref=\"").append((String) list.get(i2)).append("\" />\n").toString());
                    }
                    this.g.write("  </nt>\n");
                } else {
                    this.handler.addWarning(new StringBuffer("Warning: Inner node ").append(str6).append(" doesn't have any children.").toString());
                    this.g.write(" />\n");
                }
            }
            this.g.write(" </nonterminals>\n");
            this.g.write("</graph>\n");
            this.g.write("</s>\n\n");
            if (this.maximum > 0 && this.sentence_count >= this.maximum) {
                endDocument();
                throw new SAXException("Enough");
            }
            if (this.handler.isAborted()) {
                throw new SAXException("Stopped");
            }
        } catch (IOException e) {
            throw new SAXException(e.getMessage());
        }
    }

    @Override // org.xml.sax.ContentHandler
    public void characters(char[] cArr, int i, int i2) throws SAXException {
        if (this.handling_characters) {
            this.lastvalue.append(new String(cArr, i, i2));
        }
    }

    @Override // org.xml.sax.ContentHandler
    public void ignorableWhitespace(char[] cArr, int i, int i2) throws SAXException {
        if (this.handling_characters) {
            this.lastvalue.append(new String(cArr, i, i2));
        }
    }

    @Override // org.xml.sax.ContentHandler
    public void skippedEntity(String str) throws SAXException {
    }

    @Override // org.xml.sax.ErrorHandler
    public void warning(SAXParseException sAXParseException) throws SAXException {
        this.handler.addWarning(new StringBuffer("Warning: Line: ").append(sAXParseException.getLineNumber()).append(" Column: ").append(sAXParseException.getColumnNumber()).append(" ").append(sAXParseException.getMessage()).toString());
    }

    @Override // org.xml.sax.ErrorHandler
    public void error(SAXParseException sAXParseException) throws SAXException {
        new StringBuffer("Error: Line: ").append(sAXParseException.getLineNumber()).append(" Column: ").append(sAXParseException.getColumnNumber()).append(" ").append(sAXParseException.getMessage()).toString();
        throw new SAXException(sAXParseException);
    }

    @Override // org.xml.sax.ErrorHandler
    public void fatalError(SAXParseException sAXParseException) throws SAXException {
        throw new SAXException(new StringBuffer("Error (fatal): Line: ").append(sAXParseException.getLineNumber()).append(" Column: ").append(sAXParseException.getColumnNumber()).append(" ").append(sAXParseException.getMessage()).toString());
    }

    @Override // ims.tiger.importfilter.ImportFilter
    public void startConversion() throws ImportFilterException {
        InputSource inputSource;
        try {
            XMLReader createXMLReader = XMLReaderFactory.createXMLReader(Constants.SAXREADER);
            createXMLReader.setFeature("http://xml.org/sax/features/validation", false);
            createXMLReader.setFeature("http://xml.org/sax/features/namespaces", true);
            createXMLReader.setContentHandler(this);
            createXMLReader.setErrorHandler(this);
            int i = 1;
            if (this.source.endsWith(".gz")) {
                inputSource = new InputSource(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(this.source)), this.source_encoding)));
                i = 8;
            } else if (this.source.endsWith(".zip")) {
                ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(this.source));
                zipInputStream.getNextEntry();
                inputSource = new InputSource(new BufferedReader(new InputStreamReader(zipInputStream, this.source_encoding)));
                i = 8;
            } else {
                inputSource = new InputSource(this.source);
            }
            if (this.guessed == 0) {
                File file = new File(this.source);
                if (!file.exists()) {
                    throw new ImportFilterException("Input file does not exist.");
                }
                this.guessed = (int) ((file.length() * i) / 2500);
                if (this.guessed == 0) {
                    this.guessed = 10;
                }
            }
            if (this.compress) {
                this.g = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(this.target))));
            } else {
                this.g = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(this.target)));
            }
            createXMLReader.parse(inputSource);
            this.g.close();
            this.handler.destroy();
        } catch (IOException e) {
            throw new ImportFilterException(new StringBuffer("IO:").append(e.getMessage()).toString());
        } catch (SAXException e2) {
            String message = e2.getMessage();
            if (message == null || !message.equals("Enough")) {
                if (message != null && message.equals("Stopped")) {
                    throw new ImportFilterException("Stopped", true);
                }
                throw new ImportFilterException(new StringBuffer("SAX:").append(e2.getMessage()).append("\n").append(new StringBuffer("Line: ").append(this.locator.getLineNumber()).append(" Column: ").append(this.locator.getColumnNumber()).toString()).toString());
            }
            try {
                this.g.close();
                this.handler.destroy();
            } catch (IOException e3) {
                throw new ImportFilterException(new StringBuffer("IO:").append(e3.getMessage()).toString());
            }
        }
    }

    protected void printDocumentHeader() throws IOException {
        StringBuffer stringBuffer = new StringBuffer();
        stringBuffer.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n\n");
        stringBuffer.append("<corpus xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n");
        stringBuffer.append(new StringBuffer("        xsi:noNamespaceSchemaLocation=\"").append(this.schema).append("\"\n").toString());
        stringBuffer.append(new StringBuffer("        id=\"").append(this.corpus_id).append("\">\n\n").toString());
        this.g.write(stringBuffer.toString());
    }

    protected void addFeatureValue(String str, String str2) {
        if (!this.feature_values.containsKey(str)) {
            HashSet hashSet = new HashSet();
            hashSet.add(str2);
            this.feature_values.put(str, hashSet);
        } else {
            HashSet hashSet2 = (HashSet) this.feature_values.get(str);
            if (hashSet2.size() > 250) {
                return;
            }
            hashSet2.add(str2);
        }
    }

    protected void saveExternalHeaderFile() throws IOException {
        StringBuffer stringBuffer = new StringBuffer();
        stringBuffer.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n\n");
        stringBuffer.append("<head>\n\n");
        stringBuffer.append("  <meta>\n");
        stringBuffer.append("    <format>TreeTagger format</format>\n");
        stringBuffer.append("  </meta>\n\n");
        stringBuffer.append("  <annotation>\n\n");
        for (int i = 0; i < this.nt_features_list.length; i++) {
            String str = this.nt_features_list[i];
            stringBuffer.append(new StringBuffer("    <feature name=\"").append(str).append("\" domain=\"NT\">\n").toString());
            if (this.feature_values.containsKey(str)) {
                Set set = (Set) this.feature_values.get(str);
                if (set.size() <= 250) {
                    Object[] array = set.toArray();
                    Arrays.sort(array);
                    for (Object obj : array) {
                        stringBuffer.append(new StringBuffer("      <value name=\"").append(UtilitiesCollection.trimContent((String) obj)).append("\" />\n").toString());
                    }
                }
            }
            stringBuffer.append("    </feature>\n\n");
        }
        for (int i2 = 0; i2 < this.t_features_list.length; i2++) {
            String str2 = this.t_features_list[i2];
            stringBuffer.append(new StringBuffer("    <feature name=\"").append(str2).append("\" domain=\"T\">\n").toString());
            if (this.feature_values.containsKey(str2)) {
                Set set2 = (Set) this.feature_values.get(str2);
                if (set2.size() <= 250) {
                    Object[] array2 = set2.toArray();
                    Arrays.sort(array2);
                    for (Object obj2 : array2) {
                        stringBuffer.append(new StringBuffer("      <value name=\"").append(UtilitiesCollection.trimContent((String) obj2)).append("\" />\n").toString());
                    }
                }
            }
            stringBuffer.append("    </feature>\n\n");
        }
        stringBuffer.append("  </annotation>\n\n");
        stringBuffer.append("</head>\n\n");
        BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(this.collect_file));
        bufferedWriter.write(stringBuffer.toString());
        bufferedWriter.close();
    }

    protected void printDocumentFooter() throws IOException {
        StringBuffer stringBuffer = new StringBuffer();
        stringBuffer.append("</body>\n\n");
        stringBuffer.append("</corpus>\n\n");
        this.g.write(stringBuffer.toString());
    }

    @Override // ims.tiger.importfilter.ImportFilter
    public int getNumberOfSentences() {
        return this.sentence_count;
    }

    @Override // ims.tiger.importfilter.ImportFilter
    public boolean isExternalHeaderGenerated() {
        return this.collect;
    }

    @Override // ims.tiger.importfilter.ImportFilter
    public String getExternalHeaderPath() {
        return this.collect_file;
    }

    public static void main(String[] strArr) {
        BasicConfigurator.configure();
        TreeTaggerFilter treeTaggerFilter = new TreeTaggerFilter();
        treeTaggerFilter.setSourceFilename("/projekte/TIGER/java/test/work/helmut.xml");
        treeTaggerFilter.setXMLTargetFilename("/projekte/TIGER/java/test/work/output.xml");
        treeTaggerFilter.setXMLTargetID("TEST");
        treeTaggerFilter.setSchemaFilename("file:/projekte/TIGER/java/deliverable/local/schema/TigerXML.xsd");
        treeTaggerFilter.setCompression(false);
        treeTaggerFilter.setImportFilterHandler(new TestImportFilterHandler());
        try {
            treeTaggerFilter.startConversion();
        } catch (ImportFilterException e) {
            if (e.isStopped()) {
                System.out.println("STOP");
            } else {
                e.printStackTrace();
            }
        }
    }
}
