package edu.stanford.nlp.trees.international.pennchinese;

import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes.dex */
/**
 * An in-memory dictionary that maps a (lower-cased, trimmed) source word to an
 * insertion-ordered set of translations, loaded from a line-oriented text file.
 *
 * <p>Each dictionary line is matched against a regular expression whose first
 * capturing group is the headword and whose second capturing group is the
 * list of translations; the list is then split on a delimiter regex. The
 * defaults ({@code "[^ ]+ ([^ ]+)[^/]+/(.+)/"} and {@code "[/;]"}) take the
 * second space-separated token as the headword and everything between the
 * first following slash and the final slash as the translations (this looks
 * like the CEDICT layout; confirm against the actual dictionary file).
 *
 * <p>When the map is constructed with {@code normalize == true}, headwords and
 * translations are additionally stripped of bracketed material, a set of
 * punctuation characters and a leading {@code "to "}, and empty results are
 * dropped.
 */
public class ChineseEnglishWordMap implements Serializable {
    private static final boolean DEBUG = false;
    private static final String defaultCharset = "UTF-8";
    private static final String defaultDelimiter = "[/;]";
    private static final String defaultPattern = "[^ ]+ ([^ ]+)[^/]+/(.+)/";
    private static final String[] punctuations = {"（.*?）", "\\(.*?\\)", "<.*?>", "[″⃝○◯‹〈⟨›〉⟩«⟪»⟫⌈⌋⟦⟧〰～“‶”″⌇〜〒⧄《》\u3000]", "^to "};
    /** The {@link #punctuations} regexes, compiled once instead of on every normalize call. */
    private static final Pattern[] PUNCTUATION_PATTERNS = compilePunctuations();
    private static final long serialVersionUID = 7655332268578049993L;

    /** Headword to translations; every value set created by this class preserves insertion order. */
    private final Map<String, Set<String>> map = Generics.newHashMap(10000);
    /** Whether headwords and translations are cleaned by {@link #normalize(String)} while loading. */
    private boolean normalized = false;

    /** Lazily creates the shared default instance on first access to {@link #getInstance()}. */
    private static class SingletonHolder {
        private static final ChineseEnglishWordMap INSTANCE = new ChineseEnglishWordMap();

        private SingletonHolder() {
        }
    }

    /** Compiles every entry of {@link #punctuations} in declaration order. */
    private static Pattern[] compilePunctuations() {
        Pattern[] compiled = new Pattern[punctuations.length];
        for (int i = 0; i < punctuations.length; i++) {
            compiled[i] = Pattern.compile(punctuations[i]);
        }
        return compiled;
    }

    /**
     * Loads the dictionary found at {@code CEDict.path()} with the default
     * line pattern, delimiter and charset, without normalization.
     */
    public ChineseEnglishWordMap() {
        readCEDict(CEDict.path());
    }

    /**
     * Loads the dictionary at the given path with the default line pattern,
     * delimiter and charset, without normalization.
     *
     * @param dictPath path of the dictionary file
     */
    public ChineseEnglishWordMap(String dictPath) {
        readCEDict(dictPath);
    }

    /**
     * Loads a dictionary with a custom line format, without normalization.
     *
     * @param dictPath path of the dictionary file
     * @param pattern  regex matched against each whole line; group 1 is the
     *                 headword, group 2 the translation list
     * @param delim    regex used to split the translation list
     * @param charset  name of the charset used to decode the file
     */
    public ChineseEnglishWordMap(String dictPath, String pattern, String delim, String charset) {
        readCEDict(dictPath, pattern, delim, charset);
    }

    /**
     * Loads a dictionary with a custom line format.
     *
     * @param dictPath  path of the dictionary file
     * @param pattern   regex matched against each whole line; group 1 is the
     *                  headword, group 2 the translation list
     * @param delim     regex used to split the translation list
     * @param charset   name of the charset used to decode the file
     * @param normalize whether to clean headwords and translations while loading
     */
    public ChineseEnglishWordMap(String dictPath, String pattern, String delim, String charset, boolean normalize) {
        this.normalized = normalize;
        readCEDict(dictPath, pattern, delim, charset);
    }

    /**
     * Loads the dictionary at the given path with the default line format.
     *
     * @param dictPath  path of the dictionary file
     * @param normalize whether to clean headwords and translations while loading
     */
    public ChineseEnglishWordMap(String dictPath, boolean normalize) {
        this.normalized = normalize;
        readCEDict(dictPath);
    }

    /**
     * Returns the shared instance built with the no-argument constructor.
     *
     * @return the lazily created default word map
     */
    public static ChineseEnglishWordMap getInstance() {
        return SingletonHolder.INSTANCE;
    }

    /**
     * Tells whether every character of {@code str} is a digit according to
     * {@link Character#isDigit(char)}. The empty string yields {@code true}.
     */
    private static boolean isDigits(String str) {
        for (int i = 0, n = str.length(); i < n; i++) {
            if (!Character.isDigit(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Command-line driver: reads a whitespace-tokenized input file and prints,
     * for every token, either the token itself (punctuation), the token
     * followed by {@code " [NUMBER]"}, its translation(s), or {@code "[UNK]"}.
     *
     * <p>Usage: {@code ChineseEnglishWordMap [-all] [-dictPath path]
     * [-encoding enc_string] inputFile}. With {@code -all} every translation
     * is printed, separated by {@code |}; otherwise only the first one.
     *
     * @param strArr command-line arguments
     * @throws IOException if the input file cannot be opened or read, or the
     *                     encoding is not supported
     */
    public static void main(String[] strArr) throws IOException {
        Map<String, Integer> flagArity = Generics.newHashMap();
        flagArity.put("-dictPath", 1);
        flagArity.put("-encoding", 1);
        Map<String, String[]> options = StringUtils.argsToMap(strArr, flagArity);
        String[] positional = options.get(null);
        if (positional == null || positional.length < 1) {
            System.err.println("usage: ChineseEnglishWordMap [-all] [-dictPath path] [-encoding enc_string] inputFile");
            System.exit(1);
            return;
        }
        String inputFile = positional[0];
        boolean printAll = options.containsKey("-all");
        String encoding = options.containsKey("-encoding") ? options.get("-encoding")[0] : defaultCharset;

        int totalWords = 0;
        int coveredWords = 0;
        // The input is opened first so that a bad path fails before the
        // dictionary is loaded; try-with-resources closes it on every exit path.
        try (FileInputStream rawInput = new FileInputStream(inputFile);
             BufferedReader input = new BufferedReader(new InputStreamReader(rawInput, encoding))) {
            ChineseTreebankLanguagePack languagePack = new ChineseTreebankLanguagePack();
            String[] dictPath = options.get("-dictPath");
            ChineseEnglishWordMap wordMap =
                    dictPath == null ? new ChineseEnglishWordMap() : new ChineseEnglishWordMap(dictPath[0]);
            // Wraps System.out, so it is flushed but deliberately never closed.
            PrintWriter output = new PrintWriter(new OutputStreamWriter(System.out, encoding), true);

            for (String line = input.readLine(); line != null; line = input.readLine()) {
                for (String word : line.split("\\s", 1000)) {
                    if (word.isEmpty()) {
                        // Consecutive whitespace yields empty tokens; they are not words
                        // and must not inflate the total.
                        continue;
                    }
                    output.print(StringUtils.pad(word + ':', 8));
                    if (languagePack.isPunctuationWord(word)) {
                        // Punctuation is echoed and excluded from the word count.
                        output.print(word);
                    } else {
                        totalWords++;
                        if (isDigits(word)) {
                            output.print(word + " [NUMBER]");
                        } else if (wordMap.containsKey(word)) {
                            coveredWords++;
                            if (printAll) {
                                String separator = "";
                                for (String translation : wordMap.getAllTranslations(word)) {
                                    output.print(separator + translation);
                                    separator = "|";
                                }
                            } else {
                                output.print(wordMap.getFirstTranslation(word));
                            }
                        } else {
                            output.print("[UNK]");
                        }
                    }
                    output.println();
                }
                output.println();
            }
            output.flush();
        }
        System.err.print("Finished translating " + totalWords + " words (");
        System.err.println(coveredWords + " were in dictionary).");
    }

    /**
     * Cleans a single headword or translation. When normalization is off the
     * argument is returned untouched; otherwise every punctuation regex is
     * removed in declaration order and the result is trimmed.
     */
    private String normalize(String str) {
        if (!this.normalized) {
            return str;
        }
        String cleaned = str;
        for (Pattern punctuation : PUNCTUATION_PATTERNS) {
            cleaned = punctuation.matcher(cleaned).replaceAll("");
        }
        return cleaned.trim();
    }

    /**
     * Cleans every element of a translation set, dropping those that become
     * empty. When normalization is off the same set instance is returned.
     * The result keeps the iteration order of the input so that the "first"
     * translation stays well defined.
     */
    private Set<String> normalize(Set<String> set) {
        if (!this.normalized) {
            return set;
        }
        Set<String> cleanedSet = new LinkedHashSet<>();
        for (String translation : set) {
            String cleaned = normalize(translation);
            if (!cleaned.isEmpty()) {
                cleanedSet.add(cleaned);
            }
        }
        return cleanedSet;
    }

    /**
     * Merges another headword-to-translations map into this one. Keys and
     * values are taken as given (no lower-casing, trimming or normalization).
     *
     * @param map entries to merge in
     * @return the number of translations that were not already present
     */
    public int addMap(Map<String, Set<String>> map) {
        int added = 0;
        for (Map.Entry<String, Set<String>> entry : map.entrySet()) {
            String key = entry.getKey();
            Set<String> incoming = entry.getValue();
            Set<String> existing = this.map.get(key);
            if (existing == null) {
                // Store a private ordered copy so later merges do not touch the caller's set.
                Set<String> copy = new LinkedHashSet<>(incoming);
                this.map.put(key, copy);
                added += copy.size();
            } else {
                for (String translation : incoming) {
                    if (existing.add(translation)) {
                        added++;
                    }
                }
            }
        }
        return added;
    }

    /**
     * Tells whether a headword is present; the argument is lower-cased and
     * trimmed before lookup.
     *
     * @param str the word to look up
     * @return {@code true} if the map has an entry for the word
     */
    public boolean containsKey(String str) {
        return this.map.containsKey(str.toLowerCase().trim());
    }

    /**
     * Returns the translation set stored for a headword; the argument is
     * lower-cased and trimmed before lookup.
     *
     * @param str the word to look up
     * @return the stored set itself (not a copy), or {@code null} if the word
     *         is not in the map
     */
    public Set<String> getAllTranslations(String str) {
        return this.map.get(str.toLowerCase().trim());
    }

    /**
     * Returns the first translation stored for a headword; the argument is
     * lower-cased and trimmed before lookup.
     *
     * @param str the word to look up
     * @return the first element of the word's translation set, or {@code null}
     *         if the word is not in the map or its set is empty
     */
    public String getFirstTranslation(String str) {
        Set<String> translations = this.map.get(str.toLowerCase().trim());
        if (translations == null || translations.isEmpty()) {
            // addMap can store an empty set; there is no first element to return.
            return null;
        }
        return translations.iterator().next();
    }

    /**
     * Builds a new map from each translation to the insertion-ordered set of
     * headwords that list it. This map itself is not modified.
     *
     * @return a freshly built translation-to-headwords map
     */
    public Map<String, Set<String>> getReverseMap() {
        Set<Map.Entry<String, Set<String>>> entries = this.map.entrySet();
        Map<String, Set<String>> reversed = Generics.newHashMap(entries.size());
        for (Map.Entry<String, Set<String>> entry : entries) {
            String headword = entry.getKey();
            for (String translation : entry.getValue()) {
                Set<String> headwords = reversed.get(translation);
                if (headwords == null) {
                    headwords = new LinkedHashSet<>(6);
                    reversed.put(translation, headwords);
                }
                headwords.add(headword);
            }
        }
        return reversed;
    }

    /**
     * Reads a dictionary file using the default line pattern, delimiter and
     * charset, adding its entries to this map.
     *
     * @param str path of the dictionary file
     * @throws RuntimeException wrapping the {@link IOException} if the file
     *                          cannot be opened or read
     */
    public void readCEDict(String str) {
        readCEDict(str, defaultPattern, defaultDelimiter, defaultCharset);
    }

    /**
     * Reads a dictionary file and adds its entries to this map. Lines that do
     * not match the pattern as a whole are skipped. Translations of a headword
     * that is already present are appended to its existing set; the headword
     * is looked up under the same (normalized) key it is stored under, so
     * repeated headwords are merged rather than overwritten.
     *
     * @param str  path of the dictionary file
     * @param str2 regex matched against each whole line; group 1 is the
     *             headword, group 2 the translation list
     * @param str3 regex used to split the translation list
     * @param str4 name of the charset used to decode the file
     * @throws RuntimeException wrapping the {@link IOException} if the file
     *                          cannot be opened or read, or the charset is
     *                          not supported
     */
    public void readCEDict(String str, String str2, String str3, String str4) {
        try (FileInputStream rawInput = new FileInputStream(str);
             BufferedReader reader = new BufferedReader(new InputStreamReader(rawInput, str4))) {
            Pattern linePattern = Pattern.compile(str2);
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                Matcher matcher = linePattern.matcher(line);
                if (!matcher.matches()) {
                    continue;
                }
                String headword = normalize(matcher.group(1).toLowerCase().trim());
                String[] translations = matcher.group(2).split(str3);
                if (headword.isEmpty()) {
                    continue;
                }
                Set<String> existing = this.map.get(headword);
                if (existing == null) {
                    Set<String> ordered = new LinkedHashSet<>(Arrays.asList(translations));
                    Set<String> cleaned = normalize(ordered);
                    if (!cleaned.isEmpty()) {
                        this.map.put(headword, cleaned);
                    }
                } else {
                    for (String translation : translations) {
                        String cleaned = normalize(translation);
                        if (!cleaned.isEmpty()) {
                            existing.add(cleaned);
                        }
                    }
                }
            }
        } catch (IOException e) {
            throw new RuntimeException("IOException reading CEDict from file " + str, e);
        }
    }

    /**
     * Returns the number of headwords in the map.
     *
     * @return the entry count
     */
    public int size() {
        return this.map.size();
    }

    /** Returns the string form of the underlying headword-to-translations map. */
    @Override
    public String toString() {
        return this.map.toString();
    }
}
