package com.xiaomi.ai.nlp.lm.core;

import com.google.gson.JsonObject;
import com.xiaomi.ai.nlp.lm.data.DoubleArrayTrie;
import com.xiaomi.ai.nlp.lm.data.NgramCorpusData;
import com.xiaomi.ai.nlp.lm.data.NgramInfo;
import com.xiaomi.ai.nlp.lm.smooth.BaseSmooth;
import com.xiaomi.ai.nlp.lm.smooth.KatzBackoff;
import com.xiaomi.ai.nlp.lm.util.Constant;
import com.xiaomi.ai.nlp.lm.util.NgramHelper;
import com.xiaomi.ai.nlp.lm.util.Pair;
import com.xiaomi.ai.nlp.lm.util.StringUtils;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: classes17.dex */
public class LanguageModel {
    /** Fallback log score used when neither the queried word nor {@code <unk>} is found in the trie. */
    private static final float DEFAULT_SCORE = -40.0f;
    /** Documentation link appended to the corpus-format error messages raised by loadCorpusData. */
    private static final String readmeUrl = "http://v9.git.n.xiaomi.com/ai-service/ai-common-libs/tree/master/nlp-ai/nlp-parser/docs/lm";
    /**
     * Slot expansion table: for a key {@code k}, n-grams containing {@code <any>/k} are duplicated
     * while loading, once per listed value {@code v}, with {@code <any>/k} replaced by {@code <any>/v}.
     */
    private Map<String, List<String>> expandSlots;
    /** Per-n-gram scores, addressed by the index that {@link #ngram2Index} returns for an n-gram. */
    private List<NgramInfo> index2Info;
    /** Trie used to look up the position of an n-gram string in {@link #index2Info}. */
    private DoubleArrayTrie ngram2Index;
    /** N-gram order read by loadCorpusData; assigned by setOrder and by the train methods. */
    private int order;
    /** Smoothing model behind insert/getNgramProb; stays null unless a constructor, train or setSmooth assigns it. */
    private BaseSmooth smooth;

    /* loaded from: classes17.dex */
    /**
     * Source categories for language-model data. This enum is not referenced anywhere else in this class.
     * NOTE(review): the meaning of each constant is inferred from its name only — confirm against callers.
     */
    public enum SourceType {
        /** Presumably data originating from a labelling system — TODO confirm. */
        LABEL_SYSTEM,
        /** Presumably data originating from a third-party corpus — TODO confirm. */
        THIRD_CORPUS
    }

    /**
     * Creates an empty model: a fresh trie, no n-gram entries and no slot expansions.
     * No smoothing model is assigned; callers must install one (setSmooth or a train method)
     * before using the smoothing-backed operations.
     */
    public LanguageModel() {
        this.ngram2Index = new DoubleArrayTrie();
        this.index2Info = new ArrayList<>();
        this.expandSlots = new HashMap<>();
    }

    /**
     * Builds a model from a tab-separated n-gram listing, without slot expansion.
     * The entries are read by readNgram and then indexed into the trie by loadModel.
     *
     * @param inputStream source of the n-gram listing; it is consumed to its end
     * @throws IOException if the stream cannot be read or a line is malformed; declared explicitly
     *         because readNgram performs stream I/O and raises this checked exception
     */
    public LanguageModel(InputStream inputStream) throws IOException {
        this.ngram2Index = new DoubleArrayTrie();
        this.index2Info = new ArrayList<>();
        this.expandSlots = new HashMap<>();
        readNgram(inputStream, this.index2Info);
        loadModel(this.index2Info);
    }

    public LanguageModel(InputStream inputStream, InputStream inputStream2) {
        this.ngram2Index = new DoubleArrayTrie();
        this.index2Info = new ArrayList();
        this.expandSlots = new HashMap();
        this.ngram2Index.open(inputStream);
        inputStream.close();
        DataInputStream dataInputStream = new DataInputStream(new BufferedInputStream(inputStream2, 16384));
        int available = dataInputStream.available() / 8;
        for (int i = 0; i < available; i++) {
            float readFloat = dataInputStream.readFloat();
            this.index2Info.add(new NgramInfo("", Pair.of(Float.valueOf(readFloat), Float.valueOf(readFloat)), dataInputStream.readFloat()));
        }
        dataInputStream.close();
        inputStream2.close();
    }

    /**
     * Builds a model from a tab-separated n-gram listing, duplicating n-grams according to the
     * given slot expansion table.
     *
     * @param inputStream source of the n-gram listing; it is consumed to its end
     * @param map         slot expansion table (slot name to replacement slot names); a {@code null}
     *                    table is treated as empty instead of failing later inside readNgram
     * @throws IOException if the stream cannot be read or a line is malformed
     */
    public LanguageModel(InputStream inputStream, Map<String, List<String>> map) throws IOException {
        this.ngram2Index = new DoubleArrayTrie();
        this.index2Info = new ArrayList<>();
        if (map != null) {
            this.expandSlots = map;
        } else {
            this.expandSlots = new HashMap<>();
        }
        readNgram(inputStream, this.index2Info);
        loadModel(this.index2Info);
    }

    /**
     * Loads an ARPA-style listing into a fresh {@link KatzBackoff} smoothing model.
     * The trie and the index list are left empty by this constructor; every n-gram goes through
     * {@link #insert(List, double, double)} instead.
     *
     * @param inputStream source of the listing; it is read to its end but not closed here
     * @param map         slot expansion table; a {@code null} table is treated as empty instead of
     *                    failing later inside loadArpaModel
     * @param i           value passed to the {@code KatzBackoff} constructor (the train methods pass
     *                    the n-gram order there)
     * @throws IOException if the stream cannot be read or a line is malformed
     */
    public LanguageModel(InputStream inputStream, Map<String, List<String>> map, int i) throws IOException {
        this.ngram2Index = new DoubleArrayTrie();
        this.index2Info = new ArrayList<>();
        this.smooth = new KatzBackoff(i);
        if (map != null) {
            this.expandSlots = map;
        } else {
            this.expandSlots = new HashMap<>();
        }
        loadArpaModel(inputStream);
    }

    /**
     * Builds a model from the tab-separated n-gram file at the given path by delegating to
     * {@link #LanguageModel(InputStream)}.
     *
     * @param str path of the n-gram file
     * @throws IOException if the file cannot be opened ({@link java.io.FileNotFoundException}) or read
     */
    public LanguageModel(String str) throws IOException {
        this(new FileInputStream(new File(str)));
    }

    /**
     * Reads an ARPA-style listing and inserts every n-gram into the smoothing model.
     *
     * <p>Only lines containing a tab are processed. Columns are: a numeric value, the n-gram
     * (tokens separated by {@code Constant.BLANK}) and an optional third numeric value that
     * defaults to {@code 0.0} unless the line has exactly three columns. The two numbers are
     * presumably the log-probability and the back-off weight — confirm against {@code BaseSmooth.insert}.
     *
     * <p>For each slot {@code k} of the expansion table whose placeholder {@code <any>/k} is one of
     * the ORIGINAL tokens, one additional n-gram per replacement value is inserted with the same
     * numbers. The check always uses the original tokens, so the result no longer depends on
     * which slot happened to be expanded first.
     *
     * <p>The stream is read to its end and deliberately left open; closing it stays with the caller.
     *
     * @param inputStream source of the listing
     * @throws IOException if reading fails or a tab-containing line has fewer than two columns
     */
    private void loadArpaModel(InputStream inputStream) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        String line;
        while ((line = reader.readLine()) != null) {
            if (!line.contains("\t")) {
                continue;
            }
            String[] columns = line.split("\\t");
            if (columns.length < 2) {
                throw new IOException("invalid language model format.");
            }
            double logProb = Double.parseDouble(columns[0]);
            List<String> tokens = Arrays.asList(columns[1].split(Constant.BLANK));
            double backoff = columns.length == 3 ? Double.parseDouble(columns[2]) : 0.0d;
            insert(tokens, logProb, backoff);

            String trimmedNgram = columns[1].trim();
            for (Map.Entry<String, List<String>> slot : this.expandSlots.entrySet()) {
                String placeholder = "<any>/" + slot.getKey();
                if (!tokens.contains(placeholder)) {
                    continue;
                }
                for (String replacement : slot.getValue()) {
                    String expanded = trimmedNgram.replace(placeholder, "<any>/" + replacement);
                    insert(Arrays.asList(expanded.split(Constant.BLANK)), logProb, backoff);
                }
            }
        }
    }

    /**
     * Indexes the given entries: builds a new trie from their n-gram strings (in list order) and,
     * on success, installs both the trie and the list as the model state.
     *
     * @param list n-gram entries; the list itself (not a copy) becomes {@code index2Info}
     * @throws RuntimeException if {@code DoubleArrayTrie.build} returns a non-zero status
     */
    private void loadModel(List<NgramInfo> list) {
        List<String> keys = new ArrayList<>(list.size());
        for (NgramInfo info : list) {
            keys.add(info.getNgram());
        }
        DoubleArrayTrie trie = new DoubleArrayTrie();
        if (trie.build(keys) != 0) {
            throw new RuntimeException("language model loaded to double array trie failed!");
        }
        this.ngram2Index = trie;
        this.index2Info = list;
    }

    /**
     * Parses a tab-separated n-gram listing into {@code list} and sorts the list afterwards.
     *
     * <p>Only lines containing a tab are processed. Columns are: a float stored as both halves of
     * the log-probability pair, the n-gram text (stored trimmed), and an optional third float that
     * defaults to {@code 0.0f} unless the line has exactly three columns. For every slot {@code k}
     * of the expansion table whose placeholder {@code <any>/k} occurs in the n-gram text, one extra
     * entry per replacement value is added with the same numbers.
     *
     * <p>The reader (and therefore the stream) is closed on every exit path, including parse and
     * I/O failures.
     *
     * @param inputStream source of the listing
     * @param list        destination list; entries are appended and the whole list is sorted
     * @throws IOException if reading fails or a tab-containing line has fewer than two columns
     */
    private void readNgram(InputStream inputStream, List<NgramInfo> list) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.contains("\t")) {
                    continue;
                }
                String[] columns = line.split("\\t");
                if (columns.length < 2) {
                    throw new IOException("language model file format error: " + line);
                }
                float logProb = Float.parseFloat(columns[0]);
                String rawNgram = columns[1];
                float weight = columns.length == 3 ? Float.parseFloat(columns[2]) : 0.0f;
                String ngram = rawNgram.trim();
                list.add(new NgramInfo(ngram, Pair.of(Float.valueOf(logProb), Float.valueOf(logProb)), weight));
                for (Map.Entry<String, List<String>> slot : this.expandSlots.entrySet()) {
                    String placeholder = "<any>/" + slot.getKey();
                    // The presence check runs on the untrimmed text, the replacement on the trimmed one.
                    if (!rawNgram.contains(placeholder)) {
                        continue;
                    }
                    for (String replacement : slot.getValue()) {
                        String expanded = ngram.replace(placeholder, "<any>/" + replacement);
                        list.add(new NgramInfo(expanded, Pair.of(Float.valueOf(logProb), Float.valueOf(logProb)), weight));
                    }
                }
            }
        }
        Collections.sort(list);
    }

    /**
     * Writes the textual form of the corpus data to the given file; does nothing when the path is
     * blank. The writer is always closed, and write failures are reported to the caller instead of
     * being silently discarded.
     *
     * @param ngramCorpusData data whose {@code toString()} output is written
     * @param str             destination file path; blank means "do not save"
     * @throws IOException if the file cannot be created or written
     */
    private void save(NgramCorpusData ngramCorpusData, String str) throws IOException {
        if (StringUtils.isBlank(str)) {
            return;
        }
        try (FileWriter writer = new FileWriter(str)) {
            writer.write(ngramCorpusData.toString());
        }
    }

    /**
     * Scores {@code str} given the preceding word {@code str2} using the trie-indexed entries.
     *
     * <p>Resolution order:
     * <ol>
     *   <li>the bigram {@code str2 + BLANK + str} is indexed: type {@code "bigram"}, its log-probability;</li>
     *   <li>otherwise, if both the word (or {@code <unk>} as a stand-in) and the history are indexed:
     *       type {@code "prBow + unigram"}, word log-probability plus the history's back-off value;</li>
     *   <li>otherwise, if the word (or {@code <unk>}) is indexed: type {@code "unigram"};</li>
     *   <li>otherwise type {@code "unigram"} with the default score.</li>
     * </ol>
     * The back-off branch requires the word index to be valid as well; without that check a model
     * lacking both the word and {@code <unk>} would be queried with index {@code -1}.
     *
     * @param str  current word
     * @param str2 preceding word (history)
     * @return a JSON object with the properties {@code "type"} and {@code "score"}
     */
    public JsonObject bigramLogProb(String str, String str2) {
        JsonObject result = new JsonObject();

        int bigramIdx = this.ngram2Index.exactMatchSearch(str2 + Constant.BLANK + str);
        if (bigramIdx != -1) {
            result.addProperty("type", "bigram");
            result.addProperty("score", this.index2Info.get(bigramIdx).getLogProb().getLeft());
            return result;
        }

        int wordIdx = this.ngram2Index.exactMatchSearch(str);
        if (wordIdx == -1) {
            wordIdx = this.ngram2Index.exactMatchSearch("<unk>");
        }
        int historyIdx = this.ngram2Index.exactMatchSearch(str2);

        if (historyIdx != -1 && wordIdx != -1) {
            float backedOff = this.index2Info.get(wordIdx).getLogProb().getRight().floatValue()
                    + this.index2Info.get(historyIdx).getLogBow();
            result.addProperty("type", "prBow + unigram");
            result.addProperty("score", Float.valueOf(backedOff));
            return result;
        }
        if (wordIdx != -1) {
            result.addProperty("type", "unigram");
            result.addProperty("score", this.index2Info.get(wordIdx).getLogProb().getRight());
            return result;
        }
        result.addProperty("type", "unigram");
        result.addProperty("score", Float.valueOf(DEFAULT_SCORE));
        return result;
    }

    /**
     * Tells whether the bigram "{@code str2} followed by {@code str}" is present in the trie.
     *
     * @param str  second word of the bigram
     * @param str2 first word of the bigram
     * @return {@code true} when the exact key {@code str2 + BLANK + str} is indexed
     */
    public boolean containsBigram(String str, String str2) {
        String key = str2 + Constant.BLANK + str;
        int position = this.ngram2Index.exactMatchSearch(key);
        return position != -1;
    }

    /**
     * Tells whether the bigram "{@code str2} followed by {@code str}" is known to the model.
     * Without a smoothing model this falls back to the trie lookup of
     * {@link #containsBigram(String, String)}. Otherwise the smoothing model must report the
     * result type {@code "ngram"} with a score different from {@code -1000.0}.
     *
     * @param str  second word of the bigram
     * @param str2 first word of the bigram
     * @return {@code true} when the bigram is found as described above
     */
    public boolean containsBigram2(String str, String str2) {
        if (this.smooth == null) {
            return containsBigram(str, str2);
        }
        List<String> bigram = new ArrayList<>();
        bigram.add(str2);
        bigram.add(str);
        JsonObject prob = this.smooth.getNgramProb(bigram);
        boolean matchedAsNgram = "ngram".equals(prob.get("type").getAsString());
        return matchedAsNgram && prob.get("score").getAsDouble() != -1000.0d;
    }

    /**
     * Tells whether the given word is indexed in the trie.
     *
     * @param str word to look up
     * @return {@code true} when the exact key is present
     */
    public boolean containsUnigram(String str) {
        int position = this.ngram2Index.exactMatchSearch(str);
        return -1 != position;
    }

    /**
     * Delegates the n-gram query to the smoothing model.
     * A smoothing model must have been assigned beforehand; this method does not check for {@code null}.
     *
     * @param list tokens of the n-gram
     * @return whatever {@code BaseSmooth.getNgramProb} returns for the tokens
     */
    public JsonObject getNgramProb(List<String> list) {
        BaseSmooth model = this.smooth;
        return model.getNgramProb(list);
    }

    /** @return the currently configured n-gram order */
    public int getOrder() {
        return order;
    }

    /** @return the smoothing model in use, or {@code null} when none has been assigned */
    public BaseSmooth getSmooth() {
        return smooth;
    }

    /**
     * Forwards an n-gram and its two numeric values to the smoothing model.
     * A smoothing model must have been assigned beforehand; this method does not check for {@code null}.
     *
     * @param list tokens of the n-gram
     * @param d    first value passed to {@code BaseSmooth.insert}
     * @param d2   second value passed to {@code BaseSmooth.insert}
     */
    public void insert(List<String> list, double d, double d2) {
        BaseSmooth model = this.smooth;
        model.insert(list, d, d2);
    }

    /**
     * Counts the n-grams of a tokenized corpus for every order from 1 up to the configured order.
     *
     * <p>Each sentence is wrapped in {@code <s>} / {@code </s>}, blank tokens are dropped and the
     * remaining tokens are normalized by {@link #normalizeToken(String)}. Sentences left with only
     * the two boundary markers are skipped for counting, but still contribute to the sentence
     * total, which is {@code list.size()}. A {@code <unk>} unigram with count 1 is always stored,
     * replacing any counted {@code <unk>} entry.
     *
     * <p>The configured order must be at least 1; otherwise storing the {@code <unk>} unigram fails.
     *
     * @param list corpus as a list of sentences, each a list of tokens
     * @return corpus statistics: per-order count maps (index 0 is unused and empty), token total
     *         plus one, number of distinct tokens, and number of input sentences
     * @throws IllegalArgumentException if a token containing {@code '/'} has an unsupported shape
     */
    public NgramCorpusData loadCorpusData(List<List<String>> list) {
        List<Map<String, NgramCorpusData.NgramInfo>> countsByOrder = new ArrayList<>(this.order + 1);
        for (int n = 0; n <= this.order; n++) {
            countsByOrder.add(new HashMap<String, NgramCorpusData.NgramInfo>());
        }
        Set<String> vocabulary = new HashSet<>();
        int tokenTotal = 0;

        for (List<String> rawSentence : list) {
            List<String> sentence = new ArrayList<>();
            sentence.add("<s>");
            for (String token : rawSentence) {
                if (!StringUtils.isBlank(token)) {
                    sentence.add(normalizeToken(token));
                }
            }
            sentence.add("</s>");
            if (sentence.size() == 2) {
                // Nothing but the boundary markers: contributes no counts.
                continue;
            }

            vocabulary.addAll(sentence);
            tokenTotal += sentence.size() - 1;
            for (int n = 1; n <= this.order; n++) {
                Map<String, NgramCorpusData.NgramInfo> counts = countsByOrder.get(n);
                for (String ngram : NgramHelper.extract(sentence, n)) {
                    NgramCorpusData.NgramInfo seen = counts.get(ngram);
                    if (seen != null) {
                        seen.setCount(seen.getCount() + 1);
                    } else {
                        NgramCorpusData.NgramInfo first = new NgramCorpusData.NgramInfo();
                        first.setCount(1);
                        counts.put(ngram, first);
                    }
                }
            }
        }

        NgramCorpusData.NgramInfo unknown = new NgramCorpusData.NgramInfo();
        unknown.setCount(1);
        countsByOrder.get(1).put("<unk>", unknown);
        return new NgramCorpusData(countsByOrder, tokenTotal + 1, vocabulary.size(), list.size());
    }

    /**
     * Converts one non-blank corpus token to the form used for counting.
     * <ul>
     *   <li>no {@code '/'}: returned unchanged;</li>
     *   <li>{@code token/eps} or {@code <any>/slot}: returned unchanged (exactly two parts required);</li>
     *   <li>{@code eps/x}: the two parts are swapped, giving {@code x/eps};</li>
     *   <li>anything else must have exactly three parts and becomes {@code <any>/} plus the first part.</li>
     * </ul>
     *
     * @throws IllegalArgumentException if the number of {@code '/'}-separated parts does not match
     */
    private static String normalizeToken(String token) {
        if (!token.contains("/")) {
            return token;
        }
        String[] parts = token.split("/");
        if (token.endsWith("/eps") || token.startsWith("<any>/")) {
            if (parts.length != 2) {
                throw new IllegalArgumentException("format of corpus data should be token/eps or <any>/slot, format error: " + token + " ref: " + readmeUrl);
            }
            return token;
        }
        if (token.startsWith("eps/")) {
            if (parts.length != 2) {
                throw new IllegalArgumentException("eps token should only contain one '/', format error: " + token + " ref: " + readmeUrl);
            }
            return String.format("%s/%s", parts[1], parts[0]);
        }
        if (parts.length != 3) {
            throw new IllegalArgumentException("slot token should only contain two '/', format error: " + token + " ref: " + readmeUrl);
        }
        return String.format("<any>/%s", parts[0]);
    }

    /**
     * Looks up the back-off value stored for an n-gram.
     *
     * @param str n-gram key
     * @return the entry's {@code getLogBow()} value, or {@code 0.0f} when the key is not indexed
     */
    public float logBowProb(String str) {
        int position = this.ngram2Index.exactMatchSearch(str);
        return position != -1 ? this.index2Info.get(position).getLogBow() : 0.0f;
    }

    /**
     * Looks up the log-probability of a single word, using the {@code <unk>} entry when the word
     * itself is not indexed.
     *
     * @param str word to score
     * @return the stored log-probability, or the default score when neither the word nor
     *         {@code <unk>} is indexed
     */
    public float logUnigramProb(String str) {
        int position = this.ngram2Index.exactMatchSearch(str);
        if (position == -1) {
            position = this.ngram2Index.exactMatchSearch("<unk>");
        }
        if (position == -1) {
            return DEFAULT_SCORE;
        }
        return this.index2Info.get(position).getLogProb().getRight().floatValue();
    }

    /**
     * Sets the n-gram order used by loadCorpusData.
     *
     * @param i new order; stored without validation
     */
    public void setOrder(int i) {
        order = i;
    }

    /**
     * Replaces the smoothing model used by insert, getNgramProb and containsBigram2.
     *
     * @param baseSmooth new smoothing model; stored as given, {@code null} included
     */
    public void setSmooth(BaseSmooth baseSmooth) {
        smooth = baseSmooth;
    }

    /**
     * Trains a Katz back-off model on the corpus and optionally saves the resulting corpus data.
     *
     * @param list       corpus as a list of sentences, each a list of tokens
     * @param map        slot expansion table passed to {@code createBackoffTrie}
     * @param smoothType currently ignored; a {@link KatzBackoff} model is always created
     * @param i          n-gram order, from 1 to 9 inclusive
     * @param str        file to write the textual corpus data to; blank skips saving
     * @throws IllegalArgumentException if {@code i} is outside 1..9
     * @throws IOException              if the corpus data cannot be written to {@code str}
     */
    public void train(List<List<String>> list, Map<String, List<String>> map, SmoothType smoothType, int i, String str) throws IOException {
        NgramCorpusData corpusData = fitKatzModel(list, map, i);
        save(corpusData, str);
    }

    /**
     * Trains a Katz back-off model on the corpus and returns the textual form of the corpus data.
     *
     * @param list       corpus as a list of sentences, each a list of tokens
     * @param map        slot expansion table passed to {@code createBackoffTrie}
     * @param smoothType currently ignored; a {@link KatzBackoff} model is always created
     * @param i          n-gram order, from 1 to 9 inclusive
     * @return {@code toString()} of the trained corpus data
     * @throws IllegalArgumentException if {@code i} is outside 1..9
     */
    @Deprecated
    public String trainToArpaText(List<List<String>> list, Map<String, List<String>> map, SmoothType smoothType, int i) {
        return fitKatzModel(list, map, i).toString();
    }

    /**
     * Shared training pipeline: validates the order, stores it, counts the corpus n-grams,
     * installs a new {@link KatzBackoff} model, estimates it and builds its back-off trie.
     * Orders below 1 are rejected up front because corpus counting cannot succeed for them.
     */
    private NgramCorpusData fitKatzModel(List<List<String>> corpus, Map<String, List<String>> slots, int n) {
        if (n < 1 || n > 9) {
            throw new IllegalArgumentException("order should be between 1 and 9, got: " + n);
        }
        // The order must be stored before counting: loadCorpusData reads it from the field.
        this.order = n;
        NgramCorpusData corpusData = loadCorpusData(corpus);
        KatzBackoff katz = new KatzBackoff(n);
        this.smooth = katz;
        katz.estimate(corpusData);
        this.smooth.createBackoffTrie(corpusData, slots);
        return corpusData;
    }
}
