package com.mayabot.nlp.fasttext.dictionary;

import com.mayabot.nlp.fasttext.args.Args;
import com.mayabot.nlp.fasttext.train.SampleLine;
import com.mayabot.nlp.fasttext.utils.LogUtilsKt;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import kotlin.Metadata;
import kotlin.Unit;
import kotlin.io.CloseableKt;
import kotlin.io.TextStreamsKt;
import kotlin.jvm.functions.Function0;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Ref;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.Charsets;
import kotlin.text.StringsKt;

/* compiled from: BuildDictFromSource.kt */
/**
 * Builds a {@link Dictionary} from an optional pre-trained vectors file and a list of
 * training sample sources.
 *
 * <p>While words are being added, the vocabulary is pruned whenever it grows past a soft
 * limit (75% of the requested maximum size): the minimum-count threshold is raised step by
 * step until the vocabulary is no larger than 75% of that soft limit.
 */
@Metadata(bv = {1, 0, 3}, d1 = {"\u0000$\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010 \n\u0002\u0010\u001c\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\b\n\u0002\b\u0002\u001a8\u0010\u0000\u001a\u00020\u00012\u0006\u0010\u0002\u001a\u00020\u00032\u0012\u0010\u0004\u001a\u000e\u0012\n\u0012\b\u0012\u0004\u0012\u00020\u00070\u00060\u00052\b\b\u0002\u0010\b\u001a\u00020\t2\b\b\u0002\u0010\n\u001a\u00020\tH\u0007¨\u0006\u000b"}, d2 = {"buildFromFile", "Lcom/mayabot/nlp/fasttext/dictionary/Dictionary;", "args", "Lcom/mayabot/nlp/fasttext/args/Args;", "sources", "", "", "Lcom/mayabot/nlp/fasttext/train/SampleLine;", "maxVocabSize", "", "initWordListSize", "mynlp"}, k = 2, mv = {1, 4, 1})
/* loaded from: classes.dex */
public final class BuildDictFromSourceKt {
    /** Fraction of the maximum vocabulary size above which pruning is triggered. */
    private static final double SOFT_LIMIT_RATIO = 0.75d;

    /** Pruning continues until the vocabulary is at most this fraction of the soft limit. */
    private static final double PRUNE_TARGET_RATIO = 0.75d;

    /** A progress message is logged every time the token count hits a multiple of this. */
    private static final long PROGRESS_INTERVAL = 1000000L;

    /** Value used for {@code maxVocabSize} when the Kotlin caller omitted it. */
    private static final int DEFAULT_MAX_VOCAB_SIZE = 500000;

    /** Value used for {@code initWordListSize} when the Kotlin caller omitted it. */
    private static final int DEFAULT_INIT_WORD_LIST_SIZE = 5000;

    /**
     * Reads the optional pre-trained vocabulary and every word of every sample, then
     * applies the configured minimum counts and converts the builder into a dictionary.
     *
     * @param args             training arguments; supplies the label prefix, minimum counts
     *                         and the optional pre-trained vectors file
     * @param sources          sample sources; every word of every sample line is added,
     *                         followed by one end-of-sentence token per sample line
     * @param maxVocabSize     maximum vocabulary size handed to the builder; pruning starts
     *                         once the vocabulary exceeds 75% of this value
     * @param initWordListSize initial word list size handed to the builder
     * @return the dictionary produced by the builder
     * @throws RuntimeException if no entry survives the final minimum-count threshold
     * @throws Exception        if the pre-trained vectors file cannot be opened or read
     */
    public static final Dictionary buildFromFile(Args args, List<? extends Iterable<SampleLine>> sources, int maxVocabSize, int initWordListSize) throws Exception {
        Intrinsics.checkNotNullParameter(args, "args");
        Intrinsics.checkNotNullParameter(sources, "sources");

        DictionaryBuilder builder = new DictionaryBuilder(args.getLabel(), maxVocabSize, Integer.valueOf(initWordListSize));
        VocabPruner pruner = new VocabPruner(builder, maxVocabSize * SOFT_LIMIT_RATIO);
        LogUtilsKt.loggerln("Read file build dictionary ...");

        if (args.getKeepPreTrainedVector()) {
            File preTrainedVectors = args.getPreTrainedVectors();
            if (preTrainedVectors != null) {
                // Pre-trained words are seeded with ten times the minimum count.
                loadPreTrainedWords(preTrainedVectors, args.getMinCount() * 10, builder, pruner);
            }
        }

        for (Iterable<SampleLine> source : sources) {
            for (SampleLine sample : source) {
                for (String word : sample.getWords()) {
                    DictionaryBuilder.add$default(builder, word, 0, 2, null);
                    long tokens = builder.getNtokens();
                    if (tokens % PROGRESS_INTERVAL == 0) {
                        LogUtilsKt.logger("\rRead " + (tokens / PROGRESS_INTERVAL) + "M words");
                    }
                    pruner.pruneIfOverLimit();
                }
                // Every sample line is terminated with an end-of-sentence token.
                DictionaryBuilder.add$default(builder, DictionaryKt.EOS, 0, 2, null);
            }
        }

        builder.threshold(args.getMinCount(), args.getMinCountLabel());
        builder.getWordIdMap().collapseWordHash2Id();

        LogUtilsKt.loggerln(String.format("\rRead %d words\n", Long.valueOf(builder.getNtokens())));
        LogUtilsKt.loggerln("Number of words:  " + builder.getNwords());
        LogUtilsKt.loggerln("Number of labels: " + builder.getNlabels());
        if (pruner.getRounds() > 0) {
            LogUtilsKt.loggerln("Max threshold count: " + pruner.getMinThreshold());
        }
        if (builder.getWordIdMap().getSize() == 0) {
            throw new RuntimeException("Empty vocabulary. Try a smaller -minCount value.");
        }

        Dictionary dictionary = builder.toDictionary(args);
        LogUtilsKt.loggerln("Number of wordHash2Id: " + dictionary.getOnehotMap().getWordHash2WordId().length);
        return dictionary;
    }

    /**
     * Synthetic bridge for the Kotlin default arguments of {@code buildFromFile}.
     *
     * @param args             forwarded unchanged
     * @param list             forwarded unchanged as the sample sources
     * @param maxVocabSize     replaced by 500000 when bit {@code 4} of {@code mask} is set
     * @param initWordListSize replaced by 5000 when bit {@code 8} of {@code mask} is set
     * @param mask             bit mask selecting which defaults to apply
     * @param obj              unused marker argument
     */
    public static /* synthetic */ Dictionary buildFromFile$default(Args args, List list, int maxVocabSize, int initWordListSize, int mask, Object obj) throws Exception {
        if ((mask & 4) != 0) {
            maxVocabSize = DEFAULT_MAX_VOCAB_SIZE;
        }
        if ((mask & 8) != 0) {
            initWordListSize = DEFAULT_INIT_WORD_LIST_SIZE;
        }
        return buildFromFile(args, list, maxVocabSize, initWordListSize);
    }

    /**
     * Adds the vocabulary of a pre-trained vectors file to the builder.
     *
     * <p>The first line of the file is skipped. For every following line the text before
     * the first space is taken as the word; lines without a space, or starting with one,
     * are ignored, as are the EOS/BOW/EOW marker tokens. After the file has been read, one
     * EOS token is added and the number of loaded words is printed to standard output.
     *
     * @param vectorsFile file to read, decoded as UTF-8
     * @param wordCount   count passed to the builder for every loaded word
     * @param builder     builder receiving the words
     * @param pruner      invoked after every added word to keep the vocabulary bounded
     * @throws IOException if the file cannot be opened or read
     */
    private static void loadPreTrainedWords(File vectorsFile, int wordCount, DictionaryBuilder builder, VocabPruner pruner) throws IOException {
        int loaded = 0;
        // try-with-resources closes the reader on the failure path as well.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(vectorsFile), Charsets.UTF_8), 8192)) {
            reader.readLine(); // skip the first line
            String line;
            while ((line = reader.readLine()) != null) {
                int separator = line.indexOf(' ');
                if (separator <= 0) {
                    // Empty line, no space at all, or an empty word before the space.
                    continue;
                }
                String word = line.substring(0, separator);
                if (word.equals(DictionaryKt.EOS) || word.equals(DictionaryKt.BOW) || word.equals(DictionaryKt.EOW)) {
                    continue;
                }
                builder.add(word, wordCount);
                loaded++;
                pruner.pruneIfOverLimit();
            }
        }
        DictionaryBuilder.add$default(builder, DictionaryKt.EOS, 0, 2, null);
        System.out.println("Load " + loaded + " words from preTrainedVectors");
    }

    /** Keeps the builder's vocabulary bounded and remembers the thresholds it applied. */
    private static final class VocabPruner {
        private final DictionaryBuilder builder;
        private final double softLimit;
        /** Last threshold applied by a pruning round; the next round starts from it. */
        private long minThreshold = 1L;
        /** Number of pruning rounds that were triggered. */
        private int rounds;

        VocabPruner(DictionaryBuilder builder, double softLimit) {
            this.builder = builder;
            this.softLimit = softLimit;
        }

        long getMinThreshold() {
            return minThreshold;
        }

        int getRounds() {
            return rounds;
        }

        /** Starts a pruning round when the vocabulary has grown past the soft limit. */
        void pruneIfOverLimit() {
            if (builder.getSize() > softLimit) {
                prune();
            }
        }

        /**
         * Applies increasing thresholds, starting at the last applied one, until the
         * vocabulary is no larger than {@code softLimit * PRUNE_TARGET_RATIO}.
         */
        private void prune() {
            rounds++;
            long threshold = minThreshold;
            while (builder.getSize() > softLimit * PRUNE_TARGET_RATIO) {
                int sizeBefore = builder.getSize();
                builder.threshold(threshold, threshold);
                minThreshold = threshold;
                LogUtilsKt.loggerln("word size from " + sizeBefore + " to " + builder.getSize() + " , threshold min " + threshold);
                threshold++;
            }
        }
    }
}
