package com.terark.mobilesearch.wordseg.adapter.dat;

import android.util.Log;
import com.terark.mobilesearch.wordseg.SegWord;
import com.terark.mobilesearch.wordseg.dat.DATWordSegmentor;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Iterator;
import java.util.TreeSet;

/* loaded from: classes.dex */
/**
 * Pull-style wrapper around {@link DATWordSegmentor}: {@link #reset(Reader)} reads a whole
 * text and tokenizes it, after which the resulting words are consumed one at a time through
 * {@link #hasNext()} and {@link #getNextWord()}.
 *
 * <p>Until {@link #reset(Reader)} has been called successfully the adapter is empty:
 * {@link #hasNext()} returns {@code false}.
 */
public class WordSegAdapter {
    private static final String TAG = "TERARK_WordSeg_Adapter";
    // NOTE(review): not referenced anywhere in this class; presumably a set of characters
    // meant to be skipped during segmentation. Confirm before removing.
    private static final String ignoreWords = "~!@#$%^&*()_-+= {}|:\"<>?,.';\\][\t、/";
    /** Stream handed to {@link DATWordSegmentor#tokenize} on every {@link #reset(Reader)}. */
    private final InputStream dictObjectInstance;
    /**
     * Iterator over the words of the most recent tokenization. Starts out as an empty
     * iterator (rather than {@code null}) so the accessors are safe before the first reset.
     */
    private Iterator<SegWord> it = new TreeSet<SegWord>().iterator();
    /** Trimmed text of the most recent successful {@link #reset(Reader)}, or {@code null}. */
    private String raw = null;
    // NOTE(review): only ever written (set to 0), never read in this class.
    private int position = 0;

    /**
     * Creates an adapter bound to the given dictionary stream.
     *
     * @param reader      currently ignored; text is only consumed by {@link #reset(Reader)}
     * @param inputStream stream passed as the second argument to
     *                    {@link DATWordSegmentor#tokenize} on each reset
     */
    public WordSegAdapter(Reader reader, InputStream inputStream) {
        this.dictObjectInstance = inputStream;
    }

    /**
     * Returns the next word of the current tokenization.
     *
     * @return the next word
     * @throws java.util.NoSuchElementException if no words remain, including when
     *         {@link #reset(Reader)} has not been called yet
     */
    public SegWord getNextWord() {
        return this.it.next();
    }

    /**
     * Tells whether {@link #getNextWord()} has another word to return.
     *
     * @return {@code true} if at least one word remains; {@code false} otherwise, including
     *         before the first {@link #reset(Reader)}
     */
    public boolean hasNext() {
        return this.it.hasNext();
    }

    /**
     * Reads {@code reader} to its end, trims the text, tokenizes it and restarts iteration
     * over the resulting words. The reader is not closed here.
     *
     * <p>If reading fails, the error is logged and the adapter is left empty instead of
     * tokenizing text left over from an earlier call.
     *
     * @param reader source of the text to segment; must not be {@code null}
     */
    public void reset(Reader reader) {
        String text;
        try {
            text = readAll(reader);
        } catch (IOException e) {
            Log.e(TAG, "Failed to read text to segment", e);
            // Drop any previous state so stale words are never served for the new input.
            this.raw = null;
            this.it = new TreeSet<SegWord>().iterator();
            this.position = 0;
            return;
        }
        this.raw = text;
        TreeSet<SegWord> treeSet = DATWordSegmentor.tokenize(text, this.dictObjectInstance);
        this.it = treeSet.iterator();
        Log.i(TAG, "分词的数量: " + treeSet.size());
        this.position = 0;
    }

    /** Drains {@code reader} and returns everything it produced, trimmed. */
    private static String readAll(Reader reader) throws IOException {
        StringBuilder sb = new StringBuilder();
        char[] buffer = new char[512];
        int read;
        while ((read = reader.read(buffer, 0, buffer.length)) != -1) {
            // Append the chars directly; no intermediate String per chunk.
            sb.append(buffer, 0, read);
        }
        return sb.toString().trim();
    }
}
