package com.terark.mobilesearch.wordseg.adapter.dat;

import com.terark.mobilesearch.wordseg.SegWord;
import java.io.InputStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

/**
 * Lucene {@link Tokenizer} that emits the words produced by a {@link WordSegAdapter}.
 *
 * <p>Each successful {@link #incrementToken()} pulls one {@link SegWord} from the adapter and
 * publishes its text, its start/end offsets and a fixed token type through the standard
 * Lucene attributes.
 *
 * <p>NOTE(review): the Lucene methods overridden here normally declare
 * {@code throws IOException}; the signatures in this file omit it (possibly lost during
 * decompilation) — confirm against the Lucene version on the classpath.
 */
public class WordSegTokenizer extends Tokenizer {
    private static final String TAG = "TERARK_WordSeg_Tokenizer";

    /** Token type assigned to every emitted token. */
    private static final String TOKEN_TYPE = "TERARK_SEG_WORDS";

    private final WordSegAdapter adapter;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);

    /** Uncorrected end offset of the most recently emitted word; reported by {@link #end()}. */
    private int endPosition = 0;

    /**
     * Creates the tokenizer and its backing adapter.
     *
     * <p>The adapter is constructed with whatever reader the base class holds at this point;
     * {@link #reset()} hands it the current reader again before tokens are consumed.
     *
     * @param inputStream stream forwarded to {@link WordSegAdapter}
     *                    (presumably the segmentation dictionary/model — TODO confirm)
     */
    public WordSegTokenizer(InputStream inputStream) {
        this.adapter = new WordSegAdapter(this.input, inputStream);
    }

    /** Closes the tokenizer by delegating to the base class. */
    @Override // org.apache.lucene.analysis.Tokenizer, org.apache.lucene.analysis.TokenStream, java.io.Closeable, java.lang.AutoCloseable
    public void close() {
        super.close();
    }

    /**
     * Publishes the final offset of the stream: the corrected end offset of the last word
     * that was emitted (or 0 if no word was emitted since the last {@link #reset()}).
     */
    @Override // org.apache.lucene.analysis.TokenStream
    public void end() {
        super.end();
        final int finalOffset = correctOffset(this.endPosition);
        this.offsetAtt.setOffset(finalOffset, finalOffset);
    }

    /**
     * Advances to the next segmented word.
     *
     * @return {@code true} if a token was populated, {@code false} once the adapter has no
     *         more words
     */
    @Override // org.apache.lucene.analysis.TokenStream
    public boolean incrementToken() {
        clearAttributes();
        if (!this.adapter.hasNext()) {
            return false;
        }
        final SegWord nextWord = this.adapter.getNextWord();

        // setEmpty() + append() already leaves the term length equal to the word length.
        this.termAtt.setEmpty().append(nextWord.word);

        // Map offsets back to the original text, exactly as end() does for the final
        // offset; correctOffset is the identity when no CharFilter wraps the input.
        this.offsetAtt.setOffset(correctOffset(nextWord.offset), correctOffset(nextWord.end));
        this.endPosition = nextWord.end;

        this.typeAtt.setType(TOKEN_TYPE);
        return true;
    }

    /**
     * Prepares the tokenizer for a new input: resets the base class, clears the remembered
     * end position so a previous stream cannot leak into {@link #end()}, and points the
     * adapter at the current reader.
     */
    @Override // org.apache.lucene.analysis.Tokenizer, org.apache.lucene.analysis.TokenStream
    public void reset() {
        super.reset();
        this.endPosition = 0;
        this.adapter.reset(this.input);
    }
}
