package com.etri.nmt;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

/* loaded from: classes.dex */
/**
 * Regex-driven, rule-based tokenizer and detokenizer for plain text.
 *
 * <p>{@link #monotonicTokenize(String, String)} inserts spaces around punctuation,
 * language-dependent apostrophe boundaries and sentence-final periods;
 * {@link #monotonicDetokenize(String, String)} removes such spaces again.
 * Language-specific rules are selected by the codes {@code "en"}, {@code "fr"}
 * and {@code "it"}; every other value (including {@code null}) uses the generic rules.
 *
 * <p>NOTE(review): POSIX classes such as {@code \p{Alpha}} are ASCII-only in
 * {@code java.util.regex} unless UNICODE_CHARACTER_CLASS is enabled; other regex
 * engines may treat them differently - confirm against the target runtime.
 */
public class jTokenizer {
    /** Abbreviations after which a trailing period is never split off. */
    public static final String[] _ENG_NONBREAK_PREFIX_SET_PLANE = {"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "Adj", "Adm", "Adv", "Asst", "Bart", "Bldg", "Brig", "Bros", "Capt", "Cmdr", "Col", "Comdr", "Con", "Corp", "Cpl", "DR", "Dr", "Drs", "Ens", "Gen", "Gov", "Hon", "Hr", "Hosp", "Insp", "Lt", "MM", "MR", "MRS", "MS", "Maj", "Messrs", "Mlle", "Mme", "Mr", "Mrs", "Ms", "Msgr", "Op", "Ord", "Pfc", "Ph", "Prof", "Pvt", "Rep", "Reps", "Res", "Rev", "Rt", "Sen", "Sens", "Sfc", "Sgt", "Sr", "St", "Supt", "Surg", "v", "vs", "i.e", "rev", "e.g", "Nos", "Nr", "Jan", "Feb", "Mar", "Apr", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Revs", "Ltd", "Co", "Inc"};

    /** Abbreviations whose trailing period is kept only when the next token is all digits. */
    public static final String[] _ENG_NONBREAK_PREFIX_BUT_NUMBER_NEEDED_PLANE = {"No", "Art", "pp", "Fig", "FIG", "Tbl", "Page", "Pg", "pg"};

    /** Set view of {@link #_ENG_NONBREAK_PREFIX_SET_PLANE} for constant-time lookup. */
    public static final Set<String> _ENG_NONBREAK_PREFIX_SET = new HashSet<>(Arrays.asList(_ENG_NONBREAK_PREFIX_SET_PLANE));

    /** Set view of {@link #_ENG_NONBREAK_PREFIX_BUT_NUMBER_NEEDED_PLANE} for constant-time lookup. */
    public static final Set<String> _ENG_NONBREAK_PREFIX_BUT_NUMBER_NEEDED = new HashSet<>(Arrays.asList(_ENG_NONBREAK_PREFIX_BUT_NUMBER_NEEDED_PLANE));

    /** Single space used as the canonical token separator. */
    private static final String SPACE = " ";

    // Patterns used once per token; compiled once instead of on every loop iteration.
    private static final Pattern TRAILING_PERIOD = Pattern.compile("^(\\S+)\\.$");
    private static final Pattern ANY_ALPHA = Pattern.compile("\\p{Alpha}");
    private static final Pattern LEADING_LOWER = Pattern.compile("^\\p{Lower}");
    private static final Pattern ALL_DIGITS = Pattern.compile("^[0-9]+$");

    /**
     * Removes the spaces that tokenization placed around punctuation.
     *
     * @param str  language code; {@code "en"} re-attaches clitics such as {@code 'm} and
     *             {@code 's}, {@code "fr"} re-attaches elisions such as {@code l' ami};
     *             any other value applies only the language-neutral rules
     * @param str2 tokenized text; must not be {@code null}
     * @return the detokenized text
     * @throws NullPointerException if {@code str2} is {@code null}
     */
    public String monotonicDetokenize(String str, String str2) {
        // The symbol class in the last rule is written with Unicode escapes so the
        // source compiles identically under any file encoding. In order: U+00BF and
        // U+00A1 (inverted question/exclamation mark), U+00A5 yen, U+00A3 pound,
        // U+00A2 cent, U+FFE6 fullwidth won, U+FFE5 fullwidth yen, U+FF04 fullwidth dollar.
        String text = str2.trim()
                .replaceAll("(\\[|\\{|\\(|/)\\s+", "$1")
                .replaceAll("([0-9]) : ([0-9])", "$1:$2")
                .replaceAll("([0-9]) \\. ([0-9])", "$1.$2")
                .replaceAll("([0-9]) , ([0-9])", "$1,$2")
                .replaceAll(" ([,\\.?!;:%}\\]])", "$1")
                .replaceAll("([$\u00bf\u00a1\u00a5\u00a3\u00a2\uffe6\uffe5\uff04]) ([0-9,\\.])", "$1$2");

        // Compare by value: '==' only worked when the caller passed an interned literal.
        if ("en".equals(str)) {
            text = text.replaceAll("(\\p{Alnum}) '(m|re|ll|d|ve|t|er|s)", "$1'$2");
        } else if ("fr".equals(str)) {
            text = text.replaceAll("(\\p{Alpha})' (\\p{Alpha})", "$1'$2");
        }

        // Pull quotation marks tight around their content. The single-quote rule uses
        // the group reference $1; a literal "%1" would discard the quoted text.
        return text.replaceAll("\" (.*) \"", "\"$1\" ").replaceAll("' (.*) '", "'$1'");
    }

    /**
     * Splits text into space-separated tokens.
     *
     * <p>Steps, in order: whitespace is collapsed and control characters removed;
     * characters other than alphanumerics, whitespace and {@code . ' ` , -} are
     * surrounded by spaces; runs of dots are protected by a placeholder; commas not
     * sitting between digits are separated; apostrophes are split according to the
     * language; a period ending a token is detached unless an abbreviation rule
     * applies; finally the dot placeholders are restored.
     *
     * @param str  language code; {@code "en"}, {@code "fr"} and {@code "it"} select
     *             specific apostrophe rules, any other value isolates every apostrophe
     * @param str2 raw text; must not be {@code null}
     * @return the tokenized text with single spaces between tokens
     * @throws NullPointerException if {@code str2} is {@code null}
     */
    public String monotonicTokenize(String str, String str2) {
        String text = str2.trim()
                .replaceAll("\\s+", SPACE)
                .replaceAll("[\\000-\\037]", "")
                .replaceAll("([^\\p{Alnum}\\s\\.'`,-])", " $1 ")
                .replaceAll("\\.(\\.+)", " DOTMULTI$1");

        // Fold each remaining dot of a multi-dot run into the placeholder, one per pass.
        while (text.contains("DOTMULTI.")) {
            text = text.replaceAll("DOTMULTI\\.([^\\.])", "DOTDOTMULTI $1").replaceAll("DOTMULTI\\.", "DOTDOTMULTI");
        }

        // Separate commas except where they have digits on the relevant side.
        text = text.replaceAll("([^\\p{Digit}])[,]", "$1 , ")
                .replaceAll("[,]([^\\p{Digit}])", " , $1")
                .replaceAll("(\\p{Digit})[,]$", "$1 ,");

        String[] tokens = splitApostrophes(str, text).split("\\s");

        StringBuilder joined = new StringBuilder();
        for (int idx = 0; idx < tokens.length; idx++) {
            String token = tokens[idx];
            if (TRAILING_PERIOD.matcher(token).matches()) {
                String stem = token.substring(0, token.length() - 1);
                if (shouldDetachPeriod(tokens, idx, stem)) {
                    token = stem + " .";
                }
            }
            joined.append(token).append(SPACE);
        }

        String result = joined.toString()
                .replaceAll("\\s+", SPACE)
                .replaceAll("\\.' ?$", " . ' ")
                .trim();

        // Expand the placeholders back into literal dots.
        while (result.contains("DOTDOTMULTI")) {
            result = result.replaceAll("DOTDOTMULTI", "DOTMULTI.");
        }
        return result.replaceAll("DOTMULTI", ".").replaceAll("\\s+", SPACE);
    }

    /** Applies the apostrophe-splitting rules selected by the language code. */
    private static String splitApostrophes(String lang, String text) {
        if ("en".equals(lang)) {
            // English: the apostrophe goes with the following clitic ("don 't", "1990 's").
            return text.replaceAll("([^\\p{Alpha}])[']([^\\p{Alpha}])", "$1 ' $2")
                    .replaceAll("([^\\p{Alpha}\\p{Digit}])['](\\p{Alpha})", "$1 ' $2")
                    .replaceAll("(\\p{Alpha})[']([^\\p{Alpha}])", "$1 ' $2")
                    .replaceAll("(\\p{Alpha})['](\\p{Alpha})", "$1 '$2")
                    .replaceAll("(\\p{Digit})[']([s])", "$1 '$2");
        }
        if ("fr".equals(lang) || "it".equals(lang)) {
            // French/Italian: the apostrophe stays with the preceding elided word ("l' ami").
            return text.replaceAll("([^\\p{Alpha}])[']([^\\p{Alpha}])", "$1 ' $2")
                    .replaceAll("([^\\p{Alpha}])['](\\p{Alpha})", "$1 ' $2")
                    .replaceAll("(\\p{Alpha})[']([^\\p{Alpha}])", "$1 ' $2")
                    .replaceAll("(\\p{Alpha})['](\\p{Alpha})", "$1' $2");
        }
        // Any other language: every apostrophe becomes its own token.
        return text.replaceAll("'", " ' ");
    }

    /** Decides whether the period that follows {@code stem} in {@code tokens[index]} becomes its own token. */
    private static boolean shouldDetachPeriod(String[] tokens, int index, String stem) {
        // Dotted forms containing letters (e.g. "U.S") keep their final period.
        if (stem.contains(".") && ANY_ALPHA.matcher(stem).find()) {
            return false;
        }
        if (_ENG_NONBREAK_PREFIX_SET.contains(stem)) {
            return false;
        }
        int next = index + 1;
        if (next >= tokens.length) {
            return true;
        }
        String following = tokens[next];
        // A lowercase continuation means the period did not end a sentence.
        if (LEADING_LOWER.matcher(following).find()) {
            return false;
        }
        // Prefixes such as "No" keep the period only in front of a number.
        if (_ENG_NONBREAK_PREFIX_BUT_NUMBER_NEEDED.contains(stem) && ALL_DIGITS.matcher(following).matches()) {
            return false;
        }
        return true;
    }
}
