package com.kingstudio.sdkcollect.studyengine.parser;

import android.text.TextUtils;
import com.kingstudio.sdkcollect.studyengine.utils.f;
import com.kingstudio.sdkcollect.studyengine.utils.m;
import com.tencent.open.SocialConstants;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.g;
import org.jsoup.nodes.k;
import org.jsoup.select.Elements;

/* loaded from: classes.dex */
public class KReadability {

    /* Document being processed; its body is rewritten in place by the clean-up passes.
       (Decompiler note: renamed from "a" because of a name collision with the root package.) */
    private final Document f1170a;

    /* Copy of the body markup captured before the first pass. It is written back into the body
       before the second, more lenient pass and is scanned line by line by a(String).
       (Decompiler note: renamed from "b" because of a name collision with the root package.) */
    private String f1171b;
    /* Content element produced by the most recent pass; reset to null when the document contains frames. */
    private g c;
    /* Mode value supplied to a(int); when it equals 1 the preparation step also rewrites <section> tags. */
    private int d;
    /* Callback object that receives the outcome codes of a run. */
    private f e;
    /* Caller-supplied value passed back as the first argument of every callback invocation. */
    private int f;

    /**
     * Case-insensitive regular expressions used to classify nodes by their class/id strings,
     * their markup and their embed URLs.
     *
     * <p>All patterns are compiled once when this class is initialised, so lookups never
     * mutate shared state and are safe from any thread.
     */
    public class Patterns {

        private static final Pattern UNLIKELY_CANDIDATES_PATTERN = Pattern.compile(
                "combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter",
                Pattern.CASE_INSENSITIVE);

        private static final Pattern OK_MAYBE_ITS_A_CANDIDATE_PATTERN = Pattern.compile(
                "and|article|body|column|main|shadow|m-photo|g-article|article__content|img-wrapper-embedded|com-insert-images|g-wrapper-big-pic",
                Pattern.CASE_INSENSITIVE);

        private static final Pattern POSITIVE_PATTERN = Pattern.compile(
                "article|body|content|entry|hentry|main|page|pagination|post|text|blog|story|m-photo|g-article|img-wrapper-embedded|com-insert-images|g-wrapper-big-pic",
                Pattern.CASE_INSENSITIVE);

        private static final Pattern NEGATIVE_PATTERN = Pattern.compile(
                "combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget",
                Pattern.CASE_INSENSITIVE);

        private static final Pattern DIV_TO_P_ELEMENTS_PATTERN = Pattern.compile(
                "<(a|blockquote|dl|div|img|ol|p|pre|table|td|tr|ul|m-photo|g-article|img-wrapper-embedded|com-insert-images|g-wrapper-big-pic)",
                Pattern.CASE_INSENSITIVE);

        // Accepts both http and https so that secure YouTube/Vimeo URLs are recognised as video.
        private static final Pattern VIDEO_PATTERN = Pattern.compile(
                "https?:\\/\\/(www\\.)?(youtube|vimeo)\\.com",
                Pattern.CASE_INSENSITIVE);

        /** Identifies one of the pre-compiled patterns. */
        public enum RegEx {
            UNLIKELY_CANDIDATES,
            OK_MAYBE_ITS_A_CANDIDATE,
            POSITIVE,
            NEGATIVE,
            DIV_TO_P_ELEMENTS,
            VIDEO
        }

        /**
         * Returns the compiled pattern for the given key.
         *
         * @param regEx key of the requested pattern
         * @return the shared, case-insensitive pattern; {@code null} only for a key without a mapping
         */
        public static Pattern a(RegEx regEx) {
            switch (regEx) {
                case UNLIKELY_CANDIDATES:
                    return UNLIKELY_CANDIDATES_PATTERN;
                case OK_MAYBE_ITS_A_CANDIDATE:
                    return OK_MAYBE_ITS_A_CANDIDATE_PATTERN;
                case POSITIVE:
                    return POSITIVE_PATTERN;
                case NEGATIVE:
                    return NEGATIVE_PATTERN;
                case DIV_TO_P_ELEMENTS:
                    return DIV_TO_P_ELEMENTS_PATTERN;
                case VIDEO:
                    return VIDEO_PATTERN;
                default:
                    return null;
            }
        }
    }

    /**
     * Creates an instance from raw markup by converting it with {@code org.jsoup.e.a(String)}
     * and delegating to the document-based constructor.
     *
     * @param str  markup to process
     * @param fVar callback that receives the outcome of a run
     * @param i    value handed back as the first argument of every callback invocation
     */
    public KReadability(String str, f fVar, int i) {
        this(org.jsoup.e.a(str), fVar, i);
    }

    /**
     * Creates an instance that works directly on an existing document.
     *
     * @param document document to process; it is modified in place by later calls
     * @param fVar     callback that receives the outcome of a run
     * @param i        value handed back as the first argument of every callback invocation
     */
    public KReadability(Document document, f fVar, int i) {
        this.f = i;
        this.e = fVar;
        this.f1170a = document;
    }

    /**
     * Returns the trimmed string produced by {@code gVar.w()} (presumably the element's text).
     *
     * @param gVar element to read
     * @param z    when {@code true}, every run of two or more whitespace characters is deleted
     * @return the trimmed, optionally compacted string
     */
    private String a(g gVar, boolean z) {
        String text = gVar.w().trim();
        if (!z) {
            return text;
        }
        return text.replaceAll("\\s{2,}", "");
    }

    /**
     * Adds {@code d} to the score stored in the element's {@code readabilityContentScore} attribute.
     *
     * @param gVar element whose score is updated
     * @param d    amount to add (may be negative)
     * @return the same element, for chaining
     */
    private g a(g gVar, double d) {
        double updatedScore = d(gVar) + d;
        gVar.b("readabilityContentScore", Double.toString(updatedScore));
        return gVar;
    }

    /**
     * Substitutes an element with a newly created element named {@code str}, moving every
     * child node across.
     *
     * @param gVar element to substitute
     * @param str  name passed to the document when creating the new element
     * @return the newly created element that now holds the children
     */
    private g a(g gVar, String str) {
        g replacement = this.f1170a.a(str);
        // Work on a copy so that detaching children cannot disturb the iteration.
        Iterator<k> children = new ArrayList<k>(gVar.E()).iterator();
        while (children.hasNext()) {
            k child = children.next();
            child.J();
            replacement.a(child);
        }
        gVar.f(replacement);
        return replacement;
    }

    /**
     * Resets the element's score to zero, then applies a bonus or penalty chosen by the
     * element's tag name, followed by the class/id weight from {@code c(g)}.
     *
     * @param gVar element to initialise
     */
    private void a(g gVar) {
        final String[] boostedTags = {
            "pre", "td", "tr", "table", "blockquote", "figure", "span",
            "h1", "h2", "h3", "h4", "h5", "h6", "th"
        };
        final String[] penalisedTags = {"address", "ol", "ul", "dl", "dd", "dt", "li", "form"};

        gVar.b("readabilityContentScore", Integer.toString(0));
        String tag = gVar.k();

        double tagWeight = 0.0d;
        if ("div".equalsIgnoreCase(tag)) {
            tagWeight = 5.0d;
        } else {
            for (String candidate : boostedTags) {
                if (candidate.equalsIgnoreCase(tag)) {
                    tagWeight = 3.0d;
                    break;
                }
            }
            if (tagWeight == 0.0d) {
                for (String candidate : penalisedTags) {
                    if (candidate.equalsIgnoreCase(tag)) {
                        tagWeight = -3.0d;
                        break;
                    }
                }
            }
        }

        if (tagWeight != 0.0d) {
            a(gVar, tagWeight);
        }
        a(gVar, c(gVar));
    }

    /**
     * Computes the link density of an element: the compacted text length of all descendant
     * {@code <a>} elements divided by the compacted text length of the element itself.
     *
     * @param gVar element to measure
     * @return the ratio, or {@code 0.0} when the element has no text
     */
    private double b(g gVar) {
        double totalLength = a(gVar, true).length();
        if (totalLength == 0.0d) {
            return 0.0d;
        }
        double linkLength = 0.0d;
        // Walk the link list exactly once; requesting a new iterator on every check never advances.
        for (g link : c(gVar, "a")) {
            linkLength += a(link, true).length();
        }
        return linkLength / totalLength;
    }

    /**
     * Placeholder counter: always reports zero and ignores both arguments.
     *
     * <p>NOTE(review): callers compare the result against thresholds with "," and "，" as the
     * second argument, so this looks like a disabled character counter — confirm before
     * relying on it.
     *
     * @param gVar unused
     * @param str  unused
     * @return always {@code 0}
     */
    private int b(g gVar, String str) {
        final int count = 0;
        return count;
    }

    /**
     * Compiles a regular expression with case-insensitive matching.
     *
     * @param str regular expression source
     * @return the compiled pattern
     */
    private Pattern b(String str) {
        final int flags = Pattern.CASE_INSENSITIVE;
        return Pattern.compile(str, flags);
    }

    /**
     * Multiplies the score stored in the element's {@code readabilityContentScore} attribute by {@code d}.
     *
     * @param gVar element whose score is scaled
     * @param d    scale factor
     * @return the same element, for chaining
     */
    private g b(g gVar, double d) {
        double scaledScore = d(gVar) * d;
        gVar.b("readabilityContentScore", Double.toString(scaledScore));
        return gVar;
    }

    /**
     * Runs one pass: captures the body markup if it has not been captured yet, prepares the
     * document and, when preparation succeeds, builds the output.
     *
     * @param z forwarded to the output-building step; {@code true} marks the retry pass
     */
    private void b(boolean z) {
        boolean hasBody = this.f1170a.c() != null;
        if (hasBody && TextUtils.isEmpty(this.f1171b)) {
            this.f1171b = this.f1170a.c().A();
        }
        if (!b()) {
            // Preparation refused the document; report that through the callback.
            this.e.a(this.f, 4, 65536, true);
            return;
        }
        c(z);
    }

    /**
     * Prepares the document for extraction: rejects documents containing {@code frame}
     * elements, strips scripts, stylesheet links and style elements, rewrites
     * {@code noscript} and {@code blockquote} elements and runs the markup clean-up passes.
     *
     * @return {@code false} when the document contains frames, {@code true} otherwise
     */
    private boolean b() {
        Document doc = this.f1170a;
        if (doc.g("frame").size() > 0) {
            this.c = null;
            return false;
        }
        if (doc.c() == null) {
            doc.d("body");
        }
        for (g script : doc.g("script")) {
            script.J();
        }
        for (g noscript : doc.g("noscript")) {
            a(noscript, "div");
        }
        for (g link : c(doc.b(), "link")) {
            if ("stylesheet".equalsIgnoreCase(link.k("rel"))) {
                link.J();
            }
        }
        for (g style : doc.g("style")) {
            style.J();
        }
        for (g quote : doc.g("blockquote")) {
            a(quote, "blockquote><p");
        }
        c();
        d();
        e();
        h();
        f();
        // Section rewriting is only enabled for mode 1.
        if (this.d == 1) {
            g();
        }
        return true;
    }

    /**
     * Computes a weight from the strings returned by {@code gVar.z()} and {@code gVar.n()}
     * (presumably the class name and the id): each non-empty string loses 25 when it
     * matches the NEGATIVE pattern and gains 25 when it matches the POSITIVE pattern.
     *
     * @param gVar element to evaluate
     * @return the accumulated weight
     */
    private double c(g gVar) {
        String[] attributeValues = {gVar.z(), gVar.n()};
        double weight = 0.0d;
        for (String value : attributeValues) {
            if (TextUtils.isEmpty(value)) {
                continue;
            }
            Pattern negative = Patterns.a(Patterns.RegEx.NEGATIVE);
            if (negative != null && negative.matcher(value).find()) {
                weight -= 25.0d;
            }
            Pattern positive = Patterns.a(Patterns.RegEx.POSITIVE);
            if (positive != null && positive.matcher(value).find()) {
                weight += 25.0d;
            }
        }
        return weight;
    }

    /**
     * Collects the elements returned by {@code gVar.g(str)} and drops {@code gVar} itself
     * from the result.
     *
     * @param gVar element to search
     * @param str  tag name to look for
     * @return the matches without {@code gVar}
     */
    private Elements c(g gVar, String str) {
        Elements matches = gVar.g(str);
        matches.remove(gVar);
        return matches;
    }

    /**
     * Rewrites the body markup, replacing every empty paragraph {@code <p></p>} with a bare
     * {@code <p>} opening tag.
     *
     * <p>Strings are immutable, so the replaced copy has to be the value written back;
     * writing back the untouched markup would leave the body unchanged.
     */
    private void c() {
        String html = this.f1170a.c().A();
        String rewritten = html.replace("<p></p>", "<p>");
        this.f1170a.c().i(rewritten);
    }

    /**
     * Extracts the content, reports the outcome through the callback and replaces the body
     * with a wrapper holding the title element followed by the extracted content.
     *
     * <p>When the first pass yields no text, the captured body markup is restored and the
     * whole run is repeated once in retry mode.
     *
     * @param z {@code true} when this is already the retry pass
     */
    private void c(boolean z) {
        g outer = this.f1170a.a("div");
        g inner = this.f1170a.a("div");
        g title = a();
        g article = a(z);
        this.c = article;

        boolean extracted = !TextUtils.isEmpty(a(article, false));
        if (!extracted && !z) {
            this.f1170a.c().i(this.f1171b);
            b(true);
            return;
        }

        if (extracted) {
            this.e.a(this.f, 4, 0);
        } else {
            article.i("<p>ParseFailed.</p>");
            this.e.a(this.f, 4, 131072, true);
        }

        inner.a((k) title);
        inner.a((k) article);
        outer.a((k) inner);
        this.f1170a.c().i("");
        this.f1170a.c().b(outer);
    }

    /**
     * Reads the score stored in the element's {@code readabilityContentScore} attribute.
     *
     * @param gVar element to read
     * @return the parsed score, or {@code 0.0} when the attribute value is not a number
     */
    private double d(g gVar) {
        String stored = gVar.k("readabilityContentScore");
        try {
            return Double.parseDouble(stored);
        } catch (NumberFormatException ignored) {
            // An unparsable value counts as an unscored element.
            return 0.0d;
        }
    }

    /** Rewrites the body markup so that opening and closing {@code article} tags become {@code p} tags. */
    private void d() {
        g body = this.f1170a.c();
        String rewritten = body.A().replaceAll("(?i)<(\\/?)article[^>]*>", "<$1p>");
        body.i(rewritten);
    }

    /**
     * Calls {@code J()} on every descendant of {@code gVar} with the given tag name
     * (used throughout this class to drop nodes).
     *
     * @param gVar element to search
     * @param str  tag name of the elements to drop
     */
    private void d(g gVar, String str) {
        for (g match : c(gVar, str)) {
            match.J();
        }
    }

    /**
     * For every element selected by {@code "br + br"}, rewrites runs of two or more
     * {@code <br>} tags into a paragraph break inside the closest {@code p} ancestor, or
     * inside the direct parent when no such ancestor exists.
     */
    private void e() {
        Pattern brRun = b("(?i)(<br[^>]*>[ \n\r\t]*){2,}");
        for (g br : this.f1170a.c("br + br")) {
            g container = null;
            for (g ancestor : br.p()) {
                if (ancestor.l().a().equals("p")) {
                    container = ancestor;
                    break;
                }
            }
            if (container == null) {
                container = br.B();
                container.j("<p></p>");
            }
            String rewritten = brRun.matcher(container.A()).replaceAll("</p><p>");
            container.i(rewritten);
        }
    }

    /**
     * Cleans the extracted content: strips style attributes, conditionally drops forms,
     * lists and divs, drops a lone {@code h2}, removes paragraphs that have neither text
     * nor media, and strips {@code <br>} tags that directly precede a paragraph.
     *
     * @param gVar root of the extracted content
     */
    private void e(g gVar) {
        f(gVar);
        e(gVar, "form");
        boolean singleH2 = c(gVar, "h2").size() == 1;
        if (singleH2) {
            d(gVar, "h2");
        }
        e(gVar, "ul");
        e(gVar, "div");

        for (g paragraph : c(gVar, "p")) {
            int figures = c(paragraph, "figure").size();
            int images = c(paragraph, SocialConstants.PARAM_IMG_URL).size();
            int embeds = c(paragraph, "embed").size();
            int objects = c(paragraph, "object").size();
            boolean hasMedia = images != 0 || embeds != 0 || objects != 0 || figures != 0;
            if (!hasMedia && TextUtils.isEmpty(a(paragraph, false))) {
                paragraph.J();
            }
        }

        try {
            String html = gVar.A();
            gVar.i(html.replaceAll("(?i)<br[^>]*>\\s*<p", "<p"));
        } catch (Exception ignored) {
            // Best effort: a failure here leaves the markup as it was.
        }
    }

    /**
     * Drops descendants with the given tag name that look like non-content: anything with
     * a negative class/id weight, and otherwise anything failing the list-item, input,
     * text-length or link-density checks below.
     *
     * @param gVar root to clean
     * @param str  tag name of the elements to examine
     */
    private void e(g gVar, String str) {
        for (g node : c(gVar, str)) {
            double weight = c(node);
            if (weight < 0.0d) {
                node.J();
                continue;
            }
            if (!(b(node, ",") < 10 || b(node, "，") < 5)) {
                continue;
            }

            int paragraphs = c(node, "p").size();
            int images = c(node, SocialConstants.PARAM_IMG_URL).size();
            int listItems = c(node, "li").size() - 100;
            int inputs = c(node, "input").size();

            // Counted as in the original; the total does not take part in the decision below.
            int nonVideoEmbeds = 0;
            for (g embed : c(node, "embed")) {
                Pattern video = Patterns.a(Patterns.RegEx.VIDEO);
                if (video != null && !video.matcher(embed.o("src")).find()) {
                    nonVideoEmbeds++;
                }
            }

            double linkDensity = b(node);
            int textLength = a(node, true).length();

            boolean isList = "ul".equalsIgnoreCase(str) || "ol".equalsIgnoreCase(str);
            boolean remove =
                    (listItems > paragraphs && !isList)
                    || inputs > paragraphs / 3
                    || (textLength < 25 && (images == 0 || images > 2))
                    || (weight < 25.0d && linkDensity > 0.20000000298023224d)
                    || (weight > 25.0d && linkDensity > 0.5d);
            if (remove) {
                node.J();
            }
        }
    }

    /** Rewrites the body markup so that opening and closing {@code font} tags become {@code span} tags. */
    private void f() {
        g body = this.f1170a.c();
        String rewritten = body.A().replaceAll("(?i)<(\\/?)font[^>]*>", "<$1span>");
        body.i(rewritten);
    }

    /**
     * Calls {@code m("style")} on every element returned by {@code gVar.v()}
     * (presumably removing the inline style attribute from the whole subtree).
     *
     * @param gVar root of the subtree to process
     */
    private void f(g gVar) {
        for (g element : gVar.v()) {
            element.m("style");
        }
    }

    /** Rewrites the body markup so that opening and closing {@code section} tags become {@code p} tags. */
    private void g() {
        g body = this.f1170a.c();
        String rewritten = body.A().replaceAll("(?i)<(\\/?)section[^>]*>", "<$1p>");
        body.i(rewritten);
    }

    /** Rewrites the body markup with every opening and closing {@code span} tag deleted; the enclosed content stays. */
    private void h() {
        g body = this.f1170a.c();
        String rewritten = body.A().replaceAll("(?i)<(\\/?)span[^>]*>", "");
        body.i(rewritten);
    }

    /**
     * Scans the captured body markup line by line and returns the first non-empty result
     * of {@code m.b(line, str)}.
     *
     * @param str second argument forwarded to {@code m.b} for every line
     * @return the first non-empty result; otherwise the result for the last line, or an
     *         empty string when no body markup has been captured yet
     */
    public String a(String str) {
        String result = "";
        if (this.f1171b == null) {
            // Nothing captured yet (no pass has run, or the document had no body).
            return result;
        }
        for (String line : this.f1171b.split("\n")) {
            result = m.b(line, str);
            if (!TextUtils.isEmpty(result)) {
                break;
            }
        }
        return result;
    }

    /**
     * Builds an {@code h1} element whose content is the string returned by the document's
     * {@code d()} (presumably the page title).
     *
     * @return the new, unattached heading element
     */
    public g a() {
        g heading = this.f1170a.a("h1");
        String headingText = this.f1170a.d();
        heading.i(headingText);
        return heading;
    }

    /**
     * Extracts the main content of the document.
     *
     * <p>Steps: (1) on the first pass, drop elements whose class/id string looks unlikely
     * to be content, and rename divs without block-level markup to {@code p}; (2) score
     * the parent and grandparent of every paragraph; (3) scale each candidate by
     * {@code 1 - linkDensity} and keep the best one; (4) collect the best candidate and
     * its qualifying siblings into a new {@code div#readability-content} and clean it.
     *
     * @param z {@code true} on the retry pass, which skips the removal of unlikely elements
     * @return the container element holding the extracted content
     */
    public g a(boolean z) {
        // Step 1: prune unlikely nodes and normalise block-free divs.
        for (g node : this.f1170a.v()) {
            if (!z) {
                String classAndId = node.z() + node.n();
                Pattern unlikely = Patterns.a(Patterns.RegEx.UNLIKELY_CANDIDATES);
                Matcher unlikelyMatcher = unlikely != null ? unlikely.matcher(classAndId) : null;
                Pattern maybe = Patterns.a(Patterns.RegEx.OK_MAYBE_ITS_A_CANDIDATE);
                Matcher maybeMatcher = maybe != null ? maybe.matcher(classAndId) : null;
                if (unlikelyMatcher != null && unlikelyMatcher.find()
                        && maybeMatcher != null && !maybeMatcher.find()
                        && !"body".equalsIgnoreCase(node.k())) {
                    node.J();
                }
            }
            if ("div".equalsIgnoreCase(node.k())) {
                Pattern blockMarkup = Patterns.a(Patterns.RegEx.DIV_TO_P_ELEMENTS);
                Matcher blockMatcher = blockMarkup != null ? blockMarkup.matcher(node.A()) : null;
                if (blockMatcher != null && !blockMatcher.find()) {
                    try {
                        node.b("p");
                    } catch (Exception ignored) {
                        // Best effort: a div that cannot be renamed is simply left as it is.
                    }
                }
            }
        }

        // Step 2: score the parent and grandparent of each paragraph.
        ArrayList<g> candidates = new ArrayList<g>();
        for (g paragraph : this.f1170a.g("p")) {
            g parent = paragraph.B();
            if (parent == null) {
                continue;
            }
            g grandParent = parent.B();
            if (grandParent == null) {
                continue;
            }
            String text = a(paragraph, true);
            if (!parent.l("readabilityContentScore")) {
                a(parent);
                candidates.add(parent);
            }
            if (grandParent.B() != null && !grandParent.l("readabilityContentScore")) {
                a(grandParent);
                candidates.add(grandParent);
            }
            // One base point, plus the comma-separated segment counts (ASCII and full-width),
            // plus up to three points for length.
            double score = 0.0d + 1.0d + text.split(",").length + text.split("，").length
                    + Math.min(Math.floor(text.length() / 100.0d), 3.0d);
            a(parent, score);
            a(grandParent, score / 2.0d);
        }

        // Step 3: penalise link-heavy candidates and keep the highest score (first one wins ties).
        g top = null;
        for (g candidate : candidates) {
            b(candidate, 1.0d - b(candidate));
            if (top != null && d(candidate) <= d(top)) {
                continue;
            }
            top = candidate;
        }

        // No usable candidate: wrap the whole body in a div and use that instead.
        if (top == null || "body".equalsIgnoreCase(top.k())) {
            top = this.f1170a.a("div");
            top.i(this.f1170a.c().A());
            this.f1170a.c().i("");
            this.f1170a.c().a((k) top);
            a(top);
        }

        // Step 4: gather the top candidate and the siblings that qualify.
        g content = this.f1170a.a("div");
        content.b("id", "readability-content");
        double threshold = Math.max(10.0d, d(top) * 0.2d);
        for (g sibling : top.B().q()) {
            boolean append = sibling == top;
            if (d(sibling) >= threshold) {
                append = true;
            }
            if ("p".equalsIgnoreCase(sibling.k())) {
                double linkDensity = b(sibling);
                String text = a(sibling, true);
                int length = text.length();
                // A long paragraph qualifies when it is not dominated by links. The check is
                // "below 0.25"; exact equality on a computed ratio would practically never hold.
                if (length > 80 && linkDensity < 0.25d) {
                    append = true;
                } else if (length < 80 && linkDensity == 0.0d && text.matches(".*\\.( |$).*")) {
                    append = true;
                }
            }
            if (!append) {
                continue;
            }
            g toAppend = sibling;
            if (!"div".equalsIgnoreCase(toAppend.k()) && !"p".equalsIgnoreCase(toAppend.k())) {
                toAppend = a(toAppend, "div");
            }
            toAppend.m("class");
            content.a((k) toAppend);
        }

        e(content);
        return content;
    }

    /**
     * Stores the mode and starts the first (non-retry) pass.
     *
     * @param i mode value; {@code 1} additionally enables section rewriting during preparation
     */
    public final void a(int i) {
        final boolean retryPass = false;
        this.d = i;
        b(retryPass);
    }
}
