package com.dotfun.novel.client.autotask;

import com.dotfun.media.util.FormatedLogAppender;
import com.dotfun.novel.client.crawler.getter.NovelListMatchRule;
import com.dotfun.novel.client.search.CrawlerFailReason;
import com.dotfun.novel.client.search.SearchResultNotify;
import com.dotfun.novel.client.search.SearchResultUpdater;
import com.dotfun.novel.common.Novel;
import com.dotfun.novel.common.NovelSearchIdx;
import com.dotfun.novel.common.NovelSearchTodo;
import com.dotfun.novel.common.SearchSiteOfCrawler;
import com.dotfun.novel.common.storage.EncHelperOfStorage;
import com.dotfun.novel.common.storage.FactoryOfAsyncSaver;
import com.dtfun.helper.htmlunit.HtmlUnitCallParams;
import com.dtfun.helper.htmlunit.HtmlUnitCallResult;
import com.dtfun.helper.htmlunit.HtmlUtils;
import com.dtfun.helper.htmlunit.PageResult;
import com.dtfun.helper.htmlunit.crawler.NovelSearchUseHtmlUnitImp1;
import com.dtlib.IAppGlobal;
import com.dtlib.htmlunit.MatchRule;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.xalan.templates.Constants;
import org.slf4j.LoggerFactory;

/* loaded from: classes.dex */
/**
 * Crawls the novel listing of one "type" (category) entry page and follows its pager.
 *
 * <p>Starting from the configured entry URL, each page is fetched through the inherited
 * {@code callEntryPage}, its novel entries are parsed according to the
 * {@code novel.list.match.type} rule, grouped per {@link Novel} and handed to
 * {@link FactoryOfAsyncSaver}. Next-page links are discovered with the
 * {@code page.next.*} parameters of the {@link HtmlUnitCallParams} and queued until no
 * unseen page remains or a page cannot be accessed.
 *
 * <p>The task is meant to be executed through {@link #run()}; {@link #isDone()} reports
 * whether that execution has finished.
 */
public class NovelSearchFromTypeEntryUseHtmlUnitImp extends NovelSearchUseHtmlUnitImp1 implements Runnable {
    private static final String PAGE_NEXT_ANCHOR_MATCH_RULE = "page.next.anchor.match.rule";
    private static final String PAGE_NEXT_BODY_DIV_ATTR_NAME = "page.next.div.attr.name";
    private static final String PAGE_NEXT_BODY_DIV_ATTR_VALUE = "page.next.div.attr.value";
    private static final String PAGE_NEXT_BODY_TAG_ATTR_NAME = "page.next.tag.attr.key";
    private static final String PAGE_NEXT_BODY_TAG_ATTR_VALUE = "page.next.tag.attr.value";
    private static final String PAGE_NEXT_BODY_TAG_NAME = "page.next.tag.name";
    private static final String PAGE_NEXT_MATCH_TYPE = "page.next.match.type";
    private final HtmlUnitCallParams _callParam;
    private final String _defaultTypeNames;
    private final AtomicReference<CrawlerFailReason> _failReason;
    private final AtomicBoolean _isDone;
    private final String _strEntryURL;
    private final String _strHostEntry;

    /**
     * Creates a crawler task for one type entry.
     *
     * @param encHelperOfStorage  forwarded to the superclass
     * @param iAppGlobal          forwarded to the superclass
     * @param formatedLogAppender forwarded to the superclass
     * @param searchSiteOfCrawler forwarded to the superclass
     * @param searchResultNotify  forwarded to the superclass
     * @param searchResultUpdater forwarded to the superclass
     * @param set                 forwarded to the superclass
     * @param novelSearchTodo     forwarded to the superclass
     * @param entryUrl            first listing page to crawl
     * @param defaultTypeNames    type name(s) passed to the entry parsers and written to the log
     * @param callParams          HtmlUnit call parameters; its entry URL is overwritten while crawling
     * @param failReason          receives the failure reason when a page cannot be accessed or matched
     * @param allowSameNovelDifferentSite stored in the inherited {@code _allowSameNovelDifferentSite} flag
     * @param hostEntryUrl        URL called once before crawling; when {@code null} the entry URL
     *                            currently held by {@code callParams} is used instead
     */
    public NovelSearchFromTypeEntryUseHtmlUnitImp(EncHelperOfStorage encHelperOfStorage, IAppGlobal iAppGlobal, FormatedLogAppender formatedLogAppender, SearchSiteOfCrawler searchSiteOfCrawler, SearchResultNotify searchResultNotify, SearchResultUpdater searchResultUpdater, Set<String> set, NovelSearchTodo novelSearchTodo, String entryUrl, String defaultTypeNames, HtmlUnitCallParams callParams, AtomicReference<CrawlerFailReason> failReason, boolean allowSameNovelDifferentSite, String hostEntryUrl) {
        super(encHelperOfStorage, iAppGlobal, formatedLogAppender, searchSiteOfCrawler, searchResultNotify, searchResultUpdater, set, novelSearchTodo);
        this._isDone = new AtomicBoolean(false);
        this._strEntryURL = entryUrl;
        this._defaultTypeNames = defaultTypeNames;
        this._callParam = callParams;
        this._failReason = failReason;
        this._allowSameNovelDifferentSite = allowSameNovelDifferentSite;
        String hostEntry = hostEntryUrl == null ? callParams.get_entryURL() : hostEntryUrl;
        // Normalise to "" so the later length check cannot hit a null host entry.
        this._strHostEntry = hostEntry == null ? "" : hostEntry;
    }

    /**
     * Groups search indexes by the novel they belong to, preserving first-seen order.
     *
     * @param list indexes parsed from one page
     * @return insertion-ordered map from novel to the indexes that reference it
     */
    private Map<Novel, List<NovelSearchIdx>> groupByNovel(List<NovelSearchIdx> list) {
        Map<Novel, List<NovelSearchIdx>> grouped = new LinkedHashMap<Novel, List<NovelSearchIdx>>();
        for (NovelSearchIdx idx : list) {
            Novel novel = idx.get_novel();
            List<NovelSearchIdx> bucket = grouped.get(novel);
            if (bucket == null) {
                bucket = new ArrayList<NovelSearchIdx>();
                grouped.put(novel, bucket);
            }
            bucket.add(idx);
        }
        return grouped;
    }

    /**
     * Crawls the entry page and every next page reachable from it.
     *
     * <p>Failures are logged rather than propagated: a failure on a single page moves on to
     * the next queued page, while an inaccessible or null page stops the crawl and records
     * {@link CrawlerFailReason#ACCESS_URL_FAILED}.
     */
    private void searchNovel() throws Exception {
        final String typeNames = this._defaultTypeNames;
        final HtmlUnitCallParams callParams = this._callParam;
        try {
            Set<String> visited = new LinkedHashSet<String>();
            callHostEntryFirst(callParams, visited);

            List<String> pending = new ArrayList<String>();
            pending.add(this._strEntryURL);
            this._logAppender.warnOutThenClear(LoggerFactory.getLogger("crawler"));
            while (!pending.isEmpty()) {
                String pageUrl = pending.remove(0);
                if (pageUrl == null || pageUrl.isEmpty()) {
                    continue;
                }
                // Remember the page so a pager that links back to it cannot re-queue it forever.
                visited.add(pageUrl);
                TimeUnit.SECONDS.sleep(1L);
                boolean keepCrawling;
                try {
                    keepCrawling = crawlOnePage(callParams, pageUrl, typeNames, visited, pending);
                } catch (InterruptedException interrupted) {
                    // Let the outer handler restore the interrupt flag and end the crawl.
                    throw interrupted;
                } catch (Throwable th) {
                    this._logAppender.append("get novels in one page failed,url=" + pageUrl + ",type=" + typeNames, th);
                    // Flush now: the appender is replaced when the next page starts.
                    this._logAppender.warnOutThenClear(LoggerFactory.getLogger("search"));
                    keepCrawling = true;
                }
                if (!keepCrawling) {
                    break;
                }
            }
        } catch (Throwable th) {
            if (th instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            this._logAppender.append("get novels in one page failed,type=" + typeNames + ",url=", th);
        }
    }

    /**
     * Calls the host entry page once before crawling and marks it as visited.
     * A failure here is logged and does not stop the crawl.
     */
    private void callHostEntryFirst(HtmlUnitCallParams callParams, Set<String> visited) {
        String hostEntry = this._strHostEntry;
        if (hostEntry == null || hostEntry.length() == 0) {
            return;
        }
        try {
            HtmlUnitCallResult callResult = new HtmlUnitCallResult();
            callParams.set_entryURL(hostEntry);
            if (callEntryPage(callParams, callResult, new AtomicReference<PageResult>())) {
                this._logAppender.append("call host entry first succ");
            }
            visited.add(hostEntry);
        } catch (Throwable th) {
            this._logAppender.append("call entry host failed,url=" + hostEntry, th);
        }
    }

    /**
     * Fetches one listing page, queues its unseen next pages and saves the novels found on it.
     *
     * @return {@code true} to continue with the next queued page, {@code false} when the page
     *         could not be accessed and the crawl must stop
     */
    private boolean crawlOnePage(HtmlUnitCallParams callParams, String pageUrl, String typeNames, Set<String> visited, List<String> pending) throws Exception {
        long startedAt = System.currentTimeMillis();
        AtomicReference<PageResult> pageHolder = new AtomicReference<PageResult>();
        HtmlUnitCallResult callResult = new HtmlUnitCallResult();
        callParams.set_entryURL(pageUrl);
        this._logAppender = new FormatedLogAppender();
        this._logAppender.append("type-crawler start:" + pageUrl + ",type=" + typeNames);
        if (!callEntryPage(callParams, callResult, pageHolder)) {
            this._failReason.set(CrawlerFailReason.ACCESS_URL_FAILED);
            this._logAppender.append("failed for entry page,entry=" + pageUrl);
            this._logAppender.warnOutThenClear(LoggerFactory.getLogger("search"));
            return false;
        }
        PageResult pageResult = pageHolder.get();
        if (pageResult == null || pageResult.isNullPage()) {
            // Only this case is a "null page" failure; normal completion must not report it.
            this._logAppender.append("failed for null page(entry page)" + pageUrl);
            this._failReason.set(CrawlerFailReason.ACCESS_URL_FAILED);
            this._logAppender.warnOutThenClear(LoggerFactory.getLogger("search"));
            return false;
        }
        addCost(startedAt);

        List<String> nextPages = tryFindNextPage(callParams, pageResult);
        for (String nextUrl : nextPages) {
            if (!visited.contains(nextUrl) && !pending.contains(nextUrl)) {
                pending.add(nextUrl);
            }
        }
        this._logAppender.append("next page found,cnt=" + nextPages.size());

        List<NovelSearchIdx> found = parseNovelList(callParams, pageResult, typeNames);
        this._logAppender.append("novel get from current page.cnt=" + found.size() + ",type=" + typeNames);
        for (Map.Entry<Novel, List<NovelSearchIdx>> entry : groupByNovel(found).entrySet()) {
            // Hand over only this novel's own indexes, in a list that is not cleared below.
            FactoryOfAsyncSaver.getInstance().saveSearchIdxs(entry.getKey(), entry.getValue(), "saveIdx");
            TimeUnit.MILLISECONDS.sleep(10L);
        }
        // The per-novel groups given to the saver are separate lists and stay intact.
        found.clear();
        TimeUnit.SECONDS.sleep(10L);
        this._logAppender.warnOutThenClear(LoggerFactory.getLogger("search"));
        return true;
    }

    /**
     * Parses the novel entries of a page using the configured list match rule.
     *
     * @return the parsed entries; an empty list when the rule is unsupported, parsing failed
     *         or the parser returned nothing (never {@code null})
     */
    private List<NovelSearchIdx> parseNovelList(HtmlUnitCallParams callParams, PageResult pageResult, String typeNames) {
        List<NovelSearchIdx> parsed = null;
        try {
            NovelListMatchRule rule = getNovelListMatchRule(callParams, "novel.list.match.type");
            if (NovelListMatchRule.TABLE.equals(rule)) {
                parsed = parseEntrysFromTable(callParams, pageResult.get_htmlPage(), this._failReason, -1, typeNames, false);
            } else if (NovelListMatchRule.DIV_CLASS.equals(rule)) {
                parsed = parseEntryFromDivs(callParams, pageResult.get_htmlPage(), this._failReason, -1, typeNames, false);
            } else if (NovelListMatchRule.LIST_ITEM.equals(rule)) {
                parsed = parseEntrysFromListItem(callParams, pageResult.get_htmlPage(), this._failReason, -1, typeNames, false);
            } else {
                this._logAppender.append("nosupport:" + rule);
                this._failReason.set(CrawlerFailReason.MATCH_FAILED);
                this._logAppender.warnOutThenClear(LoggerFactory.getLogger("search"));
            }
        } catch (Throwable th) {
            this._logAppender.append("parse novel item from search result failed,site-key=" + this._site.get_siteKey(), th);
        }
        return parsed == null ? new ArrayList<NovelSearchIdx>() : parsed;
    }

    /**
     * Finds the URL(s) of the following listing page(s) according to {@code page.next.match.type}.
     *
     * <p>Supported types: {@code divAttr} and {@code tagAttr} yield at most one URL taken from
     * the first matching anchor inside the located container; {@code divAnchorList} yields
     * every matching anchor inside the located div, without duplicates.
     *
     * @return absolute URLs of candidate next pages, possibly empty, never {@code null}
     */
    private List<String> tryFindNextPage(HtmlUnitCallParams htmlUnitCallParams, PageResult pageResult) {
        String matchType = htmlUnitCallParams.getOtherParam(PAGE_NEXT_MATCH_TYPE);
        if (matchType == null || matchType.length() == 0) {
            this._logAppender.append("next-page match ignor,for body  match-type not define");
            return new ArrayList<String>(0);
        }
        List<MatchRule> anchorRules = getMatchRuleOfNovelPage(htmlUnitCallParams, PAGE_NEXT_ANCHOR_MATCH_RULE);
        if (anchorRules.isEmpty()) {
            this._logAppender.append("next-page match ignor,for next-page anchor  match-rule not define");
            return new ArrayList<String>(0);
        }
        HtmlPage page = pageResult.get_htmlPage();

        if (matchType.equalsIgnoreCase("divAttr")) {
            HtmlDivision container = findDivByAttributeValue(page, htmlUnitCallParams, PAGE_NEXT_BODY_DIV_ATTR_VALUE, PAGE_NEXT_BODY_DIV_ATTR_NAME);
            if (container == null) {
                this._logAppender.append("next-page body not found by divAttr");
                return new ArrayList<String>(0);
            }
            return listOfNonEmpty(findNextPageAnchorURL(container, htmlUnitCallParams, PAGE_NEXT_ANCHOR_MATCH_RULE, page));
        }

        if (matchType.equalsIgnoreCase("tagAttr")) {
            String tagName = htmlUnitCallParams.getOtherParam(PAGE_NEXT_BODY_TAG_NAME);
            if (tagName == null || tagName.isEmpty()) {
                this._logAppender.append("next-page body not found for missing tag-name for by tagAttr");
                return new ArrayList<String>(0);
            }
            HtmlElement container = findElementByTagAndAttributeValue(page, tagName, htmlUnitCallParams, PAGE_NEXT_BODY_TAG_ATTR_VALUE, PAGE_NEXT_BODY_TAG_ATTR_NAME);
            if (container == null) {
                this._logAppender.append("next-page body not found by tagAttr");
                return new ArrayList<String>(0);
            }
            return listOfNonEmpty(findNextPageAnchorURL(container, htmlUnitCallParams, PAGE_NEXT_ANCHOR_MATCH_RULE, page));
        }

        if (!matchType.equalsIgnoreCase("divAnchorList")) {
            this._logAppender.append("next-page body not found for unsupport type:" + matchType);
            return new ArrayList<String>(0);
        }
        HtmlDivision pager = findDivByAttributeValue(page, htmlUnitCallParams, PAGE_NEXT_BODY_DIV_ATTR_VALUE, PAGE_NEXT_BODY_DIV_ATTR_NAME);
        if (pager == null) {
            this._logAppender.append("next-page body not found by divAnchorList");
            return new ArrayList<String>(0);
        }
        // A set keeps one entry per URL even when several anchors or rules hit the same link.
        Set<String> urls = new LinkedHashSet<String>();
        for (HtmlElement anchor : pager.getElementsByTagName("a")) {
            if (matchesAnyRule(anchor.asXml().toLowerCase(Locale.CHINA), anchorRules)) {
                String url = resolveAnchorUrl(anchor, page);
                if (!url.isEmpty()) {
                    urls.add(url);
                }
            }
        }
        return new ArrayList<String>(urls);
    }

    /** Wraps a URL in a mutable list, or returns an empty list when the URL is empty. */
    private List<String> listOfNonEmpty(String url) {
        List<String> result = new ArrayList<String>();
        if (url.length() > 0) {
            result.add(url);
        }
        return result;
    }

    /** Tells whether the lower-cased markup matches the keywords of at least one rule. */
    private boolean matchesAnyRule(String loweredXml, List<MatchRule> rules) {
        for (MatchRule rule : rules) {
            if (HtmlUtils.isMatchIgnorcase(loweredXml, rule.getKeywordsLowCase(), true)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads the {@code href} of an anchor and turns it into an absolute URL.
     *
     * @return the trimmed href when it already starts with {@code http}, otherwise the href
     *         qualified against {@code htmlPage}; {@code ""} when the href is missing, blank
     *         or cannot be qualified
     */
    private String resolveAnchorUrl(HtmlElement anchor, HtmlPage htmlPage) {
        String href = anchor.getAttribute(Constants.ATTRNAME_HREF);
        if (href == null || href.isEmpty()) {
            return "";
        }
        String trimmed = href.trim();
        if (trimmed.isEmpty() || trimmed.startsWith("http")) {
            return trimmed;
        }
        try {
            return htmlPage.getFullyQualifiedUrl(trimmed).toString();
        } catch (Exception e) {
            // An unresolvable link is reported and skipped instead of being returned as-is.
            addLogLine("invalid url entry:" + trimmed);
            return "";
        }
    }

    /**
     * Searches the anchors below {@code domElement} for the first one matching the rules
     * stored under parameter {@code str} and returns its absolute URL.
     *
     * @param domElement         container whose descendant anchors are examined
     * @param htmlUnitCallParams source of the match rules
     * @param str                parameter key of the anchor match rules
     * @param htmlPage           page used to qualify relative links
     * @return the absolute URL of the first usable matching anchor, or {@code ""} when none exists
     */
    protected String findNextPageAnchorURL(DomElement domElement, HtmlUnitCallParams htmlUnitCallParams, String str, HtmlPage htmlPage) {
        DomNodeList<HtmlElement> anchors = domElement.getElementsByTagName("a");
        if (anchors.isEmpty()) {
            return "";
        }
        List<MatchRule> rules = getMatchRuleOfNovelPage(htmlUnitCallParams, str);
        if (rules.isEmpty()) {
            return "";
        }
        for (HtmlElement anchor : anchors) {
            if (matchesAnyRule(anchor.asXml().toLowerCase(Locale.CHINA), rules)) {
                String url = resolveAnchorUrl(anchor, htmlPage);
                if (!url.isEmpty()) {
                    return url;
                }
            }
        }
        return "";
    }

    /**
     * Searches all anchors of {@code htmlPage} for the first one matching the rules stored
     * under parameter {@code str}.
     *
     * @param htmlPage           page whose anchors are examined
     * @param htmlUnitCallParams source of the match rules
     * @param str                parameter key of the anchor match rules
     * @return the raw {@code href} attribute of the first matching anchor (not qualified
     *         against the page), or {@code ""} when none matches
     */
    protected String findNextPageAnchorURL(HtmlPage htmlPage, HtmlUnitCallParams htmlUnitCallParams, String str) {
        List<HtmlAnchor> anchors = htmlPage.getAnchors();
        if (anchors.isEmpty()) {
            return "";
        }
        List<MatchRule> rules = getMatchRuleOfNovelPage(htmlUnitCallParams, str);
        if (rules.isEmpty()) {
            return "";
        }
        for (HtmlAnchor anchor : anchors) {
            if (!matchesAnyRule(anchor.asXml().toLowerCase(Locale.CHINA), rules)) {
                continue;
            }
            String href = anchor.getHrefAttribute();
            if (href != null && href.length() > 0) {
                return href;
            }
        }
        return "";
    }

    /**
     * @return a two-element array: the default type name(s) followed by the entry URL
     */
    public String[] getTypeAndEntry() {
        return new String[]{this._defaultTypeNames, this._strEntryURL};
    }

    /**
     * @return {@code true} once {@link #run()} has finished, whether it succeeded or failed
     */
    public boolean isDone() {
        return this._isDone.get();
    }

    /**
     * Runs the crawl. On an unexpected failure the crawler is closed and the collected log
     * is written to the {@code crawler} logger. The done flag is set in every case so that
     * callers polling {@link #isDone()} are released after a normal completion too.
     */
    @Override // java.lang.Runnable
    public void run() {
        try {
            searchNovel();
        } catch (Throwable th) {
            close();
            LoggerFactory.getLogger("crawler").warn(getLoggedString(), th);
        } finally {
            this._isDone.set(true);
        }
    }
}
