package com.dtfun.helper.htmlunit.crawler;

import com.dotfun.media.util.FormatedLogAppender;
import com.dotfun.novel.client.crawler.getter.NovelPageGetter;
import com.dotfun.novel.client.search.CrawlerFailReason;
import com.dotfun.novel.common.Novel;
import com.dotfun.novel.common.NovelChapter;
import com.dotfun.novel.common.NovelSearchIdx;
import com.dotfun.novel.common.SearchSiteOfCrawler;
import com.dotfun.novel.common.storage.EncHelperOfStorage;
import com.dtfun.helper.htmlunit.HtmlUnitCallParams;
import com.dtfun.helper.htmlunit.HtmlUnitCallResult;
import com.dtfun.helper.htmlunit.HtmlUtils;
import com.dtfun.helper.htmlunit.PageResult;
import com.dtlib.IAppGlobal;
import com.dtlib.htmlunit.MatchRule;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

/* loaded from: classes.dex */
/**
 * {@link NovelPageGetter} implementation that loads one chapter page through the HtmlUnit helper
 * methods inherited from {@code AbstractHelperForNovelUseHtmlUnit} and turns it into a
 * {@link NovelChapter}.
 *
 * <p>The first call on an instance additionally visits every URL configured under the
 * {@code "host.entry"} parameter before the real chapter URL is requested; later calls skip
 * that step.
 *
 * <p>NOTE(review): the instance keeps the mutable {@code _isHostEntryCalled} flag without any
 * synchronization visible in this class, so it looks intended for single-threaded use — confirm.
 */
public class PageCrawlerUseHtmlUnit extends AbstractHelperForNovelUseHtmlUnit implements NovelPageGetter {
    /** Rule name used to look up the match rules that identify image-only chapter content. */
    private static final String RULE_IMAGE_CONTENT = "match.img.content";

    /** Not read anywhere in this class; kept because the constructor signature supplies it. */
    private final IAppGlobal _appGlobal;

    /** Set to {@code true} once the {@code "host.entry"} URLs have all been visited successfully. */
    private boolean _isHostEntryCalled;

    private final Novel _novel;
    private final NovelSearchIdx _searchIdx;
    private final SearchSiteOfCrawler _site;

    /**
     * Creates a page crawler bound to one novel on one site.
     *
     * @param formatedLogAppender log sink, forwarded to the superclass
     * @param iAppGlobal          application-global handle (stored, not used in this class)
     * @param searchSiteOfCrawler site whose key and description are stamped on produced chapters
     * @param novel               novel the produced chapters belong to
     * @param novelSearchIdx      search index entry providing the chapter-list URL
     * @param encHelperOfStorage  storage helper, forwarded to the superclass
     */
    public PageCrawlerUseHtmlUnit(FormatedLogAppender formatedLogAppender, IAppGlobal iAppGlobal, SearchSiteOfCrawler searchSiteOfCrawler, Novel novel, NovelSearchIdx novelSearchIdx, EncHelperOfStorage encHelperOfStorage) {
        super(formatedLogAppender, encHelperOfStorage);
        this._isHostEntryCalled = false;
        this._appGlobal = iAppGlobal;
        this._site = searchSiteOfCrawler;
        this._novel = novel;
        this._searchIdx = novelSearchIdx;
    }

    /**
     * Tells whether the page matches any of the {@code "match.img.content"} rules.
     *
     * @param pageResult         page to inspect
     * @param htmlUnitCallParams call parameters the match rules are read from
     * @return {@code true} if at least one rule matches the lower-cased page XML,
     *         {@code false} if no rule is configured or none matches
     */
    private boolean isImageContent(PageResult pageResult, HtmlUnitCallParams htmlUnitCallParams) {
        List<MatchRule> imageRules = getMatchRuleOfNovelPage(htmlUnitCallParams, RULE_IMAGE_CONTENT);
        if (imageRules.isEmpty()) {
            // Keep the early exit so the page XML is not produced when there is nothing to match.
            return false;
        }
        // Loop-invariant: obtain the lower-cased XML once instead of once per rule.
        final String lowcaseXml = pageResult.asLowcaseXML();
        for (MatchRule rule : imageRules) {
            if (HtmlUtils.isMatchIgnorcase(lowcaseXml, rule.getKeywordsLowCase(), rule.is_matchByAnd())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Releases resources via {@code close()} when the object is collected.
     *
     * <p>{@code super.finalize()} runs in a {@code finally} block so that a failure inside
     * {@code close()} cannot prevent the superclass from finalizing.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            close();
        } finally {
            super.finalize();
        }
    }

    /**
     * Visits every non-empty {@code "host.entry"} URL once per instance, logging the page type
     * each call returns and setting the referer to the visited URL.
     *
     * <p>The caller's entry URL is temporarily replaced while the host entries are requested and
     * is always restored, even when a request throws. The "already called" flag is only set
     * after all host entries were processed without an exception, so a failed attempt is retried
     * on the next call.
     *
     * @param htmlUnitCallParams  call parameters; the entry URL is restored on exit, the referer
     *                            URL is left at the last visited host entry
     * @param htmlUnitCallResult  result holder passed through to {@code callEntryURL}
     * @throws Exception whatever {@code callEntryURL} throws
     */
    private void callHostEntriesOnce(HtmlUnitCallParams htmlUnitCallParams, HtmlUnitCallResult htmlUnitCallResult) throws Exception {
        if (this._isHostEntryCalled) {
            return;
        }
        final String originalEntryUrl = htmlUnitCallParams.get_entryURL();
        try {
            for (String hostEntryUrl : htmlUnitCallParams.getParams("host.entry")) {
                if (hostEntryUrl != null && hostEntryUrl.length() > 0) {
                    htmlUnitCallParams.set_entryURL(hostEntryUrl);
                    this._logAppender.append("host.entry.call=" + callEntryURL(htmlUnitCallParams, htmlUnitCallResult).get_pageType());
                    htmlUnitCallParams.setRefererURL(hostEntryUrl);
                }
            }
        } finally {
            // Never leave the caller's params pointing at a host-entry URL.
            htmlUnitCallParams.set_entryURL(originalEntryUrl);
        }
        this._isHostEntryCalled = true;
    }

    /**
     * Loads the page at the entry URL of {@code htmlUnitCallParams} and extracts one chapter.
     *
     * <p>On failure the method returns {@code null} and stores the reason in
     * {@code atomicReference}:
     * <ul>
     *   <li>{@code ACCESS_URL_FAILED} — the call returned a null page or no HTML page;</li>
     *   <li>{@code MATCH_FAILED} — no (non-blank) title was found;</li>
     *   <li>{@code IMG_TEXT} — no text content, and the page matches an image-content rule;</li>
     *   <li>{@code MISSING_CONTENT} — no text content and no image-content rule matched.</li>
     * </ul>
     *
     * @param htmlUnitCallParams call parameters (entry URL, match rules, filters)
     * @param atomicReference    receives the failure reason when {@code null} is returned
     * @param i                  chapter number assigned to the produced chapter
     * @return the populated chapter, or {@code null} on failure
     * @throws Exception propagated from the underlying page call / parsing helpers
     */
    @Override // com.dotfun.novel.client.crawler.getter.NovelPageGetter
    public NovelChapter getOnePageFromPageEntry(HtmlUnitCallParams htmlUnitCallParams, AtomicReference<CrawlerFailReason> atomicReference, int i) throws Exception {
        HtmlUnitCallResult htmlUnitCallResult = new HtmlUnitCallResult();
        // Timer starts before the host-entry warm-up, so the logged cost includes it on the first call.
        final long startMillis = System.currentTimeMillis();
        callHostEntriesOnce(htmlUnitCallParams, htmlUnitCallResult);

        PageResult pageResult = callEntryURL(htmlUnitCallParams, htmlUnitCallResult);
        if (pageResult.isNullPage()) {
            addLogLine("failed for null page(entry page)");
            atomicReference.set(CrawlerFailReason.ACCESS_URL_FAILED);
            return null;
        }
        this._logAppender.append("get page.cost=" + (System.currentTimeMillis() - startMillis) + ",chap-no=" + i);

        HtmlPage htmlPage = pageResult.get_htmlPage();
        if (htmlPage == null) {
            atomicReference.set(CrawlerFailReason.ACCESS_URL_FAILED);
            this._logAppender.append("failed for not valid html-page,page-type=" + pageResult.get_pageType());
            return null;
        }

        // Guard against a null title so a missing match is reported as MATCH_FAILED, not an NPE.
        String rawTitle = findTextContentFromPage(htmlPage, htmlUnitCallParams, getTitleMatchKeywords());
        String title = rawTitle == null ? "" : rawTitle.trim();
        if (title.isEmpty()) {
            this._logAppender.append("failed for no title matched");
            atomicReference.set(CrawlerFailReason.MATCH_FAILED);
            return null;
        }

        // The current time is handed to the parser; presumably it serves as the fallback
        // when the page carries no usable modification time — verify in the helper.
        long modifyTime = parseHtmlPageModifyTime(htmlPage, System.currentTimeMillis(), htmlUnitCallParams.getOtherParam("gmt.pattern"));
        String content = filterContent(htmlUnitCallParams, findFormatTextContentFromPage(htmlPage, htmlUnitCallParams, getContentMatchKeywords()), "novel.filter.type", "novel.filter.keyword.prefix");
        if (content == null || content.length() <= 0) {
            if (isImageContent(pageResult, htmlUnitCallParams)) {
                this._logAppender.append("failed for image content matched");
                atomicReference.set(CrawlerFailReason.IMG_TEXT);
                return null;
            }
            this._logAppender.append("failed for no content matched");
            atomicReference.set(CrawlerFailReason.MISSING_CONTENT);
            return null;
        }

        NovelChapter novelChapter = new NovelChapter(this._novel, i);
        novelChapter.set_chaptListURL(this._searchIdx.get_chaptListUrl());
        novelChapter.set_content(content);
        novelChapter.set_updateTime(modifyTime);
        novelChapter.set_siteName(this._site.get_siteKey());
        novelChapter.set_sourceURL(htmlUnitCallParams.get_entryURL());
        novelChapter.set_title(title);
        novelChapter.setSiteDesc(this._site.getDescOfSite());
        return novelChapter;
    }
}
