package org.apache.james.mailbox.store.extractor;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.apache.james.mailbox.extractor.ParsedContent;
import org.apache.james.mailbox.extractor.TextExtractor;
import org.apache.james.mailbox.model.ContentType;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;

/* loaded from: input_file:org/apache/james/mailbox/store/extractor/JsoupTextExtractor.class */
/**
 * {@link TextExtractor} handling {@code text/html} and {@code text/plain} content.
 *
 * <p>HTML input is parsed with Jsoup and reduced to its text after {@code title} elements have
 * been removed; plain text is decoded with the charset carried by the content type, falling
 * back to UTF-8 when the content type declares none. A {@code null} argument or any other MIME
 * type yields {@link ParsedContent#empty()}.
 */
public class JsoupTextExtractor implements TextExtractor {
    private static final String TITLE_HTML_TAG = "title";
    private static final String NO_BASE_URI = "";
    private static final ContentType.MimeType TEXT_HTML = ContentType.MimeType.of("text/html");
    private static final ContentType.MimeType TEXT_PLAIN = ContentType.MimeType.of("text/plain");

    /**
     * Tells whether this extractor handles the given content type.
     *
     * @param contentType content type to test; may be {@code null}
     * @return {@code true} for {@code text/html} or {@code text/plain}, {@code false} otherwise
     *         (including when {@code contentType} is {@code null})
     */
    public boolean applicable(ContentType contentType) {
        if (contentType == null) {
            return false;
        }
        return isOfType(contentType, TEXT_HTML) || isOfType(contentType, TEXT_PLAIN);
    }

    /**
     * Extracts the textual content of the stream on the calling thread.
     *
     * @param inputStream raw content; may be {@code null}
     * @param contentType declared type of the content; may be {@code null}
     * @return the extracted text, or {@link ParsedContent#empty()} when either argument is
     *         {@code null} or the MIME type is neither {@code text/html} nor {@code text/plain}
     * @throws Exception if reading or parsing the stream fails
     */
    public ParsedContent extractContent(InputStream inputStream, ContentType contentType) throws Exception {
        if (inputStream == null || contentType == null) {
            return ParsedContent.empty();
        }
        // The charset is resolved before the MIME type is examined, whatever that type is.
        Charset charset = charsetOrUtf8(contentType);
        if (isOfType(contentType, TEXT_HTML)) {
            return parseHtmlContent(inputStream, charset);
        }
        if (isOfType(contentType, TEXT_PLAIN)) {
            return parsePlainTextContent(inputStream, charset);
        }
        return ParsedContent.empty();
    }

    /**
     * Reactive counterpart of {@link #extractContent(InputStream, ContentType)}.
     *
     * <p>For HTML and plain text the stream is only read once the returned {@link Mono} is
     * subscribed, and that work is subscribed on {@link Schedulers#boundedElastic()}. The
     * charset, however, is resolved immediately when this method is called.
     *
     * @param inputStream raw content; may be {@code null}
     * @param contentType declared type of the content; may be {@code null}
     * @return a {@link Mono} emitting the extracted text, or {@link ParsedContent#empty()} when
     *         either argument is {@code null} or the MIME type is not handled
     */
    public Mono<ParsedContent> extractContentReactive(InputStream inputStream, ContentType contentType) {
        if (inputStream == null || contentType == null) {
            return Mono.just(ParsedContent.empty());
        }
        Charset charset = charsetOrUtf8(contentType);
        if (isOfType(contentType, TEXT_HTML)) {
            return Mono.fromCallable(() -> parseHtmlContent(inputStream, charset))
                .subscribeOn(Schedulers.boundedElastic());
        }
        if (isOfType(contentType, TEXT_PLAIN)) {
            return Mono.fromCallable(() -> parsePlainTextContent(inputStream, charset))
                .subscribeOn(Schedulers.boundedElastic());
        }
        return Mono.just(ParsedContent.empty());
    }

    /** Returns whether the MIME type of {@code contentType} equals {@code expected}. */
    private static boolean isOfType(ContentType contentType, ContentType.MimeType expected) {
        return contentType.mimeType().equals(expected);
    }

    /** Returns the charset carried by {@code contentType}, or UTF-8 when it has none. */
    private static Charset charsetOrUtf8(ContentType contentType) {
        return (Charset) contentType.charset().orElse(StandardCharsets.UTF_8);
    }

    /** Decodes the whole stream into a string using {@code charset}. */
    private ParsedContent parsePlainTextContent(InputStream inputStream, Charset charset) throws IOException {
        String text = IOUtils.toString(inputStream, charset);
        return ParsedContent.of(text);
    }

    /** Parses the stream as HTML, removes {@code title} elements and returns the remaining text. */
    private ParsedContent parseHtmlContent(InputStream inputStream, Charset charset) throws IOException {
        Document document = Jsoup.parse(inputStream, charset.name(), NO_BASE_URI);
        // Remove title elements so their text is not part of the extracted content.
        document.select(TITLE_HTML_TAG).remove();
        String text = document.text();
        return ParsedContent.of(text);
    }
}
