package org.apache.james.mailbox.store.search;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.apache.james.mailbox.extractor.ParsedContent;
import org.apache.james.mailbox.extractor.TextExtractor;
import org.apache.james.mailbox.model.ContentType;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

/* loaded from: input_file:org/apache/james/mailbox/store/search/PDFTextExtractor.class */
/**
 * {@link TextExtractor} that extracts the text of PDF documents with PDFBox and
 * decodes every other content type as UTF-8 text.
 */
public class PDFTextExtractor implements TextExtractor {
    /** MIME type that selects the PDF extraction path. */
    static final ContentType.MimeType PDF_TYPE = ContentType.MimeType.of("application/pdf");

    /**
     * Extracts the textual content of the given stream.
     *
     * <p>When the MIME type of {@code contentType} equals {@link #PDF_TYPE}, the stream is
     * parsed as a PDF document and its text is returned. Otherwise the stream is read fully
     * and decoded as UTF-8. This method does not close {@code inputStream} itself.
     *
     * @param inputStream the raw content to read; must not be {@code null}
     * @param contentType the declared content type of the stream; must not be {@code null}
     * @return the extracted text wrapped in a {@link ParsedContent}
     * @throws NullPointerException if {@code inputStream} or {@code contentType} is {@code null}
     * @throws Exception if the stream cannot be read or the PDF cannot be parsed
     */
    public ParsedContent extractContent(InputStream inputStream, ContentType contentType) throws Exception {
        Preconditions.checkNotNull(inputStream);
        Preconditions.checkNotNull(contentType);
        if (isPDF(contentType)) {
            return extractTextFromPDF(inputStream);
        }
        return ParsedContent.of(IOUtils.toString(inputStream, StandardCharsets.UTF_8));
    }

    /** Returns whether the MIME type of {@code contentType} equals {@link #PDF_TYPE}. */
    private boolean isPDF(ContentType contentType) {
        return contentType.mimeType().equals(PDF_TYPE);
    }

    /**
     * Loads the stream as a PDF document and returns its text.
     *
     * @param inputStream the PDF bytes
     * @return the text produced by {@link PDFTextStripper}
     * @throws IOException if the document cannot be loaded, stripped, or closed
     */
    private ParsedContent extractTextFromPDF(InputStream inputStream) throws IOException {
        // PDDocument is Closeable: release it even when text stripping fails,
        // otherwise the loaded document's resources leak on every call.
        try (PDDocument document = PDDocument.load(inputStream)) {
            return ParsedContent.of(new PDFTextStripper().getText(document));
        }
    }
}
