"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.supportedMimeTypes = void 0;
const locale_1 = require("../../locale");
const Resource_1 = require("../../models/Resource");
const Setting_1 = require("../../models/Setting");
const shim_1 = require("../../shim");
const types_1 = require("../database/types");
const types_2 = require("./utils/types");
const time_1 = require("@joplin/utils/time");
const Logger_1 = require("@joplin/utils/Logger");
const TaskQueue_1 = require("../../TaskQueue");
const eventManager_1 = require("../../eventManager");
const logger = Logger_1.default.create('OcrService');
// MIME types of the resources that are considered for OCR. This list is passed
// to Resource.needOcrCount() and Resource.needOcr() by
// OcrService.processResources() to select the resources to process.
//
// 'application/pdf' is handled separately from the image types by
// OcrService.recognize(): embedded text is used when present, otherwise the
// pages are converted to images before being given to the OCR driver.
//
// From: https://github.com/naptha/tesseract.js/blob/master/docs/image-format.md
exports.supportedMimeTypes = [
    'application/pdf',
    'image/bmp',
    'image/jpeg',
    'image/jpg',
    'image/png',
    'image/webp',
    'image/x-portable-bitmap',
];
// Builds the short "<id> (type <mime>)" label used to identify a resource in
// log and error messages.
const resourceInfo = (resource) => {
    const { id, mime } = resource;
    return `${id} (type ${mime})`;
};
// Returns the OCR driver ID to use for a resource.
//
// An ocr_driver_id of zero is mapped to PrintedText: when syncing with certain
// (older?) clients, resources can end up with an ocr_driver_id of zero.
// See https://github.com/laurent22/joplin/issues/13043
//
// Any other value is returned unchanged.
const getOcrDriverId = (resource) => {
    if (resource.ocr_driver_id !== 0) {
        return resource.ocr_driver_id;
    }
    return types_1.ResourceOcrDriverId.PrintedText;
};
/**
 * Service that runs OCR on resources and saves the recognised text back to
 * the Resource model.
 *
 * Resources are dispatched to one of two task queues depending on their OCR
 * driver: a "printed" queue (concurrency 5) and a "handwritten" queue
 * (concurrency 1). The service can be run once via `processResources()` or
 * periodically via `runInBackground()`.
 */
class OcrService {
    /**
     * @param drivers List of OCR drivers. Each driver is expected to expose a
     * `driverId` property and `recognize(language, filePath, resourceId)` and
     * `dispose()` methods, which is how this class uses them.
     */
    constructor(drivers) {
        this.isRunningInBackground_ = false;
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
        this.maintenanceTimer_ = null;
        this.pdfExtractDir_ = null;
        this.isProcessingResources_ = false;
        this.printedTextQueue_ = null;
        this.handwrittenTextQueue_ = null;
        this.drivers_ = drivers;
        this.printedTextQueue_ = new TaskQueue_1.default('printed', logger);
        this.printedTextQueue_.setConcurrency(5);
        this.printedTextQueue_.keepTaskResults = false;
        this.handwrittenTextQueue_ = new TaskQueue_1.default('handwritten', logger);
        this.handwrittenTextQueue_.setConcurrency(1);
        this.handwrittenTextQueue_.keepTaskResults = false;
    }
    /**
     * Returns the directory into which PDF pages are extracted as images,
     * creating it on first use. The path is cached after the first call.
     *
     * @returns {Promise<string>} `<tempDir>/ocr_pdf_extract`
     */
    async pdfExtractDir() {
        if (this.pdfExtractDir_ !== null) {
            return this.pdfExtractDir_;
        }
        const p = `${Setting_1.default.value('tempDir')}/ocr_pdf_extract`;
        await shim_1.default.fsDriver().mkdir(p);
        this.pdfExtractDir_ = p;
        return this.pdfExtractDir_;
    }
    /**
     * Whether the background service is currently started, i.e.
     * `runInBackground()` has been called and `stopRunInBackground()` has not
     * been called since.
     *
     * @returns {boolean}
     */
    get running() {
        // Return the state flag. Returning `this.runInBackground` would yield
        // the method itself, which is always truthy.
        return this.isRunningInBackground_;
    }
    /**
     * Runs OCR on a single resource.
     *
     * For PDFs, the embedded text is used if at least one page contains
     * non-blank text. Otherwise each page is converted to an image and
     * passed to the driver, and the page texts are joined with newlines.
     * Other MIME types are handed directly to the driver.
     *
     * @param language Language code forwarded to the driver.
     * @param resource Resource to process. Reads `id`, `mime`,
     * `encryption_applied` and `ocr_driver_id`.
     * @returns The recognition result, or `null` when the resource uses the
     * HandwrittenText driver and that driver is disabled in the settings.
     * @throws {Error} If the resource is encrypted or if no driver matches
     * the resource's driver ID. Driver errors are propagated.
     */
    async recognize(language, resource) {
        if (resource.encryption_applied) {
            throw new Error(`Cannot OCR encrypted resource: ${resource.id}`);
        }
        const driverId = getOcrDriverId(resource);
        if (driverId === types_1.ResourceOcrDriverId.HandwrittenText && !Setting_1.default.value('ocr.handwrittenTextDriverEnabled')) {
            logger.debug('Skipping OCR of', resource.id, 'with the HandwrittenText driver. The HTR driver has been disabled by the user.');
            return null;
        }
        const resourceFilePath = Resource_1.default.fullPath(resource);
        const driver = this.drivers_.find(d => d.driverId === driverId);
        if (!driver) {
            throw new Error(`Unknown driver ID: ${resource.ocr_driver_id}`);
        }
        if (resource.mime !== 'application/pdf') {
            return driver.recognize(language, resourceFilePath, resource.id);
        }
        // OCR can be slow for large PDFs.
        // Skip it if the PDF already includes text.
        const pageTexts = await shim_1.default.pdfExtractEmbeddedText(resourceFilePath);
        const hasEmbeddedText = pageTexts.some(text => !!text.trim().length);
        if (hasEmbeddedText) {
            return Object.assign({}, (0, types_2.emptyRecognizeResult)(), { ocr_status: types_1.ResourceOcrStatus.Done, ocr_text: pageTexts.join('\n') });
        }
        const imageFilePaths = await shim_1.default.pdfToImages(resourceFilePath, await this.pdfExtractDir());
        const results = [];
        try {
            let pageNumber = 0;
            for (const imageFilePath of imageFilePaths) {
                pageNumber++;
                logger.info(`Recognize: ${resourceInfo(resource)}: Processing PDF page ${pageNumber} / ${imageFilePaths.length}...`);
                results.push(await driver.recognize(language, imageFilePath, resource.id));
            }
        }
        finally {
            // Always delete the temporary page images, including when the
            // driver fails part-way through, so that they do not accumulate
            // in the extraction directory.
            for (const imageFilePath of imageFilePaths) {
                await shim_1.default.fsDriver().remove(imageFilePath);
            }
        }
        return Object.assign({}, (0, types_2.emptyRecognizeResult)(), { ocr_status: types_1.ResourceOcrStatus.Done, ocr_text: results.map(r => r.ocr_text).join('\n') });
    }
    /**
     * Disposes of all the drivers, one after the other.
     */
    async dispose() {
        for (const d of this.drivers_) {
            await d.dispose();
        }
    }
    /**
     * Finds the resources that need OCR, queues them on the queue matching
     * their driver, waits for both queues to drain, and emits
     * `OcrServiceResourcesProcessed` if at least one resource was queued.
     *
     * Does nothing if a previous call is still in progress. Per-resource
     * failures are logged and saved on the resource (status Error) rather
     * than thrown.
     */
    async processResources() {
        if (this.isProcessingResources_) {
            return;
        }
        this.isProcessingResources_ = true;
        // Everything after setting the flag is inside try/finally so that the
        // flag is always reset. Otherwise a failure in the initial count
        // query would block every later run.
        try {
            const totalResourcesToProcess = await Resource_1.default.needOcrCount(exports.supportedMimeTypes);
            const skippedResourceIds = [];
            logger.info(`Found ${totalResourcesToProcess} resources to process...`);
            // Creates the task that is executed by the queue for one resource.
            const makeQueueAction = (totalProcessed, language, resource) => {
                return async () => {
                    logger.info(`Processing resource ${totalProcessed + 1} / ${totalResourcesToProcess}: ${resourceInfo(resource)}...`);
                    let toSave = {
                        id: resource.id,
                    };
                    try {
                        const fetchStatus = await Resource_1.default.localState(resource.id);
                        if (fetchStatus.fetch_status === Resource_1.default.FETCH_STATUS_ERROR) {
                            throw new Error(`Cannot process resource ${resourceInfo(resource)} because it cannot be fetched from the server: ${fetchStatus.fetch_error}`);
                        }
                        if (fetchStatus.fetch_status !== Resource_1.default.FETCH_STATUS_DONE) {
                            // Not downloaded yet: exclude it from the next
                            // queries and leave the resource untouched.
                            skippedResourceIds.push(resource.id);
                            logger.info(`Skipping resource ${resourceInfo(resource)} because it has not been downloaded yet`);
                            return;
                        }
                        const recognizeResult = await this.recognize(language, resource);
                        if (recognizeResult) {
                            toSave = Object.assign({}, toSave, recognizeResult);
                        }
                    }
                    catch (error) {
                        let errorMessage;
                        if (typeof error === 'string') {
                            errorMessage = error;
                        }
                        else if (error !== null && error !== undefined) {
                            errorMessage = error.message;
                        }
                        logger.warn(`Could not process resource ${resourceInfo(resource)}`, error);
                        toSave.ocr_status = types_1.ResourceOcrStatus.Error;
                        toSave.ocr_text = '';
                        toSave.ocr_details = '';
                        toSave.ocr_error = errorMessage || 'Unknown error';
                    }
                    await Resource_1.default.save(toSave);
                };
            };
            const language = (0, locale_1.toIso639Alpha3)(Setting_1.default.value('locale'));
            const processedResourceIds = [];
            // Queue all resources for processing, in batches of 100. The loop
            // ends once a batch does not queue any new resource.
            let lastProcessedCount = -1;
            while (processedResourceIds.length > lastProcessedCount) {
                lastProcessedCount = processedResourceIds.length;
                const resources = await Resource_1.default.needOcr(exports.supportedMimeTypes, skippedResourceIds.concat(processedResourceIds), 100, {
                    fields: [
                        'id',
                        'mime',
                        'file_extension',
                        'encryption_applied',
                        'ocr_driver_id',
                    ],
                });
                for (const resource of resources) {
                    const driverId = getOcrDriverId(resource);
                    let queue = null;
                    if (driverId === types_1.ResourceOcrDriverId.PrintedText) {
                        queue = this.printedTextQueue_;
                    }
                    else if (driverId === types_1.ResourceOcrDriverId.HandwrittenText) {
                        queue = this.handwrittenTextQueue_;
                    }
                    if (!queue) {
                        logger.info('Skipped processing', resource.id, 'with OCR: Unsupported ocr_driver_id', resource.ocr_driver_id);
                        skippedResourceIds.push(resource.id);
                        continue;
                    }
                    await queue.pushAsync(resource.id, makeQueueAction(processedResourceIds.length, language, resource));
                    processedResourceIds.push(resource.id);
                }
            }
            // Wait for processing to finish
            await this.printedTextQueue_.waitForAll();
            await this.handwrittenTextQueue_.waitForAll();
            const totalProcessed = processedResourceIds.length;
            if (totalProcessed) {
                eventManager_1.default.emit(eventManager_1.EventName.OcrServiceResourcesProcessed);
            }
            logger.info(`${totalProcessed} resources have been processed.`);
        }
        finally {
            this.isProcessingResources_ = false;
        }
    }
    /**
     * Periodic maintenance task; currently just processes the resources.
     */
    async maintenance() {
        await this.processResources();
    }
    /**
     * Starts the background service: runs the maintenance task once, then
     * schedules it to run every 5 minutes. Does nothing if the service is
     * already started.
     */
    async runInBackground() {
        if (this.isRunningInBackground_) {
            return;
        }
        this.isRunningInBackground_ = true;
        if (this.maintenanceTimer_) {
            return;
        }
        logger.info('Starting background service...');
        await this.maintenance();
        // The service may have been stopped, or stopped and restarted, while
        // the initial maintenance was running. Do not install a timer in the
        // former case, nor a second one in the latter.
        if (!this.isRunningInBackground_ || this.maintenanceTimer_) {
            return;
        }
        // The timer handle must be kept for as long as the interval is
        // active, so that stopRunInBackground() can clear it.
        this.maintenanceTimer_ = shim_1.default.setInterval(async () => {
            await this.maintenance();
        }, 5 * time_1.Minute);
    }
    /**
     * Stops the background service: clears the maintenance timer, if any,
     * and stops both task queues.
     */
    async stopRunInBackground() {
        logger.info('Stopping background service...');
        if (this.maintenanceTimer_) {
            shim_1.default.clearInterval(this.maintenanceTimer_);
        }
        this.maintenanceTimer_ = null;
        this.isRunningInBackground_ = false;
        await this.printedTextQueue_.stop();
        await this.handwrittenTextQueue_.stop();
    }
}
exports.default = OcrService;
//# sourceMappingURL=OcrService.js.map