friendica-addons/tesseract/tesseract.php

39 lines
1.0 KiB
PHP
Raw Normal View History

2024-01-14 19:21:08 +00:00
<?php
/**
* Name: Tesseract OCR
* Description: Use OCR to get text from images
* Version: 0.1
* Author: Michael Vogel <http://pirati.ca/profile/heluecht>
*/
use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\Core\System;
use thiagoalessio\TesseractOCR\TesseractOCR;
require_once __DIR__ . DIRECTORY_SEPARATOR . 'vendor' . DIRECTORY_SEPARATOR . 'autoload.php';
/**
 * Registers tesseract_ocr_detection() from this file as the handler for the
 * 'ocr-detection' hook and writes a notice-level log entry afterwards.
 *
 * @return void
 */
function tesseract_install()
{
	Hook::register(
		'ocr-detection',
		__FILE__,
		'tesseract_ocr_detection'
	);

	Logger::notice('installed tesseract');
}
/**
 * Handler for the 'ocr-detection' hook: runs Tesseract OCR on the image data
 * found in $media['img_str'] and stores the recognised text in
 * $media['description'].
 *
 * If 'img_str' is missing or an empty string the function returns immediately
 * without invoking the OCR library. Any error raised while configuring or
 * running the OCR is caught and logged at info level; in that case
 * 'description' is not written.
 *
 * @param array $media Hook data, passed by reference. Reads 'img_str',
 *                     writes 'description' on success.
 *
 * @return void
 */
function tesseract_ocr_detection(&$media)
{
	// Nothing to recognise without image data; bail out before touching the OCR library.
	if (($media['img_str'] ?? '') === '') {
		return;
	}

	$ocr = new TesseractOCR();
	try {
		// Enable every language the library reports as available, joined with '+'.
		$languages = $ocr->availableLanguages();
		if ($languages) {
			/** @phpstan-ignore-next-line ignore call of \thiagoalessio\TesseractOCR\Option::lang() */
			$ocr->lang(implode('+', $languages));
		}

		$ocr->tempDir(System::getTempPath());

		// The image is handed over as an in-memory string together with its byte length.
		$ocr->imageData($media['img_str'], strlen($media['img_str']));

		// Only assigned when run() returns without throwing.
		$media['description'] = $ocr->run();
	} catch (\Throwable $th) {
		// Best effort: a failed OCR run must not break the caller, so only log it.
		Logger::info('Error calling TesseractOCR', ['message' => $th->getMessage()]);
	}
}