New addon "tesseract" for OCR

This commit is contained in:
Michael 2024-01-14 19:21:08 +00:00 committed by Hypolite Petovan
parent a179bab747
commit 4dd903b473
28 changed files with 1904 additions and 0 deletions

33
tesseract/tesseract.php Normal file
View file

@ -0,0 +1,33 @@
<?php
/**
* Name: Tesseract OCR
* Description: Use OCR to get text from images
* Version: 0.1
* Author: Michael Vogel <http://pirati.ca/profile/heluecht>
*/
use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\Core\System;
use thiagoalessio\TesseractOCR\TesseractOCR;
require_once __DIR__ . DIRECTORY_SEPARATOR . 'vendor' . DIRECTORY_SEPARATOR . 'autoload.php';
function tesseract_install()
{
Hook::register('ocr-detection', __FILE__, 'tesseract_ocr_detection');
Logger::notice('installed tesseract');
}
function tesseract_ocr_detection(&$media)
{
$ocr = new TesseractOCR();
try {
$ocr->tempDir(System::getTempPath());
$ocr->imageData($media['img_str'], strlen($media['img_str']));
$media['description'] = $ocr->run();
} catch (\Throwable $th) {
Logger::info('Error calling TesseractOCR', ['message' => $th->getMessage()]);
}
}