<?php
/**
 * Name: Compact Language Detector
 * Description: Improved language detection
 * Version: 0.1
 * Author: Michael Vogel <heluecht@pirati.ca>
 */

use Friendica\Core\Hook;
use Friendica\Core\Logger;
use Friendica\DI;

/**
 * Addon install callback: registers this file's handler for the
 * 'detect_languages' hook.
 */
function cld_install()
{
	$file    = 'addon/cld/cld.php';
	$handler = 'cld_detect_languages';

	Hook::register('detect_languages', $file, $handler);
}

/**
 * Hook handler for 'detect_languages': re-checks the language of a text with
 * the CLD2 extension.
 *
 * $data['detected'] is replaced by a single [language code => probability]
 * entry only when all of the following hold:
 *  - the cld2 extension is loaded,
 *  - CLD2 flags its result as reliable,
 *  - the (normalized) CLD2 code differs from the first key of $data['detected'],
 *  - the code is among the languages reported as available by the l10n service.
 * In every other case $data is left untouched.
 *
 * @param array $data Hook data, passed by reference. 'text' is read as the
 *                    input; 'detected' is read (its first key is the current
 *                    language) and possibly overwritten.
 */
function cld_detect_languages(array &$data)
{
	if (!extension_loaded('cld2')) {
		Logger::warning('CLD2 is not installed.');
		return;
	}

	// Tolerate hook data without a 'text' entry instead of raising a warning.
	$text = $data['text'] ?? '';

	$cld2 = new \CLD2Detector();
	$cld2->setEncodingHint(CLD2Encoding::UTF8); // optional, hints about text encoding

	$result = $cld2->detect($text);

	// The language currently considered detected is the first key of
	// 'detected'; a missing or empty entry means there is none.
	$original = !empty($data['detected']) ? array_key_first($data['detected']) : '';

	// CLD2 codes that are rewritten before being compared with the current
	// and the available languages; every other code is used unchanged.
	$aliases = [
		'pt'      => 'pt-PT',
		'el'      => 'el-monoton',
		'no'      => 'nb',
		'zh'      => 'zh-Hans',
		'zh-Hant' => 'zh-hant',
	];
	$detected = $aliases[$result['language_code']] ?? $result['language_code'];

	// Shared context for all debug messages below.
	$context = [
		'original'    => $original,
		'detected'    => $detected,
		'name'        => $result['language_name'],
		'probability' => $result['language_probability'],
		'text'        => $text,
	];

	if (!$result['is_reliable']) {
		Logger::debug('Unreliable detection', $context);
		return;
	}

	// Nothing to do when CLD2 agrees with the existing detection.
	if ($original === $detected) {
		return;
	}

	$available = array_keys(DI::l10n()->convertForLanguageDetection(DI::l10n()->getAvailableLanguages(true)));

	if (!in_array($detected, $available, true)) {
		Logger::debug('Unsupported language', $context);
		return;
	}

	Logger::debug('Detected different language', $context);

	// The CLD2 probability is stored scaled down by a factor of 100.
	$data['detected'] = [$detected => $result['language_probability'] / 100];
}