CLD: Keep the original detected language array
parent
fbafa80815
commit
73c6a0ff0c
22
cld/cld.php
22
cld/cld.php
|
@ -28,7 +28,7 @@ function cld_detect_languages(array &$data)
|
||||||
$cld2->setPlainText(true);
|
$cld2->setPlainText(true);
|
||||||
|
|
||||||
$result = $cld2->detect($data['text']);
|
$result = $cld2->detect($data['text']);
|
||||||
|
|
||||||
if ($data['detected']) {
|
if ($data['detected']) {
|
||||||
$original = array_key_first($data['detected']);
|
$original = array_key_first($data['detected']);
|
||||||
} else {
|
} else {
|
||||||
|
@ -69,10 +69,9 @@ function cld_detect_languages(array &$data)
|
||||||
|
|
||||||
if (!$result['is_reliable']) {
|
if (!$result['is_reliable']) {
|
||||||
Logger::debug('Unreliable detection', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
Logger::debug('Unreliable detection', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
||||||
return;
|
if (($original == $detected) && ($data['detected'][$original] < $result['language_probability'] / 100)) {
|
||||||
}
|
$data['detected'][$original] = $result['language_probability'] / 100;
|
||||||
|
}
|
||||||
if ($original == $detected) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,6 +82,15 @@ function cld_detect_languages(array &$data)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Logger::debug('Detected different language', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
if ($original != $detected) {
|
||||||
$data['detected'] = [$detected => $result['language_probability'] / 100];
|
Logger::debug('Detected different language', ['uri-id' => $data['uri-id'], 'original' => $original, 'detected' => $detected, 'name' => $result['language_name'], 'probability' => $result['language_probability'], 'text' => $data['text']]);
|
||||||
|
}
|
||||||
|
|
||||||
|
$length = count($data['detected']);
|
||||||
|
if ($length > 0) {
|
||||||
|
unset($data['detected'][$detected]);
|
||||||
|
$data['detected'] = array_merge([$detected => $result['language_probability'] / 100], array_slice($data['detected'], 0, $length - 1));
|
||||||
|
} else {
|
||||||
|
$data['detected'] = [$detected => $result['language_probability'] / 100];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue