Rapatriement des modifications de PROD : types de documents étendus (NOTICE, BLOG, TRAD, REPORT, UNDEFINED, POSTER, OTHER) + mapping des catégories + fonction_auteur TRAD + suppression du suffixe de version des identifiants HAL

This commit is contained in:
2026-06-02 19:12:13 +02:00
parent f93fe77a12
commit 20e2158612
4 changed files with 38 additions and 4 deletions

View File

@@ -37,7 +37,20 @@ class Thalim_HAL_API {
*/
public function fetch_by_hal_ids(array $hal_ids, int $batch = 100) {
$docs = [];
$chunks = array_chunk(array_values(array_unique($hal_ids)), $batch);
// HAL Solr's halId_s is the canonical ID without a version suffix
// (e.g. "hal-03583975", not "hal-03583975v2"). Some legacy SPIP entries
// carry a version suffix, so strip it before querying and keep a map
// to re-key the result under the original caller-supplied ID.
$originals = array_values(array_unique($hal_ids));
$stripped_map = []; // stripped_id => [original_id, ...]
foreach ($originals as $orig) {
$stripped = preg_replace('/v\d+$/', '', $orig);
$stripped_map[$stripped][] = $orig;
}
$query_ids = array_keys($stripped_map);
$chunks = array_chunk($query_ids, $batch);
foreach ($chunks as $chunk) {
$filter = 'halId_s:(' . implode(' OR ', $chunk) . ')';
$params = [
@@ -51,8 +64,11 @@ class Thalim_HAL_API {
$data = $this->request($url);
if (is_wp_error($data)) return $data;
foreach ($data['response']['docs'] ?? [] as $doc) {
if (!empty($doc['halId_s'])) {
$docs[$doc['halId_s']] = $doc;
$canonical = $doc['halId_s'] ?? '';
if ($canonical === '') continue;
// Key the doc under every original ID that stripped to this canonical form
foreach ($stripped_map[$canonical] ?? [$canonical] as $orig) {
$docs[$orig] = $doc;
}
}
// Be polite with HAL if we have multiple chunks