Rapatriement modifs PROD : types de docs étendus (NOTICE, BLOG, TRAD, REPORT, UNDEFINED, POSTER, OTHER) + mapping catégories + fonction_auteur TRAD + strip suffixe version
This commit is contained in:
@@ -37,7 +37,20 @@ class Thalim_HAL_API {
|
||||
*/
|
||||
public function fetch_by_hal_ids(array $hal_ids, int $batch = 100) {
|
||||
$docs = [];
|
||||
$chunks = array_chunk(array_values(array_unique($hal_ids)), $batch);
|
||||
|
||||
// HAL Solr's halId_s is the canonical ID without a version suffix
|
||||
// (e.g. "hal-03583975", not "hal-03583975v2"). Some legacy SPIP entries
|
||||
// carry a version suffix, so strip it before querying and keep a map
|
||||
// to re-key the result under the original caller-supplied ID.
|
||||
$originals = array_values(array_unique($hal_ids));
|
||||
$stripped_map = []; // stripped_id => [original_id, ...]
|
||||
foreach ($originals as $orig) {
|
||||
$stripped = preg_replace('/v\d+$/', '', $orig);
|
||||
$stripped_map[$stripped][] = $orig;
|
||||
}
|
||||
$query_ids = array_keys($stripped_map);
|
||||
|
||||
$chunks = array_chunk($query_ids, $batch);
|
||||
foreach ($chunks as $chunk) {
|
||||
$filter = 'halId_s:(' . implode(' OR ', $chunk) . ')';
|
||||
$params = [
|
||||
@@ -51,8 +64,11 @@ class Thalim_HAL_API {
|
||||
$data = $this->request($url);
|
||||
if (is_wp_error($data)) return $data;
|
||||
foreach ($data['response']['docs'] ?? [] as $doc) {
|
||||
if (!empty($doc['halId_s'])) {
|
||||
$docs[$doc['halId_s']] = $doc;
|
||||
$canonical = $doc['halId_s'] ?? '';
|
||||
if ($canonical === '') continue;
|
||||
// Key the doc under every original ID that stripped to this canonical form
|
||||
foreach ($stripped_map[$canonical] ?? [$canonical] as $orig) {
|
||||
$docs[$orig] = $doc;
|
||||
}
|
||||
}
|
||||
// Be polite with HAL if we have multiple chunks
|
||||
|
||||
Reference in New Issue
Block a user