build_url($rows, $start, $sort, $date_from, $date_to, $author_hal_id);
        return $this->request($url);
    }

    /**
     * Fetch full HAL docs by a list of hal_ids (batched).
     *
     * Each batch is sent as one Solr filter of the form
     * fq=halId_s:("id1" OR "id2" OR ...). No structId filter is applied —
     * docs are fetched by exact halId, regardless of structure.
     *
     * Version suffixes ("v2") are stripped before querying, and every
     * returned doc is re-keyed under each caller-supplied ID that maps to it.
     * A returned doc whose halId_s matches no requested ID is keyed under
     * its own halId_s.
     *
     * @param string[] $hal_ids HAL IDs to fetch. Entries that are not strings/ints,
     *                          or that are blank once trimmed, are ignored.
     * @param int      $batch   Batch size (default 100). Values below 1 are treated as 1.
     * @return array|WP_Error Docs keyed by ID, or the WP_Error of the first failing request.
     */
    public function fetch_by_hal_ids(array $hal_ids, int $batch = 100)
    {
        $docs = [];

        // HAL Solr's halId_s is the canonical ID without a version suffix
        // (e.g. "hal-03583975", not "hal-03583975v2"). Some legacy SPIP entries
        // carry a version suffix, so strip it before querying and keep a map
        // to re-key the result under the original caller-supplied ID.
        $stripped_map = []; // stripped_id => [original_id => original_id, ...]
        foreach ($hal_ids as $orig) {
            if (!is_string($orig) && !is_int($orig)) {
                continue;
            }
            $stripped = preg_replace('/v\d+$/', '', trim((string) $orig));
            // A blank ID would yield "( OR ...)", a Solr syntax error that
            // fails the whole batch, so drop it here.
            if (!is_string($stripped) || $stripped === '') {
                continue;
            }
            // Keying by the original value de-duplicates repeated IDs.
            $stripped_map[$stripped][$orig] = $orig;
        }

        if ($stripped_map === []) {
            return $docs;
        }

        // array_chunk() rejects a length below 1, so clamp the batch size.
        $chunks = array_chunk(array_keys($stripped_map), max(1, $batch));

        foreach ($chunks as $index => $chunk) {
            // Be polite with HAL: pause between requests, never after the last one.
            if ($index > 0) {
                usleep(250000);
            }

            // Quote each ID (escaping embedded quotes/backslashes) so that
            // whitespace or query-syntax characters cannot alter the filter.
            $quoted = array_map(
                static fn($id) => '"' . addcslashes((string) $id, '"\\') . '"',
                $chunk
            );
            $filter = 'halId_s:(' . implode(' OR ', $quoted) . ')';

            $params = [
                'q=' . urlencode('*:*'),
                'fq=' . urlencode($filter),
                'rows=' . count($chunk),
                'fl=' . urlencode(self::FIELDS),
                'wt=json',
            ];
            $url  = THALIM_HAL_API_BASE . '?' . implode('&', $params);
            $data = $this->request($url);
            if (is_wp_error($data)) {
                return $data;
            }

            $found = is_array($data) ? ($data['response']['docs'] ?? []) : [];
            if (!is_array($found)) {
                $found = [];
            }

            foreach ($found as $doc) {
                $canonical = $doc['halId_s'] ?? '';
                if (!is_string($canonical) || $canonical === '') {
                    continue;
                }
                // Key the doc under every original ID that stripped to this canonical form
                foreach ($stripped_map[$canonical] ?? [$canonical] as $orig) {
                    $docs[$orig] = $doc;
                }
            }
        }

        return $docs;
    }

    /**
     * Check which HAL IDs exist in HAL's author referential (ref/author).
* Returns map [normalized_hal_id => bool|null] — null when the API errored
     * (treated as "unknown", not "invalid").
     *
     * IDs are trimmed, blank ones are dropped, and the map is keyed by the
     * lower-cased ID. Only IDs that were requested ever appear in the map.
     *
     * @param array $hal_ids Author idHal values to check.
     * @return array Map of lower-cased ID => true (found), false (not found) or null (request failed).
     */
    public function validate_hal_ids(array $hal_ids)
    {
        $result = [];
        $clean  = [];
        foreach ($hal_ids as $id) {
            $id = trim((string) $id);
            if ($id !== '') {
                $clean[strtolower($id)] = $id;
            }
        }
        if (empty($clean)) {
            return $result;
        }

        // Default all to false; flip to true when found
        $result = array_fill_keys(array_keys($clean), false);

        $endpoint = 'https://api.archives-ouvertes.fr/ref/author/';
        $chunks   = array_chunk(array_values($clean), 100);

        foreach ($chunks as $index => $chunk) {
            // Pause between requests only: this also throttles the request that
            // follows a failed chunk and avoids a pointless sleep after the last one.
            if ($index > 0) {
                usleep(250000);
            }

            // Solr OR with quoted values to tolerate dashes/dots in slugs.
            // Quotes and backslashes are escaped so a stray character cannot
            // terminate (or un-terminate) the quoted phrase.
            $quoted = array_map(
                static fn($id) => '"' . addcslashes((string) $id, '"\\') . '"',
                $chunk
            );
            $params = [
                'q=' . urlencode('idHal_s:(' . implode(' OR ', $quoted) . ')'),
                'fl=' . urlencode('idHal_s'),
                'rows=' . count($chunk),
                'wt=json',
            ];

            $data = $this->request($endpoint . '?' . implode('&', $params));
            if (is_wp_error($data)) {
                // Unknown, not invalid: the caller must not treat these as missing.
                foreach ($chunk as $id) {
                    $result[strtolower($id)] = null;
                }
                continue;
            }

            $found = is_array($data) ? ($data['response']['docs'] ?? []) : [];
            if (!is_array($found)) {
                $found = [];
            }

            foreach ($found as $doc) {
                $id_hal = $doc['idHal_s'] ?? '';
                if (!is_string($id_hal) || $id_hal === '') {
                    continue;
                }
                $norm = strtolower($id_hal);
                // Never introduce keys the caller did not ask about.
                if (array_key_exists($norm, $result)) {
                    $result[$norm] = true;
                }
            }
        }

        return $result;
    }

    /**
     * Test API connection
     *
     * Requests five publications through fetch_publications() and summarises
     * the response.
     *
     * @return array|WP_Error ['success' => true, 'total' => int, 'sample' => array],
     *                        or the WP_Error returned by fetch_publications().
     */
    public function test_connection()
    {
        $result = $this->fetch_publications(5);
        if (is_wp_error($result)) {
            return $result;
        }

        return [
            'success' => true,
            'total' => $result['response']['numFound'] ?? 0,
            'sample' => $result['response']['docs'] ?? []
        ];
    }

    /**
     * Build API URL with proper fq parameter handling
     *
     * Always filters on the configured structure ID and document types; the
     * date-range and author filters are added only when supplied.
     *
     * @param int $rows Number of rows to request (negative values are clamped to 0).
     * @param int $start Offset of the first row (negative values are clamped to 0).
     * @param string $sort Solr sort expression.
     * @param string $date_from YYYY-MM-DD or empty
     * @param string $date_to YYYY-MM-DD or empty
     * @param string $author_hal_id Author idHal to filter on, or empty for no author filter.
     * @return string Full request URL.
     */
    private function build_url($rows = 5, $start = 0, $sort = 'modifiedDate_tdate desc', $date_from = '', $date_to = '', $author_hal_id = '')
    {
        $doc_types = implode(' OR ', THALIM_HAL_DOC_TYPES);

        // An open bound is expressed as "*" in a Solr range.
        $from = $date_from ? $date_from . 'T00:00:00Z' : '*';
        $to   = $date_to ? $date_to . 'T23:59:59Z' : '*';

        $params = [
            'q=' . urlencode('*:*'),
            'fq=' . urlencode('structId_i:' . THALIM_HAL_STRUCT_ID),
            'fq=' . urlencode('docType_s:(' . $doc_types . ')'),
        ];

        if ($from !== '*' || $to !== '*') {
            $params[] = 'fq=' . urlencode('producedDate_tdate:[' . $from . ' TO ' . $to . ']');
        }

        // Normalise first so null/whitespace does not produce an empty clause,
        // then quote so that spaces or query-syntax characters in the ID
        // cannot change the meaning of the filter.
        $author_hal_id = trim((string) $author_hal_id);
        if ($author_hal_id !== '') {
            $params[] = 'fq=' . urlencode('authIdHal_s:"' . addcslashes($author_hal_id, '"\\') . '"');
        }

        $params = array_merge($params, [
            'rows=' . max(0, intval($rows)),
            'start=' . max(0, intval($start)),
            'sort=' . urlencode($sort),
            'fl=' . urlencode(self::FIELDS),
            'wt=json'
        ]);

        return THALIM_HAL_API_BASE . '?' . implode('&', $params);
    }

    /**
     * Get API URL for debugging display
     *
     * @param int $rows Number of rows to put in the URL.
     * @return string URL built with offset 0 and the default sort and filters.
     */
    public function get_api_url($rows = 5)
    {
        return $this->build_url($rows, 0);
    }

    /**
     * Make HTTP request
     *
     * Sends a GET request with a 30-second timeout and decodes the JSON body.
     *
     * @param string $url Fully built request URL.
     * @return array|WP_Error Decoded response, the transport WP_Error,
     *                        'api_error' for a non-200 status, or 'json_error'
     *                        when the body is not a JSON object/array.
     */
    private function request($url)
    {
        $response = wp_remote_get($url, ['timeout' => 30, 'headers' => ['Accept' => 'application/json']]);
        if (is_wp_error($response)) {
            return $response;
        }

        $code = wp_remote_retrieve_response_code($response);
        if ((int) $code !== 200) {
            return new WP_Error('api_error', "HTTP $code");
        }

        $data = json_decode(wp_remote_retrieve_body($response), true);
        // Valid-but-scalar JSON (null, "text", 123) is not a usable response:
        // every caller indexes the result as an array.
        if (json_last_error() !== JSON_ERROR_NONE || !is_array($data)) {
            return new WP_Error('json_error', 'Invalid JSON');
        }

        return $data;
    }
}