Files
thalim-plugin-hal-importer/includes/class-hal-api.php
2026-05-12 23:33:56 +02:00

138 lines
5.0 KiB
PHP

<?php
/**
* HAL API Class - Handles communication with HAL API
*/
// Stop immediately when ABSPATH is not defined, so this file does nothing
// if it is executed directly instead of being loaded through WordPress.
if (!defined('ABSPATH')) {
exit;
}
/**
 * Thin client for the HAL search API.
 *
 * Builds Solr-style query URLs against THALIM_HAL_API_BASE, performs the HTTP
 * request through the WordPress HTTP API and returns the decoded JSON payload
 * (or a WP_Error describing what went wrong).
 */
class Thalim_HAL_API {
    // Fields to retrieve from HAL (expanded for import preview)
    private const FIELDS = 'halId_s,title_s,docType_s,authFullName_s,authIdHal_s,publicationDate_s,producedDate_s,submittedDate_s,journalTitle_s,bookTitle_s,uri_s,fileMain_s,abstract_s,keyword_s,publisher_s,modifiedDate_s,doiId_s,citationFull_s,conferenceTitle_s,city_s,country_s,defenseDate_s';

    // Pause inserted between two consecutive batch requests, in microseconds (0.25 s).
    private const BATCH_PAUSE_US = 250000;

    /**
     * Fetch publications from THALIM structure
     *
     * @param int    $rows          Number of results to fetch
     * @param int    $start         Offset
     * @param string $sort          Solr sort expression
     * @param string $date_from     Filter start date (YYYY-MM-DD), empty = no lower bound
     * @param string $date_to       Filter end date (YYYY-MM-DD), empty = no upper bound
     * @param string $author_hal_id Restrict results to this authIdHal_s value, empty = no author filter
     * @return array|WP_Error Decoded API response, or WP_Error on failure.
     */
    public function fetch_publications($rows = 100, $start = 0, $sort = 'producedDate_tdate desc', $date_from = '', $date_to = '', $author_hal_id = '') {
        $url = $this->build_url($rows, $start, $sort, $date_from, $date_to, $author_hal_id);
        return $this->request($url);
    }

    /**
     * Fetch full HAL docs by a list of hal_ids (batched).
     * Uses Solr fq=halId_s:("id1" OR "id2" OR ...) syntax. No structId filter —
     * fetch by halId exact, regardless of structure.
     *
     * Blank and non string/int entries are ignored, duplicates are removed and
     * every ID is sent as a quoted term so it cannot alter the query syntax.
     * No request is made when no usable ID remains.
     *
     * @param string[] $hal_ids HAL IDs to fetch.
     * @param int      $batch   Batch size (default 100); values below 1 are treated as 1.
     * @return array|WP_Error Array keyed by halId_s, or WP_Error on failure.
     */
    public function fetch_by_hal_ids(array $hal_ids, int $batch = 100) {
        $ids = [];
        foreach ($hal_ids as $hal_id) {
            if (!is_string($hal_id) && !is_int($hal_id)) {
                continue;
            }
            $hal_id = trim((string) $hal_id);
            if ($hal_id !== '') {
                $ids[] = $hal_id;
            }
        }
        $ids = array_values(array_unique($ids));
        if ($ids === []) {
            return [];
        }

        // array_chunk() rejects a length below 1, so clamp the batch size.
        $chunks = array_chunk($ids, max(1, $batch));
        $last   = count($chunks) - 1;
        $docs   = [];

        foreach ($chunks as $index => $chunk) {
            $terms = [];
            foreach ($chunk as $id) {
                $terms[] = $this->quote_term($id);
            }
            $filter = 'halId_s:(' . implode(' OR ', $terms) . ')';
            $params = [
                'q=' . urlencode('*:*'),
                'fq=' . urlencode($filter),
                'rows=' . count($chunk),
                'fl=' . urlencode(self::FIELDS),
                'wt=json',
            ];
            $data = $this->request(THALIM_HAL_API_BASE . '?' . implode('&', $params));
            if (is_wp_error($data)) {
                return $data;
            }
            foreach ($data['response']['docs'] ?? [] as $doc) {
                if (!empty($doc['halId_s'])) {
                    $docs[$doc['halId_s']] = $doc;
                }
            }
            // Be polite with HAL between chunks; no need to wait after the last one.
            if ($index < $last) {
                usleep(self::BATCH_PAUSE_US);
            }
        }
        return $docs;
    }

    /**
     * Test API connection
     *
     * Requests five publications and summarises the response.
     *
     * @return array|WP_Error Array with 'success', 'total' and 'sample' keys, or WP_Error on failure.
     */
    public function test_connection() {
        $result = $this->fetch_publications(5);
        if (is_wp_error($result)) {
            return $result;
        }
        return [
            'success' => true,
            'total' => $result['response']['numFound'] ?? 0,
            'sample' => $result['response']['docs'] ?? []
        ];
    }

    /**
     * Build API URL with proper fq parameter handling
     *
     * Each filter is emitted as its own fq parameter. The structure and
     * document-type filters are always present; the date-range and author
     * filters are only added when a value was supplied.
     *
     * @param int    $rows
     * @param int    $start
     * @param string $sort
     * @param string $date_from     YYYY-MM-DD or empty
     * @param string $date_to       YYYY-MM-DD or empty
     * @param string $author_hal_id authIdHal_s value or empty
     * @return string Full request URL.
     */
    private function build_url($rows = 5, $start = 0, $sort = 'modifiedDate_tdate desc', $date_from = '', $date_to = '', $author_hal_id = '') {
        $doc_types = implode(' OR ', THALIM_HAL_DOC_TYPES);
        // An open bound is expressed as '*' in the range expression.
        $from = $date_from ? $date_from . 'T00:00:00Z' : '*';
        $to = $date_to ? $date_to . 'T23:59:59Z' : '*';

        $params = [
            'q=' . urlencode('*:*'),
            'fq=' . urlencode('structId_i:' . THALIM_HAL_STRUCT_ID),
            'fq=' . urlencode('docType_s:(' . $doc_types . ')'),
        ];
        if ($from !== '*' || $to !== '*') {
            $params[] = 'fq=' . urlencode('producedDate_tdate:[' . $from . ' TO ' . $to . ']');
        }
        if ($author_hal_id !== '') {
            // Quoted so that spaces or query syntax characters in the value
            // stay part of the term instead of changing the filter.
            $params[] = 'fq=' . urlencode('authIdHal_s:' . $this->quote_term($author_hal_id));
        }
        $params = array_merge($params, [
            'rows=' . intval($rows),
            'start=' . intval($start),
            'sort=' . urlencode($sort),
            'fl=' . urlencode(self::FIELDS),
            'wt=json'
        ]);
        return THALIM_HAL_API_BASE . '?' . implode('&', $params);
    }

    /**
     * Get API URL for debugging display
     *
     * NOTE(review): this relies on build_url()'s default sort
     * ('modifiedDate_tdate desc') while fetch_publications() defaults to
     * 'producedDate_tdate desc', so the URL shown can differ from the one
     * actually requested — confirm which is intended.
     *
     * @param int $rows Number of rows to put in the URL.
     * @return string
     */
    public function get_api_url($rows = 5) {
        return $this->build_url($rows, 0);
    }

    /**
     * Wrap a value in double quotes for use as a single query term,
     * backslash-escaping any embedded double quote or backslash.
     *
     * @param string $value
     * @return string
     */
    private function quote_term($value) {
        return '"' . addcslashes((string) $value, '"\\') . '"';
    }

    /**
     * Make HTTP request
     *
     * @param string $url
     * @return array|WP_Error Decoded JSON body; WP_Error for transport errors,
     *                        non-200 responses, or bodies that do not decode to an array.
     */
    private function request($url) {
        $response = wp_remote_get($url, ['timeout' => 30, 'headers' => ['Accept' => 'application/json']]);
        if (is_wp_error($response)) {
            return $response;
        }
        $code = wp_remote_retrieve_response_code($response);
        if ((int) $code !== 200) {
            return new WP_Error('api_error', "HTTP $code");
        }
        $data = json_decode(wp_remote_retrieve_body($response), true);
        // Valid JSON that is not an object/array (null, number, string) is not
        // a usable API response either, so report it as invalid.
        if (json_last_error() !== JSON_ERROR_NONE || !is_array($data)) {
            return new WP_Error('json_error', 'Invalid JSON');
        }
        return $data;
    }
}