['id' => int, 'name' => string]
// Document type labels: short upper-case type code => French display label.
// NOTE(review): the codes look like HAL `docType_s` values (the preview rows
// store that field as 'type') — confirm against the rendering code.
private const DOC_TYPE_LABELS = [
'ART' => 'Article',
'COUV' => "Chapitre d'ouvrage",
'OUV' => 'Ouvrage',
'COMM' => 'Communication',
'ISSUE' => 'Direction de numéro',
'PROCEEDINGS' => 'Colloque',
'THESE' => 'Thèse',
'HDR' => 'HDR',
'SON' => 'Son',
'VIDEO' => 'Vidéo',
];
/**
 * Create the admin page controller and attach the HAL API client that the
 * other methods reach through $this->api.
 */
public function __construct() {
    $client = new Thalim_HAL_API();
    $this->api = $client;
}
/**
 * Output the importer admin screen.
 *
 * Checks the caller's capability, processes any posted action, then prints
 * each section of the page in order (styles, notice, config, preview, CSV).
 */
public function render() {
// Only users allowed to edit other people's posts may use this screen.
if (!current_user_can('edit_others_posts')) {
wp_die('Unauthorized');
}
// Actions run before any section is printed so the notice they store in
// $this->message is available to render_message() below.
$this->handle_actions();
echo '
THALIM HAL Importer
';
$this->render_styles();
$this->render_message();
$this->render_config();
$this->render_preview();
$this->render_csv_import();
echo '';
}
/**
 * Dispatch the form action posted as `thalim_hal_action`.
 *
 * Does nothing when no action was posted. When the nonce check fails an
 * error notice is stored in $this->message and no action is executed.
 * Unknown action names are ignored.
 */
private function handle_actions() {
    if (!isset($_POST['thalim_hal_action'])) return;

    if (!wp_verify_nonce($_POST['_wpnonce'] ?? '', 'thalim_hal_action')) {
        $this->message = ['error', 'Security check failed.'];
        return;
    }

    $action = sanitize_text_field($_POST['thalim_hal_action']);

    switch ($action) {
        case 'test_api':
            $result = $this->api->test_connection();
            $this->message = is_wp_error($result)
                ? ['error', 'API Error: ' . $result->get_error_message()]
                : ['success', "Connection OK! Found {$result['total']} publications."];
            break;

        case 'refresh':
            // Clear all preview transients (they are keyed by date range hash).
            // The prefix is escaped with esc_like() because "_" is itself a
            // single-character wildcard in LIKE; unescaped, the pattern could
            // match option names other than the intended transients.
            // NOTE(review): with a persistent object cache, transients are not
            // stored in the options table and this query will not clear them.
            global $wpdb;
            $prefixes = [
                '_transient_thalim_hal_preview_',
                '_transient_timeout_thalim_hal_preview_',
            ];
            foreach ($prefixes as $prefix) {
                $wpdb->query($wpdb->prepare(
                    "DELETE FROM {$wpdb->options} WHERE option_name LIKE %s",
                    $wpdb->esc_like($prefix) . '%'
                ));
            }
            $this->message = ['success', 'Preview data refreshed from HAL API.'];
            break;

        case 'import_pending':
            $this->handle_import();
            break;

        case 'csv_upload':
            $this->handle_csv_upload();
            break;

        case 'csv_batch':
            $this->handle_csv_batch();
            break;

        case 'csv_cancel':
            $this->handle_csv_cancel();
            break;

        case 'csv_download_report':
            $this->handle_csv_download_report();
            break;
    }
}
/**
 * Bulk-import the publications of the current preview.
 *
 * Reads the filter values from the posted form, obtains the preview through
 * get_preview_data() and imports every raw HAL document that has at least one
 * matching WordPress user and is not yet imported. The outcome (imported /
 * skipped counts plus any per-document errors) is stored in $this->message.
 */
private function handle_import() {
    $from   = sanitize_text_field($_POST['hal_date_from'] ?? '');
    $to     = sanitize_text_field($_POST['hal_date_to'] ?? '');
    $author = sanitize_text_field($_POST['hal_author_id'] ?? '');

    // The preview payload carries the raw HAL documents next to the
    // display-ready rows, so they can be handed to the importer directly.
    $preview = $this->get_preview_data($from, $to, $author);
    if (is_wp_error($preview)) {
        $this->message = ['error', 'API Error: ' . $preview->get_error_message()];
        return;
    }

    $raw_docs = $preview['raw_docs'] ?? [];
    if (empty($raw_docs)) {
        $this->message = ['warning', 'Aucune publication dans le cache. Utilisez Filtrer pour charger les données d\'abord.'];
        return;
    }

    $this->load_wp_users_hal_ids();
    $importer = new Thalim_HAL_Importer_Logic();

    $count_imported = 0;
    $count_skipped  = 0;
    $failures       = [];

    foreach ($raw_docs as $raw) {
        $hal_id  = $raw['halId_s'] ?? '';
        $members = $this->match_authors_to_users($raw['authIdHal_s'] ?? []);

        // Skip documents nobody on the site authored, or that already exist.
        if (empty($members) || $importer->is_imported($hal_id)) {
            $count_skipped++;
            continue;
        }

        $outcome = $importer->import($raw, $this->wp_users_by_hal_id);
        if (!is_wp_error($outcome)) {
            $count_imported++;
            continue;
        }
        $failures[] = $hal_id . ': ' . $outcome->get_error_message();
    }

    $has_failures = !empty($failures);

    $summary = sprintf('%d publication(s) importée(s) en statut "En attente".', $count_imported);
    if ($count_skipped) {
        $summary .= sprintf(' %d ignorée(s) (déjà importées ou sans membre THALIM correspondant).', $count_skipped);
    }
    if ($has_failures) {
        $summary .= ' Erreurs : ' . implode('; ', $failures);
    }

    $this->message = [$has_failures ? 'warning' : 'success', $summary];
}
private function render_styles() {
?>
message) return;
printf('',
esc_attr($this->message[0]), esc_html($this->message[1]));
}
private function render_config() {
?>
load_wp_users_hal_ids();
$preview = $this->get_preview_data($date_from, $date_to, $author_hal_id);
$ready_count = is_wp_error($preview) ? 0 : $preview['stats']['ready'];
?>
Import Preview
render_wp_users_debug(); ?>
render_summary($preview['stats']); ?>
render_preview_table($preview['docs']); ?>
render_legend(); ?>
Total in HAL
Already Imported
Ready to Import
No Matched User
No publications found.';
return;
}
?>
| Statut |
HAL ID |
Titre |
Type |
Auteurs |
IDs HAL auteurs |
Date |
Membres THALIM |
Lien HAL |
| get_status_icon($doc); ?> |
|
|
|
|
wp_users_by_hal_id[$normalized]);
?>
aucun
|
|
Aucun
|
Voir sur HAL
|
Légende :
✓ Importé
★ Prêt Membre THALIM identifié
✗ Bloqué Aucun membre THALIM ne correspond aux IDs auteurs HAL
load_wp_users_hal_ids();
if (empty($this->wp_users_by_hal_id)) {
echo 'Aucun utilisateur WordPress n\'a le champ identifiant_hal renseigné.
';
return;
}
?>
Utilisateurs WordPress avec identifiant HAL (wp_users_by_hal_id); ?> utilisateurs) — Cliquer pour déplier
| Utilisateur | Identifiant HAL | Debug (brut) | Modifier |
wp_users_by_hal_id as $hal_id => $user): ?>
| (ID : ) |
|
"" ( car.) |
Modifier |
api->fetch_publications($rows, 0, 'producedDate_tdate desc', $date_from, $date_to, $author_hal_id);
if (is_wp_error($result)) return $result;
$importer = new Thalim_HAL_Importer_Logic();
$this->load_wp_users_hal_ids();
$preview = [
'stats' => [
'total' => $result['response']['numFound'] ?? 0,
'imported' => 0,
'ready' => 0,
'blocked' => 0
],
'docs' => [],
'raw_docs' => [], // Raw HAL docs kept for import, avoids a second API call
];
foreach ($result['response']['docs'] ?? [] as $doc) {
$hal_id = $doc['halId_s'] ?? '';
$is_imported = $importer->is_imported($hal_id);
$author_hal_ids = $doc['authIdHal_s'] ?? [];
$matched_users = $this->match_authors_to_users($author_hal_ids);
$has_match = !empty($matched_users);
// Update stats
if ($is_imported) {
$preview['stats']['imported']++;
} elseif ($has_match) {
$preview['stats']['ready']++;
} else {
$preview['stats']['blocked']++;
}
$preview['docs'][] = [
'hal_id' => $hal_id,
'title' => $doc['title_s'][0] ?? 'N/A',
'type' => $doc['docType_s'] ?? '',
'authors' => $doc['authFullName_s'] ?? [],
'author_hal_ids' => $author_hal_ids,
'publication_date' => $doc['publicationDate_s'] ?? '',
'produced_date' => $doc['submittedDate_s'] ?? '',
'journal' => $doc['journalTitle_s'] ?? $doc['bookTitle_s'] ?? '',
'url' => $doc['uri_s'] ?? '',
'is_imported' => $is_imported,
'matched_users' => $matched_users,
'has_match' => $has_match,
];
$preview['raw_docs'][] = $doc; // Full HAL doc kept for import
}
set_transient($cache_key, $preview, 300);
return $preview;
}
/**
 * Load all WordPress users with HAL IDs into cache.
 *
 * Runs once per instance: later calls return immediately when the map is
 * already populated.
 *
 * Stores: normalized_hal_id => ['id' => int, 'name' => string, 'hal_id' => string]
 * where the key is the trimmed, lower-cased meta value and 'hal_id' is the
 * trimmed value in its original case.
 */
private function load_wp_users_hal_ids() {
    if ($this->wp_users_by_hal_id !== null) return;

    $this->wp_users_by_hal_id = [];

    $users = get_users([
        'meta_key' => 'identifiant_hal',
        'meta_compare' => 'EXISTS'
    ]);

    foreach ($users as $user) {
        $raw = get_user_meta($user->ID, 'identifiant_hal', true);

        // A non-string meta value cannot be a usable identifier and would
        // make trim() fail.
        if (!is_string($raw)) continue;

        // Trim BEFORE the emptiness check: a whitespace-only value would
        // otherwise be indexed under the '' key and match any blank author ID.
        $hal_id = trim($raw);
        if ($hal_id === '') continue;

        $this->wp_users_by_hal_id[strtolower($hal_id)] = [
            'id' => $user->ID,
            'name' => $user->display_name,
            'hal_id' => $hal_id, // original value for API filter
        ];
    }
}
/**
 * Resolve HAL author identifiers to the WordPress users that own them.
 *
 * Each identifier is trimmed and lower-cased before being looked up in
 * $this->wp_users_by_hal_id. Identifiers without an entry are ignored.
 *
 * Returns the display names of the matched users, in input order.
 */
private function match_authors_to_users($author_hal_ids) {
    $names = [];

    foreach ($author_hal_ids as $candidate) {
        $key   = strtolower(trim($candidate));
        $entry = $this->wp_users_by_hal_id[$key] ?? null;

        if ($entry === null) {
            continue;
        }
        $names[] = $entry['name'];
    }

    return $names;
}
// ========================================================================
// CSV bulk import (phase 2 — legacy publications from SPIP)
// ========================================================================
// Name of the WordPress option that holds the CSV import queue state
// (read, updated and deleted by the CSV handlers).
private const CSV_QUEUE_OPTION = 'thalim_hal_csv_queue';
// Number of HAL IDs taken from the queue per batch; also passed to the
// API client's fetch_by_hal_ids() call as its second argument.
private const CSV_BATCH_SIZE = 100;
private function render_csv_import() {
$queue = get_option(self::CSV_QUEUE_OPTION, null);
?>
Import en masse depuis CSV
Uploader le couple hal-to-import.csv + hal-to-import-context.json
(généré par php scripts/prepare-csv-context.php) pour importer les publications legacy.
Chaque batch traite publications — cliquer plusieurs fois jusqu'à terminaison.
render_csv_progress($queue); ?>
0 ? round(100 * $done / $total, 1) : 0;
$report_ct = count($queue['report'] ?? []);
?>
File d'attente active — statut cible :
— backdate :
/
publications traitées (%)
— restantes
Dernière mise à jour :
0): ?>
message = ['error', 'CSV ou fichier contexte manquant.'];
return;
}
// Parse CSV -> list of hal_ids
$fh = fopen($_FILES['csv_file']['tmp_name'], 'r');
if (!$fh) { $this->message = ['error', 'Impossible de lire le CSV.']; return; }
$header = fgetcsv($fh);
$hal_col = array_search('hal_id', $header);
$spip_col = array_search('spip_id', $header);
if ($hal_col === false) {
fclose($fh);
$this->message = ['error', 'Header CSV : colonne hal_id manquante.'];
return;
}
$hal_ids = [];
$spip_map = []; // hal_id => spip_id
while (($row = fgetcsv($fh)) !== false) {
$hid = trim($row[$hal_col] ?? '');
if ($hid === '') continue;
$hal_ids[] = $hid;
if ($spip_col !== false) $spip_map[$hid] = trim($row[$spip_col] ?? '');
}
fclose($fh);
$hal_ids = array_values(array_unique($hal_ids));
// Parse JSON context
$ctx_raw = file_get_contents($_FILES['ctx_file']['tmp_name']);
$ctx_data = json_decode($ctx_raw, true);
if (!is_array($ctx_data) || !isset($ctx_data['ctx'])) {
$this->message = ['error', 'Fichier contexte JSON invalide.'];
return;
}
$status = ($_POST['post_status'] ?? 'publish') === 'pending' ? 'pending' : 'publish';
$backdate = !empty($_POST['backdate_post']);
$queue = [
'hal_ids' => $hal_ids,
'spip_map' => $spip_map,
'status' => $status,
'backdate' => $backdate,
'total' => count($hal_ids),
'done' => 0,
'spip_ctx' => $ctx_data['ctx'],
'wp_users_by_hal_id' => $ctx_data['wp_users_by_hal_id'] ?? [],
'report' => [],
'last_error' => '',
'updated_at' => current_time('mysql'),
];
update_option(self::CSV_QUEUE_OPTION, $queue, false);
$this->message = ['success', sprintf(
'CSV chargé : %d publications prêtes. Statut cible : %s. Cliquer "Traiter le prochain batch" pour lancer.',
count($hal_ids), $status
)];
}
/**
 * Process the next slice of the CSV import queue.
 *
 * Takes up to CSV_BATCH_SIZE HAL IDs starting at the queue's 'done' offset,
 * fetches them from the HAL API and imports each one. One report row is
 * appended per HAL ID, then the advanced queue is saved and a summary notice
 * is stored in $this->message. When the API call fails, only the error and
 * timestamp are persisted, so the same slice is picked up on the next call.
 */
private function handle_csv_batch(): void {
    $state = get_option(self::CSV_QUEUE_OPTION, null);
    if (!$state) {
        $this->message = ['error', 'Aucune queue active.'];
        return;
    }

    $chunk = array_slice($state['hal_ids'], $state['done'], self::CSV_BATCH_SIZE);
    if (empty($chunk)) {
        $this->message = ['success', 'Import terminé — tous les batches ont été traités.'];
        return;
    }

    $fetched = $this->api->fetch_by_hal_ids($chunk, self::CSV_BATCH_SIZE);
    if (is_wp_error($fetched)) {
        $failure = $fetched->get_error_message();
        $state['last_error'] = $failure;
        $state['updated_at'] = current_time('mysql');
        update_option(self::CSV_QUEUE_OPTION, $state, false);
        $this->message = ['error', 'Erreur HAL API : ' . $failure];
        return;
    }

    // The importer is given a user map keyed by trimmed, lower-cased HAL ID.
    $members = [];
    foreach ($state['wp_users_by_hal_id'] as $raw_key => $member) {
        $normalized = strtolower(trim((string) $raw_key));
        $members[$normalized] = $member;
    }

    $importer = new Thalim_HAL_Importer_Logic();
    $tally = ['imported' => 0, 'skipped' => 0, 'errors' => 0];

    foreach ($chunk as $hal_id) {
        $spip_id = $state['spip_map'][$hal_id] ?? '';
        $record  = $fetched[$hal_id] ?? null;
        $context = $state['spip_ctx'][$hal_id] ?? [];

        // Requested ID missing from the fetched set.
        if (!$record) {
            $state['report'][] = [$hal_id, $spip_id, '', 'not_found_in_hal', 'false', 'none', 'HAL API did not return this hal_id'];
            $tally['errors']++;
            continue;
        }

        $outcome = $importer->import($record, $members, $state['status'], (bool) $state['backdate'], $context);

        if (!is_wp_error($outcome)) {
            $axes_source = $importer->last_axes_source;
            $axe_flag    = ($axes_source !== 'none') ? 'true' : 'false';
            $state['report'][] = [$hal_id, $spip_id, (string) $outcome, 'imported', $axe_flag, $axes_source, ''];
            $tally['imported']++;
            continue;
        }

        $error_code = $outcome->get_error_code();
        $state['report'][] = [$hal_id, $spip_id, '', $error_code, 'false', 'none', $outcome->get_error_message()];

        // An 'exists' error code is counted as a skip rather than a failure.
        if ($error_code === 'exists') {
            $tally['skipped']++;
        } else {
            $tally['errors']++;
        }
    }

    $state['done'] += count($chunk);
    $state['last_error'] = '';
    $state['updated_at'] = current_time('mysql');
    update_option(self::CSV_QUEUE_OPTION, $state, false);

    $this->message = ['success', sprintf(
        'Batch traité : %d importé(s), %d déjà importé(s), %d erreur(s). Progression : %d / %d.',
        $tally['imported'], $tally['skipped'], $tally['errors'],
        $state['done'], $state['total']
    )];
}
/**
 * Drop the stored CSV queue option and store a confirmation notice in
 * $this->message.
 */
private function handle_csv_cancel(): void {
    delete_option(self::CSV_QUEUE_OPTION);

    $notice = ['success', 'Queue CSV annulée.'];
    $this->message = $notice;
}
/**
 * Stream the accumulated CSV import report as a file download.
 *
 * Stores a warning notice when there is no queue or no report rows, and an
 * error notice when the download cannot be started. On success the CSV is
 * written to the response and the request is terminated with exit.
 */
private function handle_csv_download_report(): void {
    $queue = get_option(self::CSV_QUEUE_OPTION, null);
    if (!$queue || empty($queue['report'])) {
        $this->message = ['warning', 'Aucun rapport à télécharger.'];
        return;
    }

    // Download headers can only be sent before any body output. If output
    // has already started, header() would only emit warnings and the CSV
    // would be dumped inline, so fail with a visible notice instead.
    if (headers_sent()) {
        $this->message = ['error', 'Impossible de télécharger le rapport : la sortie de la page a déjà commencé.'];
        return;
    }

    // Open the stream before sending headers so a failure can still be
    // reported as a normal notice.
    $out = fopen('php://output', 'w');
    if ($out === false) {
        $this->message = ['error', 'Impossible d\'ouvrir le flux de sortie pour le rapport.'];
        return;
    }

    $filename = 'hal-import-report-' . date('Ymd-His') . '.csv';
    header('Content-Type: text/csv; charset=utf-8');
    header('Content-Disposition: attachment; filename="' . $filename . '"');

    fputcsv($out, ['hal_id', 'spip_id', 'post_id', 'status', 'has_axe', 'axes_source', 'error']);
    foreach ($queue['report'] as $row) {
        fputcsv($out, $row);
    }
    fclose($out);
    exit;
}
// ========================================================================
// End CSV bulk import
// ========================================================================
/**
 * Pick the CSS class for a preview row: imported takes precedence over
 * ready (has a matched user); anything else is blocked.
 */
private function get_row_class($doc) {
    if ($doc['is_imported']) {
        $class = 'hal-status-imported';
    } elseif ($doc['has_match']) {
        $class = 'hal-status-ready';
    } else {
        $class = 'hal-status-blocked';
    }

    return $class;
}
/**
 * Pick the status glyph for a preview row, using the same precedence as the
 * row class: imported, then ready (has a matched user), then blocked.
 */
private function get_status_icon($doc) {
    if ($doc['is_imported']) {
        $icon = '✓';
    } elseif ($doc['has_match']) {
        $icon = '★';
    } else {
        $icon = '✗';
    }

    return $icon;
}
}