commit 57719052f3c7da91c37a5e233674c8ffa91c242a
Author: Valentin Le Moign
Date: Tue May 12 23:33:56 2026 +0200
Initial commit
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9507787
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+# --- OS / éditeur ---
+.DS_Store
+Thumbs.db
+*.swp
+*~
+.idea/
+.vscode/
+
+# --- Archives ---
+*.tar.gz
+*.tgz
+*.zip
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..50252c2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,103 @@
+# thalim-hal-importer
+
+Plugin WordPress qui importe les publications du laboratoire THALIM depuis l'archive ouverte [HAL](https://hal.science/) (structure `254015`) et les transforme en posts WordPress avec tous les champs Pods renseignés.
+
+- **Version :** 2.0.0
+- **Auteur :** THALIM Dev
+- **Licence :** GPL v2 or later
+
+## Installation
+
+```bash
+cd wp-content/plugins
+git clone gitea@figureslibres.io:valentin_le_moign/thalim-plugin-hal-importer.git thalim-hal-importer
+```
+
+Puis activer depuis l'admin WordPress. Dans le cadre du projet THALIM, le clonage est automatisé par `bootstrap.sh` du repo [`thalim-stack`](https://figureslibres.io/valentin_le_moign/thalim-stack).
+
+## Utilisation
+
+Une fois activé, le plugin ajoute une page d'administration : **Outils → HAL Import** (capacité requise : `edit_others_posts`).
+
+La page propose deux flux d'import :
+
+### 1. Aperçu live + import incrémental
+
+- Filtres : plage de dates (`producedDate_s`) et auteur (idHAL d'un membre THALIM)
+- Liste les publications HAL correspondantes avec statut coloré :
+ - **vert** : déjà importée (présence du meta `hal_id` côté WP)
+ - **jaune** : prête à être importée (au moins un auteur HAL matche un user WordPress)
+ - **rouge** : aucun auteur THALIM identifié → ignorée
+- Bouton **Importer** : crée tous les posts « prêts » en statut `pending` (à publier après relecture)
+- Cache des aperçus en transient (5 min, clé hashée sur les filtres), rafraîchissable manuellement
+- Bouton **Test API** pour vérifier la connexion
+
+### 2. Import en masse via CSV (legacy SPIP)
+
+Permet d'importer des publications anciennes par lots de 100 :
+
+- Upload d'un CSV contenant une colonne `hal_id` (et, optionnellement, `spip_id`), accompagné d'un fichier de contexte SPIP au format JSON (axes/tags/programmes/owner par publication)
+- Traitement par batchs séquentiels (cliquer plusieurs fois)
+- Rapport CSV téléchargeable en fin de file
+- Annulation possible à tout moment
+
+## Mapping des types HAL → catégories WordPress
+
+`DOC_TYPE_MAP` dans `includes/class-importer.php` :
+
+| Type HAL | Description | Catégorie WP |
+| ------------- | ---------------------------- | ------------ |
+| `ART` | Article | `16` |
+| `COUV` | Chapitre d'ouvrage | `16` |
+| `OUV` | Ouvrage | `15` |
+| `COMM` | Communication | `13` |
+| `ISSUE` | Direction de numéro | `16` |
+| `PROCEEDINGS` | Direction d'ouvrage / actes | `15` |
+| `THESE` | Thèse | `14` |
+| `HDR` | HDR | `14` |
+| `SON` | Son | `19` |
+| `VIDEO` | Vidéo | `19` |
+
+`COMM`, `THESE`, `HDR`, `SON`, `VIDEO` sont traités comme événements et utilisent le champ Pods `date_de_debut`. Les autres utilisent `datetime`.
+
+## Champs HAL → champs WP
+
+À l'import, chaque publication remplit :
+
+- **Identification** : `hal_id`, `hal_url` (URI HAL), `lien_externe_1` (PDF si dispo) avec titre `Document HAL // HAL Document`
+- **Titre** : `post_title` (issu de HAL)
+- **Catégorie** : écriture « triple-storage » Pods (assignation du terme WP + meta `categorie` + meta `_pods_categorie`), complétée par une ligne dans la table `wp_podsrel`
+- **Auteurs HAL → membres** : matching (insensible à la casse) via `authIdHal_s` ↔ user meta `identifiant_hal`. Stockage selon le pattern Pods (`membres` via add_post_meta + `_pods_membres` + lignes `wp_podsrel`)
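+- **Date** : meta `date_de_debut` ou `datetime` selon le type, renseigné depuis `producedDate_s` (ou `defenseDate_s` si disponible pour `THESE`/`HDR`) ; la date du post (`post_date`) peut, en option, être antidatée sur `producedDate_s`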
+- **Métadonnées** : `journal`, `editeur`, `sous-titre` (book/conference title), `lieu` (city/country pour PROCEEDINGS), `reference_bibliographique` (`citationFull_s`)
+- **Fonction** : `fonction_auteur` (varie selon doc type : « Auteur du chapitre // Chapter author », « Direction de numéro // Editor-in-Chief », etc.)
+- **Axes thématiques** : cascade `spip_context` → co-auteurs THALIM → owner. Source effective stockée dans `$importer->last_axes_source`
+- **Programmes de recherche** et **étiquettes** : depuis `spip_context` (import CSV uniquement)
+
+## Dédoublonnage
+
+L'import vérifie le meta `hal_id` avant chaque insertion : une publication ne peut pas être importée deux fois. Le `is_imported($hal_id)` est aussi affiché en colonne de statut dans l'aperçu.
+
+## Prérequis
+
+- WordPress 6.0+
+- PHP 7.4+
+- Plugin **Pods** (le pod `post` et le champ user `identifiant_hal`)
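+- IDs de catégorie WordPress conformes au mapping (13/14/15/16/19) — codés en dur dans `DOC_TYPE_MAP` ; les IDs Pods (pod `post` = 8 et IDs de champs) sont eux aussi codés en dur dans `includes/class-importer.php`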
+
+## Structure
+
+```
+.
+├── thalim-hal-importer.php # point d'entrée, constantes, bootstrap
+└── includes/
+ ├── class-hal-api.php # client API HAL (fetch_publications, fetch_by_hal_ids)
+ ├── class-admin-page.php # UI Tools > HAL Import (aperçu + CSV)
+ └── class-importer.php # mapping HAL → posts WP (triple-storage, axes cascade)
+```
+
+## API HAL
+
+- Base : `https://api.archives-ouvertes.fr/search/`
+- Structure THALIM : `254015`
+- Documentation : https://api.archives-ouvertes.fr/docs/search
diff --git a/includes/class-admin-page.php b/includes/class-admin-page.php
new file mode 100644
index 0000000..9af033b
--- /dev/null
+++ b/includes/class-admin-page.php
@@ -0,0 +1,750 @@
+ ['id' => int, 'name' => string]
+
+ // Human-readable French label for each supported HAL document type code
+ private const DOC_TYPE_LABELS = [
+ 'ART' => 'Article',
+ 'COUV' => "Chapitre d'ouvrage",
+ 'OUV' => 'Ouvrage',
+ 'COMM' => 'Communication',
+ 'ISSUE' => 'Direction de numéro',
+ 'PROCEEDINGS' => 'Colloque',
+ 'THESE' => 'Thèse',
+ 'HDR' => 'HDR',
+ 'SON' => 'Son',
+ 'VIDEO' => 'Vidéo',
+ ];
+
+ /**
+ * Create the admin page controller with its own HAL API client.
+ */
+ public function __construct() {
+ $this->api = new Thalim_HAL_API(); // used for the connection test, previews and CSV batches
+ }
+
+ /**
+ * Render the whole admin page.
+ *
+ * Stops with wp_die() when the current user lacks the `edit_others_posts`
+ * capability, then processes any submitted form action before printing the
+ * page sections in order: styles, message, config, preview, CSV import.
+ */
+ public function render() {
+ if (!current_user_can('edit_others_posts')) {
+ wp_die('Unauthorized');
+ }
+ $this->handle_actions(); // runs first so the sections below reflect the action's result
+ echo 'THALIM HAL Importer
';
+ $this->render_styles();
+ $this->render_message();
+ $this->render_config();
+ $this->render_preview();
+ $this->render_csv_import();
+ echo '
';
+ }
+
+    /**
+     * Dispatch the form action posted from the admin page.
+     *
+     * Does nothing when no `thalim_hal_action` field was submitted. The request
+     * must carry a valid `thalim_hal_action` nonce; otherwise an error message is
+     * stored and no action runs. Results are reported through $this->message.
+     *
+     * Note: the `csv_download_report` action may terminate the request.
+     */
+    private function handle_actions() {
+        if (!isset($_POST['thalim_hal_action'])) return;
+
+        // WordPress adds slashes to superglobals: unslash and sanitize before use.
+        $nonce = isset($_POST['_wpnonce']) ? sanitize_text_field(wp_unslash($_POST['_wpnonce'])) : '';
+        if (!wp_verify_nonce($nonce, 'thalim_hal_action')) {
+            $this->message = ['error', 'Security check failed.'];
+            return;
+        }
+        $action = sanitize_text_field(wp_unslash($_POST['thalim_hal_action']));
+
+        switch ($action) {
+            case 'test_api':
+                $result = $this->api->test_connection();
+                $this->message = is_wp_error($result)
+                    ? ['error', 'API Error: ' . $result->get_error_message()]
+                    : ['success', "Connection OK! Found {$result['total']} publications."];
+                break;
+
+            case 'refresh':
+                // Clear all preview transients (they are keyed by a hash of the filters).
+                // esc_like() keeps the underscores literal instead of acting as
+                // single-character wildcards in the LIKE pattern.
+                global $wpdb;
+                $prefixes = [
+                    '_transient_thalim_hal_preview_',
+                    '_transient_timeout_thalim_hal_preview_',
+                ];
+                foreach ($prefixes as $prefix) {
+                    $wpdb->query($wpdb->prepare(
+                        "DELETE FROM {$wpdb->options} WHERE option_name LIKE %s",
+                        $wpdb->esc_like($prefix) . '%'
+                    ));
+                }
+                $this->message = ['success', 'Preview data refreshed from HAL API.'];
+                break;
+
+            case 'import_pending':
+                $this->handle_import();
+                break;
+
+            case 'csv_upload':
+                $this->handle_csv_upload();
+                break;
+
+            case 'csv_batch':
+                $this->handle_csv_batch();
+                break;
+
+            case 'csv_cancel':
+                $this->handle_csv_cancel();
+                break;
+
+            case 'csv_download_report':
+                $this->handle_csv_download_report();
+                break;
+        }
+    }
+
+    /**
+     * Import every publication of the current preview that is ready, as a post.
+     *
+     * The filters are read from the submitted form and handed to
+     * get_preview_data(), whose result carries the raw HAL documents next to the
+     * processed rows. A document is skipped when none of its HAL author IDs
+     * matches a WordPress user or when it is already imported. The outcome
+     * (counts and per-document errors) is stored in $this->message.
+     */
+    private function handle_import() {
+        $from   = sanitize_text_field($_POST['hal_date_from'] ?? '');
+        $to     = sanitize_text_field($_POST['hal_date_to'] ?? '');
+        $author = sanitize_text_field($_POST['hal_author_id'] ?? '');
+
+        $preview = $this->get_preview_data($from, $to, $author);
+        if (is_wp_error($preview)) {
+            $this->message = ['error', 'API Error: ' . $preview->get_error_message()];
+            return;
+        }
+
+        $hal_docs = $preview['raw_docs'] ?? [];
+        if (empty($hal_docs)) {
+            $this->message = ['warning', 'Aucune publication dans le cache. Utilisez Filtrer pour charger les données d\'abord.'];
+            return;
+        }
+
+        $this->load_wp_users_hal_ids();
+        $importer = new Thalim_HAL_Importer_Logic();
+
+        $done     = 0;
+        $ignored  = 0;
+        $failures = [];
+
+        foreach ($hal_docs as $hal_doc) {
+            $id      = $hal_doc['halId_s'] ?? '';
+            $members = $this->match_authors_to_users($hal_doc['authIdHal_s'] ?? []);
+
+            // The duplicate lookup only runs when at least one member matched.
+            $importable = !empty($members) && !$importer->is_imported($id);
+            if (!$importable) {
+                $ignored++;
+                continue;
+            }
+
+            $result = $importer->import($hal_doc, $this->wp_users_by_hal_id);
+            if (is_wp_error($result)) {
+                $failures[] = $id . ': ' . $result->get_error_message();
+                continue;
+            }
+            $done++;
+        }
+
+        $summary = sprintf('%d publication(s) importée(s) en statut "En attente".', $done);
+        if ($ignored) {
+            $summary .= sprintf(' %d ignorée(s) (déjà importées ou sans membre THALIM correspondant).', $ignored);
+        }
+
+        $level = 'success';
+        if (!empty($failures)) {
+            $summary .= ' Erreurs : ' . implode('; ', $failures);
+            $level = 'warning';
+        }
+
+        $this->message = [$level, $summary];
+    }
+
+ private function render_styles() {
+ ?>
+
+ message) return;
+ printf('',
+ esc_attr($this->message[0]), esc_html($this->message[1]));
+ }
+
+ private function render_config() {
+ ?>
+
+ load_wp_users_hal_ids();
+
+ $preview = $this->get_preview_data($date_from, $date_to, $author_hal_id);
+ $ready_count = is_wp_error($preview) ? 0 : $preview['stats']['ready'];
+ ?>
+
+
Import Preview
+
+
+
+
+
+
+ render_wp_users_debug(); ?>
+ render_summary($preview['stats']); ?>
+ render_preview_table($preview['docs']); ?>
+ render_legend(); ?>
+
+
+
+
+
+
+ Total in HAL
+
+
+
+ Already Imported
+
+
+
+ Ready to Import
+
+
+
+ No Matched User
+
+
+ No publications found.';
+ return;
+ }
+ ?>
+
+
+
+ | Statut |
+ HAL ID |
+ Titre |
+ Type |
+ Auteurs |
+ IDs HAL auteurs |
+ Date |
+ Membres THALIM |
+ Lien HAL |
+
+
+
+
+
+ | get_status_icon($doc); ?> |
+ |
+
+
+
+
+
+ |
+
+
+
+
+ |
+
+
+ |
+
+
+ wp_users_by_hal_id[$normalized]);
+ ?>
+
+
+
+ aucun
+
+ |
+ |
+
+
+
+
+
+
+ Aucun
+
+ |
+
+ Voir sur HAL
+ |
+
+
+
+
+
+
+ Légende :
+ ✓ Importé
+ ★ Prêt Membre THALIM identifié
+ ✗ Bloqué Aucun membre THALIM ne correspond aux IDs auteurs HAL
+
+ load_wp_users_hal_ids();
+ if (empty($this->wp_users_by_hal_id)) {
+ echo 'Aucun utilisateur WordPress n\'a le champ identifiant_hal renseigné.
';
+ return;
+ }
+ ?>
+
+
+ Utilisateurs WordPress avec identifiant HAL (wp_users_by_hal_id); ?> utilisateurs) — Cliquer pour déplier
+
+
+ | Utilisateur | Identifiant HAL | Debug (brut) | Modifier |
+
+ wp_users_by_hal_id as $hal_id => $user): ?>
+
+ | (ID : ) |
+ |
+ "" ( car.) |
+ Modifier |
+
+
+
+
+
+ api->fetch_publications($rows, 0, 'producedDate_tdate desc', $date_from, $date_to, $author_hal_id);
+ if (is_wp_error($result)) return $result;
+
+ $importer = new Thalim_HAL_Importer_Logic();
+ $this->load_wp_users_hal_ids();
+
+ $preview = [
+ 'stats' => [
+ 'total' => $result['response']['numFound'] ?? 0,
+ 'imported' => 0,
+ 'ready' => 0,
+ 'blocked' => 0
+ ],
+ 'docs' => [],
+ 'raw_docs' => [], // Raw HAL docs kept for import, avoids a second API call
+ ];
+
+ foreach ($result['response']['docs'] ?? [] as $doc) {
+ $hal_id = $doc['halId_s'] ?? '';
+ $is_imported = $importer->is_imported($hal_id);
+ $author_hal_ids = $doc['authIdHal_s'] ?? [];
+ $matched_users = $this->match_authors_to_users($author_hal_ids);
+ $has_match = !empty($matched_users);
+
+ // Update stats
+ if ($is_imported) {
+ $preview['stats']['imported']++;
+ } elseif ($has_match) {
+ $preview['stats']['ready']++;
+ } else {
+ $preview['stats']['blocked']++;
+ }
+
+ $preview['docs'][] = [
+ 'hal_id' => $hal_id,
+ 'title' => $doc['title_s'][0] ?? 'N/A',
+ 'type' => $doc['docType_s'] ?? '',
+ 'authors' => $doc['authFullName_s'] ?? [],
+ 'author_hal_ids' => $author_hal_ids,
+ 'publication_date' => $doc['publicationDate_s'] ?? '',
+ 'produced_date' => $doc['submittedDate_s'] ?? '',
+ 'journal' => $doc['journalTitle_s'] ?? $doc['bookTitle_s'] ?? '',
+ 'url' => $doc['uri_s'] ?? '',
+ 'is_imported' => $is_imported,
+ 'matched_users' => $matched_users,
+ 'has_match' => $has_match,
+ ];
+ $preview['raw_docs'][] = $doc; // Full HAL doc kept for import
+ }
+
+ set_transient($cache_key, $preview, 300);
+ return $preview;
+ }
+
+    /**
+     * Build, once per instance, the lookup of WordPress users having a HAL identifier.
+     *
+     * Fills $this->wp_users_by_hal_id with entries of the form
+     * lowercased_trimmed_hal_id => ['id' => user ID, 'name' => display name,
+     * 'hal_id' => trimmed original identifier]. Later calls return immediately.
+     */
+    private function load_wp_users_hal_ids() {
+        if ($this->wp_users_by_hal_id !== null) {
+            return;
+        }
+
+        $this->wp_users_by_hal_id = [];
+
+        $query = [
+            'meta_key'     => 'identifiant_hal',
+            'meta_compare' => 'EXISTS'
+        ];
+
+        foreach (get_users($query) as $wp_user) {
+            $raw_id = get_user_meta($wp_user->ID, 'identifiant_hal', true);
+            if (empty($raw_id)) {
+                // The meta exists but holds no usable value.
+                continue;
+            }
+
+            $key = strtolower(trim($raw_id));
+            $this->wp_users_by_hal_id[$key] = [
+                'id'     => $wp_user->ID,
+                'name'   => $wp_user->display_name,
+                'hal_id' => trim($raw_id), // original casing kept for the API filter
+            ];
+        }
+    }
+
+    /**
+     * Match HAL author IDs to WordPress users.
+     *
+     * IDs are compared after trimming and lowercasing, against the keys of
+     * $this->wp_users_by_hal_id.
+     *
+     * @param mixed $author_hal_ids List of HAL author IDs. A single scalar or null
+     *                              is tolerated and treated as a one-item or empty
+     *                              list, so an unexpected API payload cannot
+     *                              trigger a foreach warning.
+     * @return string[] Display names of the matched users, in input order
+     *                  (for preview display).
+     */
+    private function match_authors_to_users($author_hal_ids) {
+        $names = [];
+        foreach ((array) $author_hal_ids as $hal_id) {
+            if (!is_scalar($hal_id)) {
+                // Nested arrays or objects cannot be an identifier.
+                continue;
+            }
+            $key = strtolower(trim((string) $hal_id));
+            if (isset($this->wp_users_by_hal_id[$key])) {
+                $names[] = $this->wp_users_by_hal_id[$key]['name'];
+            }
+        }
+        return $names;
+    }
+
+ // ========================================================================
+ // CSV bulk import (phase 2 — legacy publications from SPIP)
+ // ========================================================================
+
+ private const CSV_QUEUE_OPTION = 'thalim_hal_csv_queue'; // option name under which the CSV import queue is persisted
+ private const CSV_BATCH_SIZE = 100; // number of HAL IDs taken from the queue per batch
+
+ private function render_csv_import() {
+ $queue = get_option(self::CSV_QUEUE_OPTION, null);
+ ?>
+
+
Import en masse depuis CSV
+
+ Uploader le couple hal-to-import.csv + hal-to-import-context.json
+ (généré par php scripts/prepare-csv-context.php) pour importer les publications legacy.
+ Chaque batch traite publications — cliquer plusieurs fois jusqu'à terminaison.
+
+
+
+
+
+ render_csv_progress($queue); ?>
+
+
+ 0 ? round(100 * $done / $total, 1) : 0;
+ $report_ct = count($queue['report'] ?? []);
+ ?>
+
+
File d'attente active — statut cible :
+ — backdate :
+
+ /
+ publications traitées (%)
+ — restantes
+
+
+
Dernière mise à jour :
+
+
+
+
+ Erreur dernier batch :
+
+
+
+
+
+ 0): ?>
+
+
+
+
+
+ message = ['error', 'CSV ou fichier contexte manquant.'];
+ return;
+ }
+
+ // Parse CSV -> list of hal_ids
+ $fh = fopen($_FILES['csv_file']['tmp_name'], 'r');
+ if (!$fh) { $this->message = ['error', 'Impossible de lire le CSV.']; return; }
+ $header = fgetcsv($fh);
+ $hal_col = array_search('hal_id', $header);
+ $spip_col = array_search('spip_id', $header);
+ if ($hal_col === false) {
+ fclose($fh);
+ $this->message = ['error', 'Header CSV : colonne hal_id manquante.'];
+ return;
+ }
+ $hal_ids = [];
+ $spip_map = []; // hal_id => spip_id
+ while (($row = fgetcsv($fh)) !== false) {
+ $hid = trim($row[$hal_col] ?? '');
+ if ($hid === '') continue;
+ $hal_ids[] = $hid;
+ if ($spip_col !== false) $spip_map[$hid] = trim($row[$spip_col] ?? '');
+ }
+ fclose($fh);
+ $hal_ids = array_values(array_unique($hal_ids));
+
+ // Parse JSON context
+ $ctx_raw = file_get_contents($_FILES['ctx_file']['tmp_name']);
+ $ctx_data = json_decode($ctx_raw, true);
+ if (!is_array($ctx_data) || !isset($ctx_data['ctx'])) {
+ $this->message = ['error', 'Fichier contexte JSON invalide.'];
+ return;
+ }
+
+ $status = ($_POST['post_status'] ?? 'publish') === 'pending' ? 'pending' : 'publish';
+ $backdate = !empty($_POST['backdate_post']);
+
+ $queue = [
+ 'hal_ids' => $hal_ids,
+ 'spip_map' => $spip_map,
+ 'status' => $status,
+ 'backdate' => $backdate,
+ 'total' => count($hal_ids),
+ 'done' => 0,
+ 'spip_ctx' => $ctx_data['ctx'],
+ 'wp_users_by_hal_id' => $ctx_data['wp_users_by_hal_id'] ?? [],
+ 'report' => [],
+ 'last_error' => '',
+ 'updated_at' => current_time('mysql'),
+ ];
+ update_option(self::CSV_QUEUE_OPTION, $queue, false);
+ $this->message = ['success', sprintf(
+ 'CSV chargé : %d publications prêtes. Statut cible : %s. Cliquer "Traiter le prochain batch" pour lancer.',
+ count($hal_ids), $status
+ )];
+ }
+
+ /**
+ * Process the next batch of the persisted CSV import queue.
+ *
+ * Takes up to CSV_BATCH_SIZE HAL IDs starting at the queue's `done` offset,
+ * fetches them from HAL, imports each one, appends one report row per ID and
+ * saves the updated queue. On an API error the queue is saved with
+ * `last_error` set and `done` is left untouched, so the same batch is retried
+ * on the next call.
+ *
+ * Report row layout: hal_id, spip_id, post_id, status, has_axe, axes_source, error.
+ */
+ private function handle_csv_batch(): void {
+ $queue = get_option(self::CSV_QUEUE_OPTION, null);
+ if (!$queue) { $this->message = ['error', 'Aucune queue active.']; return; }
+
+ // Next slice of IDs that have not been processed yet
+ $batch = array_slice($queue['hal_ids'], $queue['done'], self::CSV_BATCH_SIZE);
+ if (empty($batch)) {
+ $this->message = ['success', 'Import terminé — tous les batches ont été traités.'];
+ return;
+ }
+
+ $docs = $this->api->fetch_by_hal_ids($batch, self::CSV_BATCH_SIZE);
+ if (is_wp_error($docs)) {
+ // Record the failure but do not advance `done`
+ $queue['last_error'] = $docs->get_error_message();
+ $queue['updated_at'] = current_time('mysql');
+ update_option(self::CSV_QUEUE_OPTION, $queue, false);
+ $this->message = ['error', 'Erreur HAL API : ' . $docs->get_error_message()];
+ return;
+ }
+
+ // Normalize wp_users_by_hal_id keys to lowercase for the importer
+ $users_map = [];
+ foreach ($queue['wp_users_by_hal_id'] as $hid => $u) {
+ $users_map[strtolower(trim((string) $hid))] = $u;
+ }
+
+ $importer = new Thalim_HAL_Importer_Logic();
+ $batch_imported = 0;
+ $batch_skipped = 0;
+ $batch_errors = 0;
+
+ foreach ($batch as $hal_id) {
+ $spip_id = $queue['spip_map'][$hal_id] ?? '';
+ $doc = $docs[$hal_id] ?? null; // fetch_by_hal_ids() keys its result by halId_s
+ $ctx = $queue['spip_ctx'][$hal_id] ?? [];
+
+ if (!$doc) {
+ $queue['report'][] = [$hal_id, $spip_id, '', 'not_found_in_hal', 'false', 'none', 'HAL API did not return this hal_id'];
+ $batch_errors++;
+ continue;
+ }
+
+ $post_id = $importer->import($doc, $users_map, $queue['status'], (bool) $queue['backdate'], $ctx);
+ if (is_wp_error($post_id)) {
+ $code = $post_id->get_error_code();
+ $queue['report'][] = [$hal_id, $spip_id, '', $code, 'false', 'none', $post_id->get_error_message()];
+ // An 'exists' error means the publication was already imported: counted as skipped, not as an error
+ if ($code === 'exists') $batch_skipped++;
+ else $batch_errors++;
+ } else {
+ $source = $importer->last_axes_source;
+ $has_axe = $source !== 'none' ? 'true' : 'false';
+ $queue['report'][] = [$hal_id, $spip_id, (string) $post_id, 'imported', $has_axe, $source, ''];
+ $batch_imported++;
+ }
+ }
+
+ // Advance past the whole batch, whatever the individual outcomes were
+ $queue['done'] += count($batch);
+ $queue['last_error'] = '';
+ $queue['updated_at'] = current_time('mysql');
+ update_option(self::CSV_QUEUE_OPTION, $queue, false);
+
+ $this->message = ['success', sprintf(
+ 'Batch traité : %d importé(s), %d déjà importé(s), %d erreur(s). Progression : %d / %d.',
+ $batch_imported, $batch_skipped, $batch_errors,
+ $queue['done'], $queue['total']
+ )];
+ }
+
+ /**
+ * Cancel the CSV import by deleting the persisted queue option
+ * (the accumulated report is stored inside it and is removed as well).
+ */
+ private function handle_csv_cancel(): void {
+ delete_option(self::CSV_QUEUE_OPTION);
+ $this->message = ['success', 'Queue CSV annulée.'];
+ }
+
+    /**
+     * Stream the accumulated CSV import report as a file download and end the request.
+     *
+     * Returns with a message in $this->message (without exiting) when there is
+     * no report, or when response headers have already been sent and a download
+     * can therefore no longer be started.
+     *
+     * NOTE(review): a download needs to run before any page output. If this
+     * handler is only reached while the admin page is being rendered, it should
+     * be moved to an earlier hook — confirm how the plugin bootstrap wires it.
+     */
+    private function handle_csv_download_report(): void {
+        $queue = get_option(self::CSV_QUEUE_OPTION, null);
+        if (!$queue || empty($queue['report'])) {
+            $this->message = ['warning', 'Aucun rapport à télécharger.'];
+            return;
+        }
+
+        // Once output has started, header() fails and the CSV would be dumped
+        // into the middle of the HTML page: report the problem instead.
+        if (headers_sent()) {
+            $this->message = ['error', 'Impossible de télécharger le rapport : la sortie de la page a déjà commencé.'];
+            return;
+        }
+
+        // Discard anything already buffered so the file contains only CSV.
+        while (ob_get_level() > 0) {
+            if (!ob_end_clean()) break; // non-removable buffer: stop rather than loop forever
+        }
+
+        $filename = 'hal-import-report-' . date('Ymd-His') . '.csv';
+        header('Content-Type: text/csv; charset=utf-8');
+        header('Content-Disposition: attachment; filename="' . $filename . '"');
+
+        $out = fopen('php://output', 'w');
+        fputcsv($out, ['hal_id', 'spip_id', 'post_id', 'status', 'has_axe', 'axes_source', 'error']);
+        foreach ($queue['report'] as $row) {
+            fputcsv($out, $row);
+        }
+        fclose($out);
+        exit;
+    }
+
+ // ========================================================================
+ // End CSV bulk import
+ // ========================================================================
+
+    /**
+     * CSS class of a preview row: imported takes precedence over ready,
+     * anything else is blocked.
+     */
+    private function get_row_class($doc) {
+        if ($doc['is_imported']) {
+            $css = 'hal-status-imported';
+        } elseif ($doc['has_match']) {
+            $css = 'hal-status-ready';
+        } else {
+            $css = 'hal-status-blocked';
+        }
+        return $css;
+    }
+
+    /**
+     * Status symbol of a preview row, using the same precedence as the row
+     * class: imported, then ready, then blocked.
+     */
+    private function get_status_icon($doc) {
+        if ($doc['is_imported']) {
+            $icon = '✓';
+        } elseif ($doc['has_match']) {
+            $icon = '★';
+        } else {
+            $icon = '✗';
+        }
+        return $icon;
+    }
+}
diff --git a/includes/class-hal-api.php b/includes/class-hal-api.php
new file mode 100644
index 0000000..026092c
--- /dev/null
+++ b/includes/class-hal-api.php
@@ -0,0 +1,137 @@
+build_url($rows, $start, $sort, $date_from, $date_to, $author_hal_id);
+ return $this->request($url);
+ }
+
+    /**
+     * Fetch full HAL docs by a list of hal_ids (batched).
+     *
+     * Uses the Solr filter fq=halId_s:("id1" OR "id2" OR ...). No structId
+     * filter is applied: documents are fetched by exact halId regardless of
+     * structure. Each ID is sent as a quoted phrase so that characters with a
+     * meaning in the Solr query syntax cannot alter the query.
+     *
+     * @param string[] $hal_ids HAL IDs to fetch. Empty and non-scalar entries are
+     *                          ignored; duplicates are fetched once.
+     * @param int      $batch   Maximum number of IDs per request (default 100);
+     *                          values below 1 are treated as 1.
+     * @return array|WP_Error Docs keyed by halId_s (empty array when there is
+     *                        nothing to fetch), or WP_Error on the first failure.
+     */
+    public function fetch_by_hal_ids(array $hal_ids, int $batch = 100) {
+        // Keep only usable identifiers.
+        $ids = [];
+        foreach ($hal_ids as $id) {
+            if (!is_scalar($id)) continue;
+            $id = trim((string) $id);
+            if ($id !== '') $ids[] = $id;
+        }
+        $ids = array_values(array_unique($ids));
+        if (empty($ids)) return [];
+
+        $docs   = [];
+        $chunks = array_chunk($ids, max(1, $batch)); // array_chunk() rejects sizes below 1
+
+        foreach ($chunks as $index => $chunk) {
+            // Be polite with HAL: pause between requests, not after the last one
+            if ($index > 0) usleep(250000);
+
+            $quoted = array_map(static function (string $id): string {
+                // Inside a quoted phrase only the quote and the backslash need escaping
+                return '"' . addcslashes($id, '"\\') . '"';
+            }, $chunk);
+
+            $filter = 'halId_s:(' . implode(' OR ', $quoted) . ')';
+            $params = [
+                'q=' . urlencode('*:*'),
+                'fq=' . urlencode($filter),
+                'rows=' . count($chunk),
+                'fl=' . urlencode(self::FIELDS),
+                'wt=json',
+            ];
+            $url  = THALIM_HAL_API_BASE . '?' . implode('&', $params);
+            $data = $this->request($url);
+            if (is_wp_error($data)) return $data;
+
+            foreach ($data['response']['docs'] ?? [] as $doc) {
+                if (!empty($doc['halId_s'])) {
+                    $docs[$doc['halId_s']] = $doc;
+                }
+            }
+        }
+        return $docs;
+    }
+
+    /**
+     * Check that the HAL API answers by requesting five publications.
+     *
+     * @return array|WP_Error ['success' => true, 'total' => number found,
+     *                        'sample' => returned docs], or the WP_Error
+     *                        produced by the request.
+     */
+    public function test_connection() {
+        $reply = $this->fetch_publications(5);
+        if (is_wp_error($reply)) {
+            return $reply;
+        }
+
+        $total  = $reply['response']['numFound'] ?? 0;
+        $sample = $reply['response']['docs'] ?? [];
+
+        return [
+            'success' => true,
+            'total'   => $total,
+            'sample'  => $sample
+        ];
+    }
+
+    /**
+     * Build the HAL search URL, one fq parameter per filter.
+     *
+     * The query is always restricted to the THALIM structure and to the
+     * supported document types. Date and author filters are added only when
+     * provided. User-supplied values are validated or quoted before being
+     * placed in the Solr filter so they cannot change the query structure.
+     *
+     * @param int    $rows          Maximum number of docs to return.
+     * @param int    $start         Offset of the first doc.
+     * @param string $sort          Solr sort clause.
+     * @param string $date_from     YYYY-MM-DD or empty; any other value is ignored.
+     * @param string $date_to       YYYY-MM-DD or empty; any other value is ignored.
+     * @param string $author_hal_id idHAL to filter on, or empty for no author filter.
+     * @return string Full request URL.
+     */
+    private function build_url($rows = 5, $start = 0, $sort = 'modifiedDate_tdate desc', $date_from = '', $date_to = '', $author_hal_id = '') {
+        $doc_types = implode(' OR ', THALIM_HAL_DOC_TYPES);
+
+        // Only a strict YYYY-MM-DD value may reach the range filter.
+        $is_ymd = static function ($value): bool {
+            return is_string($value) && preg_match('/^\d{4}-\d{2}-\d{2}$/', $value) === 1;
+        };
+        $from = $is_ymd($date_from) ? $date_from . 'T00:00:00Z' : '*';
+        $to   = $is_ymd($date_to)   ? $date_to . 'T23:59:59Z'   : '*';
+
+        $params = [
+            'q=' . urlencode('*:*'),
+            'fq=' . urlencode('structId_i:' . THALIM_HAL_STRUCT_ID),
+            'fq=' . urlencode('docType_s:(' . $doc_types . ')'),
+        ];
+
+        if ($from !== '*' || $to !== '*') {
+            $params[] = 'fq=' . urlencode('producedDate_tdate:[' . $from . ' TO ' . $to . ']');
+        }
+
+        $author_hal_id = trim((string) $author_hal_id);
+        if ($author_hal_id !== '') {
+            // Quoted phrase: exact match, and query syntax characters in the value stay literal
+            $params[] = 'fq=' . urlencode('authIdHal_s:"' . addcslashes($author_hal_id, '"\\') . '"');
+        }
+
+        $params = array_merge($params, [
+            'rows=' . intval($rows),
+            'start=' . intval($start),
+            'sort=' . urlencode($sort),
+            'fl=' . urlencode(self::FIELDS),
+            'wt=json'
+        ]);
+
+        return THALIM_HAL_API_BASE . '?' . implode('&', $params);
+    }
+
+ /**
+ * Get the API URL for debugging display.
+ *
+ * Builds the URL with build_url() for the first $rows results, using its
+ * default sort and no date or author filter.
+ */
+ public function get_api_url($rows = 5) {
+ return $this->build_url($rows, 0);
+ }
+
+    /**
+     * Perform a GET request against the HAL API and decode the JSON body.
+     *
+     * @param string $url Full request URL.
+     * @return array|WP_Error Decoded response as an associative array; WP_Error
+     *                        on transport failure, on a non-200 status
+     *                        ('api_error'), or when the body is not a JSON
+     *                        object/array ('json_error').
+     */
+    private function request($url) {
+        $response = wp_remote_get($url, ['timeout' => 30, 'headers' => ['Accept' => 'application/json']]);
+        if (is_wp_error($response)) return $response;
+
+        $code = wp_remote_retrieve_response_code($response);
+        if ($code !== 200) return new WP_Error('api_error', "HTTP $code");
+
+        $data = json_decode(wp_remote_retrieve_body($response), true);
+        // Valid JSON such as `null` or a bare string decodes without error but
+        // is not usable by callers, which index the result as an array.
+        if (json_last_error() !== JSON_ERROR_NONE || !is_array($data)) {
+            return new WP_Error('json_error', 'Invalid JSON');
+        }
+        return $data;
+    }
+}
diff --git a/includes/class-importer.php b/includes/class-importer.php
new file mode 100644
index 0000000..76ba277
--- /dev/null
+++ b/includes/class-importer.php
@@ -0,0 +1,523 @@
+ WordPress category ID
+ private const DOC_TYPE_MAP = [
+ 'ART' => 16, // Article
+ 'COUV' => 16, // Book chapter -> "Articles" category
+ 'OUV' => 15, // Book -> "Ouvrages" category
+ 'COMM' => 13, // Communication -> "Communications" category
+ 'ISSUE' => 16, // Journal issue editorship -> "Articles" category
+ 'PROCEEDINGS' => 15, // Edited book / proceedings -> "Ouvrages" category
+ 'THESE' => 14, // Thesis -> "Soutenances" category
+ 'HDR' => 14, // HDR -> "Soutenances" category
+ 'SON' => 19, // Audio -> "Captations audio/vidéo" category
+ 'VIDEO' => 19, // Video -> "Captations audio/vidéo" category
+ ];
+
+ // Doc types that use the date_de_debut meta instead of datetime
+ private const EVENT_DOC_TYPES = ['COMM', 'THESE', 'HDR', 'SON', 'VIDEO'];
+
+ // Pods IDs, hard-coded here — NOTE(review): per the original note they were read from the
+ // database and are stable per installation; verify them before running on another install
+ private const POD_ID_POST = 8;
+ private const FIELD_ID_CATEGORIE = 16; // "Type d'annonce" (picks from WP category)
+ private const FIELD_ID_MEMBRES = 178;
+ private const FIELD_ID_AUTRE_MBRES = 195; // autre_membres (unused in import, for reference)
+ private const FIELD_ID_AXES = 270; // axes_thematiques (picks from axe_thematique)
+ private const FIELD_ID_PROGRAMMES = 271; // programmes_de_recherche (picks from programme_de_recherche)
+ private const FIELD_ID_ETIQUETTES = 652; // etiquettes field, i.e. tags (picks from post_tag)
+
+ /** Source of the axes applied on the last import(): 'spip' | 'coauthors' | 'owner' | 'none'. */
+ public $last_axes_source = 'none';
+
+    /**
+     * Tell whether a post already carries the given HAL ID.
+     *
+     * Counts postmeta rows whose `hal_id` meta equals $hal_id; an empty ID is
+     * never considered imported.
+     */
+    public function is_imported($hal_id) {
+        if (empty($hal_id)) {
+            return false;
+        }
+
+        global $wpdb;
+        $sql = $wpdb->prepare(
+            "SELECT COUNT(*) FROM {$wpdb->postmeta} WHERE meta_key = 'hal_id' AND meta_value = %s",
+            $hal_id
+        );
+        $matches = (int) $wpdb->get_var($sql);
+
+        return $matches > 0;
+    }
+
+    /**
+     * Look up the WordPress category ID mapped to a HAL doc type.
+     * Returns null when the type has no entry in DOC_TYPE_MAP.
+     */
+    public function get_category_id($doc_type) {
+        $map = self::DOC_TYPE_MAP;
+        return isset($map[$doc_type]) ? $map[$doc_type] : null;
+    }
+
+ /**
+ * Get the full mapping of HAL doc type code => WordPress category ID.
+ */
+ public function get_doc_type_map() {
+ return self::DOC_TYPE_MAP;
+ }
+
+ /**
+ * Import a HAL publication as a WordPress post.
+ *
+ * @param array $hal_doc Raw HAL API document.
+ * @param array $wp_users_by_hal_id Map of normalized_hal_id => ['id' => int, 'name' => string].
+ * @param string $post_status Target post_status (default 'pending').
+ * @param bool $backdate_post Use producedDate_s as post_date (default false).
+ * @param array $spip_context SPIP-derived context for bulk imports:
+ * ['axes' => int[], 'tags' => int[], 'programmes' => int[], 'owner_user_id' => ?int]
+ * @return int|WP_Error New post ID on success, WP_Error on failure.
+ * The axes source is stored in $this->last_axes_source for caller reporting.
+ */
+ public function import(
+ array $hal_doc,
+ array $wp_users_by_hal_id = [],
+ string $post_status = 'pending',
+ bool $backdate_post = false,
+ array $spip_context = []
+ ) {
+ $hal_id = $hal_doc['halId_s'] ?? '';
+ $doc_type = $hal_doc['docType_s'] ?? '';
+
+ if (empty($hal_id)) return new WP_Error('no_id', 'Missing HAL ID');
+ if ($this->is_imported($hal_id)) return new WP_Error('exists', 'Already imported: ' . $hal_id);
+
+ // --- Resolve post author from HAL author IDs ---
+ $author_hal_ids = $hal_doc['authIdHal_s'] ?? [];
+ $author_names = $hal_doc['authFullName_s'] ?? [];
+ $matched_user_ids = [];
+ $matched_user_names = [];
+ foreach ($author_hal_ids as $hal_author_id) {
+ $normalized = strtolower(trim($hal_author_id));
+ if (isset($wp_users_by_hal_id[$normalized])) {
+ $user = $wp_users_by_hal_id[$normalized];
+ $matched_user_ids[] = $user['id'];
+ $matched_user_names[] = $user['name'];
+ }
+ }
+ $post_author = !empty($matched_user_ids) ? $matched_user_ids[0] : 1;
+
+ // --- Create the post ---
+ $post_args = [
+ 'post_title' => wp_strip_all_tags($hal_doc['title_s'][0] ?? ''),
+ 'post_content' => wp_kses_post($hal_doc['abstract_s'][0] ?? ''),
+ 'post_status' => $post_status,
+ 'post_type' => 'post',
+ 'post_author' => $post_author,
+ ];
+
+ // Backdate post_date to HAL producedDate_s when requested (for legacy bulk imports)
+ if ($backdate_post) {
+ $backdate_ymd = $this->parse_hal_date($hal_doc['producedDate_s'] ?? '');
+ if ($backdate_ymd) {
+ $post_args['post_date'] = $backdate_ymd . ' 12:00:00';
+ $post_args['post_date_gmt'] = $backdate_ymd . ' 12:00:00';
+ }
+ }
+
+ $post_id = wp_insert_post($post_args, true);
+ if (is_wp_error($post_id)) return $post_id;
+
+ // --- Category — Pods triple-storage pattern ---
+ $cat_id = self::DOC_TYPE_MAP[$doc_type] ?? null;
+ if ($cat_id) {
+ global $wpdb;
+
+ // 1. Native WP category assignment
+ wp_set_post_categories($post_id, [$cat_id]);
+
+ // 2. Pods postmeta: single integer value
+ update_post_meta($post_id, 'categorie', $cat_id);
+
+ // 3. Pods _pods_ meta: serialized array of one integer
+ update_post_meta($post_id, '_pods_categorie', [$cat_id]);
+
+ // 4. wp_podsrel row
+ $wpdb->insert(
+ $wpdb->prefix . 'podsrel',
+ [
+ 'pod_id' => self::POD_ID_POST,
+ 'field_id' => self::FIELD_ID_CATEGORIE,
+ 'item_id' => $post_id,
+ 'related_pod_id' => 0,
+ 'related_field_id'=> 0,
+ 'related_item_id' => $cat_id,
+ 'weight' => 0,
+ ],
+ ['%d', '%d', '%d', '%d', '%d', '%d', '%d']
+ );
+ }
+
+ // --- Core meta ---
+ update_post_meta($post_id, 'hal_id', $hal_id);
+ update_post_meta($post_id, 'hal_url', $hal_doc['uri_s'] ?? '');
+
+ // HAL PDF file -> lien_externe_1
+ $file_url = $hal_doc['fileMain_s'] ?? '';
+ if ($file_url) {
+ update_post_meta($post_id, 'lien_externe_1', $file_url);
+ update_post_meta($post_id, 'titre_du_lien_externe_1', 'Document HAL // HAL Document');
+ }
+
+ // Journal (ART)
+ $journal = $hal_doc['journalTitle_s'] ?? '';
+ if ($journal) {
+ update_post_meta($post_id, 'journal', $journal);
+ }
+
+ // Book title as sous-titre (COUV), only if different from post title
+ $book_title = $hal_doc['bookTitle_s'] ?? '';
+ $post_title = $hal_doc['title_s'][0] ?? '';
+ if ($book_title && $book_title !== $post_title) {
+ update_post_meta($post_id, 'sous-titre', $book_title);
+ }
+
+ // Publisher -> editeur (plain text, no Pods triple-storage needed)
+ $publisher = $hal_doc['publisher_s'] ?? '';
+ if (is_array($publisher)) $publisher = $publisher[0] ?? '';
+ if ($publisher) {
+ update_post_meta($post_id, 'editeur', $publisher);
+ }
+
+ // Fonction label: bilingual plain text (only relevant for cats 4, 15, 16)
+ if ($doc_type === 'COUV') {
+ update_post_meta($post_id, 'fonction_auteur', 'Auteur du chapitre // Chapter author');
+ } elseif ($doc_type === 'ISSUE') {
+ update_post_meta($post_id, 'fonction_auteur', 'Direction de numéro // Editor-in-Chief');
+ }
+
+ // --- Keywords -> étiquettes (Pods triple-storage, picks from post_tag) ---
+ $hal_keywords = $hal_doc['keyword_s'] ?? [];
+ if (!empty($hal_keywords)) {
+ $matched_term_ids = $this->match_keywords_to_tags($hal_keywords);
+ if (!empty($matched_term_ids)) {
+ global $wpdb;
+
+ // 1. Native WP term relationship
+ wp_set_object_terms($post_id, $matched_term_ids, 'post_tag', true);
+
+ // 2. Individual postmeta rows (one per term ID)
+ foreach ($matched_term_ids as $tid) {
+ add_post_meta($post_id, 'etiquettes', (string) $tid);
+ }
+
+ // 3. _pods_etiquettes: serialized array of term IDs as integers
+ update_post_meta($post_id, '_pods_etiquettes', array_map('intval', $matched_term_ids));
+
+ // 4. wp_podsrel rows
+ foreach ($matched_term_ids as $weight => $tid) {
+ $wpdb->insert(
+ $wpdb->prefix . 'podsrel',
+ [
+ 'pod_id' => self::POD_ID_POST,
+ 'field_id' => self::FIELD_ID_ETIQUETTES,
+ 'item_id' => $post_id,
+ 'related_pod_id' => 0,
+ 'related_field_id' => 0,
+ 'related_item_id' => (int) $tid,
+ 'weight' => $weight,
+ ],
+ ['%d', '%d', '%d', '%d', '%d', '%d', '%d']
+ );
+ }
+ }
+ }
+
+ // --- Date meta ---
+ $date_raw = $hal_doc['producedDate_s'] ?? '';
+ // THESE/HDR: use defenseDate_s if available, fallback to producedDate_s
+ if (in_array($doc_type, ['THESE', 'HDR'])) {
+ $defense = $hal_doc['defenseDate_s'] ?? '';
+ if ($defense) $date_raw = $defense;
+ }
+ $date_meta = $this->parse_hal_date($date_raw);
+ if ($date_meta) {
+ $date_field = in_array($doc_type, self::EVENT_DOC_TYPES) ? 'date_de_debut' : 'datetime';
+ update_post_meta($post_id, $date_field, $date_meta);
+ }
+
+ // --- Type pick fields (pick custom-simple — no triple-storage) ---
+ $type_picks = [
+ 'PROCEEDINGS' => ['type_colloque', 'Colloque'],
+ 'THESE' => ['type_soutenance', 'Soutenance de thèse'],
+ 'HDR' => ['type_soutenance', "Soutenance d'habilitation"],
+ 'SON' => ['type_captation', 'Son'],
+ 'VIDEO' => ['type_captation', 'Vidéo'],
+ ];
+ if (isset($type_picks[$doc_type])) {
+ [$field, $value] = $type_picks[$doc_type];
+ update_post_meta($post_id, $field, $value);
+ }
+
+ // --- Lieu for PROCEEDINGS (city, country from HAL) ---
+ if ($doc_type === 'PROCEEDINGS') {
+ $city = $hal_doc['city_s'] ?? '';
+ $country = $hal_doc['country_s'] ?? '';
+ if (is_array($city)) $city = $city[0] ?? '';
+ if (is_array($country)) $country = $country[0] ?? '';
+ $lieu = trim("$city, $country", ', ');
+ if ($lieu) {
+ update_post_meta($post_id, 'lieu', $lieu);
+ }
+ }
+
+ // --- Conference title as sous-titre for PROCEEDINGS ---
+ if ($doc_type === 'PROCEEDINGS') {
+ $conf_title = $hal_doc['conferenceTitle_s'] ?? '';
+ if ($conf_title) {
+ update_post_meta($post_id, 'sous-titre', $conf_title);
+ }
+ }
+
+ // --- Reference bibliographique from citationFull_s (cats 4, 15, 16) ---
+ $citation = $hal_doc['citationFull_s'] ?? '';
+ if ($citation && in_array($cat_id, [4, 15, 16])) {
+ update_post_meta($post_id, 'reference_bibliographique', wp_kses_post($citation));
+ }
+
+ // --- Store matched THALIM members — Pods triple-storage pattern
+ if (!empty($matched_user_ids)) {
+ global $wpdb;
+
+ // 1. Individual postmeta rows (one per user ID, as string)
+ foreach ($matched_user_ids as $uid) {
+ add_post_meta($post_id, 'membres', (string) $uid);
+ }
+
+ // 2. _pods_ meta: serialized PHP array of user IDs as integers
+ update_post_meta($post_id, '_pods_membres', array_map('intval', $matched_user_ids));
+
+ // 3. wp_podsrel rows (one per user, weight = position)
+ foreach ($matched_user_ids as $weight => $uid) {
+ $wpdb->insert(
+ $wpdb->prefix . 'podsrel',
+ [
+ 'pod_id' => self::POD_ID_POST,
+ 'field_id' => self::FIELD_ID_MEMBRES,
+ 'item_id' => $post_id,
+ 'related_pod_id' => 0,
+ 'related_field_id'=> 0,
+ 'related_item_id' => (int) $uid,
+ 'weight' => $weight,
+ ],
+ ['%d', '%d', '%d', '%d', '%d', '%d', '%d']
+ );
+ }
+ }
+
+ // --- Axes thématiques : cascade (SPIP direct > co-auteurs > owner) ---
+ $axes_resolution = $this->resolve_axes_cascade($matched_user_ids, $spip_context);
+ $this->last_axes_source = $axes_resolution['source'];
+ if (!empty($axes_resolution['term_ids'])) {
+ $this->set_pods_taxonomy_multi(
+ $post_id, 'axes_thematiques', self::FIELD_ID_AXES,
+ $axes_resolution['term_ids'], 'axe_thematique'
+ );
+ }
+
+ // --- Programmes de recherche : SPIP direct OR keyword matching ---
+ $prog_ids = !empty($spip_context['programmes'])
+ ? array_map('intval', $spip_context['programmes'])
+ : $this->match_terms_by_keywords($hal_doc['keyword_s'] ?? [], 'programme_de_recherche');
+ if (!empty($prog_ids)) {
+ $this->set_pods_taxonomy_multi(
+ $post_id, 'programmes_de_recherche', self::FIELD_ID_PROGRAMMES,
+ $prog_ids, 'programme_de_recherche'
+ );
+ }
+
+ // --- Étiquettes SPIP directes (en plus du matching HAL déjà fait plus haut) ---
+ if (!empty($spip_context['tags'])) {
+ // Merge avec les tags déjà posés par le bloc étiquettes plus haut
+ $existing = wp_get_object_terms($post_id, 'post_tag', ['fields' => 'ids']);
+ $merged = array_values(array_unique(array_merge(
+ is_array($existing) ? array_map('intval', $existing) : [],
+ array_map('intval', $spip_context['tags'])
+ )));
+ $this->set_pods_taxonomy_multi(
+ $post_id, 'etiquettes', self::FIELD_ID_ETIQUETTES,
+ array_diff($merged, is_array($existing) ? $existing : []),
+ 'post_tag'
+ );
+ }
+
+ // Unmatched authors as free text — remove matched names from the full list
+ $unmatched = array_filter($author_names, function($name) use ($matched_user_names) {
+ foreach ($matched_user_names as $matched) {
+ // Loose comparison: ignore case and extra spaces
+ if (mb_strtolower(trim($name)) === mb_strtolower(trim($matched))) {
+ return false;
+ }
+ }
+ return true;
+ });
+ if (!empty($unmatched)) {
+ update_post_meta($post_id, 'autrepersonnes', implode(', ', array_values($unmatched)));
+ }
+
+ // --- Polylang: assign French language ---
+ if (function_exists('pll_set_post_language')) {
+ pll_set_post_language($post_id, 'fr');
+ }
+
+ return $post_id;
+ }
+
+ /**
+ * Match HAL keyword strings against existing WordPress terms in a given taxonomy.
+ *
+ * WP terms are often stored bilingually as "Terme FR // English term".
+ * Matching is case-insensitive against both the FR and EN parts.
+ *
+ * @param string[] $hal_keywords Raw keyword strings from HAL keyword_s field.
+ * @param string $taxonomy WordPress taxonomy (e.g. 'post_tag', 'programme_de_recherche').
+ * @return int[] Matched term IDs.
+ */
+    private function match_terms_by_keywords(array $hal_keywords, string $taxonomy = 'post_tag'): array {
+        // Normalise the HAL keywords once into a lookup set (key = lowercased,
+        // trimmed keyword). Non-scalar and blank entries are dropped so that a
+        // malformed HAL value cannot make trim() throw and an empty keyword can
+        // never match anything.
+        $wanted = [];
+        foreach ($hal_keywords as $keyword) {
+            if (!is_scalar($keyword)) continue;
+            $normalised = mb_strtolower(trim((string) $keyword));
+            if ($normalised !== '') $wanted[$normalised] = true;
+        }
+        // Nothing usable to compare against: skip the term query entirely.
+        if (empty($wanted)) return [];
+
+        $terms = get_terms(['taxonomy' => $taxonomy, 'hide_empty' => false]);
+        if (is_wp_error($terms) || empty($terms)) return [];
+
+        $matched = [];
+        foreach ($terms as $term) {
+            // Term names may be bilingual: "Terme FR // English term".
+            $parts = explode(' // ', $term->name, 2);
+            $fr = mb_strtolower(trim($parts[0]));
+            $en = isset($parts[1]) ? mb_strtolower(trim($parts[1])) : '';
+
+            // Constant-time set lookups; blank keys are never present in $wanted,
+            // so an empty FR or EN part cannot produce a match.
+            if (isset($wanted[$fr]) || isset($wanted[$en])) {
+                $matched[] = (int) $term->term_id;
+            }
+        }
+
+        return $matched;
+    }
+
+ /**
+ * Backwards-compatible alias for the renamed method.
+ */
+    private function match_keywords_to_tags(array $hal_keywords): array {
+        // Same matcher as every other taxonomy, pinned to the 'post_tag' taxonomy.
+        $tag_term_ids = $this->match_terms_by_keywords($hal_keywords, 'post_tag');
+
+        return $tag_term_ids;
+    }
+
+ /**
+ * Resolve axes thématiques through a cascade of strategies.
+ *
+ * 1. Direct SPIP links ($spip_context['axes'])
+ * 2. Axes from all matched WP co-authors (_pods_axes_thematiques)
+ * 3. Axe of the SPIP flux owner user ($spip_context['owner_user_id'])
+ *
+ * @return array{source: string, term_ids: int[]}
+ */
+    private function resolve_axes_cascade(array $matched_user_ids, array $spip_context): array {
+        // 1. Direct SPIP links win. Zero IDs (what a non-numeric value casts to)
+        //    are discarded, exactly as in the two user-based steps below, so an
+        //    unusable list falls through to the next strategy.
+        if (!empty($spip_context['axes'])) {
+            $ids = array_values(array_unique(array_filter(
+                array_map('intval', (array) $spip_context['axes'])
+            )));
+            if (!empty($ids)) return ['source' => 'spip', 'term_ids' => $ids];
+        }
+
+        // 2. Union of the axes of every matched co-author.
+        $from_authors = [];
+        foreach ($matched_user_ids as $uid) {
+            foreach ($this->axes_term_ids_for_user((int) $uid) as $tid) {
+                $from_authors[] = $tid;
+            }
+        }
+        $from_authors = array_values(array_unique($from_authors));
+        if (!empty($from_authors)) {
+            return ['source' => 'coauthors', 'term_ids' => $from_authors];
+        }
+
+        // 3. Axes of the user who owns the SPIP flux.
+        if (!empty($spip_context['owner_user_id'])) {
+            $ids = array_values(array_unique(
+                $this->axes_term_ids_for_user((int) $spip_context['owner_user_id'])
+            ));
+            if (!empty($ids)) return ['source' => 'owner', 'term_ids' => $ids];
+        }
+
+        // No strategy produced any term.
+        return ['source' => 'none', 'term_ids' => []];
+    }
+
+    /**
+     * Read the axe term IDs stored in a user's '_pods_axes_thematiques' meta.
+     *
+     * Accepts the value either as an array or as a serialized string, and
+     * drops zero IDs. Any other value yields an empty list.
+     *
+     * @param int $user_id WordPress user ID.
+     * @return int[] Non-zero term IDs (may contain duplicates).
+     */
+    private function axes_term_ids_for_user(int $user_id): array {
+        $axes = get_user_meta($user_id, '_pods_axes_thematiques', true);
+
+        if (is_string($axes) && $axes !== '') {
+            // Best-effort decode of a still-serialized value. allowed_classes=false
+            // prevents object instantiation from stored data; @ silences the
+            // notice unserialize() raises on a plain, non-serialized string.
+            $axes = @unserialize($axes, ['allowed_classes' => false]);
+        }
+        if (!is_array($axes)) return [];
+
+        return array_values(array_filter(array_map('intval', $axes)));
+    }
+
+ /**
+ * Parse a HAL date (YYYY, YYYY-MM, YYYY-MM-DD, or ISO datetime) to Y-m-d.
+ * Returns '' on failure. HAL often emits partial dates that strtotime
+ * mishandles (e.g. strtotime("2022") interprets 2022 as a time, not a year).
+ */
+    private function parse_hal_date(string $raw): string {
+        $raw = trim($raw);
+        if ($raw === '') return '';
+
+        // Recognised HAL shapes, most specific first. The full-date pattern is
+        // anchored at the start only, so ISO datetimes such as
+        // "2022-05-17T10:00:00Z" are accepted and truncated to their date part.
+        $shapes = [
+            '/^(\d{4})-(\d{2})-(\d{2})/',
+            '/^(\d{4})-(\d{2})$/',
+            '/^(\d{4})$/',
+        ];
+        foreach ($shapes as $shape) {
+            if (!preg_match($shape, $raw, $m)) continue;
+
+            // A missing month or day defaults to 01.
+            $year  = $m[1];
+            $month = $m[2] ?? '01';
+            $day   = $m[3] ?? '01';
+
+            // Reject impossible calendar dates (month 13, 30 February, ...)
+            // instead of storing them verbatim in the post meta.
+            return checkdate((int) $month, (int) $day, (int) $year)
+                ? "{$year}-{$month}-{$day}"
+                : '';
+        }
+
+        // Any other format: best-effort parse. strtotime() returns false on
+        // failure; compare strictly so timestamp 0 is not mistaken for a failure.
+        $ts = strtotime($raw);
+        return $ts !== false ? date('Y-m-d', $ts) : '';
+    }
+
+ /**
+ * Generic Pods triple-storage writer for multi-value taxonomy fields.
+ * Writes to: wp_term_relationships, postmeta rows, _pods_ meta, wp_podsrel.
+ */
+    private function set_pods_taxonomy_multi(int $post_id, string $field_name, int $field_id, array $term_ids, string $taxonomy): void {
+        // $post_id    post receiving the terms
+        // $field_name Pods field name (meta key; its '_pods_' twin is derived from it)
+        // $field_id   Pods field ID written to the podsrel rows
+        // $term_ids   term IDs to attach
+        // $taxonomy   taxonomy the IDs belong to
+
+        // Normalise input: integers only, no duplicates, no zero/negative IDs
+        // (a non-numeric value casts to 0, which is not a term).
+        $term_ids = array_values(array_unique(array_filter(
+            array_map('intval', $term_ids),
+            fn($tid) => $tid > 0
+        )));
+        if (empty($term_ids)) return;
+        global $wpdb;
+
+        // Terms are appended (wp_set_object_terms is called with $append = true),
+        // so the Pods storage must append too: find out what the field already
+        // holds and only write the IDs that are missing. Overwriting the _pods_
+        // meta here would drop IDs stored by an earlier write on the same field.
+        $pods_meta_key = '_pods_' . $field_name;
+        $stored = get_post_meta($post_id, $pods_meta_key, true);
+        $already = is_array($stored)
+            ? array_values(array_unique(array_map('intval', $stored)))
+            : [];
+        $new_ids = array_values(array_diff($term_ids, $already));
+
+        // 1. wp_term_relationships
+        wp_set_object_terms($post_id, $term_ids, $taxonomy, true);
+
+        // Everything requested is already recorded on the Pods side.
+        if (empty($new_ids)) return;
+
+        // 2. postmeta (one row per new term ID, as string)
+        foreach ($new_ids as $tid) {
+            add_post_meta($post_id, $field_name, (string) $tid);
+        }
+
+        // 3. _pods_ meta: array of ints, previously stored IDs first
+        update_post_meta($post_id, $pods_meta_key, array_merge($already, $new_ids));
+
+        // 4. wp_podsrel rows (weight = position, continuing after the stored IDs)
+        $first_weight = count($already);
+        foreach ($new_ids as $position => $tid) {
+            $wpdb->insert(
+                $wpdb->prefix . 'podsrel',
+                [
+                    'pod_id'           => self::POD_ID_POST,
+                    'field_id'         => $field_id,
+                    'item_id'          => $post_id,
+                    'related_pod_id'   => 0,
+                    'related_field_id' => 0,
+                    'related_item_id'  => (int) $tid,
+                    'weight'           => $first_weight + $position,
+                ],
+                ['%d', '%d', '%d', '%d', '%d', '%d', '%d']
+            );
+        }
+    }
+}
diff --git a/thalim-hal-importer.php b/thalim-hal-importer.php
new file mode 100644
index 0000000..d7b7613
--- /dev/null
+++ b/thalim-hal-importer.php
@@ -0,0 +1,86 @@
+load_dependencies();
+ $this->init_hooks();
+ }
+
+    /**
+     * Load the plugin's class files from the directory given by THALIM_HAL_PLUGIN_DIR.
+     */
+    private function load_dependencies() {
+        $class_files = [
+            'includes/class-hal-api.php',
+            'includes/class-admin-page.php',
+            'includes/class-importer.php',
+        ];
+
+        // Same order as the list above; require_once makes repeated calls harmless.
+        foreach ($class_files as $relative_path) {
+            require_once THALIM_HAL_PLUGIN_DIR . $relative_path;
+        }
+    }
+
+    /**
+     * Attach this object's add_admin_menu() method to the 'admin_menu' action.
+     */
+    private function init_hooks() {
+        $menu_callback = [$this, 'add_admin_menu'];
+
+        add_action('admin_menu', $menu_callback);
+    }
+
+    /**
+     * Register the "HAL Import" screen through add_management_page().
+     *
+     * The page requires the 'edit_others_posts' capability and is rendered
+     * by render_admin_page().
+     */
+    public function add_admin_menu() {
+        // Page title and menu title are the same label.
+        $label      = 'HAL Import';
+        $capability = 'edit_others_posts';
+        $menu_slug  = 'thalim-hal-importer';
+        $renderer   = [$this, 'render_admin_page'];
+
+        add_management_page($label, $label, $capability, $menu_slug, $renderer);
+    }
+
+    /**
+     * Menu callback: build a Thalim_HAL_Admin_Page and let it render itself.
+     */
+    public function render_admin_page() {
+        (new Thalim_HAL_Admin_Page())->render();
+    }
+}
+
+// Activation hook: record the version of the plugin being activated.
+// update_option() creates the option when it is missing and overwrites it
+// otherwise; add_option() leaves an existing value untouched, which would keep
+// a stale version number after the plugin is upgraded and re-activated.
+register_activation_hook(__FILE__, function() {
+    update_option('thalim_hal_version', THALIM_HAL_VERSION);
+});
+
+// Deactivation hook: remove the 'thalim_hal_preview_data' transient.
+// NOTE(review): the README describes preview caches keyed by a hash of the
+// filters — confirm this fixed key is the one the admin page actually stores.
+register_deactivation_hook(__FILE__, static function () {
+    delete_transient('thalim_hal_preview_data');
+});
+
+// Bootstrap: obtain the plugin instance when WordPress fires 'plugins_loaded'.
+add_action('plugins_loaded', static function () {
+    Thalim_HAL_Importer::get_instance();
+});