WP category slug (resolved to a term_id at runtime —
    // auto-incremented IDs do not survive a database re-import)
    private const DOC_TYPE_SLUGS = [
        'ART'         => 'articles',                    // Article
        'COUV'        => 'articles',                    // Chapter -> Articles
        'OUV'         => 'ouvrages',                    // Book -> Ouvrages
        'COMM'        => 'communications',              // Communication -> Communications
        'ISSUE'       => 'articles',                    // Issue editorship -> Articles
        'PROCEEDINGS' => 'ouvrages',                    // Book editorship / Proceedings -> Ouvrages
        'THESE'       => 'soutenances',                 // Thesis -> Soutenances
        'HDR'         => 'soutenances',                 // HDR -> Soutenances
        'SON'         => 'medias',                      // Sound -> Médias
        'VIDEO'       => 'medias',                      // Video -> Médias
        'NOTICE'      => 'articles',                    // Notice / review -> Articles
        'BLOG'        => 'medias',                      // Blog / op-ed -> Médias
        'TRAD'        => 'ouvrages',                    // Translation -> Ouvrages (author role "Traduction")
        'REPORT'      => 'publications-et-productions', // Report -> Publications et productions
        'UNDEFINED'   => 'publications-et-productions', // Undefined -> Publications et productions
        'POSTER'      => 'publications-et-productions', // Poster -> Publications et productions
        'OTHER'       => 'publications-et-productions', // Other -> Publications et productions
    ];

    // Doc types that use date_de_debut instead of datetime
    private const EVENT_DOC_TYPES = [
        'COMM',
        'THESE',
        'HDR',
        'SON',
        'VIDEO',
    ];

    /**
     * Resolve a category slug to its term_id, memoised in a static cache.
     *
     * A failed lookup is cached as null, so an unknown slug triggers only one
     * get_term_by() call for as long as the static cache lives.
     *
     * @param string $slug Slug of a term in the 'category' taxonomy.
     * @return int|null The term_id, or null when get_term_by() finds nothing.
     */
    private function cat_id_by_slug(string $slug): ?int {
        static $resolved = [];

        // array_key_exists (not isset) so that cached null results count as hits.
        if (array_key_exists($slug, $resolved)) {
            return $resolved[$slug];
        }

        $category        = get_term_by('slug', $slug, 'category');
        $resolved[$slug] = $category ? (int) $category->term_id : null;

        return $resolved[$slug];
    }

    /**
     * Source of the axes applied on the last import(): 'spip' | 'coauthors' | 'owner' | 'none'.
*/
    public $last_axes_source = 'none';

    /**
     * Tell whether a publication with this HAL ID already exists.
     *
     * Counts postmeta rows whose meta_key is 'hal_id' and whose value equals $hal_id.
     *
     * @param mixed $hal_id HAL identifier; an empty() value short-circuits to false.
     * @return bool True when at least one matching postmeta row exists.
     */
    public function is_imported($hal_id) {
        if (empty($hal_id)) {
            return false;
        }

        global $wpdb;

        $sql = $wpdb->prepare(
            "SELECT COUNT(*) FROM {$wpdb->postmeta} WHERE meta_key = 'hal_id' AND meta_value = %s",
            $hal_id
        );
        $row_count = (int) $wpdb->get_var($sql);

        return $row_count > 0;
    }

    /**
     * Look up the post carrying this hal_id.
     *
     * @param mixed $hal_id HAL identifier; an empty() value short-circuits to null.
     * @return array|null ['id' => int, 'status' => string] for the first matching post, or null.
     */
    public function get_imported_post($hal_id) {
        if (empty($hal_id)) {
            return null;
        }

        global $wpdb;

        $sql = $wpdb->prepare(
            "SELECT p.ID, p.post_status FROM {$wpdb->posts} p INNER JOIN {$wpdb->postmeta} pm ON pm.post_id = p.ID WHERE pm.meta_key = 'hal_id' AND pm.meta_value = %s LIMIT 1",
            $hal_id
        );
        $post_row = $wpdb->get_row($sql);

        if (!$post_row) {
            return null;
        }

        return [
            'id'     => (int) $post_row->ID,
            'status' => $post_row->post_status,
        ];
    }

    /**
     * Get the category term_id mapped to a HAL doc type.
     *
     * @param mixed $doc_type HAL docType code (a key of DOC_TYPE_SLUGS).
     * @return int|null Category term_id, or null when the doc type is unmapped
     *                  or its slug does not resolve to a term.
     */
    public function get_category_id($doc_type) {
        $slug = self::DOC_TYPE_SLUGS[$doc_type] ?? null;

        if (!$slug) {
            return null;
        }

        return $this->cat_id_by_slug($slug);
    }

    /**
     * Get the full doc type mapping (doc type => category term_id).
     *
     * @return array Keys are the DOC_TYPE_SLUGS keys; values are term_ids, or
     *               null where the slug does not resolve.
     */
    public function get_doc_type_map() {
        // array_map over a single array preserves its (string) keys.
        return array_map(
            fn($slug) => $this->cat_id_by_slug($slug),
            self::DOC_TYPE_SLUGS
        );
    }

    /**
     * Import a HAL publication as a WordPress post.
     *
     * @param array    $hal_doc            Raw HAL API document.
     * @param array    $wp_users_by_hal_id Map of normalized_hal_id => ['id' => int, 'name' => string].
     * @param string   $post_status        Target post_status (default 'pending').
     * @param bool     $backdate_post      Use producedDate_s as post_date (default false).
     * @param array    $spip_context       SPIP-derived context for bulk imports:
     *                                     ['axes' => int[], 'tags' => int[], 'programmes' => int[], 'owner_user_id' => ?int]
     * @param int|null $force_post_author  When non-null, used as post_author instead of the first matched user.
     * @return int|WP_Error New post ID on success, WP_Error on failure.
     *                      The axes source is stored in $this->last_axes_source for caller reporting.
*/
    public function import(
        array $hal_doc,
        array $wp_users_by_hal_id = [],
        string $post_status = 'pending',
        bool $backdate_post = false,
        array $spip_context = [],
        ?int $force_post_author = null
    ) {
        $hal_id   = $hal_doc['halId_s'] ?? '';
        $doc_type = $hal_doc['docType_s'] ?? '';

        if (empty($hal_id)) {
            return new WP_Error('no_id', 'Missing HAL ID');
        }
        if ($this->is_imported($hal_id)) {
            return new WP_Error('exists', 'Already imported: ' . $hal_id);
        }

        // Reads a HAL field as one string whether it arrives as an array or as a
        // scalar. Indexing a scalar string with [0] would yield its first
        // character, so every single-value read goes through this normaliser.
        $first = static function ($value): string {
            if (is_array($value)) {
                $value = $value[0] ?? '';
            }
            return is_scalar($value) ? (string) $value : '';
        };

        // --- Resolve post author from HAL author IDs ---
        $author_hal_ids = (array) ($hal_doc['authIdHal_s'] ?? []);
        $author_names   = (array) ($hal_doc['authFullName_s'] ?? []);
        $keywords       = (array) ($hal_doc['keyword_s'] ?? []);

        // user ID => display name. Keyed by ID so that a member listed twice
        // (or reachable through two HAL IDs) is stored only once; insertion
        // order is kept, so the first matched author stays first.
        $matched = [];
        foreach ($author_hal_ids as $hal_author_id) {
            $normalized = strtolower(trim((string) $hal_author_id));
            if (!isset($wp_users_by_hal_id[$normalized])) {
                continue;
            }
            $user    = $wp_users_by_hal_id[$normalized];
            $user_id = (int) $user['id'];
            if (!isset($matched[$user_id])) {
                $matched[$user_id] = $user['name'];
            }
        }
        $matched_user_ids   = array_keys($matched);
        $matched_user_names = array_values($matched);

        // Explicit override first, then the first matched member, then user ID 1.
        $post_author = $force_post_author ?? ($matched_user_ids[0] ?? 1);

        // --- Create the post ---
        $post_title = $first($hal_doc['title_s'] ?? '');
        $post_args  = [
            'post_title'   => wp_strip_all_tags($post_title),
            'post_content' => wp_kses_post($first($hal_doc['abstract_s'] ?? '')),
            'post_status'  => $post_status,
            'post_type'    => 'post',
            'post_author'  => $post_author,
        ];

        // Backdate post_date to HAL producedDate_s when requested (for legacy bulk imports)
        if ($backdate_post) {
            $backdate_ymd = $this->parse_hal_date($first($hal_doc['producedDate_s'] ?? ''));
            if ($backdate_ymd) {
                $local_datetime = $backdate_ymd . ' 12:00:00';
                // post_date is site-local time, post_date_gmt is UTC: convert
                // instead of copying the same wall-clock value into both.
                $post_args['post_date']     = $local_datetime;
                $post_args['post_date_gmt'] = get_gmt_from_date($local_datetime);
            }
        }

        $post_id = wp_insert_post($post_args, true);
        if (is_wp_error($post_id)) {
            return $post_id;
        }

        // --- Category — centralised Pods storage (cf. class-pods-storage.php) ---
        $cat_id = $this->get_category_id($doc_type);
        if ($cat_id) {
            Thalim_HAL_Pods_Storage::set_categorie($post_id, $cat_id);
        }

        // --- Core meta ---
        update_post_meta($post_id, 'hal_id', $hal_id);
        update_post_meta($post_id, 'hal_url', $first($hal_doc['uri_s'] ?? ''));

        // HAL PDF file -> lien_externe_1
        $file_url = $first($hal_doc['fileMain_s'] ?? '');
        if ($file_url) {
            update_post_meta($post_id, 'lien_externe_1', $file_url);
            update_post_meta($post_id, 'titre_du_lien_externe_1', 'Document HAL // HAL Document');
        }

        // Journal (ART)
        $journal = $first($hal_doc['journalTitle_s'] ?? '');
        if ($journal) {
            update_post_meta($post_id, 'journal', $journal);
        }

        // Book title as sous-titre (COUV), only if different from post title
        $book_title = $first($hal_doc['bookTitle_s'] ?? '');
        if ($book_title && $book_title !== $post_title) {
            update_post_meta($post_id, 'sous-titre', $book_title);
        }

        // Publisher -> editeur (plain text, no Pods triple-storage needed)
        $publisher = $first($hal_doc['publisher_s'] ?? '');
        if ($publisher) {
            update_post_meta($post_id, 'editeur', $publisher);
        }

        // Author-role label: bilingual plain text, set only for these doc types.
        $author_roles = [
            'COUV'  => 'Auteur du chapitre // Chapter author',
            'ISSUE' => 'Direction de numéro // Editor-in-Chief',
            'TRAD'  => 'Traduction // Translation',
        ];
        if (isset($author_roles[$doc_type])) {
            update_post_meta($post_id, 'fonction_auteur', $author_roles[$doc_type]);
        }

        // --- HAL keywords + SPIP tags -> étiquettes (single Pods write) ---
        $etiquette_ids = $this->match_keywords_to_tags($keywords);
        if (!empty($spip_context['tags'])) {
            $etiquette_ids = array_merge($etiquette_ids, array_map('intval', $spip_context['tags']));
        }
        // A tag can come from both sources; store each term only once.
        $etiquette_ids = array_values(array_unique($etiquette_ids));
        if (!empty($etiquette_ids)) {
            Thalim_HAL_Pods_Storage::set_relation($post_id, 'etiquettes', $etiquette_ids, 'post_tag');
        }

        // --- Date meta ---
        $date_raw = $first($hal_doc['producedDate_s'] ?? '');
        // THESE/HDR: use defenseDate_s if available, fallback to producedDate_s
        if (in_array($doc_type, ['THESE', 'HDR'], true)) {
            $defense = $first($hal_doc['defenseDate_s'] ?? '');
            if ($defense) {
                $date_raw = $defense;
            }
        }
        $date_meta = $this->parse_hal_date($date_raw);
        if ($date_meta) {
            $date_field = in_array($doc_type, self::EVENT_DOC_TYPES, true) ? 'date_de_debut' : 'datetime';
            update_post_meta($post_id, $date_field, $date_meta);
        }

        // --- Type pick fields (pick custom-simple — no triple-storage) ---
        $type_picks = [
            'PROCEEDINGS' => ['type_colloque', 'Colloque'],
            'THESE'       => ['type_soutenance', 'Soutenance de thèse'],
            'HDR'         => ['type_soutenance', "Soutenance d'habilitation"],
            'SON'         => ['type_captation', 'Son'],
            'VIDEO'       => ['type_captation', 'Vidéo'],
        ];
        if (isset($type_picks[$doc_type])) {
            [$field, $value] = $type_picks[$doc_type];
            update_post_meta($post_id, $field, $value);
        }

        // --- PROCEEDINGS extras: lieu (city, country) and conference title ---
        if ($doc_type === 'PROCEEDINGS') {
            $city    = $first($hal_doc['city_s'] ?? '');
            $country = $first($hal_doc['country_s'] ?? '');
            // Trimming ', ' removes the separator when either part is missing.
            $lieu = trim("$city, $country", ', ');
            if ($lieu) {
                update_post_meta($post_id, 'lieu', $lieu);
            }

            // Written after the book-title sous-titre above, so it replaces it.
            $conf_title = $first($hal_doc['conferenceTitle_s'] ?? '');
            if ($conf_title) {
                update_post_meta($post_id, 'sous-titre', $conf_title);
            }
        }

        // --- Reference bibliographique from citationFull_s (publications/ouvrages/articles) ---
        // array_filter drops slugs that did not resolve to a term (null).
        $citation_cats = array_filter([
            $this->cat_id_by_slug('publications-et-productions'),
            $this->cat_id_by_slug('ouvrages'),
            $this->cat_id_by_slug('articles'),
        ]);
        $citation = $first($hal_doc['citationFull_s'] ?? '');
        if ($citation && in_array($cat_id, $citation_cats, true)) {
            update_post_meta($post_id, 'reference_bibliographique', wp_kses_post($citation));
        }

        // --- Store matched THALIM members ---
        if (!empty($matched_user_ids)) {
            Thalim_HAL_Pods_Storage::set_relation($post_id, 'membres', $matched_user_ids, null);
        }

        // --- Axes thématiques: cascade (direct SPIP > co-authors > owner) ---
        $axes_resolution        = $this->resolve_axes_cascade($matched_user_ids, $spip_context);
        $this->last_axes_source = $axes_resolution['source'];
        if (!empty($axes_resolution['term_ids'])) {
            Thalim_HAL_Pods_Storage::set_relation($post_id, 'axes_thematiques', $axes_resolution['term_ids'], 'axe_thematique');
        }

        // --- Programmes de recherche: SPIP direct OR keyword matching ---
        $prog_ids = !empty($spip_context['programmes'])
            ? array_map('intval', $spip_context['programmes'])
            : $this->match_terms_by_keywords($keywords, 'programme_de_recherche');
        if (!empty($prog_ids)) {
            Thalim_HAL_Pods_Storage::set_relation($post_id, 'programmes_de_recherche', $prog_ids, 'programme_de_recherche');
        }

        // Unmatched authors as free text — remove matched names from the full list.
        // Comparison ignores case and surrounding spaces; matched names are
        // normalised once into a lookup instead of once per author.
        $normalize_name = static fn($name): string => mb_strtolower(trim((string) $name));
        $matched_lookup = [];
        foreach ($matched_user_names as $matched_name) {
            $matched_lookup[$normalize_name($matched_name)] = true;
        }
        $unmatched = array_filter(
            $author_names,
            static fn($name): bool => !isset($matched_lookup[$normalize_name($name)])
        );
        if (!empty($unmatched)) {
            update_post_meta($post_id, 'autrepersonnes', implode(', ', array_values($unmatched)));
        }

        return $post_id;
    }

    /**
     * Match HAL keyword strings against existing WordPress terms in a given taxonomy.
     *
     * WP terms are often stored bilingually as "Terme FR // English term".
     * Matching is case-insensitive against both the FR and EN parts.
     *
     * @param string[] $hal_keywords Raw keyword strings from HAL keyword_s field.
     * @param string   $taxonomy     WordPress taxonomy (e.g. 'post_tag', 'programme_de_recherche').
     * @return int[] Matched term IDs.
*/
    private function match_terms_by_keywords(array $hal_keywords, string $taxonomy = 'post_tag'): array {
        // Normalise the HAL keywords once into a lookup set. Blank entries are
        // dropped so they can never match a term whose FR or EN part is empty.
        $wanted = [];
        foreach ($hal_keywords as $keyword) {
            if (!is_scalar($keyword)) {
                continue;
            }
            $normalized = mb_strtolower(trim((string) $keyword));
            if ($normalized !== '') {
                $wanted[$normalized] = true;
            }
        }
        if (empty($wanted)) {
            return [];
        }

        $terms = get_terms(['taxonomy' => $taxonomy, 'hide_empty' => false]);
        if (is_wp_error($terms) || empty($terms)) {
            return [];
        }

        $matched = [];
        foreach ($terms as $term) {
            // "FR // EN" yields two parts; a monolingual name yields one.
            foreach (explode(' // ', $term->name, 2) as $part) {
                if (isset($wanted[mb_strtolower(trim($part))])) {
                    $matched[] = (int) $term->term_id;
                    break; // one hit is enough; never list a term twice
                }
            }
        }

        return $matched;
    }

    /**
     * Backwards-compatible alias for the renamed method.
     *
     * @param string[] $hal_keywords Raw keyword strings from HAL keyword_s field.
     * @return int[] Matched 'post_tag' term IDs.
     */
    private function match_keywords_to_tags(array $hal_keywords): array {
        return $this->match_terms_by_keywords($hal_keywords, 'post_tag');
    }

    /**
     * Resolve axes thématiques through a cascade of strategies.
     *
     * 1. Direct SPIP links ($spip_context['axes'])
     * 2. Axes from all matched WP co-authors (_pods_axes_thematiques)
     * 3. Axe of the SPIP flux owner user ($spip_context['owner_user_id'])
     *
     * The first strategy that yields at least one non-zero term ID wins. Every
     * strategy returns a de-duplicated list with zero IDs removed.
     *
     * @param array $matched_user_ids WP user IDs of the matched co-authors.
     * @param array $spip_context     SPIP context; reads 'axes' and 'owner_user_id'.
     * @return array{source: string, term_ids: int[]}
     */
    private function resolve_axes_cascade(array $matched_user_ids, array $spip_context): array {
        // Cast to int, drop zero/invalid IDs, de-duplicate, re-index.
        $clean_ids = static function (array $ids): array {
            return array_values(array_unique(array_filter(array_map('intval', $ids))));
        };

        // Raw axes stored on a user, whether the meta comes back as an array
        // or as a still-serialized string.
        $user_axes = static function (int $user_id): array {
            $stored = get_user_meta($user_id, '_pods_axes_thematiques', true);
            if (is_string($stored) && $stored !== '') {
                // '@' is deliberate: a non-serialized string is an expected case
                // and should not raise a notice. allowed_classes => false keeps
                // a crafted meta value from instantiating objects.
                $stored = @unserialize($stored, ['allowed_classes' => false]);
            }
            return is_array($stored) ? $stored : [];
        };

        // 1. SPIP direct
        if (!empty($spip_context['axes'])) {
            $ids = $clean_ids((array) $spip_context['axes']);
            if (!empty($ids)) {
                return ['source' => 'spip', 'term_ids' => $ids];
            }
        }

        // 2. Co-authors matched (any matched THALIM member with an axe)
        $from_authors = [];
        foreach ($matched_user_ids as $uid) {
            foreach ($user_axes((int) $uid) as $term_id) {
                $from_authors[] = $term_id;
            }
        }
        $from_authors = $clean_ids($from_authors);
        if (!empty($from_authors)) {
            return ['source' => 'coauthors', 'term_ids' => $from_authors];
        }

        // 3. SPIP flux owner user
        if (!empty($spip_context['owner_user_id'])) {
            $ids = $clean_ids($user_axes((int) $spip_context['owner_user_id']));
            if (!empty($ids)) {
                return ['source' => 'owner', 'term_ids' => $ids];
            }
        }

        return ['source' => 'none', 'term_ids' => []];
    }

    /**
     * Parse a HAL date (YYYY, YYYY-MM, YYYY-MM-DD, or ISO datetime) to Y-m-d.
     * Returns '' on failure. HAL often emits partial dates that strtotime
     * mishandles (e.g. strtotime("2022") interprets 2022 as a time, not a year).
     *
     * Missing month/day default to 01. Numeric dates are validated with
     * checkdate(), so impossible values such as "2022-13-45" or "2022-02-30"
     * yield '' instead of being returned unchanged.
     *
     * @param string $raw Raw date string from HAL.
     * @return string Date formatted as Y-m-d, or '' when it cannot be parsed.
     */
    private function parse_hal_date(string $raw): string {
        $raw = trim($raw);
        if ($raw === '') {
            return '';
        }

        if (preg_match('/^(\d{4})-(\d{2})-(\d{2})/', $raw, $m)) {
            // Unanchored at the end on purpose: accepts a trailing ISO time part.
            [$year, $month, $day] = [(int) $m[1], (int) $m[2], (int) $m[3]];
        } elseif (preg_match('/^(\d{4})-(\d{2})$/', $raw, $m)) {
            [$year, $month, $day] = [(int) $m[1], (int) $m[2], 1];
        } elseif (preg_match('/^(\d{4})$/', $raw, $m)) {
            [$year, $month, $day] = [(int) $m[1], 1, 1];
        } else {
            // Last resort for any other format. Compare against false so that
            // a timestamp of 0 is not mistaken for a parse failure.
            $ts = strtotime($raw);
            return $ts !== false ? date('Y-m-d', $ts) : '';
        }

        return checkdate($month, $day, $year)
            ? sprintf('%04d-%02d-%02d', $year, $month, $day)
            : '';
    }
}