// HAL docType => WordPress category ID
    private const DOC_TYPE_MAP = [
        'ART'         => 16, // Article
        'COUV'        => 16, // Book chapter -> Articles
        'OUV'         => 15, // Book -> Ouvrages
        'COMM'        => 13, // Conference paper -> Communications
        'ISSUE'       => 16, // Journal issue editorship -> Articles
        'PROCEEDINGS' => 15, // Book / proceedings editorship -> Ouvrages
        'THESE'       => 14, // Thesis -> Soutenances
        'HDR'         => 14, // Habilitation (HDR) -> Soutenances
        'SON'         => 19, // Audio -> Captations audio/vidéo
        'VIDEO'       => 19, // Video -> Captations audio/vidéo
    ];

    // Doc types that use date_de_debut instead of datetime
    private const EVENT_DOC_TYPES = ['COMM', 'THESE', 'HDR', 'SON', 'VIDEO'];

    // Categories that receive reference_bibliographique from citationFull_s
    private const CITATION_CATEGORY_IDS = [4, 15, 16];

    // post_author used when no HAL author matches a WordPress user
    private const DEFAULT_AUTHOR_ID = 1;

    // Pods IDs — queried from the DB, stable per installation
    private const POD_ID_POST          = 8;
    private const FIELD_ID_CATEGORIE   = 16;  // "Type d'annonce" (picks from WP category)
    private const FIELD_ID_MEMBRES     = 178;
    private const FIELD_ID_AUTRE_MBRES = 195; // autre_membres (unused in import, for reference)
    private const FIELD_ID_AXES        = 270; // axes_thematiques (picks from axe_thematique)
    private const FIELD_ID_PROGRAMMES  = 271; // programmes_de_recherche (picks from programme_de_recherche)
    private const FIELD_ID_ETIQUETTES  = 652; // étiquettes (picks from post_tag)

    /**
     * Source of the axes applied by the last import() call:
     * 'spip' | 'coauthors' | 'owner' | 'none'.
     * Reset to 'none' at the start of every import().
     *
     * @var string
     */
    public $last_axes_source = 'none';

    /**
     * Check whether a publication is already imported, i.e. whether any
     * postmeta row with meta_key 'hal_id' carries this HAL ID.
     *
     * @param string $hal_id HAL identifier.
     * @return bool True when a matching postmeta row exists.
     */
    public function is_imported($hal_id)
    {
        if (empty($hal_id)) {
            return false;
        }

        global $wpdb;

        // Existence is all we need: stop at the first matching row instead of counting them all.
        $found = $wpdb->get_var($wpdb->prepare(
            "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = 'hal_id' AND meta_value = %s LIMIT 1",
            $hal_id
        ));

        return null !== $found;
    }

    /**
     * Get the WordPress category ID mapped to a HAL doc type.
     *
     * @param string $doc_type HAL docType_s value (e.g. 'ART').
     * @return int|null Category ID, or null when the doc type is not mapped.
     */
    public function get_category_id($doc_type)
    {
        // Arrays/objects are not valid array offsets; treat them as "unmapped".
        if (!is_string($doc_type) && !is_int($doc_type)) {
            return null;
        }

        return self::DOC_TYPE_MAP[$doc_type] ?? null;
    }

    /**
     * Get the full HAL doc type => WordPress category ID map.
     *
     * @return array<string, int>
     */
    public function get_doc_type_map()
    {
        return self::DOC_TYPE_MAP;
    }

    /**
     * Import a HAL publication as a WordPress post.
     *
     * @param array  $hal_doc            Raw HAL API document.
     * @param array  $wp_users_by_hal_id Map of normalized_hal_id => ['id' => int, 'name' => string].
     * @param string $post_status        Target post_status (default 'pending').
     * @param bool   $backdate_post      Use producedDate_s as post_date (default false).
     * @param array  $spip_context       SPIP-derived context for bulk imports:
     *                                   ['axes' => int[], 'tags' => int[], 'programmes' => int[], 'owner_user_id' => ?int]
     * @return int|WP_Error New post ID on success, WP_Error on failure.
     *                      The axes source is stored in $this->last_axes_source for caller reporting.
     */
    public function import(
        array $hal_doc,
        array $wp_users_by_hal_id = [],
        string $post_status = 'pending',
        bool $backdate_post = false,
        array $spip_context = []
    ) {
        // Reset first so an early WP_Error return never exposes the previous import's value.
        $this->last_axes_source = 'none';

        $hal_id   = $this->hal_string($hal_doc, 'halId_s');
        $doc_type = $this->hal_string($hal_doc, 'docType_s');

        if (empty($hal_id)) {
            return new WP_Error('no_id', 'Missing HAL ID');
        }
        if ($this->is_imported($hal_id)) {
            return new WP_Error('exists', 'Already imported: ' . $hal_id);
        }

        // --- Resolve post author from HAL author IDs ---
        // (array) casts tolerate a scalar where a list is expected.
        $author_hal_ids     = (array) ($hal_doc['authIdHal_s'] ?? []);
        $author_names       = (array) ($hal_doc['authFullName_s'] ?? []);
        $hal_keywords       = (array) ($hal_doc['keyword_s'] ?? []);
        $matched_user_ids   = [];
        $matched_user_names = [];

        foreach ($author_hal_ids as $hal_author_id) {
            if (!is_scalar($hal_author_id)) {
                continue;
            }
            $normalized = strtolower(trim((string) $hal_author_id));
            if (!isset($wp_users_by_hal_id[$normalized])) {
                continue;
            }
            $user = $wp_users_by_hal_id[$normalized];
            $uid  = (int) $user['id'];
            // A user listed twice must not produce duplicate "membres" rows.
            if (in_array($uid, $matched_user_ids, true)) {
                continue;
            }
            $matched_user_ids[]   = $uid;
            $matched_user_names[] = $user['name'];
        }

        // The first matched author owns the post.
        $post_author = $matched_user_ids[0] ?? self::DEFAULT_AUTHOR_ID;

        // --- Create the post ---
        $post_title = $this->hal_string($hal_doc, 'title_s');
        $post_args  = [
            'post_title'   => wp_strip_all_tags($post_title),
            'post_content' => wp_kses_post($this->hal_string($hal_doc, 'abstract_s')),
            'post_status'  => $post_status,
            'post_type'    => 'post',
            'post_author'  => $post_author,
        ];

        // Backdate post_date to HAL producedDate_s when requested (for legacy bulk imports)
        if ($backdate_post) {
            $backdate_ymd = $this->parse_hal_date($this->hal_string($hal_doc, 'producedDate_s'));
            if ('' !== $backdate_ymd) {
                $local_datetime             = $backdate_ymd . ' 12:00:00';
                $post_args['post_date']     = $local_datetime;
                // post_date is site-local time; derive the GMT value instead of copying it.
                $post_args['post_date_gmt'] = get_gmt_from_date($local_datetime);
            }
        }

        $post_id = wp_insert_post($post_args, true);
        if (is_wp_error($post_id)) {
            return $post_id;
        }

        // --- Category — Pods triple-storage pattern ---
        $cat_id = $this->get_category_id($doc_type);
        if ($cat_id) {
            // 1. Native WP category assignment
            wp_set_post_categories($post_id, [$cat_id]);
            // 2. Pods postmeta: single integer value
            update_post_meta($post_id, 'categorie', $cat_id);
            // 3. Pods _pods_ meta: serialized array of one integer
            update_post_meta($post_id, '_pods_categorie', [$cat_id]);
            // 4. wp_podsrel row
            $this->insert_podsrel($post_id, self::FIELD_ID_CATEGORIE, $cat_id, 0);
        }

        // --- Core meta ---
        update_post_meta($post_id, 'hal_id', $hal_id);
        update_post_meta($post_id, 'hal_url', $this->hal_string($hal_doc, 'uri_s'));

        // HAL PDF file -> lien_externe_1
        $file_url = $this->hal_string($hal_doc, 'fileMain_s');
        if ($file_url) {
            update_post_meta($post_id, 'lien_externe_1', $file_url);
            update_post_meta($post_id, 'titre_du_lien_externe_1', 'Document HAL // HAL Document');
        }

        // Journal (ART)
        $journal = $this->hal_string($hal_doc, 'journalTitle_s');
        if ($journal) {
            update_post_meta($post_id, 'journal', $journal);
        }

        // Book title as sous-titre (COUV), only if different from post title
        $book_title = $this->hal_string($hal_doc, 'bookTitle_s');
        if ($book_title && $book_title !== $post_title) {
            update_post_meta($post_id, 'sous-titre', $book_title);
        }

        // Publisher -> editeur (plain text, no Pods triple-storage needed)
        $publisher = $this->hal_string($hal_doc, 'publisher_s');
        if ($publisher) {
            update_post_meta($post_id, 'editeur', $publisher);
        }

        // Fonction label: bilingual plain text
        $fonction_labels = [
            'COUV'  => 'Auteur du chapitre // Chapter author',
            'ISSUE' => 'Direction de numéro // Editor-in-Chief',
        ];
        if (isset($fonction_labels[$doc_type])) {
            update_post_meta($post_id, 'fonction_auteur', $fonction_labels[$doc_type]);
        }

        // --- Keywords -> étiquettes (Pods triple-storage, picks from post_tag) ---
        // set_pods_taxonomy_multi() is a no-op when nothing matched.
        $this->set_pods_taxonomy_multi(
            $post_id,
            'etiquettes',
            self::FIELD_ID_ETIQUETTES,
            $this->match_keywords_to_tags($hal_keywords),
            'post_tag'
        );

        // --- Date meta ---
        $date_raw = $this->hal_string($hal_doc, 'producedDate_s');
        // THESE/HDR: use defenseDate_s if available, fallback to producedDate_s
        if (in_array($doc_type, ['THESE', 'HDR'], true)) {
            $defense = $this->hal_string($hal_doc, 'defenseDate_s');
            if ($defense) {
                $date_raw = $defense;
            }
        }
        $date_meta = $this->parse_hal_date($date_raw);
        if ($date_meta) {
            $date_field = in_array($doc_type, self::EVENT_DOC_TYPES, true) ? 'date_de_debut' : 'datetime';
            update_post_meta($post_id, $date_field, $date_meta);
        }

        // --- Type pick fields (pick custom-simple — no triple-storage) ---
        $type_picks = [
            'PROCEEDINGS' => ['type_colloque', 'Colloque'],
            'THESE'       => ['type_soutenance', 'Soutenance de thèse'],
            'HDR'         => ['type_soutenance', "Soutenance d'habilitation"],
            'SON'         => ['type_captation', 'Son'],
            'VIDEO'       => ['type_captation', 'Vidéo'],
        ];
        if (isset($type_picks[$doc_type])) {
            [$field, $value] = $type_picks[$doc_type];
            update_post_meta($post_id, $field, $value);
        }

        if ('PROCEEDINGS' === $doc_type) {
            // --- Lieu (city, country from HAL) ---
            $city    = $this->hal_string($hal_doc, 'city_s');
            $country = $this->hal_string($hal_doc, 'country_s');
            // Trimming ", " drops the separator when either part is missing.
            $lieu = trim("$city, $country", ', ');
            if ($lieu) {
                update_post_meta($post_id, 'lieu', $lieu);
            }

            // --- Conference title as sous-titre (replaces any book title written above) ---
            $conf_title = $this->hal_string($hal_doc, 'conferenceTitle_s');
            if ($conf_title) {
                update_post_meta($post_id, 'sous-titre', $conf_title);
            }
        }

        // --- Reference bibliographique from citationFull_s ---
        $citation = $this->hal_string($hal_doc, 'citationFull_s');
        if ($citation && in_array($cat_id, self::CITATION_CATEGORY_IDS, true)) {
            update_post_meta($post_id, 'reference_bibliographique', wp_kses_post($citation));
        }

        // --- Store matched THALIM members — Pods triple-storage pattern ---
        if (!empty($matched_user_ids)) {
            // 1. Individual postmeta rows (one per user ID, as string)
            foreach ($matched_user_ids as $uid) {
                add_post_meta($post_id, 'membres', (string) $uid);
            }
            // 2. _pods_ meta: serialized PHP array of user IDs as integers
            update_post_meta($post_id, '_pods_membres', $matched_user_ids);
            // 3. wp_podsrel rows (one per user, weight = position)
            foreach ($matched_user_ids as $weight => $uid) {
                $this->insert_podsrel($post_id, self::FIELD_ID_MEMBRES, $uid, $weight);
            }
        }

        // --- Axes thématiques: cascade (SPIP direct > co-authors > owner) ---
        $axes_resolution        = $this->resolve_axes_cascade($matched_user_ids, $spip_context);
        $this->last_axes_source = $axes_resolution['source'];
        if (!empty($axes_resolution['term_ids'])) {
            $this->set_pods_taxonomy_multi(
                $post_id,
                'axes_thematiques',
                self::FIELD_ID_AXES,
                $axes_resolution['term_ids'],
                'axe_thematique'
            );
        }

        // --- Programmes de recherche: SPIP direct OR keyword matching ---
        $prog_ids = !empty($spip_context['programmes'])
            ? array_map('intval', (array) $spip_context['programmes'])
            : $this->match_terms_by_keywords($hal_keywords, 'programme_de_recherche');
        if (!empty($prog_ids)) {
            $this->set_pods_taxonomy_multi(
                $post_id,
                'programmes_de_recherche',
                self::FIELD_ID_PROGRAMMES,
                $prog_ids,
                'programme_de_recherche'
            );
        }

        // --- Direct SPIP tags (in addition to the HAL keyword matching done above) ---
        if (!empty($spip_context['tags'])) {
            // Only write the tags that are not attached to the post yet.
            $existing = wp_get_object_terms($post_id, 'post_tag', ['fields' => 'ids']);
            $existing = is_array($existing) ? array_map('intval', $existing) : [];
            $new_tags = array_diff(array_map('intval', (array) $spip_context['tags']), $existing);

            $this->set_pods_taxonomy_multi(
                $post_id,
                'etiquettes',
                self::FIELD_ID_ETIQUETTES,
                $new_tags,
                'post_tag'
            );
        }

        // --- Unmatched authors as free text — remove matched names from the full list ---
        // Loose comparison: ignore case and surrounding spaces.
        $matched_lookup = array_map(
            static fn($name) => mb_strtolower(trim((string) $name)),
            $matched_user_names
        );
        $unmatched = array_filter(
            $author_names,
            static fn($name) => is_scalar($name)
                && !in_array(mb_strtolower(trim((string) $name)), $matched_lookup, true)
        );
        if (!empty($unmatched)) {
            update_post_meta($post_id, 'autrepersonnes', implode(', ', array_values($unmatched)));
        }

        // --- Polylang: assign French language ---
        if (function_exists('pll_set_post_language')) {
            pll_set_post_language($post_id, 'fr');
        }

        return $post_id;
    }

    /**
     * Read one field of a HAL document as a string.
     *
     * Some fields are read as lists by this importer (title_s, abstract_s) while
     * others are normalised from either shape (publisher_s, city_s, country_s).
     * This helper accepts both: for a list it returns the first element.
     *
     * @param array  $hal_doc Raw HAL API document.
     * @param string $key     Field name.
     * @return string Field value, or '' when missing or not scalar.
     */
    private function hal_string(array $hal_doc, string $key): string
    {
        $value = $hal_doc[$key] ?? '';
        if (is_array($value)) {
            // reset() yields false on an empty list, which casts to '' below.
            $value = reset($value);
        }

        return is_scalar($value) ? (string) $value : '';
    }

    /**
     * Insert one relationship row into the {prefix}podsrel table for the post pod.
     *
     * @param int $post_id         Post the relationship belongs to (item_id).
     * @param int $field_id        Pods field ID.
     * @param int $related_item_id Related term or user ID.
     * @param int $weight          Position of the item within the field.
     */
    private function insert_podsrel(int $post_id, int $field_id, int $related_item_id, int $weight): void
    {
        global $wpdb;

        $wpdb->insert(
            $wpdb->prefix . 'podsrel',
            [
                'pod_id'           => self::POD_ID_POST,
                'field_id'         => $field_id,
                'item_id'          => $post_id,
                'related_pod_id'   => 0,
                'related_field_id' => 0,
                'related_item_id'  => $related_item_id,
                'weight'           => $weight,
            ],
            ['%d', '%d', '%d', '%d', '%d', '%d', '%d']
        );
    }

    /**
     * Match HAL keyword strings against existing WordPress terms in a given taxonomy.
     *
     * A term name of the form "Terme FR // English term" is split on " // ";
     * matching is case-insensitive against both parts.
     *
     * @param string[] $hal_keywords Raw keyword strings from HAL keyword_s field.
     * @param string   $taxonomy     WordPress taxonomy (e.g. 'post_tag', 'programme_de_recherche').
     * @return int[] Matched term IDs.
     */
    private function match_terms_by_keywords(array $hal_keywords, string $taxonomy = 'post_tag'): array
    {
        if (empty($hal_keywords)) {
            return [];
        }

        $terms = get_terms(['taxonomy' => $taxonomy, 'hide_empty' => false]);
        if (is_wp_error($terms) || empty($terms)) {
            return [];
        }

        // Normalise HAL keywords once into a hash set for O(1) membership tests.
        $wanted = [];
        foreach ($hal_keywords as $keyword) {
            if (is_scalar($keyword)) {
                $wanted[mb_strtolower(trim((string) $keyword))] = true;
            }
        }

        $matched = [];
        foreach ($terms as $term) {
            $parts = explode(' // ', $term->name, 2);
            $fr    = mb_strtolower(trim($parts[0]));
            $en    = isset($parts[1]) ? mb_strtolower(trim($parts[1])) : null;

            if (isset($wanted[$fr]) || (null !== $en && isset($wanted[$en]))) {
                $matched[] = (int) $term->term_id;
            }
        }

        return $matched;
    }

    /**
     * Backwards-compatible alias for the renamed method.
     *
     * @param string[] $hal_keywords Raw keyword strings from HAL keyword_s field.
     * @return int[] Matched post_tag term IDs.
     */
    private function match_keywords_to_tags(array $hal_keywords): array
    {
        return $this->match_terms_by_keywords($hal_keywords, 'post_tag');
    }

    /**
     * Resolve axes thématiques through a cascade of strategies.
     *
     * 1. Direct SPIP links ($spip_context['axes'])
     * 2. Axes from all matched WP co-authors (_pods_axes_thematiques)
     * 3. Axe of the SPIP flux owner user ($spip_context['owner_user_id'])
     *
     * @param int[] $matched_user_ids WordPress user IDs matched to HAL authors.
     * @param array $spip_context     SPIP-derived context (see import()).
     * @return array{source: string, term_ids: int[]}
     */
    private function resolve_axes_cascade(array $matched_user_ids, array $spip_context): array
    {
        // 1. SPIP direct
        if (!empty($spip_context['axes'])) {
            $ids = array_values(array_unique(array_map('intval', (array) $spip_context['axes'])));
            if (!empty($ids)) {
                return ['source' => 'spip', 'term_ids' => $ids];
            }
        }

        // 2. Co-authors matched (any matched THALIM member with an axe)
        $from_authors = [];
        foreach ($matched_user_ids as $uid) {
            $from_authors = array_merge($from_authors, $this->get_user_axes((int) $uid));
        }
        $from_authors = array_values(array_unique($from_authors));
        if (!empty($from_authors)) {
            return ['source' => 'coauthors', 'term_ids' => $from_authors];
        }

        // 3. SPIP flux owner user
        if (!empty($spip_context['owner_user_id'])) {
            $ids = $this->get_user_axes((int) $spip_context['owner_user_id']);
            if (!empty($ids)) {
                return ['source' => 'owner', 'term_ids' => $ids];
            }
        }

        return ['source' => 'none', 'term_ids' => []];
    }

    /**
     * Read the axe term IDs stored in a user's _pods_axes_thematiques meta.
     *
     * @param int $user_id WordPress user ID.
     * @return int[] Unique, non-zero term IDs ([] when the meta is absent or unreadable).
     */
    private function get_user_axes(int $user_id): array
    {
        $raw = get_user_meta($user_id, '_pods_axes_thematiques', true);

        if (is_string($raw) && '' !== $raw) {
            // The value may still be a serialized string. allowed_classes => false
            // guarantees no object is ever instantiated from database content;
            // "@" only silences the notice unserialize() raises on malformed input.
            $raw = @unserialize($raw, ['allowed_classes' => false]);
        }

        if (!is_array($raw)) {
            return [];
        }

        return array_values(array_unique(array_filter(array_map('intval', $raw))));
    }

    /**
     * Parse a HAL date (YYYY, YYYY-MM, YYYY-MM-DD, or ISO datetime) to Y-m-d.
     *
     * Partial dates are completed with "-01"; anything else falls back to
     * strtotime(). HAL often emits partial dates that strtotime mishandles
     * (e.g. strtotime("2022") interprets 2022 as a time, not a year).
     *
     * @param string $raw Raw date string.
     * @return string Y-m-d date, or '' on failure (including impossible dates such as 2022-13-45).
     */
    private function parse_hal_date(string $raw): string
    {
        $raw = trim($raw);
        if ('' === $raw) {
            return '';
        }

        // Most specific pattern first; the full-date pattern is left open-ended
        // so ISO datetimes ("2022-05-03T10:00:00Z") match on their date part.
        $patterns = [
            '/^(\d{4})-(\d{2})-(\d{2})/',
            '/^(\d{4})-(\d{2})$/',
            '/^(\d{4})$/',
        ];
        foreach ($patterns as $pattern) {
            if (!preg_match($pattern, $raw, $m)) {
                continue;
            }
            $year  = $m[1];
            $month = $m[2] ?? '01';
            $day   = $m[3] ?? '01';

            return checkdate((int) $month, (int) $day, (int) $year)
                ? "{$year}-{$month}-{$day}"
                : '';
        }

        $ts = strtotime($raw);

        return false === $ts ? '' : date('Y-m-d', $ts);
    }

    /**
     * Generic Pods triple-storage writer for multi-value taxonomy fields.
     * Writes to: wp_term_relationships, postmeta rows, _pods_ meta, wp_podsrel.
     *
     * Safe to call several times for the same field: IDs already recorded in the
     * _pods_ meta are kept, only new IDs get postmeta/podsrel rows, and weights
     * continue after the existing entries.
     *
     * @param int    $post_id    Target post.
     * @param string $field_name Pods field name (also the postmeta key).
     * @param int    $field_id   Pods field ID used in wp_podsrel.
     * @param array  $term_ids   Term IDs to attach.
     * @param string $taxonomy   Taxonomy the terms belong to.
     */
    private function set_pods_taxonomy_multi(int $post_id, string $field_name, int $field_id, array $term_ids, string $taxonomy): void
    {
        // Integer IDs are required: wp_set_object_terms() treats strings as term names/slugs.
        $term_ids = array_values(array_unique(array_filter(array_map('intval', $term_ids))));
        if (empty($term_ids)) {
            return;
        }

        // 1. wp_term_relationships (append mode, so this is harmless for already-attached terms)
        wp_set_object_terms($post_id, $term_ids, $taxonomy, true);

        // Work out which IDs the Pods storage does not know yet.
        $pods_meta_key = '_pods_' . $field_name;
        $stored        = get_post_meta($post_id, $pods_meta_key, true);
        $stored        = is_array($stored) ? array_values(array_unique(array_map('intval', $stored))) : [];
        $new_ids       = array_values(array_diff($term_ids, $stored));
        if (empty($new_ids)) {
            return;
        }

        // 2. postmeta (one row per term ID, as string)
        foreach ($new_ids as $tid) {
            add_post_meta($post_id, $field_name, (string) $tid);
        }

        // 3. _pods_ meta: serialized array of ints — merged, never overwritten
        update_post_meta($post_id, $pods_meta_key, array_merge($stored, $new_ids));

        // 4. wp_podsrel rows (weight = position, continuing after stored entries)
        $offset = count($stored);
        foreach ($new_ids as $position => $tid) {
            $this->insert_podsrel($post_id, $field_id, $tid, $offset + $position);
        }
    }
}