425 lines
18 KiB
PHP
425 lines
18 KiB
PHP
<?php
|
|
/**
|
|
* Importer Logic Class - Handles import from HAL to WordPress
|
|
*/
|
|
|
|
// Abort when the file is requested directly instead of being loaded by WordPress.
defined('ABSPATH') || exit;
|
|
|
|
class Thalim_HAL_Importer_Logic {
|
|
|
|
/**
 * HAL doc type => WordPress category slug.
 *
 * The slug is resolved to a term_id at runtime: auto-incremented IDs do not
 * survive a database re-import, slugs do.
 */
private const DOC_TYPE_SLUGS = [
    // Articles
    'ART'         => 'articles',                     // Journal article
    'COUV'        => 'articles',                     // Book chapter -> Articles
    // Books
    'OUV'         => 'ouvrages',                     // Book -> Ouvrages
    // Conference papers
    'COMM'        => 'communications',               // Conference paper -> Communications
    'ISSUE'       => 'articles',                     // Journal issue editorship -> Articles
    'PROCEEDINGS' => 'ouvrages',                     // Edited volume / proceedings -> Ouvrages
    // Defences
    'THESE'       => 'soutenances',                  // PhD thesis -> Soutenances
    'HDR'         => 'soutenances',                  // Habilitation (HDR) -> Soutenances
    // Media
    'SON'         => 'medias',                       // Audio -> Médias
    'VIDEO'       => 'medias',                       // Video -> Médias
    'NOTICE'      => 'articles',                     // Notice / review -> Articles
    'BLOG'        => 'medias',                       // Blog post / op-ed -> Médias
    'TRAD'        => 'ouvrages',                     // Translation -> Ouvrages (author role "Traduction")
    // Catch-all
    'REPORT'      => 'publications-et-productions',  // Report
    'UNDEFINED'   => 'publications-et-productions',  // Undefined
    'POSTER'      => 'publications-et-productions',  // Poster
    'OTHER'       => 'publications-et-productions',  // Other
];
|
|
|
|
/** Doc types whose date is written to the `date_de_debut` meta instead of `datetime`. */
private const EVENT_DOC_TYPES = [
    'COMM',
    'THESE',
    'HDR',
    'SON',
    'VIDEO',
];
|
|
|
|
/**
 * Resolve a category slug to its term_id.
 *
 * Results (including misses, stored as null) are memoised in a static local,
 * so each slug triggers at most one get_term_by() lookup.
 *
 * @param string $slug Category slug.
 * @return int|null Term ID, or null when no category has this slug.
 */
private function cat_id_by_slug(string $slug): ?int {
    static $resolved = [];

    // array_key_exists (not isset) so that cached null misses are honoured too
    if (array_key_exists($slug, $resolved)) {
        return $resolved[$slug];
    }

    $term    = get_term_by('slug', $slug, 'category');
    $term_id = null;
    if ($term) {
        $term_id = (int) $term->term_id;
    }

    $resolved[$slug] = $term_id;
    return $term_id;
}
|
|
|
|
/**
 * Where the axes applied by the last import() call came from.
 *
 * One of 'spip', 'coauthors', 'owner' or 'none'.
 */
public $last_axes_source = 'none';
|
|
|
|
/**
 * Tell whether a post carrying this HAL ID already exists.
 *
 * @param mixed $hal_id HAL identifier; an empty value short-circuits to false.
 * @return bool True when at least one postmeta row has meta_key 'hal_id' and this value.
 */
public function is_imported($hal_id) {
    if (empty($hal_id)) {
        return false;
    }

    global $wpdb;

    $sql = $wpdb->prepare(
        "SELECT COUNT(*) FROM {$wpdb->postmeta} WHERE meta_key = 'hal_id' AND meta_value = %s",
        $hal_id
    );
    $row_count = (int) $wpdb->get_var($sql);

    return $row_count > 0;
}
|
|
|
|
/**
 * Look up the post that was imported for a given HAL ID.
 *
 * @param mixed $hal_id HAL identifier; an empty value short-circuits to null.
 * @return array|null ['id' => int, 'status' => string] for the first matching post, or null.
 */
public function get_imported_post($hal_id) {
    if (empty($hal_id)) {
        return null;
    }

    global $wpdb;

    $sql = $wpdb->prepare(
        "SELECT p.ID, p.post_status FROM {$wpdb->posts} p
        INNER JOIN {$wpdb->postmeta} pm ON pm.post_id = p.ID
        WHERE pm.meta_key = 'hal_id' AND pm.meta_value = %s
        LIMIT 1",
        $hal_id
    );
    $found = $wpdb->get_row($sql);

    if (!$found) {
        return null;
    }

    return [
        'id'     => (int) $found->ID,
        'status' => $found->post_status,
    ];
}
|
|
|
|
/**
 * Get the category term_id mapped to a HAL doc type.
 *
 * @param mixed $doc_type HAL docType_s value (e.g. 'ART').
 * @return int|null Term ID, or null when the doc type is unmapped or the category does not exist.
 */
public function get_category_id($doc_type) {
    $slug = self::DOC_TYPE_SLUGS[$doc_type] ?? null;

    if (!$slug) {
        return null;
    }

    return $this->cat_id_by_slug($slug);
}
|
|
|
|
/**
 * Build the full doc type => category term_id map.
 *
 * @return array Keys are the HAL doc types of DOC_TYPE_SLUGS (same order);
 *               values are term IDs, or null when the category slug is not found.
 */
public function get_doc_type_map() {
    // array_map over a single array keeps its string keys
    return array_map(
        function ($slug) {
            return $this->cat_id_by_slug($slug);
        },
        self::DOC_TYPE_SLUGS
    );
}
|
|
|
|
/**
 * Import a HAL publication as a WordPress post.
 *
 * Returns early with a WP_Error when the document has no HAL ID, when a post
 * with that HAL ID already exists, or when wp_insert_post() fails. Otherwise
 * the post is created and category, meta, tags, members, axes and programmes
 * are written to it.
 *
 * @param array    $hal_doc            Raw HAL API document.
 * @param array    $wp_users_by_hal_id Map of normalized_hal_id => ['id' => int, 'name' => string].
 * @param string   $post_status        Target post_status (default 'pending').
 * @param bool     $backdate_post      Use producedDate_s as post_date (default false).
 * @param array    $spip_context       SPIP-derived context for bulk imports:
 *                                     ['axes' => int[], 'tags' => int[], 'programmes' => int[], 'owner_user_id' => ?int]
 * @param int|null $force_post_author  When non-null, used as post_author instead of the
 *                                     first matched member (or user 1 when nobody matched).
 * @return int|WP_Error New post ID on success, WP_Error on failure.
 *                      The axes source is stored in $this->last_axes_source for caller reporting.
 */
public function import(
    array $hal_doc,
    array $wp_users_by_hal_id = [],
    string $post_status = 'pending',
    bool $backdate_post = false,
    array $spip_context = [],
    ?int $force_post_author = null
) {
    // Reset first so a failed import never reports the previous document's axes source.
    $this->last_axes_source = 'none';

    $hal_id   = $hal_doc['halId_s'] ?? '';
    $doc_type = $hal_doc['docType_s'] ?? '';

    if (empty($hal_id)) return new WP_Error('no_id', 'Missing HAL ID');
    if ($this->is_imported($hal_id)) return new WP_Error('exists', 'Already imported: ' . $hal_id);

    // --- Resolve post author from HAL author IDs ---
    // Cast to array so a document carrying a single scalar value is still handled.
    $author_hal_ids     = (array) ($hal_doc['authIdHal_s'] ?? []);
    $author_names       = (array) ($hal_doc['authFullName_s'] ?? []);
    $keywords           = (array) ($hal_doc['keyword_s'] ?? []);
    $matched_user_ids   = [];
    $matched_user_names = [];
    foreach ($author_hal_ids as $hal_author_id) {
        $normalized = strtolower(trim((string) $hal_author_id));
        if (!isset($wp_users_by_hal_id[$normalized])) {
            continue;
        }
        $user = $wp_users_by_hal_id[$normalized];
        // The same member can be reachable through several HAL IDs: keep them once.
        if (in_array($user['id'], $matched_user_ids, true)) {
            continue;
        }
        $matched_user_ids[]   = $user['id'];
        $matched_user_names[] = $user['name'];
    }
    $post_author = $force_post_author
        ?? (!empty($matched_user_ids) ? $matched_user_ids[0] : 1);

    // --- Create the post ---
    $raw_title = $hal_doc['title_s'][0] ?? '';
    $post_args = [
        'post_title'   => wp_strip_all_tags($raw_title),
        'post_content' => wp_kses_post($hal_doc['abstract_s'][0] ?? ''),
        'post_status'  => $post_status,
        'post_type'    => 'post',
        'post_author'  => $post_author,
    ];

    // Backdate post_date to HAL producedDate_s when requested (for legacy bulk imports)
    if ($backdate_post) {
        $backdate_ymd = $this->parse_hal_date($hal_doc['producedDate_s'] ?? '');
        if ($backdate_ymd) {
            $local_datetime = $backdate_ymd . ' 12:00:00';
            $post_args['post_date'] = $local_datetime;
            // post_date is site-local time; post_date_gmt must hold its UTC equivalent.
            $post_args['post_date_gmt'] = get_gmt_from_date($local_datetime);
        }
    }

    $post_id = wp_insert_post($post_args, true);
    if (is_wp_error($post_id)) return $post_id;

    // --- Category — centralised Pods storage (see class-pods-storage.php) ---
    $cat_id = $this->get_category_id($doc_type);
    if ($cat_id) {
        Thalim_HAL_Pods_Storage::set_categorie($post_id, $cat_id);
    }

    // --- Core meta ---
    update_post_meta($post_id, 'hal_id', $hal_id);
    update_post_meta($post_id, 'hal_url', $hal_doc['uri_s'] ?? '');

    // HAL PDF file -> lien_externe_1
    $file_url = $hal_doc['fileMain_s'] ?? '';
    if ($file_url) {
        update_post_meta($post_id, 'lien_externe_1', $file_url);
        update_post_meta($post_id, 'titre_du_lien_externe_1', 'Document HAL // HAL Document');
    }

    // Journal (ART)
    $journal = $hal_doc['journalTitle_s'] ?? '';
    if ($journal) {
        update_post_meta($post_id, 'journal', $journal);
    }

    // Book title as sous-titre (COUV), only if different from post title
    $book_title = $hal_doc['bookTitle_s'] ?? '';
    if ($book_title && $book_title !== $raw_title) {
        update_post_meta($post_id, 'sous-titre', $book_title);
    }

    // Publisher -> editeur (plain text, no Pods triple-storage needed)
    $publisher = $hal_doc['publisher_s'] ?? '';
    if (is_array($publisher)) $publisher = $publisher[0] ?? '';
    if ($publisher) {
        update_post_meta($post_id, 'editeur', $publisher);
    }

    // Author-role label: bilingual plain text, written only for these doc types
    $author_roles = [
        'COUV'  => 'Auteur du chapitre // Chapter author',
        'ISSUE' => 'Direction de numéro // Editor-in-Chief',
        'TRAD'  => 'Traduction // Translation',
    ];
    if (isset($author_roles[$doc_type])) {
        update_post_meta($post_id, 'fonction_auteur', $author_roles[$doc_type]);
    }

    // --- HAL keywords + SPIP tags -> étiquettes (single Pods write) ---
    $etiquette_ids = $this->match_keywords_to_tags($keywords);
    if (!empty($spip_context['tags'])) {
        $etiquette_ids = array_merge($etiquette_ids, array_map('intval', (array) $spip_context['tags']));
    }
    // A tag can come from both sources: store each ID once.
    $etiquette_ids = array_values(array_unique($etiquette_ids));
    if (!empty($etiquette_ids)) {
        Thalim_HAL_Pods_Storage::set_relation($post_id, 'etiquettes', $etiquette_ids, 'post_tag');
    }

    // --- Date meta ---
    $date_raw = $hal_doc['producedDate_s'] ?? '';
    // THESE/HDR: use defenseDate_s if available, fallback to producedDate_s
    if (in_array($doc_type, ['THESE', 'HDR'], true)) {
        $defense = $hal_doc['defenseDate_s'] ?? '';
        if ($defense) $date_raw = $defense;
    }
    $date_meta = $this->parse_hal_date($date_raw);
    if ($date_meta) {
        $date_field = in_array($doc_type, self::EVENT_DOC_TYPES, true) ? 'date_de_debut' : 'datetime';
        update_post_meta($post_id, $date_field, $date_meta);
    }

    // --- Type pick fields (pick custom-simple — no triple-storage) ---
    $type_picks = [
        'PROCEEDINGS' => ['type_colloque', 'Colloque'],
        'THESE'       => ['type_soutenance', 'Soutenance de thèse'],
        'HDR'         => ['type_soutenance', "Soutenance d'habilitation"],
        'SON'         => ['type_captation', 'Son'],
        'VIDEO'       => ['type_captation', 'Vidéo'],
    ];
    if (isset($type_picks[$doc_type])) {
        [$field, $value] = $type_picks[$doc_type];
        update_post_meta($post_id, $field, $value);
    }

    if ($doc_type === 'PROCEEDINGS') {
        // --- Lieu (city, country from HAL) ---
        $city    = $hal_doc['city_s'] ?? '';
        $country = $hal_doc['country_s'] ?? '';
        if (is_array($city)) $city = $city[0] ?? '';
        if (is_array($country)) $country = $country[0] ?? '';
        // trim() drops the dangling ", " when one of the two parts is missing
        $lieu = trim("$city, $country", ', ');
        if ($lieu) {
            update_post_meta($post_id, 'lieu', $lieu);
        }

        // --- Conference title as sous-titre (overwrites a book title written above) ---
        $conf_title = $hal_doc['conferenceTitle_s'] ?? '';
        if ($conf_title) {
            update_post_meta($post_id, 'sous-titre', $conf_title);
        }
    }

    // --- Bibliographic reference from citationFull_s (publications/ouvrages/articles) ---
    $citation_cats = array_filter([
        $this->cat_id_by_slug('publications-et-productions'),
        $this->cat_id_by_slug('ouvrages'),
        $this->cat_id_by_slug('articles'),
    ]);
    $citation = $hal_doc['citationFull_s'] ?? '';
    if ($citation && in_array($cat_id, $citation_cats, true)) {
        update_post_meta($post_id, 'reference_bibliographique', wp_kses_post($citation));
    }

    // --- Store matched THALIM members ---
    if (!empty($matched_user_ids)) {
        Thalim_HAL_Pods_Storage::set_relation($post_id, 'membres', $matched_user_ids, null);
    }

    // --- Axes thématiques: cascade (SPIP direct > co-authors > owner) ---
    $axes_resolution = $this->resolve_axes_cascade($matched_user_ids, $spip_context);
    $this->last_axes_source = $axes_resolution['source'];
    if (!empty($axes_resolution['term_ids'])) {
        Thalim_HAL_Pods_Storage::set_relation($post_id, 'axes_thematiques', $axes_resolution['term_ids'], 'axe_thematique');
    }

    // --- Programmes de recherche: SPIP direct OR keyword matching ---
    $prog_ids = !empty($spip_context['programmes'])
        ? array_map('intval', (array) $spip_context['programmes'])
        : $this->match_terms_by_keywords($keywords, 'programme_de_recherche');
    if (!empty($prog_ids)) {
        Thalim_HAL_Pods_Storage::set_relation($post_id, 'programmes_de_recherche', $prog_ids, 'programme_de_recherche');
    }

    // Unmatched authors as free text — remove matched names from the full list.
    // Loose comparison: ignore case and surrounding spaces.
    $matched_lookup = array_map(
        static fn($name) => mb_strtolower(trim((string) $name)),
        $matched_user_names
    );
    $unmatched = array_filter(
        $author_names,
        static fn($name) => !in_array(mb_strtolower(trim((string) $name)), $matched_lookup, true)
    );
    if (!empty($unmatched)) {
        update_post_meta($post_id, 'autrepersonnes', implode(', ', $unmatched));
    }

    return $post_id;
}
|
|
|
|
/**
 * Find the terms of a taxonomy whose name matches one of the HAL keywords.
 *
 * Term names may be bilingual, written "Terme FR // English term"; the name is
 * split on the first " // " and each part is compared, lower-cased and trimmed,
 * against the lower-cased, trimmed keywords.
 *
 * @param string[] $hal_keywords Raw keyword strings from HAL keyword_s field.
 * @param string   $taxonomy     WordPress taxonomy (e.g. 'post_tag', 'programme_de_recherche').
 * @return int[] Matched term IDs (empty when there are no keywords, no terms, or get_terms() errors).
 */
private function match_terms_by_keywords(array $hal_keywords, string $taxonomy = 'post_tag'): array {
    if (empty($hal_keywords)) {
        return [];
    }

    $terms = get_terms(['taxonomy' => $taxonomy, 'hide_empty' => false]);
    if (is_wp_error($terms) || empty($terms)) {
        return [];
    }

    // Normalise the HAL side once, outside the term loop
    $needles = [];
    foreach ($hal_keywords as $keyword) {
        $needles[] = mb_strtolower(trim($keyword));
    }

    $term_ids = [];
    foreach ($terms as $term) {
        // One label for a monolingual name, two (FR, EN) for a bilingual one
        foreach (explode(' // ', $term->name, 2) as $label) {
            if (in_array(mb_strtolower(trim($label)), $needles, true)) {
                $term_ids[] = (int) $term->term_id;
                break; // a term is listed once even when both labels match
            }
        }
    }

    return $term_ids;
}
|
|
|
|
/**
 * Match HAL keywords against the 'post_tag' taxonomy.
 *
 * Kept as a backwards-compatible alias of match_terms_by_keywords().
 *
 * @param string[] $hal_keywords Raw keyword strings from HAL keyword_s field.
 * @return int[] Matched post_tag term IDs.
 */
private function match_keywords_to_tags(array $hal_keywords): array {
    $tag_ids = $this->match_terms_by_keywords($hal_keywords, 'post_tag');

    return $tag_ids;
}
|
|
|
|
/**
 * Resolve axes thématiques through a cascade of strategies.
 *
 * 1. Direct SPIP links ($spip_context['axes'])
 * 2. Axes from all matched WP co-authors (_pods_axes_thematiques)
 * 3. Axe of the SPIP flux owner user ($spip_context['owner_user_id'])
 *
 * The first strategy that yields at least one non-zero term ID wins.
 *
 * @param array $matched_user_ids WP user IDs of the matched co-authors.
 * @param array $spip_context     SPIP context; only 'axes' and 'owner_user_id' are read.
 * @return array{source: string, term_ids: int[]} source is 'spip', 'coauthors', 'owner' or 'none'.
 */
private function resolve_axes_cascade(array $matched_user_ids, array $spip_context): array {
    // 1. SPIP direct
    if (!empty($spip_context['axes'])) {
        $ids = $this->normalize_term_ids((array) $spip_context['axes']);
        if (!empty($ids)) return ['source' => 'spip', 'term_ids' => $ids];
    }

    // 2. Co-authors matched (any matched THALIM member with an axe)
    $from_authors = [];
    foreach ($matched_user_ids as $uid) {
        foreach ($this->user_axes_raw((int) $uid) as $tid) {
            $from_authors[] = $tid;
        }
    }
    $from_authors = $this->normalize_term_ids($from_authors);
    if (!empty($from_authors)) {
        return ['source' => 'coauthors', 'term_ids' => $from_authors];
    }

    // 3. SPIP flux owner user
    if (!empty($spip_context['owner_user_id'])) {
        $ids = $this->normalize_term_ids($this->user_axes_raw((int) $spip_context['owner_user_id']));
        if (!empty($ids)) return ['source' => 'owner', 'term_ids' => $ids];
    }

    return ['source' => 'none', 'term_ids' => []];
}

/**
 * Read the raw '_pods_axes_thematiques' user meta as an array.
 *
 * @param int $user_id WP user ID.
 * @return array Stored values, or [] when the meta is absent or not decodable to an array.
 */
private function user_axes_raw(int $user_id): array {
    $axes = get_user_meta($user_id, '_pods_axes_thematiques', true);

    // A non-empty string is treated as a still-serialized array. Decode failures are
    // deliberately ignored (best effort); allowed_classes=false prevents object instantiation.
    if (is_string($axes) && $axes !== '') {
        $axes = @unserialize($axes, ['allowed_classes' => false]);
    }

    return is_array($axes) ? $axes : [];
}

/**
 * Cast to int, drop zero values, de-duplicate and re-index a list of term IDs.
 *
 * @param array $raw Values of any scalar type.
 * @return int[]
 */
private function normalize_term_ids(array $raw): array {
    return array_values(array_unique(array_filter(array_map('intval', $raw))));
}
|
|
|
|
/**
 * Parse a HAL date (YYYY, YYYY-MM, YYYY-MM-DD, or ISO datetime) to Y-m-d.
 *
 * HAL often emits partial dates that strtotime mishandles (e.g. strtotime("2022")
 * interprets 2022 as a time, not a year), so the known shapes are matched by
 * pattern first; a missing month or day defaults to 01. Any other input falls
 * back to strtotime().
 *
 * @param string $raw Raw date string.
 * @return string Y-m-d date, or '' when the input is empty, unparseable, or
 *                matches a pattern but is not a real calendar date (e.g. 2022-13-45).
 */
private function parse_hal_date(string $raw): string {
    $raw = trim($raw);
    if ($raw === '') return '';

    $parts = null;
    if (preg_match('/^(\d{4})-(\d{2})-(\d{2})/', $raw, $m)) {
        // No end anchor: also covers ISO datetimes such as 2022-03-04T10:00:00Z
        $parts = [(int) $m[1], (int) $m[2], (int) $m[3]];
    } elseif (preg_match('/^(\d{4})-(\d{2})$/', $raw, $m)) {
        $parts = [(int) $m[1], (int) $m[2], 1];
    } elseif (preg_match('/^(\d{4})$/', $raw, $m)) {
        $parts = [(int) $m[1], 1, 1];
    }

    if ($parts !== null) {
        [$year, $month, $day] = $parts;
        // Reject impossible dates here rather than handing them to WordPress
        return checkdate($month, $day, $year)
            ? sprintf('%04d-%02d-%02d', $year, $month, $day)
            : '';
    }

    $ts = strtotime($raw);
    // strtotime() signals failure with false; 0 is a valid timestamp
    return $ts !== false ? date('Y-m-d', $ts) : '';
}
|
|
|
|
}
|