commit 7227613354ebb47a6a9a5bcd8d9ff3ab37fadcb8 Author: Bachir Soussi Chiadmi Date: Tue May 28 10:36:12 2019 +0200 first core draft diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dadba7d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*/data diff --git a/materio/conf/accents_en.txt b/materio/conf/accents_en.txt new file mode 100644 index 0000000..bed0514 --- /dev/null +++ b/materio/conf/accents_en.txt @@ -0,0 +1,164 @@ +# À => A +"\u00C0" => "A" +# Á => A +"\u00C1" => "A" +#  => A +"\u00C2" => "A" +# à => A +"\u00C3" => "A" +# Ä => A +"\u00C4" => "A" +# Å => A +"\u00C5" => "A" +# Ą => A +"\u0104" => "A" +# Æ => AE +"\u00C6" => "AE" +# Ç => C +"\u00C7" => "C" +# Ć => C +"\u0106" => "C" +# È => E +"\u00C8" => "E" +# É => E +"\u00C9" => "E" +# Ê => E +"\u00CA" => "E" +# Ë => E +"\u00CB" => "E" +# Ę => E +"\u0118" => "E" +# Ì => I +"\u00CC" => "I" +# Í => I +"\u00CD" => "I" +# Î => I +"\u00CE" => "I" +# Ï => I +"\u00CF" => "I" +# IJ => IJ +"\u0132" => "IJ" +# Ð => D +"\u00D0" => "D" +# Ł => L +"\u0141" => "L" +# Ñ => N +"\u00D1" => "N" +# Ń => N +"\u0143" => "N" +# Ò => O +"\u00D2" => "O" +# Ó => O +"\u00D3" => "O" +# Ô => O +"\u00D4" => "O" +# Õ => O +"\u00D5" => "O" +# Ö => O +"\u00D6" => "O" +# Ø => O +"\u00D8" => "O" +# Œ => OE +"\u0152" => "OE" +# Þ => TH +"\u00DE" => "TH" +# Ù => U +"\u00D9" => "U" +# Ú => U +"\u00DA" => "U" +# Û => U +"\u00DB" => "U" +# Ü => U +"\u00DC" => "U" +# Ý => Y +"\u00DD" => "Y" +# Ÿ => Y +"\u0178" => "Y" +# à => a +"\u00E0" => "a" +# á => a +"\u00E1" => "a" +# â => a +"\u00E2" => "a" +# ã => a +"\u00E3" => "a" +# ä => a +"\u00E4" => "a" +# å => a +"\u00E5" => "a" +# æ => ae +"\u00E6" => "ae" +# ç => c +"\u00E7" => "c" +# è => e +"\u00E8" => "e" +# é => e +"\u00E9" => "e" +# ê => e +"\u00EA" => "e" +# ë => e +"\u00EB" => "e" +# ì => i +"\u00EC" => "i" +# í => i +"\u00ED" => "i" +# î => i +"\u00EE" => "i" +# ï => i +"\u00EF" => "i" +# ij => ij +"\u0133" => "ij" +# ð => d +"\u00F0" => "d" +# ñ => n +"\u00F1" => "n" +# ò 
=> o +"\u00F2" => "o" +# ó => o +"\u00F3" => "o" +# ô => o +"\u00F4" => "o" +# õ => o +"\u00F5" => "o" +# ö => o +"\u00F6" => "o" +# ø => o +"\u00F8" => "o" +# œ => oe +"\u0153" => "oe" +# ß => ss +"\u00DF" => "ss" +# Ś => S +"\u015a" => "S" +# þ => th +"\u00FE" => "th" +# ù => u +"\u00F9" => "u" +# ú => u +"\u00FA" => "u" +# û => u +"\u00FB" => "u" +# ü => u +"\u00FC" => "u" +# ý => y +"\u00FD" => "y" +# ÿ => y +"\u00FF" => "y" +# Ź => Z +"\u0179" => "Z" +# Ż => Z +"\u017b" => "Z" +# ff => ff +"\uFB00" => "ff" +# fi => fi +"\uFB01" => "fi" +# fl => fl +"\uFB02" => "fl" +# ffi => ffi +"\uFB03" => "ffi" +# ffl => ffl +"\uFB04" => "ffl" +# ſt => st +"\uFB05" => "st" +# st => st +"\uFB06" => "st" diff --git a/materio/conf/accents_fr.txt b/materio/conf/accents_fr.txt new file mode 100644 index 0000000..47de7e2 --- /dev/null +++ b/materio/conf/accents_fr.txt @@ -0,0 +1,164 @@ +# À => A +#"\u00C0" => "A" +# Á => A +#"\u00C1" => "A" +#  => A +#"\u00C2" => "A" +# à => A +"\u00C3" => "A" +# Ä => A +"\u00C4" => "A" +# Å => A +"\u00C5" => "A" +# Ą => A +"\u0104" => "A" +# Æ => AE +"\u00C6" => "AE" +# Ç => C +#"\u00C7" => "C" +# Ć => C +"\u0106" => "C" +# È => E +#"\u00C8" => "E" +# É => E +#"\u00C9" => "E" +# Ê => E +#"\u00CA" => "E" +# Ë => E +"\u00CB" => "E" +# Ę => E +"\u0118" => "E" +# Ì => I +#"\u00CC" => "I" +# Í => I +#"\u00CD" => "I" +# Î => I +#"\u00CE" => "I" +# Ï => I +"\u00CF" => "I" +# IJ => IJ +"\u0132" => "IJ" +# Ð => D +"\u00D0" => "D" +# Ł => L +"\u0141" => "L" +# Ñ => N +"\u00D1" => "N" +# Ń => N +"\u0143" => "N" +# Ò => O +#"\u00D2" => "O" +# Ó => O +#"\u00D3" => "O" +# Ô => O +#"\u00D4" => "O" +# Õ => O +"\u00D5" => "O" +# Ö => O +"\u00D6" => "O" +# Ø => O +"\u00D8" => "O" +# Œ => OE +"\u0152" => "OE" +# Þ => TH +"\u00DE" => "TH" +# Ù => U +#"\u00D9" => "U" +# Ú => U +#"\u00DA" => "U" +# Û => U +#"\u00DB" => "U" +# Ü => U +"\u00DC" => "U" +# Ý => Y +#"\u00DD" => "Y" +# Ÿ => Y +"\u0178" => "Y" +# à => a +#"\u00E0" => "a" +# á => a +#"\u00E1" => "a" +# â => a 
+#"\u00E2" => "a" +# ã => a +"\u00E3" => "a" +# ä => a +"\u00E4" => "a" +# å => a +"\u00E5" => "a" +# æ => ae +"\u00E6" => "ae" +# ç => c +#"\u00E7" => "c" +# è => e +#"\u00E8" => "e" +# é => e +#"\u00E9" => "e" +# ê => e +#"\u00EA" => "e" +# ë => e +"\u00EB" => "e" +# ì => i +#"\u00EC" => "i" +# í => i +#"\u00ED" => "i" +# î => i +#"\u00EE" => "i" +# ï => i +"\u00EF" => "i" +# ij => ij +"\u0133" => "ij" +# ð => d +"\u00F0" => "d" +# ñ => n +"\u00F1" => "n" +# ò => o +#"\u00F2" => "o" +# ó => o +#"\u00F3" => "o" +# ô => o +#"\u00F4" => "o" +# õ => o +"\u00F5" => "o" +# ö => o +"\u00F6" => "o" +# ø => o +"\u00F8" => "o" +# œ => oe +"\u0153" => "oe" +# ß => ss +"\u00DF" => "ss" +# Ś => S +"\u015a" => "S" +# þ => th +"\u00FE" => "th" +# ù => u +#"\u00F9" => "u" +# ú => u +#"\u00FA" => "u" +# û => u +#"\u00FB" => "u" +# ü => u +"\u00FC" => "u" +# ý => y +#"\u00FD" => "y" +# ÿ => y +"\u00FF" => "y" +# Ź => Z +"\u0179" => "Z" +# Ż => Z +"\u017b" => "Z" +# ff => ff +"\uFB00" => "ff" +# fi => fi +"\uFB01" => "fi" +# fl => fl +"\uFB02" => "fl" +# ffi => ffi +"\uFB03" => "ffi" +# ffl => ffl +"\uFB04" => "ffl" +# ſt => st +"\uFB05" => "st" +# st => st +"\uFB06" => "st" diff --git a/materio/conf/accents_und.txt b/materio/conf/accents_und.txt new file mode 100644 index 0000000..7c883f8 --- /dev/null +++ b/materio/conf/accents_und.txt @@ -0,0 +1,148 @@ +# À => A +"\u00C0" => "A" +# Á => A +"\u00C1" => "A" +#  => A +"\u00C2" => "A" +# à => A +"\u00C3" => "A" +# Ä => A +"\u00C4" => "A" +# Å => A +"\u00C5" => "A" +# Æ => AE +"\u00C6" => "AE" +# Ç => C +"\u00C7" => "C" +# È => E +"\u00C8" => "E" +# É => E +"\u00C9" => "E" +# Ê => E +"\u00CA" => "E" +# Ë => E +"\u00CB" => "E" +# Ì => I +"\u00CC" => "I" +# Í => I +"\u00CD" => "I" +# Î => I +"\u00CE" => "I" +# Ï => I +"\u00CF" => "I" +# IJ => IJ +"\u0132" => "IJ" +# Ð => D +"\u00D0" => "D" +# Ñ => N +"\u00D1" => "N" +# Ò => O +"\u00D2" => "O" +# Ó => O +"\u00D3" => "O" +# Ô => O +"\u00D4" => "O" +# Õ => O +"\u00D5" => "O" +# Ö => O 
+"\u00D6" => "O" +# Ø => O +"\u00D8" => "O" +# Œ => OE +"\u0152" => "OE" +# Þ +"\u00DE" => "TH" +# Ù => U +"\u00D9" => "U" +# Ú => U +"\u00DA" => "U" +# Û => U +"\u00DB" => "U" +# Ü => U +"\u00DC" => "U" +# Ý => Y +"\u00DD" => "Y" +# Ÿ => Y +"\u0178" => "Y" +# à => a +"\u00E0" => "a" +# á => a +"\u00E1" => "a" +# â => a +"\u00E2" => "a" +# ã => a +"\u00E3" => "a" +# ä => a +"\u00E4" => "a" +# å => a +"\u00E5" => "a" +# æ => ae +"\u00E6" => "ae" +# ç => c +"\u00E7" => "c" +# è => e +"\u00E8" => "e" +# é => e +"\u00E9" => "e" +# ê => e +"\u00EA" => "e" +# ë => e +"\u00EB" => "e" +# ì => i +"\u00EC" => "i" +# í => i +"\u00ED" => "i" +# î => i +"\u00EE" => "i" +# ï => i +"\u00EF" => "i" +# ij => ij +"\u0133" => "ij" +# ð => d +"\u00F0" => "d" +# ñ => n +"\u00F1" => "n" +# ò => o +"\u00F2" => "o" +# ó => o +"\u00F3" => "o" +# ô => o +"\u00F4" => "o" +# õ => o +"\u00F5" => "o" +# ö => o +"\u00F6" => "o" +# ø => o +"\u00F8" => "o" +# œ => oe +"\u0153" => "oe" +# ß => ss +"\u00DF" => "ss" +# þ => th +"\u00FE" => "th" +# ù => u +"\u00F9" => "u" +# ú => u +"\u00FA" => "u" +# û => u +"\u00FB" => "u" +# ü => u +"\u00FC" => "u" +# ý => y +"\u00FD" => "y" +# ÿ => y +"\u00FF" => "y" +# ff => ff +"\uFB00" => "ff" +# fi => fi +"\uFB01" => "fi" +# fl => fl +"\uFB02" => "fl" +# ffi => ffi +"\uFB03" => "ffi" +# ffl => ffl +"\uFB04" => "ffl" +# ſt => st +"\uFB05" => "st" +# st => st +"\uFB06" => "st" diff --git a/materio/conf/elevate.xml b/materio/conf/elevate.xml new file mode 100644 index 0000000..193a0e7 --- /dev/null +++ b/materio/conf/elevate.xml @@ -0,0 +1,27 @@ + + + + + + + + + + diff --git a/materio/conf/nouns_fr.txt b/materio/conf/nouns_fr.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/materio/conf/nouns_fr.txt @@ -0,0 +1 @@ + diff --git a/materio/conf/protwords_en.txt b/materio/conf/protwords_en.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/materio/conf/protwords_en.txt @@ -0,0 +1 @@ + diff --git a/materio/conf/protwords_fr.txt 
b/materio/conf/protwords_fr.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/materio/conf/protwords_fr.txt @@ -0,0 +1 @@ + diff --git a/materio/conf/protwords_und.txt b/materio/conf/protwords_und.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/materio/conf/protwords_und.txt @@ -0,0 +1 @@ + diff --git a/materio/conf/schema.xml b/materio/conf/schema.xml new file mode 100644 index 0000000..c8b0037 --- /dev/null +++ b/materio/conf/schema.xml @@ -0,0 +1,474 @@ + + + + +]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + &extrafields; + + + &extratypes; + + + id + + + + + diff --git a/materio/conf/schema_extra_fields.xml b/materio/conf/schema_extra_fields.xml new file mode 100644 index 0000000..bf5b300 --- /dev/null +++ b/materio/conf/schema_extra_fields.xml @@ -0,0 +1,102 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/materio/conf/schema_extra_types.xml b/materio/conf/schema_extra_types.xml new file mode 100644 index 0000000..7dc1dc7 --- /dev/null +++ b/materio/conf/schema_extra_types.xml @@ -0,0 +1,306 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/materio/conf/solrconfig.xml b/materio/conf/solrconfig.xml new file mode 100644 index 0000000..49939a5 --- /dev/null +++ b/materio/conf/solrconfig.xml @@ -0,0 +1,1390 @@ + + + + +]> + + + + + + + ${solr.abortOnConfigurationError:true} + + + ${solr.luceneMatchVersion:LUCENE_70} + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + ${solr.hdfs.home:} + + ${solr.hdfs.confdir:} + + ${solr.hdfs.blockcache.enabled:true} + + ${solr.hdfs.blockcache.global:true} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + + + + + + + + + + + true + + + &index; + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.MaxDocs:-1} + ${solr.autoCommit.MaxTime:15000} + false + + + + + + ${solr.autoSoftCommit.MaxDocs:-1} + ${solr.autoSoftCommit.MaxTime:-1} + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + lucene + id + explicit + true + + ${solr.selectSearchHandler.timeAllowed:-1} + + false + + + spellcheck + elevator + + + + + + + explicit + json + true + text + + + + + + + + + + content + 1 + 1 + 3 + 15 + 20 + false + + ${solr.mlt.timeAllowed:2000} + + + + + + + content + explicit + true + + + + + + text + + + + + + + true + ignored_ + + + true + links + ignored_ + + + + + + + + + + + + + + + explicit + true + + + + + + + ${solr.replication.master:false} + commit + startup + ${solr.replication.confFiles:schema.xml,elevate.xml} + + + ${solr.replication.slave:false} + ${solr.replication.masterUrl:http://localhost:8983/solr}/replication + ${solr.replication.pollInterval:00:00:60} + + + + + + + true + json + true + + + + + + + &extra; + + + + + spell + und + on + false + false + 1 + 5 + 5 + true + true + 10 + 5 + 
+ + spellcheck + + + + + + true + und + 10 + + + suggest + + + + + + + + + text + true + + + tvComponent + + + + + + + + + + true + false + + + terms + + + + + + + false + false + false + true + false + 1 + false + 10 + + + terms + spellcheck + suggest + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 5 + + + + + + + + + + + + + + + diff --git a/materio/conf/solrconfig_extra.xml b/materio/conf/solrconfig_extra.xml new file mode 100644 index 0000000..44d3ffb --- /dev/null +++ b/materio/conf/solrconfig_extra.xml @@ -0,0 +1,80 @@ + + + en + spellcheck_en + solr.DirectSolrSpellChecker + internal + 0.5 + 2 + 1 + 5 + 4 + 0.01 + .01 + true + + + + fr + spellcheck_fr + solr.DirectSolrSpellChecker + internal + 0.5 + 2 + 1 + 5 + 4 + 0.01 + .01 + true + + + + und + spellcheck_und + solr.DirectSolrSpellChecker + internal + 0.5 + 2 + 1 + 5 + 4 + 0.01 + .01 + true + + + + + en + AnalyzingInfixLookupFactory + DocumentDictionaryFactory + twm_suggest + text_en + sm_context_tags + true + false + + + + fr + AnalyzingInfixLookupFactory + DocumentDictionaryFactory + twm_suggest + text_fr + sm_context_tags + true + false + + + + und + AnalyzingInfixLookupFactory + DocumentDictionaryFactory + twm_suggest + text_und + sm_context_tags + true + false + + diff --git a/materio/conf/solrconfig_index.xml b/materio/conf/solrconfig_index.xml new file mode 100644 index 0000000..e69de29 diff --git a/materio/conf/solrcore.properties b/materio/conf/solrcore.properties new file mode 100644 index 0000000..02a6d36 --- /dev/null +++ b/materio/conf/solrcore.properties @@ -0,0 +1,13 @@ +solr.replication.master=false +solr.replication.slave=false +solr.replication.pollInterval=00:00:60 
+solr.replication.masterUrl=http://localhost:8983/solr +solr.replication.confFiles=schema.xml,elevate.xml,stopwords_en.txt,synonyms_en.txt,protwords_en.txt,accents_en.txt,stopwords_fr.txt,synonyms_fr.txt,nouns_fr.txt,protwords_fr.txt,accents_fr.txt,stopwords_und.txt,synonyms_und.txt,protwords_und.txt,accents_und.txt +solr.mlt.timeAllowed=2000 +solr.luceneMatchVersion=8.0 +solr.selectSearchHandler.timeAllowed=-1 +solr.autoCommit.MaxDocs=-1 +solr.autoCommit.MaxTime=15000 +solr.autoSoftCommit.MaxDocs=-1 +solr.autoSoftCommit.MaxTime=-1 +solr.install.dir=/opt/solr/ diff --git a/materio/conf/stopwords_en.txt b/materio/conf/stopwords_en.txt new file mode 100644 index 0000000..6981050 --- /dev/null +++ b/materio/conf/stopwords_en.txt @@ -0,0 +1,35 @@ +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +s +such +t +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/materio/conf/stopwords_fr.txt b/materio/conf/stopwords_fr.txt new file mode 100644 index 0000000..c78ec5a --- /dev/null +++ b/materio/conf/stopwords_fr.txt @@ -0,0 +1,163 @@ +au +aux +avec +ce +ces +dans +de +des +du +elle +en +et +eux +il +je +la +le +leur +lui +ma +mais +me +même +mes +moi +mon +ne +nos +notre +nous +on +ou +par +pas +pour +qu +que +qui +sa +se +ses +son +sur +ta +te +tes +toi +ton +tu +un +une +vos +votre +vous +c +d +j +l +à +m +n +s +t +y +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent +ceci 
+celà +cet +cette +ici +ils +les +leurs +quel +quels +quelle +quelles +sans +soi diff --git a/materio/conf/stopwords_und.txt b/materio/conf/stopwords_und.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/materio/conf/stopwords_und.txt @@ -0,0 +1 @@ + diff --git a/materio/conf/synonyms_en.txt b/materio/conf/synonyms_en.txt new file mode 100644 index 0000000..91689ff --- /dev/null +++ b/materio/conf/synonyms_en.txt @@ -0,0 +1 @@ +drupal, durpal diff --git a/materio/conf/synonyms_fr.txt b/materio/conf/synonyms_fr.txt new file mode 100644 index 0000000..91689ff --- /dev/null +++ b/materio/conf/synonyms_fr.txt @@ -0,0 +1 @@ +drupal, durpal diff --git a/materio/conf/synonyms_und.txt b/materio/conf/synonyms_und.txt new file mode 100644 index 0000000..91689ff --- /dev/null +++ b/materio/conf/synonyms_und.txt @@ -0,0 +1 @@ +drupal, durpal diff --git a/materio/core.properties b/materio/core.properties new file mode 100644 index 0000000..3a7b8ec --- /dev/null +++ b/materio/core.properties @@ -0,0 +1 @@ +name=materio