<!-- schema_extra_types.xml: additional Solr field type definitions. -->
  <!--
    Edge NGram String Field
    6.0.0

    The whole field value is kept as a single token (KeywordTokenizer) and
    lowercased. Only the index analyzer expands that token into leading-edge
    n-grams of 2 to 25 characters; the query analyzer leaves the lowercased
    value whole, so a query of 2 to 25 characters can match the beginning of
    an indexed value.
  -->
  <fieldType name="text_edgenstring"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.EdgeNGramFilterFactory"
              maxGramSize="25"
              minGramSize="2"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Edge NGram Text Field
    7.0.0

    Whitespace-tokenized text with character mapping from accents_und.txt.
    The index analyzer removes stop words, splits tokens on word delimiters
    (keeping the original token), drops tokens outside 2 to 100 characters,
    lowercases, and finally emits leading-edge n-grams of 2 to 25 characters.
    The query analyzer only maps, tokenizes, length-filters and lowercases,
    so query terms are compared against the indexed prefixes.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_edge"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    English Text Field
    7.0.0

    Stemmed English full text. Both analyzers map characters through
    accents_en.txt, tokenize on whitespace, remove stop words, split on word
    delimiters (keeping the original token), drop tokens outside 2 to 100
    characters, lowercase, and stem with the English Snowball stemmer.
    Differences between the two sides:
      - index: catenates word and number parts (catenateWords/Numbers = 1);
      - query: expands synonyms from synonyms_en.txt first and does not
        catenate parts.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_en"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords_en.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_en.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords_en.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    English Text Field collated
    7.0.0

    ICU collation field for the "en" locale, configured with primary
    strength and the case level switched off.
  -->
  <fieldType name="collated_en"
             class="solr.ICUCollationField"
             locale="en"
             strength="primary"
             caseLevel="false"/>
  <!--
    English Text Field unstemmed
    7.0.0

    English full text without a stemming step. Both analyzers map characters
    through accents_en.txt, tokenize on whitespace, remove stop words, split
    on word delimiters (keeping the original token), drop tokens outside 2 to
    100 characters, and lowercase.
    Differences between the two sides:
      - index: catenates word and number parts (catenateWords/Numbers = 1);
      - query: expands synonyms from synonyms_en.txt first and does not
        catenate parts.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_unstemmed_en"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_en.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    French Text Field
    7.0.0

    Stemmed French full text. Both analyzers map characters through
    accents_fr.txt, tokenize on whitespace, split on word delimiters and case
    changes (keeping the original token), lowercase, remove stop words and
    stem with the French Snowball stemmer.
    Differences between the two sides:
      - index: applies the elision filter and catenates word and number
        parts;
      - query: no elision filter, no number parts (generateNumberParts = 0),
        no catenation, and synonyms from synonyms_fr.txt are expanded after
        lowercasing.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_fr"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              splitOnCaseChange="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" protected="protwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              splitOnCaseChange="1"
              generateWordParts="1"
              generateNumberParts="0"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- NOTE(review): this synonym graph filter runs after another graph
           filter, whereas the English and language-undefined types in this
           file expand synonyms first. Confirm the ordering is intended. -->
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_fr.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" protected="protwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    French Text Field collated
    7.0.0

    ICU collation field for the "fr" locale, configured with primary
    strength and the case level switched off.
  -->
  <fieldType name="collated_fr"
             class="solr.ICUCollationField"
             locale="fr"
             strength="primary"
             caseLevel="false"/>
  <!--
    French Text Field unstemmed
    7.0.0

    French full text without a stemming step. Both analyzers map characters
    through accents_fr.txt, tokenize on whitespace, split on word delimiters
    and case changes (keeping the original token), lowercase and remove stop
    words.
    Differences between the two sides:
      - index: applies the elision filter and catenates word and number
        parts;
      - query: no elision filter, no number parts (generateNumberParts = 0),
        no catenation, and synonyms from synonyms_fr.txt are expanded after
        lowercasing.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_unstemmed_fr"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              splitOnCaseChange="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              splitOnCaseChange="1"
              generateWordParts="1"
              generateNumberParts="0"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- NOTE(review): this synonym graph filter runs after another graph
           filter, whereas the English and language-undefined types in this
           file expand synonyms first. Confirm the ordering is intended. -->
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_fr.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Fulltext Phonetic
    7.0.0

    Phonetic matching for text of undefined language. Both analyzers tokenize
    on whitespace, remove stop words, split on word delimiters (keeping the
    original token), drop tokens outside 2 to 100 characters, lowercase, and
    encode tokens with the Beider-Morse filter (languageSet "auto", GENERIC
    name type, APPROX rule type). The index side additionally catenates word
    and number parts; the query side does not.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_phonetic_und"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="auto"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="auto"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Fulltext Phonetic English
    7.0.0

    Phonetic matching for English text. Both analyzers tokenize on
    whitespace, remove stop words, split on word delimiters (keeping the
    original token), drop tokens outside 2 to 100 characters, lowercase, and
    encode tokens with the Beider-Morse filter (languageSet "english",
    GENERIC name type, APPROX rule type). The index side additionally
    catenates word and number parts; the query side does not.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_phonetic_en"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="english"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="english"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Fulltext Phonetic French
    7.0.0

    Phonetic matching for French text. Both analyzers tokenize on whitespace,
    split on word delimiters and case changes (keeping the original token),
    lowercase, remove stop words, and encode tokens with the Beider-Morse
    filter (languageSet "french", GENERIC name type, APPROX rule type).
    Differences between the two sides:
      - index: applies the elision filter and catenates word and number
        parts;
      - query: no elision filter, no number parts (generateNumberParts = 0)
        and no catenation.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_phonetic_fr"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              splitOnCaseChange="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="french"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              splitOnCaseChange="1"
              generateWordParts="1"
              generateNumberParts="0"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="french"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Language Undefined Text Field
    7.0.0

    Full text for content of undefined language; no stemming step. Both
    analyzers map characters through accents_und.txt, tokenize on whitespace,
    remove stop words, split on word delimiters (keeping the original token),
    drop tokens outside 2 to 100 characters, and lowercase.
    Differences between the two sides:
      - index: catenates word and number parts (catenateWords/Numbers = 1);
      - query: expands synonyms from synonyms_und.txt first and does not
        catenate parts.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_und"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_und.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Language Undefined Text Field spellcheck
    7.0.0

    One analyzer without a type attribute, so the same chain is declared for
    both indexing and querying: character mapping from accents_und.txt,
    whitespace tokenization, removal of tokens outside 2 to 100 characters,
    lowercasing and duplicate removal. No stop word, synonym or
    word-delimiter processing is configured here.
  -->
  <fieldType name="text_spell_und"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer>
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LengthFilterFactory" max="100" min="2"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Language Undefined Text Field collated
    7.0.0

    ICU collation field configured with primary strength and the case level
    switched off.
    NOTE(review): the locale is "en" although the type is named "und";
    presumably English rules are the intended fallback. Please confirm.
  -->
  <fieldType name="collated_und"
             class="solr.ICUCollationField"
             locale="en"
             strength="primary"
             caseLevel="false"/>
  <!--
    NGram String Field
    6.0.0

    The whole field value is kept as a single token (KeywordTokenizer) and
    lowercased. Only the index analyzer expands that token into n-grams of
    2 to 25 characters; the query analyzer leaves the lowercased value
    whole, so a query of 2 to 25 characters can match a substring of an
    indexed value.
  -->
  <fieldType name="text_ngramstring"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.NGramFilterFactory"
              maxGramSize="25"
              minGramSize="2"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    NGram Text Field
    7.0.0

    Whitespace-tokenized text with character mapping from accents_und.txt.
    The index analyzer removes stop words, splits tokens on word delimiters
    (keeping the original token), drops tokens outside 2 to 100 characters,
    lowercases, and finally emits n-grams of 2 to 25 characters. The query
    analyzer only maps, tokenizes, length-filters and lowercases, so query
    terms are compared against the indexed n-grams.

    NOTE(review): the index analyzer changed (FlattenGraphFilterFactory was
    added); existing documents presumably need reindexing. Please confirm.
  -->
  <fieldType name="text_ngram"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              splitOnCaseChange="0"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <!-- Required after a graph filter at index time: the indexer cannot
           consume a token graph directly (Solr Reference Guide). -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.NGramFilterFactory" minGramSize="2" maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>