<!-- schema_extra_types.xml -->
  <!--
    Edge NGram ASCII String Field
    6.0.0

    Index side: standard tokenizer, lower-casing, edge n-grams of 2 to 25
    characters, then ASCII folding and word-delimiter splitting, both of which
    keep the original token next to the derived ones.
    Query side: the same chain without the edge n-gram step.
  -->
  <fieldType name="text_edgenasciistring" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25"/>
      <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
      <filter class="solr.WordDelimiterGraphFilterFactory" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
      <filter class="solr.WordDelimiterGraphFilterFactory" preserveOriginal="1"/>
    </analyzer>
  </fieldType>
  <!--
    Edge NGram String Field
    6.0.0

    The keyword tokenizer emits the whole value as a single token, which is
    then lower-cased. Only the index analyzer expands that token into edge
    n-grams of 2 to 25 characters; the query analyzer leaves it whole.
  -->
  <fieldType name="text_edgenstring"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Edge NGram Text Field
    7.0.0

    Index side: accent mapping (accents_und.txt), whitespace tokenizer,
    stop words, word-delimiter splitting, length limit 2 to 100, lower-casing
    and finally edge n-grams of 2 to 25 characters.
    Query side: accent mapping, whitespace tokenizer, length limit and
    lower-casing only; no stop words, no splitting, no n-grams.
  -->
  <fieldType name="text_edge" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    English Text Field
    7.0.0

    Both analyzers: accent mapping (accents_en.txt), whitespace tokenizer,
    English stop words, word-delimiter splitting, length limit 2 to 100,
    lower-casing and English Snowball stemming (protwords_en.txt is exempt).
    Differences: synonyms (synonyms_en.txt) are expanded at query time only,
    and catenation of word/number parts happens at index time only.
  -->
  <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords_en.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymGraphFilterFactory"
              synonyms="synonyms_en.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="0" catenateNumbers="0" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords_en.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    English Text Field collated
    7.0.0

    ICU collation field for the "en" locale at primary strength with the
    case level switched off.
  -->
  <fieldType name="collated_en"
             class="solr.ICUCollationField"
             locale="en"
             strength="primary"
             caseLevel="false"/>
  <!--
    English Text Field unstemmed
    7.0.0

    Both analyzers: accent mapping (accents_en.txt), whitespace tokenizer,
    English stop words, word-delimiter splitting, length limit 2 to 100 and
    lower-casing. No stemmer is applied.
    Differences: synonyms (synonyms_en.txt) are expanded at query time only,
    and catenation of word/number parts happens at index time only.
  -->
  <fieldType name="text_unstemmed_en" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymGraphFilterFactory"
              synonyms="synonyms_en.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="0" catenateNumbers="0" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    French Text Field
    7.0.0

    Both analyzers: accent mapping (accents_fr.txt), whitespace tokenizer,
    elision removal, word-delimiter splitting (also on case changes),
    lower-casing, French stop words and French Snowball stemming
    (protwords_fr.txt is exempt).
    Differences: synonyms (synonyms_fr.txt) are expanded at query time only;
    number parts and catenated parts are generated at index time only.
  -->
  <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="1" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" protected="protwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              generateWordParts="1" generateNumberParts="0"
              catenateWords="0" catenateNumbers="0" catenateAll="0"
              splitOnCaseChange="1" preserveOriginal="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SynonymGraphFilterFactory"
              synonyms="synonyms_fr.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" protected="protwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    French Text Field collated
    7.0.0

    ICU collation field for the "fr" locale at primary strength with the
    case level switched off.
  -->
  <fieldType name="collated_fr"
             class="solr.ICUCollationField"
             locale="fr"
             strength="primary"
             caseLevel="false"/>
  <!--
    French Text Field unstemmed
    7.0.0

    Both analyzers: accent mapping (accents_fr.txt), whitespace tokenizer,
    elision removal, word-delimiter splitting (also on case changes),
    lower-casing and French stop words. No stemmer is applied.
    Differences: synonyms (synonyms_fr.txt) are expanded at query time only;
    number parts and catenated parts are generated at index time only.
  -->
  <fieldType name="text_unstemmed_fr" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="1" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <!-- Strip elided articles at query time as well, mirroring the index
           analyzer; otherwise a term such as "l'avion" is analysed
           differently on the two sides. -->
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              generateWordParts="1" generateNumberParts="0"
              catenateWords="0" catenateNumbers="0" catenateAll="0"
              splitOnCaseChange="1" preserveOriginal="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SynonymGraphFilterFactory"
              synonyms="synonyms_fr.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Fulltext Phonetic
    7.0.0

    Both analyzers: whitespace tokenizer, stop words (stopwords_und.txt),
    word-delimiter splitting, length limit 2 to 100, lower-casing and
    Beider-Morse encoding with languageSet="auto".
    Difference: catenation of word/number parts happens at index time only.
  -->
  <fieldType name="text_phonetic_und" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="auto" nameType="GENERIC" ruleType="APPROX" concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="0" catenateNumbers="0" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="auto" nameType="GENERIC" ruleType="APPROX" concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Fulltext Phonetic English
    7.0.0

    Both analyzers: whitespace tokenizer, English stop words, word-delimiter
    splitting, length limit 2 to 100, lower-casing and Beider-Morse encoding
    with languageSet="english".
    Difference: catenation of word/number parts happens at index time only.
  -->
  <fieldType name="text_phonetic_en" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="english" nameType="GENERIC" ruleType="APPROX" concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="0" catenateNumbers="0" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="english" nameType="GENERIC" ruleType="APPROX" concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Fulltext Phonetic French
    7.0.0

    Both analyzers: whitespace tokenizer, elision removal, word-delimiter
    splitting (also on case changes), lower-casing, French stop words and
    Beider-Morse encoding with languageSet="french".
    Difference: number parts and catenated parts are generated at index time
    only.
  -->
  <fieldType name="text_phonetic_fr" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="1" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="french" nameType="GENERIC" ruleType="APPROX" concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <!-- Strip elided articles at query time as well, mirroring the index
           analyzer; otherwise a term such as "l'avion" is analysed
           differently on the two sides. -->
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              generateWordParts="1" generateNumberParts="0"
              catenateWords="0" catenateNumbers="0" catenateAll="0"
              splitOnCaseChange="1" preserveOriginal="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="french" nameType="GENERIC" ruleType="APPROX" concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Language Undefined Text Field
    7.0.0

    Both analyzers: accent mapping (accents_und.txt), whitespace tokenizer,
    stop words (stopwords_und.txt), word-delimiter splitting, length limit
    2 to 100 and lower-casing. No stemmer is applied.
    Differences: synonyms (synonyms_und.txt) are expanded at query time only,
    and catenation of word/number parts happens at index time only.
  -->
  <fieldType name="text_und" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymGraphFilterFactory"
              synonyms="synonyms_und.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="0" catenateNumbers="0" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Language Undefined Text Field spellcheck
    7.0.0

    One analyzer without a type attribute: accent mapping (accents_und.txt),
    whitespace tokenizer, length limit 2 to 100 and lower-casing.
  -->
  <fieldType name="text_spell_und"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer>
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Language Undefined Text Field collated
    7.0.0

    ICU collation field at primary strength with the case level switched
    off. The locale configured for the undefined language is "en".
  -->
  <fieldType name="collated_und"
             class="solr.ICUCollationField"
             locale="en"
             strength="primary"
             caseLevel="false"/>
  <!--
    NGram String Field
    6.0.0

    The keyword tokenizer emits the whole value as a single token, which is
    then lower-cased. Only the index analyzer expands that token into
    n-grams of 2 to 25 characters; the query analyzer leaves it whole.
  -->
  <fieldType name="text_ngramstring"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.NGramFilterFactory" minGramSize="2" maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    NGram Text Field
    7.0.0

    Index side: accent mapping (accents_und.txt), whitespace tokenizer,
    stop words, word-delimiter splitting, length limit 2 to 100, lower-casing
    and finally n-grams of 2 to 25 characters.
    Query side: accent mapping, whitespace tokenizer, length limit and
    lower-casing only; no stop words, no splitting, no n-grams.
  -->
  <fieldType name="text_ngram" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              generateWordParts="1" generateNumberParts="1"
              catenateWords="1" catenateNumbers="1" catenateAll="0"
              splitOnCaseChange="0" preserveOriginal="1"/>
      <!-- A graph filter used at index time has to be followed by a flatten
           step: the indexer cannot consume a token graph directly. -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <filter class="solr.NGramFilterFactory" minGramSize="2" maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>