schema_extra_types.xml 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  <!--
    Edge NGram String Field
    6.0.0

    The KeywordTokenizer keeps the whole field value as one token. The value
    is lowercased and, at index time only, expanded into leading-edge n-grams
    of 2 to 25 characters. The query side performs no n-gram expansion, so a
    lowercased query string is compared against the indexed prefixes as-is.
  -->
  <fieldType name="text_edgenstring"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <!-- Index only: emit the prefixes of the lowercased value. -->
      <filter class="solr.EdgeNGramFilterFactory"
              minGramSize="2"
              maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Edge NGram Text Field
    7.0.0

    Whitespace-tokenized text with accent mapping (accents_und.txt). At index
    time stop words are dropped, tokens are split and catenated by the word
    delimiter filter, limited to a length of 2 to 100 characters, lowercased,
    stripped of duplicate tokens and finally expanded into leading-edge
    n-grams of 2 to 25 characters. The query side only maps accents,
    length-filters, lowercases and removes duplicates, so query terms are
    matched against the indexed prefixes without being n-grammed themselves.
  -->
  <fieldType name="text_edge"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <!-- Index only: emit the prefixes of every remaining token. -->
      <filter class="solr.EdgeNGramFilterFactory"
              minGramSize="2"
              maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    English Text Field
    7.0.0

    Stemmed English full text. Both analyzers map accents (accents_en.txt),
    tokenize on whitespace, drop stop words (stopwords_en.txt), split tokens
    with the word delimiter filter, keep tokens of 2 to 100 characters,
    lowercase, apply the English Snowball stemmer (terms listed in
    protwords_en.txt are protected) and remove duplicate tokens.
    Differences between the two sides:
      - index: catenates word and number parts (catenateWords and
        catenateNumbers are 1);
      - query: expands synonyms from synonyms_en.txt first and does not
        catenate parts.
  -->
  <fieldType name="text_en"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords_en.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_en.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords_en.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    English Text Field collated
    7.0.0

    ICU collation field for the "en" locale, configured with primary
    strength and the case level switched off.
  -->
  <fieldType name="collated_en"
             class="solr.ICUCollationField"
             locale="en"
             strength="primary"
             caseLevel="false"/>
  <!--
    English Text Field unstemmed
    7.0.0

    English full text without a stemmer. Both analyzers map accents
    (accents_en.txt), tokenize on whitespace, drop stop words
    (stopwords_en.txt), split tokens with the word delimiter filter, keep
    tokens of 2 to 100 characters, lowercase and remove duplicate tokens.
    Differences between the two sides:
      - index: catenates word and number parts;
      - query: expands synonyms from synonyms_en.txt first and does not
        catenate parts.
  -->
  <fieldType name="text_unstemmed_en"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_en.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_en.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    French Text Field
    7.0.0

    Stemmed French full text. Both analyzers map accents (accents_fr.txt),
    tokenize on whitespace, apply the elision filter, split tokens with the
    word delimiter filter (including on case changes), lowercase, drop stop
    words (stopwords_fr.txt), apply the French Snowball stemmer (terms listed
    in protwords_fr.txt are protected) and remove duplicate tokens.
    Differences between the two sides:
      - index: generates number parts and catenates word and number parts;
      - query: generates no number parts, does not catenate, and expands
        synonyms from synonyms_fr.txt after lowercasing.
  -->
  <fieldType name="text_fr"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" protected="protwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="0"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_fr.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" protected="protwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    French Text Field collated
    7.0.0

    ICU collation field for the "fr" locale, configured with primary
    strength and the case level switched off.
  -->
  <fieldType name="collated_fr"
             class="solr.ICUCollationField"
             locale="fr"
             strength="primary"
             caseLevel="false"/>
  <!--
    French Text Field unstemmed
    7.0.0

    French full text without a stemmer. Both analyzers map accents
    (accents_fr.txt), tokenize on whitespace, apply the elision filter, split
    tokens with the word delimiter filter (including on case changes),
    lowercase, drop stop words (stopwords_fr.txt) and remove duplicate
    tokens.
    Differences between the two sides:
      - index: generates number parts and catenates word and number parts;
      - query: generates no number parts, does not catenate, and expands
        synonyms from synonyms_fr.txt after lowercasing.
  -->
  <fieldType name="text_unstemmed_fr"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_fr.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <!--
        Strip elided articles exactly as the index analyzer does, so that a
        query such as l'avion is reduced to the same token that was indexed.
      -->
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="0"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_fr.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Fulltext Phonetic
    7.0.0

    Language-undefined phonetic matching. Both analyzers tokenize on
    whitespace, drop stop words (stopwords_und.txt), split tokens with the
    word delimiter filter, keep tokens of 2 to 100 characters, lowercase,
    apply Beider-Morse phonetic encoding (languageSet "auto", GENERIC names,
    APPROX rules) and remove duplicate tokens. The index side additionally
    catenates word and number parts; the query side does not.
  -->
  <fieldType name="text_phonetic_und"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="auto"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="auto"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Fulltext Phonetic English
    7.0.0

    English phonetic matching. Both analyzers tokenize on whitespace, drop
    stop words (stopwords_en.txt), split tokens with the word delimiter
    filter, keep tokens of 2 to 100 characters, lowercase, apply Beider-Morse
    phonetic encoding (languageSet "english", GENERIC names, APPROX rules)
    and remove duplicate tokens. The index side additionally catenates word
    and number parts; the query side does not.
  -->
  <fieldType name="text_phonetic_en"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="english"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_en.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="english"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Fulltext Phonetic French
    7.0.0

    French phonetic matching. Both analyzers tokenize on whitespace, apply
    the elision filter, split tokens with the word delimiter filter
    (including on case changes), lowercase, drop stop words
    (stopwords_fr.txt), apply Beider-Morse phonetic encoding (languageSet
    "french", GENERIC names, APPROX rules) and remove duplicate tokens.
    The index side additionally generates number parts and catenates word
    and number parts; the query side does neither.
  -->
  <fieldType name="text_phonetic_fr"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="french"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <!--
        Strip elided articles exactly as the index analyzer does, so that a
        query such as l'avion is reduced to the same token that was indexed
        before it is phonetically encoded.
      -->
      <filter class="solr.ElisionFilterFactory"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_fr.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="0"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_fr.txt"/>
      <filter class="solr.BeiderMorseFilterFactory"
              languageSet="french"
              nameType="GENERIC"
              ruleType="APPROX"
              concat="true"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Language Undefined Text Field
    7.0.0

    Full text for content without a defined language; no stemmer is applied.
    Both analyzers map accents (accents_und.txt), tokenize on whitespace,
    drop stop words (stopwords_und.txt), split tokens with the word delimiter
    filter, keep tokens of 2 to 100 characters, lowercase and remove
    duplicate tokens.
    Differences between the two sides:
      - index: catenates word and number parts;
      - query: expands synonyms from synonyms_und.txt first and does not
        catenate parts.
  -->
  <fieldType name="text_und"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_und.txt" expand="true" ignoreCase="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Language Undefined Text Field spellcheck
    7.0.0

    Minimal analysis chain for spellcheck terms. The single analyzer carries
    no type attribute, so the same chain serves indexing and querying:
    accents are mapped (accents_und.txt), text is split on whitespace, tokens
    outside a length of 2 to 100 characters are dropped, the rest is
    lowercased and duplicate tokens are removed. No stop word, synonym or
    word delimiter processing is applied.
  -->
  <fieldType name="text_spell_und"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer>
      <charFilter class="solr.MappingCharFilterFactory"
                  mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LengthFilterFactory"
              min="2"
              max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    Language Undefined Text Field collated
    7.0.0

    ICU collation field configured with primary strength and the case level
    switched off.
    NOTE(review): the locale is "en" although the type is named for an
    undefined language; presumably a deliberate fallback. Confirm before
    changing it.
  -->
  <fieldType name="collated_und"
             class="solr.ICUCollationField"
             locale="en"
             strength="primary"
             caseLevel="false"/>
  <!--
    NGram String Field
    6.0.0

    The KeywordTokenizer keeps the whole field value as one token. The value
    is lowercased and, at index time only, expanded into n-grams of 2 to 25
    characters. The query side performs no n-gram expansion, so a lowercased
    query string is compared against the indexed n-grams as-is.
  -->
  <fieldType name="text_ngramstring"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <!-- Index only: emit the n-grams of the lowercased value. -->
      <filter class="solr.NGramFilterFactory"
              minGramSize="2"
              maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <!--
    NGram Text Field
    7.0.0

    Whitespace-tokenized text with accent mapping (accents_und.txt). At index
    time stop words are dropped, tokens are split and catenated by the word
    delimiter filter, limited to a length of 2 to 100 characters, lowercased,
    stripped of duplicate tokens and finally expanded into n-grams of 2 to 25
    characters. The query side only maps accents, length-filters, lowercases
    and removes duplicates, so query terms are matched against the indexed
    n-grams without being n-grammed themselves.
  -->
  <fieldType name="text_ngram"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_und.txt"/>
      <filter class="solr.WordDelimiterGraphFilterFactory"
              protected="protwords_und.txt"
              preserveOriginal="1"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="0"/>
      <!--
        The word delimiter graph filter emits a token graph, which the
        indexer cannot consume directly. The Solr Reference Guide requires a
        flatten graph filter after it in index analyzers.
      -->
      <filter class="solr.FlattenGraphFilterFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      <!-- Index only: emit the n-grams of every remaining token. -->
      <filter class="solr.NGramFilterFactory"
              minGramSize="2"
              maxGramSize="25"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LengthFilterFactory" min="2" max="100"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>