|
a/schema.xml |
|
b/schema.xml |
|
... |
|
... |
216 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
216 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
217 |
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
217 |
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
218 |
<filter class="solr.LowerCaseFilterFactory"/>
|
218 |
<filter class="solr.LowerCaseFilterFactory"/>
|
219 |
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
|
219 |
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
|
220 |
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
220 |
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
|
|
221 |
</analyzer>
|
|
|
222 |
</fieldType>
|
|
|
223 |
|
|
|
224 |
<!-- A general unstemmed text field - good if one does not know the language of the field -->
|
|
|
225 |
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
|
|
|
226 |
<analyzer type="index">
|
|
|
227 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
|
|
228 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
|
|
229 |
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
|
|
230 |
<filter class="solr.LowerCaseFilterFactory"/>
|
|
|
231 |
</analyzer>
|
|
|
232 |
<analyzer type="query">
|
|
|
233 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
|
|
234 |
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
|
|
235 |
<filter class="solr.StopFilterFactory"
|
|
|
236 |
ignoreCase="true"
|
|
|
237 |
words="stopwords.txt"
|
|
|
238 |
enablePositionIncrements="true"
|
|
|
239 |
/>
|
|
|
240 |
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
|
|
241 |
<filter class="solr.LowerCaseFilterFactory"/>
|
|
|
242 |
</analyzer>
|
|
|
243 |
</fieldType>
|
|
|
244 |
|
|
|
245 |
<!-- A general unstemmed text field that indexes tokens normally and also
|
|
|
246 |
reversed (via ReversedWildcardFilterFactory), to enable more efficient
|
|
|
247 |
leading wildcard queries. -->
|
|
|
248 |
<!--
|
|
|
249 |
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
|
|
|
250 |
<analyzer type="index">
|
|
|
251 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
|
|
252 |
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
|
|
253 |
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
|
|
254 |
<filter class="solr.LowerCaseFilterFactory"/>
|
|
|
255 |
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
|
|
256 |
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
|
|
257 |
</analyzer>
|
|
|
258 |
<analyzer type="query">
|
|
|
259 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
|
|
260 |
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
|
|
261 |
<filter class="solr.StopFilterFactory"
|
|
|
262 |
ignoreCase="true"
|
|
|
263 |
words="stopwords.txt"
|
|
|
264 |
enablePositionIncrements="true"
|
|
|
265 |
/>
|
|
|
266 |
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
|
|
267 |
<filter class="solr.LowerCaseFilterFactory"/>
|
|
|
268 |
</analyzer>
|
|
|
269 |
</fieldType>
|
|
|
270 |
-->
|
|
|
271 |
|
|
|
272 |
<!-- charFilter + WhitespaceTokenizer -->
|
|
|
273 |
<!--
|
|
|
274 |
<fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
|
|
|
275 |
<analyzer>
|
|
|
276 |
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
|
|
277 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
|
|
278 |
</analyzer>
|
|
|
279 |
</fieldType>
|
|
|
280 |
-->
|
|
|
281 |
|
|
|
282 |
<!-- This is an example of using the KeywordTokenizer along
|
|
|
283 |
with various TokenFilterFactories to produce a sortable field
|
|
|
284 |
that does not include some properties of the source text
|
|
|
285 |
-->
|
|
|
286 |
|
|
|
287 |
<!--
|
|
|
288 |
<fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
|
|
289 |
<analyzer>
|
|
|
290 |
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
|
|
291 |
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
|
|
292 |
</analyzer>
|
|
|
293 |
</fieldtype>
|
|
|
294 |
-->
|
|
|
295 |
|
|
|
296 |
<!-- lowercases the entire field value, keeping it as a single token. -->
|
|
|
297 |
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
|
|
298 |
<analyzer>
|
|
|
299 |
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
|
|
300 |
<filter class="solr.LowerCaseFilterFactory" />
|
221 |
</analyzer>
|
301 |
</analyzer>
|
222 |
</fieldType>
|
302 |
</fieldType>
|
223 |
|
303 |
|
224 |
<!--
|
304 |
<!--
|
225 |
Setup simple analysis for spell checking
|
305 |
Setup simple analysis for spell checking
|
|
... |
|
... |
315 |
<field name="unix_group_name" type="string" indexed="true" stored="true" />
|
395 |
<field name="unix_group_name" type="string" indexed="true" stored="true" />
|
316 |
<field name="source" type="string" indexed="true" stored="true" />
|
396 |
<field name="source" type="string" indexed="true" stored="true" />
|
317 |
<field name="rating" type="float" indexed="true" stored="true" />
|
397 |
<field name="rating" type="float" indexed="true" stored="true" />
|
318 |
<field name="review_count" type="integer" indexed="true" stored="true" />
|
398 |
<field name="review_count" type="integer" indexed="true" stored="true" />
|
319 |
|
399 |
|
|
|
400 |
<field name="id" type="string" indexed="true" stored="true" required="true" />
|
|
|
401 |
<field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
|
|
|
402 |
<field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
|
|
|
403 |
<field name="manu" type="textgen" indexed="true" stored="true" omitNorms="true"/>
|
|
|
404 |
<field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" />
|
|
|
405 |
<field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
|
|
|
406 |
<field name="includes" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
|
|
|
407 |
|
|
|
408 |
<field name="weight" type="float" indexed="true" stored="true"/>
|
|
|
409 |
<field name="price" type="float" indexed="true" stored="true"/>
|
|
|
410 |
<field name="popularity" type="int" indexed="true" stored="true" />
|
|
|
411 |
<field name="inStock" type="boolean" indexed="true" stored="true" />
|
|
|
412 |
|
|
|
413 |
<!-- Common metadata fields, named specifically to match up with
|
|
|
414 |
SolrCell metadata when parsing rich documents such as Word, PDF.
|
|
|
415 |
Some fields are multiValued only because Tika currently may return
|
|
|
416 |
multiple values for them.
|
|
|
417 |
-->
|
|
|
418 |
<field name="title" type="text" indexed="true" stored="true" multiValued="true"/>
|
|
|
419 |
<field name="subject" type="text" indexed="true" stored="true"/>
|
|
|
420 |
<field name="comments" type="text" indexed="true" stored="true"/>
|
|
|
421 |
<field name="author" type="textgen" indexed="true" stored="true"/>
|
|
|
422 |
<field name="keywords" type="textgen" indexed="true" stored="true"/>
|
|
|
423 |
<field name="category" type="textgen" indexed="true" stored="true"/>
|
|
|
424 |
<field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
|
|
|
425 |
<field name="last_modified" type="date" indexed="true" stored="true"/>
|
|
|
426 |
<field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
|
|
|
427 |
|
|
|
428 |
|
|
|
429 |
<!-- catchall field, containing all other searchable text fields (implemented
|
|
|
430 |
via copyField further on in this schema) -->
|
|
|
431 |
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
|
|
432 |
|
|
|
433 |
<!-- catchall text field that indexes tokens both normally and in reverse for efficient
|
|
|
434 |
leading wildcard queries. -->
|
|
|
435 |
<!--
|
|
|
436 |
<field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
|
|
|
437 |
-->
|
|
|
438 |
|
|
|
439 |
<!-- non-tokenized version of manufacturer to make it easier to sort or group
|
|
|
440 |
results by manufacturer. Copied from "manu" via copyField. -->
|
|
|
441 |
<field name="manu_exact" type="string" indexed="true" stored="false"/>
|
|
|
442 |
|
|
|
443 |
<!--
|
|
|
444 |
<field name="payloads" type="payloads" indexed="true" stored="true"/>
|
|
|
445 |
-->
|
|
|
446 |
|
320 |
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
447 |
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
321 |
will be used if the name matches any of the patterns.
|
448 |
will be used if the name matches any of the patterns.
|
322 |
RESTRICTION: the glob-like pattern in the name attribute must have
|
449 |
RESTRICTION: the glob-like pattern in the name attribute must have
|
323 |
a "*" only at the start or the end.
|
450 |
a "*" only at the start or the end.
|
324 |
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
451 |
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
|
... |
|
... |
353 |
</fields>
|
480 |
</fields>
|
354 |
|
481 |
|
355 |
<!-- Field to use to determine and enforce document uniqueness.
|
482 |
<!-- Field to use to determine and enforce document uniqueness.
|
356 |
Unless this field is marked with required="false", it will be a required field
|
483 |
Unless this field is marked with required="false", it will be a required field
|
357 |
-->
|
484 |
-->
|
358 |
<uniqueKey>project_doc_id</uniqueKey>
|
485 |
<uniqueKey>id</uniqueKey>
|
359 |
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
486 |
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
360 |
<defaultSearchField>description</defaultSearchField>
|
487 |
<defaultSearchField>text</defaultSearchField>
|
361 |
|
488 |
|
362 |
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
489 |
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
363 |
<solrQueryParser defaultOperator="OR"/>
|
490 |
<solrQueryParser defaultOperator="OR"/>
|
|
|
491 |
|
|
|
492 |
<!-- copyField commands copy one field to another at the time a document
|
|
|
493 |
is added to the index. It's used either to index the same field differently,
|
|
|
494 |
or to add multiple fields to the same field for easier/faster searching. -->
|
|
|
495 |
|
|
|
496 |
<copyField source="cat" dest="text"/>
|
|
|
497 |
<copyField source="name" dest="text"/>
|
|
|
498 |
<copyField source="manu" dest="text"/>
|
|
|
499 |
<copyField source="features" dest="text"/>
|
|
|
500 |
<copyField source="includes" dest="text"/>
|
|
|
501 |
<copyField source="manu" dest="manu_exact"/>
|
|
|
502 |
|
364 |
</schema>
|
503 |
</schema>
|