Language analyzers
Language analyzers
A set of analyzers aimed at analyzing specific language text. The
following types are supported:
arabic
,
armenian
,
basque
,
bengali
,
brazilian
,
bulgarian
,
catalan
,
cjk
,
czech
,
danish
,
dutch
,
english
,
estonian
,
finnish
,
french
,
galician
,
german
,
greek
,
hindi
,
hungarian
,
indonesian
,
irish
,
italian
,
latvian
,
lithuanian
,
norwegian
,
persian
,
portuguese
,
romanian
,
russian
,
serbian
,
sorani
,
spanish
,
swedish
,
turkish
,
thai
.
Configuring language analyzers
Stopwords
All analyzers support setting custom stopwords
either internally in
the config, or by using an external stopwords file by setting
stopwords_path
. Check Stop Analyzer for
more details.
Excluding words from stemming
The stem_exclusion
parameter allows you to specify an array
of lowercase words that should not be stemmed. Internally, this
functionality is implemented by adding the
keyword_marker
token filter
with the keywords
set to the value of the stem_exclusion
parameter.
The following analyzers support setting a custom stem_exclusion
list:
arabic
, armenian
, basque
, bengali
, bulgarian
, catalan
, czech
,
dutch
, english
, finnish
, french
, galician
,
german
, hindi
, hungarian
, indonesian
, irish
, italian
, latvian
,
lithuanian
, norwegian
, portuguese
, romanian
, russian
, serbian
,
sorani
, spanish
, swedish
, turkish
.
Reimplementing language analyzers
The built-in language analyzers can be reimplemented as custom
analyzers
(as described below) in order to customize their behaviour.
If you do not intend to exclude words from being stemmed (the
equivalent of the stem_exclusion
parameter above), then you should remove
the keyword_marker
token filter from the custom analyzer configuration.
arabic
analyzer
The arabic
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="arabic_example", settings={ "analysis": { "filter": { "arabic_stop": { "type": "stop", "stopwords": "_arabic_" }, "arabic_keywords": { "type": "keyword_marker", "keywords": [ "مثال" ] }, "arabic_stemmer": { "type": "stemmer", "language": "arabic" } }, "analyzer": { "rebuilt_arabic": { "tokenizer": "standard", "filter": [ "lowercase", "decimal_digit", "arabic_stop", "arabic_normalization", "arabic_keywords", "arabic_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'arabic_example', body: { settings: { analysis: { filter: { arabic_stop: { type: 'stop', stopwords: '_arabic_' }, arabic_keywords: { type: 'keyword_marker', keywords: [ 'مثال' ] }, arabic_stemmer: { type: 'stemmer', language: 'arabic' } }, analyzer: { rebuilt_arabic: { tokenizer: 'standard', filter: [ 'lowercase', 'decimal_digit', 'arabic_stop', 'arabic_normalization', 'arabic_keywords', 'arabic_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "arabic_example", settings: { analysis: { filter: { arabic_stop: { type: "stop", stopwords: "_arabic_", }, arabic_keywords: { type: "keyword_marker", keywords: ["مثال"], }, arabic_stemmer: { type: "stemmer", language: "arabic", }, }, analyzer: { rebuilt_arabic: { tokenizer: "standard", filter: [ "lowercase", "decimal_digit", "arabic_stop", "arabic_normalization", "arabic_keywords", "arabic_stemmer", ], }, }, }, }, }); console.log(response);
PUT /arabic_example { "settings": { "analysis": { "filter": { "arabic_stop": { "type": "stop", "stopwords": "_arabic_" }, "arabic_keywords": { "type": "keyword_marker", "keywords": ["مثال"] }, "arabic_stemmer": { "type": "stemmer", "language": "arabic" } }, "analyzer": { "rebuilt_arabic": { "tokenizer": "standard", "filter": [ "lowercase", "decimal_digit", "arabic_stop", "arabic_normalization", "arabic_keywords", "arabic_stemmer" ] } } } } }
armenian
analyzer
The armenian
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="armenian_example", settings={ "analysis": { "filter": { "armenian_stop": { "type": "stop", "stopwords": "_armenian_" }, "armenian_keywords": { "type": "keyword_marker", "keywords": [ "օրինակ" ] }, "armenian_stemmer": { "type": "stemmer", "language": "armenian" } }, "analyzer": { "rebuilt_armenian": { "tokenizer": "standard", "filter": [ "lowercase", "armenian_stop", "armenian_keywords", "armenian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'armenian_example', body: { settings: { analysis: { filter: { armenian_stop: { type: 'stop', stopwords: '_armenian_' }, armenian_keywords: { type: 'keyword_marker', keywords: [ 'օրինակ' ] }, armenian_stemmer: { type: 'stemmer', language: 'armenian' } }, analyzer: { rebuilt_armenian: { tokenizer: 'standard', filter: [ 'lowercase', 'armenian_stop', 'armenian_keywords', 'armenian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "armenian_example", settings: { analysis: { filter: { armenian_stop: { type: "stop", stopwords: "_armenian_", }, armenian_keywords: { type: "keyword_marker", keywords: ["օրինակ"], }, armenian_stemmer: { type: "stemmer", language: "armenian", }, }, analyzer: { rebuilt_armenian: { tokenizer: "standard", filter: [ "lowercase", "armenian_stop", "armenian_keywords", "armenian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /armenian_example { "settings": { "analysis": { "filter": { "armenian_stop": { "type": "stop", "stopwords": "_armenian_" }, "armenian_keywords": { "type": "keyword_marker", "keywords": ["օրինակ"] }, "armenian_stemmer": { "type": "stemmer", "language": "armenian" } }, "analyzer": { "rebuilt_armenian": { "tokenizer": "standard", "filter": [ "lowercase", "armenian_stop", "armenian_keywords", "armenian_stemmer" ] } } } } }
basque
analyzer
The basque
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="basque_example", settings={ "analysis": { "filter": { "basque_stop": { "type": "stop", "stopwords": "_basque_" }, "basque_keywords": { "type": "keyword_marker", "keywords": [ "Adibidez" ] }, "basque_stemmer": { "type": "stemmer", "language": "basque" } }, "analyzer": { "rebuilt_basque": { "tokenizer": "standard", "filter": [ "lowercase", "basque_stop", "basque_keywords", "basque_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'basque_example', body: { settings: { analysis: { filter: { basque_stop: { type: 'stop', stopwords: '_basque_' }, basque_keywords: { type: 'keyword_marker', keywords: [ 'Adibidez' ] }, basque_stemmer: { type: 'stemmer', language: 'basque' } }, analyzer: { rebuilt_basque: { tokenizer: 'standard', filter: [ 'lowercase', 'basque_stop', 'basque_keywords', 'basque_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "basque_example", settings: { analysis: { filter: { basque_stop: { type: "stop", stopwords: "_basque_", }, basque_keywords: { type: "keyword_marker", keywords: ["Adibidez"], }, basque_stemmer: { type: "stemmer", language: "basque", }, }, analyzer: { rebuilt_basque: { tokenizer: "standard", filter: [ "lowercase", "basque_stop", "basque_keywords", "basque_stemmer", ], }, }, }, }, }); console.log(response);
PUT /basque_example { "settings": { "analysis": { "filter": { "basque_stop": { "type": "stop", "stopwords": "_basque_" }, "basque_keywords": { "type": "keyword_marker", "keywords": ["Adibidez"] }, "basque_stemmer": { "type": "stemmer", "language": "basque" } }, "analyzer": { "rebuilt_basque": { "tokenizer": "standard", "filter": [ "lowercase", "basque_stop", "basque_keywords", "basque_stemmer" ] } } } } }
bengali
analyzer
The bengali
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="bengali_example", settings={ "analysis": { "filter": { "bengali_stop": { "type": "stop", "stopwords": "_bengali_" }, "bengali_keywords": { "type": "keyword_marker", "keywords": [ "উদাহরণ" ] }, "bengali_stemmer": { "type": "stemmer", "language": "bengali" } }, "analyzer": { "rebuilt_bengali": { "tokenizer": "standard", "filter": [ "lowercase", "decimal_digit", "bengali_keywords", "indic_normalization", "bengali_normalization", "bengali_stop", "bengali_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'bengali_example', body: { settings: { analysis: { filter: { bengali_stop: { type: 'stop', stopwords: '_bengali_' }, bengali_keywords: { type: 'keyword_marker', keywords: [ 'উদাহরণ' ] }, bengali_stemmer: { type: 'stemmer', language: 'bengali' } }, analyzer: { rebuilt_bengali: { tokenizer: 'standard', filter: [ 'lowercase', 'decimal_digit', 'bengali_keywords', 'indic_normalization', 'bengali_normalization', 'bengali_stop', 'bengali_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "bengali_example", settings: { analysis: { filter: { bengali_stop: { type: "stop", stopwords: "_bengali_", }, bengali_keywords: { type: "keyword_marker", keywords: ["উদাহরণ"], }, bengali_stemmer: { type: "stemmer", language: "bengali", }, }, analyzer: { rebuilt_bengali: { tokenizer: "standard", filter: [ "lowercase", "decimal_digit", "bengali_keywords", "indic_normalization", "bengali_normalization", "bengali_stop", "bengali_stemmer", ], }, }, }, }, }); console.log(response);
PUT /bengali_example { "settings": { "analysis": { "filter": { "bengali_stop": { "type": "stop", "stopwords": "_bengali_" }, "bengali_keywords": { "type": "keyword_marker", "keywords": ["উদাহরণ"] }, "bengali_stemmer": { "type": "stemmer", "language": "bengali" } }, "analyzer": { "rebuilt_bengali": { "tokenizer": "standard", "filter": [ "lowercase", "decimal_digit", "bengali_keywords", "indic_normalization", "bengali_normalization", "bengali_stop", "bengali_stemmer" ] } } } } }
brazilian
analyzer
The brazilian
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="brazilian_example", settings={ "analysis": { "filter": { "brazilian_stop": { "type": "stop", "stopwords": "_brazilian_" }, "brazilian_keywords": { "type": "keyword_marker", "keywords": [ "exemplo" ] }, "brazilian_stemmer": { "type": "stemmer", "language": "brazilian" } }, "analyzer": { "rebuilt_brazilian": { "tokenizer": "standard", "filter": [ "lowercase", "brazilian_stop", "brazilian_keywords", "brazilian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'brazilian_example', body: { settings: { analysis: { filter: { brazilian_stop: { type: 'stop', stopwords: '_brazilian_' }, brazilian_keywords: { type: 'keyword_marker', keywords: [ 'exemplo' ] }, brazilian_stemmer: { type: 'stemmer', language: 'brazilian' } }, analyzer: { rebuilt_brazilian: { tokenizer: 'standard', filter: [ 'lowercase', 'brazilian_stop', 'brazilian_keywords', 'brazilian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "brazilian_example", settings: { analysis: { filter: { brazilian_stop: { type: "stop", stopwords: "_brazilian_", }, brazilian_keywords: { type: "keyword_marker", keywords: ["exemplo"], }, brazilian_stemmer: { type: "stemmer", language: "brazilian", }, }, analyzer: { rebuilt_brazilian: { tokenizer: "standard", filter: [ "lowercase", "brazilian_stop", "brazilian_keywords", "brazilian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /brazilian_example { "settings": { "analysis": { "filter": { "brazilian_stop": { "type": "stop", "stopwords": "_brazilian_" }, "brazilian_keywords": { "type": "keyword_marker", "keywords": ["exemplo"] }, "brazilian_stemmer": { "type": "stemmer", "language": "brazilian" } }, "analyzer": { "rebuilt_brazilian": { "tokenizer": "standard", "filter": [ "lowercase", "brazilian_stop", "brazilian_keywords", "brazilian_stemmer" ] } } } } }
bulgarian
analyzer
The bulgarian
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="bulgarian_example", settings={ "analysis": { "filter": { "bulgarian_stop": { "type": "stop", "stopwords": "_bulgarian_" }, "bulgarian_keywords": { "type": "keyword_marker", "keywords": [ "пример" ] }, "bulgarian_stemmer": { "type": "stemmer", "language": "bulgarian" } }, "analyzer": { "rebuilt_bulgarian": { "tokenizer": "standard", "filter": [ "lowercase", "bulgarian_stop", "bulgarian_keywords", "bulgarian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'bulgarian_example', body: { settings: { analysis: { filter: { bulgarian_stop: { type: 'stop', stopwords: '_bulgarian_' }, bulgarian_keywords: { type: 'keyword_marker', keywords: [ 'пример' ] }, bulgarian_stemmer: { type: 'stemmer', language: 'bulgarian' } }, analyzer: { rebuilt_bulgarian: { tokenizer: 'standard', filter: [ 'lowercase', 'bulgarian_stop', 'bulgarian_keywords', 'bulgarian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "bulgarian_example", settings: { analysis: { filter: { bulgarian_stop: { type: "stop", stopwords: "_bulgarian_", }, bulgarian_keywords: { type: "keyword_marker", keywords: ["пример"], }, bulgarian_stemmer: { type: "stemmer", language: "bulgarian", }, }, analyzer: { rebuilt_bulgarian: { tokenizer: "standard", filter: [ "lowercase", "bulgarian_stop", "bulgarian_keywords", "bulgarian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /bulgarian_example { "settings": { "analysis": { "filter": { "bulgarian_stop": { "type": "stop", "stopwords": "_bulgarian_" }, "bulgarian_keywords": { "type": "keyword_marker", "keywords": ["пример"] }, "bulgarian_stemmer": { "type": "stemmer", "language": "bulgarian" } }, "analyzer": { "rebuilt_bulgarian": { "tokenizer": "standard", "filter": [ "lowercase", "bulgarian_stop", "bulgarian_keywords", "bulgarian_stemmer" ] } } } } }
catalan
analyzer
The catalan
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="catalan_example", settings={ "analysis": { "filter": { "catalan_elision": { "type": "elision", "articles": [ "d", "l", "m", "n", "s", "t" ], "articles_case": True }, "catalan_stop": { "type": "stop", "stopwords": "_catalan_" }, "catalan_keywords": { "type": "keyword_marker", "keywords": [ "example" ] }, "catalan_stemmer": { "type": "stemmer", "language": "catalan" } }, "analyzer": { "rebuilt_catalan": { "tokenizer": "standard", "filter": [ "catalan_elision", "lowercase", "catalan_stop", "catalan_keywords", "catalan_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'catalan_example', body: { settings: { analysis: { filter: { catalan_elision: { type: 'elision', articles: [ 'd', 'l', 'm', 'n', 's', 't' ], articles_case: true }, catalan_stop: { type: 'stop', stopwords: '_catalan_' }, catalan_keywords: { type: 'keyword_marker', keywords: [ 'example' ] }, catalan_stemmer: { type: 'stemmer', language: 'catalan' } }, analyzer: { rebuilt_catalan: { tokenizer: 'standard', filter: [ 'catalan_elision', 'lowercase', 'catalan_stop', 'catalan_keywords', 'catalan_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "catalan_example", settings: { analysis: { filter: { catalan_elision: { type: "elision", articles: ["d", "l", "m", "n", "s", "t"], articles_case: true, }, catalan_stop: { type: "stop", stopwords: "_catalan_", }, catalan_keywords: { type: "keyword_marker", keywords: ["example"], }, catalan_stemmer: { type: "stemmer", language: "catalan", }, }, analyzer: { rebuilt_catalan: { tokenizer: "standard", filter: [ "catalan_elision", "lowercase", "catalan_stop", "catalan_keywords", "catalan_stemmer", ], }, }, }, }, }); console.log(response);
PUT /catalan_example { "settings": { "analysis": { "filter": { "catalan_elision": { "type": "elision", "articles": [ "d", "l", "m", "n", "s", "t"], "articles_case": true }, "catalan_stop": { "type": "stop", "stopwords": "_catalan_" }, "catalan_keywords": { "type": "keyword_marker", "keywords": ["example"] }, "catalan_stemmer": { "type": "stemmer", "language": "catalan" } }, "analyzer": { "rebuilt_catalan": { "tokenizer": "standard", "filter": [ "catalan_elision", "lowercase", "catalan_stop", "catalan_keywords", "catalan_stemmer" ] } } } } }
cjk
analyzer
You may find that icu_analyzer
in the ICU analysis plugin works better
for CJK text than the cjk
analyzer. Experiment with your text and queries.
The cjk
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="cjk_example", settings={ "analysis": { "filter": { "english_stop": { "type": "stop", "stopwords": [ "a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with", "www" ] } }, "analyzer": { "rebuilt_cjk": { "tokenizer": "standard", "filter": [ "cjk_width", "lowercase", "cjk_bigram", "english_stop" ] } } } }, ) print(resp)
response = client.indices.create( index: 'cjk_example', body: { settings: { analysis: { filter: { english_stop: { type: 'stop', stopwords: [ 'a', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', 'if', 'in', 'into', 'is', 'it', 'no', 'not', 'of', 'on', 'or', 's', 'such', 't', 'that', 'the', 'their', 'then', 'there', 'these', 'they', 'this', 'to', 'was', 'will', 'with', 'www' ] } }, analyzer: { rebuilt_cjk: { tokenizer: 'standard', filter: [ 'cjk_width', 'lowercase', 'cjk_bigram', 'english_stop' ] } } } } } ) puts response
const response = await client.indices.create({ index: "cjk_example", settings: { analysis: { filter: { english_stop: { type: "stop", stopwords: [ "a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with", "www", ], }, }, analyzer: { rebuilt_cjk: { tokenizer: "standard", filter: ["cjk_width", "lowercase", "cjk_bigram", "english_stop"], }, }, }, }, }); console.log(response);
PUT /cjk_example { "settings": { "analysis": { "filter": { "english_stop": { "type": "stop", "stopwords": [ "a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with", "www" ] } }, "analyzer": { "rebuilt_cjk": { "tokenizer": "standard", "filter": [ "cjk_width", "lowercase", "cjk_bigram", "english_stop" ] } } } } }
czech
analyzer
The czech
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="czech_example", settings={ "analysis": { "filter": { "czech_stop": { "type": "stop", "stopwords": "_czech_" }, "czech_keywords": { "type": "keyword_marker", "keywords": [ "příklad" ] }, "czech_stemmer": { "type": "stemmer", "language": "czech" } }, "analyzer": { "rebuilt_czech": { "tokenizer": "standard", "filter": [ "lowercase", "czech_stop", "czech_keywords", "czech_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'czech_example', body: { settings: { analysis: { filter: { czech_stop: { type: 'stop', stopwords: '_czech_' }, czech_keywords: { type: 'keyword_marker', keywords: [ 'příklad' ] }, czech_stemmer: { type: 'stemmer', language: 'czech' } }, analyzer: { rebuilt_czech: { tokenizer: 'standard', filter: [ 'lowercase', 'czech_stop', 'czech_keywords', 'czech_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "czech_example", settings: { analysis: { filter: { czech_stop: { type: "stop", stopwords: "_czech_", }, czech_keywords: { type: "keyword_marker", keywords: ["příklad"], }, czech_stemmer: { type: "stemmer", language: "czech", }, }, analyzer: { rebuilt_czech: { tokenizer: "standard", filter: [ "lowercase", "czech_stop", "czech_keywords", "czech_stemmer", ], }, }, }, }, }); console.log(response);
PUT /czech_example { "settings": { "analysis": { "filter": { "czech_stop": { "type": "stop", "stopwords": "_czech_" }, "czech_keywords": { "type": "keyword_marker", "keywords": ["příklad"] }, "czech_stemmer": { "type": "stemmer", "language": "czech" } }, "analyzer": { "rebuilt_czech": { "tokenizer": "standard", "filter": [ "lowercase", "czech_stop", "czech_keywords", "czech_stemmer" ] } } } } }
danish
analyzer
The danish
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="danish_example", settings={ "analysis": { "filter": { "danish_stop": { "type": "stop", "stopwords": "_danish_" }, "danish_keywords": { "type": "keyword_marker", "keywords": [ "eksempel" ] }, "danish_stemmer": { "type": "stemmer", "language": "danish" } }, "analyzer": { "rebuilt_danish": { "tokenizer": "standard", "filter": [ "lowercase", "danish_stop", "danish_keywords", "danish_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'danish_example', body: { settings: { analysis: { filter: { danish_stop: { type: 'stop', stopwords: '_danish_' }, danish_keywords: { type: 'keyword_marker', keywords: [ 'eksempel' ] }, danish_stemmer: { type: 'stemmer', language: 'danish' } }, analyzer: { rebuilt_danish: { tokenizer: 'standard', filter: [ 'lowercase', 'danish_stop', 'danish_keywords', 'danish_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "danish_example", settings: { analysis: { filter: { danish_stop: { type: "stop", stopwords: "_danish_", }, danish_keywords: { type: "keyword_marker", keywords: ["eksempel"], }, danish_stemmer: { type: "stemmer", language: "danish", }, }, analyzer: { rebuilt_danish: { tokenizer: "standard", filter: [ "lowercase", "danish_stop", "danish_keywords", "danish_stemmer", ], }, }, }, }, }); console.log(response);
PUT /danish_example { "settings": { "analysis": { "filter": { "danish_stop": { "type": "stop", "stopwords": "_danish_" }, "danish_keywords": { "type": "keyword_marker", "keywords": ["eksempel"] }, "danish_stemmer": { "type": "stemmer", "language": "danish" } }, "analyzer": { "rebuilt_danish": { "tokenizer": "standard", "filter": [ "lowercase", "danish_stop", "danish_keywords", "danish_stemmer" ] } } } } }
dutch
analyzer
The dutch
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="dutch_example", settings={ "analysis": { "filter": { "dutch_stop": { "type": "stop", "stopwords": "_dutch_" }, "dutch_keywords": { "type": "keyword_marker", "keywords": [ "voorbeeld" ] }, "dutch_stemmer": { "type": "stemmer", "language": "dutch" }, "dutch_override": { "type": "stemmer_override", "rules": [ "fiets=>fiets", "bromfiets=>bromfiets", "ei=>eier", "kind=>kinder" ] } }, "analyzer": { "rebuilt_dutch": { "tokenizer": "standard", "filter": [ "lowercase", "dutch_stop", "dutch_keywords", "dutch_override", "dutch_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'dutch_example', body: { settings: { analysis: { filter: { dutch_stop: { type: 'stop', stopwords: '_dutch_' }, dutch_keywords: { type: 'keyword_marker', keywords: [ 'voorbeeld' ] }, dutch_stemmer: { type: 'stemmer', language: 'dutch' }, dutch_override: { type: 'stemmer_override', rules: [ 'fiets=>fiets', 'bromfiets=>bromfiets', 'ei=>eier', 'kind=>kinder' ] } }, analyzer: { rebuilt_dutch: { tokenizer: 'standard', filter: [ 'lowercase', 'dutch_stop', 'dutch_keywords', 'dutch_override', 'dutch_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "dutch_example", settings: { analysis: { filter: { dutch_stop: { type: "stop", stopwords: "_dutch_", }, dutch_keywords: { type: "keyword_marker", keywords: ["voorbeeld"], }, dutch_stemmer: { type: "stemmer", language: "dutch", }, dutch_override: { type: "stemmer_override", rules: [ "fiets=>fiets", "bromfiets=>bromfiets", "ei=>eier", "kind=>kinder", ], }, }, analyzer: { rebuilt_dutch: { tokenizer: "standard", filter: [ "lowercase", "dutch_stop", "dutch_keywords", "dutch_override", "dutch_stemmer", ], }, }, }, }, }); console.log(response);
PUT /dutch_example { "settings": { "analysis": { "filter": { "dutch_stop": { "type": "stop", "stopwords": "_dutch_" }, "dutch_keywords": { "type": "keyword_marker", "keywords": ["voorbeeld"] }, "dutch_stemmer": { "type": "stemmer", "language": "dutch" }, "dutch_override": { "type": "stemmer_override", "rules": [ "fiets=>fiets", "bromfiets=>bromfiets", "ei=>eier", "kind=>kinder" ] } }, "analyzer": { "rebuilt_dutch": { "tokenizer": "standard", "filter": [ "lowercase", "dutch_stop", "dutch_keywords", "dutch_override", "dutch_stemmer" ] } } } } }
english
analyzer
The english
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="english_example", settings={ "analysis": { "filter": { "english_stop": { "type": "stop", "stopwords": "_english_" }, "english_keywords": { "type": "keyword_marker", "keywords": [ "example" ] }, "english_stemmer": { "type": "stemmer", "language": "english" }, "english_possessive_stemmer": { "type": "stemmer", "language": "possessive_english" } }, "analyzer": { "rebuilt_english": { "tokenizer": "standard", "filter": [ "english_possessive_stemmer", "lowercase", "english_stop", "english_keywords", "english_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'english_example', body: { settings: { analysis: { filter: { english_stop: { type: 'stop', stopwords: '_english_' }, english_keywords: { type: 'keyword_marker', keywords: [ 'example' ] }, english_stemmer: { type: 'stemmer', language: 'english' }, english_possessive_stemmer: { type: 'stemmer', language: 'possessive_english' } }, analyzer: { rebuilt_english: { tokenizer: 'standard', filter: [ 'english_possessive_stemmer', 'lowercase', 'english_stop', 'english_keywords', 'english_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "english_example", settings: { analysis: { filter: { english_stop: { type: "stop", stopwords: "_english_", }, english_keywords: { type: "keyword_marker", keywords: ["example"], }, english_stemmer: { type: "stemmer", language: "english", }, english_possessive_stemmer: { type: "stemmer", language: "possessive_english", }, }, analyzer: { rebuilt_english: { tokenizer: "standard", filter: [ "english_possessive_stemmer", "lowercase", "english_stop", "english_keywords", "english_stemmer", ], }, }, }, }, }); console.log(response);
PUT /english_example { "settings": { "analysis": { "filter": { "english_stop": { "type": "stop", "stopwords": "_english_" }, "english_keywords": { "type": "keyword_marker", "keywords": ["example"] }, "english_stemmer": { "type": "stemmer", "language": "english" }, "english_possessive_stemmer": { "type": "stemmer", "language": "possessive_english" } }, "analyzer": { "rebuilt_english": { "tokenizer": "standard", "filter": [ "english_possessive_stemmer", "lowercase", "english_stop", "english_keywords", "english_stemmer" ] } } } } }
estonian
analyzer
The estonian
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="estonian_example", settings={ "analysis": { "filter": { "estonian_stop": { "type": "stop", "stopwords": "_estonian_" }, "estonian_keywords": { "type": "keyword_marker", "keywords": [ "näide" ] }, "estonian_stemmer": { "type": "stemmer", "language": "estonian" } }, "analyzer": { "rebuilt_estonian": { "tokenizer": "standard", "filter": [ "lowercase", "estonian_stop", "estonian_keywords", "estonian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'estonian_example', body: { settings: { analysis: { filter: { estonian_stop: { type: 'stop', stopwords: '_estonian_' }, estonian_keywords: { type: 'keyword_marker', keywords: [ 'näide' ] }, estonian_stemmer: { type: 'stemmer', language: 'estonian' } }, analyzer: { rebuilt_estonian: { tokenizer: 'standard', filter: [ 'lowercase', 'estonian_stop', 'estonian_keywords', 'estonian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "estonian_example", settings: { analysis: { filter: { estonian_stop: { type: "stop", stopwords: "_estonian_", }, estonian_keywords: { type: "keyword_marker", keywords: ["näide"], }, estonian_stemmer: { type: "stemmer", language: "estonian", }, }, analyzer: { rebuilt_estonian: { tokenizer: "standard", filter: [ "lowercase", "estonian_stop", "estonian_keywords", "estonian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /estonian_example { "settings": { "analysis": { "filter": { "estonian_stop": { "type": "stop", "stopwords": "_estonian_" }, "estonian_keywords": { "type": "keyword_marker", "keywords": ["näide"] }, "estonian_stemmer": { "type": "stemmer", "language": "estonian" } }, "analyzer": { "rebuilt_estonian": { "tokenizer": "standard", "filter": [ "lowercase", "estonian_stop", "estonian_keywords", "estonian_stemmer" ] } } } } }
finnish
analyzer
The finnish
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="finnish_example", settings={ "analysis": { "filter": { "finnish_stop": { "type": "stop", "stopwords": "_finnish_" }, "finnish_keywords": { "type": "keyword_marker", "keywords": [ "esimerkki" ] }, "finnish_stemmer": { "type": "stemmer", "language": "finnish" } }, "analyzer": { "rebuilt_finnish": { "tokenizer": "standard", "filter": [ "lowercase", "finnish_stop", "finnish_keywords", "finnish_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'finnish_example', body: { settings: { analysis: { filter: { finnish_stop: { type: 'stop', stopwords: '_finnish_' }, finnish_keywords: { type: 'keyword_marker', keywords: [ 'esimerkki' ] }, finnish_stemmer: { type: 'stemmer', language: 'finnish' } }, analyzer: { rebuilt_finnish: { tokenizer: 'standard', filter: [ 'lowercase', 'finnish_stop', 'finnish_keywords', 'finnish_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "finnish_example", settings: { analysis: { filter: { finnish_stop: { type: "stop", stopwords: "_finnish_", }, finnish_keywords: { type: "keyword_marker", keywords: ["esimerkki"], }, finnish_stemmer: { type: "stemmer", language: "finnish", }, }, analyzer: { rebuilt_finnish: { tokenizer: "standard", filter: [ "lowercase", "finnish_stop", "finnish_keywords", "finnish_stemmer", ], }, }, }, }, }); console.log(response);
PUT /finnish_example { "settings": { "analysis": { "filter": { "finnish_stop": { "type": "stop", "stopwords": "_finnish_" }, "finnish_keywords": { "type": "keyword_marker", "keywords": ["esimerkki"] }, "finnish_stemmer": { "type": "stemmer", "language": "finnish" } }, "analyzer": { "rebuilt_finnish": { "tokenizer": "standard", "filter": [ "lowercase", "finnish_stop", "finnish_keywords", "finnish_stemmer" ] } } } } }
french
analyzer
The french
analyzer could be reimplemented as a custom
analyzer as follows:
resp = client.indices.create( index="french_example", settings={ "analysis": { "filter": { "french_elision": { "type": "elision", "articles_case": True, "articles": [ "l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu" ] }, "french_stop": { "type": "stop", "stopwords": "_french_" }, "french_keywords": { "type": "keyword_marker", "keywords": [ "Example" ] }, "french_stemmer": { "type": "stemmer", "language": "light_french" } }, "analyzer": { "rebuilt_french": { "tokenizer": "standard", "filter": [ "french_elision", "lowercase", "french_stop", "french_keywords", "french_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'french_example', body: { settings: { analysis: { filter: { french_elision: { type: 'elision', articles_case: true, articles: [ 'l', 'm', 't', 'qu', 'n', 's', 'j', 'd', 'c', 'jusqu', 'quoiqu', 'lorsqu', 'puisqu' ] }, french_stop: { type: 'stop', stopwords: '_french_' }, french_keywords: { type: 'keyword_marker', keywords: [ 'Example' ] }, french_stemmer: { type: 'stemmer', language: 'light_french' } }, analyzer: { rebuilt_french: { tokenizer: 'standard', filter: [ 'french_elision', 'lowercase', 'french_stop', 'french_keywords', 'french_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "french_example", settings: { analysis: { filter: { french_elision: { type: "elision", articles_case: true, articles: [ "l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu", ], }, french_stop: { type: "stop", stopwords: "_french_", }, french_keywords: { type: "keyword_marker", keywords: ["Example"], }, french_stemmer: { type: "stemmer", language: "light_french", }, }, analyzer: { rebuilt_french: { tokenizer: "standard", filter: [ "french_elision", "lowercase", "french_stop", "french_keywords", "french_stemmer", ], }, }, }, }, }); console.log(response);
PUT /french_example { "settings": { "analysis": { "filter": { "french_elision": { "type": "elision", "articles_case": true, "articles": [ "l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu" ] }, "french_stop": { "type": "stop", "stopwords": "_french_" }, "french_keywords": { "type": "keyword_marker", "keywords": ["Example"] }, "french_stemmer": { "type": "stemmer", "language": "light_french" } }, "analyzer": { "rebuilt_french": { "tokenizer": "standard", "filter": [ "french_elision", "lowercase", "french_stop", "french_keywords", "french_stemmer" ] } } } } }
galician analyzer
The galician analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="galician_example", settings={ "analysis": { "filter": { "galician_stop": { "type": "stop", "stopwords": "_galician_" }, "galician_keywords": { "type": "keyword_marker", "keywords": [ "exemplo" ] }, "galician_stemmer": { "type": "stemmer", "language": "galician" } }, "analyzer": { "rebuilt_galician": { "tokenizer": "standard", "filter": [ "lowercase", "galician_stop", "galician_keywords", "galician_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'galician_example', body: { settings: { analysis: { filter: { galician_stop: { type: 'stop', stopwords: '_galician_' }, galician_keywords: { type: 'keyword_marker', keywords: [ 'exemplo' ] }, galician_stemmer: { type: 'stemmer', language: 'galician' } }, analyzer: { rebuilt_galician: { tokenizer: 'standard', filter: [ 'lowercase', 'galician_stop', 'galician_keywords', 'galician_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "galician_example", settings: { analysis: { filter: { galician_stop: { type: "stop", stopwords: "_galician_", }, galician_keywords: { type: "keyword_marker", keywords: ["exemplo"], }, galician_stemmer: { type: "stemmer", language: "galician", }, }, analyzer: { rebuilt_galician: { tokenizer: "standard", filter: [ "lowercase", "galician_stop", "galician_keywords", "galician_stemmer", ], }, }, }, }, }); console.log(response);
PUT /galician_example { "settings": { "analysis": { "filter": { "galician_stop": { "type": "stop", "stopwords": "_galician_" }, "galician_keywords": { "type": "keyword_marker", "keywords": ["exemplo"] }, "galician_stemmer": { "type": "stemmer", "language": "galician" } }, "analyzer": { "rebuilt_galician": { "tokenizer": "standard", "filter": [ "lowercase", "galician_stop", "galician_keywords", "galician_stemmer" ] } } } } }
german analyzer
The german analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="german_example", settings={ "analysis": { "filter": { "german_stop": { "type": "stop", "stopwords": "_german_" }, "german_keywords": { "type": "keyword_marker", "keywords": [ "Beispiel" ] }, "german_stemmer": { "type": "stemmer", "language": "light_german" } }, "analyzer": { "rebuilt_german": { "tokenizer": "standard", "filter": [ "lowercase", "german_stop", "german_keywords", "german_normalization", "german_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'german_example', body: { settings: { analysis: { filter: { german_stop: { type: 'stop', stopwords: '_german_' }, german_keywords: { type: 'keyword_marker', keywords: [ 'Beispiel' ] }, german_stemmer: { type: 'stemmer', language: 'light_german' } }, analyzer: { rebuilt_german: { tokenizer: 'standard', filter: [ 'lowercase', 'german_stop', 'german_keywords', 'german_normalization', 'german_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "german_example", settings: { analysis: { filter: { german_stop: { type: "stop", stopwords: "_german_", }, german_keywords: { type: "keyword_marker", keywords: ["Beispiel"], }, german_stemmer: { type: "stemmer", language: "light_german", }, }, analyzer: { rebuilt_german: { tokenizer: "standard", filter: [ "lowercase", "german_stop", "german_keywords", "german_normalization", "german_stemmer", ], }, }, }, }, }); console.log(response);
PUT /german_example { "settings": { "analysis": { "filter": { "german_stop": { "type": "stop", "stopwords": "_german_" }, "german_keywords": { "type": "keyword_marker", "keywords": ["Beispiel"] }, "german_stemmer": { "type": "stemmer", "language": "light_german" } }, "analyzer": { "rebuilt_german": { "tokenizer": "standard", "filter": [ "lowercase", "german_stop", "german_keywords", "german_normalization", "german_stemmer" ] } } } } }
greek analyzer
The greek analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="greek_example", settings={ "analysis": { "filter": { "greek_stop": { "type": "stop", "stopwords": "_greek_" }, "greek_lowercase": { "type": "lowercase", "language": "greek" }, "greek_keywords": { "type": "keyword_marker", "keywords": [ "παράδειγμα" ] }, "greek_stemmer": { "type": "stemmer", "language": "greek" } }, "analyzer": { "rebuilt_greek": { "tokenizer": "standard", "filter": [ "greek_lowercase", "greek_stop", "greek_keywords", "greek_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'greek_example', body: { settings: { analysis: { filter: { greek_stop: { type: 'stop', stopwords: '_greek_' }, greek_lowercase: { type: 'lowercase', language: 'greek' }, greek_keywords: { type: 'keyword_marker', keywords: [ 'παράδειγμα' ] }, greek_stemmer: { type: 'stemmer', language: 'greek' } }, analyzer: { rebuilt_greek: { tokenizer: 'standard', filter: [ 'greek_lowercase', 'greek_stop', 'greek_keywords', 'greek_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "greek_example", settings: { analysis: { filter: { greek_stop: { type: "stop", stopwords: "_greek_", }, greek_lowercase: { type: "lowercase", language: "greek", }, greek_keywords: { type: "keyword_marker", keywords: ["παράδειγμα"], }, greek_stemmer: { type: "stemmer", language: "greek", }, }, analyzer: { rebuilt_greek: { tokenizer: "standard", filter: [ "greek_lowercase", "greek_stop", "greek_keywords", "greek_stemmer", ], }, }, }, }, }); console.log(response);
PUT /greek_example { "settings": { "analysis": { "filter": { "greek_stop": { "type": "stop", "stopwords": "_greek_" }, "greek_lowercase": { "type": "lowercase", "language": "greek" }, "greek_keywords": { "type": "keyword_marker", "keywords": ["παράδειγμα"] }, "greek_stemmer": { "type": "stemmer", "language": "greek" } }, "analyzer": { "rebuilt_greek": { "tokenizer": "standard", "filter": [ "greek_lowercase", "greek_stop", "greek_keywords", "greek_stemmer" ] } } } } }
hindi analyzer
The hindi analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="hindi_example", settings={ "analysis": { "filter": { "hindi_stop": { "type": "stop", "stopwords": "_hindi_" }, "hindi_keywords": { "type": "keyword_marker", "keywords": [ "उदाहरण" ] }, "hindi_stemmer": { "type": "stemmer", "language": "hindi" } }, "analyzer": { "rebuilt_hindi": { "tokenizer": "standard", "filter": [ "lowercase", "decimal_digit", "hindi_keywords", "indic_normalization", "hindi_normalization", "hindi_stop", "hindi_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'hindi_example', body: { settings: { analysis: { filter: { hindi_stop: { type: 'stop', stopwords: '_hindi_' }, hindi_keywords: { type: 'keyword_marker', keywords: [ 'उदाहरण' ] }, hindi_stemmer: { type: 'stemmer', language: 'hindi' } }, analyzer: { rebuilt_hindi: { tokenizer: 'standard', filter: [ 'lowercase', 'decimal_digit', 'hindi_keywords', 'indic_normalization', 'hindi_normalization', 'hindi_stop', 'hindi_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "hindi_example", settings: { analysis: { filter: { hindi_stop: { type: "stop", stopwords: "_hindi_", }, hindi_keywords: { type: "keyword_marker", keywords: ["उदाहरण"], }, hindi_stemmer: { type: "stemmer", language: "hindi", }, }, analyzer: { rebuilt_hindi: { tokenizer: "standard", filter: [ "lowercase", "decimal_digit", "hindi_keywords", "indic_normalization", "hindi_normalization", "hindi_stop", "hindi_stemmer", ], }, }, }, }, }); console.log(response);
PUT /hindi_example { "settings": { "analysis": { "filter": { "hindi_stop": { "type": "stop", "stopwords": "_hindi_" }, "hindi_keywords": { "type": "keyword_marker", "keywords": ["उदाहरण"] }, "hindi_stemmer": { "type": "stemmer", "language": "hindi" } }, "analyzer": { "rebuilt_hindi": { "tokenizer": "standard", "filter": [ "lowercase", "decimal_digit", "hindi_keywords", "indic_normalization", "hindi_normalization", "hindi_stop", "hindi_stemmer" ] } } } } }
hungarian analyzer
The hungarian analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="hungarian_example", settings={ "analysis": { "filter": { "hungarian_stop": { "type": "stop", "stopwords": "_hungarian_" }, "hungarian_keywords": { "type": "keyword_marker", "keywords": [ "példa" ] }, "hungarian_stemmer": { "type": "stemmer", "language": "hungarian" } }, "analyzer": { "rebuilt_hungarian": { "tokenizer": "standard", "filter": [ "lowercase", "hungarian_stop", "hungarian_keywords", "hungarian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'hungarian_example', body: { settings: { analysis: { filter: { hungarian_stop: { type: 'stop', stopwords: '_hungarian_' }, hungarian_keywords: { type: 'keyword_marker', keywords: [ 'példa' ] }, hungarian_stemmer: { type: 'stemmer', language: 'hungarian' } }, analyzer: { rebuilt_hungarian: { tokenizer: 'standard', filter: [ 'lowercase', 'hungarian_stop', 'hungarian_keywords', 'hungarian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "hungarian_example", settings: { analysis: { filter: { hungarian_stop: { type: "stop", stopwords: "_hungarian_", }, hungarian_keywords: { type: "keyword_marker", keywords: ["példa"], }, hungarian_stemmer: { type: "stemmer", language: "hungarian", }, }, analyzer: { rebuilt_hungarian: { tokenizer: "standard", filter: [ "lowercase", "hungarian_stop", "hungarian_keywords", "hungarian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /hungarian_example { "settings": { "analysis": { "filter": { "hungarian_stop": { "type": "stop", "stopwords": "_hungarian_" }, "hungarian_keywords": { "type": "keyword_marker", "keywords": ["példa"] }, "hungarian_stemmer": { "type": "stemmer", "language": "hungarian" } }, "analyzer": { "rebuilt_hungarian": { "tokenizer": "standard", "filter": [ "lowercase", "hungarian_stop", "hungarian_keywords", "hungarian_stemmer" ] } } } } }
indonesian analyzer
The indonesian analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="indonesian_example", settings={ "analysis": { "filter": { "indonesian_stop": { "type": "stop", "stopwords": "_indonesian_" }, "indonesian_keywords": { "type": "keyword_marker", "keywords": [ "contoh" ] }, "indonesian_stemmer": { "type": "stemmer", "language": "indonesian" } }, "analyzer": { "rebuilt_indonesian": { "tokenizer": "standard", "filter": [ "lowercase", "indonesian_stop", "indonesian_keywords", "indonesian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'indonesian_example', body: { settings: { analysis: { filter: { indonesian_stop: { type: 'stop', stopwords: '_indonesian_' }, indonesian_keywords: { type: 'keyword_marker', keywords: [ 'contoh' ] }, indonesian_stemmer: { type: 'stemmer', language: 'indonesian' } }, analyzer: { rebuilt_indonesian: { tokenizer: 'standard', filter: [ 'lowercase', 'indonesian_stop', 'indonesian_keywords', 'indonesian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "indonesian_example", settings: { analysis: { filter: { indonesian_stop: { type: "stop", stopwords: "_indonesian_", }, indonesian_keywords: { type: "keyword_marker", keywords: ["contoh"], }, indonesian_stemmer: { type: "stemmer", language: "indonesian", }, }, analyzer: { rebuilt_indonesian: { tokenizer: "standard", filter: [ "lowercase", "indonesian_stop", "indonesian_keywords", "indonesian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /indonesian_example { "settings": { "analysis": { "filter": { "indonesian_stop": { "type": "stop", "stopwords": "_indonesian_" }, "indonesian_keywords": { "type": "keyword_marker", "keywords": ["contoh"] }, "indonesian_stemmer": { "type": "stemmer", "language": "indonesian" } }, "analyzer": { "rebuilt_indonesian": { "tokenizer": "standard", "filter": [ "lowercase", "indonesian_stop", "indonesian_keywords", "indonesian_stemmer" ] } } } } }
irish analyzer
The irish analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="irish_example", settings={ "analysis": { "filter": { "irish_hyphenation": { "type": "stop", "stopwords": [ "h", "n", "t" ], "ignore_case": True }, "irish_elision": { "type": "elision", "articles": [ "d", "m", "b" ], "articles_case": True }, "irish_stop": { "type": "stop", "stopwords": "_irish_" }, "irish_lowercase": { "type": "lowercase", "language": "irish" }, "irish_keywords": { "type": "keyword_marker", "keywords": [ "sampla" ] }, "irish_stemmer": { "type": "stemmer", "language": "irish" } }, "analyzer": { "rebuilt_irish": { "tokenizer": "standard", "filter": [ "irish_hyphenation", "irish_elision", "irish_lowercase", "irish_stop", "irish_keywords", "irish_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'irish_example', body: { settings: { analysis: { filter: { irish_hyphenation: { type: 'stop', stopwords: [ 'h', 'n', 't' ], ignore_case: true }, irish_elision: { type: 'elision', articles: [ 'd', 'm', 'b' ], articles_case: true }, irish_stop: { type: 'stop', stopwords: '_irish_' }, irish_lowercase: { type: 'lowercase', language: 'irish' }, irish_keywords: { type: 'keyword_marker', keywords: [ 'sampla' ] }, irish_stemmer: { type: 'stemmer', language: 'irish' } }, analyzer: { rebuilt_irish: { tokenizer: 'standard', filter: [ 'irish_hyphenation', 'irish_elision', 'irish_lowercase', 'irish_stop', 'irish_keywords', 'irish_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "irish_example", settings: { analysis: { filter: { irish_hyphenation: { type: "stop", stopwords: ["h", "n", "t"], ignore_case: true, }, irish_elision: { type: "elision", articles: ["d", "m", "b"], articles_case: true, }, irish_stop: { type: "stop", stopwords: "_irish_", }, irish_lowercase: { type: "lowercase", language: "irish", }, irish_keywords: { type: "keyword_marker", keywords: ["sampla"], }, irish_stemmer: { type: "stemmer", language: "irish", }, }, analyzer: { rebuilt_irish: { tokenizer: "standard", filter: [ "irish_hyphenation", "irish_elision", "irish_lowercase", "irish_stop", "irish_keywords", "irish_stemmer", ], }, }, }, }, }); console.log(response);
PUT /irish_example { "settings": { "analysis": { "filter": { "irish_hyphenation": { "type": "stop", "stopwords": [ "h", "n", "t" ], "ignore_case": true }, "irish_elision": { "type": "elision", "articles": [ "d", "m", "b" ], "articles_case": true }, "irish_stop": { "type": "stop", "stopwords": "_irish_" }, "irish_lowercase": { "type": "lowercase", "language": "irish" }, "irish_keywords": { "type": "keyword_marker", "keywords": ["sampla"] }, "irish_stemmer": { "type": "stemmer", "language": "irish" } }, "analyzer": { "rebuilt_irish": { "tokenizer": "standard", "filter": [ "irish_hyphenation", "irish_elision", "irish_lowercase", "irish_stop", "irish_keywords", "irish_stemmer" ] } } } } }
italian analyzer
The italian analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="italian_example", settings={ "analysis": { "filter": { "italian_elision": { "type": "elision", "articles": [ "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d" ], "articles_case": True }, "italian_stop": { "type": "stop", "stopwords": "_italian_" }, "italian_keywords": { "type": "keyword_marker", "keywords": [ "esempio" ] }, "italian_stemmer": { "type": "stemmer", "language": "light_italian" } }, "analyzer": { "rebuilt_italian": { "tokenizer": "standard", "filter": [ "italian_elision", "lowercase", "italian_stop", "italian_keywords", "italian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'italian_example', body: { settings: { analysis: { filter: { italian_elision: { type: 'elision', articles: [ 'c', 'l', 'all', 'dall', 'dell', 'nell', 'sull', 'coll', 'pell', 'gl', 'agl', 'dagl', 'degl', 'negl', 'sugl', 'un', 'm', 't', 's', 'v', 'd' ], articles_case: true }, italian_stop: { type: 'stop', stopwords: '_italian_' }, italian_keywords: { type: 'keyword_marker', keywords: [ 'esempio' ] }, italian_stemmer: { type: 'stemmer', language: 'light_italian' } }, analyzer: { rebuilt_italian: { tokenizer: 'standard', filter: [ 'italian_elision', 'lowercase', 'italian_stop', 'italian_keywords', 'italian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "italian_example", settings: { analysis: { filter: { italian_elision: { type: "elision", articles: [ "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d", ], articles_case: true, }, italian_stop: { type: "stop", stopwords: "_italian_", }, italian_keywords: { type: "keyword_marker", keywords: ["esempio"], }, italian_stemmer: { type: "stemmer", language: "light_italian", }, }, analyzer: { rebuilt_italian: { tokenizer: "standard", filter: [ "italian_elision", "lowercase", "italian_stop", "italian_keywords", "italian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /italian_example { "settings": { "analysis": { "filter": { "italian_elision": { "type": "elision", "articles": [ "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d" ], "articles_case": true }, "italian_stop": { "type": "stop", "stopwords": "_italian_" }, "italian_keywords": { "type": "keyword_marker", "keywords": ["esempio"] }, "italian_stemmer": { "type": "stemmer", "language": "light_italian" } }, "analyzer": { "rebuilt_italian": { "tokenizer": "standard", "filter": [ "italian_elision", "lowercase", "italian_stop", "italian_keywords", "italian_stemmer" ] } } } } }
latvian analyzer
The latvian analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="latvian_example", settings={ "analysis": { "filter": { "latvian_stop": { "type": "stop", "stopwords": "_latvian_" }, "latvian_keywords": { "type": "keyword_marker", "keywords": [ "piemērs" ] }, "latvian_stemmer": { "type": "stemmer", "language": "latvian" } }, "analyzer": { "rebuilt_latvian": { "tokenizer": "standard", "filter": [ "lowercase", "latvian_stop", "latvian_keywords", "latvian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'latvian_example', body: { settings: { analysis: { filter: { latvian_stop: { type: 'stop', stopwords: '_latvian_' }, latvian_keywords: { type: 'keyword_marker', keywords: [ 'piemērs' ] }, latvian_stemmer: { type: 'stemmer', language: 'latvian' } }, analyzer: { rebuilt_latvian: { tokenizer: 'standard', filter: [ 'lowercase', 'latvian_stop', 'latvian_keywords', 'latvian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "latvian_example", settings: { analysis: { filter: { latvian_stop: { type: "stop", stopwords: "_latvian_", }, latvian_keywords: { type: "keyword_marker", keywords: ["piemērs"], }, latvian_stemmer: { type: "stemmer", language: "latvian", }, }, analyzer: { rebuilt_latvian: { tokenizer: "standard", filter: [ "lowercase", "latvian_stop", "latvian_keywords", "latvian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /latvian_example { "settings": { "analysis": { "filter": { "latvian_stop": { "type": "stop", "stopwords": "_latvian_" }, "latvian_keywords": { "type": "keyword_marker", "keywords": ["piemērs"] }, "latvian_stemmer": { "type": "stemmer", "language": "latvian" } }, "analyzer": { "rebuilt_latvian": { "tokenizer": "standard", "filter": [ "lowercase", "latvian_stop", "latvian_keywords", "latvian_stemmer" ] } } } } }
lithuanian analyzer
The lithuanian analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="lithuanian_example", settings={ "analysis": { "filter": { "lithuanian_stop": { "type": "stop", "stopwords": "_lithuanian_" }, "lithuanian_keywords": { "type": "keyword_marker", "keywords": [ "pavyzdys" ] }, "lithuanian_stemmer": { "type": "stemmer", "language": "lithuanian" } }, "analyzer": { "rebuilt_lithuanian": { "tokenizer": "standard", "filter": [ "lowercase", "lithuanian_stop", "lithuanian_keywords", "lithuanian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'lithuanian_example', body: { settings: { analysis: { filter: { lithuanian_stop: { type: 'stop', stopwords: '_lithuanian_' }, lithuanian_keywords: { type: 'keyword_marker', keywords: [ 'pavyzdys' ] }, lithuanian_stemmer: { type: 'stemmer', language: 'lithuanian' } }, analyzer: { rebuilt_lithuanian: { tokenizer: 'standard', filter: [ 'lowercase', 'lithuanian_stop', 'lithuanian_keywords', 'lithuanian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "lithuanian_example", settings: { analysis: { filter: { lithuanian_stop: { type: "stop", stopwords: "_lithuanian_", }, lithuanian_keywords: { type: "keyword_marker", keywords: ["pavyzdys"], }, lithuanian_stemmer: { type: "stemmer", language: "lithuanian", }, }, analyzer: { rebuilt_lithuanian: { tokenizer: "standard", filter: [ "lowercase", "lithuanian_stop", "lithuanian_keywords", "lithuanian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /lithuanian_example { "settings": { "analysis": { "filter": { "lithuanian_stop": { "type": "stop", "stopwords": "_lithuanian_" }, "lithuanian_keywords": { "type": "keyword_marker", "keywords": ["pavyzdys"] }, "lithuanian_stemmer": { "type": "stemmer", "language": "lithuanian" } }, "analyzer": { "rebuilt_lithuanian": { "tokenizer": "standard", "filter": [ "lowercase", "lithuanian_stop", "lithuanian_keywords", "lithuanian_stemmer" ] } } } } }
norwegian analyzer
The norwegian analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="norwegian_example", settings={ "analysis": { "filter": { "norwegian_stop": { "type": "stop", "stopwords": "_norwegian_" }, "norwegian_keywords": { "type": "keyword_marker", "keywords": [ "eksempel" ] }, "norwegian_stemmer": { "type": "stemmer", "language": "norwegian" } }, "analyzer": { "rebuilt_norwegian": { "tokenizer": "standard", "filter": [ "lowercase", "norwegian_stop", "norwegian_keywords", "norwegian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'norwegian_example', body: { settings: { analysis: { filter: { norwegian_stop: { type: 'stop', stopwords: '_norwegian_' }, norwegian_keywords: { type: 'keyword_marker', keywords: [ 'eksempel' ] }, norwegian_stemmer: { type: 'stemmer', language: 'norwegian' } }, analyzer: { rebuilt_norwegian: { tokenizer: 'standard', filter: [ 'lowercase', 'norwegian_stop', 'norwegian_keywords', 'norwegian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "norwegian_example", settings: { analysis: { filter: { norwegian_stop: { type: "stop", stopwords: "_norwegian_", }, norwegian_keywords: { type: "keyword_marker", keywords: ["eksempel"], }, norwegian_stemmer: { type: "stemmer", language: "norwegian", }, }, analyzer: { rebuilt_norwegian: { tokenizer: "standard", filter: [ "lowercase", "norwegian_stop", "norwegian_keywords", "norwegian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /norwegian_example { "settings": { "analysis": { "filter": { "norwegian_stop": { "type": "stop", "stopwords": "_norwegian_" }, "norwegian_keywords": { "type": "keyword_marker", "keywords": ["eksempel"] }, "norwegian_stemmer": { "type": "stemmer", "language": "norwegian" } }, "analyzer": { "rebuilt_norwegian": { "tokenizer": "standard", "filter": [ "lowercase", "norwegian_stop", "norwegian_keywords", "norwegian_stemmer" ] } } } } }
persian analyzer
The persian analyzer could be reimplemented as a custom analyzer as follows:
response = client.indices.create( index: 'persian_example', body: { settings: { analysis: { char_filter: { zero_width_spaces: { type: 'mapping', mappings: [ '\\u200C=>\\u0020' ] } }, filter: { persian_stop: { type: 'stop', stopwords: '_persian_' } }, analyzer: { rebuilt_persian: { tokenizer: 'standard', char_filter: [ 'zero_width_spaces' ], filter: [ 'lowercase', 'decimal_digit', 'arabic_normalization', 'persian_normalization', 'persian_stop' ] } } } } } ) puts response
PUT /persian_example { "settings": { "analysis": { "char_filter": { "zero_width_spaces": { "type": "mapping", "mappings": [ "\\u200C=>\\u0020"] } }, "filter": { "persian_stop": { "type": "stop", "stopwords": "_persian_" } }, "analyzer": { "rebuilt_persian": { "tokenizer": "standard", "char_filter": [ "zero_width_spaces" ], "filter": [ "lowercase", "decimal_digit", "arabic_normalization", "persian_normalization", "persian_stop" ] } } } } }
portuguese analyzer
The portuguese analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="portuguese_example", settings={ "analysis": { "filter": { "portuguese_stop": { "type": "stop", "stopwords": "_portuguese_" }, "portuguese_keywords": { "type": "keyword_marker", "keywords": [ "exemplo" ] }, "portuguese_stemmer": { "type": "stemmer", "language": "light_portuguese" } }, "analyzer": { "rebuilt_portuguese": { "tokenizer": "standard", "filter": [ "lowercase", "portuguese_stop", "portuguese_keywords", "portuguese_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'portuguese_example', body: { settings: { analysis: { filter: { portuguese_stop: { type: 'stop', stopwords: '_portuguese_' }, portuguese_keywords: { type: 'keyword_marker', keywords: [ 'exemplo' ] }, portuguese_stemmer: { type: 'stemmer', language: 'light_portuguese' } }, analyzer: { rebuilt_portuguese: { tokenizer: 'standard', filter: [ 'lowercase', 'portuguese_stop', 'portuguese_keywords', 'portuguese_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "portuguese_example", settings: { analysis: { filter: { portuguese_stop: { type: "stop", stopwords: "_portuguese_", }, portuguese_keywords: { type: "keyword_marker", keywords: ["exemplo"], }, portuguese_stemmer: { type: "stemmer", language: "light_portuguese", }, }, analyzer: { rebuilt_portuguese: { tokenizer: "standard", filter: [ "lowercase", "portuguese_stop", "portuguese_keywords", "portuguese_stemmer", ], }, }, }, }, }); console.log(response);
PUT /portuguese_example { "settings": { "analysis": { "filter": { "portuguese_stop": { "type": "stop", "stopwords": "_portuguese_" }, "portuguese_keywords": { "type": "keyword_marker", "keywords": ["exemplo"] }, "portuguese_stemmer": { "type": "stemmer", "language": "light_portuguese" } }, "analyzer": { "rebuilt_portuguese": { "tokenizer": "standard", "filter": [ "lowercase", "portuguese_stop", "portuguese_keywords", "portuguese_stemmer" ] } } } } }
romanian analyzer
The romanian analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="romanian_example", settings={ "analysis": { "filter": { "romanian_stop": { "type": "stop", "stopwords": "_romanian_" }, "romanian_keywords": { "type": "keyword_marker", "keywords": [ "exemplu" ] }, "romanian_stemmer": { "type": "stemmer", "language": "romanian" } }, "analyzer": { "rebuilt_romanian": { "tokenizer": "standard", "filter": [ "lowercase", "romanian_stop", "romanian_keywords", "romanian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'romanian_example', body: { settings: { analysis: { filter: { romanian_stop: { type: 'stop', stopwords: '_romanian_' }, romanian_keywords: { type: 'keyword_marker', keywords: [ 'exemplu' ] }, romanian_stemmer: { type: 'stemmer', language: 'romanian' } }, analyzer: { rebuilt_romanian: { tokenizer: 'standard', filter: [ 'lowercase', 'romanian_stop', 'romanian_keywords', 'romanian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "romanian_example", settings: { analysis: { filter: { romanian_stop: { type: "stop", stopwords: "_romanian_", }, romanian_keywords: { type: "keyword_marker", keywords: ["exemplu"], }, romanian_stemmer: { type: "stemmer", language: "romanian", }, }, analyzer: { rebuilt_romanian: { tokenizer: "standard", filter: [ "lowercase", "romanian_stop", "romanian_keywords", "romanian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /romanian_example { "settings": { "analysis": { "filter": { "romanian_stop": { "type": "stop", "stopwords": "_romanian_" }, "romanian_keywords": { "type": "keyword_marker", "keywords": ["exemplu"] }, "romanian_stemmer": { "type": "stemmer", "language": "romanian" } }, "analyzer": { "rebuilt_romanian": { "tokenizer": "standard", "filter": [ "lowercase", "romanian_stop", "romanian_keywords", "romanian_stemmer" ] } } } } }
russian analyzer
The russian analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="russian_example", settings={ "analysis": { "filter": { "russian_stop": { "type": "stop", "stopwords": "_russian_" }, "russian_keywords": { "type": "keyword_marker", "keywords": [ "пример" ] }, "russian_stemmer": { "type": "stemmer", "language": "russian" } }, "analyzer": { "rebuilt_russian": { "tokenizer": "standard", "filter": [ "lowercase", "russian_stop", "russian_keywords", "russian_stemmer" ] } } } }, ) print(resp)
response = client.indices.create( index: 'russian_example', body: { settings: { analysis: { filter: { russian_stop: { type: 'stop', stopwords: '_russian_' }, russian_keywords: { type: 'keyword_marker', keywords: [ 'пример' ] }, russian_stemmer: { type: 'stemmer', language: 'russian' } }, analyzer: { rebuilt_russian: { tokenizer: 'standard', filter: [ 'lowercase', 'russian_stop', 'russian_keywords', 'russian_stemmer' ] } } } } } ) puts response
const response = await client.indices.create({ index: "russian_example", settings: { analysis: { filter: { russian_stop: { type: "stop", stopwords: "_russian_", }, russian_keywords: { type: "keyword_marker", keywords: ["пример"], }, russian_stemmer: { type: "stemmer", language: "russian", }, }, analyzer: { rebuilt_russian: { tokenizer: "standard", filter: [ "lowercase", "russian_stop", "russian_keywords", "russian_stemmer", ], }, }, }, }, }); console.log(response);
PUT /russian_example { "settings": { "analysis": { "filter": { "russian_stop": { "type": "stop", "stopwords": "_russian_" }, "russian_keywords": { "type": "keyword_marker", "keywords": ["пример"] }, "russian_stemmer": { "type": "stemmer", "language": "russian" } }, "analyzer": { "rebuilt_russian": { "tokenizer": "standard", "filter": [ "lowercase", "russian_stop", "russian_keywords", "russian_stemmer" ] } } } } }
serbian analyzer
The serbian analyzer could be reimplemented as a custom analyzer as follows:
resp = client.indices.create( index="serbian_example", settings={ "analysis": { "filter": { "serbian_stop": { "type": "stop", "stopwords": "_serbian_" }, "serbian_keywords": { "type": "keyword_marker", "keywords": [ "пример" ] }, "serbian_stemmer": { "type": "stemmer", "language": "serbian" } }, "analyzer": { "rebuilt_serbian": { "tokenizer": "standard", "filter": [ "lowercase", "serbian_stop", "serbian_keywords", "serbian_stemmer", "serbian_normalization" ] } } } }, ) print(resp)
response = client.indices.create( index: 'serbian_example', body: { settings: { analysis: { filter: { serbian_stop: { type: 'stop', stopwords: '_serbian_' }, serbian_keywords: { type: 'keyword_marker', keywords: [ 'пример' ] }, serbian_stemmer: { type: 'stemmer', language: 'serbian' } }, analyzer: { rebuilt_serbian: { tokenizer: 'standard', filter: [ 'lowercase', 'serbian_stop', 'serbian_keywords', 'serbian_stemmer', 'serbian_normalization' ] } } } } } ) puts response
// Create the `serbian_example` index with `rebuilt_serbian`, a custom
// analyzer that re-implements the built-in `serbian` analyzer.
const serbianFilters = {
  serbian_stop: { type: "stop", stopwords: "_serbian_" },
  // Words listed here are kept from being stemmed (the equivalent of
  // `stem_exclusion`); remove this filter if no words need excluding.
  serbian_keywords: { type: "keyword_marker", keywords: ["пример"] },
  serbian_stemmer: { type: "stemmer", language: "serbian" },
};

const rebuiltSerbian = {
  tokenizer: "standard",
  filter: [
    "lowercase",
    "serbian_stop",
    "serbian_keywords",
    "serbian_stemmer",
    "serbian_normalization",
  ],
};

const response = await client.indices.create({
  index: "serbian_example",
  settings: {
    analysis: {
      filter: serbianFilters,
      analyzer: { rebuilt_serbian: rebuiltSerbian },
    },
  },
});
console.log(response);
PUT /serbian_example { "settings": { "analysis": { "filter": { "serbian_stop": { "type": "stop", "stopwords": "_serbian_" }, "serbian_keywords": { "type": "keyword_marker", "keywords": ["пример"] }, "serbian_stemmer": { "type": "stemmer", "language": "serbian" } }, "analyzer": { "rebuilt_serbian": { "tokenizer": "standard", "filter": [ "lowercase", "serbian_stop", "serbian_keywords", "serbian_stemmer", "serbian_normalization" ] } } } } }
sorani
analyzer
The `sorani` analyzer could be reimplemented as a custom analyzer as follows:
# Create the `sorani_example` index with `rebuilt_sorani`, a custom
# analyzer that re-implements the built-in `sorani` analyzer.
sorani_filters = {
    "sorani_stop": {"type": "stop", "stopwords": "_sorani_"},
    # Words listed here are kept from being stemmed (the equivalent of
    # `stem_exclusion`); remove this filter if no words need excluding.
    "sorani_keywords": {"type": "keyword_marker", "keywords": ["mînak"]},
    "sorani_stemmer": {"type": "stemmer", "language": "sorani"},
}

rebuilt_sorani = {
    "tokenizer": "standard",
    "filter": [
        "sorani_normalization",
        "lowercase",
        "decimal_digit",
        "sorani_stop",
        "sorani_keywords",
        "sorani_stemmer",
    ],
}

resp = client.indices.create(
    index="sorani_example",
    settings={
        "analysis": {
            "filter": sorani_filters,
            "analyzer": {"rebuilt_sorani": rebuilt_sorani},
        }
    },
)
print(resp)
# Create the `sorani_example` index with `rebuilt_sorani`, a custom
# analyzer that re-implements the built-in `sorani` analyzer.
sorani_filters = {
  sorani_stop: { type: 'stop', stopwords: '_sorani_' },
  # Words listed here are kept from being stemmed (the equivalent of
  # `stem_exclusion`); remove this filter if no words need excluding.
  sorani_keywords: { type: 'keyword_marker', keywords: ['mînak'] },
  sorani_stemmer: { type: 'stemmer', language: 'sorani' }
}

rebuilt_sorani = {
  tokenizer: 'standard',
  filter: [
    'sorani_normalization',
    'lowercase',
    'decimal_digit',
    'sorani_stop',
    'sorani_keywords',
    'sorani_stemmer'
  ]
}

response = client.indices.create(
  index: 'sorani_example',
  body: {
    settings: {
      analysis: {
        filter: sorani_filters,
        analyzer: { rebuilt_sorani: rebuilt_sorani }
      }
    }
  }
)
puts response
// Create the `sorani_example` index with `rebuilt_sorani`, a custom
// analyzer that re-implements the built-in `sorani` analyzer.
const soraniFilters = {
  sorani_stop: { type: "stop", stopwords: "_sorani_" },
  // Words listed here are kept from being stemmed (the equivalent of
  // `stem_exclusion`); remove this filter if no words need excluding.
  sorani_keywords: { type: "keyword_marker", keywords: ["mînak"] },
  sorani_stemmer: { type: "stemmer", language: "sorani" },
};

const rebuiltSorani = {
  tokenizer: "standard",
  filter: [
    "sorani_normalization",
    "lowercase",
    "decimal_digit",
    "sorani_stop",
    "sorani_keywords",
    "sorani_stemmer",
  ],
};

const response = await client.indices.create({
  index: "sorani_example",
  settings: {
    analysis: {
      filter: soraniFilters,
      analyzer: { rebuilt_sorani: rebuiltSorani },
    },
  },
});
console.log(response);
PUT /sorani_example { "settings": { "analysis": { "filter": { "sorani_stop": { "type": "stop", "stopwords": "_sorani_" }, "sorani_keywords": { "type": "keyword_marker", "keywords": ["mînak"] }, "sorani_stemmer": { "type": "stemmer", "language": "sorani" } }, "analyzer": { "rebuilt_sorani": { "tokenizer": "standard", "filter": [ "sorani_normalization", "lowercase", "decimal_digit", "sorani_stop", "sorani_keywords", "sorani_stemmer" ] } } } } }
spanish
analyzer
The `spanish` analyzer could be reimplemented as a custom analyzer as follows:
# Create the `spanish_example` index with `rebuilt_spanish`, a custom
# analyzer that re-implements the built-in `spanish` analyzer.
spanish_filters = {
    "spanish_stop": {"type": "stop", "stopwords": "_spanish_"},
    # Words listed here are kept from being stemmed (the equivalent of
    # `stem_exclusion`); remove this filter if no words need excluding.
    "spanish_keywords": {"type": "keyword_marker", "keywords": ["ejemplo"]},
    "spanish_stemmer": {"type": "stemmer", "language": "light_spanish"},
}

rebuilt_spanish = {
    "tokenizer": "standard",
    "filter": [
        "lowercase",
        "spanish_stop",
        "spanish_keywords",
        "spanish_stemmer",
    ],
}

resp = client.indices.create(
    index="spanish_example",
    settings={
        "analysis": {
            "filter": spanish_filters,
            "analyzer": {"rebuilt_spanish": rebuilt_spanish},
        }
    },
)
print(resp)
# Create the `spanish_example` index with `rebuilt_spanish`, a custom
# analyzer that re-implements the built-in `spanish` analyzer.
spanish_filters = {
  spanish_stop: { type: 'stop', stopwords: '_spanish_' },
  # Words listed here are kept from being stemmed (the equivalent of
  # `stem_exclusion`); remove this filter if no words need excluding.
  spanish_keywords: { type: 'keyword_marker', keywords: ['ejemplo'] },
  spanish_stemmer: { type: 'stemmer', language: 'light_spanish' }
}

rebuilt_spanish = {
  tokenizer: 'standard',
  filter: ['lowercase', 'spanish_stop', 'spanish_keywords', 'spanish_stemmer']
}

response = client.indices.create(
  index: 'spanish_example',
  body: {
    settings: {
      analysis: {
        filter: spanish_filters,
        analyzer: { rebuilt_spanish: rebuilt_spanish }
      }
    }
  }
)
puts response
// Create the `spanish_example` index with `rebuilt_spanish`, a custom
// analyzer that re-implements the built-in `spanish` analyzer.
const spanishFilters = {
  spanish_stop: { type: "stop", stopwords: "_spanish_" },
  // Words listed here are kept from being stemmed (the equivalent of
  // `stem_exclusion`); remove this filter if no words need excluding.
  spanish_keywords: { type: "keyword_marker", keywords: ["ejemplo"] },
  spanish_stemmer: { type: "stemmer", language: "light_spanish" },
};

const rebuiltSpanish = {
  tokenizer: "standard",
  filter: ["lowercase", "spanish_stop", "spanish_keywords", "spanish_stemmer"],
};

const response = await client.indices.create({
  index: "spanish_example",
  settings: {
    analysis: {
      filter: spanishFilters,
      analyzer: { rebuilt_spanish: rebuiltSpanish },
    },
  },
});
console.log(response);
PUT /spanish_example { "settings": { "analysis": { "filter": { "spanish_stop": { "type": "stop", "stopwords": "_spanish_" }, "spanish_keywords": { "type": "keyword_marker", "keywords": ["ejemplo"] }, "spanish_stemmer": { "type": "stemmer", "language": "light_spanish" } }, "analyzer": { "rebuilt_spanish": { "tokenizer": "standard", "filter": [ "lowercase", "spanish_stop", "spanish_keywords", "spanish_stemmer" ] } } } } }
swedish
analyzer
The `swedish` analyzer could be reimplemented as a custom analyzer as follows:
# Create the `swedish_example` index with `rebuilt_swedish`, a custom
# analyzer that re-implements the built-in `swedish` analyzer.
swedish_filters = {
    "swedish_stop": {"type": "stop", "stopwords": "_swedish_"},
    # Words listed here are kept from being stemmed (the equivalent of
    # `stem_exclusion`); remove this filter if no words need excluding.
    "swedish_keywords": {"type": "keyword_marker", "keywords": ["exempel"]},
    "swedish_stemmer": {"type": "stemmer", "language": "swedish"},
}

rebuilt_swedish = {
    "tokenizer": "standard",
    "filter": [
        "lowercase",
        "swedish_stop",
        "swedish_keywords",
        "swedish_stemmer",
    ],
}

resp = client.indices.create(
    index="swedish_example",
    settings={
        "analysis": {
            "filter": swedish_filters,
            "analyzer": {"rebuilt_swedish": rebuilt_swedish},
        }
    },
)
print(resp)
# Create the `swedish_example` index with `rebuilt_swedish`, a custom
# analyzer that re-implements the built-in `swedish` analyzer.
swedish_filters = {
  swedish_stop: { type: 'stop', stopwords: '_swedish_' },
  # Words listed here are kept from being stemmed (the equivalent of
  # `stem_exclusion`); remove this filter if no words need excluding.
  swedish_keywords: { type: 'keyword_marker', keywords: ['exempel'] },
  swedish_stemmer: { type: 'stemmer', language: 'swedish' }
}

rebuilt_swedish = {
  tokenizer: 'standard',
  filter: ['lowercase', 'swedish_stop', 'swedish_keywords', 'swedish_stemmer']
}

response = client.indices.create(
  index: 'swedish_example',
  body: {
    settings: {
      analysis: {
        filter: swedish_filters,
        analyzer: { rebuilt_swedish: rebuilt_swedish }
      }
    }
  }
)
puts response
// Create the `swedish_example` index with `rebuilt_swedish`, a custom
// analyzer that re-implements the built-in `swedish` analyzer.
const swedishFilters = {
  swedish_stop: { type: "stop", stopwords: "_swedish_" },
  // Words listed here are kept from being stemmed (the equivalent of
  // `stem_exclusion`); remove this filter if no words need excluding.
  swedish_keywords: { type: "keyword_marker", keywords: ["exempel"] },
  swedish_stemmer: { type: "stemmer", language: "swedish" },
};

const rebuiltSwedish = {
  tokenizer: "standard",
  filter: ["lowercase", "swedish_stop", "swedish_keywords", "swedish_stemmer"],
};

const response = await client.indices.create({
  index: "swedish_example",
  settings: {
    analysis: {
      filter: swedishFilters,
      analyzer: { rebuilt_swedish: rebuiltSwedish },
    },
  },
});
console.log(response);
PUT /swedish_example { "settings": { "analysis": { "filter": { "swedish_stop": { "type": "stop", "stopwords": "_swedish_" }, "swedish_keywords": { "type": "keyword_marker", "keywords": ["exempel"] }, "swedish_stemmer": { "type": "stemmer", "language": "swedish" } }, "analyzer": { "rebuilt_swedish": { "tokenizer": "standard", "filter": [ "lowercase", "swedish_stop", "swedish_keywords", "swedish_stemmer" ] } } } } }
turkish
analyzer
The `turkish` analyzer could be reimplemented as a custom analyzer as follows:
# Create the `turkish_example` index with `rebuilt_turkish`, a custom
# analyzer that re-implements the built-in `turkish` analyzer.
turkish_filters = {
    "turkish_stop": {"type": "stop", "stopwords": "_turkish_"},
    # A `lowercase` filter configured with the Turkish language option is
    # used in place of the plain `lowercase` filter.
    "turkish_lowercase": {"type": "lowercase", "language": "turkish"},
    # Words listed here are kept from being stemmed (the equivalent of
    # `stem_exclusion`); remove this filter if no words need excluding.
    "turkish_keywords": {"type": "keyword_marker", "keywords": ["örnek"]},
    "turkish_stemmer": {"type": "stemmer", "language": "turkish"},
}

rebuilt_turkish = {
    "tokenizer": "standard",
    "filter": [
        "apostrophe",
        "turkish_lowercase",
        "turkish_stop",
        "turkish_keywords",
        "turkish_stemmer",
    ],
}

resp = client.indices.create(
    index="turkish_example",
    settings={
        "analysis": {
            "filter": turkish_filters,
            "analyzer": {"rebuilt_turkish": rebuilt_turkish},
        }
    },
)
print(resp)
# Create the `turkish_example` index with `rebuilt_turkish`, a custom
# analyzer that re-implements the built-in `turkish` analyzer.
turkish_filters = {
  turkish_stop: { type: 'stop', stopwords: '_turkish_' },
  # A `lowercase` filter configured with the Turkish language option is
  # used in place of the plain `lowercase` filter.
  turkish_lowercase: { type: 'lowercase', language: 'turkish' },
  # Words listed here are kept from being stemmed (the equivalent of
  # `stem_exclusion`); remove this filter if no words need excluding.
  turkish_keywords: { type: 'keyword_marker', keywords: ['örnek'] },
  turkish_stemmer: { type: 'stemmer', language: 'turkish' }
}

rebuilt_turkish = {
  tokenizer: 'standard',
  filter: [
    'apostrophe',
    'turkish_lowercase',
    'turkish_stop',
    'turkish_keywords',
    'turkish_stemmer'
  ]
}

response = client.indices.create(
  index: 'turkish_example',
  body: {
    settings: {
      analysis: {
        filter: turkish_filters,
        analyzer: { rebuilt_turkish: rebuilt_turkish }
      }
    }
  }
)
puts response
// Create the `turkish_example` index with `rebuilt_turkish`, a custom
// analyzer that re-implements the built-in `turkish` analyzer.
const turkishFilters = {
  turkish_stop: { type: "stop", stopwords: "_turkish_" },
  // A `lowercase` filter configured with the Turkish language option is
  // used in place of the plain `lowercase` filter.
  turkish_lowercase: { type: "lowercase", language: "turkish" },
  // Words listed here are kept from being stemmed (the equivalent of
  // `stem_exclusion`); remove this filter if no words need excluding.
  turkish_keywords: { type: "keyword_marker", keywords: ["örnek"] },
  turkish_stemmer: { type: "stemmer", language: "turkish" },
};

const rebuiltTurkish = {
  tokenizer: "standard",
  filter: [
    "apostrophe",
    "turkish_lowercase",
    "turkish_stop",
    "turkish_keywords",
    "turkish_stemmer",
  ],
};

const response = await client.indices.create({
  index: "turkish_example",
  settings: {
    analysis: {
      filter: turkishFilters,
      analyzer: { rebuilt_turkish: rebuiltTurkish },
    },
  },
});
console.log(response);
PUT /turkish_example { "settings": { "analysis": { "filter": { "turkish_stop": { "type": "stop", "stopwords": "_turkish_" }, "turkish_lowercase": { "type": "lowercase", "language": "turkish" }, "turkish_keywords": { "type": "keyword_marker", "keywords": ["örnek"] }, "turkish_stemmer": { "type": "stemmer", "language": "turkish" } }, "analyzer": { "rebuilt_turkish": { "tokenizer": "standard", "filter": [ "apostrophe", "turkish_lowercase", "turkish_stop", "turkish_keywords", "turkish_stemmer" ] } } } } }
thai
analyzer
The `thai` analyzer could be reimplemented as a custom analyzer as follows:
# Create the `thai_example` index with `rebuilt_thai`, a custom analyzer
# that re-implements the built-in `thai` analyzer. It uses the `thai`
# tokenizer and defines only a stop filter (no keyword marker or stemmer).
thai_filters = {
    "thai_stop": {"type": "stop", "stopwords": "_thai_"},
}

rebuilt_thai = {
    "tokenizer": "thai",
    "filter": ["lowercase", "decimal_digit", "thai_stop"],
}

resp = client.indices.create(
    index="thai_example",
    settings={
        "analysis": {
            "filter": thai_filters,
            "analyzer": {"rebuilt_thai": rebuilt_thai},
        }
    },
)
print(resp)
# Create the `thai_example` index with `rebuilt_thai`, a custom analyzer
# that re-implements the built-in `thai` analyzer. It uses the `thai`
# tokenizer and defines only a stop filter (no keyword marker or stemmer).
thai_filters = {
  thai_stop: { type: 'stop', stopwords: '_thai_' }
}

rebuilt_thai = {
  tokenizer: 'thai',
  filter: ['lowercase', 'decimal_digit', 'thai_stop']
}

response = client.indices.create(
  index: 'thai_example',
  body: {
    settings: {
      analysis: {
        filter: thai_filters,
        analyzer: { rebuilt_thai: rebuilt_thai }
      }
    }
  }
)
puts response
// Create the `thai_example` index with `rebuilt_thai`, a custom analyzer
// that re-implements the built-in `thai` analyzer. It uses the `thai`
// tokenizer and defines only a stop filter (no keyword marker or stemmer).
const thaiFilters = {
  thai_stop: { type: "stop", stopwords: "_thai_" },
};

const rebuiltThai = {
  tokenizer: "thai",
  filter: ["lowercase", "decimal_digit", "thai_stop"],
};

const response = await client.indices.create({
  index: "thai_example",
  settings: {
    analysis: {
      filter: thaiFilters,
      analyzer: { rebuilt_thai: rebuiltThai },
    },
  },
});
console.log(response);