Reputation: 15
I am totally new to elastic search. So please forgive me if this is a stupid Question and my Questions might have been answered somewhere else already but I couldn't find it. I want to use Elastic Search as a search engine for PDF'S and docx's in my network. I used fscrawler to ingest the PDF's to elastic search. Since the documents I want to ingest are in several languages I wanted to use n-graming for stemming. To do so I wanted to update my mapping like this
PUT test/_mappings/_all
{
"mappings": {
"title": {
"properties": {
"title": {
"type": "text",
"fields": {
"de": {
"type": "string",
"analyzer": "german"
},
"en": {
"type": "string",
"analyzer": "english"
},
"general": {
"type": "string",
"analyzer": "trigrams"
}
}
}
}
}
}
}
And now I get this Errormessage
{ "error": { "root_cause": [ { "type": "mapper_parsing_exception", "reason": "Root mapping definition has unsupported parameters: [mappings : {title={properties={title={type=text, fields={de={type=string, analyzer=german}, en={type=string, analyzer=english}, general={type=string, analyzer=trigrams}}}}}}]" } ], "type": "mapper_parsing_exception", "reason": "Root mapping definition has unsupported parameters: [mappings : {title={properties={title={type=text, fields={de={type=string, analyzer=german}, en={type=string, analyzer=english}, general={type=string, analyzer=trigrams}}}}}}]"
}, "status": 400 }
Do you have any idea how i can fix this? Or do you have an idea how I can ingest the files with the right mapping without using fscrawler?
Upvotes: 0
Views: 685
Reputation: 15
My mapping
{
"test": {
"mappings": {
"_doc": {
"dynamic_templates": [
{
"raw_as_text": {
"path_match": "meta.raw.*",
"mapping": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
}
}
}
],
"properties": {
"attachment": {
"type": "binary"
},
"attributes": {
"properties": {
"group": {
"type": "keyword"
},
"owner": {
"type": "keyword"
}
}
},
"content": {
"type": "text"
},
"file": {
"properties": {
"checksum": {
"type": "keyword"
},
"content_type": {
"type": "keyword"
},
"created": {
"type": "date",
"format": "dateOptionalTime"
},
"extension": {
"type": "keyword"
},
"filename": {
"type": "keyword",
"store": true
},
"filesize": {
"type": "long"
},
"indexed_chars": {
"type": "long"
},
"indexing_date": {
"type": "date",
"format": "dateOptionalTime"
},
"last_accessed": {
"type": "date",
"format": "dateOptionalTime"
},
"last_modified": {
"type": "date",
"format": "dateOptionalTime"
},
"url": {
"type": "keyword",
"index": false
}
}
},
"meta": {
"properties": {
"altitude": {
"type": "text"
},
"author": {
"type": "text"
},
"comments": {
"type": "text"
},
"contributor": {
"type": "text"
},
"coverage": {
"type": "text"
},
"created": {
"type": "date",
"format": "dateOptionalTime"
},
"creator_tool": {
"type": "keyword"
},
"date": {
"type": "date",
"format": "dateOptionalTime"
},
"description": {
"type": "text"
},
"format": {
"type": "text"
},
"identifier": {
"type": "text"
},
"keywords": {
"type": "text"
},
"language": {
"type": "keyword"
},
"latitude": {
"type": "text"
},
"longitude": {
"type": "text"
},
"metadata_date": {
"type": "date",
"format": "dateOptionalTime"
},
"modifier": {
"type": "text"
},
"print_date": {
"type": "date",
"format": "dateOptionalTime"
},
"publisher": {
"type": "text"
},
"rating": {
"type": "byte"
},
"relation": {
"type": "text"
},
"rights": {
"type": "text"
},
"source": {
"type": "text"
},
"title": {
"type": "text"
},
"type": {
"type": "text"
}
}
},
"path": {
"properties": {
"real": {
"type": "keyword",
"fields": {
"fulltext": {
"type": "text"
},
"tree": {
"type": "text",
"analyzer": "fscrawler_path",
"fielddata": true
}
}
},
"root": {
"type": "keyword"
},
"virtual": {
"type": "keyword",
"fields": {
"fulltext": {
"type": "text"
},
"tree": {
"type": "text",
"analyzer": "fscrawler_path",
"fielddata": true
}
}
}
}
}
}
}
}
}
}
Upvotes: 0
Reputation: 15
those are my settings
{
"test": {
"settings": {
"index": {
"mapping": {
"total_fields": {
"limit": "2000"
}
},
"number_of_shards": "5",
"provided_name": "test",
"creation_date": "1542031632596",
"analysis": {
"filter": {
"trigrams_filter": {
"type": "ngram",
"min_gram": "3",
"max_gram": "3"
}
},
"analyzer": {
"fscrawler_path": {
"tokenizer": "fscrawler_path"
},
"trigrams": {
"filter": [
"lowercase",
"trigrams_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"tokenizer": {
"fscrawler_path": {
"type": "path_hierarchy"
}
}
},
"number_of_replicas": "1",
"uuid": "7L3QE5_xRACECVbTFlFY-Q",
"version": {
"created": "6040399"
}
}
}
}
}
Upvotes: 0