Reputation: 1506
I am trying to transform an AVRO schema into an ElasticSearch index template. Both are JSON structured with a few things to check while transforming. I tried using recursion to get all the nested elements out and then pair them with their parents but writing to a dictionary while parsing deep with recursion compelled me to ask this question.
So basically I have this AVRO schema file:
{
"name": "animal",
"type": [
"null",
{
"type": "record",
"name": "zooAnimals",
"fields": [{
"name": "color",
"type": ["null", "string"],
"default": null
},
{
"name": "skinType",
"type": ["null", "string"],
"default": null
},
{
"name": "species",
"type": {
"type": "record",
"name": "AnimalSpecies",
"fields": [{
"name": "terrestrial",
"type": "string"
},
{
"name": "aquatic",
"type": "string"
}
]
}
},
{
"name": "behavior",
"type": [
"null",
{
"type": "record",
"name": "AnimalBehaviors",
"fields": [{
"name": "sound",
"type": ["null", "string"],
"default": null
},
{
"name": "hunt",
"type": ["null", "string"],
"default": null
}
]
}
],
"default": null
}
]
}
]
}
and I would like it to get transformed into this (Elasticsearch index template format):
{
"properties": {
"color" :{
"type" : "keyword"
},
"skinType" :{
"type" : "keyword"
},
"species" :{
"properties" : {
"terrestrial" : {
"type" : "keyword"
},
"aquatic" : {
"type" : "keyword"
}
}
},
"behavior" : {
"properties" : {
"sound" : {
"type" : "keyword"
},
"hunt" : {
"type" : "keyword"
}
}
}
}
}
Important notes: the AVRO schema can be nested to an arbitrary depth, which is why I was thinking of recursion to solve this. Also, the value of the `type` field can be either an array or a map, as shown by `behavior` vs. `species`: `behavior` has an array and `species` has a map.
If you must see that I did my trial and error, here's my code that's not getting me anywhere:
// Asker's attempt at walking the schema. The snippet is cut off in the post:
// the closing braces of the outer `if` and of the function are missing.
// It writes to `dataType` and `mappings`, neither of which is declared in this
// snippet — presumably they are defined elsewhere in the asker's script.
const checkDataTypeFromObject = function (obj) {
// Only arrays enter this branch.
if (Object.prototype.toString.call(obj) === "[object Array]") {
// NOTE: `map` is used here only for its side effects; the array it returns is
// discarded.
obj.map(function (item) {
if (Object.prototype.toString.call(item) === "[object Object]") {
// so this is an object that could contain further nested fields
dataType = item;
// Every entry is written to the top-level `mappings.properties`, whatever the
// recursion depth, so nested fields are not attached to their parent.
mappings.properties[item.name] = { "type" : item.type}
// Recurse into `fields` when present, otherwise into `type`.
if (item.hasOwnProperty("fields")) {
checkDataTypeFromObject(item.fields);
} else if (item.hasOwnProperty("type")) {
checkDataTypeFromObject(item.type);
}
} else if (item === null) {
// discard the nulls, nothing to do here
// NOTE: this matches the value `null`; the schema above spells the null type
// as the string "null", which lands in the final `else` branch instead.
} else {
// if not dict or null, this is the dataType we are looking for
dataType = item;
}
// NOTE: reached after every branch, so a literal `null` item would throw a
// TypeError here; for string items `item.name` is undefined.
return item.name
});
Upvotes: 1
Views: 442
Reputation: 135406
We can break it down using inductive reasoning. The numbered points below correspond to the numbered comments in the code -
1. If the input, `t`, is null (the string "null" in the schema), return an empty object.
2. `t` is not null. If `t.type` is an object, `transform` each leaf and sum into a single object.
3. `t` is not null and `t.type` is not an object. If `t.fields` is an object, `transform` each leaf, assign to `{ [name]: ... }`, and sum into a single `properties` object.
4. `t` is not null, `t.type` is not an object, and `t.fields` is not an object. Return keyword.
/**
 * Transform an AVRO schema node into an Elasticsearch mapping fragment.
 *
 * The numbered cases match the `// <- n` markers in the code:
 *   1. `t` is the string "null": return an empty object.
 *   2. `t.type` is an object (an array or a map): transform each member and
 *      combine the results into a single object.
 *   3. `t.fields` is an object: transform each field, key the result by the
 *      field's name, and combine everything under one `properties` object.
 *   4. neither `t.type` nor `t.fields` is an object: return keyword.
 *
 * Relies on the `assign`, `arr` and `isObject` helpers defined alongside it.
 *
 * @param {object|string} t - AVRO schema node, or a primitive type name
 * @returns {object} mapping fragment for `t`
 */
const transform = t =>
  t === "null"
    ? {}                                           // <- 1
    : isObject(t.type)
      ? arr(t.type)                                // <- 2
          .map(transform)
          .reduce(assign, {})
      : isObject(t.fields)
        ? { properties:                            // <- 3
              arr(t.fields)
                // each field becomes a single-key object so they can be merged
                .map(v => ({ [v.name]: transform(v) }))
                .reduce(assign, {})
          }
        : { type: "keyword" }                      // <- 4
With a few helpers to keep complexity out of our way -
/**
 * Two-argument wrapper around `Object.assign`, usable directly as a reducer:
 * copies the own enumerable properties of `u` onto `t` and returns `t`.
 */
const assign = (t, u) => {
  return Object.assign(t, u);
};
/**
 * Normalise a value to an array: arrays are returned as-is, anything else is
 * wrapped in a one-element array.
 */
const arr = t => {
  if (Array.isArray(t)) {
    return t;
  }
  return [t];
};
/**
 * True when `t` is already an object (including arrays and functions), i.e.
 * when converting it with `Object(...)` hands back the very same value.
 * Primitives, `null` and `undefined` yield false.
 */
const isObject = t => {
  const boxed = Object(t);
  return boxed === t;
};
Simply run the transform
-
console.log(transform(input))
Expand the snippet below to verify the result in your browser -
// Reducer form of Object.assign: merges `u` into `t` and returns `t`.
const assign = (t, u) =>
  Object.assign(t, u)

// Wrap a non-array value in an array so it can be mapped over uniformly.
const arr = t =>
  Array.isArray(t) ? t : [t]

// True for objects, arrays and functions; false for primitives and null.
const isObject = t =>
  Object(t) === t

/**
 * Transform an AVRO schema node into an Elasticsearch mapping fragment.
 *   1. `t` is the string "null": return an empty object.
 *   2. `t.type` is an object (array or map): transform each member and
 *      combine the results into a single object.
 *   3. `t.fields` is an object: transform each field, key the result by the
 *      field's name, and combine everything under one `properties` object.
 *   4. otherwise: return keyword.
 */
const transform = t =>
  t === "null"
    ? {}                                           // <- 1
    : isObject(t.type)
      ? arr(t.type)                                // <- 2
          .map(transform)
          .reduce(assign, {})
      : isObject(t.fields)
        ? { properties:                            // <- 3
              arr(t.fields)
                .map(v => ({ [v.name]: transform(v) }))
                .reduce(assign, {})
          }
        : { type: "keyword" }                      // <- 4

// Sample AVRO schema from the question.
const input =
  {name: "animal", type: ["null", {type: "record", name: "zooAnimals", fields: [{name: "color", type: ["null", "string"], default: null}, {name: "skinType", type: ["null", "string"], default: null}, {name: "species", type: {type: "record", name: "AnimalSpecies", fields: [{name: "terrestrial", type: "string"}, {name: "aquatic", type: "string"}]}}, {name: "behavior", type: ["null", {type: "record", name: "AnimalBehaviors", fields: [{name: "sound", type: ["null", "string"], default: null}, {name: "hunt", type: ["null", "string"], default: null}]}], default: null}]}]}

console.log(transform(input))
Output -
{
"properties": {
"color": {
"type": "keyword"
},
"skinType": {
"type": "keyword"
},
"species": {
"properties": {
"terrestrial": {
"type": "keyword"
},
"aquatic": {
"type": "keyword"
}
}
},
"behavior": {
"properties": {
"sound": {
"type": "keyword"
},
"hunt": {
"type": "keyword"
}
}
}
}
}
nota bene
In step 2 we could have a complex type
such as -
{ name: "foo"
, type: [ "null", { obj1 }, { obj2 }, ... ]
, ...
}
In such a case, obj1
and obj2
may each transform
into a { properties: ... }
object. Using .reduce(assign, {})
means properties of obj1
will be overwritten by properties of obj2
-
: isObject(t.type)
? arr(t.type)
.map(transform)
.reduce(assign, {}) // <- cannot simply use `assign`
To remedy this, we change step 2 to merge
complex types more intelligently -
: isObject(t.type)
? arr(t.type)
.map(transform)
.reduce(merge, {}) // <- define a more sophisticated merge
Where merge
could be something like -
/**
 * Reducer that combines two transformed fragments, keeping nested field
 * mappings under a single `properties` key.
 *
 * - both have `properties`: `u.properties` is merged into `t.properties`
 * - only `t` has `properties`: `u` itself is merged into `t.properties`
 * - only `u` has `properties`: `u.properties` is merged into `t`, and the
 *   result is wrapped as `{ properties: ... }`
 * - neither: plain `Object.assign(t, u)`
 *
 * The left-hand operand (or its `properties` object) is mutated in place.
 */
const merge = (t, u) => {
  const left = t.properties;
  const right = u.properties;

  if (left && right) {
    return { properties: Object.assign(left, right) };   // <- both
  }
  if (left) {
    return { properties: Object.assign(left, u) };       // <- only t
  }
  if (right) {
    return { properties: Object.assign(t, right) };      // <- only u
  }
  return Object.assign(t, u);                            // <- neither
};
Or the same merge
but using a different logical approach -
/**
 * Reducer that combines two transformed fragments, keeping nested field
 * mappings under a single `properties` key.
 *
 * If either side has `properties`, the two sides are merged — using each
 * side's `properties` when it has one, or the side itself otherwise — and the
 * result is wrapped as `{ properties: ... }`. If neither side has
 * `properties`, this is a plain `Object.assign(t, u)`.
 *
 * The left-hand operand (or its `properties` object) is mutated in place.
 */
const merge = (t, u) =>
  t.properties || u.properties          // <- either
    ? { properties:
          Object.assign
            ( t.properties || t
            , u.properties || u
            )
      }
    : Object.assign(t, u)               // <- neither
Upvotes: 3
Reputation: 50807
I don't know your input format nor your output one. So this is probably incomplete. It captures your sample case, though, and it might serve as a baseline to which you can add clauses/conditions:
/**
 * Convert one AVRO field into a `[name, mapping]` entry suitable for
 * `Object .fromEntries`.
 *
 * Clauses, in order:
 *   1. `["null", "string"]` union          -> keyword
 *   2. `["null", <record>]` union          -> nested `properties`
 *   3. `<record>` given directly as a map  -> nested `properties`
 *   4. anything else                       -> keyword
 *
 * Clauses 2 and 3 only match when the candidate really carries a `fields`
 * array. Without that check, any array or non-record object that missed the
 * earlier clauses (e.g. `["null", "int"]`, `["string", "null"]`,
 * `{type: "array", ...}`) reached `.fields .map` on `undefined` and threw.
 * Such inputs now fall through to the final clause.
 *
 * @param {{name: string, type: *}} field - AVRO field (or the root schema)
 * @returns {[string, object]} the field name paired with its mapping
 */
const convertField = ({name, type}) =>
  Array .isArray (type) && type [0] === 'null' && type [1] === 'string'
    ? [name, {type: 'keyword'}]
  : Array .isArray (type) && type [0] === 'null' && Object (type [1]) === type [1] && Array .isArray (type [1] .fields)
    ? [name, {properties: Object .fromEntries (type [1] .fields .map (convertField))}]
  : Object (type) === type && Array .isArray (type .fields)
    ? [name, {properties: Object .fromEntries (type .fields .map (convertField))}]
  : // else
    [name, {type: 'keyword'}]
/**
 * Entry point: treats the root schema as a single field and returns only the
 * mapping half of the `[name, mapping]` pair produced by `convertField`.
 */
const convert = (obj) => {
  const entry = convertField (obj)
  return entry [1]
}
// Sample AVRO schema from the question, used to demonstrate `convert`.
const input = {
  name: "animal",
  type: [
    "null",
    {
      type: "record",
      name: "zooAnimals",
      fields: [
        {name: "color", type: ["null", "string"], default: null},
        {name: "skinType", type: ["null", "string"], default: null},
        {
          name: "species",
          type: {
            type: "record",
            name: "AnimalSpecies",
            fields: [
              {name: "terrestrial", type: "string"},
              {name: "aquatic", type: "string"}
            ]
          }
        },
        {
          name: "behavior",
          type: [
            "null",
            {
              type: "record",
              name: "AnimalBehaviors",
              fields: [
                {name: "sound", type: ["null", "string"], default: null},
                {name: "hunt", type: ["null", "string"], default: null}
              ]
            }
          ],
          default: null
        }
      ]
    }
  ]
}

// Print the resulting Elasticsearch mapping.
console .log (convert (input))
.as-console-wrapper {min-height: 100% !important; top: 0}
The helper function, convertField
, converts one field of your input into the format [name, <something>]
, where the <something>
varies by the structure of the type
property. In two cases we use an array of these structures as input to Object .fromEntries
in order to create an object.
The main function, convert
, simply grabs the second property off the result of convertField
called on the root. That works if the overall structure always starts as it does in this example.
Note that the results of two of the clauses (the first and the fourth) are identical, and the other two are quite similar to one another. Also the tests for the first and second clauses are quite close. So there might be some reasonable ways to simplify this. But because the matching tests don't line up well with the matching output, it probably won't be trivial.
You can add other conditions and results easily enough. In fact, I originally wrote it with the final two lines replaced by this:
: type === 'string'
? [name, {type: 'keyword'}]
: // else
[name, {type: 'unknown'}]
which shows better where to add your other clauses, and also adds a notation (unknown
) to the result if you missed a case.
Upvotes: 2