Reputation: 873
I'm trying to convert JSON to CSV using the ConvertRecord
processor but the only error I'm getting back is Could not parse incoming data
. As this is not very descriptive, I'm at a loss as to how to diagnose the issue.
I know that my avro schema is valid because A) NiFi doesn't throw an error regarding the schema when I insert it into the Schema Registry and B) I tested my schema on here and it didn't give me an issue.
I also know that my JSON is valid because I can load it in Python using json.loads()
and it doesn't give me any problems.
I'm just not quite sure where I've gone wrong, nor how to fix it.
JSON
{
"DOC": {
"DOCID": "1234",
"Subjects": {
"Subject_xref": ["2233"]
},
"TXT": {
"COUNTRY": ["United States"],
"ESTATE": ["Mount Vernon"],
"PERSON": ["George Washington"]
},
"RAW_TXT": "George Washington lived in his family home, Mount Vernon, located in the United States.",
"RELINFO": [
{"ID" : "REL-1234-100",
"RELTYPE" : "PER-PROP",
"PERID" : "PER-1234-009",
"PROPID" : "PROP-1234-001",
"SENTID" : "1234-SENT-001",
"PROP_NORM" : "Mount Vernon",
"PROP_MENTION" : "Mount Vernon",
"PER_NORM" : "George Washington",
"PER_MENTION" : "George Washington"}
],
"ENTINFO": [
{"ID": "PER-1234-009", "TYPE": "PERSON", "NORM": "George Washington", "REFID": "PER-1234-009", "MENTION": "George Washington"},
{"ID": "CTRY-1234-003", "TYPE": "COUNTRY", "NORM": "United States", "REFID": "CTRY-1234-003", "MENTION": "United States."},
{"ID": "PROP-1234-001", "TYPE": "ESTATE", "NORM": "Mount Vernon", "REFID": "PROP-1234-001", "MENTION": "Mount Vernon"}
]
}
}
Avro
{
"type": "record",
"namespace": "name.space",
"name": "nlp_output",
"fields": [
{"name": "DOC", "type": {
"name": "DOCDocument", "type": "record", "namespace": "doc.name.space", "fields": [
{"name": "DOCID", "type": ["long","null"], "default": null},
{"name": "Subjects", "type": {
"name": "Subjects", "type": "record", "namespace": "subjects.name.space", "fields": [
{"name": "SubjectIdentificationID", "aliases": ["Subject_xref"], "type": ["long","null"], "default": null}
]
}},
{"name": "TXT", "type": {
"name": "TXT", "type": "record", "namespace": "text.name.space", "fields": [
{"name": "COUNTRY", "type": {"type": "array", "items": ["string", "null"]}, "default": null, "doc": ""},
{"name": "ESTATE", "type": {"type": "array", "items": ["string", "null"]}, "default": null, "doc": ""},
{"name": "PERSON", "type": {"type": "array", "items": ["string", "null"]}, "default": null, "doc": ""}
]
}},
{"name": "RAW_TXT", "type": ["string","null"], "default": null},
{"name": "RELINFO", "type": {
"name": "RelatedEntities", "type": "record", "namespace": "relent.name.space", "fields": [
{"name": "ID", "type": ["string", "null"], "default": null},
{"name": "RELTYPE", "type": ["string", "null"], "default": null},
{"name": "PERID", "type": ["string", "null"], "default": null},
{"name": "PROPID", "type": ["string", "null"], "default": null},
{"name": "SENTID", "type": ["string", "null"], "default": null},
{"name": "PROP_NORM", "type": ["string", "null"], "default": null},
{"name": "PROP_MENTION", "type": ["string", "null"], "default": null},
{"name": "PER_NORM", "type": ["string", "null"], "default": null},
{"name": "PER_MENTION", "type": ["string", "null"], "default": null}
]
}},
{"name": "ENTINFO", "doc": "Sentences stripped of tags for ease of reading", "type": {
"name": "Entities", "type": "record", "namespace": "entities.name.space", "fields": [
{"name": "ID", "type": ["string", "null"], "default": null},
{"name": "TYPE", "type": ["string", "null"], "default": null},
{"name": "NORM", "type": ["string", "null"], "default": null},
{"name": "REFID", "type": ["string", "null"], "default": null},
{"name": "MENTION", "type": ["string", "null"], "default": null}
]
}}
]
}}
]
}
Upvotes: 3
Views: 4949
Reputation: 398
Your schema doesn't match your JSON. You have SubjectIdentificationID
defined as long
or null
but in the JSON Subject_xref
is an array.
{
"type": "record",
"namespace": "name.space",
"name": "nlp_output",
"fields": [
{"name": "DOC", "type": {
"name": "DOCDocument", "type": "record", "namespace": "doc.name.space", "fields": [
{"name": "DOCID", "type": ["long","null"], "default": null},
{"name": "Subjects", "type": {
"name": "Subjects", "type": "record", "namespace": "subjects.name.space", "fields": [
{"name": "SubjectIdentificationID", "aliases": ["Subject_xref"], "type": {"type": "array", "items": ["long", "null"]}, "default": null}
]
}},
{"name": "TXT", "type": {
"name": "TXT", "type": "record", "namespace": "text.name.space", "fields": [
{"name": "COUNTRY", "type": {"type": "array", "items": ["string", "null"]}, "default": null, "doc": ""},
{"name": "ESTATE", "type": {"type": "array", "items": ["string", "null"]}, "default": null, "doc": ""},
{"name": "PERSON", "type": {"type": "array", "items": ["string", "null"]}, "default": null, "doc": ""}
]
}},
{"name": "RAW_TXT", "type": ["string","null"], "default": null},
{"name": "RELINFO", "type": {
"name": "RelatedEntities", "type": "record", "namespace": "relent.name.space", "fields": [
{"name": "ID", "type": ["string", "null"], "default": null},
{"name": "RELTYPE", "type": ["string", "null"], "default": null},
{"name": "PERID", "type": ["string", "null"], "default": null},
{"name": "PROPID", "type": ["string", "null"], "default": null},
{"name": "SENTID", "type": ["string", "null"], "default": null},
{"name": "PROP_NORM", "type": ["string", "null"], "default": null},
{"name": "PROP_MENTION", "type": ["string", "null"], "default": null},
{"name": "PER_NORM", "type": ["string", "null"], "default": null},
{"name": "PER_MENTION", "type": ["string", "null"], "default": null}
]
}},
{"name": "ENTINFO", "doc": "Sentences stripped of tags for ease of reading", "type": {
"name": "Entities", "type": "record", "namespace": "entities.name.space", "fields": [
{"name": "ID", "type": ["string", "null"], "default": null},
{"name": "TYPE", "type": ["string", "null"], "default": null},
{"name": "NORM", "type": ["string", "null"], "default": null},
{"name": "REFID", "type": ["string", "null"], "default": null},
{"name": "MENTION", "type": ["string", "null"], "default": null}
]
}}
]
}}
]
}
Upvotes: 4