Reputation: 91
We have an Athena table in which there is a column that contains JSON values. The datatype of the main column(which contains JSON values) in Athena is a string datatype.
DDL of Athena table is like below:
CREATE EXTERNAL TABLE 'table1'(
'id' bigint,
'requestbody' string //this column contains json values
)
also, this 'requestbody' column contains around 5000 JSON fields in it. And, it has also nested arrays in it.
Example,
{
"flow": {
"req": {
"application": {
"applicants": [
{
"id": 191915,
"applicationID": 189176,
"applicantIndex": null,
"sequenceNumber": 1,
"relationType": 1,
"relationCode": null,
"customerType": 1,
"prefix": "MR",
"incomeData": [
{
"name": "Applicant Income",
"description": "Executive Communication",
"submittedIncome": 4000,
"adjustedIncome": 4000,
"isAdjusted": false,
"adjustedBy": null
},
{
"name": "Applicant Other Income",
"description": "Car Allowance Car Allowance",
"submittedIncome": 100,
"adjustedIncome": 100,
"isAdjusted": false,
"adjustedBy": null,
"adjustedDate": null,
"comment": null,
"customerAppIncomeOrder": 1,
"combine": true
}
]
}
]
}
}
}
}
I tried below code to read this:
with A as (
SELECT
CAST(json_extract(requestbody, '$.flow.req.application.applicants')as ARRAY(MAP(VARCHAR,VARCHAR))) applicants
FROM
table1
)
, b AS (
SELECT
incomedata1['name'] name
FROM
A
CROSS JOIN UNNEST (applicants) t (applicant)
CROSS JOIN UNNEST (applicant.incomedata) t (incomedata1)
)
select name from b;
But I am getting an error:
line 14:23: Expression applicant is not of type ROW
Upvotes: 0
Views: 55
Reputation: 2956
The problem is that the casting statement of
CAST(json_extract(requestbody, '$.flow.req.application.applicants')as ARRAY(MAP(VARCHAR,VARCHAR))) applicants
is not valid. The data you provided shows that not all values for all keys are varchar. You will need to explicitly cast by using ROW. The following should work:
A as (
SELECT
CAST(json_extract(requestbody, '$.flow.req.application.applicants') as ARRAY<ROW(
id BIGINT,
applicationID BIGINT,
applicantIndex BIGINT,
sequenceNumber BIGINT,
relationType BIGINT,
relationCode BIGINT,
customerType BIGINT,
prefix VARCHAR,
incomeData ARRAY<ROW(
name VARCHAR,
description VARCHAR,
submittedIncome BIGINT,
adjustedIncome BIGINT,
isAdjusted BOOLEAN,
adjustedBy VARCHAR,
adjustedDate VARCHAR,
comment VARCHAR,
customerAppIncomeOrder VARCHAR,
combine BOOLEAN
)>
)>) applicants
FROM table1
)
, b AS (
SELECT
incomedata1.name name
FROM
A
CROSS JOIN UNNEST (applicants) t (applicant)
CROSS JOIN UNNEST (applicant.incomedata) t (incomedata1)
)
select name from b;
Upvotes: 0