Reputation: 3379
I have a query that collects data from a dynamic date range (last 7 days) from one dataset in BigQuery - my data source is Google Analytics, so I have other datasets connected with identical schema. I'd like my query to also return data from other datasets, usually I would use a UNION ALL for this, but my query contains a complex categorization query which needs to be updated regularly and I'd rather not do this multiple times for each set.
Could you advise on how to query across datasets, or suggest a more elegant way to handle the UNION ALL approach?
SELECT
Date,
COUNT(DISTINCT VisitId) AS users,
COUNT(VisitId) AS sessions,
SUM(totals.transactions) AS orders,
CASE
# Organic Search - Google
WHEN ( channelGrouping LIKE "Organic Search"
OR trafficSource.source LIKE "com.google.android.googlequicksearchbox")
AND trafficSource.source LIKE "%google%" THEN "Organic Search - Google"
ELSE "Other"
END AS Channel,
hits.page.hostname AS site
FROM
`xxx.dataset1.ga_sessions_20*`
CROSS JOIN
UNNEST (hits) AS hits
WHERE
parse_DATE('%y%m%d',
_table_suffix) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 day)
AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 day)
AND totals.visits = 1
AND hits.isEntrance IS TRUE
GROUP BY
Date,
Channel,
hits.isEntrance
ORDER BY
Users DESC
UPDATE: I have got as far as follows thanks to the responses below, the following queries all datasets in the UNION but the date range is not applying, instead all data is being queried, any ideas why it's not picking it up?
SELECT
Date,
LOWER(hits.page.hostname) AS site,
IFNULL(COUNT(VisitId),0) AS sessions,
IFNULL(SUM(totals.transactions),0) AS orders,
IFNULL(ROUND(SUM(totals.transactions)/COUNT(VisitId),4),0) AS conv_rate,
# Channel definition starts here
CASE
# Organic Search - Google
WHEN ( channelGrouping LIKE "Organic Search"
OR trafficSource.source LIKE "com.google.android.googlequicksearchbox")
AND trafficSource.source LIKE "%google%" THEN "Organic Search - Google"
ELSE "Other"
END AS Channel
FROM (
SELECT * FROM `xxx.43786551.ga_sessions_20*` UNION ALL
SELECT * FROM `xxx.43786097.ga_sessions_20*` UNION ALL
SELECT * FROM `xxx.43786092.ga_sessions_20*`
WHERE PARSE_DATE('%Y%m%d',_TABLE_SUFFIX) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY)
AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
)
CROSS JOIN UNNEST (hits) AS hits
WHERE totals.visits = 1
AND hits.isEntrance IS TRUE
GROUP BY
Date,
channel,
hits.isEntrance,
site
HAVING hits.isEntrance IS TRUE
Upvotes: 0
Views: 1842
Reputation: 173190
#standardSQL
SELECT
DATE,
COUNT(DISTINCT VisitId) AS users,
COUNT(VisitId) AS sessions,
SUM(totals.transactions) AS orders,
CASE
# Organic Search - Google
WHEN ( channelGrouping LIKE "Organic Search"
OR trafficSource.source LIKE "com.google.android.googlequicksearchbox")
AND trafficSource.source LIKE "%google%" THEN "Organic Search - Google"
ELSE "Other"
END AS Channel,
hits.page.hostname AS site
FROM (
SELECT * FROM `xxx.dataset1.ga_sessions_20*` WHERE PARSE_DATE('%y%m%d',_TABLE_SUFFIX) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
UNION ALL SELECT * FROM `xxx.dataset2.ga_sessions_20*` WHERE PARSE_DATE('%y%m%d',_TABLE_SUFFIX) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
UNION ALL SELECT * FROM `xxx.dataset3.ga_sessions_20*` WHERE PARSE_DATE('%y%m%d',_TABLE_SUFFIX) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
)
CROSS JOIN UNNEST (hits) AS hits
WHERE totals.visits = 1
AND hits.isEntrance IS TRUE
GROUP BY
DATE,
Channel,
site
ORDER BY
Users DESC
Upvotes: 3