Reputation: 191
I am using the following query in BigQuery to join tables and get the visitnumber and source of people who ordered a product:
QUERY:
SELECT visitor, transactionId as OrderNumber, source, referralPath, medium, keyword, campaign, visitNumber, visitStartTime, conversionTime, visitLastClikcTime
FROM (
SELECT conversions.fullVisitorId as visitor, conversions.transactionId as transactionId,
clickChanges.visitNumber as visitNumber, conversions.visitNumber as transactionVisitNumber,
clickChanges.source as source, clickChanges.referralPath as referralPath,
clickChanges.medium as medium, clickChanges.keyword as keyword, clickChanges.campaign as campaign,
conversionTime, min(clikcTime) as visitStartTime, max(clikcTime) as visitLastClikcTime
FROM (SELECT fullVisitorId, hits.transaction.transactionId as transactionId, visitNumber as visitNumber,
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as conversionTime
FROM
(TABLE_DATE_RANGE([54247008.ga_sessions_],
TIMESTAMP('2014-11-16'),
TIMESTAMP('2014-11-21')))
WHERE hits.transaction.transactionId is not null
ORDER BY fullVisitorId, visitNumber
) conversions
JOIN EACH(
SELECT fullVisitorId, trafficSource.referralPath as referralPath, trafficSource.medium as medium, trafficSource.source as source, visitNumber,
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as clikcTime,
trafficSource.campaign as campaign, trafficSource.keyword as keyword
FROM (TABLE_DATE_RANGE([54247008.ga_sessions_],
TIMESTAMP('2014-11-16'),
TIMESTAMP('2014-11-21')))
GROUP BY fullVisitorId, referralPath, medium, source, visitNumber, clikcTime, campaign, keyword
ORDER BY fullVisitorId, visitNumber
) clickChanges
ON
conversions.fullVisitorId = clickChanges.fullVisitorId
GROUP BY visitor, transactionId, visitNumber, transactionVisitNumber, source, referralPath, medium, keyword, campaign, conversionTime
HAVING visitNumber <= transactionVisitNumber
ORDER BY transactionId, visitNumber
)
Now I want to get an aggregate column with the MAX(visitnumber) per transationId and show me information like source related to that MAX(visitnumber). To do this, I researched on here and built another JOIN
of the whole query which you can see below, which is working, but it is also extremely long and repetitive:
SAME QUERY (slightly modified):
SELECT a.visitor, a.transactionId as OrderNumber, a.source, a.referralPath, a.medium, a.keyword, a.campaign, a.visitNumber as max_visitnumber, a.visitStartTime, a.conversionTime, a.visitLastClikcTime
FROM (
SELECT conversions.fullVisitorId as visitor, conversions.transactionId as transactionId,
clickChanges.visitNumber as visitNumber, conversions.visitNumber as transactionVisitNumber,
clickChanges.source as source, clickChanges.referralPath as referralPath,
clickChanges.medium as medium, clickChanges.keyword as keyword, clickChanges.campaign as campaign,
conversionTime, min(clikcTime) as visitStartTime, max(clikcTime) as visitLastClikcTime
FROM (SELECT fullVisitorId, hits.transaction.transactionId as transactionId, visitNumber as visitNumber,
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as conversionTime
FROM
(TABLE_DATE_RANGE([54247008.ga_sessions_],
TIMESTAMP('2014-11-16'),
TIMESTAMP('2014-11-21')))
WHERE hits.transaction.transactionId is not null
ORDER BY fullVisitorId, visitNumber
) conversions
JOIN EACH(
SELECT fullVisitorId, trafficSource.referralPath as referralPath, trafficSource.medium as medium, trafficSource.source as source, visitNumber,
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as clikcTime,
trafficSource.campaign as campaign, trafficSource.keyword as keyword
FROM (TABLE_DATE_RANGE([54247008.ga_sessions_],
TIMESTAMP('2014-11-16'),
TIMESTAMP('2014-11-21')))
GROUP BY fullVisitorId, referralPath, medium, source, visitNumber, clikcTime, campaign, keyword
ORDER BY fullVisitorId, visitNumber
) clickChanges
ON
conversions.fullVisitorId = clickChanges.fullVisitorId
GROUP BY visitor, transactionId, visitNumber, transactionVisitNumber, source, referralPath, medium, keyword, campaign, conversionTime
HAVING visitNumber <= transactionVisitNumber
ORDER BY transactionId, visitNumber
) a
JOIN EACH
(
SELECT MAX(visitNumber) as max_visitnumber, transactionId
FROM (
SELECT conversions.fullVisitorId as visitor, conversions.transactionId as transactionId,
clickChanges.visitNumber as visitNumber, conversions.visitNumber as transactionVisitNumber,
clickChanges.source as source, clickChanges.referralPath as referralPath,
clickChanges.medium as medium, clickChanges.keyword as keyword, clickChanges.campaign as campaign,
conversionTime, min(clikcTime) as visitStartTime, max(clikcTime) as visitLastClikcTime
FROM (SELECT fullVisitorId, hits.transaction.transactionId as transactionId, visitNumber as visitNumber,
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as conversionTime
FROM
(TABLE_DATE_RANGE([54247008.ga_sessions_],
TIMESTAMP('2014-11-16'),
TIMESTAMP('2014-11-21')))
WHERE hits.transaction.transactionId is not null
ORDER BY fullVisitorId, visitNumber
) conversions
JOIN EACH(
SELECT fullVisitorId, trafficSource.referralPath as referralPath, trafficSource.medium as medium, trafficSource.source as source, visitNumber,
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as clikcTime,
trafficSource.campaign as campaign, trafficSource.keyword as keyword
FROM (TABLE_DATE_RANGE([54247008.ga_sessions_],
TIMESTAMP('2014-11-16'),
TIMESTAMP('2014-11-21')))
GROUP BY fullVisitorId, referralPath, medium, source, visitNumber, clikcTime, campaign, keyword
ORDER BY fullVisitorId, visitNumber
) clickChanges
ON
conversions.fullVisitorId = clickChanges.fullVisitorId
GROUP BY visitor, transactionId, visitNumber, transactionVisitNumber, source, referralPath, medium, keyword, campaign, conversionTime
HAVING visitNumber <= transactionVisitNumber
ORDER BY transactionId, visitNumber
)
GROUP EACH BY transactionId
) b
ON
b.max_visitnumber = a.visitnumber and
b.transactionId = a.transactionId
ORDER BY max_visitnumber DESC
So I have put almost the same query two times, only changing a tiny part of it. Is there any neater way to do this and shorten the query?
I found somebody on StackOverflow suggesting a regular table expression of WITH AS
, but it does not seem to work in BigQuery. Any other ideas to make this query more efficient?
Upvotes: 1
Views: 2078
Reputation: 59325
As the comments say, a view is what you want: An alias for a query you can reuse in other queries.
https://cloud.google.com/bigquery/querying-data#views
Note that views are [currently] not compatible with TABLE_DATE_RANGE, so you would need to rewrite your query/view to explicitly call out the tables.
Upvotes: 3