Reputation: 323
I would like to calculate the days between the first visit and the visit with the transaction per channelpath. See the following example.
#standardSQL
WITH yourTable AS (
SELECT 1 AS user_id,'1a' AS visit_id, '2017-01-01 14:10:12' AS DATETIME,
'google cpc' AS channelgrouping, 0 AS transaction , 1 AS visit UNION ALL
SELECT 1, '1b', '2017-01-01 20:10:12', 'email', 1, 1 UNION ALL
SELECT 1, '1c','2017-01-03 08:10:12', 'direct', 0, 1 UNION ALL
SELECT 1, '1d','2017-01-04 13:10:14', 'organic', 1, 1 UNION ALL
Select 1, '1e','2017-03-04 13:10:14', 'organic', 0, 1 UNION ALL
Select 1, '1f','2017-03-04 13:10:14', 'direct', 0, 1
)
SELECT
user_id,
STRING_AGG(channelgrouping, ' > ' ORDER BY DATETIME) AS channelgrouping_path,
SUM(transaction) AS transaction,
SUM(visit) AS visits
FROM (
SELECT
*,
SUM(transaction) OVER(PARTITION BY user_id ORDER BY datetime
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
) AS grp
FROM yourTable
)
GROUP BY user_id, IFNULL(grp, 0)
Current OUTPUT TABLE
user_id|channgelgrouping_path |Transactions|Visits
1 |google cpc > email | 1 | 2
1 |direct > organic | 1 | 2
1 |organic > direct | 0 | 2
DESIRED OUTPUT TABLE
user_id|channgelgrouping_path |Transactions|Visits|Days before purchase
1 |google cpc > email | 1 | 2 |0
1 |direct > organic | 1 | 2 |1
1 |organic > direct | 0 | 2 |NULL
Upvotes: 1
Views: 869
Reputation: 172994
for BigQuery Standard SQL
#standardSQL
WITH yourTable AS (
SELECT 1 AS user_id,'1a' AS visit_id, '2017-01-01 14:10:12' AS DATETIME,
'google cpc' AS channelgrouping, 0 AS transaction , 1 AS visit UNION ALL
SELECT 1, '1b', '2017-01-01 20:10:12', 'email', 1, 1 UNION ALL
SELECT 1, '1c','2017-01-03 08:10:12', 'direct', 0, 1 UNION ALL
SELECT 1, '1d','2017-01-04 13:10:14', 'organic', 1, 1 UNION ALL
SELECT 1, '1e','2017-03-04 13:10:14', 'organic', 0, 1 UNION ALL
SELECT 1, '1f','2017-03-04 13:10:14', 'direct', 0, 1
)
SELECT
user_id,
STRING_AGG(channelgrouping, ' > ' ORDER BY DATETIME) AS channelgrouping_path,
SUM(transaction) AS transaction,
SUM(visit) AS visits,
IF(SUM(transaction) = 0, NULL,
TIMESTAMP_DIFF(MAX(CAST(datetime AS TIMESTAMP)), MIN(CAST(datetime AS TIMESTAMP)), DAY)
) AS days_before_purchase
FROM (
SELECT
*,
SUM(transaction) OVER(PARTITION BY user_id ORDER BY datetime
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
) AS grp
FROM yourTable
)
GROUP BY user_id, IFNULL(grp, 0)
Upvotes: 1