gvkleef
gvkleef

Reputation: 323

How to determine the number of days before a purchase in BigQuery?

I would like to calculate the days between the first visit and the visit with the transaction per channelpath. See the following example.

  #standardSQL
WITH yourTable AS (
  SELECT 1 AS user_id,'1a' AS visit_id, '2017-01-01 14:10:12' AS DATETIME, 
'google cpc' AS channelgrouping, 0 AS transaction , 1 AS visit UNION ALL
  SELECT 1, '1b', '2017-01-01 20:10:12', 'email', 1, 1 UNION ALL
  SELECT 1, '1c','2017-01-03 08:10:12', 'direct', 0, 1 UNION ALL
  SELECT 1, '1d','2017-01-04 13:10:14', 'organic', 1, 1 UNION ALL
  Select 1, '1e','2017-03-04 13:10:14', 'organic', 0, 1 UNION ALL
  Select 1, '1f','2017-03-04 13:10:14', 'direct', 0, 1 
)
SELECT
  user_id, 
  STRING_AGG(channelgrouping, ' > ' ORDER BY DATETIME) AS channelgrouping_path,
  SUM(transaction) AS transaction,
  SUM(visit) AS visits
FROM (
  SELECT 
*, 
SUM(transaction) OVER(PARTITION BY user_id ORDER BY datetime 
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
) AS grp
  FROM yourTable
)
GROUP BY user_id, IFNULL(grp, 0) 

Current OUTPUT TABLE

user_id|channgelgrouping_path                |Transactions|Visits
1      |google cpc > email                   | 1          | 2    
1      |direct > organic                     | 1          | 2    
1      |organic > direct                     | 0          | 2    

DESIRED OUTPUT TABLE

user_id|channgelgrouping_path                |Transactions|Visits|Days before purchase
1      |google cpc > email                   | 1          | 2    |0
1      |direct > organic                     | 1          | 2    |1
1      |organic > direct                     | 0          | 2    |NULL

Upvotes: 1

Views: 869

Answers (1)

Mikhail Berlyant
Mikhail Berlyant

Reputation: 172994

for BigQuery Standard SQL

#standardSQL
WITH yourTable AS (
  SELECT 1 AS user_id,'1a' AS visit_id, '2017-01-01 14:10:12' AS DATETIME, 
    'google cpc' AS channelgrouping, 0 AS transaction , 1 AS visit UNION ALL
  SELECT 1, '1b', '2017-01-01 20:10:12', 'email', 1, 1 UNION ALL
  SELECT 1, '1c','2017-01-03 08:10:12', 'direct', 0, 1 UNION ALL
  SELECT 1, '1d','2017-01-04 13:10:14', 'organic', 1, 1 UNION ALL
  SELECT 1, '1e','2017-03-04 13:10:14', 'organic', 0, 1 UNION ALL
  SELECT 1, '1f','2017-03-04 13:10:14', 'direct', 0, 1 
)
SELECT
  user_id, 
  STRING_AGG(channelgrouping, ' > ' ORDER BY DATETIME) AS channelgrouping_path,
  SUM(transaction) AS transaction,
  SUM(visit) AS visits,
  IF(SUM(transaction) = 0, NULL, 
    TIMESTAMP_DIFF(MAX(CAST(datetime AS TIMESTAMP)), MIN(CAST(datetime AS TIMESTAMP)), DAY)
  ) AS days_before_purchase
FROM (
  SELECT 
*, 
SUM(transaction) OVER(PARTITION BY user_id ORDER BY datetime 
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
) AS grp
  FROM yourTable
)
GROUP BY user_id, IFNULL(grp, 0) 

Upvotes: 1

Related Questions