domplexity
domplexity

Reputation: 38

conditional logic for within on repeated fields

Say I have a table structure created with the following code:

WITH YourTable AS (
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' as class, 'u1' AS userid, 4 as score UNION ALL
  SELECT 'a1' AS item,  TIMEsTAMP('2016-03-03 19:53:23 UTC') AS click_time, 'm' as class, 'u2' AS userid, 1 as score UNION ALL
  SELECT 'a1' AS item,  TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' as class, 'u3' AS userid, 0 as score UNION ALL
  SELECT 'a1' AS item,  TIMEsTAMP('2016-03-03 19:51:23 UTC') AS click_time, 'd' as class, 'u4' AS userid, 8 as score UNION ALL
  SELECT 'a2' AS item,  TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' as class, 'u1' AS userid, 5 as score UNION ALL
  SELECT 'a2' AS item,  TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' as class, 'u2' AS userid, 2 as score
)
SELECT
  item,
  ARRAY_AGG(STRUCT(click_time, userid, class, score)) as clicks
  FROM
  YourTable
GROUP BY
  item

Now I would like to select the timestamp of the first occurrence (within an item) of a click of the class 'm'. Currently I do not see a way to do this without a subquery.

Any ideas whether this is possible at all?

Upvotes: 0

Views: 47

Answers (2)

Mikhail Berlyant
Mikhail Berlyant

Reputation: 173046

BigQuery Standard SQL

#standardSQL
WITH YourTable AS (
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u1' AS userid, 4 AS score UNION ALL
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:53:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 1 AS score UNION ALL
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u3' AS userid, 0 AS score UNION ALL
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:51:23 UTC') AS click_time, 'd' AS class, 'u4' AS userid, 8 AS score UNION ALL
  SELECT 'a2' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u1' AS userid, 5 AS score UNION ALL
  SELECT 'a2' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 2 AS score
)
SELECT item,
  ARRAY_AGG(STRUCT(click_time, userid, class, score) ORDER BY CASE class WHEN 'm' THEN 0 ELSE 1 END, click_time LIMIT 1) AS clicks
FROM YourTable
GROUP BY item

with result as below

item    clicks.click_time           clicks.userid   clicks.class    clicks.score  
a1      2016-03-03 19:52:23 UTC     u1              m               4    
a2      2016-03-03 19:52:23 UTC     u2              m               2    

or

#standardSQL
WITH YourTable AS (
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u1' AS userid, 4 AS score UNION ALL
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:53:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 1 AS score UNION ALL
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u3' AS userid, 0 AS score UNION ALL
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:51:23 UTC') AS click_time, 'd' AS class, 'u4' AS userid, 8 AS score UNION ALL
  SELECT 'a2' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' AS class, 'u1' AS userid, 5 AS score UNION ALL
  SELECT 'a2' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' AS class, 'u2' AS userid, 2 AS score
), 
TransformedTable AS (
  SELECT item,
    ARRAY_AGG(STRUCT(click_time, userid, class, score)) AS clicks
  FROM YourTable
  GROUP BY item
)
SELECT item,
  ( SELECT click 
    FROM UNNEST(clicks) click
    WHERE class = 'm'
    ORDER BY click_time 
    LIMIT 1
  ) AS clicks
FROM TransformedTable 

with exactly same output as first query

Upvotes: 1

Elliott Brossard
Elliott Brossard

Reputation: 33755

I'm not sure why you want to avoid a subquery. In this case, you need a scalar subquery over the array, which is evaluated for each row. Try this, using your sample data and schema as a basis:

WITH YourTable AS (
  SELECT 'a1' AS item,  TIMESTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' as class, 'u1' AS userid, 4 as score UNION ALL
  SELECT 'a1' AS item,  TIMEsTAMP('2016-03-03 19:53:23 UTC') AS click_time, 'm' as class, 'u2' AS userid, 1 as score UNION ALL
  SELECT 'a1' AS item,  TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' as class, 'u3' AS userid, 0 as score UNION ALL
  SELECT 'a1' AS item,  TIMEsTAMP('2016-03-03 19:51:23 UTC') AS click_time, 'd' as class, 'u4' AS userid, 8 as score UNION ALL
  SELECT 'a2' AS item,  TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'd' as class, 'u1' AS userid, 5 as score UNION ALL
  SELECT 'a2' AS item,  TIMEsTAMP('2016-03-03 19:52:23 UTC') AS click_time, 'm' as class, 'u2' AS userid, 2 as score
), TransformedTable AS (
  SELECT
    item,
    ARRAY_AGG(STRUCT(click_time, userid, class, score)) as clicks
  FROM
    YourTable
  GROUP BY
    item
)
SELECT
  item,
  (SELECT click_time FROM UNNEST(clicks)
   WHERE class = 'm'
   ORDER BY click_time LIMIT 1
  ) AS first_click_time
FROM TransformedTable;

Upvotes: 0

Related Questions