Reputation: 243
I have a simple CTE query that removes duplicates, keeping only the most recently updated row for each agent.
-- Deduplicate agent rows: for each distinct combination of agent name,
-- address, phone and email, keep only the most recently updated record.
WITH cte AS (
    SELECT
        Agent_SK,
        Listing_Agent_License_Number,
        Listing_Agent_Name,
        Listing_Agent_Address,
        Listing_Agent_Phone,
        Listing_Agent_Email,
        Office_Name,
        Office_Address,
        Office_Phone,
        Office_Email,
        Update_Timestamp,
        -- 1 marks the newest row within its duplicate group.
        ROW_NUMBER() OVER (
            PARTITION BY
                Listing_Agent_Name,
                Listing_Agent_Address,
                Listing_Agent_Phone,
                Listing_Agent_Email
            ORDER BY Update_Timestamp DESC
        ) AS recency_rank
    FROM `mother-216719.VALUATION.MLS`
    -- Rows without an agent surrogate key are excluded up front.
    WHERE Agent_SK IS NOT NULL
)
SELECT
    Agent_SK,
    Listing_Agent_License_Number,
    Listing_Agent_Name,
    Listing_Agent_Address,
    Listing_Agent_Phone,
    Listing_Agent_Email,
    Office_Name,
    Office_Address,
    Office_Phone,
    Office_Email,
    Update_Timestamp
FROM cte
WHERE recency_rank = 1;
This query fails with a "Resources exceeded" error. I think the cause is the ROW_NUMBER()
function. How can I work around this?
Upvotes: 1
Views: 100
Reputation: 173190
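The query below is for BigQuery Standard SQL and should do the trick. Instead of numbering every row in each partition with ROW_NUMBER(), it groups by the same keys and keeps only the most recent row per group via ARRAY_AGG(... ORDER BY ... LIMIT 1), which generally needs far fewer resources.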
#standardSQL
-- Deduplicate agent rows without a window function: group by the duplicate
-- key and keep only the most recently updated row of each group.
WITH agents AS (
    SELECT
        Agent_SK,
        Listing_Agent_License_Number,
        Listing_Agent_Name,
        Listing_Agent_Address,
        Listing_Agent_Phone,
        Listing_Agent_Email,
        Office_Name,
        Office_Address,
        Office_Phone,
        Office_Email,
        Update_Timestamp
    FROM `mother-216719.VALUATION.MLS`
    -- Rows without an agent surrogate key are excluded up front.
    WHERE Agent_SK IS NOT NULL
)
SELECT AS VALUE
    -- Aggregate whole rows newest-first; LIMIT 1 retains just the latest row,
    -- and [OFFSET(0)] unwraps it from the resulting one-element array.
    -- SELECT AS VALUE then expands that row's fields back into output columns.
    ARRAY_AGG(
        agent_row
        ORDER BY agent_row.Update_Timestamp DESC
        LIMIT 1
    )[OFFSET(0)]
FROM agents AS agent_row
GROUP BY
    agent_row.Listing_Agent_Name,
    agent_row.Listing_Agent_Address,
    agent_row.Listing_Agent_Phone,
    agent_row.Listing_Agent_Email
Upvotes: 3