Reputation: 17
I have a table in BQ
WITH tbl_campaign_ipmapping AS
(
SELECT 'advertiser1' as advertiser, 'campaign1' as campaign, ['10.0.0.0','20.0.0.0','30.0.0.0', '40.0.0.0'] AS ip_array UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign2' as campaign, ['10.0.0.0', '20.0.0.0', '50.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign3' as campaign, ['10.0.0.0', '40.0.0.0', '60.0.0.0', '70.0.0.0', '80.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign4' as campaign, ['10.0.0.0', '20.0.0.0', '30.0.0.0'] UNION ALL
SELECT 'advertiser2' , 'campaign1' , ['10.1.1.1','20.1.1.1','30.1.1.1', '40.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign2' , ['10.1.1.1', '20.1.1.1', '50.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign3' , ['10.1.1.1', '40.1.1.1', '60.1.1.1', '70.1.1.1', '80.1.1.1'] UNION ALL
SELECT 'advertiser2', 'campaign4' , ['10.1.1.1', '20.1.1.1', '30.1.1.1']
) select * from tbl_campaign_ipmapping
For an input advertiser lets say advertiser1 The logic to be implemented is depending For each row retrieve the list of IP's and compare them with the IP's from all other rows(excluding the current row) and print the IP's that's not present.
So the expected output is
advertiser1, campaign1, ['50.0.0.0', '60.0.0.0', '70.0.0.0']
advertiser1,campaign2, ['30.0.0.0', '40.0.0.0', '60.0.0.0', '70.0.0.0']
advertiser1,campaign3, ['20.0.0.0','30.0.0.0', '50.0.0.0' ]
advertiser1,campaign4, ['40.0.0.0', '50.0.0.0', '60.0.0.0', '70.0.0.0'
Actually I don't need the array just the count. Just listed the array in the above output to clarify the logic.
Upvotes: 0
Views: 241
Reputation: 173046
Try below version - I feel it should be more optimal then previous ones
with advertiser_campaign_ip as (
select advertiser, campaign, ip
from (
select advertiser, ip
from tbl_campaign_ipmapping, unnest(ip_array) ip
group by advertiser, ip
) join (
select advertiser, campaign
from tbl_campaign_ipmapping
)
using (advertiser)
)
select a.advertiser, a.campaign,
array_agg(a.ip) as missing_ip
from advertiser_campaign_ip a
left join (select advertiser, campaign, ip from tbl_campaign_ipmapping, unnest(ip_array) ip) b
using (advertiser, campaign, ip)
where b.ip is null
group by a.advertiser, a.campaign
Upvotes: 1
Reputation: 833
Try This. Not sure if this is optimized way, But I tried using SET operators
WITH tbl_campaign_ipmapping AS
(
SELECT 'advertiser1' as advertiser, 'campaign1' as campaign, ['10.0.0.0','20.0.0.0','30.0.0.0', '40.0.0.0'] AS ip_array UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign2' as campaign, ['10.0.0.0', '20.0.0.0', '50.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign3' as campaign, ['10.0.0.0', '40.0.0.0', '60.0.0.0', '70.0.0.0', '80.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign4' as campaign, ['10.0.0.0', '20.0.0.0', '30.0.0.0'] UNION ALL
SELECT 'advertiser2' , 'campaign1' , ['10.1.1.1','20.1.1.1','30.1.1.1', '40.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign2' , ['10.1.1.1', '20.1.1.1', '50.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign3' , ['10.1.1.1', '40.1.1.1', '60.1.1.1', '70.1.1.1', '80.1.1.1'] UNION ALL
SELECT 'advertiser2', 'campaign4' , ['10.1.1.1', '20.1.1.1', '30.1.1.1']
),
advertiser_all_ip as
(
select advertiser ,array_concat_agg(ip_array ) as all_ips
from tbl_campaign_ipmapping
group by advertiser
),cte as (
select tci.advertiser ,campaign ,ip_array , all_ips
from tbl_campaign_ipmapping tci
join advertiser_all_ip aaip
on tci.advertiser = aaip.advertiser
)
select advertiser ,campaign,array_length(array_agg(arr)) as distinct_count
from (
select advertiser ,campaign,arr from cte,unnest(all_ips) as arr
except distinct
select advertiser ,campaign,arr from cte,unnest(ip_array) as arr
)
group by advertiser ,campaign
Upvotes: 0
Reputation: 173046
Consider below approach
with advertiser_ips as (
select advertiser, array_agg(distinct ip) ip_array
from tbl_campaign_ipmapping, unnest(ip_array) ip
group by advertiser
)
select advertiser, campaign,
array( select distinct ip
from b.ip_array ip
where not ip in unnest(a.ip_array)
) as missing_ip
from tbl_campaign_ipmapping a
join advertiser_ips b
using(advertiser)
if applied to sample data in your question - output is
Upvotes: 0
Reputation: 10035
You may try utilizing another CTE to find all possible IPs before using this list to gather all other ips not apart of the current ip array eg
WITH tbl_campaign_ipmapping AS
(
SELECT 'advertiser1' as advertiser, 'campaign1' as campaign, ['10.0.0.0','20.0.0.0','30.0.0.0', '40.0.0.0'] AS ip_array UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign2' as campaign, ['10.0.0.0', '20.0.0.0', '50.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign3' as campaign, ['10.0.0.0', '40.0.0.0', '60.0.0.0', '70.0.0.0', '80.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign4' as campaign, ['10.0.0.0', '20.0.0.0', '30.0.0.0'] UNION ALL
SELECT 'advertiser2' , 'campaign1' , ['10.1.1.1','20.1.1.1','30.1.1.1', '40.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign2' , ['10.1.1.1', '20.1.1.1', '50.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign3' , ['10.1.1.1', '40.1.1.1', '60.1.1.1', '70.1.1.1', '80.1.1.1'] UNION ALL
SELECT 'advertiser2', 'campaign4' , ['10.1.1.1', '20.1.1.1', '30.1.1.1']
),
all_ips AS (
SELECT DISTINCT ip
FROM tbl_campaign_ipmapping, unnest(ip_array) ip
WHERE advertiser='advertiser1'
)
select
advertiser ,
campaign,
ARRAY(
SELECT i.ip
FROM all_ips i
WHERE i.ip NOT IN (
SELECT ipa FROM UNNEST(t.ip_array) ipa
)
) as other_ips
from
tbl_campaign_ipmapping t
WHERE advertiser='advertiser1'
or joining on a table of all possible ips (you may also consider materializing all_ips) eg
WITH tbl_campaign_ipmapping AS
(
SELECT 'advertiser1' as advertiser, 'campaign1' as campaign, ['10.0.0.0','20.0.0.0','30.0.0.0', '40.0.0.0'] AS ip_array UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign2' as campaign, ['10.0.0.0', '20.0.0.0', '50.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign3' as campaign, ['10.0.0.0', '40.0.0.0', '60.0.0.0', '70.0.0.0', '80.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign4' as campaign, ['10.0.0.0', '20.0.0.0', '30.0.0.0'] UNION ALL
SELECT 'advertiser2' , 'campaign1' , ['10.1.1.1','20.1.1.1','30.1.1.1', '40.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign2' , ['10.1.1.1', '20.1.1.1', '50.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign3' , ['10.1.1.1', '40.1.1.1', '60.1.1.1', '70.1.1.1', '80.1.1.1'] UNION ALL
SELECT 'advertiser2', 'campaign4' , ['10.1.1.1', '20.1.1.1', '30.1.1.1']
),
all_ips AS (
SELECT DISTINCT ip
FROM tbl_campaign_ipmapping, unnest(ip_array) ip
WHERE advertiser='advertiser1'
)
select
advertiser,
campaign,
ARRAY_AGG(i.ip) as other_ips
from
tbl_campaign_ipmapping t
inner join
all_ips i on i.ip NOT IN UNNEST(t.ip_array)
WHERE advertiser='advertiser1'
group by
advertiser,campaign
Let me know if this works for you
Upvotes: 0