A H
A H

Reputation: 2570

Generate CASE WHEN statement using another table

I would like to create a query that does the following:

Using a regex_mapping table, find all rows in the sample data that REGEXP_MATCH on x

WITH sample_data AS (
SELECT x, y
FROM (SELECT "asd rmkt asdf" AS x, true AS y UNION ALL  -- should map to remekartier
      SELECT "as asdf", true UNION ALL -- should map to ali sneider
      SELECT "asdafsd", false)  -- should map to NULL
),

regex_mapping AS (
SELECT regex, map
FROM (SELECT "as" AS regex, "ali sneider" AS map UNION ALL
      SELECT "rmkt" AS regex, "remekartier" AS map )
)

SELECT sample_data.*, mapped_item
FROM sample_data
-- but here, use multiple REGEXP_MATCH with CASE WHEN looping over the regex_mappings.
-- e.g. CASE WHEN REGEXP_MATCH(x, "as") THEN "ali sneider"
             WHEN REGEXP_MATCH(x, "rmkt") THEN "remakrtier" END AS mapped_item)

Upvotes: 0

Views: 408

Answers (1)

Suds
Suds

Reputation: 156

Try this -

WITH sample_data AS (
SELECT x, y
FROM (SELECT "asd rmkt asdf" AS x, true AS y UNION ALL  -- should map to remekartier
      SELECT "as asdf", true UNION ALL -- should map to ali sneider
      SELECT "asdafsd", false)
),
regex_mapping AS (
SELECT regex, map
FROM (SELECT "as" AS regex, "ali sneider" AS map UNION ALL
      SELECT "rmkt" AS regex, "remekartier" AS map )
)
SELECT s.*, r.map 
FROM sample_data s, regex_mapping r
WHERE regexp_contains(s.x,concat('\\b',r.regex,'\\b')) 

The results ->

enter image description here

Second way: Instead of cross-join, use a scalar subquery. I have used limit so that the subquery doesn't return more than 1 row and if multiple regexp matches, then it will return only one of them

--- same WITH clause as above query ---
SELECT s.*, (SELECT r.map 
               FROM regex_mapping r 
              WHERE regexp_contains(s.x,concat('\\b',r.regex,'\\b')) 
              LIMIT 1) as map
FROM sample_data s

The results ->

enter image description here

Third way: Deduplicated Data

WITH sample_data AS (
  SELECT campaign_name, placement_name
  FROM (SELECT "as_rmkt_asdf" AS campaign_name, "xdd" AS placement_name UNION ALL  -- should map to remekartier
        SELECT "as_asdf", "sdfsdf" UNION ALL -- should map to ali sneider
        SELECT "as_rmkt_dafsd", "sdfg" UNION ALL -- should map to rmkt
        SELECT "asf_adsdf", "gdf" -- should map to NULL (because higher priority)
        )
),
regex_mapping AS (
  SELECT regex, map, priority
  FROM (SELECT "rmkt" AS regex, "remekartier" AS map, 1 AS priority UNION ALL
        SELECT "as" AS regex, "ali sneider" AS map, 2 AS priority)
),

X AS (
SELECT s.*,
       CASE WHEN regexp_contains(s.campaign_name, concat('(^|_)',r.regex,'($|_)')) THEN r.map ELSE NULL END AS map,
       ROW_NUMBER() OVER (PARTITION BY s.campaign_name ORDER BY regexp_contains(s.campaign_name, concat('(^|_)',r.regex,'($|_)')) DESC, r.priority) AS rn
FROM sample_data s
CROSS JOIN regex_mapping r

)

SELECT * EXCEPT (rn)
FROM X
WHERE rn = 1

Upvotes: 1

Related Questions