Tamir Klein
Tamir Klein

Reputation: 3642

How to split field with dynamic delimiter

I have a table in BigQuery with contact emails.

[email protected]

[email protected]

[email protected]

I need to extract name and family to separate columns. I wrote this SQL code but looking for other/better ways to do this


WITH emailWithUnserscore AS
      (SELECT *,
              SPLIT(SPLIT(string_field_0, '@')[SAFE_OFFSET(0)],'_')[SAFE_OFFSET(0)] AS firstName,
              SPLIT(SPLIT(string_field_0, '@')[SAFE_OFFSET(0)],'_')[SAFE_OFFSET(1)] AS lasttName
       FROM `project.dataset.contacts`
       WHERE LENGTH(SPLIT(SPLIT(string_field_0, '@')[SAFE_OFFSET(0)],'_')[SAFE_OFFSET(1)]) > 0 ),
         emailWithMinus AS
      (SELECT *,
              SPLIT(SPLIT(string_field_0, '@')[SAFE_OFFSET(0)],'-')[SAFE_OFFSET(0)] AS firstName,
              SPLIT(SPLIT(string_field_0, '@')[SAFE_OFFSET(0)],'-')[SAFE_OFFSET(1)] AS lasttName
       FROM `project.dataset.contacts`
       WHERE LENGTH(SPLIT(SPLIT(string_field_0, '@')[SAFE_OFFSET(0)],'-')[SAFE_OFFSET(1)]) > 0 ),
         emailWithDot AS
      (SELECT *,
              SPLIT(SPLIT(string_field_0, '@')[SAFE_OFFSET(0)],'.')[SAFE_OFFSET(0)] AS firstName,
              SPLIT(SPLIT(string_field_0, '@')[SAFE_OFFSET(0)],'.')[SAFE_OFFSET(1)] AS lasttName
       FROM `project.dataset.contacts`
       WHERE LENGTH(SPLIT(SPLIT(string_field_0, '@')[SAFE_OFFSET(0)],'.')[SAFE_OFFSET(1)]) > 0 ),
         allEmails AS
      (SELECT *,
              SPLIT(string_field_0, '@')[SAFE_OFFSET(0)] AS firstName,
              '' AS lasttName
       FROM `project.dataset.contacts`)
    SELECT allEmails.string_field_0 AS Email,
           if(LENGTH(emailWithUnserscore.lasttName) > 0, emailWithUnserscore.firstName, if(LENGTH(emailWithMinus.lasttName) > 0, emailWithMinus.firstName, if(LENGTH(emailWithDot.lasttName) > 0, emailWithDot.firstName, allEmails.firstName))) AS firstName,
           if(LENGTH(emailWithUnserscore.lasttName) > 0, emailWithUnserscore.lasttName, if(LENGTH(emailWithMinus.lasttName) > 0, emailWithMinus.lasttName, if(LENGTH(emailWithDot.lasttName) > 0, emailWithDot.lasttName, allEmails.lasttName))) AS lastName
    FROM allEmails
    LEFT JOIN emailWithUnserscore ON allEmails.string_field_0 = emailWithUnserscore.string_field_0
    LEFT JOIN emailWithMinus ON allEmails.string_field_0 = emailWithMinus.string_field_0
    LEFT JOIN emailWithDot ON allEmails.string_field_0 = emailWithDot.string_field_0
    ORDER BY Email DES

Upvotes: 1

Views: 228

Answers (1)

Mikhail Berlyant
Mikhail Berlyant

Reputation: 173028

#standardSQL
WITH `project.dataset.contacts` AS (
  SELECT '[email protected]' email UNION ALL
  SELECT '[email protected]' UNION ALL
  SELECT '[email protected]' 
)
SELECT 
  email,
  REGEXP_EXTRACT(email, r'(.*?)[_\-.]') firstName,
  REGEXP_EXTRACT(email, r'[_\-.](.*?)@') lastName
FROM `project.dataset.contacts`

result

Row     email                       firstName   lastName     
1       [email protected]     name        family   
2       [email protected]     name        family   
3       [email protected]     name        family   

Upvotes: 2

Related Questions