Travis
Travis

Reputation: 15

SQL Extracting Data from Column

Looking for some SQL help selecting particular data from a column in my table, and creating new columns with that. Here's a sample of the data in the AttributeDescription column:

AttributeDescription
----------------------------------
Color: black<br />Size: L
Color: black<br />Size: M
Color: black<br />Size: S
Colour: turquoise<br />Size: XXL
Colour: white<br />Size: M
Colour: white<br />Size: XL
Model: iPhone 5/5s
Model: iPhone 6
Size: 2X-Large
Size: Large
Size: Medium
Size: Medium / Small
Size: Small
Size: XL
Size: X-Large
Size: X-Large / Large`

I would like to turn this into three columns named "Variant_Model", "Variant_Size" and "Variant_Color". End result should be like:

Variant_Model |Variant_Size | Variant_Color
--------------+-------------+-------------
              | L           | black
              | M           | black
              | S           | black
              | XXL         | turquoise
              | M           | white
              | XL          | white
iPhone 5/5s   |             |
iPhone 6      |             |
              | 2X-Large    |
              | Large       |
              | Medium      |
              | Medium / Small|
              | Small       |
              | XL          |
              | X-Large     |
              | X-Large / Large|   `

Note that in the original column, the attribute could appear as either "Color" or "Colour".

Bonus points for standardizing the abbreviated sizes (eg. "L", "M") into the appropriate words!

Thanks very much in advance!

Upvotes: 0

Views: 84

Answers (1)

Kashif Qureshi
Kashif Qureshi

Reputation: 1490

One Way to do it is like this

Query:

SELECT Model AS Variant_Model,
       CASE
           WHEN size = 'L'
           THEN 'Large'
           WHEN size = 'M'
           THEN 'Medium'
           WHEN size = 'S'
           THEN 'Small'
           WHEN size = 'XL'
           THEN 'X-Large'
           WHEN size = 'XXL'
           THEN '2X-Large'
           ELSE size
       END AS Variant_Size,
       COALESCE(color, Colour) AS Variant_Color
FROM
(
    SELECT id,
           LEFT(REPLACE(REPLACE(CAST(T2.Loc.query('.') AS    VARCHAR(255)), '<X>', ''), '</X>', ''), CHARINDEX(':', REPLACE(REPLACE(CAST(T2.Loc.query('.') AS VARCHAR(255)), '<X>', ''), '</X>', ''))-1) AS header,
           REVERSE(LEFT(REVERSE(REPLACE(REPLACE(CAST(T2.Loc.query('.') AS VARCHAR(255)), '<X>', ''), '</X>', '')), CHARINDEX(':', REVERSE(REPLACE(REPLACE(CAST(T2.Loc.query('.') AS VARCHAR(255)), '<X>', ''), '</X>', '')))-2)) AS Value
    FROM
    (
        SELECT ROW_NUMBER() OVER(ORDER BY AttributeDescription) AS id,
               CAST(('<X>'+REPLACE(REPLACE(AttributeDescription, '<br />', ','), ',', '</X><X>')+'</X>') AS XML) AS String
        FROM #a
    ) T
    CROSS APPLY String.nodes('/X') AS T2(Loc)
) data PIVOT(MAX(value) FOR header IN(Color,
                                      Size,
                                      Model,
                                      Colour)) p;

Complete Working Script

IF OBJECT_ID('tempdb..#a') IS NOT NULL
    DROP TABLE #a;

CREATE TABLE #a(AttributeDescription VARCHAR(255));

INSERT INTO #a
VALUES('Color: black<br />Size: L');

INSERT INTO #a
VALUES('Color: black<br />Size: M');

INSERT INTO #a
VALUES('Color: black<br />Size: S');

INSERT INTO #a
VALUES('Colour: turquoise<br />Size: XXL');

INSERT INTO #a
VALUES('Colour: white<br />Size: M');

INSERT INTO #a
VALUES('Colour: white<br />Size: XL');

INSERT INTO #a
VALUES('Model: iPhone 5/5s');

INSERT INTO #a
VALUES('Model: iPhone 6');

INSERT INTO #a
VALUES('Size: 2X-Large');

INSERT INTO #a
VALUES('Size: Large');

INSERT INTO #a
VALUES('Size: Medium');

INSERT INTO #a
VALUES('Size: Medium / Small');

INSERT INTO #a
VALUES('Size: Small');

INSERT INTO #a
VALUES('Size: XL');

INSERT INTO #a
VALUES('Size: X-Large');

INSERT INTO #a
VALUES('Size: X-Large / Large');

SELECT Model AS Variant_Model,
       CASE
           WHEN size = 'L'
           THEN 'Large'
           WHEN size = 'M'
           THEN 'Medium'
           WHEN size = 'S'
           THEN 'Small'
           WHEN size = 'XL'
           THEN 'X-Large'
           WHEN size = 'XXL'
           THEN '2X-Large'
           ELSE size
       END AS Variant_Size,
       COALESCE(color, Colour) AS Variant_Color
FROM
(
    SELECT id,
           LEFT(REPLACE(REPLACE(CAST(T2.Loc.query('.') AS    VARCHAR(255)), '<X>', ''), '</X>', ''), CHARINDEX(':', REPLACE(REPLACE(CAST(T2.Loc.query('.') AS VARCHAR(255)), '<X>', ''), '</X>', ''))-1) AS header,
           REVERSE(LEFT(REVERSE(REPLACE(REPLACE(CAST(T2.Loc.query('.') AS VARCHAR(255)), '<X>', ''), '</X>', '')), CHARINDEX(':', REVERSE(REPLACE(REPLACE(CAST(T2.Loc.query('.') AS VARCHAR(255)), '<X>', ''), '</X>', '')))-2)) AS Value
    FROM
    (
        SELECT ROW_NUMBER() OVER(ORDER BY AttributeDescription) AS id,
               CAST(('<X>'+REPLACE(REPLACE(AttributeDescription, '<br />', ','), ',', '</X><X>')+'</X>') AS XML) AS String
        FROM #a
    ) T
    CROSS APPLY String.nodes('/X') AS T2(Loc)
) data PIVOT(MAX(value) FOR header IN(Color,
                                      Size,
                                      Model,
                                      Colour)) p;

Upvotes: 1

Related Questions