Reputation: 1
I am currently trying to get better on AdventureWorks but I've got one issue. Indeed, I would like to use the data from the column "Demographics" on the table Sales_Store but I don't know how I can separate the different elements. Example of one record below:
\<StoreSurvey xmlns="http://schemas.microsoft.com/sqlserver/2004/07/adventure-works/StoreSurvey"\>
\<AnnualSales\>800000\</AnnualSales\>
\<AnnualRevenue\>80000\</AnnualRevenue\>
\<BankName\>United Security\</BankName\>
\<BusinessType\>BM\</BusinessType\>
\<YearOpened\>1996\</YearOpened\>
\<Specialty\>Mountain\</Specialty\>
\<SquareFeet\>21000\</SquareFeet\>
\<Brands\>2\</Brands\>
\<Internet\>ISDN\</Internet\>
\<NumberEmployees\>13\</NumberEmployees\>
\</StoreSurvey\>
I am working with access but i can also use MySQL.
I already use the the function Split but i don't know why it did not work at all.
SELECT
Split(Split(Demographics, '<')(1), '>')(0) AS AnnualSales,
Split(Split(Demographics, '<AnnualRevenue>')(1), '</AnnualRevenue>')(0) AS AnnualRevenue,
Split(Split(Demographics, '<BankName>')(1), '</BankName>')(0) AS BankName,
Split(Split(Demographics, '<BusinessType>')(1), '</BusinessType>')(0) AS BusinessType,
Split(Split(Demographics, '<YearOpened>')(1), '</YearOpened>')(0) AS YearOpened,
Split(Split(Demographics, '<Specialty>')(1), '</Specialty>')(0) AS Specialty,
Split(Split(Demographics, '<SquareFeet>')(1), '</SquareFeet>')(0) AS SquareFeet,
Split(Split(Demographics, '<Brands>')(1), '</Brands>')(0) AS Brands,
Split(Split(Demographics, '<Internet>')(1), '</Internet>')(0) AS Internet,
Split(Split(Demographics, '<NumberEmployees>')(1), '</NumberEmployees>')(0) AS NumberEmployees
FROM
Sales_Store
SELECT
ExtractValue(Demographics, 'StoreSurvey/AnnualSales') AS AnnualSales,
ExtractValue(Demographics, 'StoreSurvey/AnnualRevenue') AS AnnualRevenue,
ExtractValue(Demographics, 'StoreSurvey/BankName') AS BankName,
ExtractValue(Demographics, 'StoreSurvey/BusinessType') AS BusinessType,
ExtractValue(Demographics, 'StoreSurvey/YearOpened') AS YearOpened,
ExtractValue(Demographics, 'StoreSurvey/Specialty') AS Specialty,
ExtractValue(Demographics, 'StoreSurvey/SquareFeet') AS SquareFeet,
ExtractValue(Demographics, 'StoreSurvey/Brands') AS Brands,
ExtractValue(Demographics, 'StoreSurvey/Internet') AS Internet,
ExtractValue(Demographics, 'StoreSurvey/NumberEmployees') AS NumberEmployees
FROM
Sales_Store
The link for the database is right there https://github.com/Microsoft/sql-server-samples/releases/download/adventureworks/AdventureWorks2016.bak
Upvotes: 0
Views: 156
Reputation: 1
WITH CustomerInfo AS (
SELECT
C.CustomerID,
EXTRACT(YEAR FROM CURRENT_DATE()) - EXTRACT(YEAR FROM SAFE_CAST(LEFT(REGEXP_EXTRACT(C.demographics, r'<BirthDate>([^<]+)</BirthDate>'), LENGTH(REGEXP_EXTRACT(C.demographics, r'<BirthDate>([^<]+)</BirthDate>')) - 1) AS DATE)) AS Age,
REGEXP_EXTRACT(C.demographics, r'<Gender>([^<]+)</Gender>') AS Gender,
REGEXP_EXTRACT(C.demographics, r'<TotalPurchaseYTD>([^<]+)</TotalPurchaseYTD>') AS TotalPurchaseYTD,
REGEXP_EXTRACT(C.demographics, r'<MaritalStatus>([^<]+)</MaritalStatus>') AS MaritalStatus,
REGEXP_EXTRACT(C.demographics, r'<YearlyIncome>([^<]+)</YearlyIncome>') AS YearlyIncome,
REGEXP_EXTRACT(C.demographics, r'<TotalChildren>([^<]+)</TotalChildren>') AS TotalChildren,
REGEXP_EXTRACT(C.demographics, r'<NumberChildrenAtHome>([^<]+)</NumberChildrenAtHome>') AS NumberChildrenAtHome,
REGEXP_EXTRACT(C.demographics, r'<Education>([^<]+)</Education>') AS Education,
REGEXP_EXTRACT(C.demographics, r'<Occupation>([^<]+)</Occupation>') AS Occupation,
REGEXP_EXTRACT(C.demographics, r'<HomeOwnerFlag>([^<]+)</HomeOwnerFlag>') AS HomeOwnerFlag,
REGEXP_EXTRACT(C.demographics, r'<NumberCarsOwned>([^<]+)</NumberCarsOwned>') AS NumberCarsOwned,
REGEXP_EXTRACT(C.demographics, r'<CommuteDistance>([^<]+)</CommuteDistance>') AS CommuteDistance
FROM
`adwentureworks_db.individual` C
)
SELECT
O.CustomerID,
con.Firstname,
con.LastName,
CustomerInfo.Gender,
CASE
WHEN CustomerInfo.Age >= 18 AND CustomerInfo.Age <= 30 THEN '18–30'
WHEN CustomerInfo.Age > 30 AND CustomerInfo.Age <= 45 THEN '31–45'
WHEN CustomerInfo.Age > 45 AND CustomerInfo.Age <= 60 THEN '46–60'
ELSE 'Above 60'
END AS AgeGroup,
COUNT(*) AS TotalCustomers,
SUM(O.TotalDue) AS TotalRevenue,
CustomerInfo.TotalPurchaseYTD,
CustomerInfo.MaritalStatus,
CustomerInfo.YearlyIncome,
CustomerInfo.TotalChildren,
CustomerInfo.NumberChildrenAtHome,
CustomerInfo.Education,
CustomerInfo.Occupation,
CustomerInfo.HomeOwnerFlag,
CustomerInfo.NumberCarsOwned,
CustomerInfo.CommuteDistance
FROM
CustomerInfo
JOIN
`adwentureworks_db.salesorderheader` O
ON
CustomerInfo.CustomerID = O.CustomerID
JOIN
adwentureworks_db.individual ind
ON
O.CustomerID = ind.CustomerID
JOIN
adwentureworks_db.contact con
ON
ind.ContactID = con.ContactID
GROUP BY
con.Firstname,
con.LastName,
O.CustomerID,
CustomerInfo.Gender,
AgeGroup,
CustomerInfo.TotalPurchaseYTD,
CustomerInfo.MaritalStatus,
CustomerInfo.YearlyIncome,
CustomerInfo.TotalChildren,
CustomerInfo.NumberChildrenAtHome,
CustomerInfo.Education,
CustomerInfo.Occupation,
CustomerInfo.HomeOwnerFlag,
CustomerInfo.NumberCarsOwned,
CustomerInfo.CommuteDistance
ORDER BY
CustomerInfo.Gender,
AgeGroup;
Upvotes: 0