Reputation: 39
I have a Textcol that looks like this:
example of Text:
Note:test note
Phone Call: Fairview Wines & Spirits
Number Called: 1 604 601-8000
Phone Description: Main
Subject: Arrange meeting
Result: Arranged interview
Duration: 00:00:09
I want to get the subject, Result and note into four separate columns if possible.. I have tried two approached:
First example:
SELECT "Note"=SUBSTRING(TextCol,1,25),"Subject"=SUBSTRING(TextCol,110,25)
,"Result"=SUBSTRING(TextCol,135,33) FROM AMGR_Notes where type = 2
Second example:
SELECT
RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.SubjectStart + 8, A.ResultStart - A.SubjectStart - 8), CHAR(10), ''), CHAR(13), ''))) AS [Subject]
,RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.ResultStart + 7, A.DurationStart - A.ResultStart - 7), CHAR(10), ''), CHAR(13), ''))) AS Result
,RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, 6, A.PhoneStart - 6), CHAR(10), ''), CHAR(13), ''))) AS Note
FROM AMGR_Notes T
CROSS APPLY
(
VALUES
(
CHARINDEX('Phone Call:', T.TextCol)
,CHARINDEX('Subject:', T.TextCol)
,CHARINDEX('Result:', T.TextCol)
,CHARINDEX('Duration:', T.TextCol)
)
) A (PhoneStart, SubjectStart, ResultStart, DurationStart);
The second example gives an error:
Msg 537, Level 16, State 3, Line 1 Invalid length parameter passed to the LEFT or SUBSTRING function.
I am not a super genuis when it comes to SQL, so i dont understand the error. I am also wondering if a CASE statement might work better in this instance
Any help would be greatly appreciated
Upvotes: 0
Views: 109
Reputation: 7918
I think this can be done super clean and efficiently just using CROSS APPLY. Here's some on-the-fly inline function logic to handle a single string (forgive my naming - I'm in a hurry):
DECLARE @string varchar(8000) =
'Phone Call: Fairview Wines & Spirits
Number Called: 1 604 601-8000
Phone Description: Main
Subject: Arrange meeting
Result: Arranged interview
Duration: 00:00:09';
SELECT
[Phone Call] = MAX(CASE s.n WHEN 1 THEN here.yougo END),
[Subject] = MAX(CASE s.n WHEN 2 THEN here.yougo END),
[Result] = MAX(CASE s.n WHEN 3 THEN here.yougo END),
[Duration] = MAX(CASE s.n WHEN 4 THEN here.yougo END)
FROM (VALUES (@string)) t(s) -- target.string
CROSS APPLY (VALUES (1,'Phone Call:'),(2,'Subject:'),(3,'Result:'),(4,'Duration:')) s(n,t) -- search.text
CROSS APPLY (VALUES (CHARINDEX(s.t, t.s), LEN(s.t))) st(l,ln) -- start.location, start.length
CROSS APPLY (VALUES (ISNULL(NULLIF(CHARINDEX(char(10), t.s, st.l+st.ln+1),0),8000))) e(ln) -- end line
CROSS APPLY (VALUES (SUBSTRING(t.s, st.l+st.ln, e.ln-(st.l+st.ln)))) here(yougo);
This returns:
Phone Call Subject Result Duration
-------------------------- ---------------- ------------------- ---------
Fairview Wines & Spirits Arrange meeting Arranged interview 00:00:09
The execution plan is fantastic - a total subtree cost of 0.0000071 (on my PC). To apply this logic to a table you would do this:
DECLARE @AMGR_Notes TABLE (someid int identity primary key, somestring varchar(8000));
INSERT @AMGR_Notes VALUES
('Phone Call: ACME Treats
Number Called: 1 604 555-9988
Phone Description: Old School Landline
Subject: Buy Ice Cream
Result: He sold me some
Duration: 00:00:01'),
('Phone Call: Fairview Wines & Spirits
Number Called: 1 604 601-8000
Phone Description: Main
Subject: Arrange meeting
Result: Arranged interview
Duration: 00:00:09');
SELECT
t.someid,
[Phone Call] = MAX(CASE s.n WHEN 1 THEN here.yougo END),
[Subject] = MAX(CASE s.n WHEN 2 THEN here.yougo END),
[Result] = MAX(CASE s.n WHEN 3 THEN here.yougo END),
[Duration] = MAX(CASE s.n WHEN 4 THEN here.yougo END)
FROM @AMGR_Notes tt
CROSS APPLY (VALUES (tt.someid,tt.somestring)) t(someid,s) -- target.string
CROSS APPLY (VALUES (1,'Phone Call:'),(2,'Subject:'),(3,'Result:'),(4,'Duration:')) s(n,t) -- search.text
CROSS APPLY (VALUES (CHARINDEX(s.t, t.s), LEN(s.t))) st(l,ln) -- start.location, start.length
CROSS APPLY (VALUES (ISNULL(NULLIF(CHARINDEX(char(10), t.s, st.l+st.ln+1),0),8000))) e(ln) -- end line
CROSS APPLY (VALUES (SUBSTRING(t.s, st.l+st.ln, e.ln-(st.l+st.ln)))) here(yougo)
GROUP BY t.someid;
The MAX-CASE logic is another way to pivot but is cleaner and easier IMO (Credit to Jeff Moden for teaching me the technique.) The key for performance here (also true for using pivot) would be to make sure there is an index on the column which you are grouping by which, in my example, is the someid column.
Upvotes: 1
Reputation: 67311
Try it like this:
DECLARE @tbl TABLE(ID INT IDENTITY, TextCol VARCHAR(500));
INSERT INTO @tbl VALUES
('Phone Call: Fairview Wines & Spirits
Number Called: 1 604 601-8000
Phone Description: Main
Subject: Arrange meeting
Result: Arranged interview
Duration: 00:00:09
');
WITH RowWise AS
(
SELECT CAST('<x>' + REPLACE((SELECT REPLACE(TextCol,CHAR(13),'') AS [*] FOR XML PATH('')),CHAR(10),'</x><x>') + '</x>' AS XML) AS OneRow
FROM @tbl
)
,EachRow AS
(
SELECT LTRIM(RTRIM(r.value('text()[1]','nvarchar(max)'))) AS RowText
FROM RowWise
CROSS APPLY OneRow.nodes('/x[text()]') AS A(r)
)
SELECT LEFT(RowText,CHARINDEX(':',RowText)-1)
,SUBSTRING(RowText,CHARINDEX(':',RowText)+1,1000)
FROM EachRow;
The result
Phone Call Fairview Wines & Spirits
Number Called 1 604 601-8000
Phone Description Main
Subject Arrange meeting
Result Arranged interview
Duration 00:00:09
Added PIVOT
to get this in one line
WITH PlainLineBreak AS
(
SELECT ID
,REPLACE(TextCol,CHAR(13),CHAR(10)) AS TextCol
FROM @tbl
)
,LineWise AS
(
SELECT CAST('<x>' + REPLACE((SELECT TextCol AS [*] FOR XML PATH('')),CHAR(10),'</x><x>') + '</x>' AS XML) AS OneLine
FROM PlainLineBreak
)
,EachLine AS
(
SELECT LTRIM(RTRIM(r.value('text()[1]','nvarchar(max)'))) AS LineText
FROM LineWise
CROSS APPLY OneLine.nodes('/x[text()]') AS A(r)
)
SELECT p.*
FROM
(
SELECT LEFT(LineText,CHARINDEX(':',LineText)-1) AS ColumnName
,SUBSTRING(LineText,CHARINDEX(':',LineText)+1,1000) AS ColumnValue
FROM EachLine
) AS t
PIVOT
(
MAX(ColumnValue) FOR ColumnName IN([Note],[Phone Call],[Number Called],[Phone Description],[Subject],[Result],[Duration])
) As p
the result
+-----------+--------------------------+-----------------+-------------------+-----------------+--------------------+----------+
| Note | Phone Call | Number Called | Phone Description | Subject | Result | Duration |
+-----------+--------------------------+-----------------+-------------------+-----------------+--------------------+----------+
| test note | Fairview Wines & Spirits | 1 604 601-8000 | Main | Arrange meeting | Arranged interview | 00:00:09 |
+-----------+--------------------------+-----------------+-------------------+-----------------+--------------------+----------+
Upvotes: 0
Reputation: 39
Figured it out with a case statement as below:
SELECT
CASE
WHEN A.ResultStart - A.SubjectStart >= 8
THEN RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.SubjectStart + 8, A.ResultStart - A.SubjectStart - 8), CHAR(10), ''), CHAR(13), '')))
ELSE ''
END
AS [Subject],
CASE
WHEN A.DurationStart - A.ResultStart >= 7
THEN RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.ResultStart + 7, A.DurationStart - A.ResultStart - 7), CHAR(10), ''), CHAR(13), '')))
ELSE ''
END
AS Result,
CASE
WHEN A.DurationStart - A.ResultStart >= 7
THEN RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.DurationStart + 9, A.DurationStart - A.ResultStart - 9), CHAR(10), ''), CHAR(13), '')))
ELSE ''
END
AS Duration,
CASE
WHEN A.PhoneStart >= 6
THEN RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, 6, A.PhoneStart
- 6), CHAR(10), ''), CHAR(13), '')))
ELSE ''
END
AS Note
FROM AMGR_Notes T
CROSS APPLY (
VALUES
(
CHARINDEX('Phone Call:', T.TextCol)
,CHARINDEX('Subject:', T.TextCol)
,CHARINDEX('Result:', T.TextCol)
,CHARINDEX('Duration:', T.TextCol)
)
) A (PhoneStart, SubjectStart, ResultStart, DurationStart)
where t.type = '2'
Upvotes: 1
Reputation: 1269853
The simplest way may be:
FROM AMGR_Notes T CROSS APPLY
(VALUES (NULLIF(CHARINDEX('Phone Call:', T.TextCol), 0)
NULLIF(CHARINDEX('Subject:', T.TextCol), 0),
NULLIF(CHARINDEX('Result:', T.TextCol), 0),
NULLIF(CHARINDEX('Duration:', T.TextCol), 0)
)
) A(PhoneStart, SubjectStart, ResultStart, DurationStart);
Upvotes: -1