Reputation: 593
In our database (Oracle), there is a field named CONVERSATION containing speech to text records (formatted as CLOB). After some pre-processing and replacement of unnecessary characters, currently this field has a format as the example below.
I want to split texts of agents and customers into separate columns. And I want them separeted by comma for each part starts with "a:" or "c:". How can I do that?
"a:" stands for agent and "c:" stands for customer
CREATE TABLE TEXT_RECORDS (
CONVERSATION CLOB
);
INSERT INTO TEXT_RECORDS
(CONVERSATION)
VALUES
('a:some text 1 c:some text 2 a:some text 3 c:some text 4 a:some text 5 c:some text 6');
--EDITED (previously it was 'a:some_text_1 c:some_text_2 a:some_text_3 c:some_text_4 a:some_text_5 c:some_text_6')
Desired output as two separate fields:
CONV_AGENT CONV_CUSTOMER
some text 1 ,some text 3, some text 5 some text 2 ,some text 4, some text 6
Upvotes: 0
Views: 295
Reputation: 167981
You can just remove the sub-strings which do not have the correct prefix:
Oracle 11g R2 Schema Setup:
CREATE TABLE TEXT_RECORDS (
CONVERSATION CLOB
);
INSERT INTO TEXT_RECORDS(CONVERSATION)
SELECT 'a:some_text_1 c:some_text_2 a:some_text_3 c:some_text_4 a:some_text_5 c:some_text_6' FROM DUAL UNION ALL
SELECT 'a:some_text_1 a:some_text_2 a:some_text_3' FROM DUAL UNION ALL
SELECT 'c:some_text_1 a:some_text_2 a:some_text_3 c:some_text_4' FROM DUAL;
Query 1:
SELECT REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
conversation,
'.*?(a:(\S+))?(\s|$)', -- Find each word starting with "a:"
'\2, ' -- replace with just that part without prefix
),
'(, ){2,}', -- Replace multiple delimiters
', ' -- With a single delimiter
),
'^, |, $' -- Remove leading and trailing delimiters
) AS conv_agent,
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
conversation,
'.*?(c:(\S+))?(\s|$)', -- Find each word starting with "c:"
'\2, ' -- replace with just that part without prefix
),
'(, ){2,}', -- Replace multiple delimiters
', ' -- With a single delimiter
),
'^, |, $' -- Remove leading and trailing delimiters
) AS conv_customer
FROM text_records
| CONV_AGENT | CONV_CUSTOMER |
|---------------------------------------|---------------------------------------|
| some_text_1, some_text_3, some_text_5 | some_text_2, some_text_4, some_text_6 |
| some_text_1, some_text_2, some_text_3 | |
| some_text_2, some_text_3 | some_text_1, some_text_4 |
Updated - Spaces in conversation sentences
Oracle 11g R2 Schema Setup:
CREATE TABLE TEXT_RECORDS (
CONVERSATION CLOB
);
INSERT INTO TEXT_RECORDS(CONVERSATION)
SELECT 'a:some text 1 c:some text 2 a:some text 3 c:some text 4 a:some text 5 c:some text 6' FROM DUAL UNION ALL
SELECT 'a:some text 1 a:some text 2 a:some text 3' FROM DUAL UNION ALL
SELECT 'c:some text 1 a:some text 2 a:some text 3 c:some text 4' FROM DUAL;
Query 1:
SELECT REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
conversation,
'.*?(a:([^:]*))?(\s|$)',
'\2, '
),
'(, ){2,}',
', '
),
'^, |, $'
) AS conv_agent,
REGEXP_REPLACE(
REGEXP_REPLACE(
REGEXP_REPLACE(
conversation,
'.*?(c:([^:]*))?(\s|$)',
'\2, '
),
'(, ){2,}',
', '
),
'^, |, $'
) AS conv_customer
FROM text_records
| CONV_AGENT | CONV_CUSTOMER |
|---------------------------------------|---------------------------------------|
| some text 1, some text 3, some text 5 | some text 2, some text 4, some text 6 |
| some text 1, some text 2, some text 3 | |
| some text 2, some text 3 | some text 1, some text 4 |
Upvotes: 2
Reputation: 1157
You can create two functions, one that get the agent conversation and the other is for customer conversation, see below function to get for agent conversation.
CREATE OR REPLACE FUNCTION get_agent_conv(p_text CLOB) RETURN clob
IS
v_indx NUMBER := 1;
v_agent_conv CLOB;
v_occur NUMBER := 0;
BEGIN
LOOP
v_occur := v_occur + 1;
v_indx := DBMS_LOB.INSTR(p_text, 'a:', 1, v_occur);
v_agent_conv := v_agent_conv||', '||SUBSTR(p_text, v_indx+2, (DBMS_LOB.INSTR(p_text, 'c:', 1, v_occur)-4)-(v_indx-1));
EXIT WHEN v_indx = 0;
END LOOP;
RETURN TRIM(', ' FROM v_agent_conv);
END;
/
SELECT GET_AGENT_CONV(conversation) agent_conversation
FROM text_records;
AGENT_CONVERSATION
-------------------------------------
some_text_1, some_text_3, some_text_5
Upvotes: 0