Ana
Ana

Reputation: 325

How to retain the same case in the text in Oracle?

I have a table with columns word and sentence. The idea is to replace the words in the sentence with the link(includes the word itself) if the word is found in the word column. The query below replaces perfectly but since the link is constructed from the temp.word column, the case of the words in the sentence is changed to the case of the words in the word column. Is there a way to retain the same case in the sentence itself?

Create table temp(
  id       NUMBER,
  word     VARCHAR2(1000),
  sentence VARCHAR2(2000)
);

insert into temp
SELECT 1,'automation testing', 'automtestingation Testing is popular kind of testing' FROM DUAL UNION ALL
SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
SELECT 4,'punctuation','automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
SELECT 7,'Not Matched','testing testing testing' FROM DUAL;

with words(id, word, word_length, search1, replace1, search2, replace2) as (
  select id, word, length(word),
  '(^|\W)' || REGEXP_REPLACE(word, '([][)(}{|^$\.*+?])', '\\\1') || '($|\W)',
  '\1{'|| id ||'}\2',
  '{'|| id ||'}',
  'http://localhost/' || id || '/<u>' || word || '</u>'
  FROM temp
)
, joined_data as (
  select w.search1, w.replace1, w.search2, w.replace2,
    s.rowid s_rid, s.sentence,
    row_number() over(partition by s.rowid order by word_length desc) rn
  from words w
  join temp s
  on instr(UPPER(s.sentence), UPPER(w.word)) > 0
  and regexp_like(s.sentence, w.search1)
)
, unpivoted_data as (
  select S_RID, SENTENCE, PHASE, SEARCH_STRING, REPLACE_STRING,
    row_number() over(partition by s_rid order by phase, rn) rn,
    case when row_number() over(partition by s_rid order by phase, rn)
      = count(*) over(partition by s_rid)
      then 1
      else 0
    end is_last
  from joined_data
  unpivot(
    (search_string, replace_string) 
    for phase in ( (search1, replace1) as 1, (search2, replace2) as 2 ))
)
, replaced_data(S_RID, RN, is_last, SENTENCE) as (
  select S_RID, RN, is_last,
    regexp_replace(SENTENCE, search_string, replace_string,1,0,'i')
  from unpivoted_data
  where rn = 1
  union all
  select n.S_RID, n.RN, n.is_last,
    case when n.phase = 1
      then regexp_replace(o.SENTENCE, n.search_string, n.replace_string,1,0,'i')
      else replace(o.SENTENCE, n.search_string, n.replace_string)
    end
  from unpivoted_data n
  join replaced_data o
    on o.s_rid = n.s_rid and n.rn = o.rn + 1  
)
select s_rid, sentence from replaced_data
where is_last = 1
order by s_rid;

For example, for id = 1, the sentence is automtestingation Testing is popular kind of testing
After replacement it will be automtestingation http://localhost/2/<u>testing</u> is popular kind of http://localhost/2/<u>testing</u>.

The word Testing is replaced with testing(from the temp.word column).

The expected outcome is

automtestingation http://localhost/2/<u>Testing</u> is popular kind of http://localhost/2/<u>testing</u>

Upvotes: 1

Views: 270

Answers (2)

MT0
MT0

Reputation: 167774

Oracle Setup:

Create table temp(
  id       NUMBER,
  word     VARCHAR2(1000),
  Sentence VARCHAR2(2000)
);

insert into temp
SELECT 1,'automation testing', 'automtestingation TeStInG TEST is popular kind of testing' FROM DUAL UNION ALL
SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
select 2,'test', 'test' FROM DUAL UNION ALL
SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
SELECT 4,'punctuation','automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
SELECT 7,'Not Matched','testing testing testing' FROM DUAL UNION ALL
SELECT 8,'^[($','testing characters ^[($ that need escaping in a regular expression' FROM DUAL;

SQL Types:

CREATE TYPE stringlist IS TABLE OF VARCHAR2(4000);
/
CREATE TYPE intlist IS TABLE OF NUMBER(20,0);
/

PL/SQL Function:

CREATE FUNCTION replace_words(
  word_list IN  stringlist,
  id_list   IN  intlist,
  sentence  IN  temp.sentence%TYPE
) RETURN temp.sentence%TYPE
IS
  p_sentence       temp.sentence%TYPE := UPPER( sentence );
  p_pos            PLS_INTEGER := 1;
  p_min_word_index PLS_INTEGER;
  p_word_index     PLS_INTEGER;
  p_start          PLS_INTEGER;
  p_index          PLS_INTEGER;
  o_sentence       temp.sentence%TYPE;
BEGIN
  LOOP
    p_min_word_index := NULL;
    p_index          := NULL;
    FOR i IN 1 .. word_list.COUNT LOOP
      p_word_index := p_pos;
      LOOP
        p_word_index := INSTR( p_sentence, word_list(i), p_word_index );
        EXIT WHEN p_word_index = 0;
        IF (   p_word_index  > 1
           AND REGEXP_LIKE( SUBSTR( p_sentence, p_word_index - 1, 1 ), '\w' )
           )
           OR  REGEXP_LIKE( SUBSTR( p_sentence, p_word_index + LENGTH( word_list(i) ), 1 ), '\w' )
        THEN
           p_word_index := p_word_index + 1;
           CONTINUE;
        END IF;
        IF p_min_word_index IS NULL OR p_word_index < p_min_word_index THEN
          p_min_word_index := p_word_index;
          p_index := i;
        END IF;
        EXIT;
      END LOOP;
    END LOOP;
    IF p_index IS NULL THEN
      o_sentence := o_sentence || SUBSTR( sentence, p_pos );
      EXIT;
    ELSE
      o_sentence := o_sentence
                    || SUBSTR( sentence, p_pos, p_min_word_index - p_pos )
                    || 'http://localhost/'
                    || id_list(p_index)
                    || '/<u>'
                    || SUBSTR( sentence, p_min_word_index, LENGTH( word_list( p_index ) ) )
                    || '</u>';
      p_pos := p_min_word_index + LENGTH( word_list( p_index ) );
    END IF;
  END LOOP;
  RETURN o_sentence;
END;
/

Merge:

MERGE INTO temp dst
USING (
  WITH lists ( word_list, id_list ) AS (
    SELECT CAST(
             COLLECT(
               UPPER( word )
               ORDER BY LENGTH( word ) DESC, UPPER( word ) ASC, ROWNUM
             )
             AS stringlist
           ),
           CAST(
             COLLECT(
               id
               ORDER BY LENGTH( word ) DESC, UPPER( word ) ASC, ROWNUM
             )
             AS intlist
           )
    FROM   temp
  )
  SELECT t.ROWID rid,
         replace_words(
           word_list,
           id_list,
           sentence
         ) AS replaced_sentence
  FROM   temp t
         CROSS JOIN lists
) src
ON ( dst.ROWID = src.RID )
WHEN MATCHED THEN
  UPDATE SET sentence = src.replaced_sentence;

Output:

SELECT * FROM temp;
ID | WORD                    | SENTENCE                                                                                                                                                                                                  
-: | :---------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 1 | automation testing      | automtestingation http://localhost/2/<u>TeStInG</u> http://localhost/2/<u>TEST</u> is popular kind of http://localhost/2/<u>testing</u>                                                                   
 2 | testing                 | http://localhost/3/<u>manual testing</u>                                                                                                                                                                  
 2 | test                    | http://localhost/2/<u>test</u>                                                                                                                                                                            
 3 | manual testing          | this is an old method of http://localhost/2/<u>testing</u>                                                                                                                                                
 4 | punctuation             | http://localhost/1/<u>automation testing</u>,http://localhost/3/<u>manual testing</u>,http://localhost/4/<u>punctuation</u>,automanual http://localhost/2/<u>testing</u>-http://localhost/2/<u>testing</u>
 5 | B-number analysis       | http://localhost/6/<u>B-number analysis table</u>                                                                                                                                                         
 6 | B-number analysis table | http://localhost/2/<u>testing</u> http://localhost/5/<u>B-number analysis</u>                                                                                                                             
 7 | Not Matched             | http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u>                                                                                                     
 8 | ^[($                    | http://localhost/2/<u>testing</u> characters http://localhost/8/<u>^[($</u> that need escaping in a regular expression                                                                                    

db<>fiddle here

Upvotes: 1

Jon Heller
Jon Heller

Reputation: 36798

While there's certainly a way to do this in a single SQL statement, I think this problem is better solved with a separate function:

create or replace function replace_words(p_word varchar2, p_sentence varchar2) return varchar2 is
    v_match_position number;
    v_match_count    number := 0;
    v_new_sentence   varchar2(4000) := p_sentence;
begin
    --Find all matches.
    loop
        --Find Nth case-insensitive match
        v_match_count := v_match_count + 1;
        v_match_position := regexp_instr(
            srcstr     => v_new_sentence,
            pattern    => p_word,
            occurrence => v_match_count,
            modifier   => 'i');

        exit when v_match_position = 0;

        --Insert text, instead of replacing, to keep the original case.
        v_new_sentence :=
            substr(v_new_sentence, 1, v_match_position - 1) || 'http://localhost/2/<u>'||
            substr(v_new_sentence, v_match_position, length(p_word)) || '</u>'||
            substr(v_new_sentence, v_match_position + length(p_word));

    end loop;

    return v_new_sentence;
end;
/

Then the SQL query looks like this:

select id, word, sentence, replace_words(word, sentence) from temp;

Upvotes: 0

Related Questions