Library

Firebird Stored Procedure to split line with delimiters to recordset

This is one of the tasks that every developer on Firebird PSQL faces sooner or later - how to split line which contains values with delimiters to the array of separate values? Something like this:


select * from sp_split_into_words('foo rio bar firebird,mysql,postgresql;mssql;redis/rty');
 
Output:
######
 
WORD
==========
foo
rio
bar
firebird
mysql
postgresql
mssql
redis
rty

It is possible to make it on the pure SQL - please use the following stored procedure code, the author is Pavel Zotov, Firebird QA engineer and IBSurgeon lead DBA.
Please note that this procedure contains a wide list of possible delimiters, including space, so if you need only a comma or double dot, you can remove other delimiters from the variable a_del.
Also, pay attention to the size of returning varchar (return variable word) - by default, it is varchar(50), you can adjust it according to your needs.

 

set term ^;
create or alter procedure sp_split_into_words(
    a_text varchar(2048) character set utf8,
    a_dels varchar(20) default ',.<>/?;:''"[]{}`~!@#$%^&*()-_=+\|/',
    a_special char(1) default ' '
)
returns (
  word varchar(50)
) as
begin
-- Aux SP, used only in oltp_data_filling.sql to filling table PATTERNS
-- with miscelan combinations of words to be used in SIMILAR TO testing.
for
    with recursive
    j as( -- loop #1: transform list of delimeters to rows
        select s,1 i, substring(s from 1 for 1) del
        from(
          select replace(:a_dels,:a_special,'') s
          from rdb$database
        )
        
        UNION ALL
        
        select s, i+1, substring(s from i+1 for 1)
        from j
        where substring(s from i+1 for 1)<>''
    )
 
    ,d as(
        select :a_text s, :a_special sp from rdb$database
    )
    ,e as( -- loop #2: perform replacing each delimeter to `space`
        select d.s, replace(d.s, j.del, :a_special) s1, j.i, j.del
        from d join j on j.i=1
 
        UNION ALL
 
        select e.s, replace(e.s1, j.del, :a_special) s1, j.i, j.del
        from e
        -- nb: here 'column unknown: e.i' error will be on old builds of 2.5,
        -- e.g: WI-V2.5.2.26540 (letter from Alexey Kovyazin, 24.08.2014 14:34)
        join j on j.i = e.i + 1
    )
    ,f as(
        select s1 from e order by i desc rows 1
    )
    
    ,r as ( -- loop #3: perform split text into single words
        select iif(t.k>0, substring(t.s from t.k+1 ), t.s) s,
             iif(t.k>0,position( del, substring(t.s from t.k+1 )),-1) k,
             t.i,
             t.del,
             iif(t.k>0,left(t.s, t.k-1),t.s) word
        from(
          select f.s1 s, d.sp del, position(d.sp, s1) k, 0 i from f cross join d
        )t
 
        UNION ALL
 
        select iif(r.k>0, substring(r.s from r.k+1 ), r.s) s,
             iif(r.k>0,position(r.del, substring(r.s from r.k+1 )),-1) k,
             r.i+1,
             r.del,
             iif(r.k>0,left(r.s, r.k-1),r.s) word
        from r
        where r.k>=0
    )
    select word from r where word>''
    into
        word
do
    suspend;
end
^
set term ;^
commit;