Reputation: 21641
I've a text file containing large number of queries. I want to get all the distinct tables used in the entire file in all the queries. The table name can come after a 'from' or 'join'. How can i extract them by doing a regex match. Can anyone suggest a regular expression to get the matches?
Upvotes: 10
Views: 16421
Reputation: 11
Solutions that can help you.
Regular expression
/(from|join|into)\s+([`]\w+.*[`] *\w+|(\[)\w+.*(\]) *\w+|\w*\.*\w+ *\w+)/g
Regular expression
/(from|join|into)\s+([`]\w+.+\w+\s*[`]|(\[)\w+.+\w+\s*(\])|\w+\s*\.+\s*\w*|\w+\b)/g
Test string
-------------------------------------------------------
select * into [dbo].[table_temp]
from [dbo].[table_a] a inner join dbo.table_b b ...
join table_c c on ...
from dbo.table_d d ...
from `dbo`.`table_e` e ...
from table_f f ...
-------------------------------------------------------
Generated Code for C#
using System;
using System.Text.RegularExpressions;
public static class QueryExtension
{
public static List<string> GetTables(this string query)
{
List<string> tables = new List<string>();
string pattern = @"(from|join|into)\s+([`]\w+.+\w+\s*[`]|(\[)\w+.+\w+\s*(\])|\w+\s*\.+\s*\w*|\w+\b)";
foreach (Match m in Regex.Matches(query, pattern))
{
string name = m.Groups[2].Value;
tables.Add(name);
}
return tables;
}
public static string Join(this IEnumerable<string> values, string separator) {
return string.Join(separator, values);
}
}
How to use it.
string input = @"select * into [dbo].[table_temp]
from [dbo].[table_a] a inner join dbo.table_b b ...
join table_c c on ...
from dbo.table_d d ...
from `dbo`.`table_e` e ...
from table_f f ...";
Console.WriteLine(input.GetTables().Join("\n"));
Output
[dbo].[table_temp]
[dbo].[table_a]
dbo.table_b
table_c
dbo.table_d
`dbo`.`table_e`
table_f
Regular expression
/(\w*\.*\w+|`\w*.*\w`|(\[)\w*.*(\]))+(,|\s+,|\s+FROM|\s+from)/g
Test string
-------------------------------------------------------
SELECT
[a].[column_1],
`b`.`column_2`,
c.column_3,
col4 as column_4,
col5 as `column_5`,
col6 as [column_6],
column_7,
a.col8 column_8,
(select max(column_x) from table_d where column_y in ('1','2','3')) as column_9
from table_a a
inner join table_b b on ...
inner join table_c c on ...
-------------------------------------------------------
Generated code for C#
public static class QueryExtension
{
public static List<string> GetColumns(this string query)
{
List<string> columns = new List<string>();
string pattern = @"(\w*\.*\w+|`\w*.*\w`|(\[)\w*.*(\]))+(,|\s+,|\s+FROM|\s+from)";
foreach (Match m in Regex.Matches(query, pattern))
{
string name = m.Groups[1].Value;
columns.Add(name);
}
return columns;
}
public static string Join(this IEnumerable<string> values, string separator) {
return string.Join(separator, values);
}
}
How to use it
string input1 = @"SELECT
[a].[column_1],
`b`.`column_2`,
c.column_3,
col4 as column_4,
col5 as `column_5`,
col6 as [column_6],
column_7,
a.col8 column_8,
(select max(column_x) from table_d where column_y in ('1','2','3')) as column_9
from table_a a
inner join table_b b on ...
inner join table_c c on ...
";
Console.WriteLine(input1.GetColumns().Join("\n"));
Output
[a].[column_1]
`b`.`column_2`
c.column_3
column_4
`column_5`
[column_6]
column_7
column_8
column_9
Regular Expression Language - Quick Reference
Simplified Regular Expressions
Try regular expressions online
Upvotes: 1
Reputation: 33
can try this but it doesnt work for all the types of query,
public void Main()
{
// TODO: Add your code here
string Line = string.Empty;
using (StreamReader sr = new StreamReader(@"D:\ssis\queryfile.txt"))//reading the filename
{
var text = string.Empty;
do
{
// MessageBox.Show(Line);
text = Line = sr.ReadToEnd();// storing it in a variable by reading till end
MessageBox.Show(Line);
} while ((Line = sr.ReadLine()) != null);
var text1 = text.Replace("[", string.Empty).Replace("]", string.Empty);//replacing brackets with empty space
MessageBox.Show(text1);
Regex r = new Regex(@"(?<=from|join)\s+(?<table>\S+)", RegexOptions.IgnoreCase | RegexOptions.Compiled);//regex for extracting the tablename after from and join
Match m = r.Match(text1);//creating match object
MessageBox.Show(m.Groups[1].Value);
var v = string.Empty;
while (m.Success)
{
v = m.Groups[0].Value;
m = m.NextMatch();
StreamWriter wr = new StreamWriter(@"D:\ssis\writefile.txt", true);// writing the match to the file
var text2 = v.Replace(".", " ,"); // replace the . with , seperated values
wr.WriteLine(text2);
sr.Close();
wr.Close();
}
}
}
Upvotes: 0
Reputation: 11797
I'd use:
r = new Regex("(from|join)\s+(?<table>\S+)", RegexOptions.IgnoreCase);
once you have the Match object "m", you'll have the table name with
m.Groups["table"].Value
example:
string line = @"select * from tb_name join tb_name2 ON a=b WHERE x=y";
Regex r = new Regex(@"(from|join)\s+(?<table>\S+)",
RegexOptions.IgnoreCase|RegexOptions.Compiled);
Match m = r.Match(line);
while (m.Success) {
Console.WriteLine (m.Groups["table"].Value);
m = m.NextMatch();
}
it will print: tb_table tb_table2
Upvotes: 8
Reputation: 1020
It depends on structure of your file. Try to use this:
(?<=from|join)(\s+\w+\b)
Also turn on options Multiline if your not split your file in array or smth else with singleline string members. Also try to turn on IgnorCase option.
Upvotes: 11
Reputation: 20721
Something like this maybe:
/(from|join)\s+(\w*\.)*(?<tablename>\w+)/
It won't match escaped table names though, and you need to make the regex evaluation case-insensitive.
Upvotes: 1