Reputation: 1
I am new to SQL and trying to speed up a query used multiple times in a stored procedure. I am trying to match the names of places using a query. Some of the places have names of different length for example the same address may be called 'McDonalds Riccarton' in one table and 'Mac Donalds Riccarton' in another.
I have separated each of the words in each table into individual columns ie 'McDonalds, Riccarton' and 'Mac, Donalds, Riccarton'. These are called first for first word, second for second the second word etc... as you will see in my query
By using direct comparions or using soundex I am trying to match them based on loose matching terms for example where the soundex (mac) = soundex(macdonalds) and soundex (riccarton) = soundex( riccarton) they should be the same.
The query tries to match first word to all other columns ie first match first or second or third or fourth or last... last is the last word or any groups of words that are longer than the fourth word for long sets of names..
I am happy with my current query except that it is a little slow. When matching tables with 50 names in one and 600 names in another it takes roughly 2 mins. it obviously must be trying to match in each of the 'or' loops until it falls over and then tries the next loop... making it slow. I would like to avoid cursors if possible. Cheers in advance.
insert into
dbo.MatchedCulture_Recreation_Sports
select
loc.id,
loc.name,
cpn.cpn_id,
cpn.cpn_name
from
#NZF_CPN_Culture_Recreation_Sports cpn
inner join #Locations_Culture_Recreation_Sports loc
on
(
--where they match in string size see if the names match exactly
(
cpn.stringsize = loc.stringsize and
cpn.first = loc.first and
loc.last = cpn.last and
cpn.second = loc.second and
cpn.third = loc.third and
cpn.fourth = loc.Fourth
)
or
--or where they arent equal and the name isnt one word
(
cpn.stringsize <> loc.stringsize and
cpn.stringsize <> 1 and
loc.stringsize <>1 and
(
-- see if the first word matches anything
cpn.first = loc.first or
cpn.first = loc.second or
cpn.first = loc.third or
cpn.first = loc.fourth or
cpn.first = loc.last
)
and
-- and the last word matches anything
(
cpn.last = loc.first or
cpn.last = loc.second or
cpn.last = loc.third or
cpn.last = loc.fourth or
cpn.last = loc.last
)
and
-- and the sec matches anything
(
cpn.second = loc.first or
cpn.second = loc.second or
cpn.second = loc.third or
cpn.second = loc.fourth or
cpn.second = loc.last
)
-- or if the there are 3 words in one and 2 words in another try soundex
)
or
(
cpn.stringsize = 2 and
loc.stringsize = 3 and
cpn.stringsize <> 1 and
loc.stringsize <> 1 and
(
SOUNDEX(cpn.first) = SOUNDEX(loc.first) or
SOUNDEX(cpn.first) = SOUNDEX(loc.second) or
SOUNDEX(cpn.first) = SOUNDEX(loc.last)
)
and
(
SOUNDEX(cpn.last) = SOUNDEX(loc.first) or
SOUNDEX(cpn.last) = SOUNDEX(loc.second) or
SOUNDEX(cpn.last) = SOUNDEX(loc.last)
)
)
-- or if the there are 3 words in the other and 2 words in one try soundex
or
(
cpn.stringsize = 3 and
loc.stringsize = 2 and
cpn.stringsize <> 1 and
loc.stringsize <> 1 and
(
SOUNDEX(loc.first) = SOUNDEX(cpn.first) or
SOUNDEX(loc.first) = SOUNDEX(cpn.second) or
SOUNDEX(loc.first) = SOUNDEX(cpn.last)
)
and
(
SOUNDEX(loc.last) = SOUNDEX(cpn.first) or
SOUNDEX(loc.last) = SOUNDEX(cpn.second) or
SOUNDEX(loc.last) = SOUNDEX(cpn.last)
)
)
or
-- or if the there are more than 3 words in one and 3 words in another try soundex
(
cpn.stringsize <3 and
loc.stringsize = 3 and
cpn.stringsize <> 1 and
loc.stringsize <> 1 and
(
SOUNDEX(loc.first) = SOUNDEX(cpn.first) or
SOUNDEX(loc.first) = SOUNDEX(cpn.second) or
SOUNDEX(loc.first) = SOUNDEX(cpn.third) or
SOUNDEX(loc.first) = SOUNDEX(cpn.last)
)
and
(
SOUNDEX(loc.second) = SOUNDEX(cpn.first) or
SOUNDEX(loc.second) = SOUNDEX(cpn.second) or
SOUNDEX(loc.second) = SOUNDEX(cpn.third) or
SOUNDEX(loc.second) = SOUNDEX(cpn.last)
)
and
(
SOUNDEX(loc.last) = SOUNDEX(cpn.first) or
SOUNDEX(loc.last) = SOUNDEX(cpn.second) or
SOUNDEX(loc.last) = SOUNDEX(cpn.third) or
SOUNDEX(loc.last) = SOUNDEX(cpn.last)
)
)
or
-- or if the there are 3 words in the other and 3 words in one try soundex
(
cpn.stringsize = 3 and
loc.stringsize < 3 and
cpn.stringsize <> 1 and
loc.stringsize <> 1 and
(
SOUNDEX(cpn.first) = SOUNDEX(loc.first) or
SOUNDEX(cpn.first) = SOUNDEX(loc.second) or
SOUNDEX(cpn.first) = SOUNDEX(loc.third) or
SOUNDEX(cpn.first) = SOUNDEX(loc.last)
)
and
(
SOUNDEX(cpn.second) = SOUNDEX(loc.first) or
SOUNDEX(cpn.second) = SOUNDEX(loc.second) or
SOUNDEX(cpn.second) = SOUNDEX(loc.third) or
SOUNDEX(cpn.second) = SOUNDEX(loc.last)
)
and
(
SOUNDEX(cpn.last) = SOUNDEX(loc.first) or
SOUNDEX(cpn.last) = SOUNDEX(loc.second) or
SOUNDEX(cpn.last) = SOUNDEX(loc.third) or
SOUNDEX(cpn.last) = SOUNDEX(loc.last)
)
)
)
and
-- search within a distance
(
loc.latitude < cpn.Maxlat and
loc.latitude > cpn.Minlat and
loc.longitude < cpn.Maxlon and
loc.longitude > cpn.Minlon
)
Upvotes: 0
Views: 62
Reputation: 321
You can try execution plan in SQL Server Management Studio
. It let you know missing index (if it need index) list with a index creation script in sql. After creating index the query will run more faster then before.
That is the one way to speed up your query in SQL Server
.
Upvotes: 1