Home » SQL & PL/SQL » SQL & PL/SQL » Fuzzy Matching S-Q-L Sever Taking Fime for 2.4 millions records
Fuzzy Matching S-Q-L Sever Taking Fime for 2.4 millions records [message #683341] Thu, 31 December 2020 14:21 Go to next message
mohsin_zee
Messages: 9
Registered: October 2008
Location: kuwait
Junior Member
Hi,

Could somebody help me to optimize Fuzzy Matching process I wrote my own Function(in MS SQL Server ) which is running fine but I have problem executing it for 2.4 millions rows to find matching Customer Name for MDM. it means each customer name match every time with 2.4 million records which is taking huge time unable to complete process each time I have to kill the job below is my Query I wrote procedure too. but all is failing unable to complete Please help to find solution. Totally Struck.

Query:

SELECT
o.lob_key_a,
[dbo].[func_dq_calculate_JaroWinkler] (o.MXEM_FAM_NAME,s.MXEM_FAM_NAME),
o.MXEM_FAM_NAME,
o.MXEM_FST_NAME,
s.MXEM_FAM_NAME
FROM AXA_CDB.DBO.lob_mxem o,AXA_CDB.DBO.lob_mxem s
where
o.lob_key_a != s.lob_key_a
and o.lob_key_a > s.lob_key_a
AND [dbo].[func_dq_calculate_JaroWinkler] (o.MXEM_FAM_NAME,s.MXEM_FAM_NAME) > .80;

My Procedure

ALTER PROCEDURE [dbo].[proc_dq_load_mxem_distance_mzee]
--@IP_DB_NAME VARCHAR(50),
--@IP_SCH_NAME VARCHAR(50),
--@IP_TBL_NAME VARCHAR(25),
--@IP_COL_NAME VARCHAR(50) = '1'

AS
DECLARE @TABLE_type1 TABLE (MXEM_NUM_1 varchar(500),MXEM_NUM_2 varchar(500),MXEM_CLI_NUM_1 varchar(500),MXEM_CLI_NUM_2 varchar(500) );

DECLARE @LV_MXEM_NUM_1_1 VARCHAR(500),
@LV_MXEM_NUM_2_1 VARCHAR(500),
@LV_MXEM_CLI_NUM_1_1 VARCHAR(500),
@LV_MXEM_CLI_NUM_2_1 VARCHAR(500),
@LV_MXEM_FAM_NAME1 VARCHAR(500),
@LV_MXEM_FST_NAME1 VARCHAR(500),
@LV_MXEM_DTE_BIRTH1 VARCHAR(500),
@LV_MXEM_SEX1 VARCHAR(500),
@LV_MXEM_EMAIL1 VARCHAR(500),
@LV_MXEM_TEL1 VARCHAR(500),
@LV_MXEM_COUNTY1 VARCHAR(500),
@LV_MXEM_CPOST1 VARCHAR(500),
@LV_MXEM_POBOX1 VARCHAR(500),

@LV_MXEM_NUM_1_2 VARCHAR(500),
@LV_MXEM_NUM_2_2 VARCHAR(500),
@LV_MXEM_CLI_NUM_1_2 VARCHAR(500),
@LV_MXEM_CLI_NUM_2_2 VARCHAR(500),
@LV_MXEM_FAM_NAME2 VARCHAR(500),
@LV_MXEM_FST_NAME2 VARCHAR(500),
@LV_MXEM_DTE_BIRTH2 VARCHAR(500),
@LV_MXEM_SEX2 VARCHAR(500),
@LV_MXEM_EMAIL2 VARCHAR(500),
@LV_MXEM_TEL2 VARCHAR(500),
@LV_MXEM_COUNTY2 VARCHAR(500),
@LV_MXEM_CPOST2 VARCHAR(500),
@LV_MXEM_POBOX2 VARCHAR(500),

@LV_MXEM_FAM_NAME_SCORE FLOAT,
@LV_MXEM_FST_NAME_SCORE FLOAT,
@LV_MXEM_DTE_BIRTH_SCORE FLOAT,
@LV_MXEM_SEX_SCORE FLOAT,
@LV_MXEM_EMAIL_SCORE FLOAT,
@LV_MXEM_TEL_SCORE FLOAT,
@LV_MXEM_COUNTY_SCORE FLOAT,
@LV_MXEM_CPOST_SCORE FLOAT,
@LV_MXEM_POBOX_SCORE FLOAT,
@LV_MXEM_SCORE_AVG FLOAT,

@LV_EXIST_COUNT INT,
@LV_INSERT_QRY VARCHAR(500),
@LV_INSERT_QRY_PT VARCHAR(500);

BEGIN


SET @LV_INSERT_QRY_PT = 'INSERT INTO AXA_KSA_CDB.DBO.log_dedupe_distance_test VALUES ( ';
---------------------------------
DECLARE CUR_MXEM_A CURSOR FOR
SELECT ISNULL(convert(nvarchar(18),convert(bigint,MXEM_NUM_1)),), ISNULL(convert(nvarchar(18),convert(bigint,MXEM_NUM_2)),),
ISNULL(convert(nvarchar(18),convert(bigint,MXEM_CLI_NUM_1)),), ISNULL(convert(nvarchar(18),convert(bigint,MXEM_CLI_NUM_2)),), ISNULL(MXEM_FAM_NAME,),
ISNULL(MXEM_FST_NAME,
), ISNULL(convert(nvarchar(18),convert(bigint,MXEM_DTE_BIRTH)),), ISNULL(MXEM_SEX,), ISNULL(MXEM_EMAIL,),
ISNULL(MXEM_TEL,
), ISNULL(MXEM_COUNTRY,), ISNULL(MXEM_CPOST,), ISNULL(MXEM_POBOX,)
FROM AXA_CDB.DBO.lob_mxem

OPEN CUR_MXEM_A

FETCH NEXT FROM CUR_MXEM_A INTO @LV_MXEM_NUM_1_1,
@LV_MXEM_NUM_2_1 ,
@LV_MXEM_CLI_NUM_1_1,
@LV_MXEM_CLI_NUM_2_1 ,
@LV_MXEM_FAM_NAME1 ,
@LV_MXEM_FST_NAME1,
@LV_MXEM_DTE_BIRTH1,
@LV_MXEM_SEX1 ,
@LV_MXEM_EMAIL1 ,
@LV_MXEM_TEL1 ,
@LV_MXEM_COUNTY1 ,
@LV_MXEM_CPOST1,
@LV_MXEM_POBOX1

WHILE @@FETCH_STATUS = 0
--MAIN WHILE START--
BEGIN


begin
insert into @TABLE_type1
values(@LV_MXEM_NUM_1_1,@LV_MXEM_NUM_2_1,@LV_MXEM_CLI_NUM_1_1,@LV_MXEM_CLI_NUM_2_1);
end;
--- dont chaira chari

DECLARE CUR_MXEM_B CURSOR FOR
SELECT
[dbo].[func_dq_calculate_JaroWinkler] (@LV_MXEM_FAM_NAME1,MXEM_FAM_NAME),
[dbo].[func_dq_calculate_JaroWinkler] (@LV_MXEM_EMAIL1,MXEM_EMAIL),
[dbo].[func_dq_calculate_JaroWinkler] (@LV_MXEM_COUNTY1,MXEM_COUNTRY),ISNULL(convert(nvarchar(18),
convert(bigint,MXEM_NUM_1)),
), ISNULL(convert(nvarchar(18),convert(bigint,MXEM_NUM_2)),),
ISNULL(convert(nvarchar(18),convert(bigint,MXEM_CLI_NUM_1)),
), ISNULL(convert(nvarchar(18),convert(bigint,MXEM_CLI_NUM_2)),),
ISNULL(MXEM_FAM_NAME,
), ISNULL(MXEM_FST_NAME,), ISNULL(convert(nvarchar(18),convert(bigint,MXEM_DTE_BIRTH)),),
ISNULL(MXEM_SEX,), ISNULL(MXEM_EMAIL,), ISNULL(MXEM_TEL,), ISNULL(MXEM_COUNTRY,), ISNULL(MXEM_CPOST,), ISNULL(MXEM_POBOX,)
FROM AXA_CDB.DBO.lob_mxem o
where not exists ( select 1 from @TABLE_type1 t where
t.MXEM_NUM_1 = o.MXEM_NUM_1 and
t.MXEM_NUM_2 = o.MXEM_NUM_2 and
t.MXEM_CLI_NUM_1 = o.MXEM_CLI_NUM_1 and
t.MXEM_CLI_NUM_2 = o.MXEM_CLI_NUM_2)
and MXEM_NUM_1+MXEM_NUM_2+MXEM_CLI_NUM_1+MXEM_CLI_NUM_2 != @LV_MXEM_NUM_1_1+@LV_MXEM_NUM_2_1+@LV_MXEM_CLI_NUM_1_1+@LV_MXEM_CLI_NUM_2_1
and [dbo].[func_dq_calculate_JaroWinkler] (@LV_MXEM_FAM_NAME1,MXEM_FAM_NAME) > 0.8;
--end chairda chari

OPEN CUR_MXEM_B
FETCH NEXT FROM CUR_MXEM_B INTO
@LV_MXEM_FAM_NAME_SCORE,
@LV_MXEM_EMAIL_SCORE,
@LV_MXEM_COUNTY_SCORE,
@LV_MXEM_NUM_1_2,
@LV_MXEM_NUM_2_2 ,
@LV_MXEM_CLI_NUM_1_2,
@LV_MXEM_CLI_NUM_2_2 ,
@LV_MXEM_FAM_NAME2 ,
@LV_MXEM_FST_NAME2,
@LV_MXEM_DTE_BIRTH2,
@LV_MXEM_SEX2 ,
@LV_MXEM_EMAIL2 ,
@LV_MXEM_TEL2 ,
@LV_MXEM_COUNTY2 ,
@LV_MXEM_CPOST2,
@LV_MXEM_POBOX2

WHILE @@FETCH_STATUS = 0


--NESTED WHILE START--
BEGIN





SET @LV_INSERT_QRY = @LV_INSERT_QRY_PT + ' + @LV_MXEM_NUM_1_1 + , + @LV_MXEM_NUM_2_1 + , +
@LV_MXEM_CLI_NUM_1_1 +
, + @LV_MXEM_CLI_NUM_2_1 + , +
@LV_MXEM_FAM_NAME1+
, + @LV_MXEM_NUM_1_2 + , + @LV_MXEM_NUM_2_2 + , +
@LV_MXEM_CLI_NUM_1_2 +
, + @LV_MXEM_CLI_NUM_2_2 + , +
@LV_MXEM_FAM_NAME2 +
,''' + 'MXEM_FAM_NAME' + ''',' + convert(nvarchar(100),@LV_MXEM_FAM_NAME_SCORE) + ')';

exec (@LV_INSERT_QRY);


SET @LV_INSERT_QRY = @LV_INSERT_QRY_PT + ' + @LV_MXEM_NUM_1_1 + , + @LV_MXEM_NUM_2_1 + , +
@LV_MXEM_CLI_NUM_1_1 +
, + @LV_MXEM_CLI_NUM_2_1 + , +
@LV_MXEM_EMAIL1+
, + @LV_MXEM_NUM_1_2 + , + @LV_MXEM_NUM_2_2 + , +
@LV_MXEM_CLI_NUM_1_2 +
, + @LV_MXEM_CLI_NUM_2_2 + , +
@LV_MXEM_EMAIL2 +
,''' + + 'MXEM_EMAIL' + ''',' + convert(nvarchar(100),@LV_MXEM_EMAIL_SCORE) + ')';

exec (@LV_INSERT_QRY);


SET @LV_INSERT_QRY = @LV_INSERT_QRY_PT + ' + @LV_MXEM_NUM_1_1 + , + @LV_MXEM_NUM_2_1 + , +
@LV_MXEM_CLI_NUM_1_1 +
, + @LV_MXEM_CLI_NUM_2_1 + , +
@LV_MXEM_COUNTY1+
, + @LV_MXEM_NUM_1_2 + , + @LV_MXEM_NUM_2_2 + , +
@LV_MXEM_CLI_NUM_1_2 +
, + @LV_MXEM_CLI_NUM_2_2 + , +
@LV_MXEM_COUNTY2 +
,''' + + 'MXEM_COUNTRY' + ''',' + convert(nvarchar(100),@LV_MXEM_COUNTY_SCORE) + ')';
exec (@LV_INSERT_QRY);



/*PRINT @LV_MXEM_NUM_1_2
PRINT @LV_MXEM_NUM_2_2
PRINT @LV_MXEM_CLI_NUM_1_2
PRINT @LV_MXEM_CLI_NUM_2_2*/

/*PRINT @LV_MXEM_FST_NAME2
PRINT @LV_MXEM_DTE_BIRTH2
PRINT @LV_MXEM_SEX2;
PRINT @LV_MXEM_EMAIL2;
PRINT @LV_MXEM_TEL2;
PRINT @LV_MXEM_COUNTY2;
PRINT @LV_MXEM_CPOST2;
PRINT @LV_MXEM_POBOX2;
*/


FETCH NEXT FROM CUR_MXEM_B INTO
@LV_MXEM_FAM_NAME_SCORE,
@LV_MXEM_EMAIL_SCORE,
@LV_MXEM_COUNTY_SCORE,
@LV_MXEM_NUM_1_2,
@LV_MXEM_NUM_2_2 ,
@LV_MXEM_CLI_NUM_1_2,
@LV_MXEM_CLI_NUM_2_2 ,
@LV_MXEM_FAM_NAME2 ,
@LV_MXEM_FST_NAME2,
@LV_MXEM_DTE_BIRTH2,
@LV_MXEM_SEX2 ,
@LV_MXEM_EMAIL2 ,
@LV_MXEM_TEL2 ,
@LV_MXEM_COUNTY2 ,
@LV_MXEM_CPOST2,
@LV_MXEM_POBOX2

END

CLOSE CUR_MXEM_B;
DEALLOCATE CUR_MXEM_B;


FETCH NEXT FROM CUR_MXEM_A INTO @LV_MXEM_NUM_1_1,
@LV_MXEM_NUM_2_1 ,
@LV_MXEM_CLI_NUM_1_1,
@LV_MXEM_CLI_NUM_2_1 ,
@LV_MXEM_FAM_NAME1 ,
@LV_MXEM_FST_NAME1,
@LV_MXEM_DTE_BIRTH1,
@LV_MXEM_SEX1 ,
@LV_MXEM_EMAIL1 ,
@LV_MXEM_TEL1 ,
@LV_MXEM_COUNTY1 ,
@LV_MXEM_CPOST1,
@LV_MXEM_POBOX1

END

CLOSE CUR_MXEM_A;
DEALLOCATE CUR_MXEM_A;


END
Re: Fuzzy Matching S-Q-L Sever Taking Fime for 2.4 millions records [message #683342 is a reply to message #683341] Thu, 31 December 2020 14:26 Go to previous message
Michel Cadot
Messages: 68624
Registered: March 2007
Location: Nanterre, France, http://...
Senior Member
Account Moderator

Please read the OraFAQ Forum Guide and How to use [code] tags and make your code easier to read.
Indent the code, make sure that lines of code do not exceed 100 characters.

This is an Oracle forum NOT a SQL Server one.
Please find a more appropriate forum.
The topic is locked. /forum/fa/448/0/
If I'm wrong, please, PM me (or report this message to a moderator, explain why you think it should be unlocked and it might be done).

[Updated on: Thu, 31 December 2020 14:27]

Report message to a moderator

Previous Topic: sending bulk email in Report 12c?
Next Topic: oracle pl/sql
Goto Forum:
  


Current Time: Thu Mar 28 10:13:38 CDT 2024