{********************************************************************} { } { Developer Express Visual Component Library } { ExpressSpellChecker } { } { Copyright (c) 1998-2008 Developer Express Inc. } { ALL RIGHTS RESERVED } { } { The entire contents of this file is protected by U.S. and } { International Copyright Laws. Unauthorized reproduction, } { reverse-engineering, and distribution of all or any portion of } { the code contained in this file is strictly prohibited and may } { result in severe civil and criminal penalties and will be } { prosecuted to the maximum extent possible under the law. } { } { RESTRICTIONS } { } { THIS SOURCE CODE AND ALL RESULTING INTERMEDIATE FILES } { (DCU, OBJ, DLL, ETC.) ARE CONFIDENTIAL AND PROPRIETARY TRADE } { SECRETS OF DEVELOPER EXPRESS INC. THE REGISTERED DEVELOPER IS } { LICENSED TO DISTRIBUTE THE EXPRESSSPELLCHECKER AND ALL } { ACCOMPANYING VCL CONTROLS AS PART OF AN EXECUTABLE PROGRAM ONLY. } { } { THE SOURCE CODE CONTAINED WITHIN THIS FILE AND ALL RELATED } { FILES OR ANY PORTION OF ITS CONTENTS SHALL AT NO TIME BE } { COPIED, TRANSFERRED, SOLD, DISTRIBUTED, OR OTHERWISE MADE } { AVAILABLE TO OTHER INDIVIDUALS WITHOUT EXPRESS WRITTEN CONSENT } { AND PERMISSION FROM DEVELOPER EXPRESS INC. } { } { CONSULT THE END USER LICENSE AGREEMENT FOR INFORMATION ON } { ADDITIONAL RESTRICTIONS. } { } {********************************************************************} unit dxSpellCheckerAlgorithms; {$I cxVer.inc} interface type { TdxDoubleMetaphone } TdxDoubleMetaphone = class private FAlternate: Boolean; FCurrent: Integer; FLast: Integer; FPrimary: WideString; FSecondary: WideString; FValue: WideString; procedure IncCurrent(ADelta: Integer = 1); {$IFDEF DELPHI9} inline; {$ENDIF} procedure ProcessAEIOUY; procedure ProcessB; procedure ProcessC; procedure ProcessD; procedure ProcessF; procedure ProcessG; procedure ProcessH; procedure ProcessJ; procedure ProcessK; procedure ProcessL; procedure ProcessM; procedure ProcessN; procedure ProcessP; procedure ProcessQ; procedure ProcessR; procedure ProcessS; procedure ProcessT; procedure ProcessV; procedure ProcessW; procedure ProcessX; procedure ProcessZ; procedure Reset; protected function StringAt(AStart, ALength: Integer; const ASubStr: WideString): Boolean; overload; function StringAt(AStart, ALength: Integer; const ASubStrs: array of WideString): Boolean; overload; function IsSlavoGermanic: Boolean; function IsVowel(AIndex: Integer): Boolean; procedure MetaphAdd(const AMain: WideString); overload; {$IFDEF DELPHI9} inline; {$ENDIF} procedure MetaphAdd(const AMain, AAlt: WideString); overload; {$IFDEF DELPHI9} inline; {$ENDIF} property Current: Integer read FCurrent; property Last: Integer read FLast; property Value: WideString read FValue; public procedure DoubleMetaphone(const S: WideString); virtual; property Primary: WideString read FPrimary; property Secondary: WideString read FSecondary; end; { TdxStringSimilarityCalculator } TdxStringSimilarityCalculator = class protected function DeleteCost(const S: WideString; APos: Integer): Integer; {$IFDEF DELPHI9} inline; {$ENDIF} function InsertCost(const S: WideString; APos: Integer): Integer; {$IFDEF DELPHI9} inline; {$ENDIF} function Minimum(A, B, C: Integer): Integer; function SubstitutionCost(const S1: WideString; APos1: Integer; const S2: WideString; APos2: Integer): Integer; {$IFDEF DELPHI9} inline; {$ENDIF} public function GetDistance(const S1, S2: WideString): Integer; virtual; end; implementation uses SysUtils; { TdxDoubleMetaphone } procedure TdxDoubleMetaphone.DoubleMetaphone(const S: WideString); begin Reset; FLast := Length(S); if FLast = 0 then Exit; //pad the original string so that we can index beyond the edge of the world FValue := WideUpperCase(S) + ' '; //skip these when at start of word if StringAt(1, 2, ['GN', 'KN', 'PN', 'WR', 'PS']) then IncCurrent; //Initial 'X' is pronounced 'Z' e.g. 'Xavier' if Value[1] = 'X' then begin MetaphAdd('S'); //'Z' maps to 'S' IncCurrent; end; // main loop while ((Length(Primary) < 4) or (Length(Secondary) < 4)) and (Current <= Last) do case FValue[Current] of 'A', 'E', 'I', 'O', 'U', 'Y': ProcessAEIOUY; 'B': ProcessB; 'C': ProcessC; 'D': ProcessD; 'F': ProcessF; 'G': ProcessG; 'H': ProcessH; 'J': ProcessJ; 'K': ProcessK; 'L': ProcessL; 'M': ProcessM; 'N': ProcessN; 'P': ProcessP; 'Q': ProcessQ; 'R': ProcessR; 'S': ProcessS; 'T': ProcessT; 'V': ProcessV; 'W': ProcessW; 'X': ProcessX; 'Z': ProcessZ; else Inc(FCurrent); end; if Length(Primary) > 4 then Delete(FPrimary, 1, 4); if FAlternate and (Length(Secondary) > 4) then Delete(FSecondary, 1, 4); end; function TdxDoubleMetaphone.IsSlavoGermanic: Boolean; begin Result := (Pos('W', Value) > 0) or (Pos('K', Value) > 0) or (Pos('CZ', Value) > 0) or (Pos('WITZ', Value) > 0); end; function TdxDoubleMetaphone.IsVowel(AIndex: Integer): Boolean; begin Result := (AIndex > 0) and (AIndex <= Last); if Result then case Value[AIndex] of 'A', 'E', 'I', 'O', 'U', 'Y':; else Result := False; end; end; procedure TdxDoubleMetaphone.MetaphAdd(const AMain: WideString); begin if Length(AMain) <> 0 then begin FPrimary := FPrimary + AMain; FSecondary := FSecondary + AMain; end; end; procedure TdxDoubleMetaphone.MetaphAdd(const AMain, AAlt: WideString); begin if Length(AMain) <> 0 then FPrimary := FPrimary + AMain; if Length(AAlt) <> 0 then begin FAlternate := True; if AAlt[1] <> ' ' then FSecondary := FSecondary + AAlt; end else if (Length(AMain) <> 0) and (AMain[1] <> ' ') then FSecondary := FSecondary + AMain; end; function TdxDoubleMetaphone.StringAt(AStart, ALength: Integer; const ASubStr: WideString): Boolean; begin Result := (AStart > 0) and (Copy(Value, AStart, ALength) = ASubStr); end; function TdxDoubleMetaphone.StringAt(AStart, ALength: Integer; const ASubStrs: array of WideString): Boolean; var I: Integer; T: WideString; begin Result := False; if AStart < 1 then Exit; T := Copy(Value, AStart, ALength); for I := Low(ASubStrs) to High(ASubStrs) do if T = ASubStrs[I] then begin Result := True; Break; end; end; procedure TdxDoubleMetaphone.IncCurrent(ADelta: Integer = 1); begin Inc(FCurrent, ADelta); end; procedure TdxDoubleMetaphone.ProcessAEIOUY; begin if Current = 0 then //all init vowels now map to 'A' MetaphAdd('A'); Inc(FCurrent); end; procedure TdxDoubleMetaphone.ProcessB; begin //"-mb", e.g", "dumb", already skipped over... MetaphAdd('P'); if Value[Current + 1] = 'B' then IncCurrent(2) else IncCurrent; end; procedure TdxDoubleMetaphone.ProcessC; begin //various germanic if ((Current > 2) and not IsVowel(Current - 2) and StringAt(Current - 1, 3, 'ACH') and ((Value[Current + 2] <> 'I') and ((Value[Current + 2] <> 'E') or StringAt(Current - 2, 6, ['BACHER', 'MACHER'])) )) then begin MetaphAdd('K'); IncCurrent(2); Exit; end; //special case 'caesar' if (Current = 1) AND StringAt(Current, 6, 'CAESAR') then begin MetaphAdd('S'); IncCurrent(2); Exit; end; //italian 'chianti' if StringAt(Current, 4, 'CHIA') then begin MetaphAdd('K'); IncCurrent(2); Exit; end; if StringAt(Current, 2, 'CH') then begin //find 'michael' if (Current > 1) and StringAt(Current, 4, 'CHAE') then begin MetaphAdd('K', 'X'); IncCurrent(2); Exit; end; //greek roots e.g. 'chemistry', 'chorus' if ((Current = 1) and (StringAt(Current + 1, 5, ['HARAC', 'HARIS']) or StringAt(Current + 1, 3, ['HOR', 'HYM', 'HIA', 'HEM'])) and not StringAt(1, 5, 'CHORE')) then begin MetaphAdd('K'); IncCurrent(2); Exit; end; //germanic, greek, or otherwise 'ch' for 'kh' sound if ((StringAt(1, 4, ['VAN ', 'VON ']) or StringAt(1, 3, 'SCH')) // 'architect but not 'arch', 'orchestra', 'orchid' or StringAt(Current - 2, 6, ['ORCHES', 'ARCHIT', 'ORCHID']) or StringAt(Current + 2, 1, ['T', 'S']) or ((StringAt(Current - 1, 1, ['A', 'O', 'U', 'E']) or (Current = 1)) //e.g., 'wachtler', 'wechsler', but not 'tichner' and StringAt(Current + 2, 1, ['L', 'R', 'N', 'M', 'B', 'H', 'F', 'V', 'W', ' ']))) then MetaphAdd('K') else begin if Current > 1 then begin if StringAt(1, 2, 'MC') then //e.g., 'McHugh' MetaphAdd('K') else MetaphAdd('X', 'K'); end else MetaphAdd('X'); end; IncCurrent(2); Exit; end; //e.g, 'czerny' if StringAt(Current, 2, 'CZ') and not StringAt(Current - 2, 4, 'WICZ') then begin MetaphAdd('S', 'X'); IncCurrent(2); Exit; end; //e.g., 'focaccia' if StringAt((Current + 1), 3, 'CIA') then begin MetaphAdd('X'); IncCurrent(3); Exit; end; //double 'C', but not if e.g. 'McClellan' if StringAt(Current, 2, 'CC') and not ((Current = 2) and (Value[1] = 'M')) then begin //'bellocchio' but not 'bacchus' if StringAt(Current + 2, 1, ['I', 'E', 'H']) and not StringAt(Current + 2, 2, 'HU') then begin //'accident', 'accede' 'succeed' if (((Current = 2) and (Value[Current - 1] = 'A')) or StringAt(Current - 1, 5, ['UCCEE', 'UCCES'])) then MetaphAdd('KS') else //'bacci', 'bertucci', other italian MetaphAdd('X'); IncCurrent(3); end else begin //Pierce's rule MetaphAdd('K'); IncCurrent(2); end; Exit; end; if StringAt(Current, 2, ['CK', 'CG', 'CQ']) then begin MetaphAdd('K'); IncCurrent(2); Exit; end; if StringAt(Current, 2, ['CI', 'CE', 'CY']) then begin //italian vs. english if StringAt(Current, 3, ['CIO', 'CIE', 'CIA']) then MetaphAdd('S', 'X') else MetaphAdd('S'); IncCurrent(2); Exit; end; //else MetaphAdd('K'); //name sent in 'mac caffrey', 'mac gregor if StringAt(Current + 1, 2, [' C', ' Q', ' G']) then IncCurrent(3) else if StringAt(Current + 1, 1, ['C', 'K', 'Q']) and not StringAt(Current + 1, 2, ['CE', 'CI']) then IncCurrent(2) else IncCurrent; end; procedure TdxDoubleMetaphone.ProcessD; begin if StringAt(Current, 2, 'DG') then begin if StringAt(Current + 2, 1, ['I', 'E', 'Y']) then begin //e.g. 'edge' MetaphAdd('J'); IncCurrent(3); end else begin //e.g. 'edgar' MetaphAdd('TK'); IncCurrent(2) end; Exit; end; if StringAt(current, 2, ['DT', 'DD']) then begin MetaphAdd('T'); IncCurrent(2); end else begin MetaphAdd('T'); IncCurrent; end; end; procedure TdxDoubleMetaphone.ProcessF; begin if Value[Current + 1] = 'F' then IncCurrent(2) else IncCurrent; MetaphAdd('F'); end; procedure TdxDoubleMetaphone.ProcessG; begin if Value[Current + 1] = 'H' then begin if (Current > 1) and not IsVowel(Current - 1) then begin MetaphAdd('K'); IncCurrent(2); Exit; end; if Current < 4 then begin //'ghislane', ghiradelli if Current = 1 then begin if Value[Current + 2] = 'I' then MetaphAdd('J') else MetaphAdd('K'); IncCurrent(2); Exit; end; end; //Parker's rule (with some further refinements) - e.g., 'hugh' if (((Current > 2) and StringAt(Current - 2, 1, ['B', 'H', 'D'])) //e.g., 'bough' or ((Current > 3) and StringAt(Current - 3, 1, ['B', 'H', 'D'])) //e.g., 'broughton' or ((Current > 4) and StringAt(Current - 4, 1, ['B', 'H']))) then IncCurrent(2) else begin //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough' if (Current > 3) and (Value[Current - 1] = 'U') and StringAt(Current - 3, 1, ['C', 'G', 'L', 'R', 'T']) then MetaphAdd('F') else if (Current > 1) and (Value[Current - 1] <> 'I') then MetaphAdd('K'); IncCurrent(2); end; Exit; end; if Value[Current + 1] = 'N' then begin if (Current = 2) and IsVowel(1) and not IsSlavoGermanic then MetaphAdd('KN', 'N') else //not e.g. 'cagney' if not StringAt(Current + 2, 2, 'EY') and (Value[Current + 1] <> 'Y') and not IsSlavoGermanic then MetaphAdd('N', 'KN') else MetaphAdd('KN'); IncCurrent(2); Exit; end; //'tagliaro' if StringAt(Current + 1, 2, 'LI') and not IsSlavoGermanic then begin MetaphAdd('KL', 'L'); IncCurrent(2); Exit; end; //-ges-,-gep-,-gel-, -gie- at beginning if ((Current = 1) and ((Value[Current + 1] = 'Y') or StringAt(Current + 1, 2, ['ES', 'EP', 'EB', 'EL', 'EY', 'IB', 'IL', 'IN', 'IE', 'EI', 'ER']))) then begin MetaphAdd('K', 'J'); IncCurrent(2); Exit; end; // -ger-, -gy- if ((StringAt(Current + 1, 2, 'ER') or (Value[Current + 1] = 'Y')) and not StringAt(1, 6, ['DANGER', 'RANGER', 'MANGER']) and not StringAt(Current - 1, 1, ['E', 'I']) and not StringAt(Current - 1, 3, ['RGY', 'OGY'])) then begin MetaphAdd('K', 'J'); IncCurrent(2); Exit; end; // italian e.g, 'biaggi' if StringAt(Current + 1, 1, ['E', 'I', 'Y']) or StringAt(Current - 1, 4, ['AGGI', 'OGGI']) then begin //obvious germanic if StringAt(1, 4, ['VAN ', 'VON ']) or StringAt(1, 3, 'SCH') or StringAt(Current + 1, 2, 'ET') then MetaphAdd('K') else //always soft if french ending if StringAt((Current + 1), 4, 'IER ') then MetaphAdd('J') else MetaphAdd('J', 'K'); IncCurrent(2); Exit; end; if Value[Current + 1] = 'G' then IncCurrent(2) else IncCurrent; MetaphAdd('K'); end; procedure TdxDoubleMetaphone.ProcessH; begin //only keep if first & before vowel or btw. 2 vowels if ((Current = 1) or IsVowel(Current - 1)) and IsVowel(Current + 1) then begin MetaphAdd('H'); IncCurrent(2); end else //also takes care of 'HH' IncCurrent; end; procedure TdxDoubleMetaphone.ProcessJ; begin //obvious spanish, 'jose', 'san jacinto' if StringAt(Current, 4, 'JOSE') or StringAt(1, 4, 'SAN ') then begin if ((Current = 1) and (Value[Current + 4] = ' ')) or StringAt(1, 4, 'SAN ') then MetaphAdd('H') else MetaphAdd('J', 'H'); IncCurrent; Exit; end; if (Current = 1) and not StringAt(Current, 4, 'JOSE') then MetaphAdd('J', 'A') //Yankelovich/Jankelowicz else //spanish pron. of e.g. 'bajador' if IsVowel(Current - 1) and not IsSlavoGermanic and ((Value[Current + 1] = 'A') or (Value[Current + 1] = 'O')) then MetaphAdd('J', 'H') else if Current = Last then MetaphAdd('J', ' ') else if not StringAt(Current + 1, 1, ['L', 'T', 'K', 'S', 'N', 'M', 'B', 'Z']) and not StringAt(Current - 1, 1, ['S', 'K', 'L']) then MetaphAdd('J'); if Value[Current + 1] = 'J' then //it could happennot IncCurrent(2) else IncCurrent; end; procedure TdxDoubleMetaphone.ProcessK; begin if Value[Current + 1] = 'K' then IncCurrent(2) else IncCurrent; MetaphAdd('K'); end; procedure TdxDoubleMetaphone.ProcessL; begin if Value[Current + 1] = 'L' then begin //!!!TODO: check Last - 2 bellow //spanish e.g. 'cabrillo', 'gallegos' if ((Current = (Last - 2)) and StringAt(Current - 1, 4, ['ILLO', 'ILLA', 'ALLE'])) or ((StringAt(Last - 1, 2, ['AS', 'OS']) or StringAt(Last, 1, ['A', 'O'])) and StringAt(Current - 1, 4, 'ALLE')) then begin MetaphAdd('L', ' '); IncCurrent(2); Exit; end; IncCurrent(2); end else IncCurrent; MetaphAdd('L'); end; procedure TdxDoubleMetaphone.ProcessM; begin if (StringAt(Current - 1, 3, 'UMB') and ((Current + 1 = Last) or StringAt(Current + 2, 2, 'ER'))) //'dumb','thumb' or (Value[Current + 1] = 'M') then IncCurrent(2) else IncCurrent; MetaphAdd('M'); end; procedure TdxDoubleMetaphone.ProcessN; begin if Value[Current + 1] = 'N' then IncCurrent(2) else IncCurrent; MetaphAdd('N'); end; procedure TdxDoubleMetaphone.ProcessP; begin if Value[Current + 1] = 'H' then begin MetaphAdd('F'); IncCurrent(2); Exit; end; //also account for 'campbell', 'raspberry' if StringAt(Current + 1, 1, ['P', 'B']) then IncCurrent(2) else IncCurrent; MetaphAdd('P'); end; procedure TdxDoubleMetaphone.ProcessQ; begin if Value[Current + 1] = 'Q' then IncCurrent(2) else IncCurrent; MetaphAdd('K'); end; procedure TdxDoubleMetaphone.ProcessR; begin //french e.g. 'rogier', but exclude 'hochmeier' if (Current = Last) and not IsSlavoGermanic and StringAt(Current - 2, 2, 'IE') and not StringAt(Current - 4, 2, ['ME', 'MA']) then MetaphAdd('', 'R') else MetaphAdd('R'); if Value[Current + 1] = 'R' then IncCurrent(2) else IncCurrent; end; procedure TdxDoubleMetaphone.ProcessS; begin //special cases 'island', 'isle', 'carlisle', 'carlysle' if StringAt(Current - 1, 3, ['ISL', 'YSL']) then begin IncCurrent; Exit; end; //special case 'sugar-' if (Current = 1) and StringAt(Current, 5, 'SUGAR') then begin MetaphAdd('X', 'S'); IncCurrent; Exit; end; if StringAt(Current, 2, 'SH') then begin //germanic if StringAt(Current + 1, 4, ['HEIM', 'HOEK', 'HOLM', 'HOLZ']) then MetaphAdd('S') else MetaphAdd('X'); IncCurrent(2); Exit; end; //italian & armenian if StringAt(Current, 3, ['SIO', 'SIA']) or StringAt(Current, 4, 'SIAN') then begin if not IsSlavoGermanic then MetaphAdd('S', 'X') else MetaphAdd('S'); IncCurrent(3); Exit; end; //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider' //also, -sz- in slavic language altho in hungarian it is pronounced 's' if ((Current = 1) and StringAt(Current + 1, 1, ['M', 'N', 'L', 'W'])) or StringAt(Current + 1, 1, 'Z') then begin MetaphAdd('S', 'X'); if StringAt(Current + 1, 1, 'Z') then IncCurrent(2) else IncCurrent; Exit; end; if StringAt(Current, 2, 'SC') then begin //Schlesinger's rule if Value[Current + 2] = 'H' then begin //dutch origin, e.g. 'school', 'schooner' if StringAt(Current + 3, 2, ['OO', 'ER', 'EN', 'UY', 'ED', 'EM']) then begin //'schermerhorn', 'schenker' if StringAt(Current + 3, 2, ['ER', 'EN']) then MetaphAdd('X', 'SK') else MetaphAdd('SK'); end else begin if (Current = 1) and not IsVowel(4) and (Value[4] <> 'W') then MetaphAdd('X', 'S') else MetaphAdd('X'); end; IncCurrent(3); Exit; end; if StringAt(Current + 2, 1, ['I', 'E', 'Y']) then begin MetaphAdd('S'); IncCurrent(3); Exit; end; //else MetaphAdd('SK'); IncCurrent(3); Exit; end; //french e.g. 'resnais', 'artois' if (Current = Last) and StringAt(Current - 2, 2, ['AI', 'OI']) then MetaphAdd('', 'S') else MetaphAdd('S'); if StringAt(Current + 1, 1, ['S', 'Z']) then IncCurrent(2) else IncCurrent; end; procedure TdxDoubleMetaphone.ProcessT; begin if StringAt(Current, 4, 'TION') or StringAt(Current, 3, ['TIA', 'TCH']) then begin MetaphAdd('X'); IncCurrent(3); Exit; end; if StringAt(Current, 2, 'TH') or StringAt(Current, 3, 'TTH') then begin //special case 'thomas', 'thames' or germanic if StringAt(Current + 2, 2, ['OM', 'AM']) or StringAt(1, 4, ['VAN ', 'VON ']) or StringAt(1, 3, 'SCH') then MetaphAdd('T') else MetaphAdd('0', 'T'); IncCurrent(2); Exit; end; if StringAt(Current + 1, 1, ['T', 'D']) then IncCurrent(2) else IncCurrent; MetaphAdd('T'); end; procedure TdxDoubleMetaphone.ProcessV; begin if Value[Current + 1] = 'V' then IncCurrent(2) else IncCurrent; MetaphAdd('F'); end; procedure TdxDoubleMetaphone.ProcessW; begin //can also be in middle of word if StringAt(Current, 2, 'WR') then begin MetaphAdd('R'); IncCurrent(2); Exit; end; if (Current = 1) and (IsVowel(Current + 1) or StringAt(Current, 2, 'WH')) then begin //Wasserman should match Vasserman if IsVowel(Current + 1) then MetaphAdd('A', 'F') else //need Uomo to match Womo MetaphAdd('A'); end; //Arnow should match Arnoff if ((Current = Last) and IsVowel(Current - 1)) or StringAt(Current - 1, 5, ['EWSKI', 'EWSKY', 'OWSKI', 'OWSKY']) or StringAt(1, 3, 'SCH') then begin MetaphAdd('', 'F'); IncCurrent; Exit; end; //polish e.g. 'filipowicz' if StringAt(Current, 4, ['WICZ', 'WITZ']) then begin MetaphAdd('TS', 'FX'); IncCurrent(4); Exit; end; //else skip it IncCurrent; end; procedure TdxDoubleMetaphone.ProcessX; begin //french e.g. breaux if not ((Current = Last) and (StringAt(Current - 3, 3, ['IAU', 'EAU']) or StringAt(Current - 2, 2, ['AU', 'OU']))) then MetaphAdd('KS'); if StringAt(Current + 1, 1, ['C', 'X']) then IncCurrent(2) else IncCurrent; end; procedure TdxDoubleMetaphone.ProcessZ; begin //chinese pinyin e.g. 'zhao' if Value[Current + 1] = 'H' then begin MetaphAdd('J'); IncCurrent(2); Exit; end else if StringAt(Current + 1, 2, ['ZO', 'ZI', 'ZA']) or (IsSlavoGermanic and ((Current > 1) and (Value[Current - 1] <> 'T'))) then MetaphAdd('S', 'TS') else MetaphAdd('S'); if Value[Current + 1] = 'Z' then IncCurrent(2) else IncCurrent; end; procedure TdxDoubleMetaphone.Reset; begin FCurrent := 1; FAlternate := False; FPrimary := ''; FSecondary := ''; end; { TdxStringSimilarityCalculator } function TdxStringSimilarityCalculator.GetDistance(const S1, S2: WideString): Integer; var AMatrix: array of array of Integer; ALoString1, ALoString2: WideString; I, J, ALen1, ALen2: Integer; begin if (Length(S1) = 0) or (Length(S2) = 0) then begin Result := 0; Exit; end; ALoString1 := WideLowerCase(S1); ALoString2 := WideLowerCase(S2); ALen1 := Length(ALoString1); ALen2 := Length(ALoString2); SetLength(AMatrix, ALen1 + 1, ALen2 + 1); AMatrix[0, 0] := 0; for I := 0 to ALen2 - 1 do AMatrix[0, I + 1] := AMatrix[0, I] + InsertCost(ALoString2, I + 1); for I := 0 to ALen1 - 1 do begin AMatrix[I + 1, 0] := AMatrix[I, 0] + DeleteCost(ALoString1, I + 1); for J := 0 to ALen2 - 1 do begin AMatrix[I + 1, J + 1] := Minimum( AMatrix[I, J] + SubstitutionCost(ALoString1, I + 1, ALoString2, J + 1), AMatrix[I, J + 1] + DeleteCost(ALoString1, I + 1), AMatrix[I + 1, J] + InsertCost(ALoString2, J + 1)); end; end; Result := AMatrix[ALen1, ALen2]; if Result > 0 then begin if ALoString1[1] <> ALoString2[1] then Inc(Result); if ALoString1[ALen1] <> ALoString2[ALen2] then Inc(Result); end; end; function TdxStringSimilarityCalculator.DeleteCost(const S: WideString; APos: Integer): Integer; begin Result := 2; end; function TdxStringSimilarityCalculator.InsertCost(const S: WideString; APos: Integer): Integer; begin Result := 2; end; function TdxStringSimilarityCalculator.Minimum(A, B, C: Integer): Integer; begin Result := A; if B < A then Result := B; if C < Result then Result := C; end; function TdxStringSimilarityCalculator.SubstitutionCost(const S1: WideString; APos1: Integer; const S2: WideString; APos2: Integer): Integer; begin if S1[APos1] = S2[APos2] then Result := 0 else Result := 2; end; end.