Componentes.Terceros.DevExp.../official/x.48/ExpressSpellChecker/Sources/dxISpellDecompressor.pas
2010-01-18 18:33:24 +00:00

1437 lines
40 KiB
ObjectPascal

{********************************************************************}
{ }
{ Developer Express Visual Component Library }
{ ExpressSpellChecker }
{ }
{ Copyright (c) 1998-2009 Developer Express Inc. }
{ ALL RIGHTS RESERVED }
{ }
{ The entire contents of this file is protected by U.S. and }
{ International Copyright Laws. Unauthorized reproduction, }
{ reverse-engineering, and distribution of all or any portion of }
{ the code contained in this file is strictly prohibited and may }
{ result in severe civil and criminal penalties and will be }
{ prosecuted to the maximum extent possible under the law. }
{ }
{ RESTRICTIONS }
{ }
{ THIS SOURCE CODE AND ALL RESULTING INTERMEDIATE FILES }
{ (DCU, OBJ, DLL, ETC.) ARE CONFIDENTIAL AND PROPRIETARY TRADE }
{ SECRETS OF DEVELOPER EXPRESS INC. THE REGISTERED DEVELOPER IS }
{ LICENSED TO DISTRIBUTE THE EXPRESSSPELLCHECKER AND ALL }
{ ACCOMPANYING VCL CONTROLS AS PART OF AN EXECUTABLE PROGRAM ONLY. }
{ }
{ THE SOURCE CODE CONTAINED WITHIN THIS FILE AND ALL RELATED }
{ FILES OR ANY PORTION OF ITS CONTENTS SHALL AT NO TIME BE }
{ COPIED, TRANSFERRED, SOLD, DISTRIBUTED, OR OTHERWISE MADE }
{ AVAILABLE TO OTHER INDIVIDUALS WITHOUT EXPRESS WRITTEN CONSENT }
{ AND PERMISSION FROM DEVELOPER EXPRESS INC. }
{ }
{ CONSULT THE END USER LICENSE AGREEMENT FOR INFORMATION ON }
{ ADDITIONAL RESTRICTIONS. }
{ }
{********************************************************************}
unit dxISpellDecompressor;
{$I cxVer.inc}
interface
uses
Windows, Classes, SysUtils, cxClasses, dxSpellChecker, dxSpellCheckerUtils;
type
TdxCustomAffixElement = class;
TdxAffixCompressionDictionary = class;
TdxOpenOfficeDictionary = class;
TdxAffixElementState = (aesNone, aesPrefix, aesSuffix);
{ TdxCharValidator }
TdxCharValidator = class
private
FAnyChar: Boolean;
FChars: WideString;
FValid: Boolean;
public
constructor Create(const AChars: WideString; AValid: Boolean);
constructor CreateAnyChar;
function IsValidChar(AChar: WideChar): Boolean;
end;
{ TdxWordValidator }
TdxRuleValidator = class(TcxObjectList)
private
function GetItem(Index: Integer): TdxCharValidator; {$IFDEF DELPHI9} inline; {$ENDIF}
public
procedure AddAnyCharValid;
procedure AddCharInfo(const AChars: WideString; AValid: Boolean);
function IsValid(const AWord: WideString; AFromStart: Boolean): Boolean;
property Items[Index: Integer]: TdxCharValidator read GetItem; default;
end;
{ TdxAffixRule }
TdxAffixRule = class
private
FAffix: TdxCustomAffixElement;
FAppendString: WideString;
FForceValid: Boolean;
FStripString: WideString;
FValidator: TdxRuleValidator;
function GetIsPrefix: Boolean; {$IFDEF DELPHI9} inline; {$ENDIF}
protected
function DoParse(const ARule: WideString): Boolean; virtual; abstract;
function RemoveComments(const ARule: WideString): WideString; virtual; abstract;
property Affix: TdxCustomAffixElement read FAffix;
property AppendString: WideString read FAppendString write FAppendString;
property ForceValid: Boolean read FForceValid write FForceValid;
property IsPrefix: Boolean read GetIsPrefix;
property StripString: WideString read FStripString write FStripString;
property Validator: TdxRuleValidator read FValidator;
public
constructor Create(AAffix: TdxCustomAffixElement);
destructor Destroy; override;
function CanApply(const AWord: WideString): Boolean;
function GetWordform(const AWord, ACheckStripWord: WideString): WideString;
function Parse(const ARule: WideString): Boolean; virtual;
end;
{ TdxAffixRuleList }
TdxAffixRuleList = class(TcxObjectList)
private
function GetItem(Index: Integer): TdxAffixRule; {$IFDEF DELPHI9} inline; {$ENDIF}
public
procedure AddWordforms(const AWord: WideString;
ADictionary: TdxSpellCheckerStrings; ALangID: Cardinal);
property Items[Index: Integer]: TdxAffixRule read GetItem; default;
end;
{ TdxCustomAffixElement }
TdxCustomAffixElement = class
private
FCanCombine: Boolean;
FKey: WideChar;
FLangID: Cardinal;
FRules: TdxAffixRuleList;
protected
function CreateRule: TdxAffixRule; virtual; abstract;
function GetIsPrefix: Boolean; virtual; abstract;
property CanCombine: Boolean read FCanCombine;
property IsPrefix: Boolean read GetIsPrefix;
property LangID: Cardinal read FLangID;
property Rules: TdxAffixRuleList read FRules;
public
constructor Create(AKey: WideChar; ACanCombine: Boolean; ALangID: Cardinal); virtual;
destructor Destroy; override;
procedure AddRule(ARuleDefinition: WideString);
procedure AddWordforms(const AWord: WideString; ADictionary: TdxSpellCheckerStrings); virtual;
property Key: WideChar read FKey;
end;
{ TdxAffixList }
TdxAffixList = class(TcxObjectList)
private
function GetItem(Index: Integer): TdxCustomAffixElement; {$IFDEF DELPHI9} inline; {$ENDIF}
public
function FindForKey(AKey: WideChar; AIsPrefix: Boolean): TdxCustomAffixElement;
property Items[Index: Integer]: TdxCustomAffixElement read GetItem; default;
end;
// ISpell support
{ TdxISpellAffixRule }
TdxISpellAffixRule = class(TdxAffixRule)
protected
procedure BuildValidator(const S: WideString); virtual;
function DoParse(const ARule: WideString): Boolean; override;
procedure ParseNewWordform(const S: WideString); virtual;
function RemoveComments(const ARule: WideString): WideString; override;
end;
{ TdxISpellPrefix }
TdxISpellPrefix = class(TdxCustomAffixElement)
protected
function CreateRule: TdxAffixRule; override;
function GetIsPrefix: Boolean; override;
end;
{ TdxISpellSuffix }
TdxISpellSuffix = class(TdxISpellPrefix)
protected
function GetIsPrefix: Boolean; override;
end;
{ TdxCustomAffixDecompressor }
TdxCustomAffixDecompressor = class
private
FAffixes: TdxAffixList;
FAlphabet: WideString;
FCodePage: Cardinal;
FDictionary: TdxAffixCompressionDictionary;
FPrefixes: TList;
FSuffixes: TList;
FWordforms: TdxSpellCheckerStrings;
protected
procedure CreateWordforms(const AWordDefinition: WideString); virtual;
procedure CreateWordFormsForKeys(const AWord, AKeys: WideString);
procedure GetLanguageInfo(AStream: TStream; out ACodePage: Cardinal;
out AAlphabet: WideString); virtual; abstract;
procedure ExtractWordInfo(const AWordDefinition: WideString; out AWord, AKeys: WideString);
procedure InitParsing; virtual;
procedure Parse(AStrings: TdxSpellCheckerStrings); virtual;
procedure ParseLine(S: WideString); virtual; abstract;
function RemoveComments(const S: WideString): WideString; virtual;
procedure Reset;
property Prefixes: TList read FPrefixes;
property Suffixes: TList read FSuffixes;
property Wordforms: TdxSpellCheckerStrings read FWordforms;
public
constructor Create(ADictionary: TdxAffixCompressionDictionary); virtual;
destructor Destroy; override;
procedure LoadFromFile(const AFileName: TFileName); virtual;
procedure LoadFromStream(AStream: TStream); virtual;
procedure PopulateWordForms(const AWordDefinition: WideString;
ADictionary: TdxSpellCheckerWordList); virtual;
property Affixes: TdxAffixList read FAffixes;
property Alphabet: WideString read FAlphabet;
property CodePage: Cardinal read FCodePage;
property Dictionary: TdxAffixCompressionDictionary read FDictionary;
end;
{ TdxAffixCompressionDictionary }
TdxAffixCompressionDictionary = class(TdxCustomSpellCheckerDictionary)
private
FDictionaryPath: TFileName;
FGrammarPath: TFileName;
procedure SetDictionaryPath(const AValue: TFileName);
procedure SetGrammarPath(const AValue: TFileName);
protected
function CreateDecompressor: TdxCustomAffixDecompressor; virtual; abstract;
function DoLoad: Boolean; override;
function GetDisplayName: string; override;
procedure LoadGrammar(ADecompressor: TdxCustomAffixDecompressor); virtual;
procedure LoadWordBase(AWordBase: TdxSpellCheckerStrings); virtual;
procedure UpdateByDecompressor(ADecompressor: TdxCustomAffixDecompressor); virtual;
public
procedure Assign(Source: TPersistent); override;
published
property DictionaryPath: TFileName read FDictionaryPath write SetDictionaryPath;
property GrammarPath: TFileName read FGrammarPath write SetGrammarPath;
end;
{ TdxISpellDecompressor }
TdxISpellDecompressor = class(TdxCustomAffixDecompressor)
private
FCurrentAffix: TdxCustomAffixElement;
FCurrentState: TdxAffixElementState;
protected
function CreateAffix(AKey: WideChar; ACanCombine: Boolean; AState: TdxAffixElementState): TdxCustomAffixElement; virtual;
procedure ExtractKeyInfo(const ADefinition: WideString; out AKey: WideChar; out ACanCombine: Boolean); virtual;
procedure GetLanguageInfo(AStream: TStream; out ACodePage: Cardinal;
out AAlphabet: WideString); override;
procedure InitParsing; override;
function IsUnsupportedKeyWord(const S: WideString): Boolean; virtual;
procedure ParseLine(S: WideString); override;
end;
{ TdxISpellDictionary }
TdxISpellDictionary = class(TdxAffixCompressionDictionary)
protected
function CreateDecompressor: TdxCustomAffixDecompressor; override;
published
property Alphabet;
property CheckCapitalization;
property CodePage;
property DictionaryPath;
property Enabled;
property GrammarPath;
property Language;
property OnLoaded;
property OnLoading;
end;
// Hunspell support
{ TdxHunspellAffixRule }
TdxHunspellAffixRule = class(TdxISpellAffixRule)
protected
procedure BuildValidator(const S: WideString); override;
function DoParse(const ARule: WideString): Boolean; override;
end;
{ TdxHunspellPrefix }
TdxHunspellPrefix = class(TdxCustomAffixElement)
protected
function CreateRule: TdxAffixRule; override;
function GetIsPrefix: Boolean; override;
end;
{ TdxHunspellSuffix }
TdxHunspellSuffix = class(TdxHunspellPrefix)
protected
function GetIsPrefix: Boolean; override;
end;
{ TdxHunspellDecompressor }
TdxHunspellDecompressor = class(TdxCustomAffixDecompressor)
private
FCurrentKey: WideChar;
FCurrentKeyCanCombine: Boolean;
FRepCount: Integer;
function GetDictionary: TdxOpenOfficeDictionary;
protected
function CreateAffix(AKey: WideChar; ACanCombine: Boolean;
AState: TdxAffixElementState): TdxCustomAffixElement; virtual;
procedure GetLanguageInfo(AStream: TStream; out ACodePage: Cardinal;
out AAlphabet: WideString); override;
procedure InitParsing; override;
procedure ParseLine(S: WideString); override;
procedure ProcessAlphabet(S: WideString);
procedure ProcessAffix(S: WideString; AState: TdxAffixElementState);
procedure ProcessSubstitution(S: WideString);
public
property Dictionary: TdxOpenOfficeDictionary read GetDictionary;
end;
{ TdxOpenOfficeSuggestionBuilder }
TdxOpenOfficeSuggestionBuilder = class(TdxNearMissStrategy)
private
function GetDictionary: TdxOpenOfficeDictionary;
protected
procedure DoAddSuggestions; override;
function FindPos(const ASubStr, AStr: WideString; var AStartPos: Integer): Boolean;
procedure ProcessSubstitutions;
public
property Dictionary: TdxOpenOfficeDictionary read GetDictionary;
end;
{ TdxOpenOfficeDictionary }
TdxOpenOfficeDictionary = class(TdxAffixCompressionDictionary)
private
FSubstitutions: TdxSpellCheckerReplacementList;
protected
function CreateDecompressor: TdxCustomAffixDecompressor; override;
function CreateSuggestionBuilder: TdxSpellCheckerSuggestionBuilder; override;
procedure UpdateByDecompressor(ADecompressor: TdxCustomAffixDecompressor); override;
property Substitutions: TdxSpellCheckerReplacementList read FSubstitutions;
public
constructor Create(ASpellChecker: TdxCustomSpellChecker); override;
destructor Destroy; override;
published
property CheckCapitalization;
property DictionaryPath;
property Enabled;
property GrammarPath;
property Language;
property OnLoaded;
property OnLoading;
end;
implementation
uses
{$IFDEF DELPHI12}
AnsiStrings,
{$ENDIF}
dxSpellCheckerStrs, dxCore;
type
TdxHunspellCodePage = record
Name: AnsiString;
CodePage: Cardinal;
Description: AnsiString;
end;
const
TdxHunspellCodePages: array [0..17] of TdxHunspellCodePage = (
(Name: 'UTF-8'; CodePage: 65001; Description: 'UTF-8'),
(Name: 'ISO8859-1'; CodePage: 28591; Description: 'ISO 8859-1 Latin I'),
(Name: 'ISO8859-2'; CodePage: 28592; Description: 'ISO 8859-2 Central Europe'),
(Name: 'ISO8859-3'; CodePage: 28593; Description: 'ISO 8859-3 Latin 3'),
(Name: 'ISO8859-4'; CodePage: 28594; Description: 'ISO 8859-4 Baltic'),
(Name: 'ISO8859-5'; CodePage: 28595; Description: 'ISO 8859-5 Cyrillic'),
(Name: 'ISO8859-6'; CodePage: 28596; Description: 'ISO 8859-6 Arabic'),
(Name: 'ISO8859-7'; CodePage: 28597; Description: 'ISO 8859-7 Greek'),
(Name: 'ISO8859-8'; CodePage: 28598; Description: 'ISO 8859-8 Hebrew: Visual Ordering'),
(Name: 'ISO8859-9'; CodePage: 28599; Description: 'ISO 8859-9 Latin 5'),
(Name: 'ISO8859-10'; CodePage: 28600; Description: 'ISO 8859-10 Latin 6'),
(Name: 'ISO8859-13'; CodePage: 28603; Description: 'ISO 8859-13 Latin 7'),
(Name: 'ISO8859-14'; CodePage: 28604; Description: 'ISO 8859-14 Latin 8'),
(Name: 'ISO8859-15'; CodePage: 28605; Description: 'ISO 8859-15 Latin 9'),
(Name: 'KOI8-R'; CodePage: 20866; Description: 'Russian - KOI8'),
(Name: 'KOI8-U'; CodePage: 21866; Description: 'Ukrainian - KOI8-U'),
(Name: 'microsoft-cp1251'; CodePage: 1251; Description: 'ANSI - Cyrillic'),
(Name: 'ISCII-DEVANAGARI'; CodePage: 57002; Description: 'ISCII - Devanagari')
);
function RemoveGrammarComments(const S: WideString): WideString; overload;
var
APos: Integer;
begin
APos := WideCharPos('#', S);
if APos > 0 then
Result := Trim(Copy(S, 1, APos - 1))
else
Result := Trim(S);
end;
function RemoveGrammarComments(const S: AnsiString): AnsiString; overload;
var
APos: Integer;
begin
APos := AnsiPos(AnsiChar('#'), S);
if APos > 0 then
Result := Trim(Copy(S, 1, APos - 1))
else
Result := Trim(S);
end;
function GetFirstWord(var S: AnsiString; ARemove: Boolean = False): AnsiString; overload;
var
L, R: Integer;
begin
L := 1;
while (L <= Length(S)) and (S[L] in [#0..' ']) do Inc(L);
R := L + 1;
while (R <= Length(S)) and not (S[R] in [#0..' ']) do Inc(R);
Result := Copy(S, L, R - L);
if ARemove and (Length(Result) > 0) then
begin
while (R <= Length(S)) and (S[R] in [#0..' ']) do Inc(R);
Delete(S, 1, R - 1);
end;
end;
function GetFirstWord(var S: WideString; ARemove: Boolean = False): WideString; overload;
var
L, R: Integer;
begin
L := 1;
while (L <= Length(S)) and WideIsSpace(S[L]) do Inc(L);
R := L + 1;
while (R <= Length(S)) and not WideIsSpace(S[R]) do Inc(R);
Result := Copy(S, L, R - L);
if ARemove and (Length(Result) > 0) then
begin
while (R <= Length(S)) and WideIsSpace(S[R]) do Inc(R);
Delete(S, 1, R - 1);
end;
end;
{ TdxCharValidator }
constructor TdxCharValidator.Create(const AChars: WideString; AValid: Boolean);
begin
inherited Create;
FAnyChar := False;
FChars := AChars;
FValid := AValid;
end;
constructor TdxCharValidator.CreateAnyChar;
begin
inherited Create;
FAnyChar := True;
end;
function TdxCharValidator.IsValidChar(AChar: WideChar): Boolean;
begin
Result := FAnyChar or ((WideCharPos(AChar, FChars) < 1) xor FValid);
end;
{ TdxRuleValidator }
procedure TdxRuleValidator.AddAnyCharValid;
begin
inherited Add(TdxCharValidator.CreateAnyChar);
end;
procedure TdxRuleValidator.AddCharInfo(const AChars: WideString;
AValid: Boolean);
begin
if Length(AChars) > 0 then
inherited Add(TdxCharValidator.Create(AChars, AValid));
end;
function TdxRuleValidator.IsValid(const AWord: WideString;
AFromStart: Boolean): Boolean;
var
I, AStart, ALen: Integer;
begin
Result := False;
ALen := Length(AWord);
if ALen < Count then Exit;
if AFromStart then
AStart := 1
else
AStart := ALen - Count + 1;
for I := 0 to Count - 1 do
begin
if not Items[I].IsValidChar(AWord[AStart]) then
Exit;
Inc(AStart);
end;
Result := True;
end;
function TdxRuleValidator.GetItem(Index: Integer): TdxCharValidator;
begin
Result := TdxCharValidator(List^[Index]);
end;
{ TdxAffixRule }
constructor TdxAffixRule.Create(AAffix: TdxCustomAffixElement);
begin
inherited Create;
FAffix := AAffix;
FValidator := TdxRuleValidator.Create;
end;
destructor TdxAffixRule.Destroy;
begin
FValidator.Free;
inherited Destroy;
end;
function TdxAffixRule.CanApply(const AWord: WideString): Boolean;
begin
Result := FForceValid or FValidator.IsValid(AWord, IsPrefix);
end;
function TdxAffixRule.GetWordform(const AWord, ACheckStripWord: WideString): WideString;
var
ALen: Integer;
begin
Result := AWord;
ALen := Length(StripString);
if ALen > 0 then
begin
if IsPrefix then
begin
if WideStartsWith(StripString, ACheckStripWord) then
Delete(Result, 1, ALen);
end
else
begin
if WideEndsWith(StripString, ACheckStripWord) then
Delete(Result, Length(Result) - ALen + 1, ALen);
end;
end;
if Length(AppendString) > 0 then
begin
if IsPrefix then
Result := AppendString + Result
else
Result := Result + AppendString;
end;
end;
function TdxAffixRule.Parse(const ARule: WideString): Boolean;
begin
Result := DoParse(RemoveComments(ARule));
end;
function TdxAffixRule.GetIsPrefix: Boolean;
begin
Result := FAffix.IsPrefix;
end;
{ TdxAffixRuleList }
procedure TdxAffixRuleList.AddWordforms(const AWord: WideString;
ADictionary: TdxSpellCheckerStrings; ALangID: Cardinal);
var
I, ALen: Integer;
ARule: TdxAffixRule;
ALower: WideString;
begin
ALen := Length(AWord);
SetLength(ALower, ALen);
LCMapStringW(ALangID, LCMAP_LOWERCASE, Pointer(AWord), ALen, Pointer(ALower), ALen);
for I := 0 to Count - 1 do
begin
ARule := Items[I];
if ARule.CanApply(ALower) then
ADictionary.Add(ARule.GetWordform(AWord, ALower));
end;
end;
function TdxAffixRuleList.GetItem(Index: Integer): TdxAffixRule;
begin
Result := TdxAffixRule(List^[Index]);
end;
{ TdxCustomAffixElement }
constructor TdxCustomAffixElement.Create(AKey: WideChar; ACanCombine: Boolean;
ALangID: Cardinal);
begin
inherited Create;
FCanCombine := ACanCombine;
FKey := AKey;
FLangID := ALangID;
FRules := TdxAffixRuleList.Create;
end;
destructor TdxCustomAffixElement.Destroy;
begin
FRules.Free;
inherited Destroy;
end;
procedure TdxCustomAffixElement.AddRule(ARuleDefinition: WideString);
var
ARule: TdxAffixRule;
begin
ARule := CreateRule;
LCMapStringW(LangID, LCMAP_LOWERCASE, @(ARuleDefinition[1]),
Length(ARuleDefinition), @(ARuleDefinition[1]), Length(ARuleDefinition));
if ARule.Parse(ARuleDefinition) then
Rules.Add(ARule)
else
ARule.Free;
end;
procedure TdxCustomAffixElement.AddWordforms(const AWord: WideString;
ADictionary: TdxSpellCheckerStrings);
begin
Rules.AddWordforms(AWord, ADictionary, LangID);
end;
{ TdxAffixList }
function TdxAffixList.FindForKey(AKey: WideChar; AIsPrefix: Boolean): TdxCustomAffixElement;
var
I: Integer;
begin
for I := 0 to Count - 1 do
begin
Result := Items[I];
if (Result.Key = AKey) and (Result.IsPrefix = AIsPrefix) then
Exit;
end;
Result := nil;
end;
function TdxAffixList.GetItem(Index: Integer): TdxCustomAffixElement;
begin
Result := TdxCustomAffixElement(List^[Index]);
end;
{ TdxISpellAffixRule }
procedure TdxISpellAffixRule.BuildValidator(const S: WideString);
var
AChar: WideChar;
AChars: WideString;
I: Integer;
AValid: Boolean;
procedure AddValidatorRule;
begin
if AChars <> '' then
begin
Validator.AddCharInfo(AChars, AValid);
AChars := '';
end;
AValid := True;
end;
begin
AValid := True;
for I := 1 to Length(S) do
begin
AChar := S[I];
case AChar of
#$09, ' ', '[', ']': AddValidatorRule;
'^': AValid := False;
else
AChars := AChars + AChar;
end;
end;
AddValidatorRule;
end;
procedure TdxISpellAffixRule.ParseNewWordform(const S: WideString);
type
TdxStateWordform = (swNone, swStrip, swAppend);
var
I: Integer;
AChar: WideChar;
AState: TdxStateWordform;
begin
AState := swNone;
for I := 1 to Length(S) do
begin
AChar := S[I];
case AChar of
#$0000..' ': Continue;
',': AState := swNone;
'-': AState := swStrip;
else
case AState of
swStrip: StripString := StripString + AChar;
swNone:
begin
AState := swAppend;
AppendString := AppendString + AChar;
end;
else
AppendString := AppendString + AChar;
end;
end;
end;
end;
function TdxISpellAffixRule.RemoveComments(const ARule: WideString): WideString;
begin
Result := RemoveGrammarComments(ARule);
end;
function TdxISpellAffixRule.DoParse(const ARule: WideString): Boolean;
var
S: WideString;
APos: Integer;
begin
Result := False;
APos := WideCharPos('>', ARule);
if APos < 1 then Exit;
S := Trim(Copy(ARule, 1, APos - 1));
ForceValid := S = '.';
if not ForceValid then
BuildValidator(S);
S := Trim(Copy(ARule, APos + 1, Length(ARule)));
if Length(S) > 0 then
begin
ParseNewWordform(S);
Result := (ForceValid or (Validator.Count > 0)) and
((Length(StripString) > 0) or (Length(AppendString) > 0))
end;
end;
{ TdxISpellPrefix }
function TdxISpellPrefix.CreateRule: TdxAffixRule;
begin
Result := TdxISpellAffixRule.Create(Self);
end;
function TdxISpellPrefix.GetIsPrefix: Boolean;
begin
Result := True;
end;
{ TdxISpellSuffix }
function TdxISpellSuffix.GetIsPrefix: Boolean;
begin
Result := False;
end;
{ TdxCustomAffixDecompressor }
constructor TdxCustomAffixDecompressor.Create(
ADictionary: TdxAffixCompressionDictionary);
begin
inherited Create;
FDictionary := ADictionary;
FCodePage := CP_ACP;
FAffixes := TdxAffixList.Create;
FPrefixes := TList.Create;
FSuffixes := TList.Create;
FWordforms := TdxSpellCheckerStrings.Create;
end;
destructor TdxCustomAffixDecompressor.Destroy;
begin
FWordforms.Free;
FPrefixes.Free;
FSuffixes.Free;
FAffixes.Free;
inherited Destroy;
end;
procedure TdxCustomAffixDecompressor.CreateWordforms(
const AWordDefinition: WideString);
var
AWord, AKeys: WideString;
begin
ExtractWordInfo(RemoveComments(AWordDefinition), AWord, AKeys);
if AWord <> '' then
begin
Reset;
Wordforms.Add(AWord);
CreateWordFormsForKeys(AWord, AKeys);
end;
end;
procedure TdxCustomAffixDecompressor.CreateWordFormsForKeys(const AWord,
AKeys: WideString);
var
AAffix: TdxCustomAffixElement;
I, J, K, AStartIndex: Integer;
begin
if AKeys = '' then Exit;
for I := 1 to Length(AKeys) do
begin
for J := 0 to Affixes.Count - 1 do
begin
AAffix := Affixes[J];
if AAffix.Key = AKeys[I] then
begin
if AAffix.IsPrefix then
Prefixes.Add(AAffix)
else
Suffixes.Add(AAffix);
end;
end;
end;
for I := 0 to Prefixes.Count - 1 do
begin
AAffix := TdxCustomAffixElement(Prefixes[I]);
AStartIndex := Wordforms.Count;
AAffix.AddWordforms(AWord, Wordforms);
if AAffix.CanCombine then
for J := AStartIndex to Wordforms.Count - 1 do
for K := 0 to Suffixes.Count - 1 do
begin
AAffix := TdxCustomAffixElement(Suffixes[K]);
if AAffix.CanCombine then
AAffix.AddWordforms(Wordforms[J], Wordforms);
end;
end;
for I := 0 to Suffixes.Count - 1 do
TdxCustomAffixElement(Suffixes[I]).AddWordforms(AWord, Wordforms);
end;
procedure TdxCustomAffixDecompressor.ExtractWordInfo(
const AWordDefinition: WideString; out AWord, AKeys: WideString);
var
APos: Integer;
begin
AWord := '';
AKeys := '';
APos := WideCharPos('/', AWordDefinition);
if APos > 0 then
begin
AWord := Copy(AWordDefinition, 1, APos - 1);
AKeys := Copy(AWordDefinition, APos + 1, Length(AWordDefinition));
end
else
AWord := AWordDefinition;
end;
procedure TdxCustomAffixDecompressor.InitParsing;
begin
end;
procedure TdxCustomAffixDecompressor.LoadFromFile(const AFileName: TFileName);
var
AStream: TStream;
begin
AStream := TFileStream.Create(AFileName, fmOpenRead or fmShareDenyWrite);
try
LoadFromStream(AStream);
finally
AStream.Free;
end;
end;
procedure TdxCustomAffixDecompressor.LoadFromStream(AStream: TStream);
var
AStrings: TdxSpellCheckerStrings;
begin
GetLanguageInfo(AStream, FCodePage, FAlphabet);
Wordforms.CodePage := CodePage;
AStrings := TdxSpellCheckerStrings.Create(CodePage);
try
AStream.Position := 0;
AStrings.Capacity := 16384;
AStrings.LoadFromStream(AStream);
Parse(AStrings);
finally
AStrings.Free;
end;
end;
procedure TdxCustomAffixDecompressor.Parse(AStrings: TdxSpellCheckerStrings);
var
I: Integer;
S: WideString;
begin
for I := 0 to AStrings.Count - 1 do
begin
S := RemoveComments(AStrings[I]);
if S <> '' then
ParseLine(S);
end;
end;
procedure TdxCustomAffixDecompressor.PopulateWordForms(
const AWordDefinition: WideString; ADictionary: TdxSpellCheckerWordList);
var
I: Integer;
begin
CreateWordforms(AWordDefinition);
for I := 0 to Wordforms.Count - 1 do
begin
if Dictionary.LoadingTerminated then
Break;
ADictionary.Add(Wordforms[I]);
end;
end;
function TdxCustomAffixDecompressor.RemoveComments(
const S: WideString): WideString;
begin
Result := RemoveGrammarComments(S);
end;
procedure TdxCustomAffixDecompressor.Reset;
begin
Prefixes.Clear;
Suffixes.Clear;
Wordforms.Clear;
end;
{ TdxAffixCompressionDictionary }
procedure TdxAffixCompressionDictionary.Assign(Source: TPersistent);
begin
inherited Assign(Source);
if Source is TdxAffixCompressionDictionary then
begin
DictionaryPath := TdxAffixCompressionDictionary(Source).DictionaryPath;
GrammarPath := TdxAffixCompressionDictionary(Source).GrammarPath;
end;
end;
function TdxAffixCompressionDictionary.DoLoad: Boolean;
var
ADecompressor: TdxCustomAffixDecompressor;
AWordBase: TdxSpellCheckerStrings;
I: Integer;
begin
Result := True;
ADecompressor := CreateDecompressor;
try
LoadGrammar(ADecompressor);
AWordBase := TdxSpellCheckerStrings.Create(ADecompressor.CodePage);
try
LoadWordBase(AWordBase);
for I := 0 to AWordBase.Count - 1 do
if LoadingTerminated then
Abort
else
ADecompressor.PopulateWordForms(AWordBase[I], Words);
finally
AWordBase.Free;
end;
finally
ADecompressor.Free;
end;
end;
function TdxAffixCompressionDictionary.GetDisplayName: string;
var
AFileName: TFileName;
begin
Result := inherited GetDisplayName;
AFileName := SysUtils.ExtractFileName(DictionaryPath);
if AFileName <> '' then
Result := Format('%s (%s)', [Result, AFileName]);
end;
procedure TdxAffixCompressionDictionary.LoadGrammar(
ADecompressor: TdxCustomAffixDecompressor);
begin
ADecompressor.LoadFromFile(GrammarPath);
if not LoadingTerminated then
UpdateByDecompressor(ADecompressor);
end;
procedure TdxAffixCompressionDictionary.LoadWordBase(
AWordBase: TdxSpellCheckerStrings);
begin
AWordBase.LoadFromFile(DictionaryPath);
end;
procedure TdxAffixCompressionDictionary.UpdateByDecompressor(
ADecompressor: TdxCustomAffixDecompressor);
begin
end;
procedure TdxAffixCompressionDictionary.SetDictionaryPath(
const AValue: TFileName);
begin
if FDictionaryPath <> AValue then
begin
Unload;
FDictionaryPath := AValue;
end;
end;
procedure TdxAffixCompressionDictionary.SetGrammarPath(const AValue: TFileName);
begin
if FGrammarPath <> AValue then
begin
Unload;
FGrammarPath := AValue;
end;
end;
{ TdxISpellDecompressor }
function TdxISpellDecompressor.CreateAffix(AKey: WideChar; ACanCombine: Boolean;
AState: TdxAffixElementState): TdxCustomAffixElement;
begin
case AState of
aesPrefix: Result := TdxISpellPrefix.Create(AKey, ACanCombine, Dictionary.Language);
aesSuffix: Result := TdxISpellSuffix.Create(AKey, ACanCombine, Dictionary.Language);
else
Result := nil;
end;
if Result <> nil then
Affixes.Add(Result);
end;
procedure TdxISpellDecompressor.ExtractKeyInfo(const ADefinition: WideString;
out AKey: WideChar; out ACanCombine: Boolean);
var
I: Integer;
begin
ACanCombine := False;
AKey := #$0000;
for I := 1 to Length(ADefinition) do
case ADefinition[I] of
#$0000..' ': Continue;
':': Break;
'*': ACanCombine := True;
else
AKey := ADefinition[I];
end;
end;
procedure TdxISpellDecompressor.GetLanguageInfo(AStream: TStream;
out ACodePage: Cardinal; out AAlphabet: WideString);
begin
ACodePage := Dictionary.CodePage;
if ACodePage = CP_ACP then
ACodePage := GetACP;
AAlphabet := CreateDefaultAlphabet(ACodePage);
end;
procedure TdxISpellDecompressor.InitParsing;
begin
FCurrentState := aesNone;
FCurrentAffix := nil;
end;
function TdxISpellDecompressor.IsUnsupportedKeyWord(const S: WideString): Boolean;
const
UnsupportedKeyWords: array[0..6] of WideString = (
'allaffixes', 'altstringtype', 'altstringchar', 'boundarychars',
'defstringtype', 'stringchar', 'wordchars');
var
I: Integer;
begin
Result := False;
for I := Low(UnsupportedKeyWords) to High(UnsupportedKeyWords) do
if WideSameText(UnsupportedKeyWords[I], S) then
begin
Result := True;
Break;
end;
end;
procedure TdxISpellDecompressor.ParseLine(S: WideString);
var
W: WideString;
AAffix: TdxCustomAffixElement;
AKey: WideChar;
ACanCombine: Boolean;
begin
W := GetFirstWord(S);
if IsUnsupportedKeyWord(W) then
InitParsing
else if WideSameText('prefixes', W) then
begin
FCurrentState := aesPrefix;
FCurrentAffix := nil;
end
else if WideSameText('suffixes', W) then
begin
FCurrentState := aesSuffix;
FCurrentAffix := nil;
end
else if WideSameText('flag', W) then
begin
ExtractKeyInfo(Trim(Copy(S, 5, Length(S))), AKey, ACanCombine);
AAffix := Affixes.FindForKey(AKey, FCurrentState = aesPrefix);
if AAffix = nil then
FCurrentAffix := CreateAffix(AKey, ACanCombine, FCurrentState)
else
FCurrentAffix := AAffix;
end
else if FCurrentAffix <> nil then
FCurrentAffix.AddRule(S);
end;
{ TdxISpellDictionary }
function TdxISpellDictionary.CreateDecompressor: TdxCustomAffixDecompressor;
begin
Result := TdxISpellDecompressor.Create(Self);
end;
{ TdxHunspellAffixRule }
procedure TdxHunspellAffixRule.BuildValidator(const S: WideString);
var
AChar: WideChar;
AChars: WideString;
I: Integer;
AValid, AInSet: Boolean;
procedure AddValidatorRule;
begin
if AChars <> '' then
begin
Validator.AddCharInfo(AChars, AValid);
AChars := '';
end;
AValid := True;
end;
begin
AValid := True;
AInSet := False;
for I := 1 to Length(S) do
begin
AChar := S[I];
case AChar of
'[':
AInSet := True;
']':
begin
AInSet := False;
AddValidatorRule;
end;
'^':
AValid := False;
'.':
Validator.AddAnyCharValid;
else
if AInSet then
AChars := AChars + AChar
else
begin
AChars := AChar;
AddValidatorRule;
end;
end;
end;
end;
function TdxHunspellAffixRule.DoParse(const ARule: WideString): Boolean;
var
W, S: WideString;
begin
S := ARule;
W := GetFirstWord(S, True);
if W = '0' then
StripString := ''
else
StripString := W;
AppendString := GetFirstWord(S, True);
S := GetFirstWord(S, True);
ForceValid := S = '.';
if not ForceValid then
BuildValidator(S);
Result := (ForceValid or (Validator.Count > 0)) and
((Length(StripString) > 0) or (Length(AppendString) > 0));
end;
{ TdxHunspellPrefix }
function TdxHunspellPrefix.CreateRule: TdxAffixRule;
begin
Result := TdxHunspellAffixRule.Create(Self);
end;
function TdxHunspellPrefix.GetIsPrefix: Boolean;
begin
Result := True;
end;
{ TdxHunspellSuffix }
function TdxHunspellSuffix.GetIsPrefix: Boolean;
begin
Result := False;
end;
{ TdxHunspellDecompressor }
function TdxHunspellDecompressor.CreateAffix(AKey: WideChar;
ACanCombine: Boolean; AState: TdxAffixElementState): TdxCustomAffixElement;
begin
case AState of
aesPrefix: Result := TdxHunspellPrefix.Create(AKey, ACanCombine, Dictionary.Language);
aesSuffix: Result := TdxHunspellSuffix.Create(AKey, ACanCombine, Dictionary.Language);
else
Result := nil;
end;
if Result <> nil then
Affixes.Add(Result);
end;
procedure TdxHunspellDecompressor.GetLanguageInfo(AStream: TStream;
out ACodePage: Cardinal; out AAlphabet: WideString);
var
S, AWord, AnsiAlphabet: AnsiString;
L: TStrings;
I, J: Integer;
begin
AnsiAlphabet := '';
ACodePage := CP_ACP;
L := TStringList.Create;
try
L.LoadFromStream(AStream);
for I := 0 to L.Count - 1 do
begin
if (ACodePage <> CP_ACP) and (AnsiAlphabet <> '') then Break;
S := RemoveGrammarComments(dxStringToAnsiString(L[I]));
if S = '' then Continue;
AWord := GetFirstWord(S, True);
if AnsiSameText(AWord, AnsiString('try')) then
AnsiAlphabet := Trim(GetFirstWord(S))
else if AnsiSameText(AWord, AnsiString('set')) then
begin
S := Trim(GetFirstWord(S));
for J := Low(TdxHunspellCodePages) to High(TdxHunspellCodePages) do
if AnsiSameText(TdxHunspellCodePages[J].Name, S) then
begin
ACodePage := TdxHunspellCodePages[J].CodePage;
Break;
end;
end;
end;
finally
L.Free;
end;
if ACodePage = CP_ACP then
ACodePage := GetACP;
if AnsiAlphabet <> '' then
AAlphabet := CreateAlphabetFromAnsiString(ACodePage, AnsiAlphabet)
else
AAlphabet := CreateDefaultAlphabet(ACodePage);
end;
procedure TdxHunspellDecompressor.InitParsing;
begin
FCurrentKey := #00;
FRepCount := 0;
end;
procedure TdxHunspellDecompressor.ParseLine(S: WideString);
var
W: WideString;
begin
W := GetFirstWord(S, True);
if WideSameText('PFX', W) then
ProcessAffix(S, aesPrefix)
else if WideSameText('SFX', W) then
ProcessAffix(S, aesSuffix)
else if WideSameText('REP', W) then
ProcessSubstitution(S)
else if WideSameText('TRY', W) then
ProcessAlphabet(S);
end;
procedure TdxHunspellDecompressor.ProcessAlphabet(S: WideString);
begin
Dictionary.Alphabet := S;
end;
procedure TdxHunspellDecompressor.ProcessAffix(S: WideString;
AState: TdxAffixElementState);
var
W: WideString;
AAffix: TdxCustomAffixElement;
begin
W := GetFirstWord(S, True);
if Length(W) <> 1 then Exit;
if W <> FCurrentKey then
begin
FCurrentKey := W[1];
W := GetFirstWord(S, True);
FCurrentKeyCanCombine := WideSameText(W, 'Y');
W := GetFirstWord(S, True);
end
else
begin
AAffix := Affixes.FindForKey(FCurrentKey, AState = aesPrefix);
if AAffix = nil then
AAffix := CreateAffix(FCurrentKey, FCurrentKeyCanCombine, AState);
AAffix.AddRule(S);
end;
end;
procedure TdxHunspellDecompressor.ProcessSubstitution(S: WideString);
var
W: WideString;
begin
if FRepCount = 0 then
FRepCount := StrToIntDef(GetFirstWord(S), 0)
else
begin
S := WideLowerCase(S);
W := GetFirstWord(S, True);
Dictionary.Substitutions.Add(W, GetFirstWord(S));
Dec(FRepCount);
end;
end;
function TdxHunspellDecompressor.GetDictionary: TdxOpenOfficeDictionary;
begin
Result := TdxOpenOfficeDictionary(inherited Dictionary);
end;
{ TdxOpenOfficeSuggestionBuilder }
procedure TdxOpenOfficeSuggestionBuilder.DoAddSuggestions;
begin
ProcessSubstitutions;
inherited DoAddSuggestions;
end;
function TdxOpenOfficeSuggestionBuilder.FindPos(const ASubStr, AStr: WideString;
var AStartPos: Integer): Boolean;
var
I, J, AMaxIndex, ASubStrLen, AStrLen: Integer;
begin
Result := False;
if AStartPos <= 0 then Exit;
AStrLen := Length(AStr);
if (AStrLen = 0) or (AStartPos > AStrLen) then Exit;
ASubStrLen := Length(ASubStr);
AMaxIndex := AStrLen - ASubStrLen + 1;
if (ASubStrLen = 0) or (AStartPos > AMaxIndex) then Exit;
repeat
while (AStartPos <= AMaxIndex) and (ASubStr[1] <> AStr[AStartPos]) do
Inc(AStartPos);
if AStartPos > AMaxIndex then Exit;
J := AStartPos + 1;
Result := True;
for I := 2 to ASubStrLen do
begin
if ASubStr[I] <> AStr[J] then
begin
Result := False;
Inc(AStartPos);
Break;
end;
Inc(J);
end;
until Result;
end;
procedure TdxOpenOfficeSuggestionBuilder.ProcessSubstitutions;
var
ASubstitutions: TdxSpellCheckerReplacementList;
ATestWord, T, R: WideString;
I, APos: Integer;
begin
ASubstitutions := Dictionary.Substitutions;
for I := 0 to ASubstitutions.Count - 1 do
begin
APos := 1;
R := ASubstitutions[I].Replacement;
T := ASubstitutions[I].Text;
while FindPos(T, Word, APos) do
begin
ATestWord := Word;
Delete(ATestWord, APos, Length(T));
Insert(R, ATestWord, APos);
if Dictionary.HasWord(ATestWord) then
Suggestions.Add(ATestWord, Dictionary, 1);
Inc(APos);
end;
end;
end;
function TdxOpenOfficeSuggestionBuilder.GetDictionary: TdxOpenOfficeDictionary;
begin
Result := TdxOpenOfficeDictionary(inherited Dictionary);
end;
{ TdxOpenOfficeDictionary }
constructor TdxOpenOfficeDictionary.Create(
ASpellChecker: TdxCustomSpellChecker);
begin
inherited Create(ASpellChecker);
FSubstitutions := TdxSpellCheckerReplacementList.Create(True);
end;
destructor TdxOpenOfficeDictionary.Destroy;
begin
FSubstitutions.Free;
inherited Destroy;
end;
function TdxOpenOfficeDictionary.CreateDecompressor: TdxCustomAffixDecompressor;
begin
Result := TdxHunspellDecompressor.Create(Self);
end;
function TdxOpenOfficeDictionary.CreateSuggestionBuilder: TdxSpellCheckerSuggestionBuilder;
begin
Result := TdxOpenOfficeSuggestionBuilder.Create(Self);
end;
procedure TdxOpenOfficeDictionary.UpdateByDecompressor(
ADecompressor: TdxCustomAffixDecompressor);
begin
CodePage := ADecompressor.CodePage;
Substitutions.SortByText;
end;
initialization
GetRegisteredDictionaryTypes.Register(TdxISpellDictionary, cxGetResourceString(@sdxSpellCheckerISpellDictionary));
GetRegisteredDictionaryTypes.Register(TdxOpenOfficeDictionary, cxGetResourceString(@sdxSpellCheckerOpenOfficeDictionary));
end.