Componentes.Terceros.DevExp.../official/x.48/ExpressSpellChecker 2/Sources/dxHunspellWords.pas
2010-01-18 18:33:24 +00:00

1270 lines
37 KiB
ObjectPascal

{********************************************************************}
{ }
{ Developer Express Visual Component Library }
{ ExpressSpellChecker }
{ }
{ Copyright (c) 1998-2009 Developer Express Inc. }
{ ALL RIGHTS RESERVED }
{ }
{ The entire contents of this file is protected by U.S. and }
{ International Copyright Laws. Unauthorized reproduction, }
{ reverse-engineering, and distribution of all or any portion of }
{ the code contained in this file is strictly prohibited and may }
{ result in severe civil and criminal penalties and will be }
{ prosecuted to the maximum extent possible under the law. }
{ }
{ RESTRICTIONS }
{ }
{ THIS SOURCE CODE AND ALL RESULTING INTERMEDIATE FILES }
{ (DCU, OBJ, DLL, ETC.) ARE CONFIDENTIAL AND PROPRIETARY TRADE }
{ SECRETS OF DEVELOPER EXPRESS INC. THE REGISTERED DEVELOPER IS }
{ LICENSED TO DISTRIBUTE THE EXPRESSSPELLCHECKER AND ALL }
{ ACCOMPANYING VCL CONTROLS AS PART OF AN EXECUTABLE PROGRAM ONLY. }
{ }
{ THE SOURCE CODE CONTAINED WITHIN THIS FILE AND ALL RELATED }
{ FILES OR ANY PORTION OF ITS CONTENTS SHALL AT NO TIME BE }
{ COPIED, TRANSFERRED, SOLD, DISTRIBUTED, OR OTHERWISE MADE }
{ AVAILABLE TO OTHER INDIVIDUALS WITHOUT EXPRESS WRITTEN CONSENT }
{ AND PERMISSION FROM DEVELOPER EXPRESS INC. }
{ }
{ CONSULT THE END USER LICENSE AGREEMENT FOR INFORMATION ON }
{ ADDITIONAL RESTRICTIONS. }
{ }
{********************************************************************}
unit dxHunspellWords;
{$I cxVer.inc}
interface
uses
SysUtils, Classes, cxClasses, dxHunspellTypes, dxSpellCheckerUtils;
const
BUFSIZE = 65536;
dxHunspellMaxDictionaryCount = 20;
RotateLength = 5;
UserWordCount = 1000;
type
TdxHunspellWordBaseManager = class;
TdxHunspellWordItem = class;
TdxHunspellFlags = class;
TdxAffixFlagMode = (afmChar, afmLong, afmNum, afmUni);
TdxCapitalizationType = (ctNoCapital, ctFirstCapital, ctAllCapital,
ctMixedCapital, ctFirstMixedCapital);
TdxWordBaseOption = (wboMorphologyDescription, wboMorphologyAliases,
wboMorphologyPhone);
TdxWordBaseOptions = set of TdxWordBaseOption;
PdxHunspellWordBaseTable = ^TdxSpellCheckerHashTable;
TdxSpellCheckerHashTable = array[0..0] of TdxHunspellWordItem;
PdxAffixMorphologicAliases = ^TdxSpellCheckerArrayOfPAnsiChar;
TdxSpellCheckerArrayOfPAnsiChar = array[0..0] of PAnsiChar;
PdxAffixFlagsData = ^TdxAffixFlagsData;
TdxAffixFlagsData = array[0..0] of Word;
{ TdxHunspellFlags }
TdxHunspellFlags = class
private
FData: PdxAffixFlagsData;
FDataSize: Integer;
FLength: Integer;
FWordBaseManager: TdxHunspellWordBaseManager;
procedure DisposeData;
function GetItem(Index: Integer): Word;
procedure SetLength(ANewLength: Integer);
public
constructor Create(AWordBaseManager: TdxHunspellWordBaseManager);
constructor CreateCapitalized(AFlags: TdxHunspellFlags);
destructor Destroy; override;
procedure Assign(ASource: TdxHunspellFlags);
procedure AssignCapitalized(ASource: TdxHunspellFlags);
function ContainsFlag(AFlag: Word): Boolean;
function Decode(AFlags: PAnsiChar): Boolean;
function InitializeByAlias(AAlias: PAnsiChar): Boolean;
procedure Sort;
property Data: PdxAffixFlagsData read FData write FData;
property Items[Index: Integer]: Word read GetItem; default;
property Length: Integer read FLength write SetLength;
end;
{ TdxHunspellFlagsList }
TdxHunspellFlagsList = class(TcxObjectList)
private
function GetItem(Index: Integer): TdxHunspellFlags;
public
property Items[Index: Integer]: TdxHunspellFlags read GetItem; default;
end;
{ TdxHunspellWordItem }
TdxHunspellWordItem = class
private
FAffixFlags: TdxHunspellFlags;
FNext: TdxHunspellWordItem;
FNextHomonym: TdxHunspellWordItem;
FMorphologicalDescription: PAnsiChar;
FOptions: TdxWordBaseOptions;
FWordBase: PAnsiChar;
FWordBaseManager: TdxHunspellWordBaseManager;
function GetWordBaseLength: Byte;
function IsAffixMorphologicAliasUsed: Boolean;
procedure SetAffixFlags(const Value: TdxHunspellFlags);
public
clen: Byte; // word length in characters (different for UTF-8 enc.)
constructor Create(AWordBaseManager: TdxHunspellWordBaseManager);
destructor Destroy; override;
function IsCompatibleWithFlag(AFlag: Word; CompatibleIfNull: Boolean = False): Boolean;
property AffixFlags: TdxHunspellFlags read FAffixFlags write SetAffixFlags;
property MorphologicalDescription: PAnsiChar read FMorphologicalDescription write FMorphologicalDescription;
property Next: TdxHunspellWordItem read FNext write FNext;
property NextHomonym: TdxHunspellWordItem read FNextHomonym write FNextHomonym;
property Options: TdxWordBaseOptions read FOptions write FOptions;
property WordBase: PAnsiChar read FWordBase write FWordBase;
property WordBaseLength: Byte read GetWordBaseLength;
end;
{ TdxHunspellReader }
TdxHunspellReader = class
private
FData: TdxSpellCheckerStrings;
FLineIndex: Integer;
FCodePage: Cardinal;
procedure LoadFromFile(const AFileName: string);
procedure LoadFromStream(AStream: TStream);
public
constructor Create(const AFileName: string);
destructor Destroy; override;
function GetLine: PAnsiChar;
function GetLineIndex: Cardinal;
end;
{ TdxHunspellWordBaseManager }
TdxHunspellWordBaseManager = class
private
FHashTableSize: Integer;
FWordBaseTable: PdxHunspellWordBaseTable;
FAffixFlagMode: TdxAffixFlagMode;
FComplexPrefixes: Boolean;
FForbiddenWordFlag: Word;
FIsUTF8: Boolean;
FLanguage: Integer;
FCodePage: Cardinal;
FLanguageName: PAnsiChar;
FIgnoredChars: PAnsiChar;
ignorechars_utf16: PWord;
ignorechars_utf16_len: Integer;
FAffixFlagAliases: TdxHunspellFlagsList;
FAffixMorphologicAliasesCount: Integer;
FAffixMorphologicAliases: PdxAffixMorphologicAliases;
procedure AllocateHashTable;
procedure FreeHashTable;
function Hash(AWord: PAnsiChar): Integer;
procedure FreeAffixMorphologicAliases;
function GetWordLengthAndCapitalType(const AWord: PAnsiChar; ALength: Integer; out ACapitalizationType: TdxCapitalizationType): Integer; //TODO: Remove this function because of UTF-8
function LoadTables(const AFileName: string): Boolean;
function AddWord(const AWord: PAnsiChar; AWordLength, wcl: Integer; AAffixFlags: TdxHunspellFlags;
const AMorphologocalDescription: PAnsiChar; AOnlyUpperCase: Boolean): Boolean;
function LoadCommonFlags(const AFileName: string): Boolean;
function ParseAffixFlagAliases(AAffixFileLine: PAnsiChar; AAffixFileManager: TdxHunspellReader): Boolean;
function AddCapitalizedWord(AWord: PAnsiChar; wbl, wcl: Integer;
AAffixFlags: TdxHunspellFlags; ADescription: PAnsiChar; ACapitalizationType: TdxCapitalizationType): Boolean;
function ParseMorphologicAliases(AAffixFileLine: PAnsiChar; AAffixFileManager: TdxHunspellReader): Boolean;
protected
property AffixFlagMode: TdxAffixFlagMode read FAffixFlagMode;
property AffixFlagAliases: TdxHunspellFlagsList read FAffixFlagAliases;
property IsUTF8: Boolean read FIsUTF8;
public
constructor Create(ALanguage: Integer);
destructor Destroy; override;
function Load(const ADictionaryFileName: string; const AAffixFileName: string): Boolean;
function Lookup(const AWord: PAnsiChar): TdxHunspellWordItem;
function DecodeFlag(const AFlag: PAnsiChar): Word;
function EncodeFlag(AFlag: Word): PAnsiChar;
function HasAffixFlagAliases: Boolean;
function HasAffixMorphologicAliases: Boolean;
function GetAffixMorphologyByAlias(AAlias: Integer): PAnsiChar;
property CodePage: Cardinal read FCodePage;
property Language: Integer read FLanguage;
end;
TdxHunspellWordBaseManagers = array[0..dxHunspellMaxDictionaryCount - 1] of TdxHunspellWordBaseManager;
var
FNum, FCapNum, FFree: Integer;
implementation
uses
Math, dxHunspellUtils, dxCore;
procedure Rotate(var v: Integer; q: Integer);
begin
v := v shl q or (v shr (32 - q)) and ((1 shl q) - 1);
end;
{ TdxHunspellFlags }
constructor TdxHunspellFlags.Create(AWordBaseManager: TdxHunspellWordBaseManager);
begin
inherited Create;
FWordBaseManager := AWordBaseManager;
Inc(FNum);
end;
constructor TdxHunspellFlags.CreateCapitalized(AFlags: TdxHunspellFlags);
begin
inherited Create;
Length := AFlags.Length + 1;
Move(AFlags.Data^, Data^, AFlags.Length * SizeOf(Word));
FData[Length - 1] := ONLYUPCASEFLAG;
Inc(FCapNum);
end;
destructor TdxHunspellFlags.Destroy;
begin
Inc(FFree);
DisposeData;
inherited Destroy;
end;
procedure TdxHunspellFlags.Assign(ASource: TdxHunspellFlags);
begin
Length := ASource.Length;
Move(ASource.Data^, Data^, ASource.Length * SizeOf(Word));
FWordBaseManager := ASource.FWordBaseManager;
end;
procedure TdxHunspellFlags.AssignCapitalized(ASource: TdxHunspellFlags);
begin
Length := ASource.Length + 1;
Move(ASource.Data^, Data^, ASource.Length * SizeOf(Word));
FData[Length - 1] := ONLYUPCASEFLAG;
end;
procedure TdxHunspellFlags.DisposeData;
begin
if FDataSize > 0 then
begin
FreeMem(FData, FDataSize);
FData := nil;
FDataSize := 0;
end;
end;
function TdxHunspellFlags.InitializeByAlias(AAlias: PAnsiChar): Boolean;
var
AAliasNum: Integer;
begin
AAliasNum := StrInt(AAlias);
Result := (AAliasNum > 0) and (AAliasNum <= FWordBaseManager.AffixFlagAliases.Count);
DisposeData;
if Result then
begin
FLength := FWordBaseManager.AffixFlagAliases[AAliasNum - 1].Length;
FData := FWordBaseManager.AffixFlagAliases[AAliasNum - 1].Data;
end;
end;
function TdxHunspellFlags.ContainsFlag(AFlag: Word): Boolean;
begin
Result := (Length > 0) and dxHunspellUtils.ContainsFlag(FData, Length, AFlag);
end;
function TdxHunspellFlags.Decode(AFlags: PAnsiChar): Boolean;
var
I, ALength: Integer;
AFlag, ATempPointer: PAnsiChar;
ADecodedFlag: PWord;
begin
Result := True;
case FWordBaseManager.AffixFlagMode of
afmLong:
begin
Length := StrLen(AFlags) div 2;
for I := 0 to Length - 1 do
Data[I] := Ord((AFlags + I * 2)^) shl 8 +
Ord((AFlags + I * 2 + 1)^);
end;
afmNum:
begin
ALength := 1;
AFlag := AFlags;
ATempPointer := AFlags;
while ATempPointer^ <> #0 do
begin
if ATempPointer^ = ',' then
Inc(ALength);
Inc(ATempPointer);
end;
Length := ALength;
ADecodedFlag := @FData[0];
ATempPointer := AFlags;
while ATempPointer^ <> #0 do
begin
if ATempPointer^ = ',' then
begin
I := StrInt(AFlag);
ADecodedFlag^ := I;
AFlag := ATempPointer + 1;
Inc(ADecodedFlag);
end;
Inc(ATempPointer);
end;
I := StrInt(AFlag);
ADecodedFlag^ := I;
end;
afmUni: // UTF-8 characters
begin
//TODO:
end;
else
Length := StrLen(AFlags);
ADecodedFlag := @FData[0];
ATempPointer := AFlags;
while ATempPointer^ <> #0 do
begin
ADecodedFlag^ := Ord(ATempPointer^);
Inc(ADecodedFlag);
Inc(ATempPointer);
end;
end;
end;
procedure TdxHunspellFlags.Sort;
begin
SortFlags(Data^, 0, Length);
end;
procedure TdxHunspellFlags.SetLength(ANewLength: Integer);
begin
if ANewLength <> Length then
begin
FLength := ANewLength;
if ANewLength * SizeOf(Word) > FDataSize then
begin
FDataSize := ANewLength * SizeOf(Word);
ReallocMem(FData, FDataSize);
end;
end;
end;
function TdxHunspellFlags.GetItem(Index: Integer): Word;
begin
Result := FData[Index];
end;
{ TdxHunspellFlagsList }
function TdxHunspellFlagsList.GetItem(Index: Integer): TdxHunspellFlags;
begin
Result := TdxHunspellFlags(inherited Items[Index]);
end;
{ TdxHunspellWordItem }
constructor TdxHunspellWordItem.Create(AWordBaseManager: TdxHunspellWordBaseManager);
begin
inherited Create;
FWordBaseManager := AWordBaseManager;
end;
destructor TdxHunspellWordItem.Destroy;
begin
if FWordBase <> nil then
StrDispose(FWordBase);
if (FMorphologicalDescription <> nil) and not IsAffixMorphologicAliasUsed then
StrDispose(FMorphologicalDescription);
FreeAndNil(FAffixFlags);
inherited Destroy;
end;
function TdxHunspellWordItem.IsCompatibleWithFlag(AFlag: Word;
CompatibleIfNull: Boolean = False): Boolean;
begin
if AFlag <> NullFlag then
Result := (FAffixFlags <> nil) and FAffixFlags.ContainsFlag(AFlag)
else
Result := CompatibleIfNull;
end;
procedure TdxHunspellWordItem.SetAffixFlags(const Value: TdxHunspellFlags);
begin
FreeAndNil(FAffixFlags);
FAffixFlags := Value;
end;
function TdxHunspellWordItem.GetWordBaseLength: Byte;
begin
Result := StrLen(FWordBase);
end;
function TdxHunspellWordItem.IsAffixMorphologicAliasUsed: Boolean;
begin
Result := wboMorphologyAliases in FOptions;
end;
{ TdxHunspellReader }
constructor TdxHunspellReader.Create(const AFileName: string{; const key: PAnsiChar});
begin
FCodePage := 28591;
FLineIndex := 0;
FData := TdxSpellCheckerStrings.Create(FCodePage);
LoadFromFile(AFileName);
end;
destructor TdxHunspellReader.Destroy;
begin
FData.Free;
inherited Destroy;
end;
function TdxHunspellReader.GetLine: PAnsiChar;
begin
Result := nil;
if (FData <> nil) and (FLineIndex < FData.Count) then
Result := StrNew(PAnsiChar(dxWideStringToAnsiString(FData.Items[FLineIndex], FCodePage)));
Inc(FLineIndex);
end;
procedure TdxHunspellReader.LoadFromFile(const AFileName: string);
var
AStream: TStream;
begin
AStream := TFileStream.Create(AFileName, fmOpenRead or fmShareDenyWrite);
try
LoadFromStream(AStream);
finally
AStream.Free;
end;
end;
procedure TdxHunspellReader.LoadFromStream(AStream: TStream);
begin
AStream.Position := 0;
FData.Capacity := 16384;
FData.LoadFromStream(AStream);
end;
function TdxHunspellReader.GetLineIndex: Cardinal;
begin
Result := FLineIndex;
end;
{ TdxHunspellWordBaseManager }
constructor TdxHunspellWordBaseManager.Create(ALanguage: Integer);
begin
FHashTableSize := 0;
FWordBaseTable := nil;
FAffixFlagMode := afmChar;
FComplexPrefixes := False;
FIsUTF8 := False;
FLanguage := ALanguage;
FLanguageName := nil;
FIgnoredChars := nil;
ignorechars_utf16 := nil;
ignorechars_utf16_len := 0;
FAffixMorphologicAliasesCount := 0;
FAffixMorphologicAliases := nil;
FForbiddenWordFlag := ForbiddenWordFlag;
FAffixFlagAliases := TdxHunspellFlagsList.Create;
end;
destructor TdxHunspellWordBaseManager.Destroy;
var
I: Integer;
ACurrentItem, ANextItem: TdxHunspellWordItem;
begin
if FWordBaseTable <> nil then
begin
for I := 0 to FHashTableSize - 1 do
begin
ACurrentItem := FWordBaseTable[I];
while ACurrentItem <> nil do
begin
ANextItem := ACurrentItem.Next;
FreeAndNil(ACurrentItem);
ACurrentItem := ANextItem;
end;
end;
FreeHashTable;
end;
FHashTableSize := 0;
FreeAndNil(FAffixFlagAliases);
FreeAffixMorphologicAliases;
StrDispose(FLanguageName);
StrDispose(FIgnoredChars);
if ignorechars_utf16 <> nil then
Dispose(ignorechars_utf16);
inherited Destroy;
end;
function TdxHunspellWordBaseManager.Load(const ADictionaryFileName: string;
const AAffixFileName: string): Boolean;
begin
LoadCommonFlags(AAffixFileName);
Result := LoadTables(ADictionaryFileName);
if not Result then
begin
if FWordBaseTable <> nil then
FreeHashTable;
FHashTableSize := 0;
end;
end;
function TdxHunspellWordBaseManager.Lookup(const AWord: PAnsiChar): TdxHunspellWordItem;
begin
Result := nil;
if FWordBaseTable <> nil then
begin
Result := FWordBaseTable[Hash(AWord)];
if Result <> nil then
while Result <> nil do
begin
if StrComp(AWord, Result.WordBase) = 0 then
Break;
Result := Result.Next;
end;
end;
end;
function TdxHunspellWordBaseManager.AddWord(const AWord: PAnsiChar;
AWordLength, wcl: Integer; AAffixFlags: TdxHunspellFlags;
const AMorphologocalDescription: PAnsiChar; AOnlyUpperCase: Boolean): Boolean;
procedure AssignFlags(AItem: TdxHunspellWordItem);
begin
if AOnlyUpperCase then
AItem.AffixFlags.AssignCapitalized(AAffixFlags)
else
AItem.AffixFlags.Assign(AAffixFlags);
end;
function InitNewItem(AItem: TdxHunspellWordItem): TdxHunspellWordItem;
var
AFlags: TdxHunspellFlags;
begin
if AOnlyUpperCase then
AFlags := TdxHunspellFlags.CreateCapitalized(AAffixFlags)
else
begin
AFlags := TdxHunspellFlags.Create(Self);
AFlags.Assign(AAffixFlags);
end;
AItem.AffixFlags := AFlags;
Result := AItem;
end;
var
AIsUpperCaseHomonym: Boolean;
ATableIndex: Integer;
ANewHashTableItem, AHashTableItem: TdxHunspellWordItem;
begin
Result := False;
AIsUpperCaseHomonym := False;
ANewHashTableItem := TdxHunspellWordItem.Create(Self);
ANewHashTableItem.WordBase := StrNew(AWord);
if FIgnoredChars <> nil then
begin
if IsUTF8 then
//TODO:
else
RemoveIgnoredChars(ANewHashTableItem.WordBase, FIgnoredChars);
end;
if FComplexPrefixes then
begin
if IsUTF8 then
//TODO:
else
StrReverse(ANewHashTableItem.WordBase);
end;
ATableIndex := Hash(ANewHashTableItem.WordBase);
ANewHashTableItem.clen := wcl;
ANewHashTableItem.Next := nil;
ANewHashTableItem.NextHomonym := nil;
if AMorphologocalDescription <> nil then
begin
Include(ANewHashTableItem.FOptions, wboMorphologyDescription);
if FAffixMorphologicAliases <> nil then
begin
Include(ANewHashTableItem.FOptions, wboMorphologyAliases);
ANewHashTableItem.MorphologicalDescription := GetAffixMorphologyByAlias(StrInt(AMorphologocalDescription));
end
else
begin
ANewHashTableItem.MorphologicalDescription := StrNew(AMorphologocalDescription);
if FComplexPrefixes then
begin
if IsUTF8 then
//TODO:
else
StrReverse(ANewHashTableItem.MorphologicalDescription);
end;
end;
if StrPos(ANewHashTableItem.MorphologicalDescription, MORPH_PHON) <> nil then
Include(ANewHashTableItem.FOptions, wboMorphologyPhone);
end;
AHashTableItem := FWordBaseTable[ATableIndex];
if AHashTableItem = nil then
begin
FWordBaseTable[ATableIndex] := InitNewItem(ANewHashTableItem);
Exit;
end;
while AHashTableItem.Next <> nil do
begin
if (AHashTableItem.NextHomonym = nil) and (StrComp(PAnsiChar(@ANewHashTableItem.WordBase), PAnsiChar(@AHashTableItem.WordBase)) = 0) then
begin
if not AOnlyUpperCase then
begin
if AHashTableItem.IsCompatibleWithFlag(ONLYUPCASEFLAG) then
begin
FreeAndNil(ANewHashTableItem);
AssignFlags(AHashTableItem);
Exit;
end
else
AHashTableItem.NextHomonym := InitNewItem(ANewHashTableItem);
end
else
AIsUpperCaseHomonym := True;
end;
AHashTableItem := AHashTableItem.Next;
end;
if StrComp(ANewHashTableItem.WordBase, AHashTableItem.WordBase) = 0 then
begin
if not AOnlyUpperCase then
begin
if AHashTableItem.IsCompatibleWithFLag(ONLYUPCASEFLAG) then
begin
FreeAndNil(ANewHashTableItem);
AssignFlags(AHashTableItem);
Exit;
end
else
AHashTableItem.NextHomonym := InitNewItem(ANewHashTableItem);
end
else
AIsUpperCaseHomonym := True;
end;
if not AIsUpperCaseHomonym then
AHashTableItem.Next := InitNewItem(ANewHashTableItem)
else
FreeAndNil(ANewHashTableItem);
end;
procedure TdxHunspellWordBaseManager.AllocateHashTable;
begin
FWordBaseTable := AllocMem(FHashTableSize * SizeOf(TdxHunspellWordItem));
end;
function TdxHunspellWordBaseManager.AddCapitalizedWord(AWord: PAnsiChar; wbl, wcl: Integer;
AAffixFlags: TdxHunspellFlags; ADescription: PAnsiChar; ACapitalizationType: TdxCapitalizationType): Boolean;
begin
Result := False;
if (ACapitalizationType in [ctMixedCapital, ctFirstMixedCapital, ctAllCapital]) and
not AAffixFlags.ContainsFlag(FForbiddenWordFlag) then
begin
if IsUTF8 then
begin
//TODO:
end
else
begin
MakeAllSmall(AWord, CodePage, Language);
MakeCapitalized(AWord, CodePage, Language);
Result := AddWord(AWord, wbl, wcl, AAffixFlags, ADescription, True);
end;
end;
end;
procedure TdxHunspellWordBaseManager.FreeAffixMorphologicAliases;
var
I: Integer;
begin
if FAffixMorphologicAliases <> nil then
begin
for I := 0 to FAffixMorphologicAliasesCount - 1 do
StrDispose(FAffixMorphologicAliases[I]);
FreeMem(FAffixMorphologicAliases, FAffixMorphologicAliasesCount * SizeOf(PAnsiChar));
FAffixMorphologicAliases := nil;
FAffixMorphologicAliasesCount := 0;
end;
end;
procedure TdxHunspellWordBaseManager.FreeHashTable;
begin
FreeMem(FWordBaseTable, FHashTableSize * SizeOf(TdxHunspellWordItem));
FWordBaseTable := nil;
end;
function TdxHunspellWordBaseManager.Hash(AWord: PAnsiChar): Integer;
var
I, AHashValue: Integer;
begin
AHashValue := 0;
I := 0;
while (I < 4) and (AWord^ <> #0) do
begin
AHashValue := AHashValue shl 8 or Ord(AWord^);
Inc(AWord);
Inc(I);
end;
while AWord^ <> #0 do
begin
Rotate(AHashValue, RotateLength);
AHashValue := AHashValue xor Ord(AWord^);
Inc(AWord);
end;
Result := Cardinal(AHashValue) mod Cardinal(FHashTableSize);
end;
function TdxHunspellWordBaseManager.GetWordLengthAndCapitalType(const AWord: PAnsiChar; ALength: Integer; out ACapitalizationType: TdxCapitalizationType): Integer;
begin
if IsUTF8 then
begin
//TODO:
Result := ALength;
end
else
begin
Result := ALength;
ACapitalizationType := GetCapitalizationType(AWord, Result);
end;
end;
function TdxHunspellWordBaseManager.LoadTables(const AFileName: string): Boolean;
var
I, ALinePartLength, wcl: Integer;
ACapitalizationType: TdxCapitalizationType;
ALineCursor, AAffixPartLineCursor, AMorphologicalPartLineCursor, ATempCursor: PAnsiChar;
AAffixFlags: TdxHunspellFlags;
ADictionaryFileManager: TdxHunspellReader;
begin
ADictionaryFileManager := nil;
Result := True;
try
ADictionaryFileManager := TdxHunspellReader.Create(AFileName);
if ADictionaryFileManager = nil then
begin
Result := False;
Exit;
end;
ALineCursor := ADictionaryFileManager.GetLine;
if ALineCursor = nil then
begin
Result := False;
Exit;
end;
RemoveCRLF(ALineCursor);
RemoveMark(ALineCursor);
FHashTableSize := StrInt(ALineCursor);
StrDispose(ALineCursor);
if FHashTableSize = 0 then
begin
Result := False;
Exit;
end;
FHashTableSize := FHashTableSize + 5 + UserWordCount;
if FHashTableSize mod 2 = 0 then
Inc(FHashTableSize);
AllocateHashTable;
for I := 0 to FHashTableSize - 1 do
FWordBaseTable[I] := nil;
ALineCursor := ADictionaryFileManager.GetLine;
while ALineCursor <> nil do
begin
RemoveCRLF(ALineCursor);
AMorphologicalPartLineCursor := ALineCursor;
AMorphologicalPartLineCursor := StrScan(AMorphologicalPartLineCursor, ':');
while AMorphologicalPartLineCursor <> nil do
begin
if (AMorphologicalPartLineCursor > ALineCursor + 3) and (((AMorphologicalPartLineCursor - 3)^ = ' ') or ((AMorphologicalPartLineCursor - 3)^ = #9)) then
begin
AMorphologicalPartLineCursor := AMorphologicalPartLineCursor - 4;
while (AMorphologicalPartLineCursor >= ALineCursor) and ((AMorphologicalPartLineCursor^ = ' ') or (AMorphologicalPartLineCursor^ = #9)) do
Dec(AMorphologicalPartLineCursor);
if AMorphologicalPartLineCursor < ALineCursor then
AMorphologicalPartLineCursor := nil
else
begin
(AMorphologicalPartLineCursor + 1)^ := #0;
AMorphologicalPartLineCursor := AMorphologicalPartLineCursor + 2;
end;
Break;
end;
Inc(AMorphologicalPartLineCursor);
AMorphologicalPartLineCursor := StrScan(AMorphologicalPartLineCursor, ':');
end;
ATempCursor := StrScan(ALineCursor, #9);
if (ATempCursor <> nil) and ((AMorphologicalPartLineCursor = nil) or (ATempCursor < AMorphologicalPartLineCursor)) then
begin
ATempCursor^ := #0;
AMorphologicalPartLineCursor := ATempCursor + 1;
end;
AAffixPartLineCursor := StrScan(ALineCursor, '/');
while (AAffixPartLineCursor <> nil) do
begin
if AAffixPartLineCursor = ALineCursor then
begin
Inc(AAffixPartLineCursor);
Continue;
end
else
if (AAffixPartLineCursor - 1)^ <> '\' then
Break;
ATempCursor := AAffixPartLineCursor - 1;
while ATempCursor^ <> #0 do
begin
ATempCursor^ := (ATempCursor + 1)^;
Inc(ATempCursor);
end;
AAffixPartLineCursor := StrScan(AAffixPartLineCursor, '/');
end;
AAffixFlags := TdxHunspellFlags.Create(Self);
try
if (AAffixPartLineCursor <> nil) then
begin
AAffixPartLineCursor^ := #0;
if FAffixFlagAliases.Count > 0 then
begin
Result := AAffixFlags.InitializeByAlias(AAffixPartLineCursor + 1);
if not Result then
AAffixPartLineCursor^ := #0;
end
else
begin
Result := AAffixFlags.Decode(AAffixPartLineCursor + 1);
if Result then
AAffixFlags.Sort;
end;
if not Result then
AAffixFlags.Length := 0;
end;
ALinePartLength := StrLen(ALineCursor);
wcl := GetWordLengthAndCapitalType(ALineCursor, ALinePartLength, ACapitalizationType);
if AddWord(ALineCursor, ALinePartLength, wcl, AAffixFlags, AMorphologicalPartLineCursor, False) or
AddCapitalizedWord(ALineCursor, ALinePartLength, wcl, AAffixFlags, AMorphologicalPartLineCursor, ACapitalizationType) then
begin
Result := True;
Exit;
end;
finally
FreeAndNil(AAffixFlags);
end;
StrDispose(ALineCursor);
ALineCursor := ADictionaryFileManager.GetLine;
end;
StrDispose(ALineCursor);
finally
ADictionaryFileManager.Free;
end;
end;
function TdxHunspellWordBaseManager.DecodeFlag(const AFlag: PAnsiChar): Word;
begin
Result := 0;
case FAffixFlagMode of
afmLong:
Result := Ord(AFlag^) shl 8 + Ord((AFlag + 1)^);
afmNum:
Result := StrInt(AFlag);
afmUni:;
//TODO:
else
Result := Ord(AFlag^);
end;
end;
function TdxHunspellWordBaseManager.EncodeFlag(AFlag: Word): PAnsiChar;
const
ArraySize = 10;
var
AEncodedFlag: array [0..ArraySize - 1] of Byte;
begin
if AFlag = 0 then
Result := nil
else
begin
if FAffixFlagMode = afmLong then
begin
AEncodedFlag[0] := AFlag shr 8;
AEncodedFlag[1] := AFlag - ((AFlag shr 8) shl 8);
AEncodedFlag[2] := 0;
end
else
if FAffixFlagMode = afmNum then
StrPCopy(@AEncodedFlag, IntToStr(AFlag))
else
if FAffixFlagMode = afmUni then
//TODO:
else
begin
AEncodedFlag[0] := AFlag;
AEncodedFlag[1] := 0;
end;
Result := StrNew(PAnsiChar(@AEncodedFlag));
end;
end;
function TdxHunspellWordBaseManager.LoadCommonFlags(const AFileName: string): Boolean;
var
ALine: PAnsiChar;
AIsFirstLine: Boolean;
AAffixFileManager: TdxHunspellReader;
AFlag: PAnsiChar;
AEncoding: PAnsiChar;
begin
Result := False;
AIsFirstLine := True;
AAffixFileManager := TdxHunspellReader.Create(AFileName);
try
if AAffixFileManager = nil then
begin
Result := True;
Exit;
end;
ALine := AAffixFileManager.GetLine;
while ALine <> nil do
begin
RemoveCRLF(ALine);
if AIsFirstLine then
begin
AIsFirstLine := False;
RemoveMark(ALine);
end;
if (StrLComp(ALine, 'FLAG', 4) = 0) and isspace((ALine + 4)^) then
begin
if StrPos(ALine, 'long') <> nil then FAffixFlagMode := afmLong;
if StrPos(ALine, 'num') <> nil then FAffixFlagMode := afmNum;
if StrPos(ALine, 'UTF-8') <> nil then FAffixFlagMode := afmUni;
end;
if StrLComp(ALine, 'FORBIDDENWORD', 13) = 0 then
begin
AFlag := nil;
if not GetSecondPartOfString(ALine, AFlag) then
begin
Result := True;
Exit;
end;
FForbiddenWordFlag := DecodeFlag(AFlag);
StrDispose(AFlag);
end;
if StrLComp(ALine, 'SET', 3) = 0 then
begin
AEncoding := nil;
try
if not GetSecondPartOfString(ALine, AEncoding) then
begin
Result := True;
Exit;
end;
FCodePage := GetCodePageByName(AEncoding);
if StrComp(AEncoding, 'UTF-8') = 0 then
//TODO:
// FIsUTF8 := 1;
raise Exception.Create('UTF-8 doesn''t supported');
finally
StrDispose(AEncoding);
end;
end;
if StrLComp(ALine, 'LANG', 4) = 0 then
begin
if not GetSecondPartOfString(ALine, FLanguageName) then
begin
Result := True;
Exit;
end;
FLanguage := GetLanguageID(FLanguageName);
end;
if StrLComp(ALine, 'IGNORE', 6) = 0 then
if not ParseArray(ALine, FIgnoredChars, FIsUTF8) then
begin
Result := True;
Exit;
end;
if (StrLComp(ALine, 'AF', 2) = 0) and isspace((ALine + 2)^) then
if ParseAffixFlagAliases(ALine, AAffixFileManager) then
begin
Result := True;
Exit;
end;
if (StrLComp(ALine, 'AM', 2) = 0) and isspace((ALine + 2)^) then
if ParseMorphologicAliases(ALine, AAffixFileManager) then
begin
Result := True;
Exit;
end;
if StrLComp(ALine, 'COMPLEXPREFIXES', 15) = 0 then
FComplexPrefixes := True;
if ((StrLComp(ALine, 'SFX', 3) = 0) or (StrLComp(ALine, 'PFX', 3) = 0)) and
isspace((ALine + 3)^) then
Break;
StrDispose(ALine);
ALine := AAffixFileManager.GetLine;
end;
StrDispose(ALine);
finally
AAffixFileManager.Free;
end;
end;
function TdxHunspellWordBaseManager.ParseAffixFlagAliases(AAffixFileLine: PAnsiChar; AAffixFileManager: TdxHunspellReader): Boolean;
var
ALineStart, ALineCursor, APiece: PAnsiChar;
I, J, ACount: Integer;
AAffixFlags: TdxHunspellFlags;
begin
Result := False;
if AffixFlagAliases.Count <> 0 then
begin
Result := True;
Exit;
end;
ALineCursor := AAffixFileLine;
I := 0;
APiece := StrSeparate(@ALineCursor, #0);
ACount := 0;
while (APiece <> nil) and (I < 2) do
begin
if APiece^ <> #0 then
begin
if I = 1 then
begin
ACount := StrInt(APiece);
if ACount < 1 then
begin
Result := True;
Exit;
end;
end;
Inc(I);
end;
APiece := StrSeparate(@ALineCursor, #0);
end;
if I < 2 then
begin
Result := True;
Exit;
end;
AffixFlagAliases.Capacity := ACount;
for J := 0 to ACount - 1 do
begin
ALineStart := AAffixFileManager.GetLine;
try
if ALineStart = nil then
begin
Result := True;
Exit;
end;
ALineCursor := ALineStart;
RemoveCRLF(ALineCursor);
I := 0;
APiece := StrSeparate(@ALineCursor, #0);
while (APiece <> nil) and (I < 2) do
begin
if APiece^ <> #0 then
begin
if I = 0 then
begin
if StrLComp(APiece, 'AF', 2) <> 0 then
begin
Result := True;
Exit;
end;
end
else
begin
AAffixFlags := TdxHunspellFlags.Create(Self);
AAffixFlags.Decode(APiece);
AAffixFlags.Sort;
AffixFlagAliases.Add(AAffixFlags);
end;
Inc(I);
end;
APiece := StrSeparate(@ALineCursor, #0);
end;
finally
StrDispose(ALineStart);
end;
end;
end;
function TdxHunspellWordBaseManager.HasAffixFlagAliases: Boolean;
begin
Result := FAffixFlagAliases.Count > 0;
end;
function TdxHunspellWordBaseManager.ParseMorphologicAliases(AAffixFileLine: PAnsiChar; AAffixFileManager: TdxHunspellReader): Boolean;
var
ALineStart, ALineCursor, APiece: PAnsiChar;
I, J: Integer;
begin
if FAffixMorphologicAliasesCount <> 0 then
begin
Result := True;
Exit;
end;
ALineCursor := AAffixFileLine;
I := 0;
APiece := StrSeparate(@ALineCursor, #0);
while (APiece <> nil) and (I < 2) do
begin
if APiece^ <> #0 then
begin
if I = 1 then
begin
FAffixMorphologicAliasesCount := StrInt(APiece);
if FAffixMorphologicAliasesCount < 1 then
begin
Result := True;
Exit;
end;
try
FAffixMorphologicAliases := AllocMem(FAffixMorphologicAliasesCount * SizeOf(PAnsiChar));
except
on EOutOfMemory do
begin
FAffixMorphologicAliasesCount := 0;
Result := True;
Exit;
end;
end;
end;
Inc(I);
end;
APiece := StrSeparate(@ALineCursor, #0);
end;
if I < 2 then
begin
FreeAffixMorphologicAliases;
Result := True;
Exit;
end;
for J := 0 to FAffixMorphologicAliasesCount - 1 do
begin
ALineStart := AAffixFileManager.GetLine;
if ALineStart = nil then
begin
Result := True;
Exit;
end;
ALineCursor := ALineStart;
RemoveCRLF(ALineCursor);
I := 0;
FAffixMorphologicAliases[J] := nil;
APiece := StrSeparate(@ALineCursor, ' ');
while (APiece <> nil) and (I < 2) do
begin
if APiece^ <> #0 then
begin
if I = 0 then
begin
if StrLComp(APiece, 'AM', 2) <> 0 then
begin
FreeAffixMorphologicAliases;
Result := True;
Exit;
end;
end
else
begin
if ALineCursor^ <> #0 then
begin
(ALineCursor - 1)^ := ' ';
ALineCursor := ALineCursor + StrLen(ALineCursor);
end;
if FComplexPrefixes then
begin
if IsUTF8 then
//TODO:
else
StrReverse(APiece);
end;
FAffixMorphologicAliases[J] := StrNew(APiece);
if FAffixMorphologicAliases[J] = nil then
begin
FreeAffixMorphologicAliases;
Result := True;
Exit;
end;
end;
Inc(I);
end;
APiece := StrSeparate(@ALineCursor, ' ');
end;
if FAffixMorphologicAliases[J] = nil then
begin
FreeAffixMorphologicAliases;
Result := True;
Exit;
end;
StrDispose(ALineStart);
end;
Result := False;
end;
function TdxHunspellWordBaseManager.HasAffixMorphologicAliases: Boolean;
begin
Result := FAffixMorphologicAliases <> nil;
end;
function TdxHunspellWordBaseManager.GetAffixMorphologyByAlias(AAlias: Integer): PAnsiChar;
begin
Result := nil;
if (AAlias > 0) and (AAlias <= FAffixMorphologicAliasesCount) then
Result := FAffixMorphologicAliases[AAlias - 1];
end;
end.