{********************************************************************} { } { Developer Express Visual Component Library } { ExpressSpellChecker } { } { Copyright (c) 1998-2009 Developer Express Inc. } { ALL RIGHTS RESERVED } { } { The entire contents of this file is protected by U.S. and } { International Copyright Laws. Unauthorized reproduction, } { reverse-engineering, and distribution of all or any portion of } { the code contained in this file is strictly prohibited and may } { result in severe civil and criminal penalties and will be } { prosecuted to the maximum extent possible under the law. } { } { RESTRICTIONS } { } { THIS SOURCE CODE AND ALL RESULTING INTERMEDIATE FILES } { (DCU, OBJ, DLL, ETC.) ARE CONFIDENTIAL AND PROPRIETARY TRADE } { SECRETS OF DEVELOPER EXPRESS INC. THE REGISTERED DEVELOPER IS } { LICENSED TO DISTRIBUTE THE EXPRESSSPELLCHECKER AND ALL } { ACCOMPANYING VCL CONTROLS AS PART OF AN EXECUTABLE PROGRAM ONLY. } { } { THE SOURCE CODE CONTAINED WITHIN THIS FILE AND ALL RELATED } { FILES OR ANY PORTION OF ITS CONTENTS SHALL AT NO TIME BE } { COPIED, TRANSFERRED, SOLD, DISTRIBUTED, OR OTHERWISE MADE } { AVAILABLE TO OTHER INDIVIDUALS WITHOUT EXPRESS WRITTEN CONSENT } { AND PERMISSION FROM DEVELOPER EXPRESS INC. } { } { CONSULT THE END USER LICENSE AGREEMENT FOR INFORMATION ON } { ADDITIONAL RESTRICTIONS. } { } {********************************************************************} unit dxHunspellWords; {$I cxVer.inc} interface uses SysUtils, Classes, cxClasses, dxHunspellTypes, dxSpellCheckerUtils; const BUFSIZE = 65536; dxHunspellMaxDictionaryCount = 20; RotateLength = 5; UserWordCount = 1000; type TdxHunspellWordBaseManager = class; TdxHunspellWordItem = class; TdxHunspellFlags = class; TdxAffixFlagMode = (afmChar, afmLong, afmNum, afmUni); TdxCapitalizationType = (ctNoCapital, ctFirstCapital, ctAllCapital, ctMixedCapital, ctFirstMixedCapital); TdxWordBaseOption = (wboMorphologyDescription, wboMorphologyAliases, wboMorphologyPhone); TdxWordBaseOptions = set of TdxWordBaseOption; PdxHunspellWordBaseTable = ^TdxSpellCheckerHashTable; TdxSpellCheckerHashTable = array[0..0] of TdxHunspellWordItem; PdxAffixMorphologicAliases = ^TdxSpellCheckerArrayOfPAnsiChar; TdxSpellCheckerArrayOfPAnsiChar = array[0..0] of PAnsiChar; PdxAffixFlagsData = ^TdxAffixFlagsData; TdxAffixFlagsData = array[0..0] of Word; { TdxHunspellFlags } TdxHunspellFlags = class private FData: PdxAffixFlagsData; FDataSize: Integer; FLength: Integer; FWordBaseManager: TdxHunspellWordBaseManager; procedure DisposeData; function GetItem(Index: Integer): Word; procedure SetLength(ANewLength: Integer); public constructor Create(AWordBaseManager: TdxHunspellWordBaseManager); constructor CreateCapitalized(AFlags: TdxHunspellFlags); destructor Destroy; override; procedure Assign(ASource: TdxHunspellFlags); procedure AssignCapitalized(ASource: TdxHunspellFlags); function ContainsFlag(AFlag: Word): Boolean; function Decode(AFlags: PAnsiChar): Boolean; function InitializeByAlias(AAlias: PAnsiChar): Boolean; procedure Sort; property Data: PdxAffixFlagsData read FData write FData; property Items[Index: Integer]: Word read GetItem; default; property Length: Integer read FLength write SetLength; end; { TdxHunspellFlagsList } TdxHunspellFlagsList = class(TcxObjectList) private function GetItem(Index: Integer): TdxHunspellFlags; public property Items[Index: Integer]: TdxHunspellFlags read GetItem; default; end; { TdxHunspellWordItem } TdxHunspellWordItem = class private FAffixFlags: TdxHunspellFlags; FNext: TdxHunspellWordItem; FNextHomonym: TdxHunspellWordItem; FMorphologicalDescription: PAnsiChar; FOptions: TdxWordBaseOptions; FWordBase: PAnsiChar; FWordBaseManager: TdxHunspellWordBaseManager; function GetWordBaseLength: Byte; function IsAffixMorphologicAliasUsed: Boolean; procedure SetAffixFlags(const Value: TdxHunspellFlags); public clen: Byte; // word length in characters (different for UTF-8 enc.) constructor Create(AWordBaseManager: TdxHunspellWordBaseManager); destructor Destroy; override; function IsCompatibleWithFlag(AFlag: Word; CompatibleIfNull: Boolean = False): Boolean; property AffixFlags: TdxHunspellFlags read FAffixFlags write SetAffixFlags; property MorphologicalDescription: PAnsiChar read FMorphologicalDescription write FMorphologicalDescription; property Next: TdxHunspellWordItem read FNext write FNext; property NextHomonym: TdxHunspellWordItem read FNextHomonym write FNextHomonym; property Options: TdxWordBaseOptions read FOptions write FOptions; property WordBase: PAnsiChar read FWordBase write FWordBase; property WordBaseLength: Byte read GetWordBaseLength; end; { TdxHunspellReader } TdxHunspellReader = class private FData: TdxSpellCheckerStrings; FLineIndex: Integer; FCodePage: Cardinal; procedure LoadFromFile(const AFileName: string); procedure LoadFromStream(AStream: TStream); public constructor Create(const AFileName: string); destructor Destroy; override; function GetLine: PAnsiChar; function GetLineIndex: Cardinal; end; { TdxHunspellWordBaseManager } TdxHunspellWordBaseManager = class private FHashTableSize: Integer; FWordBaseTable: PdxHunspellWordBaseTable; FAffixFlagMode: TdxAffixFlagMode; FComplexPrefixes: Boolean; FForbiddenWordFlag: Word; FIsUTF8: Boolean; FLanguage: Integer; FCodePage: Cardinal; FLanguageName: PAnsiChar; FIgnoredChars: PAnsiChar; ignorechars_utf16: PWord; ignorechars_utf16_len: Integer; FAffixFlagAliases: TdxHunspellFlagsList; FAffixMorphologicAliasesCount: Integer; FAffixMorphologicAliases: PdxAffixMorphologicAliases; procedure AllocateHashTable; procedure FreeHashTable; function Hash(AWord: PAnsiChar): Integer; procedure FreeAffixMorphologicAliases; function GetWordLengthAndCapitalType(const AWord: PAnsiChar; ALength: Integer; out ACapitalizationType: TdxCapitalizationType): Integer; //TODO: Remove this function because of UTF-8 function LoadTables(const AFileName: string): Boolean; function AddWord(const AWord: PAnsiChar; AWordLength, wcl: Integer; AAffixFlags: TdxHunspellFlags; const AMorphologocalDescription: PAnsiChar; AOnlyUpperCase: Boolean): Boolean; function LoadCommonFlags(const AFileName: string): Boolean; function ParseAffixFlagAliases(AAffixFileLine: PAnsiChar; AAffixFileManager: TdxHunspellReader): Boolean; function AddCapitalizedWord(AWord: PAnsiChar; wbl, wcl: Integer; AAffixFlags: TdxHunspellFlags; ADescription: PAnsiChar; ACapitalizationType: TdxCapitalizationType): Boolean; function ParseMorphologicAliases(AAffixFileLine: PAnsiChar; AAffixFileManager: TdxHunspellReader): Boolean; protected property AffixFlagMode: TdxAffixFlagMode read FAffixFlagMode; property AffixFlagAliases: TdxHunspellFlagsList read FAffixFlagAliases; property IsUTF8: Boolean read FIsUTF8; public constructor Create(ALanguage: Integer); destructor Destroy; override; function Load(const ADictionaryFileName: string; const AAffixFileName: string): Boolean; function Lookup(const AWord: PAnsiChar): TdxHunspellWordItem; function DecodeFlag(const AFlag: PAnsiChar): Word; function EncodeFlag(AFlag: Word): PAnsiChar; function HasAffixFlagAliases: Boolean; function HasAffixMorphologicAliases: Boolean; function GetAffixMorphologyByAlias(AAlias: Integer): PAnsiChar; property CodePage: Cardinal read FCodePage; property Language: Integer read FLanguage; end; TdxHunspellWordBaseManagers = array[0..dxHunspellMaxDictionaryCount - 1] of TdxHunspellWordBaseManager; var FNum, FCapNum, FFree: Integer; implementation uses Math, dxHunspellUtils, dxCore; procedure Rotate(var v: Integer; q: Integer); begin v := v shl q or (v shr (32 - q)) and ((1 shl q) - 1); end; { TdxHunspellFlags } constructor TdxHunspellFlags.Create(AWordBaseManager: TdxHunspellWordBaseManager); begin inherited Create; FWordBaseManager := AWordBaseManager; Inc(FNum); end; constructor TdxHunspellFlags.CreateCapitalized(AFlags: TdxHunspellFlags); begin inherited Create; Length := AFlags.Length + 1; Move(AFlags.Data^, Data^, AFlags.Length * SizeOf(Word)); FData[Length - 1] := ONLYUPCASEFLAG; Inc(FCapNum); end; destructor TdxHunspellFlags.Destroy; begin Inc(FFree); DisposeData; inherited Destroy; end; procedure TdxHunspellFlags.Assign(ASource: TdxHunspellFlags); begin Length := ASource.Length; Move(ASource.Data^, Data^, ASource.Length * SizeOf(Word)); FWordBaseManager := ASource.FWordBaseManager; end; procedure TdxHunspellFlags.AssignCapitalized(ASource: TdxHunspellFlags); begin Length := ASource.Length + 1; Move(ASource.Data^, Data^, ASource.Length * SizeOf(Word)); FData[Length - 1] := ONLYUPCASEFLAG; end; procedure TdxHunspellFlags.DisposeData; begin if FDataSize > 0 then begin FreeMem(FData, FDataSize); FData := nil; FDataSize := 0; end; end; function TdxHunspellFlags.InitializeByAlias(AAlias: PAnsiChar): Boolean; var AAliasNum: Integer; begin AAliasNum := StrInt(AAlias); Result := (AAliasNum > 0) and (AAliasNum <= FWordBaseManager.AffixFlagAliases.Count); DisposeData; if Result then begin FLength := FWordBaseManager.AffixFlagAliases[AAliasNum - 1].Length; FData := FWordBaseManager.AffixFlagAliases[AAliasNum - 1].Data; end; end; function TdxHunspellFlags.ContainsFlag(AFlag: Word): Boolean; begin Result := (Length > 0) and dxHunspellUtils.ContainsFlag(FData, Length, AFlag); end; function TdxHunspellFlags.Decode(AFlags: PAnsiChar): Boolean; var I, ALength: Integer; AFlag, ATempPointer: PAnsiChar; ADecodedFlag: PWord; begin Result := True; case FWordBaseManager.AffixFlagMode of afmLong: begin Length := StrLen(AFlags) div 2; for I := 0 to Length - 1 do Data[I] := Ord((AFlags + I * 2)^) shl 8 + Ord((AFlags + I * 2 + 1)^); end; afmNum: begin ALength := 1; AFlag := AFlags; ATempPointer := AFlags; while ATempPointer^ <> #0 do begin if ATempPointer^ = ',' then Inc(ALength); Inc(ATempPointer); end; Length := ALength; ADecodedFlag := @FData[0]; ATempPointer := AFlags; while ATempPointer^ <> #0 do begin if ATempPointer^ = ',' then begin I := StrInt(AFlag); ADecodedFlag^ := I; AFlag := ATempPointer + 1; Inc(ADecodedFlag); end; Inc(ATempPointer); end; I := StrInt(AFlag); ADecodedFlag^ := I; end; afmUni: // UTF-8 characters begin //TODO: end; else Length := StrLen(AFlags); ADecodedFlag := @FData[0]; ATempPointer := AFlags; while ATempPointer^ <> #0 do begin ADecodedFlag^ := Ord(ATempPointer^); Inc(ADecodedFlag); Inc(ATempPointer); end; end; end; procedure TdxHunspellFlags.Sort; begin SortFlags(Data^, 0, Length); end; procedure TdxHunspellFlags.SetLength(ANewLength: Integer); begin if ANewLength <> Length then begin FLength := ANewLength; if ANewLength * SizeOf(Word) > FDataSize then begin FDataSize := ANewLength * SizeOf(Word); ReallocMem(FData, FDataSize); end; end; end; function TdxHunspellFlags.GetItem(Index: Integer): Word; begin Result := FData[Index]; end; { TdxHunspellFlagsList } function TdxHunspellFlagsList.GetItem(Index: Integer): TdxHunspellFlags; begin Result := TdxHunspellFlags(inherited Items[Index]); end; { TdxHunspellWordItem } constructor TdxHunspellWordItem.Create(AWordBaseManager: TdxHunspellWordBaseManager); begin inherited Create; FWordBaseManager := AWordBaseManager; end; destructor TdxHunspellWordItem.Destroy; begin if FWordBase <> nil then StrDispose(FWordBase); if (FMorphologicalDescription <> nil) and not IsAffixMorphologicAliasUsed then StrDispose(FMorphologicalDescription); FreeAndNil(FAffixFlags); inherited Destroy; end; function TdxHunspellWordItem.IsCompatibleWithFlag(AFlag: Word; CompatibleIfNull: Boolean = False): Boolean; begin if AFlag <> NullFlag then Result := (FAffixFlags <> nil) and FAffixFlags.ContainsFlag(AFlag) else Result := CompatibleIfNull; end; procedure TdxHunspellWordItem.SetAffixFlags(const Value: TdxHunspellFlags); begin FreeAndNil(FAffixFlags); FAffixFlags := Value; end; function TdxHunspellWordItem.GetWordBaseLength: Byte; begin Result := StrLen(FWordBase); end; function TdxHunspellWordItem.IsAffixMorphologicAliasUsed: Boolean; begin Result := wboMorphologyAliases in FOptions; end; { TdxHunspellReader } constructor TdxHunspellReader.Create(const AFileName: string{; const key: PAnsiChar}); begin FCodePage := 28591; FLineIndex := 0; FData := TdxSpellCheckerStrings.Create(FCodePage); LoadFromFile(AFileName); end; destructor TdxHunspellReader.Destroy; begin FData.Free; inherited Destroy; end; function TdxHunspellReader.GetLine: PAnsiChar; begin Result := nil; if (FData <> nil) and (FLineIndex < FData.Count) then Result := StrNew(PAnsiChar(dxWideStringToAnsiString(FData.Items[FLineIndex], FCodePage))); Inc(FLineIndex); end; procedure TdxHunspellReader.LoadFromFile(const AFileName: string); var AStream: TStream; begin AStream := TFileStream.Create(AFileName, fmOpenRead or fmShareDenyWrite); try LoadFromStream(AStream); finally AStream.Free; end; end; procedure TdxHunspellReader.LoadFromStream(AStream: TStream); begin AStream.Position := 0; FData.Capacity := 16384; FData.LoadFromStream(AStream); end; function TdxHunspellReader.GetLineIndex: Cardinal; begin Result := FLineIndex; end; { TdxHunspellWordBaseManager } constructor TdxHunspellWordBaseManager.Create(ALanguage: Integer); begin FHashTableSize := 0; FWordBaseTable := nil; FAffixFlagMode := afmChar; FComplexPrefixes := False; FIsUTF8 := False; FLanguage := ALanguage; FLanguageName := nil; FIgnoredChars := nil; ignorechars_utf16 := nil; ignorechars_utf16_len := 0; FAffixMorphologicAliasesCount := 0; FAffixMorphologicAliases := nil; FForbiddenWordFlag := ForbiddenWordFlag; FAffixFlagAliases := TdxHunspellFlagsList.Create; end; destructor TdxHunspellWordBaseManager.Destroy; var I: Integer; ACurrentItem, ANextItem: TdxHunspellWordItem; begin if FWordBaseTable <> nil then begin for I := 0 to FHashTableSize - 1 do begin ACurrentItem := FWordBaseTable[I]; while ACurrentItem <> nil do begin ANextItem := ACurrentItem.Next; FreeAndNil(ACurrentItem); ACurrentItem := ANextItem; end; end; FreeHashTable; end; FHashTableSize := 0; FreeAndNil(FAffixFlagAliases); FreeAffixMorphologicAliases; StrDispose(FLanguageName); StrDispose(FIgnoredChars); if ignorechars_utf16 <> nil then Dispose(ignorechars_utf16); inherited Destroy; end; function TdxHunspellWordBaseManager.Load(const ADictionaryFileName: string; const AAffixFileName: string): Boolean; begin LoadCommonFlags(AAffixFileName); Result := LoadTables(ADictionaryFileName); if not Result then begin if FWordBaseTable <> nil then FreeHashTable; FHashTableSize := 0; end; end; function TdxHunspellWordBaseManager.Lookup(const AWord: PAnsiChar): TdxHunspellWordItem; begin Result := nil; if FWordBaseTable <> nil then begin Result := FWordBaseTable[Hash(AWord)]; if Result <> nil then while Result <> nil do begin if StrComp(AWord, Result.WordBase) = 0 then Break; Result := Result.Next; end; end; end; function TdxHunspellWordBaseManager.AddWord(const AWord: PAnsiChar; AWordLength, wcl: Integer; AAffixFlags: TdxHunspellFlags; const AMorphologocalDescription: PAnsiChar; AOnlyUpperCase: Boolean): Boolean; procedure AssignFlags(AItem: TdxHunspellWordItem); begin if AOnlyUpperCase then AItem.AffixFlags.AssignCapitalized(AAffixFlags) else AItem.AffixFlags.Assign(AAffixFlags); end; function InitNewItem(AItem: TdxHunspellWordItem): TdxHunspellWordItem; var AFlags: TdxHunspellFlags; begin if AOnlyUpperCase then AFlags := TdxHunspellFlags.CreateCapitalized(AAffixFlags) else begin AFlags := TdxHunspellFlags.Create(Self); AFlags.Assign(AAffixFlags); end; AItem.AffixFlags := AFlags; Result := AItem; end; var AIsUpperCaseHomonym: Boolean; ATableIndex: Integer; ANewHashTableItem, AHashTableItem: TdxHunspellWordItem; begin Result := False; AIsUpperCaseHomonym := False; ANewHashTableItem := TdxHunspellWordItem.Create(Self); ANewHashTableItem.WordBase := StrNew(AWord); if FIgnoredChars <> nil then begin if IsUTF8 then //TODO: else RemoveIgnoredChars(ANewHashTableItem.WordBase, FIgnoredChars); end; if FComplexPrefixes then begin if IsUTF8 then //TODO: else StrReverse(ANewHashTableItem.WordBase); end; ATableIndex := Hash(ANewHashTableItem.WordBase); ANewHashTableItem.clen := wcl; ANewHashTableItem.Next := nil; ANewHashTableItem.NextHomonym := nil; if AMorphologocalDescription <> nil then begin Include(ANewHashTableItem.FOptions, wboMorphologyDescription); if FAffixMorphologicAliases <> nil then begin Include(ANewHashTableItem.FOptions, wboMorphologyAliases); ANewHashTableItem.MorphologicalDescription := GetAffixMorphologyByAlias(StrInt(AMorphologocalDescription)); end else begin ANewHashTableItem.MorphologicalDescription := StrNew(AMorphologocalDescription); if FComplexPrefixes then begin if IsUTF8 then //TODO: else StrReverse(ANewHashTableItem.MorphologicalDescription); end; end; if StrPos(ANewHashTableItem.MorphologicalDescription, MORPH_PHON) <> nil then Include(ANewHashTableItem.FOptions, wboMorphologyPhone); end; AHashTableItem := FWordBaseTable[ATableIndex]; if AHashTableItem = nil then begin FWordBaseTable[ATableIndex] := InitNewItem(ANewHashTableItem); Exit; end; while AHashTableItem.Next <> nil do begin if (AHashTableItem.NextHomonym = nil) and (StrComp(PAnsiChar(@ANewHashTableItem.WordBase), PAnsiChar(@AHashTableItem.WordBase)) = 0) then begin if not AOnlyUpperCase then begin if AHashTableItem.IsCompatibleWithFlag(ONLYUPCASEFLAG) then begin FreeAndNil(ANewHashTableItem); AssignFlags(AHashTableItem); Exit; end else AHashTableItem.NextHomonym := InitNewItem(ANewHashTableItem); end else AIsUpperCaseHomonym := True; end; AHashTableItem := AHashTableItem.Next; end; if StrComp(ANewHashTableItem.WordBase, AHashTableItem.WordBase) = 0 then begin if not AOnlyUpperCase then begin if AHashTableItem.IsCompatibleWithFLag(ONLYUPCASEFLAG) then begin FreeAndNil(ANewHashTableItem); AssignFlags(AHashTableItem); Exit; end else AHashTableItem.NextHomonym := InitNewItem(ANewHashTableItem); end else AIsUpperCaseHomonym := True; end; if not AIsUpperCaseHomonym then AHashTableItem.Next := InitNewItem(ANewHashTableItem) else FreeAndNil(ANewHashTableItem); end; procedure TdxHunspellWordBaseManager.AllocateHashTable; begin FWordBaseTable := AllocMem(FHashTableSize * SizeOf(TdxHunspellWordItem)); end; function TdxHunspellWordBaseManager.AddCapitalizedWord(AWord: PAnsiChar; wbl, wcl: Integer; AAffixFlags: TdxHunspellFlags; ADescription: PAnsiChar; ACapitalizationType: TdxCapitalizationType): Boolean; begin Result := False; if (ACapitalizationType in [ctMixedCapital, ctFirstMixedCapital, ctAllCapital]) and not AAffixFlags.ContainsFlag(FForbiddenWordFlag) then begin if IsUTF8 then begin //TODO: end else begin MakeAllSmall(AWord, CodePage, Language); MakeCapitalized(AWord, CodePage, Language); Result := AddWord(AWord, wbl, wcl, AAffixFlags, ADescription, True); end; end; end; procedure TdxHunspellWordBaseManager.FreeAffixMorphologicAliases; var I: Integer; begin if FAffixMorphologicAliases <> nil then begin for I := 0 to FAffixMorphologicAliasesCount - 1 do StrDispose(FAffixMorphologicAliases[I]); FreeMem(FAffixMorphologicAliases, FAffixMorphologicAliasesCount * SizeOf(PAnsiChar)); FAffixMorphologicAliases := nil; FAffixMorphologicAliasesCount := 0; end; end; procedure TdxHunspellWordBaseManager.FreeHashTable; begin FreeMem(FWordBaseTable, FHashTableSize * SizeOf(TdxHunspellWordItem)); FWordBaseTable := nil; end; function TdxHunspellWordBaseManager.Hash(AWord: PAnsiChar): Integer; var I, AHashValue: Integer; begin AHashValue := 0; I := 0; while (I < 4) and (AWord^ <> #0) do begin AHashValue := AHashValue shl 8 or Ord(AWord^); Inc(AWord); Inc(I); end; while AWord^ <> #0 do begin Rotate(AHashValue, RotateLength); AHashValue := AHashValue xor Ord(AWord^); Inc(AWord); end; Result := Cardinal(AHashValue) mod Cardinal(FHashTableSize); end; function TdxHunspellWordBaseManager.GetWordLengthAndCapitalType(const AWord: PAnsiChar; ALength: Integer; out ACapitalizationType: TdxCapitalizationType): Integer; begin if IsUTF8 then begin //TODO: Result := ALength; end else begin Result := ALength; ACapitalizationType := GetCapitalizationType(AWord, Result); end; end; function TdxHunspellWordBaseManager.LoadTables(const AFileName: string): Boolean; var I, ALinePartLength, wcl: Integer; ACapitalizationType: TdxCapitalizationType; ALineCursor, AAffixPartLineCursor, AMorphologicalPartLineCursor, ATempCursor: PAnsiChar; AAffixFlags: TdxHunspellFlags; ADictionaryFileManager: TdxHunspellReader; begin ADictionaryFileManager := nil; Result := True; try ADictionaryFileManager := TdxHunspellReader.Create(AFileName); if ADictionaryFileManager = nil then begin Result := False; Exit; end; ALineCursor := ADictionaryFileManager.GetLine; if ALineCursor = nil then begin Result := False; Exit; end; RemoveCRLF(ALineCursor); RemoveMark(ALineCursor); FHashTableSize := StrInt(ALineCursor); StrDispose(ALineCursor); if FHashTableSize = 0 then begin Result := False; Exit; end; FHashTableSize := FHashTableSize + 5 + UserWordCount; if FHashTableSize mod 2 = 0 then Inc(FHashTableSize); AllocateHashTable; for I := 0 to FHashTableSize - 1 do FWordBaseTable[I] := nil; ALineCursor := ADictionaryFileManager.GetLine; while ALineCursor <> nil do begin RemoveCRLF(ALineCursor); AMorphologicalPartLineCursor := ALineCursor; AMorphologicalPartLineCursor := StrScan(AMorphologicalPartLineCursor, ':'); while AMorphologicalPartLineCursor <> nil do begin if (AMorphologicalPartLineCursor > ALineCursor + 3) and (((AMorphologicalPartLineCursor - 3)^ = ' ') or ((AMorphologicalPartLineCursor - 3)^ = #9)) then begin AMorphologicalPartLineCursor := AMorphologicalPartLineCursor - 4; while (AMorphologicalPartLineCursor >= ALineCursor) and ((AMorphologicalPartLineCursor^ = ' ') or (AMorphologicalPartLineCursor^ = #9)) do Dec(AMorphologicalPartLineCursor); if AMorphologicalPartLineCursor < ALineCursor then AMorphologicalPartLineCursor := nil else begin (AMorphologicalPartLineCursor + 1)^ := #0; AMorphologicalPartLineCursor := AMorphologicalPartLineCursor + 2; end; Break; end; Inc(AMorphologicalPartLineCursor); AMorphologicalPartLineCursor := StrScan(AMorphologicalPartLineCursor, ':'); end; ATempCursor := StrScan(ALineCursor, #9); if (ATempCursor <> nil) and ((AMorphologicalPartLineCursor = nil) or (ATempCursor < AMorphologicalPartLineCursor)) then begin ATempCursor^ := #0; AMorphologicalPartLineCursor := ATempCursor + 1; end; AAffixPartLineCursor := StrScan(ALineCursor, '/'); while (AAffixPartLineCursor <> nil) do begin if AAffixPartLineCursor = ALineCursor then begin Inc(AAffixPartLineCursor); Continue; end else if (AAffixPartLineCursor - 1)^ <> '\' then Break; ATempCursor := AAffixPartLineCursor - 1; while ATempCursor^ <> #0 do begin ATempCursor^ := (ATempCursor + 1)^; Inc(ATempCursor); end; AAffixPartLineCursor := StrScan(AAffixPartLineCursor, '/'); end; AAffixFlags := TdxHunspellFlags.Create(Self); try if (AAffixPartLineCursor <> nil) then begin AAffixPartLineCursor^ := #0; if FAffixFlagAliases.Count > 0 then begin Result := AAffixFlags.InitializeByAlias(AAffixPartLineCursor + 1); if not Result then AAffixPartLineCursor^ := #0; end else begin Result := AAffixFlags.Decode(AAffixPartLineCursor + 1); if Result then AAffixFlags.Sort; end; if not Result then AAffixFlags.Length := 0; end; ALinePartLength := StrLen(ALineCursor); wcl := GetWordLengthAndCapitalType(ALineCursor, ALinePartLength, ACapitalizationType); if AddWord(ALineCursor, ALinePartLength, wcl, AAffixFlags, AMorphologicalPartLineCursor, False) or AddCapitalizedWord(ALineCursor, ALinePartLength, wcl, AAffixFlags, AMorphologicalPartLineCursor, ACapitalizationType) then begin Result := True; Exit; end; finally FreeAndNil(AAffixFlags); end; StrDispose(ALineCursor); ALineCursor := ADictionaryFileManager.GetLine; end; StrDispose(ALineCursor); finally ADictionaryFileManager.Free; end; end; function TdxHunspellWordBaseManager.DecodeFlag(const AFlag: PAnsiChar): Word; begin Result := 0; case FAffixFlagMode of afmLong: Result := Ord(AFlag^) shl 8 + Ord((AFlag + 1)^); afmNum: Result := StrInt(AFlag); afmUni:; //TODO: else Result := Ord(AFlag^); end; end; function TdxHunspellWordBaseManager.EncodeFlag(AFlag: Word): PAnsiChar; const ArraySize = 10; var AEncodedFlag: array [0..ArraySize - 1] of Byte; begin if AFlag = 0 then Result := nil else begin if FAffixFlagMode = afmLong then begin AEncodedFlag[0] := AFlag shr 8; AEncodedFlag[1] := AFlag - ((AFlag shr 8) shl 8); AEncodedFlag[2] := 0; end else if FAffixFlagMode = afmNum then StrPCopy(@AEncodedFlag, IntToStr(AFlag)) else if FAffixFlagMode = afmUni then //TODO: else begin AEncodedFlag[0] := AFlag; AEncodedFlag[1] := 0; end; Result := StrNew(PAnsiChar(@AEncodedFlag)); end; end; function TdxHunspellWordBaseManager.LoadCommonFlags(const AFileName: string): Boolean; var ALine: PAnsiChar; AIsFirstLine: Boolean; AAffixFileManager: TdxHunspellReader; AFlag: PAnsiChar; AEncoding: PAnsiChar; begin Result := False; AIsFirstLine := True; AAffixFileManager := TdxHunspellReader.Create(AFileName); try if AAffixFileManager = nil then begin Result := True; Exit; end; ALine := AAffixFileManager.GetLine; while ALine <> nil do begin RemoveCRLF(ALine); if AIsFirstLine then begin AIsFirstLine := False; RemoveMark(ALine); end; if (StrLComp(ALine, 'FLAG', 4) = 0) and isspace((ALine + 4)^) then begin if StrPos(ALine, 'long') <> nil then FAffixFlagMode := afmLong; if StrPos(ALine, 'num') <> nil then FAffixFlagMode := afmNum; if StrPos(ALine, 'UTF-8') <> nil then FAffixFlagMode := afmUni; end; if StrLComp(ALine, 'FORBIDDENWORD', 13) = 0 then begin AFlag := nil; if not GetSecondPartOfString(ALine, AFlag) then begin Result := True; Exit; end; FForbiddenWordFlag := DecodeFlag(AFlag); StrDispose(AFlag); end; if StrLComp(ALine, 'SET', 3) = 0 then begin AEncoding := nil; try if not GetSecondPartOfString(ALine, AEncoding) then begin Result := True; Exit; end; FCodePage := GetCodePageByName(AEncoding); if StrComp(AEncoding, 'UTF-8') = 0 then //TODO: // FIsUTF8 := 1; raise Exception.Create('UTF-8 doesn''t supported'); finally StrDispose(AEncoding); end; end; if StrLComp(ALine, 'LANG', 4) = 0 then begin if not GetSecondPartOfString(ALine, FLanguageName) then begin Result := True; Exit; end; FLanguage := GetLanguageID(FLanguageName); end; if StrLComp(ALine, 'IGNORE', 6) = 0 then if not ParseArray(ALine, FIgnoredChars, FIsUTF8) then begin Result := True; Exit; end; if (StrLComp(ALine, 'AF', 2) = 0) and isspace((ALine + 2)^) then if ParseAffixFlagAliases(ALine, AAffixFileManager) then begin Result := True; Exit; end; if (StrLComp(ALine, 'AM', 2) = 0) and isspace((ALine + 2)^) then if ParseMorphologicAliases(ALine, AAffixFileManager) then begin Result := True; Exit; end; if StrLComp(ALine, 'COMPLEXPREFIXES', 15) = 0 then FComplexPrefixes := True; if ((StrLComp(ALine, 'SFX', 3) = 0) or (StrLComp(ALine, 'PFX', 3) = 0)) and isspace((ALine + 3)^) then Break; StrDispose(ALine); ALine := AAffixFileManager.GetLine; end; StrDispose(ALine); finally AAffixFileManager.Free; end; end; function TdxHunspellWordBaseManager.ParseAffixFlagAliases(AAffixFileLine: PAnsiChar; AAffixFileManager: TdxHunspellReader): Boolean; var ALineStart, ALineCursor, APiece: PAnsiChar; I, J, ACount: Integer; AAffixFlags: TdxHunspellFlags; begin Result := False; if AffixFlagAliases.Count <> 0 then begin Result := True; Exit; end; ALineCursor := AAffixFileLine; I := 0; APiece := StrSeparate(@ALineCursor, #0); ACount := 0; while (APiece <> nil) and (I < 2) do begin if APiece^ <> #0 then begin if I = 1 then begin ACount := StrInt(APiece); if ACount < 1 then begin Result := True; Exit; end; end; Inc(I); end; APiece := StrSeparate(@ALineCursor, #0); end; if I < 2 then begin Result := True; Exit; end; AffixFlagAliases.Capacity := ACount; for J := 0 to ACount - 1 do begin ALineStart := AAffixFileManager.GetLine; try if ALineStart = nil then begin Result := True; Exit; end; ALineCursor := ALineStart; RemoveCRLF(ALineCursor); I := 0; APiece := StrSeparate(@ALineCursor, #0); while (APiece <> nil) and (I < 2) do begin if APiece^ <> #0 then begin if I = 0 then begin if StrLComp(APiece, 'AF', 2) <> 0 then begin Result := True; Exit; end; end else begin AAffixFlags := TdxHunspellFlags.Create(Self); AAffixFlags.Decode(APiece); AAffixFlags.Sort; AffixFlagAliases.Add(AAffixFlags); end; Inc(I); end; APiece := StrSeparate(@ALineCursor, #0); end; finally StrDispose(ALineStart); end; end; end; function TdxHunspellWordBaseManager.HasAffixFlagAliases: Boolean; begin Result := FAffixFlagAliases.Count > 0; end; function TdxHunspellWordBaseManager.ParseMorphologicAliases(AAffixFileLine: PAnsiChar; AAffixFileManager: TdxHunspellReader): Boolean; var ALineStart, ALineCursor, APiece: PAnsiChar; I, J: Integer; begin if FAffixMorphologicAliasesCount <> 0 then begin Result := True; Exit; end; ALineCursor := AAffixFileLine; I := 0; APiece := StrSeparate(@ALineCursor, #0); while (APiece <> nil) and (I < 2) do begin if APiece^ <> #0 then begin if I = 1 then begin FAffixMorphologicAliasesCount := StrInt(APiece); if FAffixMorphologicAliasesCount < 1 then begin Result := True; Exit; end; try FAffixMorphologicAliases := AllocMem(FAffixMorphologicAliasesCount * SizeOf(PAnsiChar)); except on EOutOfMemory do begin FAffixMorphologicAliasesCount := 0; Result := True; Exit; end; end; end; Inc(I); end; APiece := StrSeparate(@ALineCursor, #0); end; if I < 2 then begin FreeAffixMorphologicAliases; Result := True; Exit; end; for J := 0 to FAffixMorphologicAliasesCount - 1 do begin ALineStart := AAffixFileManager.GetLine; if ALineStart = nil then begin Result := True; Exit; end; ALineCursor := ALineStart; RemoveCRLF(ALineCursor); I := 0; FAffixMorphologicAliases[J] := nil; APiece := StrSeparate(@ALineCursor, ' '); while (APiece <> nil) and (I < 2) do begin if APiece^ <> #0 then begin if I = 0 then begin if StrLComp(APiece, 'AM', 2) <> 0 then begin FreeAffixMorphologicAliases; Result := True; Exit; end; end else begin if ALineCursor^ <> #0 then begin (ALineCursor - 1)^ := ' '; ALineCursor := ALineCursor + StrLen(ALineCursor); end; if FComplexPrefixes then begin if IsUTF8 then //TODO: else StrReverse(APiece); end; FAffixMorphologicAliases[J] := StrNew(APiece); if FAffixMorphologicAliases[J] = nil then begin FreeAffixMorphologicAliases; Result := True; Exit; end; end; Inc(I); end; APiece := StrSeparate(@ALineCursor, ' '); end; if FAffixMorphologicAliases[J] = nil then begin FreeAffixMorphologicAliases; Result := True; Exit; end; StrDispose(ALineStart); end; Result := False; end; function TdxHunspellWordBaseManager.HasAffixMorphologicAliases: Boolean; begin Result := FAffixMorphologicAliases <> nil; end; function TdxHunspellWordBaseManager.GetAffixMorphologyByAlias(AAlias: Integer): PAnsiChar; begin Result := nil; if (AAlias > 0) and (AAlias <= FAffixMorphologicAliasesCount) then Result := FAffixMorphologicAliases[AAlias - 1]; end; end.