{********************************************************************} { } { Developer Express Visual Component Library } { ExpressSpellChecker } { } { Copyright (c) 1998-2009 Developer Express Inc. } { ALL RIGHTS RESERVED } { } { The entire contents of this file is protected by U.S. and } { International Copyright Laws. Unauthorized reproduction, } { reverse-engineering, and distribution of all or any portion of } { the code contained in this file is strictly prohibited and may } { result in severe civil and criminal penalties and will be } { prosecuted to the maximum extent possible under the law. } { } { RESTRICTIONS } { } { THIS SOURCE CODE AND ALL RESULTING INTERMEDIATE FILES } { (DCU, OBJ, DLL, ETC.) ARE CONFIDENTIAL AND PROPRIETARY TRADE } { SECRETS OF DEVELOPER EXPRESS INC. THE REGISTERED DEVELOPER IS } { LICENSED TO DISTRIBUTE THE EXPRESSSPELLCHECKER AND ALL } { ACCOMPANYING VCL CONTROLS AS PART OF AN EXECUTABLE PROGRAM ONLY. } { } { THE SOURCE CODE CONTAINED WITHIN THIS FILE AND ALL RELATED } { FILES OR ANY PORTION OF ITS CONTENTS SHALL AT NO TIME BE } { COPIED, TRANSFERRED, SOLD, DISTRIBUTED, OR OTHERWISE MADE } { AVAILABLE TO OTHER INDIVIDUALS WITHOUT EXPRESS WRITTEN CONSENT } { AND PERMISSION FROM DEVELOPER EXPRESS INC. } { } { CONSULT THE END USER LICENSE AGREEMENT FOR INFORMATION ON } { ADDITIONAL RESTRICTIONS. } { } {********************************************************************} unit dxHunspellUtils; {$I cxVer.inc} interface uses SysUtils, Windows, Forms, dxHunspellWords, dxHunspellTypes; const // default morphological fields MORPH_STEM = 'st:'; MORPH_ALLOMORPH = 'al:'; MORPH_POS = 'po:'; MORPH_DERI_PFX = 'dp:'; MORPH_INFL_PFX = 'ip:'; MORPH_TERM_PFX = 'tp:'; MORPH_DERI_SFX = 'ds:'; MORPH_INFL_SFX = 'is:'; MORPH_TERM_SFX = 'ts:'; MORPH_SURF_PFX = 'sp:'; MORPH_FREQ = 'fr:'; MORPH_PHON = 'ph:'; MORPH_HYPH = 'hy:'; MORPH_PART = 'pa:'; MORPH_FLAG = 'fl:'; MORPH_HENTRY = '_H:'; MORPH_TAG_LEN = Length(MORPH_STEM); MSEP_FLD = ' '; MSEP_REC = #10; // default flags DEFAULTFLAGS = 65510; ForbiddenWordFlag = 65510; ONLYUPCASEFLAG = 65511; MaxWordLength = 100; MAXWORDUTF8LEN = 256; NullFlag = $00; function IsUpCase(Ch: AnsiChar): Boolean; function IsSpace(Value: AnsiChar): Boolean; procedure SortFlags(var AFlags: array of Word; ABegin, AEnd: Integer); function ContainsFlag(AFlags: PdxAffixFlagsData; ALength: Integer; ARequiredFlag: Word): Boolean; function GetCapitalizationType(P: PAnsiChar; ALength: Integer): TdxCapitalizationType; procedure RemoveCRLF(S: PAnsiChar); procedure RemoveIgnoredChars(AWord, AIgnoreChars: PAnsiChar); function StrReplace(AWord: PAnsiChar; APattern: PAnsiChar; AReplacement: PAnsiChar): PAnsiChar; function GetSecondPartOfString(ASource: PAnsiChar; out ADest: PAnsiChar): Boolean; function StrCopyReverse(S: PAnsiChar): PAnsiChar; function StrReverse(S: PAnsiChar): PAnsiChar; function StrInt(S: PAnsiChar): Integer; function GetElementPointer(const Source: PPAnsiChar; Index: Integer): PPAnsiChar; {$IFDEF DELPHI9}inline;{$ENDIF} function StrSeparate(ALine: PPAnsiChar; const ADelimiter: AnsiChar): PAnsiChar; procedure MakeAllSmall(P: PAnsiChar; ACodePage: DWORD; ALangID: LCID); procedure MakeCapitalized(P: PAnsiChar; ACodePage: DWORD; ALangID: LCID); function ParseArray(ALine: PAnsiChar; out ADest: PAnsiChar; utf8: Boolean): Boolean; function GetLanguageID(ALanguage: PAnsiChar): Integer; procedure RemoveMark(ALineCursor: PAnsiChar); procedure dxHunspellError(const Message: string); var dxHunspellWarningFlag: Boolean; dxHunspellLastWarningMessage: string; implementation uses StrUtils, Dialogs, dxCore; type PPPAnsiChar = ^PPAnsiChar; var HasWarning: Boolean; procedure dxHunspellError(const Message: string); begin dxHunspellLastWarningMessage := Message; ShowMessage(Message); dxHunspellWarningFlag := True; HasWarning := True; end; function IsUpCase(Ch: AnsiChar): Boolean; begin Result := dxGetAnsiCharCType1(Ch) and C1_UPPER > 0; end; procedure RemoveMark(ALineCursor: PAnsiChar); begin if StrLComp(ALineCursor, #$EF#$BB#$BF, 3) = 0 then Move((ALineCursor + 3)^, ALineCursor^, StrLen(ALineCursor + 3) + 1); end; function IsSpace(Value: AnsiChar): Boolean; var B: Byte; begin B := Ord(Value); Result := ($09 <= B) and (B <= $0D) or (B = $20); end; function GetElementPointer(const Source: PPAnsiChar; Index: Integer): PPAnsiChar; overload; begin Result := Source; Inc(Result, Index); end; procedure SortFlags(var AFlags: array of Word; ABegin, AEnd: Integer); var ATemp, APivot: Word; ALeft, ARight: Integer; begin if AEnd > ABegin then begin APivot := AFlags[ABegin]; ALeft := ABegin + 1; ARight := AEnd; while ALeft < ARight do begin if AFlags[ALeft] <= APivot then Inc(ALeft) else begin Dec(ARight); ATemp := AFlags[ALeft]; AFlags[ALeft] := AFlags[ARight]; AFlags[ARight] := ATemp; end end; Dec(ALeft); ATemp := AFlags[ABegin]; AFlags[ABegin] := AFlags[ALeft]; AFlags[ALeft] := ATemp; SortFlags(AFlags, ABegin, ALeft); SortFlags(AFlags, ARight, AEnd); end; end; function ContainsFlag(AFlags: PdxAffixFlagsData; ALength: Integer; ARequiredFlag: Word): Boolean; var AMiddle, ALeft, ARight: Integer; begin Result := False; ALeft := 0; ARight := ALength - 1; while ALeft <= ARight do begin AMiddle := (ALeft + ARight) div 2; if AFlags^[AMiddle] = ARequiredFlag then begin Result := True; Break; end; if ARequiredFlag < AFlags^[AMiddle] then ARight := AMiddle - 1 else ALeft := AMiddle + 1; end; end; function StrInt(S: PAnsiChar): Integer; begin Result := 0; if S = nil then Exit; while (S^ <> #0) and (S^ in ['0' .. '9']) do begin Result := Result * 10 + Ord(S^) - Ord('0'); Inc(S); end; end; function StrSeparate(ALine: PPAnsiChar; const ADelimiter: AnsiChar): PAnsiChar; var ALineStart, ADelimiterCursor: PAnsiChar; begin Result := nil; ALineStart := ALine^; if ALineStart^ <> #0 then begin if ADelimiter <> #0 then ADelimiterCursor := StrScan(ALineStart, ADelimiter) else begin ADelimiterCursor := ALineStart; while (ADelimiterCursor^ <> #0) and (ADelimiterCursor^ <> ' ') and (ADelimiterCursor^ <> #9) do Inc(ADelimiterCursor); if ADelimiterCursor^ = #0 then ADelimiterCursor := nil; end; if ADelimiterCursor <> nil then begin ALine^ := ADelimiterCursor + 1; ADelimiterCursor^ := #0; end else ALine^ := ALineStart + StrLen(ALineStart); Result := ALineStart; end; end; procedure RemoveCRLF(S: PAnsiChar); var L: Integer; begin L := StrLen(S); if (L > 0) and ((S + L - 1)^ in [#13, #10]) then (S + L - 1)^ := #0; if (L > 1) and ((S + L - 2)^ = #13) then (S + L - 2)^ := #0; end; function StrReverse(S: PAnsiChar): PAnsiChar; var C: AnsiChar; P: PAnsiChar; begin Result := S; P := S + StrLen(S) - 1; while S < P do begin C := S^; S^ := P^; P^ := C; Inc(S); Dec(P); end; end; function StrCopyReverse(S: PAnsiChar): PAnsiChar; var ALength: Integer; ALeft, ARight: PAnsiChar; begin Result := nil; if S <> nil then begin ALength := StrLen(S); {$IFDEF DELPHI12} Result := AnsiStrAlloc(ALength + 1); {$ELSE} Result := StrAlloc(ALength + 1); {$ENDIF} ALeft := Result; ARight := S + ALength - 1; while ARight >= S do begin ALeft^ := ARight^; Inc(ALeft); Dec(ARight); end; ALeft^ := #0; end; end; function StrReplace(AWord: PAnsiChar; APattern: PAnsiChar; AReplacement: PAnsiChar): PAnsiChar; var APos, AEnd, ANext, APrev: PAnsiChar; AReplacementLength, APatternLength: Integer; begin APos := StrPos(AWord, APattern); if APos <> nil then begin AReplacementLength := StrLen(AReplacement); APatternLength := StrLen(APattern); if AReplacementLength < APatternLength then begin AEnd := AWord + StrLen(AWord); ANext := APos + AReplacementLength; APrev := APos + APatternLength; while APrev < AEnd do begin ANext^ := APrev^; Inc(APrev); Inc(ANext); end; ANext^ := #0; end else if AReplacementLength > APatternLength then begin AEnd := APos + APatternLength; ANext := AWord + StrLen(AWord) + AReplacementLength - APatternLength; APrev := ANext - AReplacementLength + APatternLength; while APrev >= AEnd do begin ANext^ := APrev^; Dec(APrev); Dec(ANext); end; end; StrLCopy(APos, AReplacement, AReplacementLength); end; Result := AWord; end; procedure MakeAllSmall(P: PAnsiChar; ACodePage: DWORD; ALangID: LCID); var L: Integer; W: array[0..512] of WideChar; begin if P = nil then Exit; L := StrLen(P); if (L = 0) or (L > 512 - 1) then Exit; MultiByteToWideChar(ACodePage, MB_PRECOMPOSED, P, L, PWideChar(@W), 512); LCMapStringW(ALangID, LCMAP_LOWERCASE, @W, L, @W, 512); WideCharToMultiByte(ACodePage, 0, @W, L, P, L, nil, nil); end; procedure MakeCapitalized(P: PAnsiChar; ACodePage: DWORD; ALangID: LCID); var L: Integer; W: WideChar; begin if P = nil then Exit; L := StrLen(P); if L < 1 then Exit; MultiByteToWideChar(ACodePage, MB_PRECOMPOSED, P, 1, @W, 1); LCMapStringW(ALangID, LCMAP_UPPERCASE, @W, 1, @W, 1); WideCharToMultiByte(ACodePage, 0, @W, 1, P, 1, nil, nil); end; function GetLanguageID(ALanguage: PAnsiChar): Integer; begin Result := LanguageNone; if StrComp(ALanguage, 'az') = 0 then Result := LanguageAz; if StrComp(ALanguage, 'hu') = 0 then Result := LanguageHu; if StrComp(ALanguage, 'tr') = 0 then Result := LanguageTr; end; function GetCapitalizationType(P: PAnsiChar; ALength: Integer): TdxCapitalizationType; var ACharTypes: PWordArray; I, AlphaCount, AUpperCount: Integer; begin Result := ctNoCapital; if ALength = 0 then Exit; GetMem(ACharTypes, ALength * SizeOf(Word)); if dxGetStringTypeA(LOCALE_USER_DEFAULT, CT_CTYPE1, P, ALength, ACharTypes^) then begin AlphaCount := 0; AUpperCount := 0; for I := 1 to ALength do begin if ACharTypes[I - 1] and C1_ALPHA > 0 then begin Inc(AlphaCount); if ACharTypes[I - 1] and C1_UPPER > 0 then begin Inc(AUpperCount); if AlphaCount = 1 then Result := ctFirstCapital; end; end; end; if AlphaCount > 0 then begin if AlphaCount = AUpperCount then Result := ctAllCapital else if AUpperCount = 0 then Result := ctNoCapital else if Result = ctFirstCapital then begin if AUpperCount > 1 then Result := ctFirstMixedCapital; end else Result := ctMixedCapital; end; end; FreeMem(ACharTypes, ALength * SizeOf(Word)); end; procedure RemoveIgnoredChars(AWord, AIgnoreChars: PAnsiChar); var AWordCursor: PAnsiChar; begin AWordCursor := AWord; while AWordCursor^ <> #0 do begin if StrScan(AIgnoreChars, AWordCursor^) = nil then begin AWord^ := AWordCursor^; Inc(AWord); end; Inc(AWordCursor); end; AWord^ := #0; end; function GetSecondPartOfString(ASource: PAnsiChar; out ADest: PAnsiChar): Boolean; var ALineCursor, APiece: PAnsiChar; I, APartCount: Integer; begin Result := False; ALineCursor := ASource; I := 0; APartCount := 0; if ADest <> nil then Exit; APiece := StrSeparate(@ALineCursor, #0); while APiece <> nil do begin if APiece^ <> #0 then begin case I of 0: Inc(APartCount); 1: begin ADest := StrNew(APiece); if ADest = nil then Exit; Inc(APartCount); end; end; Inc(I); end; APiece := StrSeparate(@ALineCursor, #0); end; Result := APartCount = 2; end; function ParseArray(ALine: PAnsiChar; out ADest: PAnsiChar; utf8: Boolean): Boolean; begin Result := False; if not GetSecondPartOfString(ALine, ADest) then Exit; if utf8 then begin //TODO: end; Result := True; end; end.