Componentes.Terceros.DevExp.../official/x.48/ExpressSpellChecker 2/Sources/dxHunspellUtils.pas
2010-01-18 18:33:24 +00:00

482 lines
13 KiB
ObjectPascal

{********************************************************************}
{ }
{ Developer Express Visual Component Library }
{ ExpressSpellChecker }
{ }
{ Copyright (c) 1998-2009 Developer Express Inc. }
{ ALL RIGHTS RESERVED }
{ }
{ The entire contents of this file is protected by U.S. and }
{ International Copyright Laws. Unauthorized reproduction, }
{ reverse-engineering, and distribution of all or any portion of }
{ the code contained in this file is strictly prohibited and may }
{ result in severe civil and criminal penalties and will be }
{ prosecuted to the maximum extent possible under the law. }
{ }
{ RESTRICTIONS }
{ }
{ THIS SOURCE CODE AND ALL RESULTING INTERMEDIATE FILES }
{ (DCU, OBJ, DLL, ETC.) ARE CONFIDENTIAL AND PROPRIETARY TRADE }
{ SECRETS OF DEVELOPER EXPRESS INC. THE REGISTERED DEVELOPER IS }
{ LICENSED TO DISTRIBUTE THE EXPRESSSPELLCHECKER AND ALL }
{ ACCOMPANYING VCL CONTROLS AS PART OF AN EXECUTABLE PROGRAM ONLY. }
{ }
{ THE SOURCE CODE CONTAINED WITHIN THIS FILE AND ALL RELATED }
{ FILES OR ANY PORTION OF ITS CONTENTS SHALL AT NO TIME BE }
{ COPIED, TRANSFERRED, SOLD, DISTRIBUTED, OR OTHERWISE MADE }
{ AVAILABLE TO OTHER INDIVIDUALS WITHOUT EXPRESS WRITTEN CONSENT }
{ AND PERMISSION FROM DEVELOPER EXPRESS INC. }
{ }
{ CONSULT THE END USER LICENSE AGREEMENT FOR INFORMATION ON }
{ ADDITIONAL RESTRICTIONS. }
{ }
{********************************************************************}
unit dxHunspellUtils;
{$I cxVer.inc}
interface
uses
SysUtils, Windows, Forms, dxHunspellWords, dxHunspellTypes;
const
// default morphological fields
MORPH_STEM = 'st:';
MORPH_ALLOMORPH = 'al:';
MORPH_POS = 'po:';
MORPH_DERI_PFX = 'dp:';
MORPH_INFL_PFX = 'ip:';
MORPH_TERM_PFX = 'tp:';
MORPH_DERI_SFX = 'ds:';
MORPH_INFL_SFX = 'is:';
MORPH_TERM_SFX = 'ts:';
MORPH_SURF_PFX = 'sp:';
MORPH_FREQ = 'fr:';
MORPH_PHON = 'ph:';
MORPH_HYPH = 'hy:';
MORPH_PART = 'pa:';
MORPH_FLAG = 'fl:';
MORPH_HENTRY = '_H:';
MORPH_TAG_LEN = Length(MORPH_STEM);
MSEP_FLD = ' ';
MSEP_REC = #10;
// default flags
DEFAULTFLAGS = 65510;
ForbiddenWordFlag = 65510;
ONLYUPCASEFLAG = 65511;
MaxWordLength = 100;
MAXWORDUTF8LEN = 256;
NullFlag = $00;
function IsUpCase(Ch: AnsiChar): Boolean;
function IsSpace(Value: AnsiChar): Boolean;
procedure SortFlags(var AFlags: array of Word; ABegin, AEnd: Integer);
function ContainsFlag(AFlags: PdxAffixFlagsData; ALength: Integer; ARequiredFlag: Word): Boolean;
function GetCapitalizationType(P: PAnsiChar; ALength: Integer): TdxCapitalizationType;
procedure RemoveCRLF(S: PAnsiChar);
procedure RemoveIgnoredChars(AWord, AIgnoreChars: PAnsiChar);
function StrReplace(AWord: PAnsiChar; APattern: PAnsiChar; AReplacement: PAnsiChar): PAnsiChar;
function GetSecondPartOfString(ASource: PAnsiChar; out ADest: PAnsiChar): Boolean;
function StrCopyReverse(S: PAnsiChar): PAnsiChar;
function StrReverse(S: PAnsiChar): PAnsiChar;
function StrInt(S: PAnsiChar): Integer;
function GetElementPointer(const Source: PPAnsiChar; Index: Integer): PPAnsiChar; {$IFDEF DELPHI9}inline;{$ENDIF}
function StrSeparate(ALine: PPAnsiChar; const ADelimiter: AnsiChar): PAnsiChar;
procedure MakeAllSmall(P: PAnsiChar; ACodePage: DWORD; ALangID: LCID);
procedure MakeCapitalized(P: PAnsiChar; ACodePage: DWORD; ALangID: LCID);
function ParseArray(ALine: PAnsiChar; out ADest: PAnsiChar; utf8: Boolean): Boolean;
function GetLanguageID(ALanguage: PAnsiChar): Integer;
procedure RemoveMark(ALineCursor: PAnsiChar);
procedure dxHunspellError(const Message: string);
var
dxHunspellWarningFlag: Boolean;
dxHunspellLastWarningMessage: string;
implementation
uses
StrUtils, Dialogs, dxCore;
type
PPPAnsiChar = ^PPAnsiChar;
var
HasWarning: Boolean;
procedure dxHunspellError(const Message: string);
begin
dxHunspellLastWarningMessage := Message;
ShowMessage(Message);
dxHunspellWarningFlag := True;
HasWarning := True;
end;
function IsUpCase(Ch: AnsiChar): Boolean;
begin
Result := dxGetAnsiCharCType1(Ch) and C1_UPPER > 0;
end;
procedure RemoveMark(ALineCursor: PAnsiChar);
begin
if StrLComp(ALineCursor, #$EF#$BB#$BF, 3) = 0 then
Move((ALineCursor + 3)^, ALineCursor^, StrLen(ALineCursor + 3) + 1);
end;
function IsSpace(Value: AnsiChar): Boolean;
var
B: Byte;
begin
B := Ord(Value);
Result := ($09 <= B) and (B <= $0D) or (B = $20);
end;
function GetElementPointer(const Source: PPAnsiChar; Index: Integer): PPAnsiChar;
overload;
begin
Result := Source;
Inc(Result, Index);
end;
procedure SortFlags(var AFlags: array of Word; ABegin, AEnd: Integer);
var
ATemp, APivot: Word;
ALeft, ARight: Integer;
begin
if AEnd > ABegin then
begin
APivot := AFlags[ABegin];
ALeft := ABegin + 1;
ARight := AEnd;
while ALeft < ARight do
begin
if AFlags[ALeft] <= APivot then
Inc(ALeft)
else
begin
Dec(ARight);
ATemp := AFlags[ALeft];
AFlags[ALeft] := AFlags[ARight];
AFlags[ARight] := ATemp;
end
end;
Dec(ALeft);
ATemp := AFlags[ABegin];
AFlags[ABegin] := AFlags[ALeft];
AFlags[ALeft] := ATemp;
SortFlags(AFlags, ABegin, ALeft);
SortFlags(AFlags, ARight, AEnd);
end;
end;
function ContainsFlag(AFlags: PdxAffixFlagsData; ALength: Integer;
ARequiredFlag: Word): Boolean;
var
AMiddle, ALeft, ARight: Integer;
begin
Result := False;
ALeft := 0;
ARight := ALength - 1;
while ALeft <= ARight do
begin
AMiddle := (ALeft + ARight) div 2;
if AFlags^[AMiddle] = ARequiredFlag then
begin
Result := True;
Break;
end;
if ARequiredFlag < AFlags^[AMiddle] then
ARight := AMiddle - 1
else
ALeft := AMiddle + 1;
end;
end;
function StrInt(S: PAnsiChar): Integer;
begin
Result := 0;
if S = nil then Exit;
while (S^ <> #0) and (S^ in ['0' .. '9']) do
begin
Result := Result * 10 + Ord(S^) - Ord('0');
Inc(S);
end;
end;
function StrSeparate(ALine: PPAnsiChar; const ADelimiter: AnsiChar): PAnsiChar;
var
ALineStart, ADelimiterCursor: PAnsiChar;
begin
Result := nil;
ALineStart := ALine^;
if ALineStart^ <> #0 then
begin
if ADelimiter <> #0 then
ADelimiterCursor := StrScan(ALineStart, ADelimiter)
else
begin
ADelimiterCursor := ALineStart;
while (ADelimiterCursor^ <> #0) and (ADelimiterCursor^ <> ' ') and (ADelimiterCursor^ <> #9) do
Inc(ADelimiterCursor);
if ADelimiterCursor^ = #0 then
ADelimiterCursor := nil;
end;
if ADelimiterCursor <> nil then
begin
ALine^ := ADelimiterCursor + 1;
ADelimiterCursor^ := #0;
end
else
ALine^ := ALineStart + StrLen(ALineStart);
Result := ALineStart;
end;
end;
procedure RemoveCRLF(S: PAnsiChar);
var
L: Integer;
begin
L := StrLen(S);
if (L > 0) and ((S + L - 1)^ in [#13, #10]) then
(S + L - 1)^ := #0;
if (L > 1) and ((S + L - 2)^ = #13) then
(S + L - 2)^ := #0;
end;
function StrReverse(S: PAnsiChar): PAnsiChar;
var
C: AnsiChar;
P: PAnsiChar;
begin
Result := S;
P := S + StrLen(S) - 1;
while S < P do
begin
C := S^;
S^ := P^;
P^ := C;
Inc(S);
Dec(P);
end;
end;
function StrCopyReverse(S: PAnsiChar): PAnsiChar;
var
ALength: Integer;
ALeft, ARight: PAnsiChar;
begin
Result := nil;
if S <> nil then
begin
ALength := StrLen(S);
{$IFDEF DELPHI12}
Result := AnsiStrAlloc(ALength + 1);
{$ELSE}
Result := StrAlloc(ALength + 1);
{$ENDIF}
ALeft := Result;
ARight := S + ALength - 1;
while ARight >= S do
begin
ALeft^ := ARight^;
Inc(ALeft);
Dec(ARight);
end;
ALeft^ := #0;
end;
end;
function StrReplace(AWord: PAnsiChar; APattern: PAnsiChar; AReplacement: PAnsiChar): PAnsiChar;
var
APos, AEnd, ANext, APrev: PAnsiChar;
AReplacementLength, APatternLength: Integer;
begin
APos := StrPos(AWord, APattern);
if APos <> nil then
begin
AReplacementLength := StrLen(AReplacement);
APatternLength := StrLen(APattern);
if AReplacementLength < APatternLength then
begin
AEnd := AWord + StrLen(AWord);
ANext := APos + AReplacementLength;
APrev := APos + APatternLength;
while APrev < AEnd do
begin
ANext^ := APrev^;
Inc(APrev);
Inc(ANext);
end;
ANext^ := #0;
end
else if AReplacementLength > APatternLength then
begin
AEnd := APos + APatternLength;
ANext := AWord + StrLen(AWord) + AReplacementLength - APatternLength;
APrev := ANext - AReplacementLength + APatternLength;
while APrev >= AEnd do
begin
ANext^ := APrev^;
Dec(APrev);
Dec(ANext);
end;
end;
StrLCopy(APos, AReplacement, AReplacementLength);
end;
Result := AWord;
end;
procedure MakeAllSmall(P: PAnsiChar; ACodePage: DWORD; ALangID: LCID);
var
L: Integer;
W: array[0..512] of WideChar;
begin
if P = nil then Exit;
L := StrLen(P);
if (L = 0) or (L > 512 - 1) then Exit;
MultiByteToWideChar(ACodePage, MB_PRECOMPOSED, P, L, PWideChar(@W), 512);
LCMapStringW(ALangID, LCMAP_LOWERCASE, @W, L, @W, 512);
WideCharToMultiByte(ACodePage, 0, @W, L, P, L, nil, nil);
end;
procedure MakeCapitalized(P: PAnsiChar; ACodePage: DWORD; ALangID: LCID);
var
L: Integer;
W: WideChar;
begin
if P = nil then Exit;
L := StrLen(P);
if L < 1 then Exit;
MultiByteToWideChar(ACodePage, MB_PRECOMPOSED, P, 1, @W, 1);
LCMapStringW(ALangID, LCMAP_UPPERCASE, @W, 1, @W, 1);
WideCharToMultiByte(ACodePage, 0, @W, 1, P, 1, nil, nil);
end;
function GetLanguageID(ALanguage: PAnsiChar): Integer;
begin
Result := LanguageNone;
if StrComp(ALanguage, 'az') = 0 then
Result := LanguageAz;
if StrComp(ALanguage, 'hu') = 0 then
Result := LanguageHu;
if StrComp(ALanguage, 'tr') = 0 then
Result := LanguageTr;
end;
function GetCapitalizationType(P: PAnsiChar; ALength: Integer): TdxCapitalizationType;
var
ACharTypes: PWordArray;
I, AlphaCount, AUpperCount: Integer;
begin
Result := ctNoCapital;
if ALength = 0 then Exit;
GetMem(ACharTypes, ALength * SizeOf(Word));
if dxGetStringTypeA(LOCALE_USER_DEFAULT, CT_CTYPE1, P, ALength, ACharTypes^) then
begin
AlphaCount := 0;
AUpperCount := 0;
for I := 1 to ALength do
begin
if ACharTypes[I - 1] and C1_ALPHA > 0 then
begin
Inc(AlphaCount);
if ACharTypes[I - 1] and C1_UPPER > 0 then
begin
Inc(AUpperCount);
if AlphaCount = 1 then
Result := ctFirstCapital;
end;
end;
end;
if AlphaCount > 0 then
begin
if AlphaCount = AUpperCount then
Result := ctAllCapital
else if AUpperCount = 0 then
Result := ctNoCapital
else if Result = ctFirstCapital then
begin
if AUpperCount > 1 then
Result := ctFirstMixedCapital;
end
else
Result := ctMixedCapital;
end;
end;
FreeMem(ACharTypes, ALength * SizeOf(Word));
end;
procedure RemoveIgnoredChars(AWord, AIgnoreChars: PAnsiChar);
var
AWordCursor: PAnsiChar;
begin
AWordCursor := AWord;
while AWordCursor^ <> #0 do
begin
if StrScan(AIgnoreChars, AWordCursor^) = nil then
begin
AWord^ := AWordCursor^;
Inc(AWord);
end;
Inc(AWordCursor);
end;
AWord^ := #0;
end;
function GetSecondPartOfString(ASource: PAnsiChar; out ADest: PAnsiChar): Boolean;
var
ALineCursor, APiece: PAnsiChar;
I, APartCount: Integer;
begin
Result := False;
ALineCursor := ASource;
I := 0;
APartCount := 0;
if ADest <> nil then
Exit;
APiece := StrSeparate(@ALineCursor, #0);
while APiece <> nil do
begin
if APiece^ <> #0 then
begin
case I of
0:
Inc(APartCount);
1:
begin
ADest := StrNew(APiece);
if ADest = nil then
Exit;
Inc(APartCount);
end;
end;
Inc(I);
end;
APiece := StrSeparate(@ALineCursor, #0);
end;
Result := APartCount = 2;
end;
function ParseArray(ALine: PAnsiChar; out ADest: PAnsiChar; utf8: Boolean): Boolean;
begin
Result := False;
if not GetSecondPartOfString(ALine, ADest) then
Exit;
if utf8 then
begin
//TODO:
end;
Result := True;
end;
end.