Componentes.Terceros.DevExp.../official/x.42/ExpressSpellChecker/Sources/dxSpellCheckerUtils.pas
2009-02-27 12:02:10 +00:00

728 lines
20 KiB
ObjectPascal

{********************************************************************}
{ }
{ Developer Express Visual Component Library }
{ ExpressSpellChecker }
{ }
{ Copyright (c) 1998-2009 Developer Express Inc. }
{ ALL RIGHTS RESERVED }
{ }
{ The entire contents of this file is protected by U.S. and }
{ International Copyright Laws. Unauthorized reproduction, }
{ reverse-engineering, and distribution of all or any portion of }
{ the code contained in this file is strictly prohibited and may }
{ result in severe civil and criminal penalties and will be }
{ prosecuted to the maximum extent possible under the law. }
{ }
{ RESTRICTIONS }
{ }
{ THIS SOURCE CODE AND ALL RESULTING INTERMEDIATE FILES }
{ (DCU, OBJ, DLL, ETC.) ARE CONFIDENTIAL AND PROPRIETARY TRADE }
{ SECRETS OF DEVELOPER EXPRESS INC. THE REGISTERED DEVELOPER IS }
{ LICENSED TO DISTRIBUTE THE EXPRESSSPELLCHECKER AND ALL }
{ ACCOMPANYING VCL CONTROLS AS PART OF AN EXECUTABLE PROGRAM ONLY. }
{ }
{ THE SOURCE CODE CONTAINED WITHIN THIS FILE AND ALL RELATED }
{ FILES OR ANY PORTION OF ITS CONTENTS SHALL AT NO TIME BE }
{ COPIED, TRANSFERRED, SOLD, DISTRIBUTED, OR OTHERWISE MADE }
{ AVAILABLE TO OTHER INDIVIDUALS WITHOUT EXPRESS WRITTEN CONSENT }
{ AND PERMISSION FROM DEVELOPER EXPRESS INC. }
{ }
{ CONSULT THE END USER LICENSE AGREEMENT FOR INFORMATION ON }
{ ADDITIONAL RESTRICTIONS. }
{ }
{********************************************************************}
unit dxSpellCheckerUtils;
{$I cxVer.inc}
interface
uses
Classes, SysUtils, cxClasses, dxCore;
type
TdxCapitalizationType = (ctNone, ctCapitalized, ctUpper, ctLower, ctMixed);
TdxSpellCheckerStrings = class(TList)
private
FCodePage: Cardinal;
function GetItem(Index: Integer): PWideChar; {$IFDEF DELPHI9} inline; {$ENDIF}
function GetText: WideString;
procedure SetText(const AValue: WideString);
protected
function AddRaw(AData: Pointer; ALength: Integer): PWideChar;
function ItemLength(P: PWideChar): Integer;
public
constructor Create(ACodePage: Cardinal = 0);
function Add(const S: WideString): PWideChar;
procedure Clear; override;
procedure LoadFromFile(const AFileName: TFileName);
procedure LoadFromStream(AStream: TStream);
procedure SaveToFile(const AFileName: TFileName);
procedure SaveToStream(AStream: TStream);
property CodePage: Cardinal read FCodePage write FCodePage;
property Items[Index: Integer]: PWideChar read GetItem; default;
property Text: WideString read GetText write SetText;
end;
{ TdxCodePage }
TdxSpellCheckerCodePage = class
private
FCode: Cardinal;
function GetName: WideString;
public
constructor Create(ACode: Cardinal);
property Code: Cardinal read FCode;
property Name: WideString read GetName;
end;
{ TdxSpellCheckerCodePages }
TdxSpellCheckerCodePages = class(TcxObjectList)
private
function CallbackProc(ACodePage: PWideChar): Cardinal; stdcall;
function GetCode(AIndex: Integer): Cardinal;
function GetCodeByName(const AName: WideString): Cardinal;
function GetItem(AIndex: Integer): TdxSpellCheckerCodePage;
function GetName(AIndex: Integer): WideString;
function GetNameByCode(ACode: Cardinal): WideString;
public
constructor Create(AOnlyInstalled: Boolean);
procedure PopulateCodePages(AList: TStringList);
property Code[AIndex: Integer]: Cardinal read GetCode;
property CodeByName[const AName: WideString]: Cardinal read GetCodeByName;
property Items[AIndex: Integer]: TdxSpellCheckerCodePage read GetItem; default;
property Name[AIndex: Integer]: WideString read GetName;
property NameByCode[ACode: Cardinal]: WideString read GetNameByCode;
end;
function CreateAlphabetFromAnsiString(ACodePage: Cardinal;
const AnsiAlphabet: AnsiString): WideString;
function CreateDefaultAlphabet(ACodePage: Cardinal): WideString;
function GetWideCharCount(AChar: WideChar; const S: WideString): Integer;
function GetWordCapitalizationType(const S: WideString): TdxCapitalizationType; overload;
function GetWordCapitalizationType(P: PWideChar; ALength: Integer): TdxCapitalizationType; overload;
function IsUrl(const S: WideString): Boolean;
function LanguageToCodePage(ALanguage: Cardinal): Integer;
function WideCapitalizeCase(const S: WideString): WideString;
function WideIsNumeric(Ch: WideChar): Boolean;
function WideIsAlpha(Ch: WideChar): Boolean;
function WideIsAlphaNumeric(Ch: WideChar): Boolean;
function WideIsSpace(Ch: WideChar): Boolean;
function WideIsUpCase(Ch: WideChar): Boolean;
function WideIsLoCase(Ch: WideChar): Boolean;
function WidePatternCase(const APattern, S: WideString): WideString;
function WidePos(const ASubStr, S: WideString): Integer;
function WideStringContainsAlpha(const S: WideString): Boolean;
function WideCharInCharset(Ch: WideChar; const AChars: TdxAnsiCharSet): Boolean;
function WideCharPos(Ch: WideChar; const S: WideString): Integer;
function WideStartsWith(const ASubStr, S: WideString): Boolean;
function WideEndsWith(const ASubStr, S: WideString): Boolean;
function StreamToAnsiString(AStream: TStream; APos: Integer; ASize: Integer = 0): AnsiString;
function StreamToWideString(AStream: TStream; ACodePage: Cardinal;
APos: Integer; ASize: Integer = 0): WideString;
function dxGetCodePageName(ACodePage: Cardinal): WideString;
procedure dxPopulateCodePages(AList: TStringList; AOnlyInstalled: Boolean = True);
implementation
uses
Windows;
const
DefaultAnsiAlphabet: AnsiString =
'&''-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' +
#$8A#$8C#$8D#$8E#$8F#$92#$9A#$9C#$9D#$9E#$9F#$A1#$A2#$A3#$A5#$A6#$A8 +
#$A9#$AB#$AC#$AE#$AF#$B1#$B2#$B3#$B6#$B8#$B9#$BB#$BC#$BE#$BF#$C0#$C1 +
#$C2#$C3#$C4#$C5#$C6#$C7#$C8#$C9#$CA#$CB#$CC#$CD#$CE#$CF#$D0#$D1#$D2 +
#$D3#$D4#$D5#$D6#$D7#$D8#$D9#$DA#$DB#$DC#$DD#$DE#$DF#$E0#$E1#$E2#$E3 +
#$E4#$E5#$E6#$E7#$E8#$E9#$EA#$EB#$EC#$ED#$EE#$EF#$F0#$F1#$F2#$F3#$F4 +
#$F5#$F6#$F7#$F8#$F9#$FA#$FB#$FC#$FD#$FE#$FF;
var
dxCodePages: TStringList;
type
TcxCPInfoEx = record
MaxCharSize: UINT;
DefaultChar: array [0..MAX_DEFAULTCHAR - 1] of BYTE;
LeadByte: array [0..MAX_LEADBYTES - 1] of BYTE;
UnicodeDefaultChar: WCHAR;
CodePage: UINT;
CodePageName: array [0..MAX_PATH - 1] of WCHAR;
end;
PcxCPInfoEx = ^TcxCPInfoEx;
function GetCPInfoEx(CodePage: Cardinal; dwFlags: DWORD; var AInfoEx: TcxCPInfoEx): BOOL; stdcall;
external 'kernel32.dll' name 'GetCPInfoExW';
function dxCodePagesSort(AItem1, AItem2: TdxSpellCheckerCodePage): Integer;
begin
Result := AItem1.Code - AItem2.Code;
end;
function CreateAlphabetFromAnsiString(ACodePage: Cardinal;
const AnsiAlphabet: AnsiString): WideString;
var
S: WideString;
I: Integer;
begin
S := WideUpperCase(dxAnsiStringToWideString(AnsiAlphabet, ACodePage));
Result := '';
for I := 1 to Length(S) do
if (WideCharPos(S[I], Result) = 0) and WideIsAlpha(S[I]) then
Result := Result + S[I];
end;
function CreateDefaultAlphabet(ACodePage: Cardinal): WideString;
begin
Result := CreateAlphabetFromAnsiString(ACodePage, DefaultAnsiAlphabet);
end;
function GetWideCharCount(AChar: WideChar; const S: WideString): Integer;
var
I: Integer;
begin
Result := 0;
for I := 1 to Length(S) do
if AChar = S[I] then
Inc(Result);
end;
function GetWordCapitalizationType(P: PWideChar; ALength: Integer): TdxCapitalizationType;
var
ACharTypes: PWordArray;
I, AlphaCount, AUpperCount: Integer;
begin
Result := ctNone;
if ALength = 0 then Exit;
GetMem(ACharTypes, ALength * SizeOf(Word));
if dxGetStringTypeW(CT_CTYPE1, P, ALength, ACharTypes^) then
begin
AlphaCount := 0;
AUpperCount := 0;
for I := 1 to ALength do
begin
if ACharTypes[I - 1] and C1_ALPHA > 0 then
begin
Inc(AlphaCount);
if ACharTypes[I - 1] and C1_UPPER > 0 then
begin
Inc(AUpperCount);
if AlphaCount = 1 then
Result := ctCapitalized;
end;
end;
end;
if AlphaCount > 0 then
begin
if AlphaCount = AUpperCount then
Result := ctUpper
else if AUpperCount = 0 then
Result := ctLower
else if not ((AUpperCount = 1) and (Result = ctCapitalized)) then
Result := ctMixed;
end;
end;
FreeMem(ACharTypes, ALength * SizeOf(Word));
end;
function GetWordCapitalizationType(const S: WideString): TdxCapitalizationType;
begin
Result := GetWordCapitalizationType(PWideChar(S), Length(S));
end;
function IsUrl(const S: WideString): Boolean;
const
Urls: array[0..8] of WideString = ('http://', 'https://', 'ftp://', 'www.', 'file://', '\\', 'gopher://', 'nntp://', 'news://');
var
ALower: WideString;
I: Integer;
begin
Result := Length(S) > 2;
if Result then
begin
ALower := WideLowerCase(S);
for I := Low(Urls) to High(Urls) do
if WideStartsWith(Urls[I], ALower) then
Exit;
Result := False;
end;
end;
function LanguageToCodePage(ALanguage: Cardinal): Integer;
var
ABuffer: array[0..6] of Char;
begin
GetLocaleInfo(ALanguage, LOCALE_IDEFAULTANSICODEPAGE, ABuffer, 6);
Result:= StrToIntDef(ABuffer, GetACP);
end;
function WideCapitalizeCase(const S: WideString): WideString;
var
AUpperCase: WideString;
begin
Result := '';
if Length(S) > 0 then
begin
AUpperCase := WideUpperCase(S);
Result := WideLowerCase(S);
Result[1] := AUpperCase[1];
end;
end;
function WideIsNumeric(Ch: WideChar): Boolean;
begin
Result := dxGetWideCharCType1(Ch) and C1_DIGIT > 0;
end;
function WideIsAlpha(Ch: WideChar): Boolean;
begin
Result := dxGetWideCharCType1(Ch) and C1_ALPHA > 0;
end;
function WideIsAlphaNumeric(Ch: WideChar): Boolean;
begin
Result := dxGetWideCharCType1(Ch) and (C1_ALPHA or C1_DIGIT) > 0;
end;
function WideIsSpace(Ch: WideChar): Boolean;
begin
Result := dxGetWideCharCType1(Ch) and C1_SPACE > 0;
end;
function WideIsUpCase(Ch: WideChar): Boolean;
begin
Result := dxGetWideCharCType1(Ch) and C1_UPPER > 0;
end;
function WideIsLoCase(Ch: WideChar): Boolean;
begin
Result := dxGetWideCharCType1(Ch) and C1_LOWER > 0;
end;
function WidePatternCase(const APattern, S: WideString): WideString;
begin
case GetWordCapitalizationType(APattern) of
ctCapitalized:
Result := WideCapitalizeCase(S);
ctUpper:
Result := WideUpperCase(S);
else
Result := WideLowerCase(S);
end;
end;
function WidePos(const ASubStr, S: WideString): Integer;
var
I: Integer;
APos: Integer;
ALenS, ALenSubStr: Integer;
begin
Result := 0;
ALenS := Length(S);
ALenSubStr := Length(ASubStr);
if (ALenSubStr = 0) or (ALenS = 0) or (ALenS < ALenSubStr) then
Exit;
APos := 1;
for I := 1 to ALenS do
begin
if S[I] = ASubStr[APos] then
Inc(APos)
else
APos := 1;
if APos = ALenSubStr + 1 then
begin
Result := I - APos + 2;
Break;
end;
end;
end;
function WideStringContainsAlpha(const S: WideString): Boolean;
var
I: Integer;
begin
Result := False;
for I := 1 to Length(S) do
if WideIsAlpha(S[I]) then
begin
Result := True;
Break;
end;
end;
function WideCharInCharset(Ch: WideChar; const AChars: TdxAnsiCharSet): Boolean;
begin
Result := (Ch <= #$FF) and (AnsiChar(Ch) in AChars);
end;
function WideCharPos(Ch: WideChar; const S: WideString): Integer;
var
I: Integer;
begin
for I := 1 to Length(S) do
if S[I] = Ch then
begin
Result := I;
Exit;
end;
Result := 0;
end;
function WideStartsWith(const ASubStr, S: WideString): Boolean;
var
I, ALen1, ALen2: Integer;
begin
ALen1 := Length(ASubStr);
ALen2 := Length(S);
Result := (ALen1 > 0) and (ALen1 <= ALen2);
if Result then
begin
for I := 1 to ALen1 do
if ASubStr[I] <> S[I] then
begin
Result := False;
Exit;
end;
end;
end;
function WideEndsWith(const ASubStr, S: WideString): Boolean;
var
I, ALen1, ALen2: Integer;
begin
ALen1 := Length(ASubStr);
ALen2 := Length(S);
Result := (ALen1 > 0) and (ALen1 <= ALen2);
if Result then
begin
for I := 1 to ALen1 do
if ASubStr[I] <> S[(ALen2 - ALen1) + I] then
begin
Result := False;
Exit;
end;
end;
end;
function StreamToAnsiString(AStream: TStream; APos: Integer; ASize: Integer = 0): AnsiString;
begin
Result := '';
AStream.Position := APos;
if ASize = 0 then
ASize := AStream.Size - APos;
if ASize > 0 then
begin
SetLength(Result, ASize);
AStream.Read(PAnsiChar(Result)^, ASize);
end;
end;
function StreamToWideString(AStream: TStream; ACodePage: Cardinal;
APos: Integer; ASize: Integer = 0): WideString;
begin
Result := dxAnsiStringToWideString(StreamToAnsiString(AStream, APos, ASize), ACodePage);
end;
{ TdxSpellCheckerStrings }
constructor TdxSpellCheckerStrings.Create(ACodePage: Cardinal);
begin
inherited Create;
FCodePage := ACodePage;
Capacity := 256 * 1024;
end;
function TdxSpellCheckerStrings.Add(
const S: WideString): PWideChar;
begin
if Length(S) = 0 then
Result := nil
else
Result := AddRaw(@S[1], Length(S));
end;
function TdxSpellCheckerStrings.AddRaw(AData: Pointer;
ALength: Integer): PWideChar;
var
ASize: Integer;
begin
ASize := (ALength + 1) * Sizeof(WideChar);
Inc(ASize, SizeOf(Cardinal) * 2);
Result := AllocMem(ASize);
Cardinal(Pointer(Result)^) := ASize;
Inc(Integer(Result), SizeOf(Cardinal));
Cardinal(Pointer(Result)^) := ALength;
Inc(Integer(Result), SizeOf(Cardinal));
System.Move(AData^, Pointer(Result)^, ALength * Sizeof(WideChar));
inherited Add(Pointer(Result));
end;
procedure TdxSpellCheckerStrings.Clear;
var
ALine: PWideChar;
I: Integer;
begin
for I := 0 to Count - 1 do
begin
ALine := Items[I];
if ALine <> nil then
begin
Dec(Integer(ALine), SizeOf(Cardinal) * 2);
FreeMem(ALine, Cardinal(Pointer(ALine)^));
end;
end;
inherited Clear;
end;
function TdxSpellCheckerStrings.ItemLength(P: PWideChar): Integer;
begin
if P = nil then
Result := 0
else
begin
Dec(Integer(P), SizeOf(Cardinal));
Result := Cardinal(Pointer(P)^);
end;
end;
procedure TdxSpellCheckerStrings.LoadFromFile(const AFileName: TFileName);
var
AStream: TStream;
begin
AStream := TFileStream.Create(AFileName, fmOpenRead or fmShareDenyWrite);
try
LoadFromStream(AStream);
finally
AStream.Free;
end;
end;
procedure TdxSpellCheckerStrings.LoadFromStream(AStream: TStream);
begin
Text := StreamToWideString(AStream, CodePage, AStream.Position);
end;
procedure TdxSpellCheckerStrings.SaveToFile(const AFileName: TFileName);
var
AStream: TFileStream;
begin
AStream := TFileStream.Create(AFileName, fmCreate);
try
SaveToStream(AStream);
finally
AStream.Free;
end;
end;
procedure TdxSpellCheckerStrings.SaveToStream(AStream: TStream);
var
S: AnsiString;
begin
S := dxWideStringToAnsiString(Text, CodePage);
if Length(S) > 0 then
AStream.Write(S[1], Length(S));
end;
function TdxSpellCheckerStrings.GetItem(Index: Integer): PWideChar;
begin
Result := PWideChar(inherited Items[Index]);
end;
function TdxSpellCheckerStrings.GetText: WideString;
var
I, ASize: Integer;
ALine: PWideChar;
begin
ASize := 0;
for I := 0 to Count - 1 do
Inc(ASize, ItemLength(Items[I]));
Inc(ASize, Count * 2);
SetLength(Result, ASize);
ASize := 1;
for I := 0 to Count - 1 do
begin
ALine := Items[I];
System.Move(ALine^, Result[ASize], ItemLength(ALine) * SizeOf(WideChar));
Inc(ASize, ItemLength(ALine));
Result[ASize] := #13;
Result[ASize + 1] := #10;
Inc(ASize, 2);
end;
end;
procedure TdxSpellCheckerStrings.SetText(const AValue: WideString);
var
P, ALine: PWideChar;
begin
Clear;
if Length(AValue) = 0 then Exit;
P := Pointer(AValue);
while P^ <> #0 do
begin
ALine := P;
while (P^ <> #0) and (P^ <> #10) and (P^ <> #13) do Inc(P);
AddRaw(ALine, P - ALine);
if P^ = #13 then Inc(P);
if P^ = #10 then Inc(P);
end;
end;
{ TdxSpellCheckerCodePage }
constructor TdxSpellCheckerCodePage.Create(ACode: Cardinal);
begin
FCode := ACode;
end;
function TdxSpellCheckerCodePage.GetName: WideString;
begin
Result := dxGetCodePageName(Code);
end;
{ TdxSpellCheckerCodePages }
constructor TdxSpellCheckerCodePages.Create(AOnlyInstalled: Boolean);
type
TCallbackThunk = packed record
POPEDX: Byte;
MOVEAX: Byte;
SelfPtr: Pointer;
PUSHEAX: Byte;
PUSHEDX: Byte;
JMP: Byte;
JmpOffset: Integer;
end;
var
Callback: TCallbackThunk;
AFlags: Cardinal;
begin
inherited Create;
Clear;
if AOnlyInstalled then
AFlags := CP_INSTALLED
else
AFlags := CP_SUPPORTED;
Callback.POPEDX := $5A;
Callback.MOVEAX := $B8;
Callback.SelfPtr := Self;
Callback.PUSHEAX := $50;
Callback.PUSHEDX := $52;
Callback.JMP := $E9;
Callback.JmpOffset := Integer(@TdxSpellCheckerCodePages.CallbackProc) - Integer(@Callback.JMP) - 5;
EnumSystemCodePagesW(TFNCodepageEnumProc(@Callback), AFlags);
Sort(@dxCodePagesSort);
end;
procedure TdxSpellCheckerCodePages.PopulateCodePages(AList: TStringList);
var
I: Integer;
begin
AList.Clear;
for I := 0 to Count - 1 do
AList.Add(Items[I].Name);
end;
function TdxSpellCheckerCodePages.CallbackProc(ACodePage: PWideChar): Cardinal; stdcall;
begin
if Length(ACodePage) > 0 then
Add(TdxSpellCheckerCodePage.Create(StrToInt(ACodePage)));
Result := 1;
end;
function TdxSpellCheckerCodePages.GetCode(AIndex: Integer): Cardinal;
begin
Result := Items[AIndex].Code;
end;
function TdxSpellCheckerCodePages.GetCodeByName(const AName: WideString): Cardinal;
var
I: Integer;
begin
Result := 0;
for I := 0 to Count - 1 do
if WideCompareStr(AName, Items[I].Name) = 0 then
begin
Result := Items[I].Code;
Break;
end;
end;
function TdxSpellCheckerCodePages.GetItem(AIndex: Integer): TdxSpellCheckerCodePage;
begin
Result := TdxSpellCheckerCodePage(inherited Items[AIndex]);
end;
function TdxSpellCheckerCodePages.GetName(AIndex: Integer): WideString;
begin
Result := Items[AIndex].Name;
end;
function TdxSpellCheckerCodePages.GetNameByCode(ACode: Cardinal): WideString;
var
I: Integer;
begin
Result := '';
for I := 0 to Count - 1 do
if Items[I].Code = ACode then
begin
Result := Items[I].Name;
Break;
end;
end;
function dxGetCodePageName(ACodePage: Cardinal): WideString;
var
AInfo: TcxCPInfoEx;
begin
FillChar(AInfo, SizeOf(AInfo), 0);
GetCPInfoEx(ACodePage, 0, AInfo);
Result := AInfo.CodePageName;
if Length(Result) = 0 then
Result := IntToStr(ACodePage);
end;
procedure dxPopulateCodePages(AList: TStringList; AOnlyInstalled: Boolean = True);
function CallbackProc(ACodePage: PWideChar): Cardinal; stdcall;
var
AName: WideString;
begin
AName := dxGetCodePageName(StrToInt(ACodePage));
if Length(AName) > 0 then
dxCodePages.Add(AName)
else
dxCodePages.Add(ACodePage);
Result := 1;
end;
var
AFlags: Cardinal;
begin
dxCodePages.Clear;
if AOnlyInstalled then
AFlags := CP_INSTALLED
else
AFlags := CP_SUPPORTED;
EnumSystemCodePagesW(@CallbackProc, AFlags);
dxCodePages.Sort;
AList.Clear;
AList.AddStrings(dxCodePages);
end;
initialization
dxCodePages := TStringList.Create;
finalization
FreeAndNil(dxCodePages);
end.