Componentes.Terceros.jcl/official/1.100/source/common/JclPCRE.pas

659 lines
21 KiB
ObjectPascal

{**************************************************************************************************}
{ }
{ Project JEDI Code Library (JCL) }
{ }
{ The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); }
{ you may not use this file except in compliance with the License. You may obtain a copy of the }
{ License at http://www.mozilla.org/MPL/ }
{ }
{ Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF }
{ ANY KIND, either express or implied. See the License for the specific language governing rights }
{ and limitations under the License. }
{ }
{ The Original Code is JclPCRE.pas. }
{ }
{ The Initial Developer of the Original Code is Peter Thornqvist. }
{ Portions created by Peter Thornqvist are Copyright (C) of Peter Thornqvist. All rights reserved. }
{ }
{ Contributor(s): }
{ Robert Rossmair (rrossmair) }
{ Mario R. Carro }
{ Florent Ouchet (outchy) }
{ }
{**************************************************************************************************}
{ }
{ Class wrapper for PCRE (PERL Compatible Regular Expression) }
{ }
{ Unit owner: Peter Th?nqvist }
{ Last modified: $Date: 2007-05-25 13:08:11 +0200 (ven., 25 mai 2007) $ }
{ }
{**************************************************************************************************}
unit JclPCRE;
{$I jcl.inc}
{$RANGECHECKS OFF}
interface
uses
pcre,
{$IFDEF UNITVERSIONING}
JclUnitVersioning,
{$ENDIF UNITVERSIONING}
{$IFDEF MSWINDOWS}
Windows,
{$ENDIF MSWINDOWS}
{$IFDEF HAS_UNIT_LIBC}
Libc,
{$ENDIF HAS_UNIT_LIBC}
Classes, SysUtils, JclBase;
const
JCL_PCRE_CALLOUT_NOERROR = 0;
JCL_PCRE_CALLOUT_FAILCONTINUE = 1;
JCL_PCRE_ERROR_CALLOUTERROR = -998;
JCL_PCRE_ERROR_STUDYFAILED = -999;
type
TJclAnsiRegEx = class;
EPCREError = class(EJclError)
private
FErrorCode: Integer;
public
constructor CreateRes(ResStringRec: PResStringRec; ErrorCode: Integer);
property ErrorCode: Integer read FErrorCode;
end;
TPCREIntArray = array [0 .. 0] of Integer;
PPCREIntArray = ^TPCREIntArray;
TJclAnsiRegExOption = (roIgnoreCase, roMultiLine, roDotAll, roExtended,
roAnchored, roDollarEndOnly, roExtra, roNotBOL, roNotEOL, roUnGreedy,
roNotEmpty, roUTF8, roNoAutoCapture, roNoUTF8Check, roAutoCallout,
roPartial, roDfaShortest, roDfaRestart, roDfaFirstLine, roDupNames,
roNewLineCR, roNewLineLF, roNewLineCRLF, roNewLineAny);
TJclAnsiRegExOptions = set of TJclAnsiRegExOption;
TJclAnsiCaptureRange = record
FirstPos: Integer;
LastPos: Integer;
end;
TJclAnsiRegExCallout = procedure (Sender: TJclAnsiRegEx;
Index, MatchStart, SubjectPos, LastCapture, PatternPos, NextItemLength: Integer;
var ErrorCode: Integer) of object;
TPCRECalloutIndex = 0 .. 255;
TJclAnsiRegEx = class(TObject)
private
FCode: PPCRE;
FExtra: PPCREExtra;
FOptions: TJclAnsiRegExOptions;
FPattern: AnsiString;
FDfaMode: Boolean;
FSubject: AnsiString;
FViewChanges: Boolean;
FChangedCaptures: TList;
FResultValues: array of String;
FErrorCode: Integer;
FErrorMessage: AnsiString;
FErrorOffset: Integer;
FVector: PPCREIntArray;
FVectorSize: Integer;
FCaptureCount: Integer;
FOnCallout: TJclAnsiRegExCallout;
function GetResult: AnsiString;
function GetCapture(Index: Integer): AnsiString;
procedure SetCapture(Index: Integer; const Value: AnsiString);
function GetCaptureRange(Index: Integer): TJclAnsiCaptureRange;
function GetNamedCapture(const Name: AnsiString): AnsiString;
procedure SetNamedCapture(const Name, Value: Ansistring);
function GetCaptureNameCount: Integer;
function GetCaptureName(Index: Integer): String;
function GetAPIOptions(RunTime: Boolean): Integer;
function CalloutHandler(var CalloutBlock: pcre_callout_block): Integer;
public
destructor Destroy; override;
property Options: TJclAnsiRegExOptions read FOptions write FOptions;
function Compile(const Pattern: AnsiString; Study: Boolean;
UserLocale: Boolean = False): Boolean;
property Pattern: AnsiString read FPattern;
property DfaMode: Boolean read FDfaMode write FDfaMode;
function Match(const Subject: AnsiString; StartOffset: Cardinal = 1): Boolean;
property Subject: AnsiString read FSubject;
property Result: AnsiString read GetResult;
property ViewChanges: Boolean read FViewChanges write FViewChanges;
property CaptureCount: Integer read FCaptureCount write FCaptureCount;
property Captures[Index: Integer]: AnsiString read GetCapture write SetCapture;
property CaptureRanges[Index: Integer]: TJclAnsiCaptureRange read GetCaptureRange;
property NamedCaptures[const Name: AnsiString]: AnsiString
read GetNamedCapture write SetNamedCapture;
property CaptureNameCount: Integer read GetCaptureNameCount;
property CaptureNames[Index: Integer]: AnsiString read GetCaptureName;
function IndexOfName(const Name: String): Integer;
function IsNameValid(const Name: String): Boolean;
property ErrorCode: Integer read FErrorCode;
property ErrorMessage: AnsiString read FErrorMessage;
property ErrorOffset: Integer read FErrorOffset;
property oncallout: TJclAnsiRegExCallout
read FOnCallout write FOnCallout;
end;
procedure InitializeLocaleSupport;
procedure TerminateLocaleSupport;
// Args is an array of pairs (CaptureIndex, Value) or (CaptureName, Value).
// For example: NewIp := StrReplaceRegEx(DirIP, '(\d+)\.(\d+)\.(\d+)\.(\d+)', [3, '128', 4, '254']);
function StrReplaceRegEx(const Subject, Pattern: AnsiString; Args: array of const): AnsiString;
{$IFDEF UNITVERSIONING}
const
UnitVersioning: TUnitVersionInfo = (
RCSfile: '$URL: https://jcl.svn.sourceforge.net:443/svnroot/jcl/tags/JCL-1.100-Build2646/jcl/source/common/JclPCRE.pas $';
Revision: '$Revision: 2015 $';
Date: '$Date: 2007-05-25 13:08:11 +0200 (ven., 25 mai 2007) $';
LogPath: 'JCL\source\common'
);
{$ENDIF UNITVERSIONING}
implementation
uses
SysConst,
JclResources;
var
GTables: PChar;
function JclPCREGetMem(Size: Integer): Pointer; {$IFDEF PCRE_EXPORT_CDECL} cdecl; {$ENDIF PCRE_EXPORT_CDECL}
begin
GetMem(Result, Size);
end;
procedure JclPCREFreeMem(P: Pointer); {$IFDEF PCRE_EXPORT_CDECL} cdecl; {$ENDIF PCRE_EXPORT_CDECL}
begin
FreeMem(P);
end;
function JclPCRECallout(var callout_block: pcre_callout_block): Integer; {$IFDEF PCRE_EXPORT_CDECL} cdecl; {$ENDIF PCRE_EXPORT_CDECL}
begin
Result := TJclAnsiRegEx(callout_block.callout_data).CalloutHandler(callout_block);
end;
function PCRECheck(Value: Integer): Boolean;
var
PErr: PResStringRec;
begin
Result := Value >= 0;
if Result then Exit;
case Value of
PCRE_ERROR_NOMATCH:
PErr := @RsErrNoMatch;
PCRE_ERROR_NULL:
PErr := @RsErrNull;
PCRE_ERROR_BADOPTION:
PErr := @RsErrBadOption;
PCRE_ERROR_BADMAGIC:
PErr := @RsErrBadMagic;
PCRE_ERROR_UNKNOWN_NODE:
PErr := @RsErrUnknownNode;
PCRE_ERROR_NOMEMORY:
PErr := @RsErrNoMemory;
PCRE_ERROR_NOSUBSTRING:
PErr := @RsErrNoSubString;
PCRE_ERROR_MATCHLIMIT:
PErr := @RsErrMatchLimit;
PCRE_ERROR_CALLOUT:
PErr := @RsErrCallout;
PCRE_ERROR_BADUTF8:
PErr := @RsErrBadUTF8;
PCRE_ERROR_BADUTF8_OFFSET:
PErr := @RsErrBadUTF8Offset;
PCRE_ERROR_PARTIAL:
PErr := @RsErrPartial;
PCRE_ERROR_BADPARTIAL:
PErr := @RsErrBadPartial;
PCRE_ERROR_INTERNAL:
PErr := @RsErrInternal;
PCRE_ERROR_BADCOUNT:
PErr := @RsErrBadCount;
PCRE_ERROR_DFA_UITEM:
PErr := @RsErrDfaUItem;
PCRE_ERROR_DFA_UCOND:
PErr := @RsErrDfaUCond;
PCRE_ERROR_DFA_UMLIMIT:
PErr := @RsErrDfaUMLimit;
PCRE_ERROR_DFA_WSSIZE:
PErr := @RsErrDfaWSSize;
PCRE_ERROR_DFA_RECURSE:
PErr := @RsErrDfaRecurse;
PCRE_ERROR_RECURSIONLIMIT:
PErr := @RsErrRecursionLimit;
JCL_PCRE_ERROR_STUDYFAILED:
PErr := @RsErrStudyFailed;
JCL_PCRE_ERROR_CALLOUTERROR:
PErr := @RsErrCalloutError;
else
PErr := @RsErrUnknownError;
end;
raise EPCREError.CreateRes(PErr, Value);
end;
//=== { TJclAnsiRegEx } ======================================================
destructor TJclAnsiRegEx.Destroy;
begin
if Assigned(FCode) then
CallPCREFree(FCode);
if Assigned(FExtra) then
CallPCREFree(FExtra);
if Assigned(FVector) then
FreeMem(FVector);
if Assigned(FChangedCaptures) then
FChangedCaptures.Free;
inherited Destroy;
end;
function TJclAnsiRegEx.Compile(const Pattern: AnsiString; Study: Boolean;
UserLocale: Boolean = False): Boolean;
var
ErrMsgPtr: PChar;
Tables: PChar;
begin
if UserLocale then
begin
InitializeLocaleSupport;
Tables := GTables;
end
else
Tables := nil;
FPattern := Pattern;
if FPattern = '' then
raise EPCREError.CreateRes(@RsErrNull, PCRE_ERROR_NULL);
if Assigned(FCode) then CallPCREFree(FCode);
FCode := pcre_compile2(PChar(FPattern), GetAPIOptions(False),
@FErrorCode, @ErrMsgPtr, @FErrorOffset, Tables);
Inc(FErrorOffset);
FErrorMessage := ErrMsgPtr;
Result := Assigned(FCode);
if Result then
begin
if Study then
begin
if Assigned(FExtra) then CallPCREFree(FExtra);
FExtra := pcre_study(FCode, 0, @ErrMsgPtr);
Result := Assigned(FExtra) or (not Assigned(ErrMsgPtr));
if not Result then
begin
FErrorCode := JCL_PCRE_ERROR_STUDYFAILED;
FErrorMessage := ErrMsgPtr;
end;
end;
if FDfaMode then
FVectorSize := FCaptureCount
else
begin
PCRECheck(pcre_fullinfo(FCode, FExtra, PCRE_INFO_CAPTURECOUNT, @FCaptureCount));
FVectorSize := (FCaptureCount + 1) * 3;
end;
ReAllocMem(FVector, FVectorSize * SizeOf(Integer));
end;
end;
function TJclAnsiRegEx.GetAPIOptions(RunTime: Boolean): Integer;
const
{ roIgnoreCase, roMultiLine, roDotAll, roExtended,
roAnchored, roDollarEndOnly, roExtra, roNotBOL, roNotEOL, roUnGreedy,
roNotEmpty, roUTF8, roNoAutoCapture, roNoUTF8Check, roAutoCallout,
roPartial, roDfaShortest, roDfaRestart, roDfaFirstLine, roDupNames,
roNewLineCR, roNewLineLF, roNewLineCRLF, roNewLineAny }
cDesignOptions: array [TJclAnsiRegExOption] of Integer =
(PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, PCRE_EXTENDED, PCRE_ANCHORED,
PCRE_DOLLAR_ENDONLY, PCRE_EXTRA, 0, 0, PCRE_UNGREEDY, 0, PCRE_UTF8,
PCRE_NO_AUTO_CAPTURE, PCRE_NO_UTF8_CHECK, PCRE_AUTO_CALLOUT, 0, 0, 0, 0,
PCRE_DUPNAMES, PCRE_NEWLINE_CR, PCRE_NEWLINE_LF, PCRE_NEWLINE_CRLF,
PCRE_NEWLINE_ANY);
cRunOptions: array [TJclAnsiRegExOption] of Integer =
(0, 0, 0, 0, 0, 0, 0, PCRE_NOTBOL, PCRE_NOTEOL, 0, PCRE_NOTEMPTY, 0, 0,
PCRE_NO_UTF8_CHECK, 0, PCRE_PARTIAL, 0, 0, 0, 0, PCRE_NEWLINE_CR,
PCRE_NEWLINE_LF, PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANY);
var
I: TJclAnsiRegExOption;
begin
Result := 0;
if RunTime then
begin
for I := Low(TJclAnsiRegExOption) to High(TJclAnsiRegExOption) do
if I in Options then
Result := Result or cRunOptions[I];
end
else
begin
for I := Low(TJclAnsiRegExOption) to High(TJclAnsiRegExOption) do
if I in Options then
Result := Result or cDesignOptions[I];
end;
end;
function TJclAnsiRegEx.GetResult: AnsiString;
var
Index, CaptureIndex, Pos: Integer;
Range: TJclAnsiCaptureRange;
begin
if Assigned(FChangedCaptures) and (FChangedCaptures.Count > 0) then
begin
Pos := 1;
Result := '';
for Index := 0 to FChangedCaptures.Count - 1 do
begin
CaptureIndex := Integer(FChangedCaptures[Index]);
Range := GetCaptureRange(CaptureIndex);
Result := Result +
Copy(FSubject, Pos, Range.FirstPos - Pos) +
FResultValues[CaptureIndex];
Pos := Range.LastPos + 1;
end;
if Pos <= Length(FSubject) then
Result := Result + Copy(FSubject, Pos, Length(FSubject) - Pos + 1);
end
else
Result := FSubject;
end;
function TJclAnsiRegEx.GetCapture(Index: Integer): AnsiString;
var
From, Len: Integer;
begin
if (Index < 0) or (Index >= FCaptureCount) then
PCRECheck(PCRE_ERROR_NOSUBSTRING)
else
begin
if FViewChanges and (FChangedCaptures.IndexOf(Pointer(Index)) >= 0) then
begin
Result := FResultValues[Index];
Exit;
end;
Index := Index * 2;
From := FVector^[Index];
Len := FVector^[Index + 1] - From;
SetLength(Result, Len);
Move(FSubject[From + 1], PChar(Result)^, Len);
end;
end;
procedure TJclAnsiRegEx.SetCapture(Index: Integer; const Value: String);
begin
if (Index < 0) or (Index >= FCaptureCount) then
PCRECheck(PCRE_ERROR_NOSUBSTRING)
else
begin
if (not Assigned(FChangedCaptures)) or (FChangedCaptures.Count = 0) then
begin
if not Assigned(FChangedCaptures) then
FChangedCaptures := TList.Create;
// Always resize to the max length to avoid repeated allocations.
FChangedCaptures.Capacity := FCaptureCount;
SetLength(FResultValues, FCaptureCount);
end;
if FChangedCaptures.IndexOf(Pointer(Index)) < 0 then
FChangedCaptures.Add(Pointer(Index));
FResultValues[Index] := Value;
end;
end;
function TJclAnsiRegEx.GetCaptureRange(Index: Integer): TJclAnsiCaptureRange;
begin
if (Index < 0) or (Index >= FCaptureCount) then
PCRECheck(PCRE_ERROR_NOSUBSTRING)
else
begin
Index := Index * 2;
Result.FirstPos := FVector^[Index] + 1;
Result.LastPos := FVector^[Index + 1];
end;
end;
function TJclAnsiRegEx.GetNamedCapture(const Name: AnsiString): AnsiString;
var
Index: Integer;
begin
Index := pcre_get_stringnumber(FCode, PChar(Name));
PCRECheck(Index);
Result := GetCapture(Index);
end;
procedure TJclAnsiRegEx.SetNamedCapture(const Name, Value: String);
var
Index: Integer;
begin
Index := pcre_get_stringnumber(FCode, PChar(Name));
PCRECheck(Index);
SetCapture(Index, Value);
end;
function TJclAnsiRegEx.GetCaptureNameCount: Integer;
begin
PCRECheck(pcre_fullinfo(FCode, FExtra, PCRE_INFO_NAMECOUNT, @Result));
end;
function TJclAnsiRegEx.GetCaptureName(Index: Integer): String;
var
NameTable: PChar;
EntrySize: Integer;
begin
PCRECheck(pcre_fullinfo(FCode, FExtra, PCRE_INFO_NAMETABLE, @NameTable));
PCRECheck(pcre_fullinfo(FCode, FExtra, PCRE_INFO_NAMEENTRYSIZE, @EntrySize));
Result := NameTable + EntrySize * Index + 2;
end;
function TJclAnsiRegEx.CalloutHandler(var CalloutBlock: pcre_callout_block): Integer;
begin
try
Result := JCL_PCRE_CALLOUT_NOERROR;
if Assigned(FOnCallout) then
begin
with CalloutBlock do
begin
FCaptureCount := capture_top;
FOnCallout(Self, callout_number, start_match + 1, current_position + 1,
capture_last, pattern_position + 1, next_item_length, Result);
end;
end;
except
on E: Exception do
begin
FErrorMessage := E.Message;
Result := JCL_PCRE_ERROR_CALLOUTERROR;
end;
end;
end;
function TJclAnsiRegEx.Match(const Subject: AnsiString; StartOffset: Cardinal = 1): Boolean;
var
LocalExtra: real_pcre_extra;
Extra: Pointer;
WorkSpace: array [0 .. 19] of Integer;
ExecRslt: Integer;
begin
if Assigned(FOnCallout) then
begin
if Assigned(FExtra) then
begin
LocalExtra.flags := PCRE_EXTRA_STUDY_DATA or PCRE_EXTRA_CALLOUT_DATA;
LocalExtra.study_data := FExtra;
end
else
LocalExtra.flags := PCRE_EXTRA_CALLOUT_DATA;
LocalExtra.callout_data := Self;
Extra := @LocalExtra;
SetPCRECalloutCallback(JclPCRECallout);
end
else
begin
Extra := FExtra;
SetPCRECalloutCallback(nil);
end;
FSubject := Subject;
if Assigned(FChangedCaptures) then
FChangedCaptures.Clear;
if FDfaMode then
begin
ExecRslt := pcre_dfa_exec(FCode, Extra, PChar(FSubject), Length(FSubject),
StartOffset - 1, GetAPIOptions(True), pcre.PInteger(FVector), FVectorSize, @Workspace, 20);
end
else
begin
ExecRslt := pcre_exec(FCode, Extra, PChar(FSubject), Length(FSubject),
StartOffset - 1, GetAPIOptions(True), pcre.PInteger(FVector), FVectorSize);
end;
Result := ExecRslt >= 0;
if Result then
begin
FCaptureCount := ExecRslt;
FErrorCode := 0;
end
else
begin
FErrorCode := ExecRslt;
if FErrorCode <> PCRE_ERROR_NOMATCH then
PCRECheck(FErrorCode);
end;
end;
function TJclAnsiRegEx.IndexOfName(const Name: String): Integer;
begin
Result := pcre_get_stringnumber(FCode, PChar(Name));
end;
function TJclAnsiRegEx.IsNameValid(const Name: String): Boolean;
begin
Result := pcre_get_stringnumber(FCode, PChar(Name)) >= 0;
end;
procedure InitializeLocaleSupport;
begin
if not Assigned(GTables) then
GTables := pcre_maketables;
end;
procedure TerminateLocaleSupport;
begin
if Assigned(GTables) then
begin
CallPCREFree(GTables);
GTables := nil;
end;
end;
// TODO: Better/specific error messages, show index when available.
function StrReplaceRegEx(const Subject, Pattern: AnsiString; Args: array of const): AnsiString;
function ArgToString(Index: Integer): AnsiString;
begin
// TODO: Any other type?
case TVarRec(Args[Index]).VType of
vtString: Result := TVarRec(Args[Index]).VString^;
vtPChar: Result := TVarRec(Args[Index]).VPChar;
vtAnsiString: Result := AnsiString(TVarRec(Args[Index]).VAnsiString);
else
raise EConvertError.Create(SInvalidFormat);
end;
end;
var
Re: TJclAnsiRegEx;
Index, ArgIndex: Integer;
Value: AnsiString;
begin
if Odd(Length(Args)) then
raise EConvertError.Create(SArgumentMissing)
else
begin
Re := TJclAnsiRegEx.Create;
try
if Re.Compile(Pattern, False) and Re.Match(Subject) then
begin
for Index := 0 to Length(Args) div 2 - 1 do
begin
ArgIndex := Index * 2;
Value := ArgToString(ArgIndex + 1);
if TVarRec(Args[ArgIndex]).VType = vtInteger then
Re.Captures[TVarRec(Args[ArgIndex]).VInteger] := Value
else
Re.NamedCaptures[ArgToString(ArgIndex)] := Value;
end;
Result := Re.Result;
end
else
raise EConvertError.Create(SInvalidFormat);
finally
Re.Free;
end;
end;
end;
//=== { EPCREError } =========================================================
constructor EPCREError.CreateRes(ResStringRec: PResStringRec; ErrorCode: Integer);
begin
FErrorCode := ErrorCode;
inherited CreateRes(ResStringRec);
end;
procedure LibNotLoadedHandler; {$IFDEF PCRE_EXPORT_CDECL} cdecl; {$ENDIF PCRE_EXPORT_CDECL}
begin
raise EPCREError.CreateRes(@RsErrLibNotLoaded, 0);
end;
initialization
pcre.LibNotLoadedHandler := LibNotLoadedHandler;
if LoadPCRE then
begin
SetPCREMallocCallback(JclPCREGetMem);
SetPCREFreeCallback(JclPCREFreeMem);
end;
{$IFDEF UNITVERSIONING}
RegisterUnitVersion(HInstance, UnitVersioning);
{$ENDIF UNITVERSIONING}
finalization
TerminateLocaleSupport;
{$IFDEF UNITVERSIONING}
UnregisterUnitVersion(HInstance);
{$ENDIF UNITVERSIONING}
UnloadPCRE;
end.