Componentes.Terceros.FastRe.../internal/4.2/1/Source/frxUnicodeUtils.pas
2007-11-18 19:40:07 +00:00

637 lines
15 KiB
ObjectPascal

{*******************************************************}
{ The Delphi Unicode Controls Project }
{ }
{ http://home.ccci.org/wolbrink }
{ }
{ Copyright (c) 2002, Troy Wolbrink (wolbrink@ccci.org) }
{ }
{*******************************************************}
unit frxUnicodeUtils;
interface
{$I frx.inc}
uses Windows, Classes, SysUtils;
type
TWString = record
WString: WideString;
Obj: TObject;
end;
TWideStrings = class(TPersistent)
private
FWideStringList: TList;
function Get(Index: Integer): WideString;
procedure Put(Index: Integer; const S: WideString);
function GetObject(Index: Integer): TObject;
procedure PutObject(Index: Integer; const Value: TObject);
procedure ReadData(Reader: TReader);
procedure ReadDataW(Reader: TReader);
procedure WriteDataW(Writer: TWriter);
function GetTextStr: WideString;
procedure SetTextStr(const Value: WideString);
protected
procedure AssignTo(Dest: TPersistent); override;
procedure DefineProperties(Filer: TFiler); override;
public
constructor Create;
destructor Destroy; override;
procedure Assign(Source: TPersistent); override;
function Count: Integer;
procedure Clear;
procedure Delete(Index: Integer);
function Add(const S: WideString): Integer;
procedure AddStrings(Strings: TWideStrings);
function AddObject(const S: WideString; AObject: TObject): Integer;
function IndexOf(const S: WideString): Integer;
procedure Insert(Index: Integer; const S: WideString);
procedure LoadFromFile(const FileName: WideString);
procedure LoadFromStream(Stream: TStream);
procedure LoadFromWStream(Stream: TStream);
procedure SaveToFile(const FileName: WideString);
procedure SaveToStream(Stream: TStream);
property Objects[Index: Integer]: TObject read GetObject write PutObject;
property Strings[Index: Integer]: WideString read Get write Put; default;
property Text: WideString read GetTextStr write SetTextStr;
end;
{$IFNDEF Delphi6}
function Utf8Encode(const WS: WideString): String;
function UTF8Decode(const S: String): WideString;
function VarToWideStr(const V: Variant): WideString;
{$ENDIF}
function AnsiToUnicode(const s: String; Charset: UINT): WideString;
implementation
const
sLineBreak = #13#10;
WideLineSeparator = WideChar($2028);
NameValueSeparator = '=';
{$IFNDEF Delphi6}
function Utf8Encode(const WS: WideString): String;
var
L: Integer;
Temp: String;
function ToUtf8(Dest: PChar; MaxDestBytes: Cardinal;
Source: PWideChar; SourceChars: Cardinal): Cardinal;
var
i, count: Cardinal;
c: Cardinal;
begin
Result := 0;
if Source = nil then Exit;
count := 0;
i := 0;
if Dest <> nil then
begin
while (i < SourceChars) and (count < MaxDestBytes) do
begin
c := Cardinal(Source[i]);
Inc(i);
if c <= $7F then
begin
Dest[count] := Char(c);
Inc(count);
end
else if c > $7FF then
begin
if count + 3 > MaxDestBytes then
break;
Dest[count] := Char($E0 or (c shr 12));
Dest[count+1] := Char($80 or ((c shr 6) and $3F));
Dest[count+2] := Char($80 or (c and $3F));
Inc(count,3);
end
else // $7F < Source[i] <= $7FF
begin
if count + 2 > MaxDestBytes then
break;
Dest[count] := Char($C0 or (c shr 6));
Dest[count+1] := Char($80 or (c and $3F));
Inc(count,2);
end;
end;
if count >= MaxDestBytes then count := MaxDestBytes-1;
Dest[count] := #0;
end
else
begin
while i < SourceChars do
begin
c := Integer(Source[i]);
Inc(i);
if c > $7F then
begin
if c > $7FF then
Inc(count);
Inc(count);
end;
Inc(count);
end;
end;
Result := count+1;
end;
begin
Result := '';
if WS = '' then Exit;
SetLength(Temp, Length(WS) * 3);
L := ToUtf8(PChar(Temp), Length(Temp)+1, PWideChar(WS), Length(WS));
if L > 0 then
SetLength(Temp, L-1)
else
Temp := '';
Result := Temp;
end;
function Utf8Decode(const S: String): WideString;
var
L: Integer;
Temp: WideString;
function Utf8ToUnicode(Dest: PWideChar; MaxDestChars: Cardinal; Source: PChar; SourceBytes: Cardinal): Cardinal;
var
i, count: Cardinal;
c: Byte;
wc: Cardinal;
begin
if Source = nil then
begin
Result := 0;
Exit;
end;
Result := Cardinal(-1);
count := 0;
i := 0;
if Dest <> nil then
begin
while (i < SourceBytes) and (count < MaxDestChars) do
begin
wc := Cardinal(Source[i]);
Inc(i);
if (wc and $80) <> 0 then
begin
wc := wc and $3F;
if i > SourceBytes then Exit; // incomplete multibyte char
if (wc and $20) <> 0 then
begin
c := Byte(Source[i]);
Inc(i);
if (c and $C0) <> $80 then Exit; // malformed trail byte or out of range char
if i > SourceBytes then Exit; // incomplete multibyte char
wc := (wc shl 6) or (c and $3F);
end;
c := Byte(Source[i]);
Inc(i);
if (c and $C0) <> $80 then Exit; // malformed trail byte
Dest[count] := WideChar((wc shl 6) or (c and $3F));
end
else
Dest[count] := WideChar(wc);
Inc(count);
end;
if count >= MaxDestChars then count := MaxDestChars-1;
Dest[count] := #0;
end
else
begin
while (i <= SourceBytes) do
begin
c := Byte(Source[i]);
Inc(i);
if (c and $80) <> 0 then
begin
if (c and $F0) = $F0 then Exit; // too many bytes for UCS2
if (c and $40) = 0 then Exit; // malformed lead byte
if i > SourceBytes then Exit; // incomplete multibyte char
if (Byte(Source[i]) and $C0) <> $80 then Exit; // malformed trail byte
Inc(i);
if i > SourceBytes then Exit; // incomplete multibyte char
if ((c and $20) <> 0) and ((Byte(Source[i]) and $C0) <> $80) then Exit; // malformed trail byte
Inc(i);
end;
Inc(count);
end;
end;
Result := count+1;
end;
begin
Result := '';
if S = '' then Exit;
SetLength(Temp, Length(S));
L := Utf8ToUnicode(PWideChar(Temp), Length(Temp)+1, PChar(S), Length(S));
if L > 0 then
SetLength(Temp, L-1)
else
Temp := '';
Result := Temp;
end;
function VarToWideStrDef(const V: Variant; const ADefault: WideString): WideString;
begin
if not VarIsNull(V) then
Result := V
else
Result := ADefault;
end;
function VarToWideStr(const V: Variant): WideString;
begin
Result := VarToWideStrDef(V, '');
end;
{$ENDIF}
{ TWideStrings }
constructor TWideStrings.Create;
begin
FWideStringList := TList.Create;
end;
destructor TWideStrings.Destroy;
begin
Clear;
FWideStringList.Free;
inherited;
end;
procedure TWideStrings.Clear;
var
Index: Integer;
PWStr: ^TWString;
begin
for Index := 0 to FWideStringList.Count-1 do
begin
PWStr := FWideStringList.Items[Index];
if PWStr <> nil then
Dispose(PWStr);
end;
FWideStringList.Clear;
end;
function TWideStrings.Get(Index: Integer): WideString;
var
PWStr: ^TWString;
begin
Result := '';
if ( (Index >= 0) and (Index < FWideStringList.Count) ) then
begin
PWStr := FWideStringList.Items[Index];
if PWStr <> nil then
Result := PWStr^.WString;
end;
end;
procedure TWideStrings.Put(Index: Integer; const S: WideString);
begin
Insert(Index, S);
end;
function TWideStrings.GetObject(Index: Integer): TObject;
var
PWStr: ^TWString;
begin
Result := nil;
if ( (Index >= 0) and (Index < FWideStringList.Count) ) then
begin
PWStr := FWideStringList.Items[Index];
if PWStr <> nil then
Result := PWStr^.Obj;
end;
end;
procedure TWideStrings.PutObject(Index: Integer; const Value: TObject);
var
PWStr: ^TWString;
begin
if ( (Index >= 0) and (Index < FWideStringList.Count) ) then
begin
PWStr := FWideStringList.Items[Index];
if PWStr <> nil then
PWStr^.Obj := Value;
end;
end;
function TWideStrings.Add(const S: WideString): Integer;
var
PWStr: ^TWString;
begin
New(PWStr);
PWStr^.WString := S;
PWStr^.Obj := nil;
Result := FWideStringList.Add(PWStr);
end;
procedure TWideStrings.Delete(Index: Integer);
var
PWStr: ^TWString;
begin
PWStr := FWideStringList.Items[Index];
if PWStr <> nil then
Dispose(PWStr);
FWideStringList.Delete(Index);
end;
function TWideStrings.IndexOf(const S: WideString): Integer;
var
Index: Integer;
PWStr: ^TWString;
begin
Result := -1;
for Index := 0 to FWideStringList.Count -1 do
begin
PWStr := FWideStringList.Items[Index];
if PWStr <> nil then
begin
if S = PWStr^.WString then
begin
Result := Index;
break;
end;
end;
end;
end;
function TWideStrings.Count: Integer;
begin
Result := FWideStringList.Count;
end;
procedure TWideStrings.Insert(Index: Integer; const S: WideString);
var
PWStr: ^TWString;
begin
if((Index < 0) or (Index > FWideStringList.Count)) then
raise Exception.Create('Wide String Out of Bounds');
if Index < FWideStringList.Count then
begin
PWStr := FWideStringList.Items[Index];
if PWStr <> nil then
PWStr.WString := S;
end
else
Add(S);
end;
procedure TWideStrings.AddStrings(Strings: TWideStrings);
var
I: Integer;
begin
for I := 0 to Strings.Count - 1 do
AddObject(Strings[I], Strings.Objects[I]);
end;
function TWideStrings.AddObject(const S: WideString; AObject: TObject): Integer;
begin
Result := Add(S);
PutObject(Result, AObject);
end;
procedure TWideStrings.Assign(Source: TPersistent);
var
I: Integer;
begin
if Source is TWideStrings then
begin
Clear;
AddStrings(TWideStrings(Source));
end
else if Source is TStrings then
begin
Clear;
for I := 0 to TStrings(Source).Count - 1 do
AddObject(TStrings(Source)[I], TStrings(Source).Objects[I]);
end
else
inherited Assign(Source);
end;
procedure TWideStrings.AssignTo(Dest: TPersistent);
var
I: Integer;
begin
if Dest is TWideStrings then
Dest.Assign(Self)
else if Dest is TStrings then
begin
TStrings(Dest).BeginUpdate;
try
TStrings(Dest).Clear;
for I := 0 to Count - 1 do
TStrings(Dest).AddObject(Strings[I], Objects[I]);
finally
TStrings(Dest).EndUpdate;
end;
end
else
inherited AssignTo(Dest);
end;
procedure TWideStrings.DefineProperties(Filer: TFiler);
begin
// compatibility
Filer.DefineProperty('Strings', ReadData, nil, Count > 0);
Filer.DefineProperty('UTF8', ReadDataW, WriteDataW, Count > 0);
end;
function TWideStrings.GetTextStr: WideString;
var
I, L, Size, Count: Integer;
P: PWideChar;
S, LB: WideString;
begin
Count := FWideStringList.Count;
Size := 0;
LB := sLineBreak;
for I := 0 to Count - 1 do Inc(Size, Length(Get(I)) + Length(LB));
SetString(Result, nil, Size);
P := Pointer(Result);
for I := 0 to Count - 1 do
begin
S := Get(I);
L := Length(S);
if L <> 0 then
begin
System.Move(Pointer(S)^, P^, L * SizeOf(WideChar));
Inc(P, L);
end;
L := Length(LB);
if L <> 0 then
begin
System.Move(Pointer(LB)^, P^, L * SizeOf(WideChar));
Inc(P, L);
end;
end;
end;
procedure TWideStrings.LoadFromFile(const FileName: WideString);
var
Stream: TStream;
begin
Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
try
LoadFromStream(Stream);
finally
Stream.Free;
end;
end;
procedure TWideStrings.LoadFromStream(Stream: TStream);
var
Size: Integer;
S: WideString;
ansiS: String;
sign: Word;
begin
Size := Stream.Size - Stream.Position;
sign := 0;
if Size > 2 then
Stream.Read(sign, 2);
if sign = $FEFF then
begin
Dec(Size, 2);
SetLength(S, Size div 2);
Stream.Read(S[1], Size);
SetTextStr(S);
end
else
begin
Stream.Seek(-2, soFromCurrent);
SetLength(ansiS, Size);
Stream.Read(ansiS[1], Size);
SetTextStr(ansiS);
end;
end;
procedure TWideStrings.LoadFromWStream(Stream: TStream);
var
Size: Integer;
S: WideString;
begin
Size := Stream.Size - Stream.Position;
SetLength(S, Size div 2);
Stream.Read(S[1], Size);
SetTextStr(S);
end;
procedure TWideStrings.ReadData(Reader: TReader);
begin
Clear;
Reader.ReadListBegin;
while not Reader.EndOfList do
if Reader.NextValue in [vaString, vaLString] then
Add(Reader.ReadString) {TStrings compatiblity}
else
Add(Reader.ReadWideString);
Reader.ReadListEnd;
end;
procedure TWideStrings.ReadDataW(Reader: TReader);
begin
Clear;
Reader.ReadListBegin;
while not Reader.EndOfList do
Add(Utf8Decode(Reader.ReadString));
Reader.ReadListEnd;
end;
procedure TWideStrings.SaveToFile(const FileName: WideString);
var
Stream: TStream;
begin
Stream := TFileStream.Create(FileName, fmCreate);
try
SaveToStream(Stream);
finally
Stream.Free;
end;
end;
procedure TWideStrings.SaveToStream(Stream: TStream);
var
SW: WideString;
begin
SW := GetTextStr;
Stream.WriteBuffer(PWideChar(SW)^, Length(SW) * SizeOf(WideChar));
end;
procedure TWideStrings.SetTextStr(const Value: WideString);
var
P, Start: PWideChar;
S: WideString;
begin
Clear;
P := Pointer(Value);
if P <> nil then
while P^ <> #0 do
begin
Start := P;
while not (P^ in [WideChar(#0), WideChar(#10), WideChar(#13)]) and (P^ <> WideLineSeparator) do
Inc(P);
SetString(S, Start, P - Start);
Add(S);
if P^ = #13 then Inc(P);
if P^ = #10 then Inc(P);
if P^ = WideLineSeparator then Inc(P);
end;
end;
procedure TWideStrings.WriteDataW(Writer: TWriter);
var
I: Integer;
begin
Writer.WriteListBegin;
for I := 0 to Count - 1 do
Writer.WriteString(Utf8Encode(Get(I)));
Writer.WriteListEnd;
end;
function TranslateCharsetInfo(lpSrc: DWORD; var lpCs: TCharsetInfo;
dwFlags: DWORD): BOOL; stdcall; external gdi32 name 'TranslateCharsetInfo';
function CharSetToCodePage(ciCharset: DWORD): Cardinal;
var
C: TCharsetInfo;
begin
if ciCharset = DEFAULT_CHARSET then
Result := GetACP
else
begin
Win32Check(TranslateCharsetInfo(ciCharset, C, TCI_SRCCHARSET));
Result := C.ciACP;
end;
end;
function AnsiToUnicode(const s: String; Charset: UINT): WideString;
var
CodePage: Integer;
InputLength, OutputLength: Integer;
begin
CodePage := CharSetToCodePage(Charset);
InputLength := Length(S);
OutputLength := MultiByteToWideChar(CodePage, 0, PAnsiChar(S), InputLength, nil, 0);
SetLength(Result, OutputLength);
MultiByteToWideChar(CodePage, 0, PAnsiChar(S), InputLength, PWideChar(Result), OutputLength);
end;
end.
//862fd5d6aa1a637203d9b08a3c0bcfb0