Componentes.Terceros.jcl/official/1.96/source/common/JclStatistics.pas

551 lines
16 KiB
ObjectPascal

{**************************************************************************************************}
{ }
{ Project JEDI Code Library (JCL) }
{ }
{ The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); }
{ you may not use this file except in compliance with the License. You may obtain a copy of the }
{ License at http://www.mozilla.org/MPL/ }
{ }
{ Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF }
{ ANY KIND, either express or implied. See the License for the specific language governing rights }
{ and limitations under the License. }
{ }
{ The Original Code is JclStatistics.pas. }
{ }
{ The Initial Developer of the Original Code is ESB Consultancy. }
{ Portions created by ESB Consultancy are Copyright ESB Consultancy. All rights reserved. }
{ }
{ Contributors (in alphabetical order): }
{ ESB Consultancy }
{ Fred Hovey }
{ Marcel van Brakel }
{ Matthias Thoma }
{ Robert Marquardt }
{ Robert Rossmair }
{ Petr Vones }
{ }
{**************************************************************************************************}
{ }
{ Various common statistics routines to calculate, for example, the arithmetic mean, geometric }
{ meanor median of a set of numbers. }
{ }
{**************************************************************************************************}
// Last modified: $Date: 2005/05/05 20:08:44 $
// For history see end of file
{ TODO : Test cases! }
unit JclStatistics;
{$I jcl.inc}
interface
uses
JclBase, JclMath;
type
EJclStatisticsError = class(EJclMathError);
{ Mean functions }
function ArithmeticMean(const X: TDynFloatArray): Float;
function GeometricMean(const X: TDynFloatArray): Float;
function HarmonicMean(const X: TDynFloatArray): Float;
function HeronianMean(const A, B: Float): Float;
{ Miscellanous }
function BinomialCoeff(N, R: Cardinal): Float;
function IsPositiveFloatArray(const X: TDynFloatArray): Boolean;
function MaxFloatArray(const B: TDynFloatArray): Float;
function MaxFloatArrayIndex(const B: TDynFloatArray): Integer;
function Median(const X: TDynFloatArray): Float;
{$IFNDEF CLR}
function MedianUnsorted(const X: TDynFloatArray): Float;
{$ENDIF ~CLR}
function MinFloatArray(const B: TDynFloatArray): Float;
function MinFloatArrayIndex(const B: TDynFloatArray): Integer;
function Permutation(N, R: Cardinal): Float;
function Combinations(N, R: Cardinal): Float;
function SumOfSquares(const X: TDynFloatArray): Float;
function PopulationVariance(const X: TDynFloatArray): Float;
procedure PopulationVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float);
function SampleVariance(const X: TDynFloatArray): Float;
procedure SampleVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float);
function StdError(const X: TDynFloatArray): Float; overload;
function StdError(const Variance: Float; const SampleSize: Integer): Float; overload;
function SumFloatArray(const B: TDynFloatArray): Float;
function SumSquareDiffFloatArray(const B: TDynFloatArray; Diff: Float): Float;
function SumSquareFloatArray(const B: TDynFloatArray): Float;
function SumPairProductFloatArray(const X, Y: TDynFloatArray): Float;
implementation
uses
JclLogic,
{$IFNDEF CLR}
JclSysUtils,
{$ENDIF ~CLR}
JclResources;
//=== Local helpers ==========================================================
function GetDynLength(const X: TDynFloatArray): Integer;
begin
Result := Length(X);
end;
function GetDynLengthNotNull(const X: TDynFloatArray): Integer;
begin
Result := Length(X);
if Result = 0 then
{$IFDEF CLR}
raise EJclMathError.Create(RsEmptyArray);
{$ELSE}
raise EJclMathError.CreateRes(@RsEmptyArray);
{$ENDIF CLR}
end;
procedure InvalidSampleSize(SampleSize: Integer);
begin
{$IFDEF CLR}
raise EJclStatisticsError.CreateFmt(RsInvalidSampleSize, [SampleSize]);
{$ELSE}
raise EJclStatisticsError.CreateResFmt(@RsInvalidSampleSize, [SampleSize]);
{$ENDIF CLR}
end;
function GetSampleSize(const Sample: TDynFloatArray; MinValidSize: Integer = 1): Integer;
begin
Result := Length(Sample);
if Result < MinValidSize then
InvalidSampleSize(Result);
end;
//=== Mean Functions =========================================================
function ArithmeticMean(const X: TDynFloatArray): Float;
begin
Result := SumFloatArray(X) / Length(X);
end;
function GeometricMean(const X: TDynFloatArray): Float;
var
I, N: Integer;
begin
N := GetSampleSize(X);
Result := 1.0;
for I := 0 to N - 1 do
begin
if X[I] <= PrecisionTolerance then
{$IFDEF CLR}
raise EJclMathError.Create(RsNonPositiveArray);
{$ELSE}
raise EJclMathError.CreateRes(@RsNonPositiveArray);
{$ENDIF CLR}
Result := Result * X[I];
end;
Result := Power(Result, 1 / N);
end;
function HarmonicMean(const X: TDynFloatArray): Float;
var
I, N: Integer;
begin
Result := 0.0;
N := GetSampleSize(X);
for I := 0 to N - 1 do
begin
if X[I] <= PrecisionTolerance then
{$IFDEF CLR}
raise EJclMathError.Create(RsNonPositiveArray);
{$ELSE}
raise EJclMathError.CreateRes(@RsNonPositiveArray);
{$ENDIF CLR}
Result := Result + 1 / X[I];
end;
Result := N / Result;
end;
function HeronianMean(const A, B: Float): Float;
begin
Assert(A >= 0);
Assert(B >= 0);
Result := (A + Sqrt(A * B) + B) / 3;
end;
//=== Miscellanous ===========================================================
function BinomialCoeff(N, R: Cardinal): Float;
var
I: Integer;
K: LongWord;
begin
if (N = 0) or (R > N) or (N > MaxFactorial) then
begin
Result := 0.0;
Exit;
end;
Result := 1.0;
if not ((R = 0) or (R = N)) then
begin
if R > N div 2 then
R := N - R;
K := 2;
try
for I := N - R + 1 to N do
begin
Result := Result * I;
if K <= R then
begin
Result := Result / K;
Inc(K);
end;
end;
Result := Int(Result + 0.5);
except
Result := -1.0;
end;
end;
end;
function IsPositiveFloatArray(const X: TDynFloatArray): Boolean;
var
I, N: Integer;
begin
Result := False;
N := GetDynLengthNotNull(X);
for I := 0 to N - 1 do
if X[I] <= PrecisionTolerance then
Exit;
Result := True;
end;
function MaxFloatArray(const B: TDynFloatArray): Float;
var
I, N: Integer;
begin
N := GetDynLengthNotNull(B);
Result := B[0];
for I := 1 to N - 1 do
if B[I] > Result then
Result := B[I];
end;
function MaxFloatArrayIndex(const B: TDynFloatArray): Integer;
var
I, N: Integer;
Max: Float;
begin
Result := 0;
N := GetDynLengthNotNull(B);
Max := B[0];
for I := 1 to N - 1 do
if B[I] > Max then
begin
Max := B[I];
Result := I;
end;
end;
// The FloatArray X must be presorted so Median can calculate the correct value.
// Y_{(n+1)/2} if N is odd
// Median = { 1/2 * (Y_{n/2} + Y_{1+(n/2) } if N is even
function Median(const X: TDynFloatArray): Float;
var
N: Integer;
begin
N := GetSampleSize(X);
if N = 1 then
Result := X[0]
else
if Odd(N) then
Result := X[N div 2]
else
Result := (X[N div 2 - 1] + X[N div 2]) / 2;
end;
{$IFNDEF CLR}
function MedianUnsorted(const X: TDynFloatArray): Float;
var
SortedList: TDynFloatArray;
begin
// We need to sort the values first
SortedList := Copy(X);
// type cast to Pointer for the sake of FPC
SortDynArray(Pointer(SortedList), SizeOf(Float),DynArrayCompareFloat);
// and call the median function afterwards
Result := Median(SortedList);
end;
{$ENDIF ~CLR}
function MinFloatArray(const B: TDynFloatArray): Float;
var
I, N: Integer;
begin
N := GetDynLengthNotNull(B);
Result := B[0];
for I := 1 to N - 1 do
if B[I] < Result then
Result := B[I];
end;
function MinFloatArrayIndex(const B: TDynFloatArray): Integer;
var
I, N: Integer;
Min: Float;
begin
Result := 0;
N := GetDynLengthNotNull(B);
Min := B[0];
for I := 1 to N - 1 do
if B[I] < Min then
begin
Min := B[I];
Result := I;
end;
end;
function Permutation(N, R: Cardinal): Float;
var
I : Integer;
begin
if (N = 0) or (R > N) or (N > MaxFactorial) then
begin
Result := 0.0;
Exit;
end;
Result := 1.0;
if R <> 0 then
try
for I := N downto N - R + 1 do
Result := Result * I;
Result := Int(Result + 0.5);
except
Result := -1.0;
end;
end;
{ TODO -cDoc : Donator: Fred Hovey }
function Combinations(N, R: Cardinal): Float;
begin
Result := Factorial(R);
if IsFloatZero(Result) then
Result := -1.0
else
Result := Permutation(N, R) / Result;
end;
{ TODO -cDoc : donator: Fred Hovey, contributor: Robert Rossmair }
function SumOfSquares(const X: TDynFloatArray): Float;
var
I, N: Integer;
Sum: Float;
begin
N := GetSampleSize(X);
Result := Sqr(X[0]);
Sum := X[0];
for I := 1 to N - 1 do
begin
Result := Result + Sqr(X[I]);
Sum := Sum + X[I];
end;
Result := Result - Sum * Sum / N;
end;
{ TODO -cDoc : Contributors: Fred Hovey, Robert Rossmair }
function PopulationVariance(const X: TDynFloatArray): Float;
begin
// Length(X) = 0 would cause SumOfSquares() to raise an exception before the division is executed.
Result := SumOfSquares(X) / Length(X);
end;
procedure PopulationVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float);
var
I, N: Integer;
Sum, SumSq: Float;
begin
N := GetSampleSize(X);
SumSq := Sqr(X[0]);
Sum := X[0];
for I := 1 to N - 1 do
begin
SumSq := SumSq + Sqr(X[I]);
Sum := Sum + X[I];
end;
Mean := Sum / N;
Variance := (SumSq / N) - Sqr(Mean);
end;
{ TODO -cDoc : Contributors: Fred Hovey, Robert Rossmair }
function SampleVariance(const X: TDynFloatArray): Float;
var
N: Integer;
begin
N := GetSampleSize(X, 2);
Result := SumOfSquares(X) / (N - 1)
end;
{ TODO -cDoc : Contributors: Fred Hovey, Robert Rossmair }
procedure SampleVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float);
var
I, N: Integer;
Sum, SumSq: Float;
begin
N := GetSampleSize(X);
SumSq := Sqr(X[0]);
Sum := X[0];
for I := 1 to N - 1 do
begin
SumSq := SumSq + Sqr(X[I]);
Sum := Sum + X[I];
end;
Mean := Sum / N;
if N < 2 then
InvalidSampleSize(N);
//Variance := (SumSq / (N - 1)) - Sqr(Sum / (N - 1)) => WRONG!!!!
Variance := (SumSq - Sum * Sum / N) / (N - 1)
end;
{ TODO -cDoc : Donator: Fred Hovey, contributor: Robert Rossmair }
function StdError(const X: TDynFloatArray): Float;
begin
// Length(X) = 0 would cause SampleVariance() to raise an exception before the division is
// executed.
Result := Sqrt(SampleVariance(X) / Length(X));
end;
{ TODO -cDoc : Donator: Fred Hovey, contributor: Robert Rossmair }
function StdError(const Variance: Float; const SampleSize: Integer): Float;
begin
if SampleSize = 0 then
InvalidSampleSize(SampleSize);
Result := Sqrt(Variance / SampleSize);
end;
function SumFloatArray(const B: TDynFloatArray): Float;
var
I, N: Integer;
begin
Result := 0.0;
N := GetDynLength(B);
if N <> 0 then
begin
Result := B[0];
for I := 1 to N - 1 do
Result := Result + B[I];
end;
end;
function SumSquareDiffFloatArray(const B: TDynFloatArray; Diff: Float): Float;
var
I, N: Integer;
begin
Result := 0.0;
N := GetDynLength(B);
if N <> 0 then
begin
Result := Sqr(B[0] - Diff);
for I := 1 to N - 1 do
Result := Result + Sqr(B[I] - Diff);
end;
end;
function SumSquareFloatArray(const B: TDynFloatArray): Float;
var
I, N: Integer;
begin
Result := 0.0;
N := GetDynLength(B);
if N <> 0 then
begin
Result := Sqr(B[0]);
for I := 1 to N - 1 do
Result := Result + Sqr(B[I]);
end;
end;
function SumPairProductFloatArray(const X, Y: TDynFloatArray): Float;
var
I, N: Integer;
begin
Result := 0.0;
N := Min(Length(X), Length(Y));
if N <> 0 then
begin
Result := X[0] * Y[0];
for I := 1 to N - 1 do
Result := Result + X[I] * Y[I];
end;
end;
function ChiSquare(const X: TDynFloatArray): Float; { TODO -cDoc : ChiSquare }
var
I, N: Integer;
Sum: Float;
begin
N := GetDynLengthNotNull(X);
Result := Sqr(X[0]);
Sum := X[0];
for I := 1 to N - 1 do
begin
Result := Result + Sqr(X[I]);
Sum := Sum + X[I];
end;
end;
// History:
// $Log: JclStatistics.pas,v $
// Revision 1.16 2005/05/05 20:08:44 ahuser
// JCL.NET support
//
// Revision 1.15 2005/03/08 08:33:17 marquardt
// overhaul of exceptions and resourcestrings, minor style cleaning
//
// Revision 1.14 2005/02/24 16:34:40 marquardt
// remove divider lines, add section lines (unfinished)
//
// Revision 1.13 2004/12/17 05:33:02 marquardt
// updates for DCL
//
// Revision 1.12 2004/10/17 20:25:21 mthoma
// style cleaning, adjusting contributors
//
// Revision 1.11 2004/09/16 19:47:32 rrossmair
// check-in in preparation for release 1.92
//
// Revision 1.10 2004/08/18 19:06:15 rrossmair
// - got rid of warning
// - renamed local variables "L" to "N" (as commonly used to denote sample size)
//
// Revision 1.9 2004/08/18 17:08:59 rrossmair
// - mantis #2019 & #2021 handled, improved error reports
//
// Revision 1.8 2004/07/29 15:16:51 marquardt
// simple style cleaning
//
// Revision 1.7 2004/05/05 07:18:31 rrossmair
// MedianUnsorted: type cast for FPC compatibility
//
// Revision 1.6 2004/05/05 00:09:59 mthoma
// Updated headers: Added donors as contributors, adjusted the initial authors, added cvs names when they were not obvious. Changed $data to $date where necessary,
//
// Revision 1.5 2004/04/08 17:14:46 mthoma
// no message
//
// Revision 1.4 2004/04/08 16:57:21 mthoma
// Fixed #1268. Introduced new function MedianUnsorted
//
// Revision 1.3 2004/04/06 04:53:18
// adapt compiler conditions, add log entry
//
end.