551 lines
16 KiB
ObjectPascal
551 lines
16 KiB
ObjectPascal
{**************************************************************************************************}
|
|
{ }
|
|
{ Project JEDI Code Library (JCL) }
|
|
{ }
|
|
{ The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); }
|
|
{ you may not use this file except in compliance with the License. You may obtain a copy of the }
|
|
{ License at http://www.mozilla.org/MPL/ }
|
|
{ }
|
|
{ Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF }
|
|
{ ANY KIND, either express or implied. See the License for the specific language governing rights }
|
|
{ and limitations under the License. }
|
|
{ }
|
|
{ The Original Code is JclStatistics.pas. }
|
|
{ }
|
|
{ The Initial Developer of the Original Code is ESB Consultancy. }
|
|
{ Portions created by ESB Consultancy are Copyright ESB Consultancy. All rights reserved. }
|
|
{ }
|
|
{ Contributors (in alphabetical order): }
|
|
{ ESB Consultancy }
|
|
{ Fred Hovey }
|
|
{ Marcel van Brakel }
|
|
{ Matthias Thoma }
|
|
{ Robert Marquardt }
|
|
{ Robert Rossmair }
|
|
{ Petr Vones }
|
|
{ }
|
|
{**************************************************************************************************}
|
|
{ }
|
|
{ Various common statistics routines to calculate, for example, the arithmetic mean, geometric }
|
|
{ meanor median of a set of numbers. }
|
|
{ }
|
|
{**************************************************************************************************}
|
|
|
|
// Last modified: $Date: 2005/05/05 20:08:44 $
|
|
// For history see end of file
|
|
|
|
{ TODO : Test cases! }
|
|
|
|
unit JclStatistics;
|
|
|
|
{$I jcl.inc}
|
|
|
|
interface
|
|
|
|
uses
|
|
JclBase, JclMath;
|
|
|
|
type
|
|
EJclStatisticsError = class(EJclMathError);
|
|
|
|
{ Mean functions }
|
|
|
|
function ArithmeticMean(const X: TDynFloatArray): Float;
|
|
function GeometricMean(const X: TDynFloatArray): Float;
|
|
function HarmonicMean(const X: TDynFloatArray): Float;
|
|
function HeronianMean(const A, B: Float): Float;
|
|
|
|
{ Miscellanous }
|
|
|
|
function BinomialCoeff(N, R: Cardinal): Float;
|
|
function IsPositiveFloatArray(const X: TDynFloatArray): Boolean;
|
|
function MaxFloatArray(const B: TDynFloatArray): Float;
|
|
function MaxFloatArrayIndex(const B: TDynFloatArray): Integer;
|
|
function Median(const X: TDynFloatArray): Float;
|
|
{$IFNDEF CLR}
|
|
function MedianUnsorted(const X: TDynFloatArray): Float;
|
|
{$ENDIF ~CLR}
|
|
function MinFloatArray(const B: TDynFloatArray): Float;
|
|
function MinFloatArrayIndex(const B: TDynFloatArray): Integer;
|
|
function Permutation(N, R: Cardinal): Float;
|
|
function Combinations(N, R: Cardinal): Float;
|
|
function SumOfSquares(const X: TDynFloatArray): Float;
|
|
function PopulationVariance(const X: TDynFloatArray): Float;
|
|
procedure PopulationVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float);
|
|
function SampleVariance(const X: TDynFloatArray): Float;
|
|
procedure SampleVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float);
|
|
function StdError(const X: TDynFloatArray): Float; overload;
|
|
function StdError(const Variance: Float; const SampleSize: Integer): Float; overload;
|
|
function SumFloatArray(const B: TDynFloatArray): Float;
|
|
function SumSquareDiffFloatArray(const B: TDynFloatArray; Diff: Float): Float;
|
|
function SumSquareFloatArray(const B: TDynFloatArray): Float;
|
|
function SumPairProductFloatArray(const X, Y: TDynFloatArray): Float;
|
|
|
|
implementation
|
|
|
|
uses
|
|
JclLogic,
|
|
{$IFNDEF CLR}
|
|
JclSysUtils,
|
|
{$ENDIF ~CLR}
|
|
JclResources;
|
|
|
|
//=== Local helpers ==========================================================
|
|
|
|
function GetDynLength(const X: TDynFloatArray): Integer;
|
|
begin
|
|
Result := Length(X);
|
|
end;
|
|
|
|
function GetDynLengthNotNull(const X: TDynFloatArray): Integer;
|
|
begin
|
|
Result := Length(X);
|
|
if Result = 0 then
|
|
{$IFDEF CLR}
|
|
raise EJclMathError.Create(RsEmptyArray);
|
|
{$ELSE}
|
|
raise EJclMathError.CreateRes(@RsEmptyArray);
|
|
{$ENDIF CLR}
|
|
end;
|
|
|
|
procedure InvalidSampleSize(SampleSize: Integer);
|
|
begin
|
|
{$IFDEF CLR}
|
|
raise EJclStatisticsError.CreateFmt(RsInvalidSampleSize, [SampleSize]);
|
|
{$ELSE}
|
|
raise EJclStatisticsError.CreateResFmt(@RsInvalidSampleSize, [SampleSize]);
|
|
{$ENDIF CLR}
|
|
end;
|
|
|
|
function GetSampleSize(const Sample: TDynFloatArray; MinValidSize: Integer = 1): Integer;
|
|
begin
|
|
Result := Length(Sample);
|
|
if Result < MinValidSize then
|
|
InvalidSampleSize(Result);
|
|
end;
|
|
|
|
//=== Mean Functions =========================================================
|
|
|
|
function ArithmeticMean(const X: TDynFloatArray): Float;
|
|
begin
|
|
Result := SumFloatArray(X) / Length(X);
|
|
end;
|
|
|
|
function GeometricMean(const X: TDynFloatArray): Float;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
N := GetSampleSize(X);
|
|
Result := 1.0;
|
|
for I := 0 to N - 1 do
|
|
begin
|
|
if X[I] <= PrecisionTolerance then
|
|
{$IFDEF CLR}
|
|
raise EJclMathError.Create(RsNonPositiveArray);
|
|
{$ELSE}
|
|
raise EJclMathError.CreateRes(@RsNonPositiveArray);
|
|
{$ENDIF CLR}
|
|
Result := Result * X[I];
|
|
end;
|
|
Result := Power(Result, 1 / N);
|
|
end;
|
|
|
|
function HarmonicMean(const X: TDynFloatArray): Float;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
Result := 0.0;
|
|
N := GetSampleSize(X);
|
|
for I := 0 to N - 1 do
|
|
begin
|
|
if X[I] <= PrecisionTolerance then
|
|
{$IFDEF CLR}
|
|
raise EJclMathError.Create(RsNonPositiveArray);
|
|
{$ELSE}
|
|
raise EJclMathError.CreateRes(@RsNonPositiveArray);
|
|
{$ENDIF CLR}
|
|
Result := Result + 1 / X[I];
|
|
end;
|
|
Result := N / Result;
|
|
end;
|
|
|
|
function HeronianMean(const A, B: Float): Float;
|
|
begin
|
|
Assert(A >= 0);
|
|
Assert(B >= 0);
|
|
Result := (A + Sqrt(A * B) + B) / 3;
|
|
end;
|
|
|
|
//=== Miscellanous ===========================================================
|
|
|
|
function BinomialCoeff(N, R: Cardinal): Float;
|
|
var
|
|
I: Integer;
|
|
K: LongWord;
|
|
begin
|
|
if (N = 0) or (R > N) or (N > MaxFactorial) then
|
|
begin
|
|
Result := 0.0;
|
|
Exit;
|
|
end;
|
|
Result := 1.0;
|
|
if not ((R = 0) or (R = N)) then
|
|
begin
|
|
if R > N div 2 then
|
|
R := N - R;
|
|
K := 2;
|
|
try
|
|
for I := N - R + 1 to N do
|
|
begin
|
|
Result := Result * I;
|
|
if K <= R then
|
|
begin
|
|
Result := Result / K;
|
|
Inc(K);
|
|
end;
|
|
end;
|
|
Result := Int(Result + 0.5);
|
|
except
|
|
Result := -1.0;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
function IsPositiveFloatArray(const X: TDynFloatArray): Boolean;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
Result := False;
|
|
N := GetDynLengthNotNull(X);
|
|
for I := 0 to N - 1 do
|
|
if X[I] <= PrecisionTolerance then
|
|
Exit;
|
|
Result := True;
|
|
end;
|
|
|
|
function MaxFloatArray(const B: TDynFloatArray): Float;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
N := GetDynLengthNotNull(B);
|
|
Result := B[0];
|
|
for I := 1 to N - 1 do
|
|
if B[I] > Result then
|
|
Result := B[I];
|
|
end;
|
|
|
|
function MaxFloatArrayIndex(const B: TDynFloatArray): Integer;
|
|
var
|
|
I, N: Integer;
|
|
Max: Float;
|
|
begin
|
|
Result := 0;
|
|
N := GetDynLengthNotNull(B);
|
|
Max := B[0];
|
|
for I := 1 to N - 1 do
|
|
if B[I] > Max then
|
|
begin
|
|
Max := B[I];
|
|
Result := I;
|
|
end;
|
|
end;
|
|
|
|
// The FloatArray X must be presorted so Median can calculate the correct value.
|
|
// Y_{(n+1)/2} if N is odd
|
|
// Median = { 1/2 * (Y_{n/2} + Y_{1+(n/2) } if N is even
|
|
|
|
function Median(const X: TDynFloatArray): Float;
|
|
var
|
|
N: Integer;
|
|
begin
|
|
N := GetSampleSize(X);
|
|
if N = 1 then
|
|
Result := X[0]
|
|
else
|
|
if Odd(N) then
|
|
Result := X[N div 2]
|
|
else
|
|
Result := (X[N div 2 - 1] + X[N div 2]) / 2;
|
|
end;
|
|
|
|
{$IFNDEF CLR}
|
|
function MedianUnsorted(const X: TDynFloatArray): Float;
|
|
var
|
|
SortedList: TDynFloatArray;
|
|
|
|
begin
|
|
// We need to sort the values first
|
|
SortedList := Copy(X);
|
|
// type cast to Pointer for the sake of FPC
|
|
SortDynArray(Pointer(SortedList), SizeOf(Float),DynArrayCompareFloat);
|
|
|
|
// and call the median function afterwards
|
|
Result := Median(SortedList);
|
|
end;
|
|
{$ENDIF ~CLR}
|
|
|
|
function MinFloatArray(const B: TDynFloatArray): Float;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
N := GetDynLengthNotNull(B);
|
|
Result := B[0];
|
|
for I := 1 to N - 1 do
|
|
if B[I] < Result then
|
|
Result := B[I];
|
|
end;
|
|
|
|
function MinFloatArrayIndex(const B: TDynFloatArray): Integer;
|
|
var
|
|
I, N: Integer;
|
|
Min: Float;
|
|
begin
|
|
Result := 0;
|
|
N := GetDynLengthNotNull(B);
|
|
Min := B[0];
|
|
for I := 1 to N - 1 do
|
|
if B[I] < Min then
|
|
begin
|
|
Min := B[I];
|
|
Result := I;
|
|
end;
|
|
end;
|
|
|
|
function Permutation(N, R: Cardinal): Float;
|
|
var
|
|
I : Integer;
|
|
begin
|
|
if (N = 0) or (R > N) or (N > MaxFactorial) then
|
|
begin
|
|
Result := 0.0;
|
|
Exit;
|
|
end;
|
|
Result := 1.0;
|
|
if R <> 0 then
|
|
try
|
|
for I := N downto N - R + 1 do
|
|
Result := Result * I;
|
|
Result := Int(Result + 0.5);
|
|
except
|
|
Result := -1.0;
|
|
end;
|
|
end;
|
|
|
|
{ TODO -cDoc : Donator: Fred Hovey }
|
|
function Combinations(N, R: Cardinal): Float;
|
|
begin
|
|
Result := Factorial(R);
|
|
if IsFloatZero(Result) then
|
|
Result := -1.0
|
|
else
|
|
Result := Permutation(N, R) / Result;
|
|
end;
|
|
|
|
{ TODO -cDoc : donator: Fred Hovey, contributor: Robert Rossmair }
|
|
function SumOfSquares(const X: TDynFloatArray): Float;
|
|
var
|
|
I, N: Integer;
|
|
Sum: Float;
|
|
begin
|
|
N := GetSampleSize(X);
|
|
Result := Sqr(X[0]);
|
|
Sum := X[0];
|
|
for I := 1 to N - 1 do
|
|
begin
|
|
Result := Result + Sqr(X[I]);
|
|
Sum := Sum + X[I];
|
|
end;
|
|
Result := Result - Sum * Sum / N;
|
|
end;
|
|
|
|
{ TODO -cDoc : Contributors: Fred Hovey, Robert Rossmair }
|
|
function PopulationVariance(const X: TDynFloatArray): Float;
|
|
begin
|
|
// Length(X) = 0 would cause SumOfSquares() to raise an exception before the division is executed.
|
|
Result := SumOfSquares(X) / Length(X);
|
|
end;
|
|
|
|
procedure PopulationVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float);
|
|
var
|
|
I, N: Integer;
|
|
Sum, SumSq: Float;
|
|
begin
|
|
N := GetSampleSize(X);
|
|
SumSq := Sqr(X[0]);
|
|
Sum := X[0];
|
|
for I := 1 to N - 1 do
|
|
begin
|
|
SumSq := SumSq + Sqr(X[I]);
|
|
Sum := Sum + X[I];
|
|
end;
|
|
Mean := Sum / N;
|
|
Variance := (SumSq / N) - Sqr(Mean);
|
|
end;
|
|
|
|
{ TODO -cDoc : Contributors: Fred Hovey, Robert Rossmair }
|
|
function SampleVariance(const X: TDynFloatArray): Float;
|
|
var
|
|
N: Integer;
|
|
begin
|
|
N := GetSampleSize(X, 2);
|
|
Result := SumOfSquares(X) / (N - 1)
|
|
end;
|
|
|
|
{ TODO -cDoc : Contributors: Fred Hovey, Robert Rossmair }
|
|
procedure SampleVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float);
|
|
var
|
|
I, N: Integer;
|
|
Sum, SumSq: Float;
|
|
begin
|
|
N := GetSampleSize(X);
|
|
SumSq := Sqr(X[0]);
|
|
Sum := X[0];
|
|
for I := 1 to N - 1 do
|
|
begin
|
|
SumSq := SumSq + Sqr(X[I]);
|
|
Sum := Sum + X[I];
|
|
end;
|
|
Mean := Sum / N;
|
|
if N < 2 then
|
|
InvalidSampleSize(N);
|
|
//Variance := (SumSq / (N - 1)) - Sqr(Sum / (N - 1)) => WRONG!!!!
|
|
Variance := (SumSq - Sum * Sum / N) / (N - 1)
|
|
end;
|
|
|
|
{ TODO -cDoc : Donator: Fred Hovey, contributor: Robert Rossmair }
|
|
function StdError(const X: TDynFloatArray): Float;
|
|
begin
|
|
// Length(X) = 0 would cause SampleVariance() to raise an exception before the division is
|
|
// executed.
|
|
Result := Sqrt(SampleVariance(X) / Length(X));
|
|
end;
|
|
|
|
{ TODO -cDoc : Donator: Fred Hovey, contributor: Robert Rossmair }
|
|
function StdError(const Variance: Float; const SampleSize: Integer): Float;
|
|
begin
|
|
if SampleSize = 0 then
|
|
InvalidSampleSize(SampleSize);
|
|
Result := Sqrt(Variance / SampleSize);
|
|
end;
|
|
|
|
function SumFloatArray(const B: TDynFloatArray): Float;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
Result := 0.0;
|
|
N := GetDynLength(B);
|
|
if N <> 0 then
|
|
begin
|
|
Result := B[0];
|
|
for I := 1 to N - 1 do
|
|
Result := Result + B[I];
|
|
end;
|
|
end;
|
|
|
|
function SumSquareDiffFloatArray(const B: TDynFloatArray; Diff: Float): Float;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
Result := 0.0;
|
|
N := GetDynLength(B);
|
|
if N <> 0 then
|
|
begin
|
|
Result := Sqr(B[0] - Diff);
|
|
for I := 1 to N - 1 do
|
|
Result := Result + Sqr(B[I] - Diff);
|
|
end;
|
|
end;
|
|
|
|
function SumSquareFloatArray(const B: TDynFloatArray): Float;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
Result := 0.0;
|
|
N := GetDynLength(B);
|
|
if N <> 0 then
|
|
begin
|
|
Result := Sqr(B[0]);
|
|
for I := 1 to N - 1 do
|
|
Result := Result + Sqr(B[I]);
|
|
end;
|
|
end;
|
|
|
|
function SumPairProductFloatArray(const X, Y: TDynFloatArray): Float;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
Result := 0.0;
|
|
N := Min(Length(X), Length(Y));
|
|
if N <> 0 then
|
|
begin
|
|
Result := X[0] * Y[0];
|
|
for I := 1 to N - 1 do
|
|
Result := Result + X[I] * Y[I];
|
|
end;
|
|
end;
|
|
|
|
function ChiSquare(const X: TDynFloatArray): Float; { TODO -cDoc : ChiSquare }
|
|
var
|
|
I, N: Integer;
|
|
Sum: Float;
|
|
begin
|
|
N := GetDynLengthNotNull(X);
|
|
Result := Sqr(X[0]);
|
|
Sum := X[0];
|
|
for I := 1 to N - 1 do
|
|
begin
|
|
Result := Result + Sqr(X[I]);
|
|
Sum := Sum + X[I];
|
|
end;
|
|
end;
|
|
|
|
// History:
|
|
|
|
// $Log: JclStatistics.pas,v $
|
|
// Revision 1.16 2005/05/05 20:08:44 ahuser
|
|
// JCL.NET support
|
|
//
|
|
// Revision 1.15 2005/03/08 08:33:17 marquardt
|
|
// overhaul of exceptions and resourcestrings, minor style cleaning
|
|
//
|
|
// Revision 1.14 2005/02/24 16:34:40 marquardt
|
|
// remove divider lines, add section lines (unfinished)
|
|
//
|
|
// Revision 1.13 2004/12/17 05:33:02 marquardt
|
|
// updates for DCL
|
|
//
|
|
// Revision 1.12 2004/10/17 20:25:21 mthoma
|
|
// style cleaning, adjusting contributors
|
|
//
|
|
// Revision 1.11 2004/09/16 19:47:32 rrossmair
|
|
// check-in in preparation for release 1.92
|
|
//
|
|
// Revision 1.10 2004/08/18 19:06:15 rrossmair
|
|
// - got rid of warning
|
|
// - renamed local variables "L" to "N" (as commonly used to denote sample size)
|
|
//
|
|
// Revision 1.9 2004/08/18 17:08:59 rrossmair
|
|
// - mantis #2019 & #2021 handled, improved error reports
|
|
//
|
|
// Revision 1.8 2004/07/29 15:16:51 marquardt
|
|
// simple style cleaning
|
|
//
|
|
// Revision 1.7 2004/05/05 07:18:31 rrossmair
|
|
// MedianUnsorted: type cast for FPC compatibility
|
|
//
|
|
// Revision 1.6 2004/05/05 00:09:59 mthoma
|
|
// Updated headers: Added donors as contributors, adjusted the initial authors, added cvs names when they were not obvious. Changed $data to $date where necessary,
|
|
//
|
|
// Revision 1.5 2004/04/08 17:14:46 mthoma
|
|
// no message
|
|
//
|
|
// Revision 1.4 2004/04/08 16:57:21 mthoma
|
|
// Fixed #1268. Introduced new function MedianUnsorted
|
|
//
|
|
// Revision 1.3 2004/04/06 04:53:18
|
|
// adapt compiler conditions, add log entry
|
|
//
|
|
|
|
end.
|