{**************************************************************************************************} { } { Project JEDI Code Library (JCL) } { } { The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); } { you may not use this file except in compliance with the License. You may obtain a copy of the } { License at http://www.mozilla.org/MPL/ } { } { Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF } { ANY KIND, either express or implied. See the License for the specific language governing rights } { and limitations under the License. } { } { The Original Code is JclStatistics.pas. } { } { The Initial Developer of the Original Code is ESB Consultancy. } { Portions created by ESB Consultancy are Copyright ESB Consultancy. All rights reserved. } { } { Contributors (in alphabetical order): } { ESB Consultancy } { Fred Hovey } { Marcel van Brakel } { Matthias Thoma } { Robert Marquardt } { Robert Rossmair } { Petr Vones } { } {**************************************************************************************************} { } { Various common statistics routines to calculate, for example, the arithmetic mean, geometric } { meanor median of a set of numbers. } { } {**************************************************************************************************} // Last modified: $Date: 2005/05/05 20:08:44 $ // For history see end of file { TODO : Test cases! } unit JclStatistics; {$I jcl.inc} interface uses JclBase, JclMath; type EJclStatisticsError = class(EJclMathError); { Mean functions } function ArithmeticMean(const X: TDynFloatArray): Float; function GeometricMean(const X: TDynFloatArray): Float; function HarmonicMean(const X: TDynFloatArray): Float; function HeronianMean(const A, B: Float): Float; { Miscellanous } function BinomialCoeff(N, R: Cardinal): Float; function IsPositiveFloatArray(const X: TDynFloatArray): Boolean; function MaxFloatArray(const B: TDynFloatArray): Float; function MaxFloatArrayIndex(const B: TDynFloatArray): Integer; function Median(const X: TDynFloatArray): Float; {$IFNDEF CLR} function MedianUnsorted(const X: TDynFloatArray): Float; {$ENDIF ~CLR} function MinFloatArray(const B: TDynFloatArray): Float; function MinFloatArrayIndex(const B: TDynFloatArray): Integer; function Permutation(N, R: Cardinal): Float; function Combinations(N, R: Cardinal): Float; function SumOfSquares(const X: TDynFloatArray): Float; function PopulationVariance(const X: TDynFloatArray): Float; procedure PopulationVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float); function SampleVariance(const X: TDynFloatArray): Float; procedure SampleVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float); function StdError(const X: TDynFloatArray): Float; overload; function StdError(const Variance: Float; const SampleSize: Integer): Float; overload; function SumFloatArray(const B: TDynFloatArray): Float; function SumSquareDiffFloatArray(const B: TDynFloatArray; Diff: Float): Float; function SumSquareFloatArray(const B: TDynFloatArray): Float; function SumPairProductFloatArray(const X, Y: TDynFloatArray): Float; implementation uses JclLogic, {$IFNDEF CLR} JclSysUtils, {$ENDIF ~CLR} JclResources; //=== Local helpers ========================================================== function GetDynLength(const X: TDynFloatArray): Integer; begin Result := Length(X); end; function GetDynLengthNotNull(const X: TDynFloatArray): Integer; begin Result := Length(X); if Result = 0 then {$IFDEF CLR} raise EJclMathError.Create(RsEmptyArray); {$ELSE} raise EJclMathError.CreateRes(@RsEmptyArray); {$ENDIF CLR} end; procedure InvalidSampleSize(SampleSize: Integer); begin {$IFDEF CLR} raise EJclStatisticsError.CreateFmt(RsInvalidSampleSize, [SampleSize]); {$ELSE} raise EJclStatisticsError.CreateResFmt(@RsInvalidSampleSize, [SampleSize]); {$ENDIF CLR} end; function GetSampleSize(const Sample: TDynFloatArray; MinValidSize: Integer = 1): Integer; begin Result := Length(Sample); if Result < MinValidSize then InvalidSampleSize(Result); end; //=== Mean Functions ========================================================= function ArithmeticMean(const X: TDynFloatArray): Float; begin Result := SumFloatArray(X) / Length(X); end; function GeometricMean(const X: TDynFloatArray): Float; var I, N: Integer; begin N := GetSampleSize(X); Result := 1.0; for I := 0 to N - 1 do begin if X[I] <= PrecisionTolerance then {$IFDEF CLR} raise EJclMathError.Create(RsNonPositiveArray); {$ELSE} raise EJclMathError.CreateRes(@RsNonPositiveArray); {$ENDIF CLR} Result := Result * X[I]; end; Result := Power(Result, 1 / N); end; function HarmonicMean(const X: TDynFloatArray): Float; var I, N: Integer; begin Result := 0.0; N := GetSampleSize(X); for I := 0 to N - 1 do begin if X[I] <= PrecisionTolerance then {$IFDEF CLR} raise EJclMathError.Create(RsNonPositiveArray); {$ELSE} raise EJclMathError.CreateRes(@RsNonPositiveArray); {$ENDIF CLR} Result := Result + 1 / X[I]; end; Result := N / Result; end; function HeronianMean(const A, B: Float): Float; begin Assert(A >= 0); Assert(B >= 0); Result := (A + Sqrt(A * B) + B) / 3; end; //=== Miscellanous =========================================================== function BinomialCoeff(N, R: Cardinal): Float; var I: Integer; K: LongWord; begin if (N = 0) or (R > N) or (N > MaxFactorial) then begin Result := 0.0; Exit; end; Result := 1.0; if not ((R = 0) or (R = N)) then begin if R > N div 2 then R := N - R; K := 2; try for I := N - R + 1 to N do begin Result := Result * I; if K <= R then begin Result := Result / K; Inc(K); end; end; Result := Int(Result + 0.5); except Result := -1.0; end; end; end; function IsPositiveFloatArray(const X: TDynFloatArray): Boolean; var I, N: Integer; begin Result := False; N := GetDynLengthNotNull(X); for I := 0 to N - 1 do if X[I] <= PrecisionTolerance then Exit; Result := True; end; function MaxFloatArray(const B: TDynFloatArray): Float; var I, N: Integer; begin N := GetDynLengthNotNull(B); Result := B[0]; for I := 1 to N - 1 do if B[I] > Result then Result := B[I]; end; function MaxFloatArrayIndex(const B: TDynFloatArray): Integer; var I, N: Integer; Max: Float; begin Result := 0; N := GetDynLengthNotNull(B); Max := B[0]; for I := 1 to N - 1 do if B[I] > Max then begin Max := B[I]; Result := I; end; end; // The FloatArray X must be presorted so Median can calculate the correct value. // Y_{(n+1)/2} if N is odd // Median = { 1/2 * (Y_{n/2} + Y_{1+(n/2) } if N is even function Median(const X: TDynFloatArray): Float; var N: Integer; begin N := GetSampleSize(X); if N = 1 then Result := X[0] else if Odd(N) then Result := X[N div 2] else Result := (X[N div 2 - 1] + X[N div 2]) / 2; end; {$IFNDEF CLR} function MedianUnsorted(const X: TDynFloatArray): Float; var SortedList: TDynFloatArray; begin // We need to sort the values first SortedList := Copy(X); // type cast to Pointer for the sake of FPC SortDynArray(Pointer(SortedList), SizeOf(Float),DynArrayCompareFloat); // and call the median function afterwards Result := Median(SortedList); end; {$ENDIF ~CLR} function MinFloatArray(const B: TDynFloatArray): Float; var I, N: Integer; begin N := GetDynLengthNotNull(B); Result := B[0]; for I := 1 to N - 1 do if B[I] < Result then Result := B[I]; end; function MinFloatArrayIndex(const B: TDynFloatArray): Integer; var I, N: Integer; Min: Float; begin Result := 0; N := GetDynLengthNotNull(B); Min := B[0]; for I := 1 to N - 1 do if B[I] < Min then begin Min := B[I]; Result := I; end; end; function Permutation(N, R: Cardinal): Float; var I : Integer; begin if (N = 0) or (R > N) or (N > MaxFactorial) then begin Result := 0.0; Exit; end; Result := 1.0; if R <> 0 then try for I := N downto N - R + 1 do Result := Result * I; Result := Int(Result + 0.5); except Result := -1.0; end; end; { TODO -cDoc : Donator: Fred Hovey } function Combinations(N, R: Cardinal): Float; begin Result := Factorial(R); if IsFloatZero(Result) then Result := -1.0 else Result := Permutation(N, R) / Result; end; { TODO -cDoc : donator: Fred Hovey, contributor: Robert Rossmair } function SumOfSquares(const X: TDynFloatArray): Float; var I, N: Integer; Sum: Float; begin N := GetSampleSize(X); Result := Sqr(X[0]); Sum := X[0]; for I := 1 to N - 1 do begin Result := Result + Sqr(X[I]); Sum := Sum + X[I]; end; Result := Result - Sum * Sum / N; end; { TODO -cDoc : Contributors: Fred Hovey, Robert Rossmair } function PopulationVariance(const X: TDynFloatArray): Float; begin // Length(X) = 0 would cause SumOfSquares() to raise an exception before the division is executed. Result := SumOfSquares(X) / Length(X); end; procedure PopulationVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float); var I, N: Integer; Sum, SumSq: Float; begin N := GetSampleSize(X); SumSq := Sqr(X[0]); Sum := X[0]; for I := 1 to N - 1 do begin SumSq := SumSq + Sqr(X[I]); Sum := Sum + X[I]; end; Mean := Sum / N; Variance := (SumSq / N) - Sqr(Mean); end; { TODO -cDoc : Contributors: Fred Hovey, Robert Rossmair } function SampleVariance(const X: TDynFloatArray): Float; var N: Integer; begin N := GetSampleSize(X, 2); Result := SumOfSquares(X) / (N - 1) end; { TODO -cDoc : Contributors: Fred Hovey, Robert Rossmair } procedure SampleVarianceAndMean(const X: TDynFloatArray; var Variance, Mean: Float); var I, N: Integer; Sum, SumSq: Float; begin N := GetSampleSize(X); SumSq := Sqr(X[0]); Sum := X[0]; for I := 1 to N - 1 do begin SumSq := SumSq + Sqr(X[I]); Sum := Sum + X[I]; end; Mean := Sum / N; if N < 2 then InvalidSampleSize(N); //Variance := (SumSq / (N - 1)) - Sqr(Sum / (N - 1)) => WRONG!!!! Variance := (SumSq - Sum * Sum / N) / (N - 1) end; { TODO -cDoc : Donator: Fred Hovey, contributor: Robert Rossmair } function StdError(const X: TDynFloatArray): Float; begin // Length(X) = 0 would cause SampleVariance() to raise an exception before the division is // executed. Result := Sqrt(SampleVariance(X) / Length(X)); end; { TODO -cDoc : Donator: Fred Hovey, contributor: Robert Rossmair } function StdError(const Variance: Float; const SampleSize: Integer): Float; begin if SampleSize = 0 then InvalidSampleSize(SampleSize); Result := Sqrt(Variance / SampleSize); end; function SumFloatArray(const B: TDynFloatArray): Float; var I, N: Integer; begin Result := 0.0; N := GetDynLength(B); if N <> 0 then begin Result := B[0]; for I := 1 to N - 1 do Result := Result + B[I]; end; end; function SumSquareDiffFloatArray(const B: TDynFloatArray; Diff: Float): Float; var I, N: Integer; begin Result := 0.0; N := GetDynLength(B); if N <> 0 then begin Result := Sqr(B[0] - Diff); for I := 1 to N - 1 do Result := Result + Sqr(B[I] - Diff); end; end; function SumSquareFloatArray(const B: TDynFloatArray): Float; var I, N: Integer; begin Result := 0.0; N := GetDynLength(B); if N <> 0 then begin Result := Sqr(B[0]); for I := 1 to N - 1 do Result := Result + Sqr(B[I]); end; end; function SumPairProductFloatArray(const X, Y: TDynFloatArray): Float; var I, N: Integer; begin Result := 0.0; N := Min(Length(X), Length(Y)); if N <> 0 then begin Result := X[0] * Y[0]; for I := 1 to N - 1 do Result := Result + X[I] * Y[I]; end; end; function ChiSquare(const X: TDynFloatArray): Float; { TODO -cDoc : ChiSquare } var I, N: Integer; Sum: Float; begin N := GetDynLengthNotNull(X); Result := Sqr(X[0]); Sum := X[0]; for I := 1 to N - 1 do begin Result := Result + Sqr(X[I]); Sum := Sum + X[I]; end; end; // History: // $Log: JclStatistics.pas,v $ // Revision 1.16 2005/05/05 20:08:44 ahuser // JCL.NET support // // Revision 1.15 2005/03/08 08:33:17 marquardt // overhaul of exceptions and resourcestrings, minor style cleaning // // Revision 1.14 2005/02/24 16:34:40 marquardt // remove divider lines, add section lines (unfinished) // // Revision 1.13 2004/12/17 05:33:02 marquardt // updates for DCL // // Revision 1.12 2004/10/17 20:25:21 mthoma // style cleaning, adjusting contributors // // Revision 1.11 2004/09/16 19:47:32 rrossmair // check-in in preparation for release 1.92 // // Revision 1.10 2004/08/18 19:06:15 rrossmair // - got rid of warning // - renamed local variables "L" to "N" (as commonly used to denote sample size) // // Revision 1.9 2004/08/18 17:08:59 rrossmair // - mantis #2019 & #2021 handled, improved error reports // // Revision 1.8 2004/07/29 15:16:51 marquardt // simple style cleaning // // Revision 1.7 2004/05/05 07:18:31 rrossmair // MedianUnsorted: type cast for FPC compatibility // // Revision 1.6 2004/05/05 00:09:59 mthoma // Updated headers: Added donors as contributors, adjusted the initial authors, added cvs names when they were not obvious. Changed $data to $date where necessary, // // Revision 1.5 2004/04/08 17:14:46 mthoma // no message // // Revision 1.4 2004/04/08 16:57:21 mthoma // Fixed #1268. Introduced new function MedianUnsorted // // Revision 1.3 2004/04/06 04:53:18 // adapt compiler conditions, add log entry // end.