## Copyright © 1995, 2000, 2003-2005 Francesco Potort́
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software Foundation,
## Inc., 51 Franklin Street, 5th Floor, Boston, MA 02110-1301, USA.
## -*- texinfo -*-
## @deftypefn {Function File} {} hurst (@var{A})
## @deftypefnx {Function File} {} hurst (@var{A}, @var{algo})
## @deftypefnx {Function File} {[@var{H}, @var{r}, @var{x}, @var{y}] =} hurst (@var{A}, @var{algo})
##
## Estimate the Hurst parameter of the fGn for each column of @var{A}
##
## If @var{algo} is omitted or @code{"old"}, do a quick and crude estimate
## of the Hurst parameter by using the rescaled range statistic.
##
## If @var{algo} is @code{"VTP"}, use the variance-time plot method, that
## is, the variance computed over intervals of different lenghts. The
## argument is a vector representing fractional Gaussian noise (fGn).
## Plotted on a log-log diagram, the variance @var{x} versus the interval
## length @var{y} is least-squares interpolated by a line of slope
## @math{\beta = 2H}. @var{r} is the correlation coefficient and gives a
## "reliability factor" of the @var{H} estimate: values higher than 0.9
## should be expected for data series representing real fGn.
##
## If @var{algo} is @code{"IDC"}, use the index of dispersion for counts
## (IDC, or peakedness), that is, the ratio of the variance over mean,
## computed over intervals of different lenghts. The argument is a vector
## representing fGn plotted on a log-log diagram, the IDC @var{x} versus the
## interval length @var{y} is least-squares interpolated by a line of slope
## @math{\beta = 2H-1}. @var{r} is the correlation coefficient and gives a
## "reliability factor" of the @var{H} estimate: values higher than 0.9
## should be expected. This method gives the same result as the previous
## one, but a different reliability factor: both methods should be tried and
## the lowest @var{r} considered.
##
## @strong{Reference}
## Leland, Taqqu, Willinger, Wilson, "On the Self-Similar
## Nature of Ethernet Traffic (Extended Version)", IEEE/ACM Trans. on
## Networking Vol.2, Num.1, 1994.
## @end deftypefn
##
## @seealso{fGn, RMDtraffic, RMDinterp}
## Author: Francesco Potort́
## $Revision: 1.4 $
## Usage: [H, r, x, y] = hurst(A, algo)
## Description: Estimate the Hurst parameter of fractional Gaussian noise
function [H, r, x, y] = hurst (A, algo)
## Check input parameters
if (nargin < 1 || nargin > 2)
usage ("hurstpot (A, algo)");
elseif (!is_matrix(A))
error("hurstpot: argument should be a vector or matrix");
elseif (nargin == 1)
algo = "old"; # default for backwards compatibility
endif
## The algorithm names must be three letters long, or else use strcmp
switch algo
case "old" ; case "IDC" ; case "VTP" ;
otherwise
error("hurstpot: algorithm \"%s\" not implemented", algo);
endswitch
## Some initializations
if (is_vector(A) && rows(A) == 1)
A = A'; # make it a column vector
endif
l = rows(A); # length of each vector to analyse
c = columns(A); # number of vectors to analyse
H = NaN+zeros(1,c); # prepare the vector of Hurst coefficients
r = zeros(1,c); # prepare the vector of correlation coefficients
## Look for constant columns: these should not be analysed
z = (range(A) == 0); # these are the constant columns
H(find(z)) = 1; # for constant columns H=1
r(find(z)) = 1; # and the result is exact
g = find(!z); # this are the good columns, the ones to analyse
## Backward compatibility algorithm. This is a fast and crude rescaled
## range statistic that provides no measure of reliability.
if (algo == "old")
x = log(l);
y = log(range(cumsum(detrend(A(:,g),0))) ./ std(A(:,g)));
H(g) = y / x;
return;
endif
## m is the minimum interval length over which the chosen statistic is
## computed. M is the minimum number of intervals over which that same
## quantity is computed. k is the number of interval lengths per decade
## used for the computation. minp is the minimum number of points that
## must be used. maxp is the maximum number of points that must be used.
m = 10;
M = 10;
k = 5;
minp = 5;
maxp = 20;
minl = ceil(m*M*10^((minp-1)/k));
if (l < minl)
error(sprintf("A should have at least %d rows\n", minl));
endif
A(:,g) -= ones(l,1) * min(A(:,g)); # condition A to be greater than 0
A(:,g) ./= ones(l,1) * max(A(:,g)); # condition A to be between 0 and 1
A(:,g) += 1; # condition A to be between 1 and 2
## n is the number of points we use to interpolate each line (one for each
## column in A) on a log-log plot. x is a vector containing their
## abscissae, the columns of y contain their ordinates.
n = min(maxp, floor(k * log10(l/m/M)));
x = floor(logspace(log10(m), log10(l/M), n))';
y = zeros(rows(x),c);
cumA = cumsum(A(:,g));
for i = 1:n
Y = diff([zeros(1, columns(cumA)); cumA(x(i):x(i):l,:)]);
sets = rows(Y);
switch algo
case "IDC"
## The ordinates are the ratio of variance over mean for the
## increments of the vector A at distances x.
s = sum(Y);
y(i,g) = (sumsq(Y)*sets./s - s) / (sets-1);
case "VTP"
## The ordinates are the variances of the increments of the vector A
## at distances x.
y(i,g) = (sumsq(Y) - cumA(sets*x(i),:).^2/sets) / (sets-1);
endswitch
endfor
## Look for bad conditioned columns
if (any(any(y(:,g)==0)))
## Some columns of A(:,g) are badly conditioned: remove them from the
## vector of good columns.
b = ones(1,c); # all columns are bad
b(g) = any(y(:,g)==0); # no, the not badly conditioned ones are not
g = find(!b); # badly conditioned columns are not good
endif
# Now compute H by doing a log-log linear regression
logx = log(x);
logy = log(y(:,g));
alfabeta = [ones(size(logx)),logx] \ logy;
switch algo
case "IDC"
## The slope of the plot is 2H-1.
H(g) = (1+alfabeta(2,:)) / 2;
if (nargout > 1) r(g) = sign(alfabeta(2,:)) .* corrcoef(logx, logy); endif
case "VTP"
## The slope of the plot is 2H.
H(g) = alfabeta(2,:) / 2;
if (nargout > 1) r(g) = corrcoef(logx, logy); endif
endswitch
endfunction