/***************************************************************************
**
**  This file is part of QGpCoreStat.
**
**  This library is free software; you can redistribute it and/or
**  modify it under the terms of the GNU Lesser General Public
**  License as published by the Free Software Foundation; either
**  version 2.1 of the License, or (at your option) any later version.
**
**  This file is distributed in the hope that it will be useful, but WITHOUT
**  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
**  FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
**  License for more details.
**
**  You should have received a copy of the GNU Lesser General Public
**  License along with this library; if not, write to the Free Software
**  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
**
**  See http://www.geopsy.org for more information.
**
**  Created: 2012-12-19
**  Copyright: 2012-2019
**    Marc Wathelet (ISTerre, Grenoble, France)
**
***************************************************************************/

#include "Histogram.h"

namespace QGpCoreStat {

  /*!
    \class Histogram Histogram.h
    \brief A histogram built as an extension of Curve<Point2D>

    There is no sample outside the first and the last class.
    The Y of the last point is ignored and assumed to be null: the X of the last
    point is only the upper limit of the last class.
  */

  /*!
    Returns a copy of this histogram with its X axis transformed according to the
    sampling flags: xLog() is applied when LogScale is set, otherwise xInverse()
    is applied when InversedScale is set. With neither flag the copy is returned
    untouched.
  */
  Histogram Histogram::scaled() const
  {
    Histogram transformed(*this);
    // LogScale takes precedence over InversedScale
    if(_sampling & LogScale) {
      transformed.xLog();
      return transformed;
    }
    if(_sampling & InversedScale) {
      transformed.xInverse();
    }
    return transformed;
  }

  /*!
    Builds a statistical value from \a average and from the deviation around it,
    sqrt(variance(\a average)).

    With LogScale both values are passed through exp(), with InversedScale both
    are inverted (1/x), otherwise they are stored as they are.
  */
  RealStatisticalValue Histogram::real(double average) const
  {
    double meanValue=average;
    double deviation=sqrt(variance(average));
    if(_sampling & LogScale) {
      meanValue=exp(meanValue);
      deviation=exp(deviation);
    } else if(_sampling & InversedScale) {
      meanValue=1.0/meanValue;
      deviation=1.0/deviation;
    }
    RealStatisticalValue result;
    result.setMean(meanValue);
    result.setStddev(deviation);
    return result;
  }

  double Histogram::normalize()
  {
    Curve<Point2D> curve=scaled();

    int n=count()-1;
    double sum=0.0;
    for(int i=0; i<n; i++) {
      const Point2D& p=curve.constAt(i);
      sum+=p.y()*(curve.x(i+1)-p.x());
    }
    if(sum>0.0) {
      double fac=1.0/sum;
      for(int i=0; i<n; i++) {
        setY(i, y(i)*fac);
      }
    }
    return sum;
  }

  /*!
    Returns the cumulative curve of this histogram.

    The curve starts with a null value at the first X and gets one point per
    class, located at the upper limit of the class and holding the area
    accumulated so far. Areas are measured on the scaled X axis (see scaled()),
    the X values of the returned curve are converted back with xExp() or
    xInverse() according to the sampling flags.
  */
  Curve<Point2D> Histogram::cumulative() const
  {
    const Curve<Point2D> scaledCurve=scaled();
    const int classCount=count()-1;

    Curve<Point2D> result;
    // NOTE(review): firstX() is called unconditionally, presumably the histogram
    // must not be empty here - confirm
    result.append(Point2D(scaledCurve.firstX(), 0.0));

    double area=0.0;
    for(int upper=1; upper<=classCount; upper++) {
      const Point2D& lower=scaledCurve.constAt(upper-1);
      const double upperX=scaledCurve.x(upper);
      area+=lower.y()*(upperX-lower.x());
      result.append(Point2D(upperX, area));
    }

    // Back to the original X axis
    if(_sampling & LogScale) {
      result.xExp();
    } else if(_sampling & InversedScale) {
      result.xInverse();
    }
    return result;
  }

  /*!
    The variance is computed relative to \a average that can be any of mean, median or mode. If \a average is
    the median, the median deviation is returned.
  */
  double Histogram::variance(double average) const
  {
    TRACE;
    double sumX=0.0, sumX2=0.0;
    int n=count()-1;
    int hitCount=n;
    double width=0.0;
    for(int i=0; i<n; i++) {
      const Point2D& val=constAt(i);
      double nextx=x(i+1);
      double w=nextx-val.x();
      double p=val.y()*w;
      if(p>0.0) {
        width=w;
        double v=0.5*(nextx+val.x());
        double vp=v*p;
        sumX+=vp;
        sumX2+=v*vp;
      } else {
        hitCount--;
      }
    }
    if(hitCount>1) {
      return sumX2+(average-2.0*sumX)*average;
    } else if(hitCount==1) {
      // The only one class, assumed to be 4*sigma (95% on a gaussian);
      return width*width/16;
    }
    return 0.0;
  }

  /*!
    Returns the mean of the histogram: the centres of the classes weighted by
    their area (Y times width), divided by the total area. The computation is
    done on the scaled X axis (see scaled()) and the result is converted by real().

    NOTE(review): the division by the total area is not guarded, a histogram
    with a null total area leads to 0/0.
  */
  RealStatisticalValue Histogram::mean() const
  {
    TRACE;
    const Histogram scaledCurve=scaled();
    const int classCount=count()-1;

    double totalArea=0.0, weightedSum=0.0;
    for(int i=0; i<classCount; i++) {
      const Point2D& lower=scaledCurve.constAt(i);
      const double upperX=scaledCurve.x(i+1);
      const double area=lower.y()*(upperX-lower.x());
      const double centre=0.5*(upperX+lower.x());
      totalArea+=area;
      weightedSum+=centre*area;
    }
    return scaledCurve.real(weightedSum/totalArea);
  }

  /*!
    Returns the median of the histogram: the X value where the cumulated area
    exceeds 0.5, linearly interpolated inside the class where this happens. The
    computation is done on the scaled X axis (see scaled()) and the result is
    converted by real().

    The threshold is fixed at 0.5. If the cumulated area never exceeds it, an
    average of 0 is passed to real().
  */
  RealStatisticalValue Histogram::median() const
  {
    TRACE;
    const Histogram scaledCurve=scaled();
    const int classCount=count()-1;

    double cumulated=0.0;
    for(int i=0; i<classCount; i++) {
      const Point2D& lower=scaledCurve.constAt(i);
      const double upperX=scaledCurve.x(i+1);
      const double area=lower.y()*(upperX-lower.x());
      cumulated+=area;
      if(cumulated>0.5) {
        // (cumulated-0.5)/area is the fraction of the class located above the median
        return scaledCurve.real(lower.x()+(1.0-(cumulated-0.5)/area)*(upperX-lower.x()));
      }
    }
    return scaledCurve.real(0.0);
  }

  /*!
    Returns the mode of the histogram: the centre of the class with the largest
    area (Y times width). If several classes share the largest area, the first
    one is kept. The computation is done on the scaled X axis (see scaled()) and
    the result is converted by real().

    If no class has a strictly positive area, an average of 0 is passed to real().
  */
  RealStatisticalValue Histogram::mode() const
  {
    const Histogram scaledCurve=scaled();
    const int classCount=count()-1;

    double bestArea=0.0;
    double bestCentre=0.0;
    for(int i=0; i<classCount; i++) {
      const Point2D& lower=scaledCurve.constAt(i);
      const double upperX=scaledCurve.x(i+1);
      const double area=lower.y()*(upperX-lower.x());
      if(area>bestArea) {
        bestArea=area;
        bestCentre=0.5*(upperX+lower.x());
      }
    }
    return scaledCurve.real(bestCentre);
  }

  /*!
    A non null value must be available at index \a i. This function increments \a i
    and populates histogram \a hist until finding a null value. The mean value is
    computed and expanded to x0:
       x0=mean*(1+\a range)   if scale is log
       x0=mean+\a range       otherwise
    \a i is incremented again as long as x0 is not reached or values are non null.
    The same process is repeated until getting a null value at x0.
  */
  void Histogram::expand(Histogram& hist, int& i, double range) const
  {
    // On a log scale the tolerance is applied as a factor (1+range)
    if(_sampling & LogScale) {
      range+=1.0;
    }
    const int n=count();
    // The first pass only collects the contiguous non null values (seed), the
    // following passes also require to reach the expanded limit.
    bool seeded=false;
    double limit=0.0;
    for(;;) {
      if(i==n) {
        return;
      }
      // Collect points while values are non null or, once seeded, while the
      // current limit is not reached
      while(y(i)>0.0 || (seeded && x(i)<limit)) {
        hist.append(constAt(i++));
        if(i==n) {
          return;
        }
      }
      // The null value closing the last collected class
      hist.append(constAt(i++));
      // Update the limit from the mean of what has been collected so far
      const double centre=hist.mean().mean();
      limit=(_sampling & LogScale) ? centre*range : centre+range;
      // After the seed pass, stop as soon as the last appended point is at or
      // beyond the updated limit
      if(seeded && x(i-1)>=limit) {
        return;
      }
      seeded=true;
    }
  }

  /*!
    Automatically pick all maxima

    The largest \a maxCount maxima are picked.

    \a range is the relative or absolute tolerance for null values around the mean.

    A picked value is accepted if its error is less than \a maxError.
  */
  ::QVector<RealStatisticalValue> Histogram::pick(int maxCount, double range, double maxError) const
  {
    ASSERT(lastY()==0.0);
    ::QVector<RealStatisticalValue> values;
    // Upper limit accepted for the standard deviation of a picked value. On a log
    // scale the comparison is made against 1+maxError.
    const double maxStddev=(_sampling & LogScale) ? 1.0+maxError : maxError;
    const int n=count();
    int i=0;
    while(i<n) {
      // Skip null values until the beginning of the next group
      while(i<n && y(i)==0.0) {
        i++;
      }
      // Collect the group starting at i, i is moved after the group
      Histogram h(_sampling);
      expand(h, i, range);
      if(h.isEmpty()) {
        continue;
      }
      // The area of the group before normalization is used as the weight
      const double w=h.normalize();
      RealStatisticalValue v=h.mean();
      if(v.stddev()<maxStddev) {
        v.setWeight(w);
        values.append(v);
      }
    }
    std::sort(values.begin(), values.end(), RealStatisticalValue::compareWeights);
    // Keep at most maxCount values by dropping the first ones after sorting
    // (presumably the lowest weights, depends on compareWeights - confirm).
    // A negative maxCount is handled like 0: all values are dropped.
    const int keep=maxCount>0 ? maxCount : 0;
    const int excess=values.count()-keep;
    if(excess>0) {
      values.erase(values.begin(), values.begin()+excess);
    }
    return values;
  }

} // namespace QGpCoreStat
