/***************************************************************************
**
**  This file is part of QGpCoreStat.
**
**  This library is free software; you can redistribute it and/or
**  modify it under the terms of the GNU Lesser General Public
**  License as published by the Free Software Foundation; either
**  version 2.1 of the License, or (at your option) any later version.
**
**  This file is distributed in the hope that it will be useful, but WITHOUT
**  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
**  FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
**  License for more details.
**
**  You should have received a copy of the GNU Lesser General Public
**  License along with this library; if not, write to the Free Software
**  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
**
**  See http://www.geopsy.org for more information.
**
**  Created: 2012-12-19
**  Copyright: 2012-2019
**    Marc Wathelet (ISTerre, Grenoble, France)
**
***************************************************************************/

#include "Histogram.h"

namespace QGpCoreStat {

  /*!
    \class Histogram Histogram.h
    \brief A histogram built as an extension of Curve<Point2D>

    There is no sample outside the first and the last class.
    The Y value of the last point is ignored and assumed to be null: the last
    point only marks the upper limit of the last class.
  */

  /*!
    Returns a copy of this histogram on which xLog() has been called if the
    LogScale flag of the sampling is set, or xInverse() if the InverseScale
    flag is set. LogScale is tested first. With none of these flags, the copy
    is returned as is.
  */
  Histogram Histogram::scaled() const
  {
    Histogram transformed(*this);
    if(_sampling & LogScale) {
      transformed.xLog();
      return transformed;
    }
    if(_sampling & InverseScale) {
      transformed.xInverse();
    }
    return transformed;
  }

  /*!
    Flags the values as periodic (\a p is true) or not (\a p is false).

    Switching the flag on also forces the sampling to LinearScale. Switching
    it off leaves the sampling untouched. The period is derived from the
    abscissas of the points, see period().
  */
  void Histogram::setPeriodic(bool p)
  {
    _periodic=p;
    if(!p) {
      return;
    }
    _sampling=LinearScale;
  }

  /*!
    Returns the period of a periodic histogram: the abscissa of the last point
    minus the abscissa of the point at index 1.

    No bound check is made here, the point at index 1 must exist.
  */
  double Histogram::period() const
  {
    // The first and the last class of a periodic value are equivalent: they
    // are an artificial split of one and the same class. Hence the period is
    // not exactly max-min, the width of the first class is left out.
    const double upperLimit=constAt(count()-1).x();
    const double secondLimit=constAt(1).x();
    return upperLimit-secondLimit;
  }

  /*!
    Packs \a average, \a stddev and \a weight into a RealStatisticalValue.

    \a average and \a stddev are converted according to the sampling flags:
      LogScale:      mean=exp(average), stddev=exp(stddev)
      InverseScale:  mean=1/average,    stddev=1/stddev
      otherwise:     both are stored unchanged
    LogScale is tested first. \a weight is always stored unchanged.
  */
  RealStatisticalValue Histogram::real(double average, double stddev, double weight) const
  {
    double m=average;
    double s=stddev;
    if(_sampling & LogScale) {
      m=exp(average);
      s=exp(stddev);
    } else if(_sampling & InverseScale) {
      m=1.0/average;
      s=1.0/stddev;
    }
    RealStatisticalValue value;
    value.setMean(m);
    value.setStddev(s);
    value.setWeight(weight);
    return value;
  }

  double Histogram::normalize()
  {
    Curve<Point2D> curve=scaled();
    int n=count()-1;
    double sum=0.0;
    for(int i=0; i<n; i++) {
      const Point2D& p=curve.constAt(i);
      sum+=p.y()*(curve.x(i+1)-p.x());
    }
    if(sum>0.0) {
      double fac=1.0/sum;
      for(int i=0; i<n; i++) {
        setY(i, y(i)*fac);
      }
    }
    return sum;
  }

  /*!
    Returns the cumulative curve of this histogram, or an empty curve if the
    scaled() curve is empty.

    The integration (y times class width) is made on the scaled() curve. The
    first point of the result has a null Y, then one point is added at the
    upper limit of each integrated class. For a periodic histogram, the
    integration starts at the index returned by indexAfter() for the mean plus
    half a period, runs to the last class and wraps around to the first
    classes. Otherwise all classes are integrated from index 0.

    Before returning, xExp() (LogScale) or xInverse() (InverseScale) is called
    on the result, which is finally sorted.
  */
  Curve<Point2D> Histogram::cumulative() const
  {
    Curve<Point2D> scaledCurve=scaled();
    Curve<Point2D> result;
    if(scaledCurve.isEmpty()) {
      return result;
    }
    int start=0;
    if(_periodic) {
      const double T=Histogram::period();
      start=scaledCurve.indexAfter(mean().mean()+0.5*T);
    }
    scaledCurve.firstValid(start);
    result.append(Point2D(scaledCurve.constAt(start).x(), 0.0));

    // Periodic: classes [start, classCount) then [0, start).
    // Not periodic: classes [0, classCount) whatever the value of start, the
    // second pass is then empty.
    const int classCount=count()-1;
    const int from[2]={_periodic ? start : 0, 0};
    const int to[2]={classCount, _periodic ? start : 0};
    double area=0.0;
    for(int pass=0; pass<2; pass++) {
      for(int i=from[pass]; i<to[pass]; i++) {
        const Point2D& lower=scaledCurve.constAt(i);
        area+=lower.y()*(scaledCurve.x(i+1)-lower.x());
        result.append(Point2D(scaledCurve.x(i+1), area));
      }
    }

    if(_sampling & LogScale) {
      result.xExp();
    } else if(_sampling & InverseScale) {
      result.xInverse();
    }
    result.sort();
    return result;
  }

  /*!
    Adds all classes of this histogram to \a stat.

    Each class is added at its centre with a weight equal to y times the class
    width. Classes whose weight is not strictly positive are skipped.
  */
  void Histogram::addClasses(Statistics& stat) const
  {
    const int classCount=count()-1;
    for(int i=0; i<classCount; i++) {
      const Point2D& lower=constAt(i);
      const double upperX=x(i+1);
      const double mass=lower.y()*(upperX-lower.x());
      if(!(mass>0.0)) {
        continue;
      }
      stat.add(0.5*(upperX+lower.x()), mass);
    }
  }

  /*!
    Feeds \a stat with the classes of this histogram through meanAdd().

    Interior classes are added at their centre with a weight equal to y times
    the class width. The first and the last classes are merged into a single
    one: the sum of their Y values times the width of the first class, added
    at the centre of the first class. Weights that are not strictly positive
    are skipped.
  */
  void Histogram::meanAddClasses(PeriodicStatistics& stat) const
  {
    const int lastClass=count()-2;
    // Interior classes only, the first and the last classes are handled below
    for(int i=1; i<lastClass; i++) {
      const Point2D& lower=constAt(i);
      const double upperX=x(i+1);
      const double mass=lower.y()*(upperX-lower.x());
      if(mass>0.0) {
        stat.meanAdd(0.5*(upperX+lower.x()), mass);
      }
    }
    // First and last classes unified into a single class
    const Point2D& head=constAt(0);
    const Point2D& tail=constAt(lastClass);
    const double secondX=x(1);
    const double mergedMass=(head.y()+tail.y())*(secondX-head.x());
    if(mergedMass>0.0) {
      stat.meanAdd(0.5*(secondX+head.x()), mergedMass);
    }
  }

  /*!
    Calls varianceReset() on \a stat and feeds it with the classes of this
    histogram through varianceAdd().

    Interior classes are added at their centre with a weight equal to y times
    the class width. The first and the last classes are merged into a single
    one: the sum of their Y values times the width of the first class, added
    at the centre of the first class. Weights that are not strictly positive
    are skipped.
  */
  void Histogram::varianceAddClasses(PeriodicStatistics& stat) const
  {
    stat.varianceReset();
    const int lastClass=count()-2;
    // Interior classes only, the first and the last classes are handled below
    for(int i=1; i<lastClass; i++) {
      const Point2D& lower=constAt(i);
      const double upperX=x(i+1);
      const double mass=lower.y()*(upperX-lower.x());
      if(mass>0.0) {
        stat.varianceAdd(0.5*(upperX+lower.x()), mass);
      }
    }
    // First and last classes unified into a single class
    const Point2D& head=constAt(0);
    const Point2D& tail=constAt(lastClass);
    const double secondX=x(1);
    const double mergedMass=(head.y()+tail.y())*(secondX-head.x());
    if(mergedMass>0.0) {
      stat.varianceAdd(0.5*(secondX+head.x()), mergedMass);
    }
  }

  /*!
    Returns the variance computed relative to \a average, which can be any of
    mean, median or mode. If \a average is the median, the median deviation is
    returned.

    \a weight is set to the weight reported by the statistics object fed with
    the classes of this histogram (a PeriodicStatistics built with period() if
    the histogram is periodic, a Statistics otherwise).
  */
  double Histogram::variance(double average, double& weight) const
  {
    TRACE;
    if(!_periodic) {
      Statistics stat;
      addClasses(stat);
      weight=stat.weight();
      return stat.variance(average);
    }
    PeriodicStatistics stat(period());
    stat.setAverage(average);
    varianceAddClasses(stat);
    weight=stat.weight();
    return stat.variance();
  }

  /*!
    Returns the square root of variance(\a average, \a weight). A negative
    variance is replaced by zero before taking the square root.

    \a weight is set by variance().
  */
  double Histogram::stddev(double average, double& weight) const
  {
    TRACE;
    const double var=variance(average, weight);
    return sqrt(var<0.0 ? 0.0 : var);
  }

  /*!
    Returns the mean, the standard deviation and the weight of this histogram,
    converted by real().

    A periodic histogram is processed with a PeriodicStatistics built with
    period(), fed by meanAddClasses() and then by varianceAddClasses().
    Otherwise the classes of the scaled() histogram are added to a Statistics.
  */
  RealStatisticalValue Histogram::mean() const
  {
    TRACE;
    if(!_periodic) {
      const Histogram scaledHist=scaled();
      Statistics stat;
      scaledHist.addClasses(stat);
      return real(stat.mean(), stat.stddev(), stat.weight());
    }
    PeriodicStatistics stat(period());
    meanAddClasses(stat);
    varianceAddClasses(stat);
    return real(stat.mean(), stat.stddev(), stat.weight());
  }

  /*!
    Returns the median: the abscissa at which the cumulated area (y times
    class width) exceeds 0.5, linearly interpolated inside the class where
    this happens. The standard deviation and the weight are computed by
    stddev() relative to that median and the result is converted by real().

    A periodic histogram is scanned from the index returned by indexAfter()
    for the mean plus half a period up to the last class, then from the first
    class up to that index. Otherwise the scaled() histogram is scanned from
    its first class.

    If the cumulated area never exceeds 0.5, a quiet NaN is returned.
  */
  RealStatisticalValue Histogram::median() const
  {
    TRACE;
    const int classCount=count()-1;
    double cumulated=0.0;
    if(_periodic) {
      // Unification of first and last classes not required here
      // because no square of the class weight is taken.
      const double T=Histogram::period();
      int start=indexAfter(mean().mean()+0.5*T);
      firstValid(start);
      // First pass: [start, classCount), second pass: [0, start)
      const int from[2]={start, 0};
      const int to[2]={classCount, start};
      for(int pass=0; pass<2; pass++) {
        for(int i=from[pass]; i<to[pass]; i++) {
          const Point2D& lower=constAt(i);
          const double upperX=x(i+1);
          const double mass=lower.y()*(upperX-lower.x());
          cumulated+=mass;
          if(cumulated>0.5) {
            const double m=lower.x()+(1.0-(cumulated-0.5)/mass)*(upperX-lower.x());
            double w;
            const double s=stddev(m, w);
            return real(m, s, w);
          }
        }
      }
    } else {
      const Histogram scaledHist=scaled();
      for(int i=0; i<classCount; i++) {
        const Point2D& lower=scaledHist.constAt(i);
        const double upperX=scaledHist.x(i+1);
        const double mass=lower.y()*(upperX-lower.x());
        cumulated+=mass;
        if(cumulated>0.5) {
          const double m=lower.x()+(1.0-(cumulated-0.5)/mass)*(upperX-lower.x());
          double w;
          const double s=scaledHist.stddev(m, w);
          return real(m, s, w);
        }
      }
    }
    return std::numeric_limits<double>::quiet_NaN();
  }

  /*!
    Returns the mode: the centre of the class with the largest weight (y times
    class width). The standard deviation and the weight are computed by
    stddev() relative to that mode and the result is converted by real().

    For a periodic histogram, the first and the last classes are merged (sum
    of their Y values times the width of the first class, located at the
    centre of the first class) and compared to the interior classes. Otherwise
    the classes of the scaled() histogram are compared.

    If several classes have the same largest weight, the first one is kept. If
    no class has a strictly positive weight in the non periodic case, the mode
    is 0.
  */
  RealStatisticalValue Histogram::mode() const
  {
    double best=0.0;
    double bestMass=0.0;
    if(_periodic) {
      const int lastClass=count()-2;
      // Unification of the first and last classes
      const Point2D& head=constAt(0);
      const double secondX=x(1);
      bestMass=(head.y()+constAt(lastClass).y())*(secondX-head.x());
      best=0.5*(secondX+head.x());
      for(int i=1; i<lastClass; i++) {
        const Point2D& lower=constAt(i);
        const double upperX=x(i+1);
        const double mass=lower.y()*(upperX-lower.x());
        if(mass>bestMass) {
          bestMass=mass;
          best=0.5*(upperX+lower.x());
        }
      }
      double w;
      const double s=stddev(best, w);
      return real(best, s, w);
    }
    const int classCount=count()-1;
    const Histogram scaledHist=scaled();
    for(int i=0; i<classCount; i++) {
      const Point2D& lower=scaledHist.constAt(i);
      const double upperX=scaledHist.x(i+1);
      const double mass=lower.y()*(upperX-lower.x());
      if(mass>bestMass) {
        bestMass=mass;
        best=0.5*(upperX+lower.x());
      }
    }
    double w;
    const double s=scaledHist.stddev(best, w);
    return scaledHist.real(best, s, w);
  }

  /*!
    A non null value must be available at index \a i. This function
    increments \a i and populates histogram \a hist until a null value is
    found. The mean value of \a hist is then computed and expanded to x0:
       x0=mean*(1+\a range)   if scale is log
       x0=mean+\a range       otherwise
    \a i is incremented again, still populating \a hist, as long as values are
    non null or x0 is not reached. The same process is repeated until a null
    value is found at or beyond x0.

    The function returns immediately if \a i is already equal to the number of
    points, and as soon as \a i reaches it.
  */
  void Histogram::expand(Histogram& hist, int& i, double range) const
  {
    const int n=count();
    if(i==n) {
      return;
    }
    if(_sampling & LogScale) {
      // The relative tolerance becomes a multiplication factor
      range+=1.0;
    }
    // Get a first set from which a first mean can be obtained
    while(y(i)>0.0) {
      hist.append(constAt(i));
      i++;
      if(i==n) {
        return;
      }
    }
    // Null value closing the first set
    hist.append(constAt(i));
    i++;
    double limit=hist.mean().mean();
    if(_sampling & LogScale) {
      limit*=range;
    } else {
      limit+=range;
    }
    while(i!=n) {
      // Go on as long as values are not null or the limit is not reached
      while(y(i)>0.0 || x(i)<limit) {
        hist.append(constAt(i));
        i++;
        if(i==n) {
          return;
        }
      }
      hist.append(constAt(i));
      i++;
      // The mean may have moved with the new samples, update the limit
      limit=hist.mean().mean();
      if(_sampling & LogScale) {
        limit*=range;
      } else {
        limit+=range;
      }
      if(x(i-1)>=limit) {
        break;
      }
    }
  }

  /*!
    Automatically picks all maxima.

    The histogram is split into groups of classes by expand(), each group
    starting at the first non null value found after the previous group. Each
    group is normalized and its mean is computed. The weight of a picked value
    is the area of its group returned by normalize().

    \a range is the relative or absolute tolerance for null values around the
    mean, it is forwarded to expand().

    A picked value is accepted if its error (standard deviation) is less than
    \a maxError. With a log scale, the standard deviation is compared to
    1+\a maxError.

    Only the \a maxCount values with the largest weights are kept, according
    to the order defined by RealStatisticalValue::compareWeights. An empty
    list is returned for an empty histogram.
  */
  ::VectorList<RealStatisticalValue> Histogram::pick(int maxCount, double range, double maxError) const
  {
    ::VectorList<RealStatisticalValue> values;
    const int n=count();
    if(n==0) {
      // Nothing to pick. Returning here also prevents the sanity check below
      // from reading a point at index -1.
      return values;
    }
    int index=n-1;
    lastValid(index);
    ASSERT(constAt(index).y()==0.0);
    // Same acceptance test for all scales, only the threshold differs
    const double stddevLimit=(_sampling & LogScale) ? 1.0+maxError : maxError;
    int i=0;
    while(i<n) {
      // Skip null values separating two groups
      while(i<n && y(i)==0.0) {
        i++;
      }
      Histogram h(_sampling, _periodic);
      expand(h, i, range);
      if(h.isEmpty()) {
        continue;
      }
      const double w=h.normalize();
      RealStatisticalValue v=h.mean();
      if(v.stddev()<stddevLimit) {
        v.setWeight(w);
        values.append(v);
      }
    }
    std::sort(values.begin(), values.end(), RealStatisticalValue::compareWeights);
    // Drop values from the front of the sorted list until at most maxCount remain
    while(maxCount<values.count()) {
      values.removeFirst();
    }
    return values;
  }

} // namespace QGpCoreStat
