/**
 * \brief Implementation of Histogram class.
 *
 * Copyright (c) 2012-2015, OpenGeoSys Community (http://www.opengeosys.org)
 * Distributed under a Modified BSD License.
 * See accompanying file LICENSE.txt or
 * http://www.opengeosys.org/project/license
 */
#ifndef BASELIB_HISTOGRAM_H
#define BASELIB_HISTOGRAM_H
#include <algorithm>
#include <cmath>
#include <iterator>
#include <iostream>
#include <fstream>
Karsten Rink
committed
#include <vector>
Karsten Rink
committed
{
/** Basic Histogram implementation.
*
* Creates histogram from input data of type \c T.
*/
template <typename T>
class Histogram
{
typedef typename std::vector<T> Data; /// Underlying input data vector type.
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
public:
/** Creates histogram of the given element in the range \c [first, last).
*
* Input data is copied into \c std::vector.
*
* \param data Range of elements to create histogram from.
* \param nr_bins Number of bins in histogram.
* \param computeHistogram Compute histogram if set. If not set user must
* call \c update() before accessing data.
*/
template <typename InputIterator>
Histogram(InputIterator first, InputIterator last, const int nr_bins = 16,
const bool computeHistogram = true )
: _data(first, last), _nr_bins(nr_bins)
{
init(computeHistogram);
}
/** Creates histogram from \c std::vector.
* \param data Input vector.
* \param nr_bins Number of bins in histogram.
* \param computeHistogram Compute histogram if set. If not set user must call
* \c update() before accessing data.
*/
Histogram(std::vector<T> const& data, const unsigned int nr_bins = 16,
const bool computeHistogram = true)
: _data(data), _nr_bins(nr_bins)
{
init(computeHistogram);
}
/** Updates histogram using sorted \c _data vector.
*
* Start histogram creation with first element. Then find first element in
* the next histogram bin. Number of elments in the bin is the difference
* between these two iterators.
* \verbatim
[0.1, 0.2, ..., 0.7 , ..., 0.7+binWidth = 0.9, 1.0 , ..., last]
it itEnd - 1 itEnd
\endverbatim
*/
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
{
if (!_dirty)
return;
_bin_width = (_max - _min) / _nr_bins;
typedef typename Data::const_iterator DataCI;
DataCI it = _data.begin();
DataCI itEnd;
for (unsigned int bin = 0; bin < _nr_bins; bin++)
{
itEnd = std::upper_bound(it, (DataCI)_data.end(),
_min + (bin + 1) * _bin_width);
_histogram[bin] = std::distance(it, itEnd);
it = itEnd;
}
_dirty = false;
}
void setMinimum(const T& minimum) { _min = minimum; _dirty = true; }
void setMaximum(const T& maximum) { _max = maximum; _dirty = true; }
const Data& getSortedData() const { return _data; }
const std::vector<std::size_t>& getBinCounts() const { return _histogram; }
const unsigned int& getNrBins() const { return _nr_bins; }
const T& getMinimum() const { return _min; }
const T& getMaximum() const { return _max; }
const T& getBinWidth() const { return _bin_width; }
void
prettyPrint(std::ostream& os, const unsigned int line_width = 16) const
{
const std::size_t count_max =
*std::max_element(_histogram.begin(), _histogram.end());
for (unsigned int bin = 0; bin < _nr_bins; ++bin)
{
os << "[" << _min + bin * _bin_width << ", " << _min +
(bin + 1) * _bin_width << ")\t";
os << _histogram[bin] << "\t";
const int n_stars =
std::ceil(line_width * ((double)_histogram[bin] / count_max));
for (int star = 0; star < n_stars; star++)
os << "*";
os << "\n";
}
}
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
int write(std::string const& file_name, std::string const& data_set_name, std::string const& param_name) const
{
if (file_name.empty())
{
ERR ("No file name specified.");
return 1;
}
std::ofstream out (file_name);
if (!out)
{
ERR("Error writing histogram: Could not open file.");
return 1;
}
out << "# Histogram for parameter " << param_name << " of data set " << data_set_name << "\n";
std::size_t const n_bins = this->getNrBins();
std::vector<size_t> const& bin_cnts(this->getBinCounts());
double const min (this->getMinimum());
double const bin_width (this->getBinWidth());
for (size_t k(0); k < n_bins; k++)
out << min+k*bin_width << " " << bin_cnts[k] << "\n";
out.close ();
return 0;
}
protected:
/** Initialize class members after constructor call.
*/
void init(const bool computeHistogram = true)
{
std::sort(_data.begin(), _data.end());
_histogram.resize(_nr_bins);
_min = _data.front();
_max = _data.back();
_bin_width = (_max - _min) / _nr_bins;
_dirty = true;
if (computeHistogram)
update();
}
Data _data;
const unsigned int _nr_bins;
std::vector<std::size_t> _histogram;
T _min, _max; ///< Minimum and maximum input data values.
T _bin_width;
private:
bool _dirty; ///< When set \c update() will recompute histogram.
Karsten Rink
committed
};
/** Writes histogram to output stream.
 *
 * Writes histogram properties in this order:
 * number of bins, minimum, maximum, bin0 count, ..., binN-1 count.
 * All values are separated by a single space; the line is terminated with
 * \c std::endl.
 *
 * \param os Stream to write to.
 * \param h  Histogram to write.
 * \return The stream \c os.
 */
template <typename T>
std::ostream&
operator<<(std::ostream& os, const Histogram<T>& h)
{
    os << h.getNrBins() << " "
       << h.getMinimum() << " "
       << h.getMaximum() << " ";
    // Bin counts are std::size_t regardless of the data type T, so they are
    // streamed as such instead of being converted to T first.
    const std::vector<std::size_t>& bin_counts = h.getBinCounts();
    std::copy(bin_counts.begin(), bin_counts.end(),
              std::ostream_iterator<std::size_t>(os, " "));
    return os << std::endl;
}
Karsten Rink
committed
#endif // BASELIB_HISTOGRAM_H