Histogram.cpp

#include "Histogram.h"
#include "testing.h"
#include "matlib.h"
#include "LineChart.h"

using namespace std;




/**
*    This function computes the data needed to plot a histogram
*    we have two vectors - one containing numBuckets+1 values which
*    indicate the x coordinates of each "fence post" in our histogram
*    and another containing numBuckets values which contains the heights
*    of each fence post.
*
*   All vectors are col vectors
*/
static void histogramData(const Matrix& values,
	int numBuckets,
	Matrix& fencePosts,
	Matrix& heights) {

	fencePosts = Matrix(numBuckets + 1, 1);
	heights = Matrix(numBuckets, 1);

	ASSERT(values.nRows()>0);
	Matrix sortedValues = sortCols(values);

	double start = minOverCols(values).asScalar();
	double end = maxOverCols(values).asScalar();

	double width = (end - start) / numBuckets;
	for (int i = 0; i<numBuckets + 1; i++) {
		fencePosts(i, 0) = start + i*width;
	}
	fencePosts(numBuckets) = end;
	int currentFence = 0;
	int numPoints = sortedValues.nRows();
	double fenceEnd = fencePosts(currentFence + 1, 0);
	for (int i = 0; i<numPoints; i++) {
		double value = sortedValues(i, 0);
		while (value>fenceEnd) {
			currentFence++;
			fenceEnd = fencePosts(currentFence + 1, 0);
		}
		heights(currentFence, 0)++;
	}

}

/**
*  Having computed the fence posts and fence heights for our histogram
*  we can convert this to a series of x and y points to plot. Google charts
*  works fine with repeated x points, so this will give a nice looking result.
*/
static void fencePostsToPlotPoints(const Matrix& fencePosts,
	const Matrix& heights,
	Matrix& xPoints,
	Matrix& yPoints) {
	int n = heights.nRows();
	xPoints = Matrix(4 * n, 1);
	yPoints = Matrix(4 * n, 1);
	int count = 0;
	for (int i = 0; i<n; i++) {
		double x1 = fencePosts(i, 0);
		double x2 = fencePosts(i + 1, 0);
		double y = heights(i, 0);
		xPoints(count, 0) = x1;
		yPoints(count, 0) = 0.0;
		count++;
		xPoints(count, 0) = x1;
		yPoints(count, 0) = y;
		count++;
		xPoints(count, 0) = x2;
		yPoints(count, 0) = y;
		count++;
		xPoints(count, 0) = x2;
		yPoints(count, 0) = 0;
		count++;
	}
	ASSERT(count == 4 * n);
}


//////////////////////////////////
// Histogram class
/////////////////////////////////

Histogram::Histogram() :
numBuckets(10),
title("A Histogram") {
}

void Histogram::setTitle(const string& t) {
	title = t;
}

void Histogram::setData(const vector<double>& v) {
	data = Matrix(v);
}

void Histogram::setNumBuckets(int n) {
	numBuckets = n;
}


void Histogram::writeAsHTML(ostream& out) const {
	Matrix fencePosts, heights, x, y;
	histogramData(data, numBuckets, fencePosts, heights);
	fencePostsToPlotPoints(fencePosts, heights, x, y);
	LineChart lc;
	lc.setTitle(title);
	lc.setSeries(x.colVector(), y.colVector());
	lc.writeAsHTML(out);
}

void Histogram::writeAsHTML(const string& file) const {
	ofstream out;
	out.open(file.c_str());
	writeAsHTML(out);
	out.close();
}




//////////////////////////////////////////////////////
//
//   TESTS
//
//////////////////////////////////////////////////////



void testHistogramData() {
	// a good test that is completely automated
	Matrix data("1;2;3;4;5");

	Matrix fencePosts;
	Matrix heights;
	histogramData(data,
		3,
		fencePosts,
		heights);

	ASSERT(fencePosts.nRows() == 4);
	ASSERT(heights.nRows() == 3);
	ASSERT_APPROX_EQUAL(fencePosts(0), 1.0, 0.001);
	ASSERT_APPROX_EQUAL(fencePosts(3), 5.0, 0.001);
	ASSERT_APPROX_EQUAL(sumCols(Matrix(heights)).asScalar(), 5.0, 0.001);
}

void testFencePostsToPlotPoints() {
	// a good test that is completely automated
	Matrix fencePosts("1;2;3");
	Matrix heights("1.0;-1.0");

	Matrix x, y;
	fencePostsToPlotPoints(fencePosts, heights, x, y);

	ASSERT_APPROX_EQUAL(x(0), 1.0, 0.001);
	ASSERT_APPROX_EQUAL(x(1), 1.0, 0.001);
	ASSERT_APPROX_EQUAL(x(2), 2.0, 0.001);
	ASSERT_APPROX_EQUAL(x(3), 2.0, 0.001);
	ASSERT_APPROX_EQUAL(x(4), 2.0, 0.001);

	ASSERT_APPROX_EQUAL(y(0), 0.0, 0.001);
	ASSERT_APPROX_EQUAL(y(1), 1.0, 0.001);
	ASSERT_APPROX_EQUAL(y(2), 1.0, 0.001);
	ASSERT_APPROX_EQUAL(y(3), 0.0, 0.001);
	ASSERT_APPROX_EQUAL(y(4), 0.0, 0.001);
	ASSERT_APPROX_EQUAL(y(5), -1.0, 0.001);

}

static void testAll() {
	// a somewhat unsatisfactory test since it writes a file, but pretty much
	// everything it uses has been tested individually.
	Histogram h;
	h.setTitle("Uniform distribution");
	h.setData(randuniform(1000, 1).colVector());
	h.writeAsHTML("uniformDistributionHistogram.html");
}

void testHistogram() {
	TEST(testHistogramData);
	TEST(testFencePostsToPlotPoints);
	TEST(testAll);
}