#!/usr/bin/ruby
require 'yaml'
require 'matrix'
require './stats.rb'

# Without an input file there is nothing to do: print the usage line and quit.
if ARGV.size.zero?
	puts "Usage: ./cov.rb file.csv"
	exit
end

# A small hack in order to get a mutable matrix.
# Newer Rubies (2.6+) already ship a public Matrix#[]= with bounds checking
# and range support; that implementation must not be clobbered, so the simple
# setter below is only defined when no public setter exists yet.
class Matrix
	unless method_defined?(:[]=)
		# Stores x at row i, column j (both zero-based integers).
		def []=(i, j, x)
			@rows[i][j] = x
		end
	end
end

# Pick all columns for inspection: the first line of the CSV holds the column
# names, and every column index is selected.
# The block form closes the file even when reading raises (e.g. EOFError on an
# empty file). chomp keeps the line terminator out of the last column name,
# and the -1 limit keeps trailing empty names so the column count still
# matches the number of comma-separated fields.
column_names = File.open(ARGV.first, 'r') { |h| h.readline.chomp.split(',', -1) }
columns = [*0...(column_names.length)]

# Compute statistics (especially the average) for all columns.
puts "Counting statistics"
s = stats(ARGV[0], columns)

puts "Starting to count covariances"
cov = Matrix.zero(columns.length)

# Accumulate, for every column pair i <= j, the sum over all rows of
# (x_i - avg_i) * (x_j - avg_j). Only the upper triangle is filled here.
# NOTE(review): IO.foreach also visits the first line of the file, which was
# already read as the column-name header; its fields parse to 0.0 via to_f and
# it is counted in rowcount. Confirm how stats() treats that line before
# changing this.
avg = s['avg']
column_count = columns.length
rowcount = 0
IO.foreach(ARGV.first) { |row|
	rowcount += 1
	data = row.split(',').map { |field| field.to_f }
	# A short or blank row would otherwise fail with an opaque NoMethodError
	# on nil in the middle of the accumulation.
	if data.length < column_count
		raise "Row #{rowcount} has only #{data.length} fields, expected #{column_count}"
	end
	# Each deviation is computed once per row instead of once per pair.
	dev = Array.new(column_count) { |k| data[k] - avg[k] }
	column_count.times { |i|
		i.upto(column_count - 1) { |j|
			cov[i,j] = cov[i,j] + dev[i] * dev[j]
		}
	}
	puts "Row: #{rowcount}" if rowcount % 100 == 0
}

# The sample covariance divides by (rowcount - 1), so two rows are the minimum.
raise "Not enough rows (only #{rowcount})!" if rowcount < 2

puts "Finishing up covariance matrix"
# Turn the accumulated sums into sample covariances (divide by rowcount - 1)
# and mirror the upper triangle into the lower one.
columns.length.times { |r|
	r.upto(columns.length - 1) { |c|
		scaled = cov[r,c] / (rowcount.to_f - 1.0)
		cov[r,c] = scaled
		cov[c,r] = scaled
	}
}

# A column whose variance (diagonal covariance entry) equals zero is flagged
# for dropping; drop[k] is true for every such column index k.
drop = columns.map { |idx| cov[idx,idx] == 0 }
# Names of the flagged columns, in their original order.
dropped_columns = column_names.select.with_index { |_name, idx| drop[idx] }

unless dropped_columns.empty?
	puts "Dropping columns #{dropped_columns.join(', ')}"
end
puts "Building correlation matrix"

# Indices (into cov) of the columns that are not dropped. A column's position
# in this list is its row/column index in the two reduced matrices.
kept = (0...columns.length).reject { |c| drop[c] }

# Both matrices start as identities: the correlation diagonal is 1 by
# definition, and every other cell is overwritten below.
cor = Matrix.I(kept.length)
cov_reduced = Matrix.I(kept.length)
kept.each_with_index { |cov_i, cor_i|
	# The reduced covariance matrix carries the variances on its diagonal.
	cov_reduced[cor_i,cor_i] = cov[cov_i,cov_i]
	# Pair this column with every later kept column. The partner's reduced
	# index starts at cor_i + 1, not 0, so each value lands in its own
	# off-diagonal cell and the diagonal is never overwritten.
	((cor_i + 1)...kept.length).each { |cor_j|
		cov_j = kept[cor_j]
		cor[cor_j,cor_i] = cor[cor_i,cor_j] = cov[cov_i,cov_j] / Math.sqrt(cov[cov_i,cov_i] * cov[cov_j,cov_j])
		cov_reduced[cor_j,cor_i] = cov_reduced[cor_i,cor_j] = cov[cov_i,cov_j]
	}
}

# Derive the output name by replacing the input's extension with
# "_objects.yaml". File.extname only inspects the last path component, so an
# input without an extension (or with a dot in a directory name) keeps its
# full stem instead of being truncated or emptied.
input_path = ARGV.first
extension = File.extname(input_path)
stem = extension.empty? ? input_path : input_path[0...-extension.length]
file_name = stem + '_objects.yaml'

# Dump everything computed above into a single YAML document.
File.open(file_name, 'w') { |f|
	f.write YAML.dump({
		'statistics' => s,
		'cov_matrix' => cov,
		'cov_matrix_r' => cov_reduced,
		'cor_matrix' => cor,
		'columns' => column_names,
		'dropped_columns' => dropped_columns
	})
}

puts "Objects written to #{file_name}"
