#!/usr/bin/ruby
require 'yaml'
require './stats.rb'

# Pick all columns for inspection: one index per comma-separated field of the
# first line (number of commas + 1).
# The block form of File.open closes the handle even when readline raises
# (e.g. EOFError on an empty file).
columns = File.open(ARGV.first, 'r') { |f| [*0..f.readline.count(',')] }

# Count statistics (especially average) for all columns.
# stats is expected to come from ./stats.rb; later code reads s['avg'][i]
# as the mean of column i.
puts "Counting statistics"
s = stats(ARGV.first, columns)

puts "Starting to count covariances"
# cov is a columns x columns matrix of running sums. Only the upper triangle
# (j >= i) is accumulated while reading; the lower triangle is mirrored below.
cov = Array.new(columns.length) { Array.new(columns.length, 0.0) }
avg = s['avg'] # loop-invariant: look the per-column means up once

rowcount = 0
IO.foreach(ARGV.first) do |row|
	rowcount += 1
	# limit -1 keeps trailing empty fields, so the field count agrees with the
	# comma-based column detection even on a last line without a newline.
	data = row.split(',', -1).map(&:to_f)
	# A short row used to fail with an opaque NoMethodError on nil (or, when it
	# parsed to zero fields, was counted without contributing). Report it
	# explicitly instead. Extra fields are still ignored, as before.
	if data.length < columns.length
		raise "Row #{rowcount} has only #{data.length} fields (expected #{columns.length})!"
	end
	columns.length.times do |i|
		(i...columns.length).each do |j|
			cov[i][j] += (data[i] - avg[i]) * (data[j] - avg[j])
		end
	end
	puts "Row: #{rowcount}" if rowcount % 100 == 0
end

raise "Not enough rows (only #{rowcount})!" if rowcount < 2

puts "Finishing up"
# Sample covariance: divide each sum by (n - 1), then copy the upper triangle
# into the lower one so the matrix is symmetric.
denominator = rowcount.to_f - 1.0
columns.length.times do |i|
	(i...columns.length).each do |j|
		cov[i][j] /= denominator
		cov[j][i] = cov[i][j]
	end
end

# Persist both results as YAML files named after the input file.
stats_path = "#{ARGV.first}.stats.yaml"
File.open(stats_path, 'w') do |out|
	out << YAML.dump(s)
end

cov_path = "#{ARGV.first}.cov.yaml"
File.open(cov_path, 'w') do |out|
	out << YAML.dump(cov)
end


# Echo the results to stdout; puts prints each argument on its own line.
puts "Mean:", s['avg'].inspect

puts "Covariance matrix:", cov.inspect
