#!/usr/bin/ruby
require 'yaml'
require './stats.rb'

# Command line: <data file> [events [bins]].
# Without any argument the usage text is printed and the script exits.
unless ARGV.any?
	puts "Usage: ./lpf_predictor_events.rb test.csv [events [bins]]"
	exit
end

# Pull the three positional arguments off ARGV in one go. Arguments that were
# not given come back as nil and fall back to the defaults:
# 40 event runs to train on and 20 histogram bins per column.
data_file, events_arg, bins_arg = ARGV.shift(3)
events = (events_arg || 40).to_i
bins = (bins_arg || 20).to_i

#def in_trainset rown
#	rown % 5 == 0
#end

# Open the data file and consume the CSV header. The handle stays open and is
# left positioned on the first data row for the training loop that follows.
h = File.open(data_file, 'r')
column_names = h.readline.strip.split(',')

# Resolve the columns this script needs and fail early if one is absent:
# Array#index returns nil for an unknown name, which would otherwise only
# surface later as an obscure error far away from the real cause.
lpf_column_names = %w{lpf_x lpf_y lpf_z}
missing_columns = (lpf_column_names + ['event3']) - column_names
unless missing_columns.empty?
	h.close
	raise "Missing column(s) in #{data_file}: #{missing_columns.join(', ')}"
end
lpf_column_ids = lpf_column_names.map { |name| column_names.index name }
evt_column_id = column_names.index 'event3'

# stats is provided by stats.rb. NOTE(review): presumably 'min' and 'max' hold
# one value per requested column, in the order of lpf_column_ids -- confirm
# against stats.rb.
column_stats = stats(data_file, lpf_column_ids)

min = column_stats['min']
max = column_stats['max']
# Value range covered by each lpf column.
lens = lpf_column_ids.each_index.map { |i| max[i] - min[i] }
# freq[column][bin] is a pair of counters; the training loop increments slot 0
# for runs with the event on and slot 1 for runs with the event off.
freq = lpf_column_ids.map { Array.new(bins) { [0, 0] } }
# Overall counters in the same [on, off] layout.
event_freq = [0, 0]
# Bins are made 0.1% wider than range / bins so that the maximum value still
# lands in the last bin instead of one past it.
bin_lens = lens.map { |l| 1.001 * l / bins }

# State of the training loop: rows read so far, the samples of the run that is
# currently being collected, and the event state of the previous row.
row_counter = 0
event_list = []
last_event = nil

# Training pass. Consecutive rows with the same event state form a run. When
# the state flips, the finished run is reduced to its per-column average and
# counted in the histogram bin that average falls into. Training stops once
# the requested number of runs has been collected or the file is exhausted
# (the run still open at end of file is not counted).
h.each_line do |row|
	row_counter += 1
	next if row.strip.empty? # a blank line would otherwise parse as an all-zero "event off" sample

	data = row.strip.split(',')
	event_on = data[evt_column_id].to_f > 0.5 # works for both value types (0/1 and -100/100)

	# A change of event state closes the run collected in event_list.
	if !last_event.nil? && event_on != last_event
		events -= 1
		puts "Picking rows #{row_counter - event_list.length} - #{row_counter} (events left: #{events}) for training"

		slot = last_event ? 0 : 1 # 0 = run with event on, 1 = run with event off
		lpf_column_ids.each_index do |i|
			# take average of event sequence
			avg = event_list.inject(0) { |acc, sample| acc + sample[i] } / event_list.length
			bin = ((avg - min[i]) / bin_lens[i]).to_i
			freq[i][bin][slot] += 1
		end
		# Count the run once (not once per column); only the on/off ratio of
		# event_freq is used afterwards, so the resulting probabilities are the same.
		event_freq[slot] += 1

		break if events == 0
		event_list = []
	end

	last_event = event_on
	event_list << lpf_column_ids.map { |i| data[i].to_f }
end
h.close # the evaluation pass opens its own handle on the file

puts "Note: #{events} missing" if events > 0

# Both event states must have been seen at least once, otherwise the fallback
# probabilities computed from event_freq would be degenerate.
raise "Training data has not enough representative data" if event_freq.any? { |count| count == 0 }

# Overall [on, off] probabilities; used as the fallback for bins that received
# no training samples.
event_total = event_freq.inject(0) { |acc, count| acc + count }
event_prob = event_freq.map { |count| count.to_f / event_total }

# prob mirrors freq: for every lpf column and every bin it holds the pair
# [P(event on), P(event off)].
prob = freq.map do |column_bins|
	column_bins.map do |counts|
		samples = counts.inject(0) { |acc, count| acc + count }
		if samples.zero?
			# Untrained bin: reuse the overall probabilities (the same array object).
			event_prob
		else
			counts.map { |count| count.to_f / samples }
		end
	end
end

# Evaluation pass: re-read the data file, predict the event state of every
# data row from the per-bin probabilities and compare it with the recorded
# state. Collects a confusion matrix in results and the lengths of consecutive
# correct / incorrect predictions in correct_runs / incorrect_runs.
h.close unless h.closed? # release the handle left over from the training pass

# results index = (event on ? 0 : 1) + (predicted on ? 0 : 2)
results = [0] * 4
correct_runs = []
incorrect_runs = []
run_counter = 0
last = nil # outcome of the previous row; nil until the first row has been scored
votes = [0] * lpf_column_ids.length
vote_threshold = lpf_column_ids.length / 2.0 # 1.5 for the three lpf columns

puts "Note: lpf column indices are #{lpf_column_ids.inspect}"

File.open(data_file, 'r') do |eval_file|
	eval_file.gets # skip the header line

	# Iterate over every remaining line so that the last data row is scored too.
	eval_file.each_line do |line|
		next if line.strip.empty? # e.g. a trailing blank line at end of file

		data = line.split(',').map { |field| field.to_f }
		event_on = data[evt_column_id] > 0.5

		lpf_column_ids.each_with_index do |col_i, i|
			bin = ((data[col_i] - min[i]) / bin_lens[i]).to_i
			if bin < 0
				puts "Bin below scale, setting to 0."
				bin = 0
			elsif bins <= bin
				puts "Bin above scale, setting to #{bins - 1}."
				bin = bins - 1
			end
			votes[i] = prob[i][bin].first # weight on decision to predict an event
		end

		# Predict "event on" when the summed per-column probabilities exceed
		# half of the number of columns.
		summed_vote = votes.inject(0) { |acc, vote| acc + vote }
		predicted_status = summed_vote > vote_threshold

		hit = predicted_status == event_on
		marker = hit ? 'x' : ' '
		puts "[#{marker}]\t#{event_on}\t#{predicted_status}\t#{summed_vote}\t#{votes.inspect}"

		# The outcome flipped: store the finished run. The run_counter guard
		# keeps a zero-length run from being recorded before the first row.
		if hit != last && run_counter > 0
			(last ? correct_runs : incorrect_runs) << run_counter
			run_counter = 0
		end
		run_counter += 1
		results[(event_on ? 0 : 1) + (predicted_status ? 0 : 2)] += 1
		last = hit
	end
end

# Store the run that was still open when the file ended.
(last ? correct_runs : incorrect_runs) << run_counter if run_counter > 0

# Print the confusion matrix (rows: recorded event state, columns: predicted
# state, last row/column: totals), the share of correct answers and the
# average run lengths.
on_pred_on, off_pred_on, on_pred_off, off_pred_off = results
total = results.inject(0) { |acc, count| acc + count }
right = on_pred_on + off_pred_off
# Mean of a list of run lengths as a Float (NaN for an empty list).
mean_length = lambda { |runs| runs.inject(0) { |acc, len| acc + len } / runs.length.to_f }

puts ''
puts "\tp_on\tp_off"
puts "e_on\t#{on_pred_on}\t#{on_pred_off}\t#{on_pred_on + on_pred_off}"
puts "e_off\t#{off_pred_on}\t#{off_pred_off}\t#{off_pred_on + off_pred_off}"
puts "\t#{on_pred_on + off_pred_on}\t#{on_pred_off + off_pred_off}\t#{total}"
puts ''
puts "Sum of correct answers: #{right}"
puts "Percentage of correct answers: #{(right.to_f * 100 / total).round(2)}"
puts ''
puts "Correct runs had average length of #{mean_length.call(correct_runs)}."
puts "Incorrect runs had average length of #{mean_length.call(incorrect_runs)}."

# Collect the model (bin layout, counts, probabilities) together with the run
# lengths of the evaluation and write everything to temp.yaml.
snapshot = {
	'bins' => bins,
	'freq' => freq,
	'prob' => prob,
	'min' => min,
	'max' => max,
	'len' => lens,
	'bin_lens' => bin_lens,
	'correct' => correct_runs,
	'incorrect' => incorrect_runs
}

File.open("temp.yaml", "w") { |out| out.puts YAML.dump(snapshot) }

puts "Runs written to temp.yaml for further inspection."