#!/usr/bin/ruby

require './runs.rb'
require './pass.rb'
require './diff.rb'
require './ar_filter.rb'
require './tempfile.rb'
require './stats.rb'
require './event.rb'

=begin
Methods for converting a file to another:
- local statistics
	- runavg in_file, out_file, columns, win_len = 3
	- runsum in_file, out_file, columns, win_len = 3
	- runmin in_file, out_file, columns, win_len = 3
	- runmax in_file, out_file, columns, win_len = 3
	- runmed in_file, out_file, columns, win_len = 3
- other
	- lpf(in_file, out_file, columns, dt, rc)		# low pass filter
	- filter(in_file, out_file) {block}			# for simple row by row calculations (block should return an array of numbers)
	- boolean_filter(in_file, out_file) {block}	# same as above but for a block returning array of boolean values (transcoded to 1/0)
	- replicate(in_file, out_file, data)			# for replicating constant data
	- diff(in_file, out_file, columns)			# difference between current and last row (for counting jerk)
	- event_combiner datafile, eventfile, out_file, time_column = 0, event_file_structure = { .. } # appends data with ongoing events listed in event file

Methods for counting (global) statistics:
- stats(file, columns, median = false)

Other useful methods:
- generate_temporary_filename

TODO:
- fix appearing bugs
- diff within larger window

Counting some common measures
- mse (actually local mse is a running average of squared distances between values and their local averages: runavg(filter(([value]-[runavg(value)])²)))
- mae (same but replace ² by abs())
- variance (actually the same as mse if not biased)
=end

class Array
	def indices *p
		p.map { |i| index(i) }
	end
end

in_file = 'implant_axis.csv'
out_file = 'out.csv' #@todo: use this
intermediate_files = [in_file]
columns = %w{time x y z} # for keeping track of column names
phase = 0 # modify to skip first phases, if you still have intermediate temporary file left as a input file
#phase_counter = 1
leaving = false

begin
	phase += 1
#	file = generate_temporary_filename "#{phase}"
#	file = "tempfile_#{phase}"
	file = "tempfile_#{1 + ((phase + 1) % 2)}"
	time = Time.now
	puts "[#{time}] Writing phase #{phase} to #{file}"
	phase_counter = 0
	last_file = intermediate_files.last
	
	case phase # Each phase (preferably) consists of creating an intermediate file and writing column names down for further processing.
		##################################
		# Remove low pass filtered trend #
		##################################
		when phase_counter += 1
			lpf(last_file, file, columns.indices(*%w{x y z}), 1.0, 500.0)
			columns += %w{lpf_x lpf_y lpf_z}
			
		when phase_counter += 1
			filter(last_file, file) { |data| [
				data[columns.index('x')] - data[columns.index('lpf_x')],
				data[columns.index('y')] - data[columns.index('lpf_y')],
				data[columns.index('z')] - data[columns.index('lpf_z')]
			] }
			columns += %w{level_x level_y level_z}
		
		######################################
		# Mark sequences where "jumps" occur #
		# (an indicator for places where the #
		# values might not be exactly        #
		# correct)                           #
		######################################
		when phase_counter += 1
			diff(last_file, file, columns.indices(*%w{lpf_x lpf_y lpf_z}))
			columns += %w{d_lpf_x d_lpf_y d_lpf_z}
			
		when phase_counter += 1
			runmed(last_file, file, columns.indices(*%w{d_lpf_x d_lpf_y d_lpf_z}), 21)
			columns += %w{med_d_lpf_x med_d_lpf_y med_d_lpf_z}
		
		when phase_counter += 1
			boolean_filter(last_file, file) { |data| [
				%w{med_d_lpf_x med_d_lpf_y med_d_lpf_z}.map { |k| data[columns.index(k)] > 10 } #@todo: test decreased limit
			] }
			columns += %w{jump_x jump_y jump_z}
		
		#######################
		# Combined components #
		#######################
		when phase_counter += 1
			filter(last_file, file) { |data| [
				Math.sqrt(data[columns.index('level_x')] ** 2 + data[columns.index('level_y')] ** 2),
				Math.sqrt(data[columns.index('level_x')] ** 2 + data[columns.index('level_z')] ** 2),
				Math.sqrt(data[columns.index('level_y')] ** 2 + data[columns.index('level_z')] ** 2),
				Math.sqrt(data[columns.index('level_x')] ** 2 + data[columns.index('level_y')] ** 2 + data[columns.index('level_z')] ** 2)
			] }
			columns += %w{level_xy level_xz level_yz level_xyz}
			
		########
		# Jerk #
		########
		when phase_counter += 1
			diff(last_file, file, columns.indices(*%w{level_x level_y level_z level_xy level_xz level_yz level_xyz}))
			columns += %w{jerk_x jerk_y jerk_z jerk_xy jerk_xz jerk_yz jerk_xyz}
		
		####################
		# Local high peaks #
		####################
		when phase_counter += 1
			runmax(last_file, file, columns.indices(*%w{level_x level_y level_z level_xy level_xz level_yz level_xyz}), 31)
			columns += %w{high_x high_y high_z high_xy high_xz high_yz high_xyz}
		
		###################
		# Local low peaks #
		###################
		when phase_counter += 1
			runmin(last_file, file, columns.indices(*%w{level_x level_y level_z level_xy level_xz level_yz level_xyz}), 31)
			columns += %w{low_x low_y low_z low_xy low_xz low_yz low_xyz}
		
		#################################
		# A rough estimate of amplitude #
		# (difference of local peaks    #
		# divided by 2)                 #
		#################################
		when phase_counter += 1
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.map { |a|
					(data[columns.index("high_#{a}")] - data[columns.index("low_#{a}")]) / 2
				}
			}
			columns += %w{amp_x amp_y amp_z amp_xy amp_xz amp_yz amp_xyz}
		
		##########################
		# Deviation of amplitude #
		##########################
		when phase_counter += 1 # squared errors
			s = stats(last_file, columns.indices(*%w{amp_x amp_y amp_z amp_xy amp_xz amp_yz amp_xyz}))
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.each_with_index.map { |a, i|
					(data[columns.index("amp_#{a}")] - s['avg'][i]) ** 2
				}
			}
			columns += %w{se_amp_x se_amp_y se_amp_z se_amp_xy se_amp_xz se_amp_yz se_amp_xyz}
		
		when phase_counter += 1 # mean squared errors (~ variances)
			runavg(last_file, file, columns.indices(*%w{se_amp_x se_amp_y se_amp_z se_amp_xy se_amp_xz se_amp_yz se_amp_xyz}), 31)
			columns += %w{mse_amp_x mse_amp_y mse_amp_z mse_amp_xy mse_amp_xz mse_amp_yz mse_amp_xyz}
		
		when phase_counter += 1 # deviations
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.map { |a|
					Math.sqrt(data[columns.index("mse_amp_#{a}")])
				}
			}
			columns += %w{sd_amp_x sd_amp_y sd_amp_z sd_amp_xy sd_amp_xz sd_amp_yz sd_amp_xyz}
		
		################
		# Wave lengths #
		################
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("level_x")] >= 0
			}
			columns << 'len_x'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("level_y")] >= 0
			}
			columns << 'len_y'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("level_z")] >= 0
			}
			columns << 'len_z'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("level_xy")] >= 0
			}
			columns << 'len_xy'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("level_xz")] >= 0
			}
			columns << 'len_xz'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("level_yz")] >= 0
			}
			columns << 'len_yz'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("level_xyz")] >= 0
			}
			columns << 'len_xyz'
		
		when phase_counter += 1 # positive and negative wave lengths separated (@todo: these are _useless_ for combinations of x,y,z)
			filter(last_file, file) { |data|
				#(%w{x y z xy xz yz xyz}.map { |a|
				(%w{x y z}.map { |a|
					d = data[columns.index("len_#{a}")]
					((d >= 0)? [d, 0] : [0, -d])
				}).flatten
			}
			#columns += (%w{x y z xy xz yz xyz}.map { |a| ["len_pos_#{a}", "len_neg_#{a}"] }).flatten
			columns += (%w{x y z}.map { |a| ["len_pos_#{a}", "len_neg_#{a}"] }).flatten
		
		when phase_counter += 1 # height/length ratio of positive peaks
			filter(last_file, file) { |data|
				#%w{x y z xy xz yz xyz}.map { |a|
				%w{x y z}.map { |a|
					data[columns.index("len_pos_#{a}")] / (data[columns.index("amp_#{a}")] + 0.001) # a small fix to avoid division by zero
				}
			}
			#columns += %w{x y z xy xz yz xyz}.map { |a| "ratio_len_amp_pos_#{a}" }
			columns += %w{x y z}.map { |a| "ratio_len_amp_pos_#{a}" }

		when phase_counter += 1 # height/length ratio of negative peaks
			filter(last_file, file) { |data|
				#%w{x y z xy xz yz xyz}.map { |a|
				%w{x y z}.map { |a|
					data[columns.index("len_neg_#{a}")] / (data[columns.index("amp_#{a}")] + 0.001) # a small fix to avoid division by zero
				}
			}
			#columns += %w{x y z xy xz yz xyz}.map { |a| "ratio_len_amp_neg_#{a}" }
			columns += %w{x y z}.map { |a| "ratio_len_amp_neg_#{a}" }
		
		when phase_counter += 1 # coarsely estimated local area (negative for negative peaks)
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.map { |a|
					data[columns.index("len_#{a}")] * data[columns.index("amp_#{a}")] / 2
				}
			}
			columns += %w{x y z xy xz yz xyz}.map { |a| "area_#{a}" }
			
		##############################
		# Deviation of accelerations #
		##############################
		when phase_counter += 1 # squared errors
			s = stats(last_file, columns.indices(*%w{level_x level_y level_z level_xy level_xz level_yz level_xyz}))
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.each_with_index.map { |a, i|
					(data[columns.index("level_#{a}")] - s['avg'][i]) ** 2
				}
			}
			columns += %w{se_level_x se_level_y se_level_z se_level_xy se_level_xz se_level_yz se_level_xyz}
		
		when phase_counter += 1 # mean squared errors (~ variances)
			runavg(last_file, file, columns.indices(*%w{se_level_x se_level_y se_level_z se_level_xy se_level_xz se_level_yz se_level_xyz}), 31)
			columns += %w{mse_level_x mse_level_y mse_level_z mse_level_xy mse_level_xz mse_level_yz mse_level_xyz}
		
		when phase_counter += 1 # deviations
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.map { |a|
					Math.sqrt(data[columns.index("mse_level_#{a}")])
				}
			}
			columns += %w{sd_level_x sd_level_y sd_level_z sd_level_xy sd_level_xz sd_level_yz sd_level_xyz}
			
		############################
		# Local high peaks of jerk #
		############################
		when phase_counter += 1
			runmax(last_file, file, columns.indices(*%w{jerk_x jerk_y jerk_z jerk_xy jerk_xz jerk_yz jerk_xyz}), 31)
			columns += %w{high_jerk_x high_jerk_y high_jerk_z high_jerk_xy high_jerk_xz high_jerk_yz high_jerk_xyz}
		
		###################
		# Local low peaks #
		###################
		when phase_counter += 1
			runmin(last_file, file, columns.indices(*%w{jerk_x jerk_y jerk_z jerk_xy jerk_xz jerk_yz jerk_xyz}), 31)
			columns += %w{low_jerk_x low_jerk_y low_jerk_z low_jerk_xy low_jerk_xz low_jerk_yz low_jerk_xyz}
		
		##########################################
		# A rough estimate of amplitude of jerks #
		##########################################
		when phase_counter += 1
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.map { |a|
					(data[columns.index("high_jerk_#{a}")] - data[columns.index("low_jerk_#{a}")]) / 2
				}
			}
			columns += %w{amp_jerk_x amp_jerk_y amp_jerk_z amp_jerk_xy amp_jerk_xz amp_jerk_yz amp_jerk_xyz}
		
		##################################
		# Deviation of amplitude of jerk #
		##################################
		when phase_counter += 1 # squared errors
			s = stats(last_file, columns.indices(*%w{amp_jerk_x amp_jerk_y amp_jerk_z amp_jerk_xy amp_jerk_xz amp_jerk_yz amp_jerk_xyz}))
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.each_with_index.map { |a, i|
					(data[columns.index("amp_jerk_#{a}")] - s['avg'][i]) ** 2
				}
			}
			columns += %w{se_amp_jerk_x se_amp_jerk_y se_amp_jerk_z se_amp_jerk_xy se_amp_jerk_xz se_amp_jerk_yz se_amp_jerk_xyz}
		
		when phase_counter += 1 # mean squared errors (~ variances)
			runavg(last_file, file, columns.indices(*%w{se_amp_jerk_x se_amp_jerk_y se_amp_jerk_z se_amp_jerk_xy se_amp_jerk_xz se_amp_jerk_yz se_amp_jerk_xyz}), 31)
			columns += %w{mse_amp_jerk_x mse_amp_jerk_y mse_amp_jerk_z mse_amp_jerk_xy mse_amp_jerk_xz mse_amp_jerk_yz mse_amp_jerk_xyz}
		
		when phase_counter += 1 # deviations
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.map { |a|
					Math.sqrt(data[columns.index("mse_amp_jerk_#{a}")])
				}
			}
			columns += %w{sd_amp_jerk_x sd_amp_jerk_y sd_amp_jerk_z sd_amp_jerk_xy sd_amp_jerk_xz sd_amp_jerk_yz sd_amp_jerk_xyz}

		#####################
		# Jerk wave lengths #
		#####################
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("jerk_x")] >= 0
			}
			columns << 'len_jerk_x'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("jerk_y")] >= 0
			}
			columns << 'len_jerk_y'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("jerk_z")] >= 0
			}
			columns << 'len_jerk_z'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("jerk_xy")] >= 0
			}
			columns << 'len_jerk_xy'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("jerk_xz")] >= 0
			}
			columns << 'len_jerk_xz'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("jerk_yz")] >= 0
			}
			columns << 'len_jerk_yz'
		
		when phase_counter += 1
			matching_segment_length(last_file, file) { |data|
				data[columns.index("jerk_xyz")] >= 0
			}
			columns << 'len_jerk_xyz'
		
		when phase_counter += 1 # positive and negative wave lengths separated
			filter(last_file, file) { |data|
				(%w{x y z xy xz yz xyz}.map { |a|
					d = data[columns.index("len_jerk_#{a}")]
					((d >= 0)? [d, 0] : [0, -d])
				}).flatten
			}
			columns += (%w{x y z xy xz yz xyz}.map { |a| ["len_jerk_pos_#{a}", "len_jerk_neg_#{a}"] }).flatten
		
		when phase_counter += 1 # height/length ratio of positive peaks
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.map { |a|
					data[columns.index("len_jerk_pos_#{a}")] / (data[columns.index("amp_jerk_#{a}")] + 0.001) # a small fix to avoid division by zero
				}
			}
			columns += %w{x y z xy xz yz xyz}.map { |a| "ratio_len_amp_pos_jerk_#{a}" }

		when phase_counter += 1 # height/length ratio of negative peaks
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.map { |a|
					data[columns.index("len_jerk_neg_#{a}")] / (data[columns.index("amp_jerk_#{a}")] + 0.001) # a small fix to avoid division by zero
				}
			}
			columns += %w{x y z xy xz yz xyz}.map { |a| "ratio_len_amp_neg_jerk_#{a}" }
		
		when phase_counter += 1 # coarsely estimated local area (negative for negative peaks)
			filter(last_file, file) { |data|
				%w{x y z xy xz yz xyz}.map { |a|
					data[columns.index("len_jerk_#{a}")] * data[columns.index("amp_jerk_#{a}")] / 2
				}
			}
			columns += %w{x y z xy xz yz xyz}.map { |a| "area_jerk_#{a}" }

		####################################
		# Last but definitely not least:   #
		# a constant for linear regression #
		# models :)                        #
		####################################
		when phase_counter += 1
			replicate(last_file, file, 1)
			columns << "constant"
			
		else
			h = File.open(file = out_file, 'w')
			h.write "#{columns.join(',')}\n"
			IO.foreach(last_file) { |row| h.write row }
			leaving = true
	end

# a6=jerk-aallon keskimääräinen amplitudi
# a16=yz-jerk-aallon amplitudin keskihajonta
# a28=xy-jerk-aallon positiivisten piikkien keskimääräinen korkeus-leveys-suhde
# a49=y-kiihtyvyyden keskihajonta
# a52=xz-jerk-signaalin keskimääräinen magnitudi

	puts "File #{file} was generated in #{Time.now.to_i - time.to_i} seconds."
	intermediate_files << file
	phase_counter += 1
end while not leaving

puts "Column names (note: starting from zero)"
columns.each_with_index { |name, index|
	puts "#{index}\t#{name}"
}

# Tests:
#runmed '1..10.csv', 'new2.csv', [0], 3
#lpf('implant_axis.csv', 'temp.csv', [1,2,3], 1.0, 500)
#filter('temp.csv', 'temp2.csv') { |data| [*1..3].map { |i| data[3 + i] - data[i] } }
#event_combiner 'implant_axis.csv', 'events.csv', 'test_out.csv'
#puts stats('implant_axis.csv', [2], true).inspect
#puts generate_temporary_filename