#!/usr/bin/env python3

import argparse
import re
import statistics as st
import math

version = '1.1.0'

# Command-line interface. The automatic help option is switched off so that
# -h/--help can be declared explicitly inside its own argument group below.
parser = argparse.ArgumentParser(
	description="%s v%s: Returns overall statistics and numbers from a log file." % ('%(prog)s', version),
	add_help=False,
)

# Argument groups used to organise the options
requirArgs = parser.add_argument_group('Required arguments')
finputArgs = parser.add_argument_group('Optional arguments related to the input')
outputArgs = parser.add_argument_group('Optional arguments related to the output')
optionArgs = parser.add_argument_group('Other optional arguments')

# Input table
requirArgs.add_argument(
	"-f", "--file",
	dest="fileIn",
	required=True,
	help="A tab delimited table.",
)

# Column selection: one or more values; args.columns stays None when not given
finputArgs.add_argument(
	"-c", "--columns",
	dest="columns",
	nargs="+",
	help="The columns to calculate the stats to. By default will do all columns except the identifier ('identifier') and the sequences ('sequence' and/or 'revCom').",
)

# Flags (all False unless selected)
outputArgs.add_argument(
	"-p", "--plot",
	dest="plot",
	action="store_true",
	help="If selected, will prompt a histogram with the scores for each column independently.",
)

# NOTE: args.headers is True when the table has NO header names
outputArgs.add_argument(
	"-n", "--headers",
	dest="headers",
	action="store_true",
	help="If selected, will assume the log file has no header names.",
)

optionArgs.add_argument(
	"-v", "--verbose",
	dest="verbose",
	action="store_true",
	help="If selected, will print information to the console.",
)

optionArgs.add_argument(
	"-h", "--help",
	action="help",
	help="Show this help message and exit.",
)

optionArgs.add_argument(
	"-V", "--version",
	action='version',
	version='oligoN-design %s v%s' % ('%(prog)s', version),
	help='Show the version number and exit.',
)

args = parser.parse_args()

# Define functions _________________________________________________________________________________

def testNumOrStr(string):
	try:
		return int(string)
	except ValueError:
		try:
			return float(string)
		except ValueError:
			try:
				return str(string)
			except ValueError:
				return None

def getIDs(idList, headers):
	"""Translate column selectors into 0-based column indices.

	Args:
		idList: Column selectors. Each one is either a 1-based column number
			(anything that parses as an integer, e.g. "3") or a column name
			listed in `headers`.
		headers: The column names of the table, in file order.

	Returns:
		A list with one 0-based index per selector, in the order given.

	Raises:
		ValueError: If a selector is not an integer and matches no header.
	"""
	# Position of every column name; when a name is repeated the last one wins
	positions = {name: index for index, name in enumerate(headers)}
	out = list()
	for i in idList:
		try:
			# Integer selectors are 1-based column numbers
			index = int(i) - 1
		except ValueError:
			# Anything else is looked up by name. Failing here gives a clear
			# message instead of an unusable index further down the script.
			if i not in positions:
				raise ValueError("Column '%s' not found in the table headers." % i) from None
			index = positions[i]
		out.append(index)
	return out

def percentile(listvals, qs):
	"""Return nearest-rank percentiles of `listvals` as formatted strings.

	Args:
		listvals: The numeric values to summarise.
		qs: The requested quantiles as fractions between 0 and 1
			(e.g. 0.5 for the median).

	Returns:
		A list with one string per quantile in `qs`: the selected value
		rounded to 3 decimals.

	Raises:
		IndexError: If `listvals` is empty.
	"""
	order = sorted(listvals)
	n = len(order)
	out = list()
	for q in qs:
		# Nearest-rank method: the percentile is the value at 1-based rank
		# ceil(q * n), which is the 0-based index ceil(q * n) - 1.
		index = math.ceil(q * n) - 1
		# Keep the index inside the list (q = 0 would otherwise give -1)
		index = min(max(index, 0), n - 1)
		out.append(str(round(order[index], 3)))
	return out

if args.plot:
	# matplotlib is only imported when a plot was requested
	import matplotlib.pyplot as plt

# Debugging input commands _________________________________________________________________________
# Without header names (-n/--headers) a column can only be selected by number.
# args.columns is None when -c/--columns was not given; there is nothing to check then.
if args.headers and args.columns is not None:
	for i in args.columns:
		if isinstance(testNumOrStr(i), str):
			print("\033[91mError!\033[0m A string cannot be provided through '-c/--columns', if '-n/--headers' is selected.")
			import sys
			sys.exit(1)

# Reading the table ________________________________________________________________________________
if args.verbose:
	print("  Reading table:", args.fileIn)
with open(args.fileIn) as filein:
	# Only the first line is needed here: it gives the column names or, for
	# tables without header names, the number of columns.
	headers = filein.readline()
# NOTE: split() without arguments separates on any run of whitespace, not only on tabs
headers = headers.strip("\t").split()

if args.headers:
	# Table without header names: 'ids' holds 1-based column numbers
	if args.columns is None:
		# Default to every column but the first one (presumably the identifier,
		# going by the -c/--columns help), i.e. the 1-based columns 2..n.
		ids = list(range(2, len(headers) + 1))
	else:
		ids = [testNumOrStr(i) for i in args.columns]
else:
	# Table with header names: 'ids' holds 0-based positions within 'headers'
	if args.columns is None:
		# Default to every column except the identifier and the sequences
		columns = [h for h in headers if h not in ('identifier', 'sequence', 'revCom')]
		ids = getIDs(columns, headers)
	else:
		ids = getIDs(args.columns, headers)

# Collect the values of every selected column. Keys of 'data' are the 1-based
# column numbers when the table has no header names, the column names otherwise.
data = {}
with open(args.fileIn) as filein:
	for r, line in enumerate(filein, start=1):
		liner = line.strip("\t").split()
		if r == 1:
			# One (initially empty) list of values per selected column
			if args.headers:
				data = {i: list() for i in ids}
			else:
				data = {headers[i]: list() for i in ids}
				# The first line holds the column names, not data
				continue
		if not liner:
			# Blank lines carry no fields to read
			continue
		for i in ids:
			if args.headers:
				# Column numbers are 1-based here
				data[i].append(testNumOrStr(liner[i-1]))
			else:
				data[headers[i]].append(testNumOrStr(liner[i]))

if args.verbose:
	print("  Calculating stats...\n")
print("")
for key, values in data.items():
	if not values:
		# Nothing to summarise (e.g. the table has no data rows)
		print("  No values found for:", key)
		print("")
		continue
	# A column is numerical only if every value was parsed as a number;
	# anything else (including mixed columns) is summarised as categories.
	if all(isinstance(v, (int, float)) for v in values):
		if args.headers:
			print("  Numerical summary for column", key)
		else:
			print("  Numerical summary for:", key)
		minv = str(round(min(values), 3))
		pcts = percentile(values, [0.05, 0.25, 0.5, 0.75, 0.95])
		maxv = str(round(max(values), 3))
		mean = st.mean(values)
		# Two decimals for small means, one from 10 upwards
		mean = str(round(mean, 2 if mean < 10 else 1))
		if len(values) > 1:
			sd = st.stdev(values)
			# More decimals the smaller the standard deviation
			if sd < 1:
				sd = str(round(sd, 3))
			elif sd < 10:
				sd = str(round(sd, 2))
			else:
				sd = str(round(sd, 1))
		else:
			# The sample standard deviation needs at least two values
			sd = "NA"
		print("\tMin\t5pct\t25pct\tmedian\t75pct\t95pct\tmax\t\tmean\tsd")
		print("", minv, pcts[0], pcts[1], pcts[2], pcts[3], pcts[4], maxv, "", mean, sd, sep="\t")
		print("")
	else:
		# Use the text form of every value so mixed columns can be sorted and counted
		values = [str(v) for v in values]
		print("  Categorical summary for:", key)
		print("\tCategory\tCount")
		# Count every category in a single pass over the column
		counts = {}
		for v in values:
			counts[v] = counts.get(v, 0) + 1
		for category in sorted(counts):
			print("", category, str(counts[category]), sep="\t")
		print("")
	if args.plot:
		# One histogram per column, shown independently
		plt.hist(values, label=key, alpha=0.5)
		plt.legend()
		plt.show()

if args.verbose:
	print("  Done")
