#!/usr/bin/env python3

import argparse
from Bio.Seq import Seq
import sys
import time
import itertools
import statistics as st

start_time = time.time()
version='1.1.0'

parser = argparse.ArgumentParser(description="%s v%s: Find all possible specific oligonucleotides of length 'l', that matches at least 'm' sequences in the target file and has a maximum specificity of 's' to the excluding file." % ('%(prog)s', version), add_help=False)

# Add the arguments to the parser
requirArgs = parser.add_argument_group('Required arguments')
functiArgs = parser.add_argument_group('Optional arguments related to the search')
outputArgs = parser.add_argument_group('Optional arguments related to the output')
optionArgs = parser.add_argument_group('Other optional arguments')

requirArgs.add_argument("-t", "--target", dest="target", required=True,
						help="A fasta file containing the sequences (5'-3') of the target group.")

requirArgs.add_argument("-e", "--excluding", dest="excluding", required=True,
						help="A fasta file containing the sequence (5'-3') to be excluded.")

functiArgs.add_argument("-l", "--length", dest="length", nargs="+", required=False, action='store', type=str,
						default=['18', '20'],
						help="The length(s) of the oligonucleotides to be searched. Default = %(default)s. A range can be specified with the '-' sign. A decreasing range of lengths will avoid smaller oligonucleotides found within larger oligonucleotides.")

functiArgs.add_argument("-m", "--minimum", dest="minimum", required=False, action='store',
						default="0.8",
						help="The minimum percentage of sequences (or number of sequences) that the oligonucleotide has to appear in the target file. Default = %(default)s. If the value provided is a 'float' [0.0-1.0], it will be interpreted as a percentage. If the value provided is a 'int(eger)' [1-Inf), it will be interpreted as an absolute number.")

functiArgs.add_argument("-s", "--specificity", dest="specificity", required=False, action='store',
						default="0.01",
						help="The maximum percentage of sequences (or number of sequences) that can contain the oligonucleotide in the excluding file. Default = %(default)s. If the value provided is a 'float' [0.0-1.0], it will be interpreted as a percentage. If the value provided is a 'int(eger)' [1-Inf), it will be interpreted as an absolute number of sequences.")

functiArgs.add_argument("-n", "--mismatches", dest="mismatches", nargs="+", required=False, action='store', type=str,
						default=['1', '2'],
						help="The number of mismatches against the excluding file. Default = %(default)s. Please note that at this step it will not look for insertions or deletions. If set to '0', it will not look for mismatches.")

functiArgs.add_argument("-b", "--flank", dest="flank", required=False, action='store', type=str,
						default="0",
						help="The number of leading and trailing (flanks) bases to flag when the given number of mismatches are found within. Default = %(default)s, it will not look for leading nor trailing mismatches. If (e.g.,) 3 is selected along with 2 mismatches, it will count how many hits are occurring when 2 mismatches are present within 3 flanking bases. If the value provided is a 'float' [0.0-1.0], it will be interpreted as a percentage of the given oligo length.")

functiArgs.add_argument("-c", "--center", dest="center", required=False, action='store', type=str,
						default="0",
						help="Similar to '-b/--flank' but centered. Default = %(default)s, it will not look for centered mismatches. If (e.g.,) 10 is selected along with 2 mismatches, it will count how many hits are occurring when 2 mismatches are present in the 10 central bases. If the value provided is a 'float' [0.0-1.0], it will be interpreted as a percentage of the given oligo length.")

functiArgs.add_argument("-C", "--GCcontent", dest="GCcontent", required=False, action='store', nargs="+", type=float,
						default=None,
						help="A minimum and maximum GC content percentage allowed (0-1). If only one provided, it will be interpreted as a minimum.")

outputArgs.add_argument("-o", "--output", dest="output", required=False,
						default=None,
						help="The output file name. By default will replace the extension of the target file with '_oligos.tsv'. The output file is a tab delimited table with the following default columns: identifier, sequence, revCom, length, GC, Tm, F35t, hitsT_prop, hitsT, hitsE_prop, hitsE, mismatch1_prop, mismatch1, mismatch2_prop, mismatch2. Please see documentation for further details.")

outputArgs.add_argument("-f", "--fasta", dest="fasta", required=False, action="store",
						default=None,
						help="If selected, will export a fasta file with the selected oligonucleotides to the given output.")

outputArgs.add_argument("-p", "--probes", dest="probes", required=False, action="store",
						default=None,
						help="If selected, will export a fasta file with the reverse complement of the selected oligonucleotides to the given output.")

outputArgs.add_argument("-S", "--stats", dest="stats", required=False, action="store",
						default=None,
						help="If selected, will export a tab delimited table to the given output and lengths of how many oligonucleotides would have passed different minimum and specificity criteria, to help tuning parameters in following runs.")

optionArgs.add_argument("-L", "--lowMem", dest="lowMem", required=False, action="store_true",
						help="If selected, will reduce the RAM usage at the expense of speed (depending on the datasets, from 1.5 up to 5 times slower).")

optionArgs.add_argument("--force", dest="force", required=False, action="store_true",
						help="Only used for debugging unnecessarily large number of mismatches and preventing undesired slow down of the search and large RAM usage.")

optionArgs.add_argument("-u", "--uracil", dest="uracil", required=False, action="store_false",
						help="If selected, will not replace Uracil bases (U) by Thymine bases (T).")

optionArgs.add_argument("-v", "--verbose", dest="verbose", required=False, action="store_false",
						help="If selected, will not print information to the console.")

optionArgs.add_argument("-h", "--help", action="help",
						help="Show this help message and exit.")

optionArgs.add_argument("-V", "--version", action='version',
						version='oligoN-design %s v%s' % ('%(prog)s', version),
						help='Show the version number and exit.')

args = parser.parse_args()

# Setting variables and functions __________________________________________________________________
if args.verbose:
	print("  Setting variables...", flush=True)

def getNumersFromArgs(stringList):
	out = list()
	for m in stringList:
		if '-' in m:
			first = int(m.split('-')[0])
			last = int(m.split('-')[1])
			if first < last:
				for m2 in range(first, last+1):
					out.append(m2)
			elif first > last:
				for m2 in range(first, last-1, -1):
					out.append(m2)
		elif " " in m:
			tocut = m.strip().split(" ")
			for m2 in tocut:
				out.append(int(m2))
		else:
			out.append(int(m))
	return  out

def string2numeric(string):
	try:
		return int(string)
	except ValueError:
		try:
			return float(string)
		except ValueError:
			return None

def readFasta(fastafile):
	out = {}
	gaps = False
	repeated = 0
	uracil = False
	for line in open(fastafile):
		if line.startswith(">"):
			name = line.replace(">", "")
			name = name.replace("\n", "")
			if name in out.keys():
				repeated += 1
			out[name] = str()
		else:
			if "-" in line:
				gaps = True
			sequence = line.replace("\n", "")
			sequence = sequence.replace("-", "")
			sequence = sequence.upper()
			if "U" in sequence:
				uracil = True
				if args.uracil:
					sequence = sequence.replace("U", "T")
			out[name] = (out[name] + sequence)
	if gaps:
		print("    Warning! Target file contains gaps, they will be removed before the search", flush=True)
	if repeated > 0:
		print("    \033[91mWarning!\033[0m There are", str(repeated), "repeated sequence names ignored", flush=True)
	if uracil and args.uracil:
		print("    Warning! RNA sequences were transformed into DNA sequences. Use '-u/--uracil' to desactivate", flush=True)
	if uracil and not args.uracil:
		print("    \033[91mWarning!\033[0m RNA sequences have been provided and intentionally not replaced, this might cause problems downstream", flush=True)
	return out

def kmersBreak(dictionary, k, frecuency=True, verbose=True, addinfo=""):
	# Set variables for printing process
	if verbose:
		print("    Extracting ", k, "-mers", addinfo, end="", sep="", flush=True)
		i = 0
		pcti = 0
	# Get the length of the input file
	lenDict = len(dictionary)
	# Set whether to keep frecuency of k mers or not
	if frecuency:
		kmers = {}
	else:
		kmers = set()
	# Loop through all the sequences in the input file
	for tseq in dictionary.values():
		if verbose:
			i += 1
			pct = round(i/lenDict*100)
			if pct > pcti:
				pcti = pct
				print("\r    Extracting ", k, "-mers ", addinfo, ": ", pct, "%", end="", sep="", flush=True)
		lentseq = len(tseq)
		# Split the given sequence into kmers of length 'k'
		for p in range(0, lentseq-k+1):
			kmer = tseq[int(p):(int(p+k))]
			if frecuency:
				if kmer not in kmers.keys():
					kmers[kmer] = 1
				else:
					kmers[kmer] += 1
			else:
				kmers.add(kmer)
	if verbose:
		print("\r    Extracting ", k, "-mers ", addinfo, ": ", len(kmers), " ", k, "-mers found", sep="", flush=True)
	return kmers

def flatten(nestedList):
	flatList = []
	for item in nestedList:
		if isinstance(item, list):
			flatList.extend(flatten(item))
		else:
			flatList.append(item)
	return flatList

def replaceBase(base):
	bases = ["A", "C", "G", "T"]
	if base in bases:
		bases.remove(base)
	return bases

def replacePosition(oligo, position):
	i = position - 1
	moligo = list(oligo)
	out = list()
	replaced = replaceBase(moligo[i])
	for r in replaced:
		moligo[i] = r
		tmp = "".join(moligo)
		if tmp not in out:
			out.append(tmp)
	return out

def replacePositionRecursive(oligo, positions, index=0):
	if index == len(positions):
		return oligo
	out = list()
	p = positions[index]
	replaced = replacePosition(oligo, p)
	for r in replaced:
		outr = replacePositionRecursive(r, positions, index=index+1)
		out.append(outr)
	return flatten(out)

def getAllMismatches(oligo, mismatches):
	out = {}
	length = len(oligo)
	positions = list(range(1, length+1))
	if mismatches == 1:
		for p in positions:
			tmp = replacePosition(oligo, p)
			for t in tmp:
				if t not in out.keys():
					out[t] = [p]
	else:
		positions = list(itertools.combinations(positions,mismatches))
		for pos in positions:
			tmp = replacePositionRecursive(oligo, pos)
			for t in tmp:
				if t not in out.keys():
					out[t] = list()
					for p in pos:
						out[t].append(p)
	return out

def GCcheck(oligo, minimum, maximum=None):
	out = None
	gcs = oligo.upper().count("G") + oligo.upper().count("C")
	length = len(oligo)
	GC = round((gcs) / length, 4)
	if GC >= minimum:
		out = GC
	if maximum is not None:
		if GC > maximum:
			out = None
	return out

def flankCheck(oligo, positions, bases):
	length = len(oligo)
	if type(bases) == float:
		bases = int(length*bases)
	test = False
	if all(p <= bases for p in positions) or all(p >= length-bases+1 for p in positions):
		test = True
	return test

def centeredCheck(oligo, positions, bases):
	length = len(oligo)
	if type(bases) == float:
		bases = int(length*bases)
	flanks = (length-bases)/2
	test = False
	if all(p > flanks for p in positions) and all(p < length-flanks+1 for p in positions):
		test = True
	return test

# Setting the range of lengths _____________________________________________________________________
lengths = getNumersFromArgs(args.length)

if args.verbose:
	print("    Lengths:    ", *lengths, flush=True)

# Setting the specificities ________________________________________________________________________
minimum = string2numeric(args.minimum)
if type(minimum) is float:
	minimumType = "percentage"
	if args.verbose:
		print("    Minimum:     ", minimum*100, "%", sep="", flush=True)
elif type(minimum) is int:
	minimumType = "absolute"
	if args.verbose:
		print("    Minimum:     ", minimum, " sequences", sep="", flush=True)
else:
	print("Error: argument -m/--minimum needs a numerical value: either [0.0-1.0] or [1-Inf] (i.e., '0.9')")
	sys.exit(1)

specificity = string2numeric(args.specificity)
if type(specificity) is float:
	specificityType = "percentage"
	if args.verbose:
		print("    Specificity: ", specificity*100, "%", sep="", flush=True)
elif type(specificity) is int:
	specificityType = "absolute"
	if args.verbose:
		print("    Specificity: ", specificity, " sequences", sep="", flush=True)
else:
	print("Error: argument -s/--specificity needs a numerical value: either [0.0-1.0] or [1-Inf] (i.e., '0.001')")
	sys.exit(1)

# Setting search paramenters _______________________________________________________________________
mismatches = getNumersFromArgs(args.mismatches)
if len(mismatches) == 1:
	if mismatches[0] == 0:
		mismatches = 0
if args.verbose and mismatches != 0:
	print("    Mismatches: ", *mismatches, flush=True)
if mismatches != 0:
	for i in mismatches:
		if i > 4:
			if args.force:
				print("  \033[91mWarning!\033[0m Setting '", i, "' mismatches at this step will result in a waste of resources...", sep="")
			else:
				print("  \033[91mWarning!\033[0m Setting a number of mismatches unnecessarily large (>4) will considerably slow down the search and increase RAM usage.\n  If you are very sure what you are doing, repeat the search adding '--force'.")
				sys.exit(1)

flank = string2numeric(args.flank)
if args.verbose and mismatches != 0 and flank != 0:
	if type(flank) == int:
		print("      Leading and trailing bases with mismatches: ", flank, flush=True)
	if type(flank) == float:
		if flank > 1:
			print("\033[91mError!\033[0m Please select a flanking length either with absolute numbers or from 0.0 to 1.0")
			sys.exit(1)
		print("      Leading and trailing length with mismatches: ", flank*100, "%", sep="", flush=True)

center = string2numeric(args.center)
if args.verbose and mismatches != 0 and center != 0:
	if type(center) == int:
		print("      Central bases with mismatches: ", center, flush=True)
	if type(center) == float:
		if center > 1:
			print("\033[91mError!\033[0m Please select a centered length either with absolute numbers or from 0.0 to 1.0")
			sys.exit(1)
		print("      Central length with mismatches: ", center*100, "%", sep="", flush=True)

if args.GCcontent is not None:
	if len(args.GCcontent) > 2:
		print("  Warning! More than 2 values have been parsed for GC content. Only the first two will be considered.")
	if len(args.GCcontent) == 1:
		GCmin = args.GCcontent[0]
		print("    Accepted GC content ≥ ", GCmin*100, "%", sep="")
	else:
		GCmin = args.GCcontent[0]
		GCmax = args.GCcontent[1]
		print("    Accepted GC content ≥ ", GCmin*100, "% and ≤ ", GCmax, "%", sep="")

# Reading target file ______________________________________________________________________________
if args.verbose:
	print("  Reading target file:", str(args.target), flush=True)

target = readFasta(args.target)
lenTarget = len(target)

# Check lengths of sequences in target file
seqLengths = list()
for l in target.values():
	seqLengths.append(len(l))

if lenTarget > 1:
	mean = st.mean(seqLengths)
	sd = st.stdev(seqLengths)
	countMax = 0
	countMin = 0
	for l in seqLengths:
		if l > mean+sd:
			countMax += 1
		if l < mean-sd:
			countMin += 1
	if countMax != 0:
		print("\033[91m    Warning!\033[0m Some sequences are very long. Specific regions might be ignored in the search")
	if countMin != 0:
		print("\033[91m    Warning!\033[0m Some sequences are very short. This will artifically bias the 'minimum' criteria")
else:
	print("\033[91m    Warning!\033[0m With only 1 sequence, specificity values might be overestimated")

if args.verbose:
	print("    Sequences in target file:", lenTarget, flush=True)
	if minimumType == "percentage":
		print("    Oligonucleotides will be present in at least", round(lenTarget*minimum), "sequences in target file", flush=True)
	if minimumType == "absolute":
		print("    Oligonucleotides will be present in at least ", round(minimum/lenTarget*100, 2), "% of the sequences in target file", sep="", flush=True)


# Reading excluding file ___________________________________________________________________________
if args.verbose:
	print("  Reading excluding file:", str(args.excluding), flush=True)

exc = readFasta(args.excluding)
lenExc = len(exc)

if args.verbose:
	print("    Sequences in excluding file:", lenExc, flush=True)
	if specificityType == "percentage":
		print("    Oligonucleotides will be present in at most", round(lenExc*specificity), "sequences in excluding file", flush=True)
	if specificityType == "absolute":
		print("    Oligonucleotides will be present in at most ", round(specificity/lenExc*100, 2), "% of the sequences in excluding file", sep="", flush=True)

if args.verbose and args.lowMem:
	print("  Low RAM memory activated, this might take a while longer")

# Setting output file name _________________________________________________________________________
if args.output is None:
	import re
	output = re.sub("\\.[^\\.]+$", "_oligos.tsv", args.target)
else:
	output = args.output

# Start the search _________________________________________________________________________________
oligos = set()
if args.fasta is not None or args.probes is not None:
	oligosSeq = {}
if args.stats is not None:
	stats = {}
	stats["length"] = ["target_100", "target_95", "target_90", "target_85", "target_80", "target_75", "target_70", "target_65", "target_60", "target_55", "target_50", "target_0", "excluding_0", "excluding_0.0001", "excluding_0.001", "excluding_0.01", "excluding_0.1", "excluding_1", "excluding_2", "excluding_3", "excluding_4", "excluding_5", "excluding_10"]
	for l in lengths:
		stats[str(l)] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
with open(output, "w", buffering=2) as logfile:
	headers="identifier\tsequence\trevCom\tlength\tGC\tTm\tF35t\thitsT_prop\thitsT\thitsE_prop\thitsE"
	if mismatches != 0:
		for m in mismatches:
			headers = headers + "\tmismatch" + str(m) + "_prop\tmismatch" + str(m)
			if flank != 0:
				headers = headers + "\tmismatch" + str(m) + "_flanks" + str(flank)
			if center != 0:
				headers = headers + "\tmismatch" + str(m) + "_central" + str(center)
	print(headers, file=logfile)
	# Loop through the different lengths -----------------------------------------------------------
	for length in lengths:
		if args.verbose:
			print("\r  Searching oligos of", length, "base pairs...")
			i = 0
			pcti = 0
		# Extract kmers from target file -----------------------------------------------------------
		uniquesTarget = kmersBreak(target, length, frecuency=True, verbose=args.verbose, addinfo="from target file")
		# Check minimum criteria -------------------------------------------------------------------
		tocheck = {}
		for poligo, countTarget in uniquesTarget.items():  # Loop through all unique oligonucleotides
			if args.verbose:
				i += 1
				pct = round(i/len(uniquesTarget)*100)
				if pct > pcti:
					pcti = pct
					print("\r    Checking minimum presence\t", pct, "%", sep="", end="", flush=True)
			if minimumType == "percentage":
				R = countTarget/lenTarget
			elif minimumType == "absolute":
				R = countTarget
			if R >= minimum:
				tocheck[poligo] = countTarget
			if args.stats is not None:
				tmp = countTarget/lenTarget
				if tmp == 1:
					stats[str(length)][0] += 1
				if tmp >= 0.95:
					stats[str(length)][1] += 1
				if tmp >= 0.90:
					stats[str(length)][2] += 1
				if tmp >= 0.85:
					stats[str(length)][3] += 1
				if tmp >= 0.80:
					stats[str(length)][4] += 1
				if tmp >= 0.75:
					stats[str(length)][5] += 1
				if tmp >= 0.70:
					stats[str(length)][6] += 1
				if tmp >= 0.65:
					stats[str(length)][7] += 1
				if tmp >= 0.60:
					stats[str(length)][8] += 1
				if tmp >= 0.55:
					stats[str(length)][9] += 1
				if tmp >= 0.50:
					stats[str(length)][10] += 1
				if tmp >= 0:
					stats[str(length)][11] += 1
		uniquesTarget = {} # Empty variable to free up RAM memory
		if args.verbose:
			print("\r   ", len(tocheck), "potential oligos passed the minimum presence criteria in the target file", flush=True)
		# Check if the selected oligos are already nested within greater lengths -----------------------
		if len(oligos) > 0 and len(tocheck) > 0:
			if lengthp > length:
				removed = set()
				for checking in tocheck.keys():
					for poligo in oligos:
						if checking in poligo:
							removed.add(checking)
				if len(removed) > 0:
					for remove in removed:
						del tocheck[remove]
					if args.verbose:
						print("    Of which", len(removed), "are nested in previous lengths and therefore removed:", len(tocheck), "oligos passing", flush=True)
		if args.GCcontent is not None:
			removed = set()
			for checking in tocheck.keys():
				content = None
				if len(args.GCcontent) == 1:
					content = GCcheck(checking, GCmin)
				else:
					content = GCcheck(checking, GCmin, GCmax)
				if content is None:
					removed.add(checking)
			if len(removed) > 0:
				for remove in removed:
					del tocheck[remove]
				if args.verbose:
					if len(args.GCcontent) == 1:
						print("    Of which ", len(tocheck), " have a GC content ≥ ", GCmin*100, "%", sep="", flush=True)
					else:
						print("    Of which ", len(tocheck), " have a GC content ≥ ", GCmin*100, "% and ≤ ", GCmax*100, "%", sep="", flush=True)
		# Now go through the selected probes from target file ------------------------------------------
		count = 0
		if len(tocheck) > 0:
			# If selected, break excluding file into kmers ---------------------------------------------
			if args.lowMem is False:
				uniquesExc = kmersBreak(exc, length, frecuency=True, verbose=args.verbose, addinfo="from excluding file")
			if args.verbose:
				i = 0
				pcti = 0
				print("    Checking specificity", sep="", end="", flush=True)
			# Check specificity criteria and export if pass --------------------------------------------
			for poligo, countTarget in tocheck.items():  # Loop through all oligonucleotides that passed the minimum criteria
				if args.verbose:
					i += 1
					pct = round(i/len(tocheck)*100)
					if pct > pcti:
						pcti = pct
						print("\r    Checking specificity\t", pct, "%", sep="", end="", flush=True)
				if args.lowMem is False:
					if poligo in uniquesExc.keys():
						countExc = uniquesExc[poligo]
					else:
						countExc = 0
				else:
					countExc = sum(poligo in i for i in exc.values())
				# Check specificity criteria -----------------------------------------------------------
				if specificityType == "percentage":
					C = countExc/lenExc
				elif specificityType == "absolute":
					C = countExc
				# Export if pass -----------------------------------------------------------------------
				if C <= specificity:
					oligos.add(poligo)
					count += 1
					# Estimate the GC content
					gcs = poligo.upper().count("G") + poligo.upper().count("C")
					length = len(poligo)
					GC = round((gcs) / length, 4)
					# Estimate the theoretical melting temperature
					if length < 14:
						Tm = 2 * (poligo.upper().count("A") + poligo.upper().count("T")) + 4 * (gcs)
					else:
						Tm = 64.9 + 41*(gcs - 16.4) / length
					Tm = round(Tm, 2)
					# Estimate the theoretical formamide concentration based on the melting temperature
					formamide = round((Tm - 35) / 0.7, 1)
					poligo_revCom = Seq(poligo).reverse_complement()
					# Check for mismatches
					if mismatches != 0:
						hitsMismatches = list()
						if flank != 0:
							hitsFlanks = list()
						if center != 0:
							hitsCenter = list()
						for m in mismatches:
							hits = 0
							hitsF = 0
							hitsC = 0
							moligos = getAllMismatches(poligo, m)
							for key, val in moligos.items():
								if args.lowMem is False:
									if key in uniquesExc.keys():
										hitsi = uniquesExc[key]
									else:
										hitsi = 0
								else:
									hitsi = sum(key in j for j in exc.values())
								hits += hitsi
								if flank != 0:
									if flankCheck(poligo, val, bases=flank):
										hitsF += hitsi
								if center != 0:
									if centeredCheck(poligo, val, bases=flank):
										hitsC += hitsi
							hitsMismatches.append(hits)
							if flank != 0:
								hitsFlanks.append(hitsF)
							if center != 0:
								hitsCenter.append(hitsC)
					# And export
					out = "oligoN" + str(len(oligos)) + "\t" + str(poligo) + "\t" + str(poligo_revCom) + "\t" + str(len(poligo)) + "\t" + str(GC) + "\t" + str(Tm) + "\t" + str(formamide) + "\t" + str(countTarget/lenTarget) + "\t" + str(countTarget) + "\t" + str(countExc/lenExc) + "\t" + str(countExc)
					if mismatches != 0:
						for j in range(0, len(hitsMismatches)):
							out = out + "\t" + str(hitsMismatches[j]/lenExc) + "\t" + str(hitsMismatches[j])
							if flank != 0:
								out = out + "\t" + str(hitsFlanks[j])
							if center != 0:
								out = out + "\t" + str(hitsCenter[j])
					print(out, file=logfile, flush=True)
					if args.fasta is not None or args.probes is not None:
						name = "oligoN" + str(len(oligos))
						oligosSeq[name] = str(poligo)
				if args.stats is not None:
					tmp = countExc/lenExc
					if tmp == 0:
						stats[str(length)][12] += 1
					if tmp <= 0.0001:
						stats[str(length)][13] += 1
					if tmp <= 0.001:
						stats[str(length)][14] += 1
					if tmp <= 0.01:
						stats[str(length)][15] += 1
					if tmp <= 0.1:
						stats[str(length)][16] += 1
					if tmp <= 1:
						stats[str(length)][17] += 1
					if tmp <= 2:
						stats[str(length)][18] += 1
					if tmp <= 3:
						stats[str(length)][19] += 1
					if tmp <= 4:
						stats[str(length)][20] += 1
					if tmp <= 5:
						stats[str(length)][21] += 1
					if tmp <= 10:
						stats[str(length)][22] += 1
			if args.verbose:
				print("\r   ", count, "potential oligos passed the specificity criteria in the excluding file", flush=True)
		lengthp = length
		tocheck = {} # Empty variable to free up RAM
		uniquesExc = {} # Empty variable to free up RAM

if len(oligos) == 0:
	import os
	os.remove(output)
	if args.fasta is not None or args.probes is not None:
		print("    Fasta file was not exported")

# Export fasta file if selected ____________________________________________________________________
if args.fasta is not None and len(oligos) > 0:
	if args.verbose:
		print("  Exporting oligonucleotides", flush=True)
	with open(args.fasta, "w", buffering=1) as fasfile:
		for name, seq in oligosSeq.items():
			print(">" + str(name) + "\n" + str(seq), file=fasfile, flush=True)

# Export reverse complement fasta file if selected _________________________________________________
if args.probes is not None and len(oligos) > 0:
	if args.verbose:
		print("  Exporting reverse-complemented oligonucleotides", flush=True)
	with open(args.probes, "w", buffering=1) as revfile:
		for name, seq in oligosSeq.items():
			seqout = Seq(seq).reverse_complement()
			print(">" + str(name) + "_revCom\n" + str(seqout), file=revfile, flush=True)

# Export fasta file if selected ____________________________________________________________________
if args.stats is not None:
	if args.verbose:
		print("  Exporting stats", flush=True)
	with open(args.stats, "w", buffering=1) as statsfile:
		for key, val in stats.items():
			tmp = list()
			for v in val:
				tmp.append(str(v))
			tmp = "\t".join(tmp)
			print(str(key) + "\t" + str(tmp), file=statsfile, flush=True)

# Print concluding information _____________________________________________________________________
if args.verbose:
	t = time.time() - start_time
	seconds = t % (24 * 3600)
	hour = seconds // 3600
	seconds %= 3600
	minutes = seconds // 60
	seconds %= 60
	if hour == 0:
		if minutes == 0:
			seconds, remainder = divmod(seconds, 1)
			millisec = int(remainder * 1000)
			print("  Run time: %02d seconds and %02d milliseconds" % (seconds, millisec), flush=True)
		else:
			print("  Run time: %02d minutes and %02d seconds" % (minutes, seconds), flush=True)
	else:
		print("  Run time: %d hours %02d minutes and %02d seconds" % (hour, minutes, seconds), flush=True)
	c = len(oligos)
	if c == 0:
		print("\033[1m  No oligonucleotides were found with the given parameters...\033[0m", sep="", flush=True)
	if c > 0:
		print("\033[1m  In total", c, "candidate oligonucleotides have been identified\033[0m")
		if args.stats is not None:
			print("  \033[1m", args.stats, "\033[0m: contains a summary of k-mers", sep="", flush=True)
		if args.probes is not None:
			print("  \033[1m", args.probes, "\033[0m: contains the reverse complement of all candidate oligonucleotides", sep="", flush=True)
		if args.fasta is not None:
			print("  \033[1m", args.fasta, "\033[0m: contains all candidate oligonucleotides", sep="", flush=True)
		print("  \033[1m", output, "\033[0m: contains all parameters of the search for each candidate oligonucleotides", sep="", flush=True)
	print("Done")
