#!/usr/bin/env python3

import argparse
from Bio.Seq import Seq
import sys
import time
import itertools
import statistics as st

start_time = time.time()
version='1.0.0'

parser = argparse.ArgumentParser(description="%s v%s: Find all possible specific oligonucleotides of length 'l', that matches at least 'm' sequences in the target file and has a maximum specificity of 's' to the excluding file." % ('%(prog)s', version), add_help=False,
								 epilog="*The basic melting temperature (Tm) is an approximation and should be considered as a baseline for comparison. Briefly, for short oligonucleotides (<14 bp): Tm = 2*(A+T) + 4*(G+C); and for longer oligonucleotides (>13 bp): Tm = 64.9 + 41*(G+C - 16.4) / (A+G+C+T); where A, C, G and T are the number of bases of A, G, C and T respectively.")

# Add the arguments to the parser
requirArgs = parser.add_argument_group('Required arguments')
functiArgs = parser.add_argument_group('Optional arguments related to the search')
outputArgs = parser.add_argument_group('Optional arguments related to the output')
optionArgs = parser.add_argument_group('Other optional arguments')

requirArgs.add_argument("-t", "--target", dest="target", required=True,
						help="A fasta file containing the sequences (5'-3') of the target group.")

requirArgs.add_argument("-e", "--excluding", dest="excluding", required=True,
						help="A fasta file containing the sequence (5'-3') to be excluded.")

functiArgs.add_argument("-l", "--length", dest="length", nargs="+", required=False, action='store', type=str,
						default=['18', '20'],
						help="The length(s) of the oligonucleotides to be searched. Default = %(default)s. A range can be specified with the '-' sign. A decreasing range of lengths will avoid smaller oligonucleotides found within larger oligonucleotides.")

functiArgs.add_argument("-m", "--minimum", dest="minimum", required=False, action='store',
						default="0.8",
						help="The minimum percentage of sequences (or number of sequences) that the oligonucleotide has to appear in the target file. Default = %(default)s. If the value provided is a 'float' [0.0-1.0], it will be interpreted as a percentage. If the value provided is a 'int(eger)' [1-Inf), it will be interpreted as an absolute number.")

functiArgs.add_argument("-s", "--specificity", dest="specificity", required=False, action='store',
						default="0.01",
						help="The maximum percentage of sequences (or number of sequences) that can contain the oligonucleotide in the excluding file. Default = %(default)s. If the value provided is a 'float' [0.0-1.0], it will be interpreted as a percentage. If the value provided is a 'int(eger)' [1-Inf), it will be interpreted as an absolute number of sequences.")

functiArgs.add_argument("-n", "--mismatches", dest="mismatches", nargs="+", required=False, action='store', type=str,
						default=['1', '2'],
						help="The number of mismatches against the excluding file. Default = %(default)s. Please note that at this step it will not look for insertions or deletions. If set to '0', it will not look for mismatches.")

functiArgs.add_argument("-b", "--flank", dest="flank", required=False, action='store', type=str,
						default="0",
						help="The number of leading and trailing (flanks) bases to flag when the given number of mismatches are found within. Default = %(default)s, it will not look for leading nor trailing mismatches. If (e.g.,) 3 is selected along with 2 mismatches, it will count how many hits are occurring when 2 mismatches are present within 3 flanking bases. If the value provided is a 'float' [0.0-1.0], it will be interpreted as a percentage of the given oligo length.")

functiArgs.add_argument("-c", "--center", dest="center", required=False, action='store', type=str,
						default="0",
						help="Similar to '-b/--flank' but centered. Default = %(default)s, it will not look for centered mismatches. If (e.g.,) 10 is selected along with 2 mismatches, it will count how many hits are occurring when 2 mismatches are present in the 10 central bases. If the value provided is a 'float' [0.0-1.0], it will be interpreted as a percentage of the given oligo length.")

functiArgs.add_argument("-C", "--GCcontent", dest="GCcontent", required=False, action='store', nargs="+", type=float,
						default=None,
						help="A minimum and maximum GC content percentage allowed (0-1). If only one provided, it will be interpreted as a minimum.")

outputArgs.add_argument("-o", "--output", dest="output", required=False,
						default=None,
						help="The output file name. By default will replace the extension of the target file with '_oligos.tsv'. The output file is a tab delimited table with the following columns: an arbitrary oligonucleotide identifier, the oligonucleotide sequence, the reverse complement sequence (which will be the reverse primer or the FISH probe), the length of the oligonucleotide, the GC content, the basic melting temperature*, the theoretical formamide concentration at 35ºC (based on the basic melting temperature), the proportion of hits in the target file, the number of hits in the target file, the proportion of hits in the excluding file, the number of hits in the excluding file and four columns for each number of mismatches selected, corresponding to the the proportion of hits with N mismatches, the number of hits with N mismatches and the number of hits with N mismatches in the first or last B bases.")

outputArgs.add_argument("-f", "--fasta", dest="fasta", required=False, action="store",
						default=None,
						help="If selected, will export a fasta file with the selected oligonucleotides to the given output.")

outputArgs.add_argument("-p", "--probes", dest="probes", required=False, action="store",
						default=None,
						help="If selected, will export a fasta file with the reverse complement of the selected oligonucleotides to the given output.")

outputArgs.add_argument("-S", "--stats", dest="stats", required=False, action="store",
						default=None,
						help="If selected, will export a tab delimited table to the given output and lengths of how many oligonucleotides would have passed different minimum and specificity criteria, to help tuning parameters in following runs.")

optionArgs.add_argument("-L", "--lowMem", dest="lowMem", required=False, action="store_true",
						help="If selected, will reduce the RAM usage at the expense of speed (depending on the datasets, from 1.5 up to 5 times slower).")

optionArgs.add_argument("--force", dest="force", required=False, action="store_true",
						help="Only used for debugging unnecessarily large number of mismatches and preventing undesired slow down of the search and large RAM usage.")

optionArgs.add_argument("-v", "--verbose", dest="verbose", required=False, action="store_false",
						help="If selected, will not print information to the console.")

optionArgs.add_argument("-h", "--help", action="help",
						help="Show this help message and exit.")

optionArgs.add_argument("-V", "--version", action='version',
						version='oligoN-design %s v%s' % ('%(prog)s', version),
						help='Show the version number and exit.')

args = parser.parse_args()

# Setting variables and functions __________________________________________________________________
if args.verbose:
	print("  Setting variables...", flush=True)

def getNumersFromArgs(stringList):
	out = list()
	for m in stringList:
		if '-' in m:
			first = int(m.split('-')[0])
			last = int(m.split('-')[1])
			if first < last:
				for m2 in range(first, last+1):
					out.append(m2)
			elif first > last:
				for m2 in range(first, last-1, -1):
					out.append(m2)
		elif " " in m:
			tocut = m.strip().split(" ")
			for m2 in tocut:
				out.append(int(m2))
		else:
			out.append(int(m))
	return  out

def string2numeric(string):
	try:
		return int(string)
	except ValueError:
		try:
			return float(string)
		except ValueError:
			return None

def readFasta(fastafile):
	out = {}
	w = False
	for line in open(fastafile):
		if line.startswith(">"):
			name = line.replace(">", "")
			name = name.replace("\n", "")
			out[name] = str()
		else:
			if "-" in line:
				w = True
			sequence = line.replace("\n", "")
			sequence = sequence.replace("-", "")
			sequence = sequence.upper()
			out[name] = (out[name] + sequence)
	if w:
		print("  Warning! Target file contains gaps, they will be removed before the search", flush=True)
	return out

def kmersBreak(dictionary, k, frecuency=True, verbose=True, addinfo=""):
	# Set variables for printing process
	if verbose:
		print("    Extracting ", k, "-mers", addinfo, end="", sep="", flush=True)
		i = 0
		pcti = 0
	# Get the length of the input file
	lenDict = len(dictionary)
	# Set whether to keep frecuency of k mers or not
	if frecuency:
		kmers = {}
	else:
		kmers = set()
	# Loop through all the sequences in the input file
	for tseq in dictionary.values():
		if verbose:
			i += 1
			pct = round(i/lenDict*100)
			if pct > pcti:
				pcti = pct
				print("\r    Extracting ", k, "-mers ", addinfo, ": ", pct, "%", end="", sep="", flush=True)
		lentseq = len(tseq)
		# Split the given sequence into kmers of length 'k'
		for p in range(0, lentseq-k+1):
			kmer = tseq[int(p):(int(p+k))]
			if frecuency:
				if kmer not in kmers.keys():
					kmers[kmer] = 1
				else:
					kmers[kmer] += 1
			else:
				kmers.add(kmer)
	if verbose:
		print("\r    Extracting ", k, "-mers ", addinfo, ": ", len(kmers), " ", k, "-mers found", sep="", flush=True)
	return kmers

def flatten(nestedList):
	flatList = []
	for item in nestedList:
		if isinstance(item, list):
			flatList.extend(flatten(item))
		else:
			flatList.append(item)
	return flatList

def replaceBase(base):
	bases = ["A", "C", "G", "T"]
	if base in bases:
		bases.remove(base)
	return bases

def replacePosition(oligo, position):
	i = position - 1
	moligo = list(oligo)
	out = list()
	replaced = replaceBase(moligo[i])
	for r in replaced:
		moligo[i] = r
		tmp = "".join(moligo)
		if tmp not in out:
			out.append(tmp)
	return out

def replacePositionRecursive(oligo, positions, index=0):
	if index == len(positions):
		return oligo
	out = list()
	p = positions[index]
	replaced = replacePosition(oligo, p)
	for r in replaced:
		outr = replacePositionRecursive(r, positions, index=index+1)
		out.append(outr)
	return flatten(out)

def getAllMismatches(oligo, mismatches):
	out = {}
	length = len(oligo)
	positions = list(range(1, length+1))
	if mismatches == 1:
		for p in positions:
			tmp = replacePosition(oligo, p)
			for t in tmp:
				if t not in out.keys():
					out[t] = [p]
	else:
		positions = list(itertools.combinations(positions,mismatches))
		for pos in positions:
			tmp = replacePositionRecursive(oligo, pos)
			for t in tmp:
				if t not in out.keys():
					out[t] = list()
					for p in pos:
						out[t].append(p)
	return out

def GCcheck(oligo, minimum, maximum=None):
	out = None
	gcs = oligo.upper().count("G") + oligo.upper().count("C")
	length = len(oligo)
	GC = round((gcs) / length, 4)
	if GC >= minimum:
		out = GC
	if maximum is not None:
		if GC > maximum:
			out = None
	return out

def flankCheck(oligo, positions, bases):
	length = len(oligo)
	if type(bases) == float:
		bases = int(length*bases)
	test = False
	if all(p <= bases for p in positions) or all(p >= length-bases+1 for p in positions):
		test = True
	return test

def centeredCheck(oligo, positions, bases):
	length = len(oligo)
	if type(bases) == float:
		bases = int(length*bases)
	flanks = (length-bases)/2
	test = False
	if all(p > flanks for p in positions) and all(p < length-flanks+1 for p in positions):
		test = True
	return test

# Setting the range of lengths _____________________________________________________________________
lengths = getNumersFromArgs(args.length)

if args.verbose:
	print("    Lengths:    ", *lengths, flush=True)

# Setting the specificities ________________________________________________________________________
minimum = string2numeric(args.minimum)
if type(minimum) is float:
	minimumType = "percentage"
	if args.verbose:
		print("    Minimum:     ", minimum*100, "%", sep="", flush=True)
elif type(minimum) is int:
	minimumType = "absolute"
	if args.verbose:
		print("    Minimum:     ", minimum, " sequences", sep="", flush=True)
else:
	print("Error: argument -m/--minimum needs a numerical value: either [0.0-1.0] or [1-Inf] (i.e., '0.9')")
	sys.exit(1)

specificity = string2numeric(args.specificity)
if type(specificity) is float:
	specificityType = "percentage"
	if args.verbose:
		print("    Specificity: ", specificity*100, "%", sep="", flush=True)
elif type(specificity) is int:
	specificityType = "absolute"
	if args.verbose:
		print("    Specificity: ", specificity, " sequences", sep="", flush=True)
else:
	print("Error: argument -s/--specificity needs a numerical value: either [0.0-1.0] or [1-Inf] (i.e., '0.001')")
	sys.exit(1)

# Setting search paramenters _______________________________________________________________________
mismatches = getNumersFromArgs(args.mismatches)
if len(mismatches) == 1:
	if mismatches[0] == 0:
		mismatches = 0
if args.verbose and mismatches != 0:
	print("    Mismatches: ", *mismatches, flush=True)
if mismatches != 0:
	for i in mismatches:
		if i > 4:
			if args.force:
				print("  \033[91mWarning!\033[0m Setting '", i, "' mismatches at this step will result in a waste of resources...", sep="")
			else:
				print("  \033[91mWarning!\033[0m Setting a number of mismatches unnecessarily large (>4) will considerably slow down the search and increase RAM usage.\n  If you are very sure what you are doing, repeat the search adding '--force'.")
				sys.exit(1)

flank = string2numeric(args.flank)
if args.verbose and mismatches != 0 and flank != 0:
	if type(flank) == int:
		print("      Leading and trailing bases with mismatches: ", flank, flush=True)
	if type(flank) == float:
		if flank > 1:
			print("\033[91mError!\033[0m Please select a flanking length either with absolute numbers or from 0.0 to 1.0")
			sys.exit(1)
		print("      Leading and trailing length with mismatches: ", flank*100, "%", sep="", flush=True)

center = string2numeric(args.center)
if args.verbose and mismatches != 0 and center != 0:
	if type(center) == int:
		print("      Central bases with mismatches: ", center, flush=True)
	if type(center) == float:
		if center > 1:
			print("\033[91mError!\033[0m Please select a centered length either with absolute numbers or from 0.0 to 1.0")
			sys.exit(1)
		print("      Central length with mismatches: ", center*100, "%", sep="", flush=True)

if args.GCcontent is not None:
	if len(args.GCcontent) > 2:
		print("  Warning! More than 2 values have been parsed for GC content. Only the first two will be considered.")
	if len(args.GCcontent) == 1:
		GCmin = args.GCcontent[0]
		print("    Accepted GC content ≥ ", GCmin*100, "%", sep="")
	else:
		GCmin = args.GCcontent[0]
		GCmax = args.GCcontent[1]
		print("    Accepted GC content ≥ ", GCmin*100, "% and ≤ ", GCmax, "%", sep="")

# Reading target file ______________________________________________________________________________
if args.verbose:
	print("  Reading target file:", str(args.target), flush=True)

target = readFasta(args.target)
lenTarget = len(target)

# Check lengths of sequences in target file
seqLengths = list()
for l in target.values():
	seqLengths.append(len(l))

if lenTarget > 1:
	mean = st.mean(seqLengths)
	sd = st.stdev(seqLengths)
	countMax = 0
	countMin = 0
	for l in seqLengths:
		if l > mean+sd:
			countMax += 1
		if l < mean-sd:
			countMin += 1
	if countMax != 0:
		print("\033[91m  Warning!\033[0m Some sequences are very long. Specific regions might be ignored in the search")
	if countMin != 0:
		print("\033[91m  Warning!\033[0m Some sequences are very short. This will artifically bias the 'minimum' criteria")
else:
	print("\033[91m  Warning!\033[0m With only 1 sequence, specificity values might be overestimated")

if args.verbose:
	print("    Sequences in target file:", lenTarget, flush=True)
	if minimumType == "percentage":
		print("    Oligonucleotides will be present in at least", round(lenTarget*minimum), "sequences in target file", flush=True)
	if minimumType == "absolute":
		print("    Oligonucleotides will be present in at least ", round(minimum/lenTarget*100, 2), "% of the sequences in target file", sep="", flush=True)


# Reading excluding file ___________________________________________________________________________
if args.verbose:
	print("  Reading excluding file:", str(args.excluding), flush=True)

exc = readFasta(args.excluding)
lenExc = len(exc)

if args.verbose:
	print("    Sequences in excluding file:", lenExc, flush=True)
	if specificityType == "percentage":
		print("    Oligonucleotides will be present in at most", round(lenExc*specificity), "sequences in excluding file", flush=True)
	if specificityType == "absolute":
		print("    Oligonucleotides will be present in at most ", round(specificity/lenExc*100, 2), "% of the sequences in excluding file", sep="", flush=True)

if args.verbose and args.lowMem:
	print("  Low RAM memory activated, this might take a while longer")

# Setting output file name _________________________________________________________________________
if args.output is None:
	import re
	output = re.sub("\\.[^\\.]+$", "_oligos.tsv", args.target)
else:
	output = args.output

# Start the search _________________________________________________________________________________
oligos = set()
if args.fasta is not None or args.probes is not None:
	oligosSeq = {}
if args.stats is not None:
	stats = {}
	stats["length"] = ["target_100", "target_95", "target_90", "target_85", "target_80", "target_75", "target_70", "target_65", "target_60", "target_55", "target_50", "target_0", "excluding_0", "excluding_0.0001", "excluding_0.001", "excluding_0.01", "excluding_0.1", "excluding_1", "excluding_2", "excluding_3", "excluding_4", "excluding_5", "excluding_10"]
	for l in lengths:
		stats[str(l)] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
with open(output, "w", buffering=2) as logfile:
	headers="identifier\tsequence\trevCom\tlength\tGC\tTm\tF35t\thitsT_prop\thitsT\thitsE_prop\thitsE"
	if mismatches != 0:
		for m in mismatches:
			headers = headers + "\tmismatch" + str(m) + "_prop\tmismatch" + str(m)
			if flank != 0:
				headers = headers + "\tmismatch" + str(m) + "_flanks" + str(flank)
			if center != 0:
				headers = headers + "\tmismatch" + str(m) + "_central" + str(center)
	print(headers, file=logfile)
	# Loop through the different lengths -----------------------------------------------------------
	for length in lengths:
		if args.verbose:
			print("\r  Searching oligos of", length, "base pairs...")
			i = 0
			pcti = 0
		# Extract kmers from target file -----------------------------------------------------------
		uniquesTarget = kmersBreak(target, length, frecuency=True, verbose=args.verbose, addinfo="from target file")
		# Check minimum criteria -------------------------------------------------------------------
		tocheck = {}
		for poligo, countTarget in uniquesTarget.items():  # Loop through all unique oligonucleotides
			if args.verbose:
				i += 1
				pct = round(i/len(uniquesTarget)*100)
				if pct > pcti:
					pcti = pct
					print("\r    Checking minimum presence\t", pct, "%", sep="", end="", flush=True)
			if minimumType == "percentage":
				R = countTarget/lenTarget
			elif minimumType == "absolute":
				R = countTarget
			if R >= minimum:
				tocheck[poligo] = countTarget
			if args.stats is not None:
				tmp = countTarget/lenTarget
				if tmp == 1:
					stats[str(length)][0] += 1
				if tmp >= 0.95:
					stats[str(length)][1] += 1
				if tmp >= 0.90:
					stats[str(length)][2] += 1
				if tmp >= 0.85:
					stats[str(length)][3] += 1
				if tmp >= 0.80:
					stats[str(length)][4] += 1
				if tmp >= 0.75:
					stats[str(length)][5] += 1
				if tmp >= 0.70:
					stats[str(length)][6] += 1
				if tmp >= 0.65:
					stats[str(length)][7] += 1
				if tmp >= 0.60:
					stats[str(length)][8] += 1
				if tmp >= 0.55:
					stats[str(length)][9] += 1
				if tmp >= 0.50:
					stats[str(length)][10] += 1
				if tmp >= 0:
					stats[str(length)][11] += 1
		uniquesTarget = {} # Empty variable to free up RAM memory
		if args.verbose:
			print("\r   ", len(tocheck), "potential oligos passed the minimum presence criteria in the target file", flush=True)
		# Check if the selected oligos are already nested within greater lengths -----------------------
		if len(oligos) > 0 and len(tocheck) > 0:
			if lengthp > length:
				removed = set()
				for checking in tocheck.keys():
					for poligo in oligos:
						if checking in poligo:
							removed.add(checking)
				if len(removed) > 0:
					for remove in removed:
						del tocheck[remove]
					if args.verbose:
						print("    Of which", len(removed), "are nested in previous lengths and therefore removed:", len(tocheck), "oligos passing", flush=True)
		if args.GCcontent is not None:
			removed = set()
			for checking in tocheck.keys():
				content = None
				if len(args.GCcontent) == 1:
					content = GCcheck(checking, GCmin)
				else:
					content = GCcheck(checking, GCmin, GCmax)
				if content is None:
					removed.add(checking)
			if len(removed) > 0:
				for remove in removed:
					del tocheck[remove]
				if args.verbose:
					if len(args.GCcontent) == 1:
						print("    Of which ", len(tocheck), " have a GC content ≥ ", GCmin*100, "%", sep="", flush=True)
					else:
						print("    Of which ", len(tocheck), " have a GC content ≥ ", GCmin*100, "% and ≤ ", GCmax*100, "%", sep="", flush=True)
		# Now go through the selected probes from target file ------------------------------------------
		count = 0
		if len(tocheck) > 0:
			# If selected, break excluding file into kmers ---------------------------------------------
			if args.lowMem is False:
				uniquesExc = kmersBreak(exc, length, frecuency=True, verbose=args.verbose, addinfo="from excluding file")
			if args.verbose:
				i = 0
				pcti = 0
				print("    Checking specificity", sep="", end="", flush=True)
			# Check specificity criteria and export if pass --------------------------------------------
			for poligo, countTarget in tocheck.items():  # Loop through all oligonucleotides that passed the minimum criteria
				if args.verbose:
					i += 1
					pct = round(i/len(tocheck)*100)
					if pct > pcti:
						pcti = pct
						print("\r    Checking specificity\t", pct, "%", sep="", end="", flush=True)
				if args.lowMem is False:
					if poligo in uniquesExc.keys():
						countExc = uniquesExc[poligo]
					else:
						countExc = 0
				else:
					countExc = sum(poligo in i for i in exc.values())
				# Check specificity criteria -----------------------------------------------------------
				if specificityType == "percentage":
					C = countExc/lenExc
				elif specificityType == "absolute":
					C = countExc
				# Export if pass -----------------------------------------------------------------------
				if C <= specificity:
					oligos.add(poligo)
					count += 1
					# Estimate the GC content
					gcs = poligo.upper().count("G") + poligo.upper().count("C")
					length = len(poligo)
					GC = round((gcs) / length, 4)
					# Estimate the theoretical melting temperature
					if length < 14:
						Tm = 2 * (poligo.upper().count("A") + poligo.upper().count("T")) + 4 * (gcs)
					else:
						Tm = 64.9 + 41*(gcs - 16.4) / length
					Tm = round(Tm, 2)
					# Estimate the theoretical formamide concentration based on the melting temperature
					formamide = round((Tm - 35) / 0.7, 1)
					poligo_revCom = Seq(poligo).reverse_complement()
					# Check for mismatches
					if mismatches != 0:
						hitsMismatches = list()
						if flank != 0:
							hitsFlanks = list()
						if center != 0:
							hitsCenter = list()
						for m in mismatches:
							hits = 0
							hitsF = 0
							hitsC = 0
							moligos = getAllMismatches(poligo, m)
							for key, val in moligos.items():
								if args.lowMem is False:
									if key in uniquesExc.keys():
										hitsi = uniquesExc[key]
									else:
										hitsi = 0
								else:
									hitsi = sum(key in j for j in exc.values())
								hits += hitsi
								if flank != 0:
									if flankCheck(poligo, val, bases=flank):
										hitsF += hitsi
								if center != 0:
									if centeredCheck(poligo, val, bases=flank):
										hitsC += hitsi
							hitsMismatches.append(hits)
							if flank != 0:
								hitsFlanks.append(hitsF)
							if center != 0:
								hitsCenter.append(hitsC)
					# And export
					out = "oligoN" + str(len(oligos)) + "\t" + str(poligo) + "\t" + str(poligo_revCom) + "\t" + str(len(poligo)) + "\t" + str(GC) + "\t" + str(Tm) + "\t" + str(formamide) + "\t" + str(countTarget/lenTarget) + "\t" + str(countTarget) + "\t" + str(countExc/lenExc) + "\t" + str(countExc)
					if mismatches != 0:
						for j in range(0, len(hitsMismatches)):
							out = out + "\t" + str(hitsMismatches[j]/lenExc) + "\t" + str(hitsMismatches[j])
							if flank != 0:
								out = out + "\t" + str(hitsFlanks[j])
							if center != 0:
								out = out + "\t" + str(hitsCenter[j])
					print(out, file=logfile, flush=True)
					if args.fasta is not None or args.probes is not None:
						name = "oligoN" + str(len(oligos))
						oligosSeq[name] = str(poligo)
				if args.stats is not None:
					tmp = countExc/lenExc
					if tmp == 0:
						stats[str(length)][12] += 1
					if tmp <= 0.0001:
						stats[str(length)][13] += 1
					if tmp <= 0.001:
						stats[str(length)][14] += 1
					if tmp <= 0.01:
						stats[str(length)][15] += 1
					if tmp <= 0.1:
						stats[str(length)][16] += 1
					if tmp <= 1:
						stats[str(length)][17] += 1
					if tmp <= 2:
						stats[str(length)][18] += 1
					if tmp <= 3:
						stats[str(length)][19] += 1
					if tmp <= 4:
						stats[str(length)][20] += 1
					if tmp <= 5:
						stats[str(length)][21] += 1
					if tmp <= 10:
						stats[str(length)][22] += 1
			if args.verbose:
				print("\r   ", count, "potential oligos passed the specificity criteria in the excluding file", flush=True)
		lengthp = length
		tocheck = {} # Empty variable to free up RAM
		uniquesExc = {} # Empty variable to free up RAM

if len(oligos) == 0:
	import os
	os.remove(output)
	if args.fasta is not None or args.probes is not None:
		print("    Fasta file was not exported")

# Export fasta file if selected ____________________________________________________________________
if args.fasta is not None and len(oligos) > 0:
	if args.verbose:
		print("  Exporting oligonucleotides", flush=True)
	with open(args.fasta, "w", buffering=1) as fasfile:
		for name, seq in oligosSeq.items():
			print(">" + str(name) + "\n" + str(seq), file=fasfile, flush=True)

# Export reverse complement fasta file if selected _________________________________________________
if args.probes is not None and len(oligos) > 0:
	if args.verbose:
		print("  Exporting reverse-complemented oligonucleotides", flush=True)
	with open(args.probes, "w", buffering=1) as revfile:
		for name, seq in oligosSeq.items():
			seqout = Seq(seq).reverse_complement()
			print(">" + str(name) + "_revCom\n" + str(seqout), file=revfile, flush=True)

# Export fasta file if selected ____________________________________________________________________
if args.stats is not None:
	if args.verbose:
		print("  Exporting stats", flush=True)
	with open(args.stats, "w", buffering=1) as statsfile:
		for key, val in stats.items():
			tmp = list()
			for v in val:
				tmp.append(str(v))
			tmp = "\t".join(tmp)
			print(str(key) + "\t" + str(tmp), file=statsfile, flush=True)

# Print concluding information _____________________________________________________________________
if args.verbose:
	t = time.time() - start_time
	seconds = t % (24 * 3600)
	hour = seconds // 3600
	seconds %= 3600
	minutes = seconds // 60
	seconds %= 60
	if hour == 0:
		if minutes == 0:
			seconds, remainder = divmod(seconds, 1)
			millisec = int(remainder * 1000)
			print("  Run time: %02d seconds and %02d milliseconds" % (seconds, millisec), flush=True)
		else:
			print("  Run time: %02d minutes and %02d seconds" % (minutes, seconds), flush=True)
	else:
		print("  Run time: %d hours %02d minutes and %02d seconds" % (hour, minutes, seconds), flush=True)
	c = len(oligos)
	if c == 0:
		print("\033[1m  No oligonucleotides were found with the given parameters...\033[0m", sep="", flush=True)
	if c > 0:
		print("\033[1m  In total", c, "candidate oligonucleotides have been identified\033[0m")
		if args.stats is not None:
			print("  \033[1m", args.stats, "\033[0m: contains a summary of k-mers", sep="", flush=True)
		if args.probes is not None:
			print("  \033[1m", args.probes, "\033[0m: contains the reverse complement of all candidate oligonucleotides", sep="", flush=True)
		if args.fasta is not None:
			print("  \033[1m", args.fasta, "\033[0m: contains all candidate oligonucleotides", sep="", flush=True)
		print("  \033[1m", output, "\033[0m: contains all parameters of the search for each candidate oligonucleotides", sep="", flush=True)
	print("Done")
