#!/bin/bash

# Script identity: the release number and the file name this script was
# invoked as. Both are interpolated into the help and version messages.
version="1.1.0"
name="$(basename "$0")"

# Print the help text (purpose, synopsis and option summary) to stdout.
# Reads the globals $name and $version for the banner line.
usage()
{
    # One argument per output line; the empty strings produce the blank lines.
    printf '%s\n' \
        "" \
        "$name v$version: Test oligonucleotides against a excluding file allowing mismatches. Note this script will also count the exact hit." \
        "" \
        "Usage: testOligo -p OLIGOS -e EXCLUDING [-o OUTPUT] [-m 'MISMATCH']" \
        "" \
        "Required arguments:" \
        "  -p    Oligonucleotides fasta file." \
        "  -e    Excluding fasta file." \
        "" \
        "Optional arguments:" \
        "  -o    Output log file. By default, will replace the extension of the oligonucleotides file by '_tested.tsv'." \
        "  -m    The number of mismatches (in between quotes). Default='1 2'." \
        "  -h    Show this help message and exit." \
        "  -v    If selected, will not print information to the console." \
        "  -V    Print version information and exit." \
        "" \
        "This script is a wrapper using 'agrep'."
}

# Defaults; the flags parsed below may override all but START_TIME.
START_TIME=$(date +%s)   # epoch seconds, consumed by the run-time report
verbose="True"
MISMATCHESIN="1 2"
OUT="NONE"               # sentinel meaning "no -o given"

# Command-line parsing: -h and -V print and leave, -v silences the console
# output, and -p/-e/-m/-o store their argument in the matching global.
while getopts "hvVp:e:m:o:" opt; do
        case "${opt}" in
                h) usage; exit ;;
                v) verbose="False" ;;
                V) echo "oligoN-design $name v$version"; exit ;;
                p) OLIGOS=$OPTARG ;;
                e) EXCLUDING=$OPTARG ;;
                m) MISMATCHESIN=$OPTARG ;;
                o) OUT=$OPTARG ;;
        esac
done

# Abort early unless both mandatory FASTA inputs were supplied and are
# readable. Diagnostics go to stderr and the script exits with status 1 so
# that callers can detect the failure.
# 'echo -e' is required for the colour escapes to be rendered rather than
# printed literally; the flags named in the messages match the getopts
# string (-p for the oligonucleotides, -e for the excluding file).
if [[ -z "${OLIGOS}" ]]; then
        echo -e "  \033[91mError!\033[0m Please provide an oligonucleotides file through '-p'" >&2
        exit 1
fi
if [[ -z "${EXCLUDING}" ]]; then
        echo -e "  \033[91mError!\033[0m Please provide an excluding file through '-e'" >&2
        exit 1
fi
for input_file in "${OLIGOS}" "${EXCLUDING}"; do
        if [[ ! -r "${input_file}" ]]; then
                # printf keeps backslashes in the path literal (echo -e would not).
                printf "  \033[91mError!\033[0m Cannot read file '%s'\n" "${input_file}" >&2
                exit 1
        fi
done

# Print the default report path for the oligonucleotides file $1: the same
# path with the file-name extension (if any) replaced by '_tested.tsv'.
# Only the last path component is inspected, so dots in the directory part
# ('./oligos', 'run.v2/oligos') are never mistaken for an extension, and a
# leading dot of a hidden file is not treated as one either.
default_output_name()
{
        local oligos_path=$1
        local base=${oligos_path##*/}
        local dir=${oligos_path%"${base}"}
        # Strip a suffix only when at least one character precedes the dot.
        if [[ ${base} == ?*.* ]]; then
                base=${base%.*}
        fi
        printf '%s%s_tested.tsv\n' "${dir}" "${base}"
}

# "NONE" is the sentinel left in OUT when no -o option was given.
if [[ $OUT == "NONE" ]]; then
        OUT=$(default_output_name "${OLIGOS}")
fi

# Print the number of FASTA records (lines beginning with '>') in file $1.
# The path is passed quoted so that names containing spaces or glob
# characters reach grep as a single argument.
count_fasta_records()
{
        grep -c -- "^>" "$1"
}

# Record counts for both inputs; OLIGOSSEQS and EXCSEQS are used further down
# for the progress percentages and the hit proportions.
OLIGOSSEQS=$(count_fasta_records "${OLIGOS}")
if [[ $verbose == "True" ]]; then
        echo "  Oligonucleotides file: ${OLIGOS}"
        echo "    Number of sequences: ${OLIGOSSEQS}"
fi

EXCSEQS=$(count_fasta_records "${EXCLUDING}")
if [[ $verbose == "True" ]]; then
        echo "  Excluding file:        ${EXCLUDING}"
        echo "    Number of sequences: ${EXCSEQS}"
fi
# Rewrite FASTA file $1 in place so that every record occupies exactly two
# lines (header, then the whole sequence on one line).
#
# Arguments: $1 - path of the FASTA file to rewrite
#            $2 - expected number of records, used only for the progress
#                 percentage
# Globals:   verbose (read) - "True" enables the progress output on stdout
# Returns:   0 on success, 1 if the temporary file could not be created,
#            written, or moved over the original.
#
# Notes:
#  * A line counts as a header when it contains '>'.
#  * Text preceding the first header is dropped.
#  * 'read' keeps its default field splitting, so surrounding blanks on a
#    line are trimmed before the pieces are joined; -r keeps backslashes
#    literal.
#  * A final line without a trailing newline is still processed, and the
#    output always ends with a newline.
#  * The temporary file is created next to the target so the final 'mv'
#    stays on one filesystem, and it is opened once (fd 3) instead of once
#    per line.
linearize_fasta()
{
        local fasta=$1 total=$2
        local tmp line seen=0 open_seq=0
        tmp=$(mktemp "${fasta}.XXXXXX") || return 1
        {
                while read -r line || [[ -n ${line} ]]; do
                        if [[ ${line} == *">"* ]]; then
                                (( seen += 1 ))
                                # Guard against division by zero when no record was counted.
                                if [[ ${verbose} == "True" ]] && (( total > 0 )); then
                                        echo -n -e "\r      Editing file...  $(( seen * 100 / total ))%"
                                fi
                                # Terminate the previous record's sequence line, if one is open.
                                if (( open_seq )); then
                                        printf '\n' >&3
                                fi
                                printf '%s\n' "${line}" >&3
                                open_seq=0
                        elif (( seen > 0 )) && [[ -n ${line} ]]; then
                                printf '%s' "${line}" >&3
                                open_seq=1
                        fi
                done
                if (( open_seq )); then
                        printf '\n' >&3
                fi
        } < "${fasta}" 3> "${tmp}" || { rm -f -- "${tmp}"; return 1; }
        if ! mv -- "${tmp}" "${fasta}"; then
                rm -f -- "${tmp}"
                return 1
        fi
}

# More lines than two per record suggests sequences wrapped over several
# lines; the excluding file is then rewritten in place.
# A private variable name is used for the line count because LINES is a
# variable that bash itself maintains (terminal height).
EXC_LINES=$(wc -l < "${EXCLUDING}")
if (( EXC_LINES > EXCSEQS * 2 )); then
        echo "    Warning! Sequences in '${EXCLUDING}' might be in different lines"
        echo -n "      Editing file..."
        if ! linearize_fasta "${EXCLUDING}" "${EXCSEQS}"; then
                echo "" >&2
                printf "  \033[91mError!\033[0m Could not rewrite '%s'\n" "${EXCLUDING}" >&2
                exit 1
        fi
        echo ""
fi

# Expand the mismatch specification $1 into the flat list used by the search
# loop and build the matching TSV header columns.
#
# Arguments: $1 - whitespace-separated items, each either a non-negative
#                 integer ("2") or an inclusive range ("0-3")
# Globals written:
#   MISMATCHES    - expanded counts, each followed by one space
#   MISMATCHESOUT - for every count, the two columns
#                   "\tmismatch<N>_indel_prop\tmismatch<N>_indel"
#                   (literal backslash-t, expanded later by 'echo -e')
# Exits with status 1 and a message on stderr when an item is not numeric or
# an expanded count exceeds 8.
expand_mismatches()
{
        local spec=$1
        local -a tokens=()
        local token first last m
        MISMATCHES=""
        MISMATCHESOUT=""
        # Split on whitespace without subjecting the items to pathname
        # expansion; read returns non-zero at end of input, which is expected.
        read -r -d '' -a tokens <<< "${spec}" || true
        for token in "${tokens[@]}"; do
                if [[ ${token} =~ ^([0-9]+)-([0-9]+)$ ]]; then
                        first=${BASH_REMATCH[1]}
                        last=${BASH_REMATCH[2]}
                elif [[ ${token} =~ ^[0-9]+$ ]]; then
                        first=${token}
                        last=${token}
                else
                        printf "  \033[91mError!\033[0m Invalid number of mismatches '%s'. Use integers or ranges such as '1 2' or '0-3'.\n" "${token}" >&2
                        exit 1
                fi
                # 10# forces base 10 so that values like "08" are not read as octal.
                for (( m = 10#${first}; m <= 10#${last}; m++ )); do
                        if (( m >= 9 )); then
                                echo -e "  \033[91mError!\033[0m Maximum number of mismatches allowed is 8. Please use 'testThorough' instead." >&2
                                exit 1
                        fi
                        MISMATCHES+="${m} "
                        MISMATCHESOUT+="\tmismatch${m}_indel_prop\tmismatch${m}_indel"
                done
        done
}

expand_mismatches "${MISMATCHESIN}"
if [[ $verbose == "True" ]]; then
        echo "  Mismatches:            $MISMATCHES"
fi

# Print hits/total as a fixed-point number with six decimals, truncated
# (not rounded), e.g. "0.500000" or "1.000000".
# Arguments: $1 - number of hits, $2 - total number of records.
# A total of zero prints "0.000000" instead of dividing by zero.
# Pure integer arithmetic is used, so no external calculator is needed and
# the result always carries exactly one leading integer part.
mismatch_proportion()
{
        local hits=$1 total=$2
        if (( total <= 0 )); then
                printf '0.000000\n'
                return
        fi
        printf '%d.%06d\n' "$(( hits / total ))" "$(( hits % total * 1000000 / total ))"
}

# Header row; MISMATCHESOUT carries literal "\t" sequences that echo -e expands.
echo -e "identifier\tsequence${MISMATCHESOUT}" > "${OUT}"
if [[ $verbose == "True" ]]; then
        echo -n "  Searching..."
fi
i=0
# Walk the oligonucleotides file: a line starting with '>' sets the current
# identifier, any other non-empty line is searched in the excluding file once
# per mismatch level and written as one TSV row. The report is opened once
# (fd 3) so that progress output can keep going to stdout.
while read -r LINE || [[ -n ${LINE} ]]; do
        LINE=${LINE%$'\r'}   # tolerate CRLF line endings
        if [[ ${LINE} == ">"* ]]; then
                NAME=${LINE/>/}
                if [[ $verbose == "True" ]]; then
                        ((i=i+1))
                        echo -n -e "\r  Searching...           $(( ${i}*100/${OLIGOSSEQS} ))%"
                fi
                continue
        fi
        if [[ -z ${LINE} ]]; then
                continue
        fi
        ROW=""
        for j in ${MISMATCHES}; do
                ABS=$(agrep -c "-${j}" "${LINE}" "${EXCLUDING}")
                STATUS=$?
                # 126/127 are the shell's "cannot execute" / "not found" codes.
                if (( STATUS >= 126 )); then
                        echo "" >&2
                        echo -e "  \033[91mError!\033[0m Could not run 'agrep'. Please check that it is installed." >&2
                        exit 1
                fi
                if [[ ! ${ABS} =~ ^[0-9]+$ ]]; then
                        # NOTE(review): assumes agrep follows the grep convention of
                        # exit status >1 for errors - confirm for the installed variant.
                        if (( STATUS > 1 )); then
                                printf "\n  Warning! 'agrep' failed for '%s' with %s mismatches; recording 0\n" "${LINE}" "${j}" >&2
                        fi
                        ABS=0
                fi
                PROP=$(mismatch_proportion "${ABS}" "${EXCSEQS}")
                ROW+=$'\t'"${PROP}"$'\t'"${ABS}"
        done
        # printf keeps backslashes in identifiers literal (echo -e would not).
        printf '%s\t%s%s\n' "${NAME}" "${LINE}" "${ROW}" >&3
done < "${OLIGOS}" 3>> "${OUT}"

# Print the "Run time" line for a duration of $1 seconds.
# Each component is zero-padded to two digits; hours and minutes are left out
# while they are zero. Shell arithmetic is used instead of 'date -d@...', so
# the result does not depend on GNU date and hours do not wrap at 24.
format_runtime()
{
        local total=$1
        local hours minutes seconds
        if (( total < 0 )); then
                total=0   # clock moved backwards during the run
        fi
        hours=$(( total / 3600 ))
        minutes=$(( total % 3600 / 60 ))
        seconds=$(( total % 60 ))
        if (( hours == 0 )); then
                if (( minutes == 0 )); then
                        printf '  Run time: %02d seconds\n' "${seconds}"
                else
                        printf '  Run time: %02d minutes and %02d seconds\n' "${minutes}" "${seconds}"
                fi
        else
                printf '  Run time: %02d hours %02d minutes and %02d seconds\n' "${hours}" "${minutes}" "${seconds}"
        fi
}

# Final summary: elapsed time since START_TIME and where the report was written.
if [[ $verbose == "True" ]]; then
        echo ""
        END_TIME=$(date +%s)
        RUN=$(( END_TIME - START_TIME ))
        format_runtime "${RUN}"
        echo -e "  \033[1m$OUT\033[0m: contains all tested parameters"
        echo "Done"
fi
