#!/opt/conda/conda-bld/cdskit_1771324122246/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh/bin/python

import argparse
import sys

from datetime import datetime

from cdskit.__init__ import __version__

# Parent parsers for options shared between subcommands.
# They are built before the main parser so that the main parser can inherit --version as well.

p_version = argparse.ArgumentParser(add_help=False)
p_version.add_argument('--version', action='version', version='cdskit version ' + __version__)

# Input sequence options.
p_infile = argparse.ArgumentParser(add_help=False)
p_infile.add_argument('-s', '--seqfile', metavar='PATH', default='-', type=str, required=False, action='store',
                      help='default=%(default)s: Input sequence file. Use "-" for STDIN.')
p_infile.add_argument('-if', '--inseqformat', metavar='STR', default='fasta', type=str, required=False, action='store',
                      help='default=%(default)s: Input sequence format. See Biopython documentation for available options. https://biopython.org/wiki/SeqIO')

# Output sequence options.
p_outfile = argparse.ArgumentParser(add_help=False)
p_outfile.add_argument('-o', '--outfile', metavar='PATH', default='-', type=str, required=False, action='store',
                       help='default=%(default)s: Output sequence file. Use "-" for STDOUT.')
p_outfile.add_argument('-of', '--outseqformat', metavar='STR', default='fasta', type=str, required=False, action='store',
                       help='default=%(default)s: Output sequence format. See Biopython documentation for available options. https://biopython.org/wiki/SeqIO')

# Genetic code selection.
p_codon = argparse.ArgumentParser(add_help=False)
p_codon.add_argument('-d', '--codontable', metavar='INT', default=1, type=int, required=False, action='store',
                     help='default=%(default)s: Codon table ID. The standard code is "1". '
                          'See here for details: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi')

# GFF input/output options.
p_gffio = argparse.ArgumentParser(add_help=False)
p_gffio.add_argument('--ingff', metavar='PATH', default=None, type=str, required=False, action='store',
                     help='default=%(default)s: Input gff file.')
p_gffio.add_argument('--outgff', metavar='PATH', default='out.gff', type=str, required=False, action='store',
                     help='default=%(default)s: Output gff file.')

# Main parser.
# Inheriting p_version makes `cdskit --version` work without naming a subcommand;
# previously the flag was only recognised after a subcommand name.
psr = argparse.ArgumentParser(
    description='A toolkit to handle protein-coding DNA sequences in frame',
    parents=[p_version],
)
subparsers = psr.add_subparsers()



def strtobool(val):
    """Convert a truth-value string to a bool.

    Used in this file as the argparse ``type=`` converter of the ``yes|no`` options.

    Args:
        val: One of "y", "yes", "t", "true", "on", "1" (true) or
            "n", "no", "f", "false", "off", "0" (false). Matching is
            case-insensitive and ignores surrounding whitespace.
            A bool is returned unchanged.

    Returns:
        True or False.

    Raises:
        ValueError: If ``val`` is not one of the recognised words.
    """
    # An already-converted value has no .lower(); pass it through instead of crashing.
    if isinstance(val, bool):
        return val
    val = val.strip().lower()
    if val in ("y", "yes", "t", "true", "on", "1"):
        return True
    if val in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError(f"invalid truth value {val!r}")

def command_accession2fasta(args):
    """Run the `accession2fasta` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `accession2fasta_main`.
    """
    from cdskit.accession2fasta import accession2fasta_main as run_subcommand

    sys.stderr.write(f'cdskit accession2fasta: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit accession2fasta: ended at {datetime.now()}\n')

# `accession2fasta` subcommand: inherits --version and the output-file options.
help_accession2fasta = 'Retrieving fasta sequences from a list of GenBank accessions. See `cdskit accession2fasta -h`'
p_accession2fasta = subparsers.add_parser(
    'accession2fasta', help=help_accession2fasta, parents=[p_version, p_outfile],
)
# Options below rely on argparse's defaults of action='store' and required=False.
# String defaults of the yes|no options are converted by strtobool, like command-line values.
p_accession2fasta.add_argument(
    '--accession_file', metavar='PATH', default='', type=str,
    help='default=%(default)s: PATH to the accession-per-line text file.',
)
p_accession2fasta.add_argument(
    '--email', metavar='aaa@bbb.com', default='', type=str,
    help='default=%(default)s: Your email address. This is passed to the NCBI\'s E-utilities. '
         'For details, see here: https://biopython.org/docs/1.75/api/Bio.Entrez.html',
)
p_accession2fasta.add_argument(
    '--extract_cds', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Whether to extract the CDS feature.',
)
p_accession2fasta.add_argument(
    '--ncbi_database', metavar='STR', default='nucleotide', type=str, choices=['nucleotide', ],
    help='default=%(default)s: NCBI database to search.',
)
p_accession2fasta.add_argument(
    '--seqnamefmt', metavar='STR', default='organism_accessions', type=str,
    help='default=%(default)s: Underline-separated list of output sequence name elements. '
         'Try --list_seqname_keys to check available values.',
)
p_accession2fasta.add_argument(
    '--list_seqname_keys', metavar='yes|no', default='no', type=strtobool,
    help='default=%(default)s: Listing the keys (and values) available for --seqnamefmt.',
)
p_accession2fasta.set_defaults(handler=command_accession2fasta)


def command_aggregate(args):
    """Run the `aggregate` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `aggregate_main`.
    """
    from cdskit.aggregate import aggregate_main as run_subcommand

    sys.stderr.write(f'cdskit aggregate: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit aggregate: ended at {datetime.now()}\n')

# `aggregate` subcommand: inherits --version plus the input- and output-file options.
help_aggregate = 'Extracting the longest sequences combined with a sequence name regex. See `cdskit aggregate -h`'
p_aggregate = subparsers.add_parser(
    'aggregate', help=help_aggregate, parents=[p_version, p_infile, p_outfile],
)
# Options below rely on argparse's defaults of action='store' and required=False.
p_aggregate.add_argument(
    '-m', '--mode', metavar='STR', default='longest', type=str, choices=['longest', ],
    help='default=%(default)s: Criterion to keep a sequence during aggregation.',
)
# NOTE(review): with nargs='+' a given value arrives as a list, while the default is the plain string '-'.
p_aggregate.add_argument(
    '-x', '--expression', metavar='REGEX', default='-', type=str, nargs='+',
    help='default=%(default)s: A regular expression to aggregate the sequences. Multiple values can be specified.',
)
p_aggregate.set_defaults(handler=command_aggregate)


def command_backtrim(args):
    """Run the `backtrim` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `backtrim_main`.
    """
    from cdskit.backtrim import backtrim_main as run_subcommand

    sys.stderr.write(f'cdskit backtrim: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit backtrim: ended at {datetime.now()}\n')

# `backtrim` subcommand: inherits --version, the input/output-file options and --codontable.
help_backtrim = 'Back-translating a trimmed protein alignment. See `cdskit backtrim -h`'
p_backtrim = subparsers.add_parser(
    'backtrim', help=help_backtrim, parents=[p_version, p_infile, p_outfile, p_codon],
)
# The option is mandatory (required=True); action is left at argparse's default, 'store'.
p_backtrim.add_argument(
    '-a', '--trimmed_aa_aln', metavar='PATH', default='', type=str, required=True,
    help='default=%(default)s: PATH to the trimmed amino acid alignment. '
         'In addition to this, please specify the untrimmed CDS alignment by --seqfile.',
)
p_backtrim.set_defaults(handler=command_backtrim)


def command_backalign(args):
    """Run the `backalign` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `backalign_main`.
    """
    from cdskit.backalign import backalign_main as run_subcommand

    sys.stderr.write(f'cdskit backalign: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit backalign: ended at {datetime.now()}\n')

# `backalign` subcommand: inherits --version, the input/output-file options and --codontable.
help_backalign = 'Back-aligning CDS based on an amino acid alignment. See `cdskit backalign -h`'
p_backalign = subparsers.add_parser(
    'backalign', help=help_backalign, parents=[p_version, p_infile, p_outfile, p_codon],
)
# The option is mandatory (required=True); action is left at argparse's default, 'store'.
p_backalign.add_argument(
    '-a', '--aa_aln', metavar='PATH', default='', type=str, required=True,
    help='default=%(default)s: PATH to aligned amino acid sequences. '
         'In addition to this, please specify unaligned CDS by --seqfile.',
)
p_backalign.set_defaults(handler=command_backalign)


def command_gapjust(args):
    """Run the `gapjust` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `gapjust_main`.
    """
    from cdskit.gapjust import gapjust_main as run_subcommand

    sys.stderr.write(f'cdskit gapjust: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit gapjust: ended at {datetime.now()}\n')

# `gapjust` subcommand: inherits --version, the input/output-file options and the gff options.
help_gapjust = 'Adjusting consecutive Ns to the fixed length. See `cdskit gapjust -h`'
p_gapjust = subparsers.add_parser(
    'gapjust', help=help_gapjust, parents=[p_version, p_infile, p_outfile, p_gffio, ],
)
# Options below rely on argparse's defaults of action='store' and required=False.
p_gapjust.add_argument(
    '--gap_len', metavar='INT', default=100, type=int,
    help='default=%(default)s: Gap length. Ns will be added or removed to make the gap length fixed.',
)
p_gapjust.add_argument(
    '--gap_just_min', metavar='INT', default=None, type=int,
    help='default=%(default)s: Minimum gap length to be adjusted. Ns will be extended if the gap length is equal to or greater than this value.',
)
p_gapjust.add_argument(
    '--gap_just_max', metavar='INT', default=None, type=int,
    help='default=%(default)s: Maximum gap length to be adjusted. Ns will be shortened if the gap length is equal to or smaller than this value.',
)
p_gapjust.set_defaults(handler=command_gapjust)


def command_hammer(args):
    """Run the `hammer` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `hammer_main`.
    """
    from cdskit.hammer import hammer_main as run_subcommand

    sys.stderr.write(f'cdskit hammer: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit hammer: ended at {datetime.now()}\n')

# `hammer` subcommand: inherits --version, the input/output-file options and --codontable.
help_hammer = 'Removing less-occupied codon columns from a gappy alignment. See `cdskit hammer -h`'
p_hammer = subparsers.add_parser(
    'hammer', help=help_hammer, parents=[p_version, p_infile, p_outfile, p_codon],
)
# Options below rely on argparse's defaults of action='store' and required=False.
# --nail stays a string because it accepts either an integer or the word "all".
p_hammer.add_argument(
    '--nail', metavar='INT/all', default='4', type=str,
    help='default=%(default)s: Threshold number of "nail sequences" to hammer down. '
         'Codon columns are removed if there are no more than this number of non-missing sequences. '
         '"all" generates a completely no-gap output.',
)
p_hammer.add_argument(
    '--prevent_gap_only', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Whether to relax (decrease) --nail when a gap-only sequence is generated.',
)
p_hammer.set_defaults(handler=command_hammer)

def command_intersection(args):
    """Run the `intersection` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `intersection_main`.
    """
    from cdskit.intersection import intersection_main as run_subcommand

    sys.stderr.write(f'cdskit intersection: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit intersection: ended at {datetime.now()}\n')

# `intersection` subcommand: inherits --version, the input/output-file options and the gff options,
# and adds a second set of sequence input/output options.
help_intersection = 'Dropping non-overlapping sequence labels between two sequences files or between a sequence file and a gff file. See `cdskit intersection -h`'
p_intersection = subparsers.add_parser(
    'intersection', help=help_intersection, parents=[p_version, p_infile, p_outfile, p_gffio],
)
# Options below rely on argparse's defaults of action='store' and required=False.
p_intersection.add_argument(
    '--seqfile2', metavar='PATH', default=None, type=str,
    help='default=%(default)s: Input sequence file 2.',
)
p_intersection.add_argument(
    '--inseqformat2', metavar='STR', default='fasta', type=str,
    help='default=%(default)s: Input sequence format for --seqfile2.',
)
p_intersection.add_argument(
    '--outfile2', metavar='PATH', default='seqfile2.out', type=str,
    help='default=%(default)s: Output sequence file 2.',
)
p_intersection.add_argument(
    '--outseqformat2', metavar='STR', default='fasta', type=str,
    help='default=%(default)s: Output sequence format for --outfile2.',
)
p_intersection.add_argument(
    '--fix_outrange_gff_records', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Fix gff records that have coordinates out of the sequence range.',
)
p_intersection.set_defaults(handler=command_intersection)


def command_label(args):
    """Run the `label` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `label_main`.
    """
    from cdskit.label import label_main as run_subcommand

    sys.stderr.write(f'cdskit label: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit label: ended at {datetime.now()}\n')

# `label` subcommand: inherits --version plus the input- and output-file options.
help_label = 'Modifying sequence labels. See `cdskit label -h`'
p_label = subparsers.add_parser(
    'label', help=help_label, parents=[p_version, p_infile, p_outfile],
)
# Options below rely on argparse's defaults of action='store' and required=False.
# The literal percent sign in the --replace_chars example is written "%%" because argparse
# %-formats help strings.
p_label.add_argument(
    '--replace_chars', metavar='FROM1FROM2...--TO', default='', type=str,
    help='default=%(default)s: Replace sequence label characters. For example, "!@#$%%^&*+=/?<>|--_" replaces various characters with underbar ("_").',
)
p_label.add_argument(
    '--clip_len', metavar='INT', default=0, type=int,
    help='default=%(default)s: Maximum length of sequence labels. Longer labels are truncated.',
)
p_label.add_argument(
    '--unique', metavar='yes|no', default='no', type=strtobool,
    help='default=%(default)s: Make sequence labels unique by adding suffix (_1, _2, ...).',
)
p_label.set_defaults(handler=command_label)


def command_mask(args):
    """Run the `mask` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `mask_main`.
    """
    from cdskit.mask import mask_main as run_subcommand

    sys.stderr.write(f'cdskit mask: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit mask: ended at {datetime.now()}\n')

# `mask` subcommand: inherits --version, the input/output-file options and --codontable.
help_mask = 'Masking ambiguous and/or stop codons. See `cdskit mask -h`'
p_mask = subparsers.add_parser(
    'mask', help=help_mask, parents=[p_version, p_infile, p_outfile, p_codon],
)
# Options below rely on argparse's defaults of action='store' and required=False.
p_mask.add_argument(
    '-c', '--maskchar', metavar='CHAR', default='N', type=str, choices=['N', '-'],
    help='default=%(default)s: A character to be used to mask codons.',
)
# The two yes|no options here are kept as the strings 'yes'/'no' (type=str with choices),
# not converted to bool.
p_mask.add_argument(
    '-a', '--ambiguouscodon', metavar='yes|no', default='yes', type=str, choices=['yes', 'no'],
    help='default=%(default)s: Mask ambiguous codons. '
         'e.g., "AAN", which may code Asn or Lys in the standard genetic code.',
)
p_mask.add_argument(
    '-t', '--stopcodon', metavar='yes|no', default='yes', type=str, choices=['yes', 'no'],
    help='default=%(default)s: Mask stop codons.',
)
p_mask.set_defaults(handler=command_mask)


def command_pad(args):
    """Run the `pad` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `pad_main`.
    """
    from cdskit.pad import pad_main as run_subcommand

    sys.stderr.write(f'cdskit pad: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit pad: ended at {datetime.now()}\n')

# `pad` subcommand: inherits --version, the input/output-file options and --codontable.
help_pad = 'Making nucleotide sequences in-frame by head and tail paddings. See `cdskit pad -h`'
p_pad = subparsers.add_parser(
    'pad', help=help_pad, parents=[p_version, p_infile, p_outfile, p_codon],
)
# Options below rely on argparse's default of required=False.
p_pad.add_argument(
    '-c', '--padchar', metavar='CHAR', default='N', type=str, choices=['N', '-'],
    help='default=%(default)s: A character to be used to pad when the sequence length is not multiple of three.',
)
# A plain on/off switch: takes no value on the command line.
p_pad.add_argument(
    '-n', '--nopseudo', default=False, action='store_true',
    help='default=%(default)s: Drop sequences that contain stop codon(s) even after padding to 5\'- or 3\'- terminal.',
)
p_pad.set_defaults(handler=command_pad)


def command_parsegb(args):
    """Run the `parsegb` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `parsegb_main`.
    """
    from cdskit.parsegb import parsegb_main as run_subcommand

    sys.stderr.write(f'cdskit parsegb: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit parsegb: ended at {datetime.now()}\n')

# `parsegb` subcommand: inherits --version plus the input- and output-file options.
help_parsegb = 'Converting the GenBank format. See `cdskit parsegb -h`'
p_parsegb = subparsers.add_parser(
    'parsegb', help=help_parsegb, parents=[p_version, p_infile, p_outfile],
)
# Options below rely on argparse's defaults of action='store' and required=False.
p_parsegb.add_argument(
    '--seqnamefmt', metavar='STR', default='organism_accessions', type=str,
    help='default=%(default)s: Underline-separated list of sequence name elements. '
         'Use --list_seqname_keys to browse available values.',
)
p_parsegb.add_argument(
    '--list_seqname_keys', metavar='yes|no', default='no', type=strtobool,
    help='default=%(default)s: Listing the keys (and values) available for --seqnamefmt.',
)
p_parsegb.add_argument(
    '--extract_cds', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Whether to extract the CDS feature.',
)
p_parsegb.set_defaults(handler=command_parsegb)


def command_printseq(args):
    """Run the `printseq` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `printseq_main`.
    """
    from cdskit.printseq import printseq_main as run_subcommand

    sys.stderr.write(f'cdskit printseq: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit printseq: ended at {datetime.now()}\n')

# `printseq` subcommand: inherits --version and the input-file options (no output-file options).
help_printseq = 'Print a subset of sequences with a regex. See `cdskit printseq -h`'
p_printseq = subparsers.add_parser(
    'printseq', help=help_printseq, parents=[p_version, p_infile],
)
# Options below rely on argparse's defaults of action='store' and required=False.
p_printseq.add_argument(
    '-n', '--seqname', default='', type=str,
    help='default=%(default)s: Name of the sequence to print. Regex is supported.',
)
p_printseq.add_argument(
    '--show_seqname', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Whether to show sequence name starting with ">". '
         '"no" prints sequences only.',
)
p_printseq.set_defaults(handler=command_printseq)

def command_rmseq(args):
    """Run the `rmseq` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `rmseq_main`.
    """
    from cdskit.rmseq import rmseq_main as run_subcommand

    sys.stderr.write(f'cdskit rmseq: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit rmseq: ended at {datetime.now()}\n')

# `rmseq` subcommand: inherits --version plus the input- and output-file options.
help_rmseq = 'Removing a subset of sequences by using a sequence name regex and by detecting problematic sequence characters. See `cdskit rmseq -h`'
p_rmseq = subparsers.add_parser(
    'rmseq', help=help_rmseq, parents=[p_version, p_infile, p_outfile],
)
# Options below rely on argparse's defaults of action='store' and required=False.
p_rmseq.add_argument(
    '--seqname', default='', type=str,
    help='default=%(default)s: Names of sequences to remove. Regex is supported.',
)
p_rmseq.add_argument(
    '--problematic_char', default='NX-?', type=str,
    help='default=%(default)s: Problematic characters considered by --problematic_percent. Without separator.',
)
p_rmseq.add_argument(
    '--problematic_percent', default=0, type=float,
    help='default=%(default)s: Sequences containing >= this percentage of --problematic_char are removed.',
)
p_rmseq.set_defaults(handler=command_rmseq)

def command_split(args):
    """Run the `split` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `split_main`.
    """
    from cdskit.split import split_main as run_subcommand

    sys.stderr.write(f'cdskit split: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit split: ended at {datetime.now()}\n')

# `split` subcommand: inherits --version plus the input- and output-file options.
help_split = 'Splitting 1st, 2nd, and 3rd codon positions. See `cdskit split -h`'
p_split = subparsers.add_parser(
    'split', help=help_split, parents=[p_version, p_infile, p_outfile],
)
# The option relies on argparse's defaults of action='store' and required=False.
p_split.add_argument(
    '--prefix', default='INFILE', type=str,
    help='default=%(default)s: Output prefix PATH. '
         'If this is INFILE and --outfile is set, --outfile is used as the prefix.',
)
p_split.set_defaults(handler=command_split)

def command_stats(args):
    """Run the `stats` subcommand, reporting start and end times on stderr.

    The implementation module is imported at call time rather than at the top of the file.
    If the implementation raises, the end message is not written.

    Args:
        args: Parsed command-line arguments, passed unchanged to `stats_main`.
    """
    from cdskit.stats import stats_main as run_subcommand

    sys.stderr.write(f'cdskit stats: started at {datetime.now()}\n')
    run_subcommand(args)
    sys.stderr.write(f'cdskit stats: ended at {datetime.now()}\n')

# `stats` subcommand: inherits --version and the input-file options; it defines no options of its own.
help_stats = 'Printing sequence statistics. See `cdskit stats -h`'
p_stats = subparsers.add_parser(
    'stats',
    help=help_stats,
    parents=[p_version, p_infile],
)
p_stats.set_defaults(handler=command_stats)


# Dispatch.
# Each subparser registers its entry point with set_defaults(handler=...).
# When no subcommand is given the attribute is absent, so the top-level help is printed instead.
args = psr.parse_args()
if not hasattr(args, 'handler'):
    psr.print_help()
else:
    args.handler(args)
