#!/opt/conda/conda-bld/rbpbench_1769440384667/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac/bin/python

import argparse
import base64
import os
import re
import shutil
import statistics
import numpy as np
import sys
from itertools import combinations

from pandas import DataFrame, read_csv
from rbpbench import benchlib
from scipy.stats import fisher_exact
from scipy.stats import mannwhitneyu
from scipy.stats import false_discovery_control  # Benjamini-Hochberg correction.


# Tool version string; surfaced to users via the "-v/--version" argparse
# action ("rbpbench v" + __version__) defined in setup_argument_parser().
__version__ = "1.1.0"

################################################################################

def setup_argument_parser():
    """Setup argparse parser."""
    # Tool description text.
    help_description = """

    Evaluate CLIP-seq and other genomic region data using a comprehensive collection of known RBP binding motifs (RNA sequence + structure).
    RBPBench can be used for a variety of purposes, from RBP motif search in genomic regions, over motif enrichment and co-occurrence analysis, 
    in-depth comparisons over multiple datasets via sequence and genomic annotation statistics, to benchmarking CLIP-seq peak callers, as well as 
    comparisons across cell types and CLIP-seq protocols.

    """

    # Define argument parser.
    p = argparse.ArgumentParser(#add_help=False,
                                prog="rbpbench",
                                description=help_description)

    # Tool version.
    p.add_argument("-v", "--version", action="version",
                   version="rbpbench v" + __version__)

    # Add subparsers.
    subparsers = p.add_subparsers(help='Program modes')

    """
    Search motifs mode.

    """
    p_s = subparsers.add_parser('search',
                                help='Search motifs in genomic sites')
    p_s.set_defaults(which='search')
    # Add required arguments group.
    p_sm = p_s.add_argument_group("required arguments")
    # Required arguments for search.
    p_sm.add_argument("--in",
                   dest="in_sites",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Genomic RBP binding sites (peak regions) file in BED format")
    p_sm.add_argument("--rbps",
                   dest="list_rbps",
                   type=str,
                   metavar='str',
                   nargs='+',
                   required=True,
                   help="List of RBP names to define RBP motifs used for search (--rbps rbp1 rbp2 .. ). To search with all available motifs, use --rbps ALL. NOTE: to search with user-provided motifs, set --rbps USER and provide --user-meme-xml and/or --user-cm. To search only for --regex, set --rbps REGEX")
    p_sm.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Genomic sequences file (currently supported formats: FASTA)")
    p_sm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for search.
    p_s.add_argument("--data-id",
                   dest="data_id",
                   type=str,
                   metavar='str',
                   default = "data_id",
                   help="Dataset ID to describe dataset, e.g. --data-id PUM2_eCLIP_K562, used in output tables and for generating the comparison reports (rbpbench compare)")
    p_s.add_argument("--method-id",
                   dest="method_id",
                   type=str,
                   metavar='str',
                   default = "method_id",
                   help="Method ID to describe peak calling method, e.g. --method-id clipper_idr, used in output tables and for generating the comparison reports (rbpbench compare)")
    p_s.add_argument("--run-id",
                   dest="run_id",
                   type=str,
                   metavar='str',
                   default = "run_id",
                   help="Run ID to describe rbpbench search job, e.g. --run-id RBP1_eCLIP_tool1, used in output tables and reports")
    p_s.add_argument("--user-rbp-id",
                   dest="user_rbp_id",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide RBP ID belonging to provided sequence or structure motif(s) (mandatory for --rbps USER)")
    p_s.add_argument("--user-meme-xml",
                   dest="user_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide MEME/DREME XML file containing sequence motif(s) to be used for the run (needs --rbps USER)")
    p_s.add_argument("--user-cm",
                   dest="user_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide covariance model (.cm) file containing covariance model(s) to be used for the run (needs --rbps USER)")
    p_s.add_argument("--ext",
                   dest="ext_up_down",
                   type=str,
                   metavar='str',
                   default="0",
                   help="Up- and downstream extension of --in sites in nucleotides (nt). Set e.g. --ext 30 for 30 nt on both sides, or --ext 20,10 for different up- and downstream extension (default: 0)")
    p_s.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_s.add_argument("--custom-db",
                   dest="custom_db",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database folder. Alternatively, provide single files via --custom-db-meme-xml, --custom-db-cm, --custom-db-info")
    p_s.add_argument("--custom-db-id",
                   dest="custom_db_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Set ID/name for provided custom motif database via --custom-db  (default: \"custom\")")
    p_s.add_argument("--custom-db-meme-xml",
                   dest="custom_db_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database MEME/DREME XML file containing sequence motifs")
    p_s.add_argument("--custom-db-cm",
                   dest="custom_db_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database covariance model (.cm) file containing covariance model(s)")
    p_s.add_argument("--custom-db-info",
                   dest="custom_db_info",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments")
    p_s.add_argument("--motifs",
                   dest="motifs_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Provide IDs for motifs of interest (need to be in database and loaded). All other RBP motifs will be discarded (except --regex)")
    p_s.add_argument("--motif-min-len",
                   dest="motif_min_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_s.add_argument("--motif-max-len",
                   dest="motif_max_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Maximum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_s.add_argument("--functions",
                   dest="rbp_functions",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Filter defined RBPs (via --rbps) by their molecular functions (annotations available for most database RBPs). E.g. for RBPs in splicing regulation, set --functions SR, for RBPs in RNA stability & decay plus translation regulation, set --functions RSD TR (see rbpbench info for full function descriptions). NOTE that --regex is not affected by filtering")
    p_s.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Define regular expression (regex) DNA motif to include in search, e.g. --regex AAACC, --regex 'C[ACGT]AC[AC]', .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    p_s.add_argument("--regex-id",
                   dest="regex_id",
                   type=str,
                   metavar='str',
                   default = "regex",
                   help="Set regex ID used as RBP ID and database ID associated to -regex hits (default: \"regex\")")
    p_s.add_argument("--motif-regex-id",
                   dest="motif_regex_id",
                   default = False,
                   action = "store_true",
                   help="Use --regex-id for motif ID as well. By default, regular expression string is used as motif ID for regex motif hits")
    p_s.add_argument("--regex-type",
                   dest="regex_type",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set type of supplied --regex string 1: auto-detect type (standard regex or structure pattern). 2: given --regex string is standard regex, e.g. AC[AG]T. 3: given --regex string is structure pattern string, e.g. ((AA(((...)))AA)) (default: 1)")
    p_s.add_argument("--regex-search-mode",
                   dest="regex_search_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define regex search mode. 1: when motif hit encountered, continue +1 after motif hit start position, 2: when motif hit encountered, continue +1 after motif hit end position. NOTE that structure pattern regex currently always uses mode 1 (default: 1)")
    p_s.add_argument("--regex-min-gc",
                    dest="regex_min_gc",
                    type=float,
                    metavar='float',
                    default=0.0,
                    help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_s.add_argument("--regex-max-gu",
                    dest="regex_max_gu",
                    type=float,
                    metavar='float',
                    default=1.0,
                    help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_s.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_s.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    p_s.add_argument("--fimo-ntf-file",
                   dest="fimo_user_ntf_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide FIMO nucleotide frequencies (FIMO option: --bfile) file (default: use internal frequencies file, define which with --fimo-ntf-mode)")
    p_s.add_argument("--fimo-ntf-mode",
                   dest="fimo_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for FIMO search. 1: use frequencies from human ENSEMBL transcripts (excluding introns, A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_s.add_argument("--fimo-pval",
                   dest="fimo_pval",
                   type=float,
                   metavar='float',
                   default=0.0005,
                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.0005)")
    p_s.add_argument("--cmsearch-bs",
                   dest="cmsearch_bs",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="CMSEARCH bit score threshold (CMSEARCH options: -T --incT). The higher the more strict (default: 1.0)")
    p_s.add_argument("--cmsearch-mode",
                   dest="cmsearch_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set CMSEARCH mode to control strictness of filtering. 1: default setting (CMSEARCH option: --default). 2: max setting (CMSEARCH option: --max), i.e., turn all heuristic filters off, slower and more sensitive / more hits) (default: 1)")
    p_s.add_argument("--greatest-hits",
                   dest="greatest_hits",
                   default = False,
                   action = "store_true",
                   help = "Keep only best FIMO/CMSEARCH motif hits (i.e., hit with lowest p-value / highest bit score for each motif sequence/site combination). By default, report all hits (default: False)")
    p_s.add_argument("--bed-score-col",
                   dest="bed_score_col",
                   type=int,
                   metavar='int',
                   default=5,
                   help="--in BED score column used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)")
    p_s.add_argument("--bed-sc-thr",
                   dest="bed_sc_thr",
                   type = float,
                   metavar='float',
                   default = None,
                   help = "Minimum site score (by default: --in BED column 5, or set via --bed-score-col) for filtering (assuming higher score == better site) (default: None)")
    p_s.add_argument("--bed-sc-thr-rev",
                   dest="bed_sc_thr_rev_filter",
                   default = False,
                   action = "store_true",
                   help = "Reverse --bed-sc-thr filtering (i.e. the lower the better, e.g. if score column contains p-values) (default: False)")
    p_s.add_argument("--wrs-mode",
                   dest="wrs_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Defines Wilcoxon rank-sum test alternative hypothesis for testing whether motif-containing regions have significantly different scores. 1: test for higher (greater) scores, 2: test for lower (less) scores (default: 1)")
    p_s.add_argument("--fisher-mode",
                   dest="fisher_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines Fisher exact test alternative hypothesis for testing co-occurrences of RBP motifs. 1: greater, 2: two-sided, 3: less (default: 1)")
    p_s.add_argument("--unstranded",
                   dest="unstranded",
                   default = False,
                   action = "store_true",
                   help = "Set if --in BED regions are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with --unstranded-ct) (default: False)")
    p_s.add_argument("--unstranded-ct",
                   dest="unstranded_ct",
                   default = False,
                   action = "store_true",
                   help = "Count each --in region twice for RBP hit statistics when --unstranded is enabled. By default, two strands of one region are counted as one region for RBP hit statistics")
    p_s.add_argument("--plot-motifs",
                   dest="plot_motifs",
                   default = False,
                   action = "store_true",
                   help = "Visualize selected sequence motifs, by outputting sequence logos and motif hit statistics into a separate .html file (default: False)")
    p_s.add_argument("--top-n-matched",
                   dest="top_n_matched",
                   type=int,
                   metavar='int',
                   default=10,
                   help="Set top n matched sequences to be displayed in motif hit statistics HTML report (create via --plot-motifs) (default: 10)")
    # p_s.add_argument("--add-motif-annot",
    #                dest="add_motif_annot",
    #                default = False,
    #                action = "store_true",
    #                help = "Add motif hit region annotations to --plot-motifs HTML file. This requires --gtf set (default: False)")
    p_s.add_argument("--cooc-pval-thr",
                   dest="cooc_pval_thr",
                   type=float,
                   metavar='float',
                   default=0.005,
                   help="RBP co-occurrence p-value threshold for reporting significant co-occurrences. NOTE that if --cooc-pval-mode Bonferroni is selected, this threshold gets further adjusted by Bonferroni correction (i.e. divided by number of tests). Threshold applies unchanged for BH corrected p-values as well as for disabled correction (default: 0.005)")
    p_s.add_argument("--cooc-pval-mode",
                   dest="cooc_pval_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines multiple testing correction mode for co-occurrence p-values. 1: Benjamini-Hochberg (BH), 2: Bonferroni, 3: no correction (default: 1)")
    p_s.add_argument("--min-motif-dist",
                   dest="min_motif_dist",
                   type=int,
                   metavar='int',
                   default=0,
                   help="Set minimum mean motif distance for an RBP pair to be reported significant in RBP co-occurrence heatmap plot. By default (value 0), all RBP pairs <= set p-value are reported significant. So setting --min-motif-dist >= 0 acts as a second filter to show only RBP pairs with signficiant p-values as significant if there is the specified minimum average distance between their motifs (default: 0)")
    p_s.add_argument("--max-motif-dist",
                   dest="max_motif_dist",
                   type=int,
                   metavar='int',
                   default=20,
                   help="Set maximum motif distance for RBP co-occurrence plot statistic inside hover boxes (default: 20)")
    p_s.add_argument("--disable-len-dist-plot",
                   dest="disable_len_dist_plot",
                   default = False,
                   action = "store_true",
                   help = "Disable input sequence length distribution plot in HTML report (default: False)")
    # k-mer distribution plots.
    p_s.add_argument("--disable-kmer-tb-plot",
                   dest="disable_kmer_tb_plot",
                   default = False,
                   action = "store_true",
                   help = "Disable top vs bottom scoring sites k-mer distribution plot. By default, plot is generated with split point as the middle point, i.e., top 50 percent vs bottom 50 percent scoring sites (default: False)")
    p_s.add_argument("--kmer-tb-plot-top-n",
                   dest="kmer_tb_plot_top_n",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Set number of top scoring sites to use for top vs bottom scoring sites distribution k-mer plot. By default, top and bottom is split in half (default: False)")
    p_s.add_argument("--kmer-tb-plot-bottom-n",
                   dest="kmer_tb_plot_bottom_n",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Set number of bottom scoring sites to use for top vs bottom scoring sites k-mer distribution plot. By default, top and bottom is split in half (default: False)")
    p_s.add_argument("--disable-kmer-var-plot",
                   dest="disable_kmer_var_plot",
                   default = False,
                   action = "store_true",
                   help = "Disable sequence k-mer variation plot (default: False)")
    p_s.add_argument("--kmer-var-color-mode",
                   dest="kmer_var_color_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help = "Define which attribute to use for coloring sequence k-mer variation plot. 1: correlation (Spearman) between k-mer ratios and site scores. 2: k-mer percentage in dataset (default: 1)")
    p_s.add_argument("--kmer-plot-k",
                   dest="kmer_plot_k",
                   type=int,
                   metavar='int',
                   default=4,
                   help="Define k for k-mer distribution plots, including top k-mers plot, top vs bottom scoring sites plot, and k-mer variation plot (default: 4)")
    # Upset plot.
    p_s.add_argument("--enable-upset-plot",
                   dest="enable_upset_plot",
                   default = False,
                   action = "store_true",
                   help = "Enable upset plot in HTML report (default: False)")
    p_s.add_argument("--upset-plot-min-degree",
                   dest="upset_plot_min_degree",
                   type=int,
                   metavar='int',
                   default=2,
                   help="Upset plot minimum degree parameter (default: 2)")
    p_s.add_argument("--upset-plot-max-degree",
                   dest="upset_plot_max_degree",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Upset plot maximum degree. By default no maximum degree is set. Useful to look at specific degrees only (together with --upset-plot-min-degree), e.g. 2 or 2 to 3 (default: None)")
    p_s.add_argument("--upset-plot-min-subset-size",
                   dest="upset_plot_min_subset_size",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Upset plot minimum subset size (default: 5)")
    p_s.add_argument("--upset-plot-max-subset-rank",
                   dest="upset_plot_max_subset_rank",
                   type=int,
                   metavar='int',
                   default=25,
                   help="Upset plot maximum subset rank to plot. All tied subsets are included (default: 25)")
    p_s.add_argument("--upset-plot-max-rbp-rank",
                   dest="upset_plot_max_rbp_rank",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Maximum RBP hit region count rank. Set this to limit the amount of RBPs included in upset plot (+ statistic !) to top --upset-plot-max-rbp-rank RBPs. By default all RBPs are included (default: None)")
    p_s.add_argument("--upset-plot-min-rbp-count",
                   dest="upset_plot_min_rbp_count",
                   type=int,
                   metavar='int',
                   default=0,
                   help="Minimum amount of input sites containing motifs for an RBP in order for the RBP to be included in upset plot (+ statistic !). By default, all RBPs are included, also RBPs without hit regions (default: 0)")
    p_s.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Input GTF file for genomic region annotations + related plots (e.g. from GENCODE or Ensembl). By default the most prominent transcripts will be extracted and used for functional annotation. Alternatively, provide a list of expressed transcripts via --tr-list (together with --gtf containing the transcripts). Note that only features on standard chromosomes (1,2,..,X,Y,MT) are currently used for annotation")
    p_s.add_argument("--tr-list",
                   dest="tr_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with transcript IDs (one ID per row) to define which transcripts to use from --gtf for genomic annotation of input regions")
    p_s.add_argument("--tr-types",
                   dest="tr_types_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   help="List of transcript biotypes to consider for genomic region annotations. By default an internal selection of transcript biotypes is used (in addition to intron, CDS, UTR, intergenic). Note that provided biotype strings need to be in GTF file")
    p_s.add_argument("--gtf-feat-min-overlap",
                   dest="gtf_feat_min_overlap",
                   type=float,
                   metavar='float',
                   default=0.1,
                   help="Minimum amount of overlap required for a region to be assigned to a GTF feature (if less or no overlap, region will be assigned to \"intergenic\"). If there is overlap with several features, assign the one with highest overlap (default: 0.1)")
    # p_s.add_argument("--gtf-eib-min-overlap",
    #                dest="gtf_eib_min_overlap",
    #                type=float,
    #                metavar='float',
    #                default=0.5,
    #                help="Minimum amount input region has to overlap with exon (e), intron (i), i + ei borders to be counted as overlapping with these (note that the amount is reciprocal, i.e., one of the overlapping parts meeting the minimum amount is enough) (default: 0.5)")
    p_s.add_argument("--gtf-intron-border-len",
                   dest="gtf_intron_border_len",
                   type=int,
                   metavar='int',
                   default=250,
                   help="Set intron border region length (up- + downstream ends) for exon intron overlap statistics (default: 250)")
    p_s.add_argument("--gtf-min-mrna-overlap",
                   dest="gtf_min_mrna_overlap",
                   type=float,
                   metavar='float',
                   default=0.5,
                   help="Minimum amount of overlap required for a region to be considered in mRNA region site coverage profile (i.e., overlap with mRNA exons) (default: 0.5)")
    p_s.add_argument("--exp-gene-list",
                   dest="exp_gene_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with gene IDs (one ID per row) of expressed genes for filtering selected RBPs (supported gene ID format: ENSG00000100320, no version numbers). E.g. if --rbps ALL selected, using --exp-gene-list allows filtering RBPs by expression (default: False)")
    p_s.add_argument("--exp-gene-filter",
                   dest="exp_gene_filter",
                   default = False,
                   action = "store_true",
                   help = "Filter out --in regions not overlapping with --exp-gene-list gene regions. Note that --exp-gene-list gene IDs have to be compatible with --gtf file (default: False)")
    p_s.add_argument("--exp-gene-filter-mode",
                   dest="exp_gene_filter_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help = "Define --exp-gene-filter behavior. 1: filter out --in regions NOT OVERLAPPING with --exp-gene-list gene regions. 2: filter out --in regions OVERLAPPING with --exp-gene-list gene regions (default: 1)")
    p_s.add_argument("--mrna-norm-mode",
                   dest="mrna_norm_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help = "Define whether to use median or mean mRNA region lengths for plotting. 1: median. 2: mean (default: 1)")
    p_s.add_argument("--disable-all-reg-bar",
                   dest="disable_all_reg_bar",
                   default = False,
                   action = "store_true",
                   help = "Disable adding region annotations for all input regions (irrespective of motif hits) to stacked bar plot (default: False)")
    # Sequence k-mers plot.
    p_s.add_argument("--disable-kmer-pca-plot",
                   dest="disable_kmer_pca_plot",
                   default = False,
                   action = "store_true",
                   help = "Disable input sequences by k-mer content PCA plot (default: False)")
    p_s.add_argument("--kmer-pca-plot-k",
                   dest="kmer_pca_plot_k",
                   type=int,
                   metavar='int',
                   default=3,
                   help="Set k for k-mer usage in sequences by k-mer content PCA plot (default: 3)")
    p_s.add_argument("--kmer-pca-plot-no-comp",
                   dest="kmer_pca_plot_no_comp",
                   default = False,
                   action = "store_true",
                   help = "Disable adding sequence complexity as feature to sequences by k-mer content PCA plot (default: False)")
    p_s.add_argument("--kmer-pca-plot-comp-k",
                   dest="kmer_pca_plot_comp_k",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set k for sequence complexity (Shannon entropy) calculation. 1 == mono-nucleotides, 2 == di-nucleotides (default: 1)")
    p_s.add_argument("--kmer-pca-plot-no-motifs",
                   dest="kmer_pca_plot_no_motifs",
                   default = False,
                   action = "store_true",
                   help = "Disable adding motif hits information to sequences by k-mer content PCA plot (default: False)")
    # Set RBP ID to focus on in motif distance plots.
    p_s.add_argument("--set-rbp-id",
                   dest="set_rbp_id",
                   type=str,
                   metavar='str',
                   default = None,
                   help = "Set reference RBP ID to plot motif distances relative to motifs from this RBP (needs to be one of the selected RBP IDs!). Motif plot will be centered on best scoring motif of the RBP for each region")
    p_s.add_argument("--motif-distance-plot-range",
                   dest="motif_distance_plot_range",
                   type=int,
                   metavar='int',
                   default=60,
                   help="Set range of motif distance plot. I.e., centered on the set RBP (--set-rbp-id) motifs, motifs within minus and plus --motif-distance-plot-range will be plotted (default: 50)")
    p_s.add_argument("--motif-min-pair-count",
                   dest="motif_min_pair_count",
                   type=int,
                   metavar='int',
                   default=10,
                   help="On single motif level, minimum count of co-occurrences of a motif with set RBP ID (--set-rbp-id) motif to be reported and plotted (default: 10)")
    p_s.add_argument("--rbp-min-pair-count",
                   dest="rbp_min_pair_count",
                   type=int,
                   metavar='int',
                   default=10,
                   help="On RBP level, minimum amount of co-occurrences of motifs for an RBP ID compared to set RBP ID (--set-rbp-id) motifs to be reported and plotted (default: 10)")
    # GO enrichment analysis.
    p_s.add_argument("--goa",
                   dest="run_goa",
                   default = False,
                   action = "store_true",
                   help = "Run gene ontology (GO) enrichment analysis on genes occupied by --in regions. Requires --gtf (default: False)")
    p_s.add_argument("--goa-obo-mode",
                   dest="goa_obo_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to obtain GO DAG (directed acyclic graph) obo file. 1: download most recent file from internet,  2: use local file,  3: provide file via --goa-obo-file (default: 1)")
    p_s.add_argument("--goa-obo-file",
                   dest="goa_obo_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide GO DAG obo file (default: False)")
    p_s.add_argument("--goa-gene2go-file",
                   dest="goa_gene2go_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide gene ID to GO IDs mapping table (row format: gene_id<tab>go_id1,go_id2). By default, a local file with ENSEMBL gene IDs is used. NOTE that gene IDs need to be compatible with --gtf (default: False)")
    p_s.add_argument("--goa-pval",
                   dest="goa_pval",
                   type=float,
                   metavar='float',
                   default=0.05,
                   help="GO enrichment analysis p-value threshold (applied on corrected p-value) (default: 0.05)")
    p_s.add_argument("--goa-cooc-mode",
                   dest="goa_cooc_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define what input regions to consider in GOA, in relation to motif hit (co-)occurrences. 1: Perform GOA on all input regions (no motif (co-)occurrences required). 2: perform GOA only on input regions with motif hits from ANY specified RBP. 3: perform GOA only on input regions with motif hits from ALL specified RBPs. GOA on input regions translates to GOA on the underlying genes (default: 1)")
    p_s.add_argument("--goa-bg-gene-list",
                   dest="goa_bg_gene_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with gene IDs (one ID per row) to use as background gene list for GOA. NOTE that gene IDs need to be compatible with --gtf (default: False)")
    p_s.add_argument("--goa-max-child",
                   dest="goa_max_child",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify maximum number of children for a significant GO term to be reported in HTML table, e.g. --goa-max-child 100. This allows filtering out very broad terms (default: None)")
    p_s.add_argument("--goa-min-depth",
                   dest="goa_min_depth",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify minimum depth number for a significant GO term to be reported in HTML table, e.g. --goa-min-depth 5 (default: None)")
    p_s.add_argument("--goa-filter-purified",
                   dest="goa_filter_purified",
                   default = False,
                   action = "store_true",
                   help = "Filter out GOA results labeled as purified (i.e., GO terms with significantly lower concentration) in HTML table (default: False)")
    p_s.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default = False,
                   action = "store_true",
                   help = "Store plot files with absolute paths in HTML files. Default is relative paths (default: False)")
    # Additional annotations.
    p_s.add_argument("--prom-min-tr-len",
                   dest="prom_min_tr_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum transcript length for promoter region extraction. By default consider all transcript regions")
    p_s.add_argument("--prom-mrna-only",
                   dest="prom_mrna_only",
                   default = False,
                   action = "store_true",
                   help="Consider only mRNA transcript regions for promoter region extraction")
    p_s.add_argument("--prom-both-str",
                   dest="prom_both_str",
                   default = False,
                   action = "store_true",
                   help="Use both strands for promoter region overlap calculation. By default, use transcript strand")
    p_s.add_argument("--prom-ext",
                   dest="prom_ext_up_down",
                   type=str,
                   metavar='str',
                   default="1000,100",
                   help="Up- and downstream extension of transcript start site (TSS) to define putative promoter regions, e.g. --prom-ext 500,0 for 500 upstream and 0 downstream extension (default: 1000,100)")
    p_s.add_argument("--add-annot-bed",
                   dest="add_annot_bed",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Specify additional genomic regions in BED format for which to calculate the percentage of input regions that overlap with them")
    p_s.add_argument("--add-annot-comp",
                   dest="add_annot_comp",
                   default = False,
                   action = "store_true",
                   help="Get the complement percentage, i.e., the percentage of input regions NOT overlapping with --add-annot-bed regions")
    p_s.add_argument("--add-annot-id",
                   dest="add_annot_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Label to use for additional regions in HTML report (default: \"custom\")")
    p_s.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default = False,
                   action = "store_true",
                   help = "Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_s.add_argument("--meme-no-check",
                   dest="meme_disable_check",
                   default = False,
                   action = "store_true",
                   help = "Disable MEME version check. Make sure --meme-no-pgc is set if MEME version >= 5.5.4 is installed! (default: False)")
    p_s.add_argument("--meme-no-pgc",
                   dest="meme_no_pgc",
                   default = False,
                   action = "store_true",
                   help = "Manually set MEME's FIMO --no-pgc option (required for MEME version >= 5.5.4). Make sure that MEME >= 5.5.4 is installed! (default: False)")
    p_s.add_argument("--plotly-js-mode",
                   dest="plotly_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3, 4, 5, 6, 7],
                   help = "Define how to provide plotly .js file. 1: use online version via \"cdn\" (requires internet connection). 2: link to packaged plotly .js file. 3: copy plotly .js file to plots output folder. 4: include plotly .js code in plotly HTML. 5: put web version link and plotly plot codes into main HTML. 6: put local version link and plotly plot codes in main HTML. 7: put plotly js and plotly plot codes into main HTML! (default: 1)")
    p_s.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")

    """
    Batch mode.

    """
    # 'batch' mode: run motif search over multiple input BED datasets at once.
    p_b = subparsers.add_parser('batch',
                                help='Search motifs on > 1 dataset')
    # Tag parsed args so the dispatcher knows which subcommand was invoked.
    p_b.set_defaults(which='batch')
    # Add required arguments group.
    p_bm = p_b.add_argument_group("required arguments")
    # Required arguments for batch.
    # Input datasets: a folder of BED files, a list of BED files, or a table
    # file (see help text for the accepted naming/format conventions).
    p_bm.add_argument("--bed",
                   dest="bed_files",
                   type=str,
                   metavar='str',
                   nargs='+',
                   required=True,
                   help="Provide either: a folder with BED files (e.g. --bed clipper_bed), a list of BED files to search for motifs, or a table file defining files and settings. If folder, RBP IDs should be part of BED file names, like: RBP1_...bed, RBP2_...bed. If list of BED files, define RBP IDs with --rbp-list. If table file, see manual for the correct format")
    # Genome sequences (FASTA) used to extract region sequences.
    p_bm.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Genomic sequences file (currently supported formats: FASTA)")
    # Output folder for all batch results.
    p_bm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Batch job results output folder")
    # Optional arguments for batch.
    # One RBP ID per --bed BED file; order must match --bed.
    # Fix: help referenced a nonexistent "--bed-list" option — the batch-mode
    # input option is "--bed".
    p_b.add_argument("--rbp-list",
                   dest="list_rbp_ids",
                   type=str,
                   metavar='str',
                   nargs='+',
                   help="List of RBP names to define RBP motifs used for search. One --rbp-list RBP ID for each --bed BED file (NOTE: order needs to correspond to --bed order)")
    # Per-dataset data IDs (order must match --bed), or one global ID via --data-id.
    # Fixes: help referenced nonexistent "-bed-list" (should be --bed), said
    # "--data-id to set method" (copy-paste from the method options), and the
    # --data-id example used "--method-id" instead of "--data-id".
    p_b.add_argument("--data-list",
                   dest="list_data_ids",
                   type=str,
                   metavar='str',
                   nargs='+',
                   help="List of data IDs to describe datasets given by --bed (NOTE: order needs to correspond to --bed order). Alternatively, use --data-id to set the data ID for all datasets")
    p_b.add_argument("--data-id",
                   dest="data_id",
                   type=str,
                   metavar='str',
                   default = "data_id",
                   help="Data ID to describe data for given datasets, e.g. --data-id k562_eclip, used in output tables and for generating the comparison reports (rbpbench compare)")
    # Per-dataset method IDs (order must match --bed), or one global ID via --method-id.
    # Fix: help referenced nonexistent "-bed-list" — the batch-mode input
    # option is "--bed".
    p_b.add_argument("--method-list",
                   dest="list_method_ids",
                   type=str,
                   metavar='str',
                   nargs='+',
                   help="List of method IDs to describe datasets given by --bed (NOTE: order needs to correspond to --bed order). Alternatively, use --method-id to set method for all datasets")
    p_b.add_argument("--method-id",
                   dest="method_id",
                   type=str,
                   metavar='str',
                   default = "method_id",
                   help="Method ID to describe peak calling method for given datasets, e.g. --method-id clipper_idr, used in output tables and for generating the comparison reports (rbpbench compare)")
    p_b.add_argument("--run-id",
                   dest="run_id",
                   type=str,
                   metavar='str',
                   default = "run_id",
                   help="Run ID to describe rbpbench search job, e.g. --run-id RBP1_eCLIP_tool1, used in output tables and reports")
    p_b.add_argument("--ext",
                   dest="ext_up_down",
                   type=str,
                   metavar='str',
                   default="0",
                   help="Up- and downstream extension of --in sites in nucleotides (nt). Set e.g. --ext 30 for 30 nt on both sides, or --ext 20,10 for different up- and downstream extension (default: 0)")
    p_b.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_b.add_argument("--custom-db",
                   dest="custom_db",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database folder")
    p_b.add_argument("--custom-db-id",
                   dest="custom_db_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Set ID/name for provided custom motif database via --custom-db  (default: \"custom\")")
    p_b.add_argument("--custom-db-meme-xml",
                   dest="custom_db_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database MEME/DREME XML file containing sequence motifs")
    p_b.add_argument("--custom-db-cm",
                   dest="custom_db_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database covariance model (.cm) file containing covariance model(s)")
    p_b.add_argument("--custom-db-info",
                   dest="custom_db_info",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments")
    p_b.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Define regular expression (regex) DNA motif to include in search, e.g. --regex AAACC, --regex 'C[ACGT]AC[AC]', .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    p_b.add_argument("--regex-search-mode",
                   dest="regex_search_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define regex search mode. 1: when motif hit encountered, continue +1 after motif hit start position, 2: when motif hit encountered, continue +1 after motif hit end position. NOTE that structure pattern regex currently always uses mode 1 (default: 1)")
    p_b.add_argument("--regex-type",
                   dest="regex_type",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set type of supplied --regex string 1: auto-detect type (standard regex or structure pattern). 2: given --regex string is standard regex, e.g. AC[AG]T. 3: given --regex string is structure pattern string, e.g. ((AA(((...)))AA)) (default: 1)")
    p_b.add_argument("--regex-min-gc",
                    dest="regex_min_gc",
                    type=float,
                    metavar='float',
                    default=0.0,
                    help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_b.add_argument("--regex-max-gu",
                    dest="regex_max_gu",
                    type=float,
                    metavar='float',
                    default=1.0,
                    help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_b.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_b.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    p_b.add_argument("--max-motif-dist",
                   dest="max_motif_dist",
                   type=int,
                   metavar='int',
                   default=20,
                   help="Set maximum motif distance for regex-RBP co-occurrence statistic in HTML report (default: 20)")
    p_b.add_argument("--fimo-ntf-file",
                   dest="fimo_user_ntf_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide FIMO nucleotide frequencies (FIMO option: --bfile) file (default: use internal frequencies file, define which with --fimo-ntf-mode)")
    p_b.add_argument("--fimo-ntf-mode",
                   dest="fimo_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for FIMO search. 1: use frequencies from human ENSEMBL transcripts (excluding introns, A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_b.add_argument("--fimo-pval",
                   dest="fimo_pval",
                   type=float,
                   metavar='float',
                   default=0.0005,
                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.0005)")
    p_b.add_argument("--cmsearch-bs",
                   dest="cmsearch_bs",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="CMSEARCH bit score threshold (CMSEARCH options: -T --incT). The higher the more strict (default: 1.0)")
    p_b.add_argument("--cmsearch-mode",
                   dest="cmsearch_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set CMSEARCH mode to control strictness of filtering. 1: default setting (CMSEARCH option: --default). 2: max setting (CMSEARCH option: --max), i.e., turn all heuristic filters off, slower and more sensitive / more hits) (default: 1)")
    p_b.add_argument("--greatest-hits",
                   dest="greatest_hits",
                   default = False,
                   action = "store_true",
                   help = "Keep only best FIMO/CMSEARCH motif hits (i.e., hit with lowest p-value / highest bit score for each motif sequence/site combination). By default, report all hits (default: False)")
    p_b.add_argument("--bed-score-col",
                   dest="bed_score_col",
                   type=int,
                   metavar='int',
                   default=5,
                   help="--in BED score column used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)")
    p_b.add_argument("--bed-sc-thr",
                   dest="bed_sc_thr",
                   type = float,
                   metavar='float',
                   default = None,
                   help = "Minimum site score (by default: --in BED column 5, or set via --bed-score-col) for filtering (assuming higher score == better site) (default: None)")
    p_b.add_argument("--bed-sc-thr-rev",
                   dest="bed_sc_thr_rev_filter",
                   default = False,
                   action = "store_true",
                   help = "Reverse --bed-sc-thr filtering (i.e. the lower the better, e.g. if score column contains p-values) (default: False)")
    p_b.add_argument("--wrs-mode",
                   dest="wrs_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Defines Wilcoxon rank-sum test alternative hypothesis for testing whether motif-containing regions have significantly different scores. 1: test for higher (greater) scores, 2: test for lower (less) scores (default: 1)")
    p_b.add_argument("--fisher-mode",
                   dest="fisher_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines Fisher exact test alternative hypothesis for testing co-occurrences of RBP motifs. 1: greater, 2: two-sided, 3: less (default: 1)")
    p_b.add_argument("--unstranded",
                   dest="unstranded",
                   default = False,
                   action = "store_true",
                   help = "Set if --in BED regions are NOT strand-specific, i.e., to look for motifs on both strands of the provided regions. Note that the two strands of a region will still be counted as one region (change with --unstranded-ct) (default: False)")
    p_b.add_argument("--unstranded-ct",
                   dest="unstranded_ct",
                   default = False,
                   action = "store_true",
                   help = "Count each --in region twice for RBP hit statistics when --unstranded is enabled. By default, two strands of one region are counted as one region for RBP hit statistics")
    p_b.add_argument("--kmer-size",
                   dest="kmer_size",
                   type=int,
                   metavar='int',
                   default=5,
                   help="K-mer size for comparative plots (default: 5)")
    p_b.add_argument("--disable-heatmap-cluster-olo",
                   dest="disable_heatmap_cluster_olo",
                   default = False,
                   action = "store_true",
                   help="Disable optimal leave ordering (OLO) for clustering gene region occupancy heatmap. By default, OLO is enabled")
    p_b.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Input GTF file with genomic annotations to generate genomic region annotation plots for each input BED file (output to HTML report). By default the most prominent transcripts will be extracted and used for functional annotation. Alternatively, provide a list of expressed transcripts via --tr-list (together with --gtf containing the transcripts). Note that only features on standard chromosomes (1,2,..,X,Y,MT) are currently used for annotation")
    p_b.add_argument("--tr-list",
                   dest="tr_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with transcript IDs (one ID per row) to define which transcripts to use from --gtf for genomic region annotations plots")
    p_b.add_argument("--tr-types",
                   dest="tr_types_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   help="List of transcript biotypes to consider for genomic region annotations plots. By default an internal selection of transcript biotypes is used (in addition to intron, CDS, UTR, intergenic). Note that provided biotype strings need to be in --gtf GTF file")
    p_b.add_argument("--gtf-feat-min-overlap",
                   dest="gtf_feat_min_overlap",
                   type=float,
                   metavar='float',
                   default=0.1,
                   help="Minimum amount of overlap required for a region to be assigned to a GTF feature (if less or no overlap, region will be assigned to \"intergenic\"). If there is overlap with several features, assign the one with highest overlap (default: 0.1)")
    p_b.add_argument("--gtf-intron-border-len",
                   dest="gtf_intron_border_len",
                   type=int,
                   metavar='int',
                   default=250,
                   help="Set intron border region length (up- + downstream ends) for exon intron overlap statistics (default: 250)")
    p_b.add_argument("--no-occ-heatmap",
                   dest="no_occ_heatmap",
                   default = False,
                   action = "store_true",
                   help = "Do not produce gene region occupancy heatmap plot in HTML report (default: False)")
    p_b.add_argument("--hk-gene-list",
                   dest="hk_gene_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with gene IDs (one ID per row) of housekeeping genes as additional plotting info. ID format needs to be compatible with provided --gtf file (default: False)")
    p_b.add_argument("--no-comp-feat",
                   dest="no_comp_feat",
                   default = False,
                   action = "store_true",
                   help = "Disable sequence complexity info to be added to plot (default: False)")
    p_b.add_argument("--seq-comp-k",
                   dest="seq_comp_k",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set k for sequence complexity (Shannon entropy) calculation. 1 == mono-nucleotides, 2 == di-nucleotides (default: 1)")
    p_b.add_argument("--seq-var-kmer-size",
                   dest="seq_var_kmer_size",
                   type=int,
                   metavar='int',
                   default=3,
                   help="K-mer size for sequence variation statistics and plot (default: 3)")
    p_b.add_argument("--seq-var-feat-mode",
                   dest="seq_var_feat_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help = "Define what sequence k-mer variation plot to create 1: plot using site percentages for each k-mer as dimensions. 2: plot using k-mer variations as dimensions (default: 1)")
    p_b.add_argument("--seq-var-color-mode",
                   dest="seq_var_color_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help = "Define which attribute to use for coloring k-mer variation plot. 1: average k-mer site percentage. 2: present k-mers percentage (default: 1)")
    # GO enrichment analysis for batch mode.
    p_b.add_argument("--goa",
                   dest="run_goa",
                   default = False,
                   action = "store_true",
                   help = "Run gene ontology (GO) enrichment analysis on genes occupied by sites in input datasets. Requires --gtf (default: False)")
    p_b.add_argument("--goa-obo-mode",
                   dest="goa_obo_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to obtain GO DAG (directed acyclic graph) obo file. 1: download most recent file from internet,  2: use local file,  3: provide file via --goa-obo-file (default: 1)")
    p_b.add_argument("--goa-obo-file",
                   dest="goa_obo_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide GO DAG obo file (default: False)")
    p_b.add_argument("--goa-gene2go-file",
                   dest="goa_gene2go_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide gene ID to GO IDs mapping table (row format: gene_id<tab>go_id1,go_id2). By default, a local file with ENSEMBL gene IDs is used. NOTE that gene IDs need to be compatible with --gtf (default: False)")
    p_b.add_argument("--goa-pval",
                   dest="goa_pval",
                   type=float,
                   metavar='float',
                   default=0.05,
                   help="GO enrichment analysis p-value threshold (applied on corrected p-value) (default: 0.05)")
    p_b.add_argument("--goa-only-cooc",
                   dest="goa_only_cooc",
                   default = False,
                   action = "store_true",
                   help = "Only look at genes in GO enrichment analysis which contain motif hits for all input datasets. By default, GO enrichment analysis is performed on the genes covered by sites from all input datasets (default: False)")
    p_b.add_argument("--goa-bg-gene-list",
                   dest="goa_bg_gene_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with gene IDs (one ID per row) to use as background gene list for GOA. NOTE that gene IDs need to be compatible with --gtf (default: False)")
    p_b.add_argument("--goa-max-child",
                   dest="goa_max_child",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify maximum number of children for a significant GO term to be reported in HTML table, e.g. --goa-max-child 100. This allows filtering out very broad terms (default: None)")
    p_b.add_argument("--goa-min-depth",
                   dest="goa_min_depth",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify minimum depth number for a significant GO term to be reported in HTML table, e.g. --goa-min-depth 5 (default: None)")
    p_b.add_argument("--goa-filter-purified",
                   dest="goa_filter_purified",
                   default = False,
                   action = "store_true",
                   help = "Filter out GOA results labeled as purified (i.e., GO terms with significantly lower concentration) in HTML table (default: False)")
    # Additional annotations.
    p_b.add_argument("--prom-min-tr-len",
                   dest="prom_min_tr_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum transcript length for promoter region extraction. By default consider all transcript regions")
    p_b.add_argument("--prom-mrna-only",
                   dest="prom_mrna_only",
                   default = False,
                   action = "store_true",
                   help="Consider only mRNA transcript regions for promoter region extraction")
    p_b.add_argument("--prom-both-str",
                   dest="prom_both_str",
                   default = False,
                   action = "store_true",
                   help="Use both strands for promoter region overlap calculation. By default, use transcript strand")
    p_b.add_argument("--prom-ext",
                   dest="prom_ext_up_down",
                   type=str,
                   metavar='str',
                   default="1000,100",
                   help="Up- and downstream extension of transcript start site (TSS) to define putative promoter regions, e.g. --prom-ext 500,0 for 500 upstream and 0 downstream extension (default: 1000,100)")
    p_b.add_argument("--add-annot-bed",
                   dest="add_annot_bed",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Specify additional genomic regions in BED format for which to calculate the percentages of input regions that overlap with them")
    p_b.add_argument("--add-annot-comp",
                   dest="add_annot_comp",
                   default = False,
                   action = "store_true",
                   help="Get the complement percentages, i.e., the percentages of input regions NOT overlapping with --add-annot-bed regions")
    p_b.add_argument("--add-annot-id",
                   dest="add_annot_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Label to use for additional regions in HTML report (default: \"custom\")")
    p_b.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default = False,
                   action = "store_true",
                   help = "Store plot files with absolute paths in HTML files. Default is relative paths (default: False)")
    p_b.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default = False,
                   action = "store_true",
                   help = "Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_b.add_argument("--plotly-js-mode",
                   dest="plotly_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3, 4, 5, 6, 7],
                   help = "Define how to provide plotly .js file. 1: use online version via \"cdn\" (requires internet connection). 2: link to packaged plotly .js file. 3: copy plotly .js file to plots output folder. 4: include plotly .js code in plotly HTML. 5: put web version link and plotly plot codes into main HTML. 6: put local version link and plotly plot codes in main HTML. 7: put plotly js and plotly plot codes into main HTML! (default: 1)")
    p_b.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")
    p_b.add_argument("--meme-no-check",
                   dest="meme_disable_check",
                   default = False,
                   action = "store_true",
                   help = "Disable MEME version check. Make sure --meme-no-pgc is set if MEME version >= 5.5.4 is installed! (default: False)")
    p_b.add_argument("--meme-no-pgc",
                   dest="meme_no_pgc",
                   default = False,
                   action = "store_true",
                   help = "Manually set MEME's FIMO --no-pgc option (required for MEME version >= 5.5.4). Make sure that MEME >= 5.5.4 is installed! (default: False)")

    """
    Compare mode.
    """
    # Subparser for "rbpbench compare": merge + compare results of earlier
    # search/batch runs (selected via args.which == 'compare' downstream).
    p_c = subparsers.add_parser('compare',
                                  help='Compare different search results')
    p_c.set_defaults(which='compare')
    # Add required arguments group.
    p_cm = p_c.add_argument_group("required arguments")
    # Required arguments.
    p_cm.add_argument("--in",
                   dest="data_in",
                   type=str,
                   metavar='str',
                   nargs='+',
                   required=True,
                   help="Supply motif search results data, either as folders (--out folders of rbpbench search or batch), or as files (both RBP and motif hit stats files needed!). Order of files does NOT matter")
    p_cm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Comparison results output folder")
    # Optional report/plot output options.
    p_c.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default = False,
                   action = "store_true",
                   help = "Store plot files with absolute paths in HTML files. Default is relative paths (default: False)")
    p_c.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default = False,
                   action = "store_true",
                   help = "Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_c.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")

    """
    Search motifs in sequences mode.

    """
    # Subparser for "rbpbench searchseq": motif search directly on FASTA
    # sequences (no genome/BED needed).
    p_ss = subparsers.add_parser('searchseq',
                                help='Search motifs in sequences')
    p_ss.set_defaults(which='searchseq')
    # Add required arguments group.
    p_ssm = p_ss.add_argument_group("required arguments")
    # Required arguments for searchseq.
    p_ssm.add_argument("--in",
                   dest="in_seqs",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Input FASTA file with DNA/RNA sequences used for motif search")
    p_ssm.add_argument("--rbps",
                   dest="list_rbps",
                   type=str,
                   metavar='str',
                   nargs='+',
                   required=True,
                   help="List of RBP names to define RBP motifs used for search (--rbps rbp1 rbp2 .. ). To search with all available motifs, use --rbps ALL. NOTE: to search with user-provided motifs, set --rbps USER and provide --user-meme-xml and/or --user-cm")
    p_ssm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for searchseq.
    # Dataset/method labels used in output tables and by "rbpbench compare".
    p_ss.add_argument("--data-id",
                   dest="data_id",
                   type=str,
                   metavar='str',
                   default = "data_id",
                   help="Dataset ID to describe dataset, e.g. --data-id PUM2_eCLIP_K562, used in output tables and for generating the comparison reports (rbpbench compare)")
    p_ss.add_argument("--method-id",
                   dest="method_id",
                   type=str,
                   metavar='str',
                   default = "method_id",
                   help="Method ID to describe peak calling method, e.g. --method-id clipper_idr, used in output tables and for generating the comparison reports (rbpbench compare)")
    # User-supplied motifs (only active with --rbps USER).
    p_ss.add_argument("--user-rbp-id",
                   dest="user_rbp_id",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide RBP ID belonging to provided sequence or structure motif(s) (mandatory for --rbps USER)")
    p_ss.add_argument("--user-meme-xml",
                   dest="user_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide MEME/DREME XML file containing sequence motif(s) to be used for the run (needs --rbps USER)")
    p_ss.add_argument("--user-cm",
                   dest="user_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide covariance model (.cm) file containing covariance model(s) to be used for the run (needs --rbps USER)")
    # Motif database selection: built-in (--motif-db) or custom (--custom-db*).
    p_ss.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_ss.add_argument("--custom-db",
                   dest="custom_db",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database folder. Alternatively, provide single files via --custom-db-meme-xml, --custom-db-cm, --custom-db-info")
    p_ss.add_argument("--custom-db-id",
                   dest="custom_db_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Set ID/name for provided custom motif database via --custom-db  (default: \"custom\")")
    p_ss.add_argument("--custom-db-meme-xml",
                   dest="custom_db_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database MEME/DREME XML file containing sequence motifs")
    p_ss.add_argument("--custom-db-cm",
                   dest="custom_db_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database covariance model (.cm) file containing covariance model(s)")
    p_ss.add_argument("--custom-db-info",
                   dest="custom_db_info",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments")
    # Regular-expression / structure-pattern motif search options.
    p_ss.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Define regular expression (regex) DNA motif to include in search, e.g. --regex AAACC, --regex 'C[ACGT]AC[AC]', .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    p_ss.add_argument("--regex-id",
                   dest="regex_id",
                   type=str,
                   metavar='str',
                   default = "regex",
                   help="Set regex ID used as RBP ID and database ID associated to -regex hits (default: \"regex\")")
    p_ss.add_argument("--motif-regex-id",
                   dest="motif_regex_id",
                   default = False,
                   action = "store_true",
                   help="Use --regex-id for motif ID as well. By default, regular expression string is used as motif ID for regex motif hits")
    p_ss.add_argument("--regex-type",
                   dest="regex_type",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set type of supplied --regex string 1: auto-detect type (standard regex or structure pattern). 2: given --regex string is standard regex, e.g. AC[AG]T. 3: given --regex string is structure pattern string, e.g. ((AA(((...)))AA)) (default: 1)")
    p_ss.add_argument("--regex-search-mode",
                   dest="regex_search_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define regex search mode. 1: when motif hit encountered, continue +1 after motif hit start position, 2: when motif hit encountered, continue +1 after motif hit end position. NOTE that structure pattern regex currently always uses mode 1 (default: 1)")
    p_ss.add_argument("--regex-min-gc",
                    dest="regex_min_gc",
                    type=float,
                    metavar='float',
                    default=0.0,
                    help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_ss.add_argument("--regex-max-gu",
                    dest="regex_max_gu",
                    type=float,
                    metavar='float',
                    default=1.0,
                    help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_ss.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_ss.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    p_ss.add_argument("--motifs",
                   dest="motifs_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Provide IDs for motifs of interest (need to be in database and loaded). All other RBP motifs will be discarded (except --regex)")
    p_ss.add_argument("--motif-min-len",
                   dest="motif_min_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_ss.add_argument("--motif-max-len",
                   dest="motif_max_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Maximum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_ss.add_argument("--functions",
                   dest="rbp_functions",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Filter defined RBPs (via --rbps) by their molecular functions (annotations available for most database RBPs). E.g. for RBPs in splicing regulation, set --functions SR, for RBPs in RNA stability & decay plus translation regulation, set --functions RSD TR (see rbpbench info for full function descriptions). NOTE that --regex is not affected by filtering")
    p_ss.add_argument("--fimo-ntf-file",
                   dest="fimo_user_ntf_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide FIMO nucleotide frequencies (FIMO option: --bfile) file (default: use internal frequencies file, define which with --fimo-ntf-mode)")
    p_ss.add_argument("--fimo-ntf-mode",
                   dest="fimo_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for FIMO search. 1: use frequencies from human ENSEMBL transcripts (excluding introns, A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_ss.add_argument("--fimo-pval",
                   dest="fimo_pval",
                   type=float,
                   metavar='float',
                   default=0.0005,
                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.0005)")
    p_ss.add_argument("--cmsearch-bs",
                   dest="cmsearch_bs",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="CMSEARCH bit score threshold (CMSEARCH options: -T --incT). The higher the more strict (default: 1.0)")
    p_ss.add_argument("--cmsearch-mode",
                   dest="cmsearch_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set CMSEARCH mode to control strictness of filtering. 1: default setting (CMSEARCH option: --default). 2: max setting (CMSEARCH option: --max), i.e., turn all heuristic filters off, slower and more sensitive / more hits) (default: 1)")
    p_ss.add_argument("--greatest-hits",
                   dest="greatest_hits",
                   default = False,
                   action = "store_true",
                   help = "Keep only best FIMO/CMSEARCH motif hits (i.e., hit with lowest p-value / highest bit score for each motif sequence/site combination). By default, report all hits (default: False)")
    p_ss.add_argument("--meme-no-check",
                   dest="meme_disable_check",
                   default = False,
                   action = "store_true",
                   help = "Disable MEME version check. Make sure --meme-no-pgc is set if MEME version >= 5.5.4 is installed! (default: False)")
    p_ss.add_argument("--meme-no-pgc",
                   dest="meme_no_pgc",
                   default = False,
                   action = "store_true",
                   help = "Manually set MEME's FIMO --no-pgc option (required for MEME version >= 5.5.4). Make sure that MEME >= 5.5.4 is installed! (default: False)")
    p_ss.add_argument("--make-uniq-headers",
                   dest="make_uniq_headers",
                   default = False,
                   action = "store_true",
                   help = "Make FASTA headers unique. By default, an error is thrown if identical FASTA headers are encountered (default: False)")
    p_ss.add_argument("--header-id",
                   dest="header_id",
                   type=str,
                   metavar='str',
                   default = "seq",
                   help = "Constant header ID part used if --make-uniq-headers set (default: \"seq\")")
    # Sequence length filtering.
    p_ss.add_argument("--min-seq-len",
                   dest="min_seq_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum sequence length required for input sequences to be included in search (default: False)")
    p_ss.add_argument("--max-seq-len",
                   dest="max_seq_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Maximum sequence length required for input sequences to be included in search (default: False)")
    # Profiles options.
    p_ss.add_argument("--profiles",
                   dest="profiles",
                   default = False,
                   action = "store_true",
                   help = "Generate motif hit + k-mer profiles for each input sequence, and compare profiles between sequences in PCA plot. Generates an HTML report (default: False)")
    p_ss.add_argument("--profiles-level",
                   dest="profiles_level",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define on which hit level to generate motif hit profile. 1: on RBP hit level (merging all motif hits for one RBP). 2: on individual motif level (default: 1)")
    p_ss.add_argument("--profiles-norm",
                   dest="profiles_norm",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define how to normalize RBP/motif hit counts included in hit profile. 1: use hits per 1000 nt. 2: set 1 if hits present and 0 if no hits on sequence (default: 1)")
    p_ss.add_argument("--profiles-k",
                   dest="profiles_k",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Set k-mer size k for k-mer profiles, output in addition to hit profiles in HTML report (default: 5)")
    p_ss.add_argument("--profiles-seq-id",
                   dest="profiles_seq_id",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Specify sequence ID to highlight in profile PCA plots and to report top --profiles-top-n similar other sequences for, based on motif hit and k-mer profiles (default: False, i.e., no sequence ID is highlighted)")
    p_ss.add_argument("--profiles-top-n",
                   dest="profiles_top_n",
                   type=int,
                   metavar='int',
                   default=20,
                   help="Set top n similar sequences (similar to --profiles-seq-id sequence, using cosine similarity and euclidean distance) to be reported in HTML report (--profiles-seq-id needs to be set) (default: 20)")
    p_ss.add_argument("--plot-motifs",
                   dest="plot_motifs",
                   default = False,
                   action = "store_true",
                   help = "Visualize selected sequence motifs, by outputting sequence logos and motif hit statistics into .html file (default: False)")
    p_ss.add_argument("--top-n-matched",
                   dest="top_n_matched",
                   type=int,
                   metavar='int',
                   default=10,
                   help="Set top n matched sequences to be displayed in motif hit statistics HTML report (create via --plot-motifs) (default: 10)")
    p_ss.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default = False,
                   action = "store_true",
                   help = "Store plot files with absolute paths in HTML files. Default is relative paths (default: False)")
    p_ss.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default = False,
                   action = "store_true",
                   help = "Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_ss.add_argument("--plotly-js-mode",
                   dest="plotly_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3, 4, 5, 6, 7],
                   help = "Define how to provide plotly .js file. 1: use online version via \"cdn\" (requires internet connection). 2: link to packaged plotly .js file. 3: copy plotly .js file to plots output folder. 4: include plotly .js code in plotly HTML. 5: put web version link and plotly plot codes into main HTML. 6: put local version link and plotly plot codes in main HTML. 7: put plotly js and plotly plot codes into main HTML! (default: 1)")
    p_ss.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")

    """
    Regex motif search mode.

    """
    # Subparser for "rbpbench searchregex": regex-only search on FASTA
    # sequences or genomic BED regions (BED input additionally needs --genome).
    p_sr = subparsers.add_parser('searchregex',
                                help='Search regex in genomic sites or sequences')
    p_sr.set_defaults(which='searchregex')
    # Add required arguments group.
    p_srm = p_sr.add_argument_group("required arguments")
    # Required arguments for searchregex.
    p_srm.add_argument("--in",
                   dest="in_file",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Input FASTA file with DNA/RNA sequences or BED file (>= 6 column format) with genomic regions used for regex search. NOTE that sequences will be converted to DNA and uppercased before search. If BED file, also provide --genome FASTA file")
    p_srm.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Define regular expression (regex) DNA motif used for search, e.g. --regex AAAAA, --regex 'C[ACGT]AC[AC]' .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Moreover, structure patterns can be supplied, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer. Alternatively, provide a file with regexes (first column regex ID, second column regex), e.g. --regex regexes.txt")
    p_srm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for searchregex.
    p_sr.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   help = "Genomic sequences FASTA file (required if --in is BED)")
    # Input sequence/region ID handling.
    p_sr.add_argument("--make-uniq-headers",
                   dest="make_uniq_headers",
                   default = False,
                   action = "store_true",
                   help = "Make sequence IDs (FASTA header or BED column 4 IDs) unique. By default, an error is thrown if identical IDs are encountered (default: False)")
    p_sr.add_argument("--header-id",
                   dest="header_id",
                   type=str,
                   metavar='str',
                   default = "reg",
                   help = "Constant header ID part used if --make-uniq-headers set (default: \"reg\")")
    p_sr.add_argument("--regex-search-mode",
                   dest="regex_search_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define regex search mode. 1: when motif hit encountered, continue +1 after motif hit start position, 2: when motif hit encountered, continue +1 after motif hit end position. NOTE that structure pattern regex currently always uses mode 1 (default: 1)")
    # Structure pattern hit filters (base-pair composition + spacer length).
    p_sr.add_argument("--regex-min-gc",
                     dest="regex_min_gc",
                     type=float,
                     metavar='float',
                     default=0.0,
                     help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_sr.add_argument("--regex-max-gu",
                     dest="regex_max_gu",
                     type=float,
                     metavar='float',
                     default=1.0,
                     help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_sr.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_sr.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    # Region extension string "ext" or "up,down"; only applies to BED input.
    p_sr.add_argument("--ext",
                   dest="ext_up_down",
                   type=str,
                   metavar='str',
                   default="0",
                   help="Up- and downstream extension of --in sites in nucleotides (nt), if --in are genomic regions in BED format. Set e.g. --ext 30 for 30 nt on both sides, or --ext 20,10 for different up- and downstream extension (default: 0)")
    p_sr.add_argument("--add-zero-hits",
                   dest="add_zero_hits",
                   default = False,
                   action = "store_true",
                   help = "Also add regions with 0 hits to output BED file (hit count in column 5) (default: False)")

    """
    Search motifs in long genomic regions mode.
    
    """
    # Subparser for "rbpbench searchlong": motif search in long genomic
    # regions (BED regions or transcript IDs resolved via GTF).
    p_sl = subparsers.add_parser('searchlong',
                                help='Search motifs in long genomic regions')
    p_sl.set_defaults(which='searchlong')
    # Add required arguments group.
    p_slm = p_sl.add_argument_group("required arguments")
    # Required arguments for searchlong.
    p_slm.add_argument("--in",
                   dest="in_file",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Genomic regions file in BED format OR file with transcript IDs (one ID per row) to define genomic regions in which to search for motifs (requires --gtf)")
    p_slm.add_argument("--rbps",
                   dest="list_rbps",
                   type=str,
                   metavar='str',
                   nargs='+',
                   required=True,
                   help="List of RBP names to define RBP motifs used for search (--rbps rbp1 rbp2 .. ). To search with all available motifs, use --rbps ALL. NOTE: to search with user-provided motifs, set --rbps USER and provide --user-meme-xml and/or --user-cm. To search only for --regex, set --rbps REGEX")
    p_slm.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Genomic sequences file (currently supported formats: FASTA)")
    p_slm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for searchlong.
    # Dataset/method/run labels used in output tables.
    p_sl.add_argument("--data-id",
                   dest="data_id",
                   type=str,
                   metavar='str',
                   default = "data_id",
                   help="Dataset ID")
    p_sl.add_argument("--method-id",
                   dest="method_id",
                   type=str,
                   metavar='str',
                   default = "method_id",
                   help="Method ID")
    p_sl.add_argument("--run-id",
                   dest="run_id",
                   type=str,
                   metavar='str',
                   default = "run_id",
                   help="Run ID")
    # User-supplied motifs (only active with --rbps USER).
    p_sl.add_argument("--user-rbp-id",
                   dest="user_rbp_id",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide RBP ID belonging to provided sequence or structure motif(s) (mandatory for --rbps USER)")
    p_sl.add_argument("--user-meme-xml",
                   dest="user_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide MEME/DREME XML file containing sequence motif(s) to be used for the run (needs --rbps USER)")
    p_sl.add_argument("--user-cm",
                   dest="user_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide covariance model (.cm) file containing covariance model(s) to be used for the run (needs --rbps USER)")
    # Motif database selection: built-in (--motif-db) or custom (--custom-db*).
    p_sl.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_sl.add_argument("--custom-db",
                   dest="custom_db",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database folder. Alternatively, provide single files via --custom-db-meme-xml, --custom-db-cm, --custom-db-info")
    p_sl.add_argument("--custom-db-id",
                   dest="custom_db_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Set ID/name for provided custom motif database via --custom-db  (default: \"custom\")")
    p_sl.add_argument("--custom-db-meme-xml",
                   dest="custom_db_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database MEME/DREME XML file containing sequence motifs")
    p_sl.add_argument("--custom-db-cm",
                   dest="custom_db_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database covariance model (.cm) file containing covariance model(s)")
    p_sl.add_argument("--custom-db-info",
                   dest="custom_db_info",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments")
    # Motif subset filters applied to the loaded database.
    p_sl.add_argument("--motifs",
                   dest="motifs_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Provide IDs for motifs of interest (need to be in database and loaded). All other RBP motifs will be discarded (except --regex)")
    p_sl.add_argument("--motif-min-len",
                   dest="motif_min_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_sl.add_argument("--motif-max-len",
                   dest="motif_max_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Maximum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_sl.add_argument("--functions",
                   dest="rbp_functions",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Filter defined RBPs (via --rbps) by their molecular functions (annotations available for most database RBPs). E.g. for RBPs in splicing regulation, set --functions SR, for RBPs in RNA stability & decay plus translation regulation, set --functions RSD TR (see rbpbench info for full function descriptions). NOTE that --regex is not affected by filtering")
    p_sl.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Define regular expression (regex) DNA motif to include in search, e.g. --regex AAACC, --regex 'C[ACGT]AC[AC]', .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    p_sl.add_argument("--regex-id",
                   dest="regex_id",
                   type=str,
                   metavar='str',
                   default = "regex",
                   help="Set regex ID used as RBP ID and database ID associated to -regex hits (default: \"regex\")")
    p_sl.add_argument("--motif-regex-id",
                   dest="motif_regex_id",
                   default = False,
                   action = "store_true",
                   help="Use --regex-id for motif ID as well. By default, regular expression string is used as motif ID for regex motif hits")
    p_sl.add_argument("--regex-type",
                   dest="regex_type",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set type of supplied --regex string 1: auto-detect type (standard regex or structure pattern). 2: given --regex string is standard regex, e.g. AC[AG]T. 3: given --regex string is structure pattern string, e.g. ((AA(((...)))AA)) (default: 1)")
    p_sl.add_argument("--regex-search-mode",
                   dest="regex_search_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define regex search mode. 1: when motif hit encountered, continue +1 after motif hit start position, 2: when motif hit encountered, continue +1 after motif hit end position. NOTE that structure pattern regex currently always uses mode 1 (default: 1)")
    p_sl.add_argument("--regex-min-gc",
                    dest="regex_min_gc",
                    type=float,
                    metavar='float',
                    default=0.0,
                    help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_sl.add_argument("--regex-max-gu",
                    dest="regex_max_gu",
                    type=float,
                    metavar='float',
                    default=1.0,
                    help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_sl.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_sl.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    p_sl.add_argument("--fimo-ntf-file",
                   dest="fimo_user_ntf_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide FIMO nucleotide frequencies (FIMO option: --bfile) file (default: use internal frequencies file, define which with --fimo-ntf-mode)")
    p_sl.add_argument("--fimo-ntf-mode",
                   dest="fimo_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for FIMO search. 1: use frequencies from human ENSEMBL transcripts (excluding introns, A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_sl.add_argument("--fimo-pval",
                   dest="fimo_pval",
                   type=float,
                   metavar='float',
                   default=0.0005,
                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.0005)")
    p_sl.add_argument("--cmsearch-bs",
                   dest="cmsearch_bs",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="CMSEARCH bit score threshold (CMSEARCH options: -T --incT). The higher the more strict (default: 1.0)")
    p_sl.add_argument("--cmsearch-mode",
                   dest="cmsearch_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set CMSEARCH mode to control strictness of filtering. 1: default setting (CMSEARCH option: --default). 2: max setting (CMSEARCH option: --max), i.e., turn all heuristic filters off, slower and more sensitive / more hits) (default: 1)")
    p_sl.add_argument("--greatest-hits",
                   dest="greatest_hits",
                   default = False,
                   action = "store_true",
                   help = "Keep only best FIMO/CMSEARCH motif hits (i.e., hit with lowest p-value / highest bit score for each motif sequence/site combination). By default, report all hits (default: False)")
    p_sl.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Input GTF file with genomic annotations to generate genomic region annotation plots (output to motif statistics HTML). By default the most prominent transcripts will be extracted and used for functional annotation. Alternatively, provide a list of expressed transcripts via --tr-list (together with --gtf containing the transcripts). Note that only features on standard chromosomes (1,2,..,X,Y,MT) are currently used for annotation")
    p_sl.add_argument("--tr-list",
                   dest="tr_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with transcript IDs (one ID per row) to define which transcripts to use from --gtf for genomic region annotations plots")
    p_sl.add_argument("--tr-types",
                   dest="tr_types_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   help="List of transcript biotypes to consider for genomic region annotations plots. By default an internal selection of transcript biotypes is used (in addition to intron, CDS, UTR, intergenic). Note that provided biotype strings need to be in --gtf GTF file")
    p_sl.add_argument("--gtf-feat-min-overlap",
                   dest="gtf_feat_min_overlap",
                   type=float,
                   metavar='float',
                   default=0.1,
                   help="Minimum amount of overlap required for a region to be assigned to a GTF feature (if less or no overlap, region will be assigned to \"intergenic\"). If there is overlap with several features, assign the one with highest overlap (default: 0.1)")
    p_sl.add_argument("--top-n-matched",
                   dest="top_n_matched",
                   type=int,
                   metavar='int',
                   default=10,
                   help="Set top n matched sequences to be displayed in motif hit statistics HTML report (create via --plot-motifs) (default: 10)")
    p_sl.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default = False,
                   action = "store_true",
                   help = "Store plot files with absolute paths in HTML report. Default is relative paths (default: False)")
    p_sl.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default = False,
                   action = "store_true",
                   help = "Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_sl.add_argument("--meme-no-check",
                   dest="meme_disable_check",
                   default = False,
                   action = "store_true",
                   help = "Disable MEME version check. Make sure --meme-no-pgc is set if MEME version >= 5.5.4 is installed! (default: False)")
    p_sl.add_argument("--meme-no-pgc",
                   dest="meme_no_pgc",
                   default = False,
                   action = "store_true",
                   help = "Manually set MEME's FIMO --no-pgc option (required for MEME version >= 5.5.4). Make sure that MEME >= 5.5.4 is installed! (default: False)")
    p_sl.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")

    """
    Search motifs in spliced transcript sites, and check co-occurrence.

    """
    p_scr = subparsers.add_parser('searchrna',
                                help='Search motifs in spliced transcript sites')
    p_scr.set_defaults(which='searchrna')
    # Add required arguments group.
    p_scrm = p_scr.add_argument_group("required arguments")
    # Required arguments for searchrna.
    p_scrm.add_argument("--in",
                   dest="in_sites",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Transcript sites file in BED format. Column 1 is transcript ID (ID needs to be in --gtf), column 2 and 3 are start and end coordinates of site on the transcript")
    p_scrm.add_argument("--rbps",
                   dest="list_rbps",
                   type=str,
                   metavar='str',
                   nargs='+',
                   required=True,
                   help="List of RBP names to define RBP motifs used for search (--rbps rbp1 rbp2 .. ). To search with all available motifs, use --rbps ALL. NOTE: to search with user-provided motifs, set --rbps USER and provide --user-meme-xml and/or --user-cm. To search only for --regex, set --rbps REGEX")
    p_scrm.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Input GTF file with genomic annotations to extract spliced transcript sequences for provided --in transcript sites. Note that only features on standard chromosomes (1,2,..,X,Y,MT) are currently considered")
    p_scrm.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Genomic sequences file (currently supported formats: FASTA)")
    p_scrm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for searchrna.
    p_scr.add_argument("--data-id",
                   dest="data_id",
                   type=str,
                   metavar='str',
                   default = "data_id",
                   help="Dataset ID")
    p_scr.add_argument("--method-id",
                   dest="method_id",
                   type=str,
                   metavar='str',
                   default = "method_id",
                   help="Method ID")
    p_scr.add_argument("--run-id",
                   dest="run_id",
                   type=str,
                   metavar='str',
                   default = "run_id",
                   help="Run ID")
    p_scr.add_argument("--ext",
                   dest="ext_up_down",
                   type=str,
                   metavar='str',
                   default="0",
                   help="Up- and downstream extension of --in transcript sites in nucleotides (nt). Set e.g. --ext 30 for 30 nt on both sides, or --ext 20,10 for different up- and downstream extension. Note that complete extension might not always be possible, e.g. for sites on short transcripts or sites at transcript ends (default: 0)")
    p_scr.add_argument("--fisher-mode",
                   dest="fisher_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines Fisher exact test alternative hypothesis for testing co-occurrences of RBP motifs on --in sites. 1: greater, 2: two-sided, 3: less (default: 1)")
    p_scr.add_argument("--cooc-pval-thr",
                   dest="cooc_pval_thr",
                   type=float,
                   metavar='float',
                   default=0.005,
                   help="RBP co-occurrence p-value threshold for reporting significant co-occurrences. NOTE that if --cooc-pval-mode Bonferroni is selected, this threshold gets further adjusted by Bonferroni correction (i.e. divided by number of tests). Threshold applies unchanged for BH corrected p-values as well as for disabled correction (default: 0.005)")
    p_scr.add_argument("--cooc-pval-mode",
                   dest="cooc_pval_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines multiple testing correction mode for co-occurrence p-values. 1: Benjamini-Hochberg (BH), 2: Bonferroni, 3: no correction (default: 1)")
    p_scr.add_argument("--min-motif-dist",
                   dest="min_motif_dist",
                   type=int,
                   metavar='int',
                   default=0,
                   help="Set minimum mean motif distance for an RBP pair to be reported significant in RBP co-occurrence heatmap plot. By default (value 0), all RBP pairs <= set p-value are reported significant. So setting --min-motif-dist >= 0 acts as a second filter to show only RBP pairs with signficiant p-values as significant if there is the specified minimum average distance between their motifs (default: 0)")
    p_scr.add_argument("--max-motif-dist",
                   dest="max_motif_dist",
                   type=int,
                   metavar='int',
                   default=20,
                   help="Set maximum motif distance for RBP co-occurrence plot statistic inside hover boxes (default: 20)")
    p_scr.add_argument("--user-rbp-id",
                   dest="user_rbp_id",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide RBP ID belonging to provided sequence or structure motif(s) (mandatory for --rbps USER)")
    p_scr.add_argument("--user-meme-xml",
                   dest="user_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide MEME/DREME XML file containing sequence motif(s) to be used for the run (needs --rbps USER)")
    p_scr.add_argument("--user-cm",
                   dest="user_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide covariance model (.cm) file containing covariance model(s) to be used for the run (needs --rbps USER)")
    p_scr.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_scr.add_argument("--custom-db",
                   dest="custom_db",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database folder. Alternatively, provide single files via --custom-db-meme-xml, --custom-db-cm, --custom-db-info")
    p_scr.add_argument("--custom-db-id",
                   dest="custom_db_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Set ID/name for provided custom motif database via --custom-db  (default: \"custom\")")
    p_scr.add_argument("--custom-db-meme-xml",
                   dest="custom_db_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database MEME/DREME XML file containing sequence motifs")
    p_scr.add_argument("--custom-db-cm",
                   dest="custom_db_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database covariance model (.cm) file containing covariance model(s)")
    p_scr.add_argument("--custom-db-info",
                   dest="custom_db_info",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments")
    p_scr.add_argument("--motifs",
                   dest="motifs_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Provide IDs for motifs of interest (need to be in database and loaded). All other RBP motifs will be discarded (except --regex)")
    p_scr.add_argument("--motif-min-len",
                   dest="motif_min_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_scr.add_argument("--motif-max-len",
                   dest="motif_max_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Maximum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_scr.add_argument("--functions",
                   dest="rbp_functions",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Filter defined RBPs (via --rbps) by their molecular functions (annotations available for most database RBPs). E.g. for RBPs in splicing regulation, set --functions SR, for RBPs in RNA stability & decay plus translation regulation, set --functions RSD TR (see rbpbench info for full function descriptions). NOTE that --regex is not affected by filtering")
    p_scr.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Define regular expression (regex) DNA motif to include in search, e.g. --regex AAACC, --regex 'C[ACGT]AC[AC]', .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    p_scr.add_argument("--regex-id",
                   dest="regex_id",
                   type=str,
                   metavar='str',
                   default = "regex",
                   help="Set regex ID used as RBP ID and database ID associated to -regex hits (default: \"regex\")")
    p_scr.add_argument("--motif-regex-id",
                   dest="motif_regex_id",
                   default = False,
                   action = "store_true",
                   help="Use --regex-id for motif ID as well. By default, regular expression string is used as motif ID for regex motif hits")
    p_scr.add_argument("--regex-type",
                   dest="regex_type",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set type of supplied --regex string 1: auto-detect type (standard regex or structure pattern). 2: given --regex string is standard regex, e.g. AC[AG]T. 3: given --regex string is structure pattern string, e.g. ((AA(((...)))AA)) (default: 1)")
    p_scr.add_argument("--regex-search-mode",
                   dest="regex_search_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define regex search mode. 1: when motif hit encountered, continue +1 after motif hit start position, 2: when motif hit encountered, continue +1 after motif hit end position. NOTE that structure pattern regex currently always uses mode 1 (default: 1)")
    p_scr.add_argument("--regex-min-gc",
                    dest="regex_min_gc",
                    type=float,
                    metavar='float',
                    default=0.0,
                    help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_scr.add_argument("--regex-max-gu",
                    dest="regex_max_gu",
                    type=float,
                    metavar='float',
                    default=1.0,
                    help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_scr.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_scr.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    p_scr.add_argument("--bed-score-col",
                   dest="bed_score_col",
                   type=int,
                   metavar='int',
                   default=5,
                   help="--in BED score column used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)")
    p_scr.add_argument("--bed-sc-thr",
                   dest="bed_sc_thr",
                   type = float,
                   metavar='float',
                   default = None,
                   help = "Minimum site score (by default: --in BED column 5, or set via --bed-score-col) for filtering (assuming higher score == better site) (default: None)")
    p_scr.add_argument("--bed-sc-thr-rev",
                   dest="bed_sc_thr_rev_filter",
                   default = False,
                   action = "store_true",
                   help = "Reverse --bed-sc-thr filtering (i.e. the lower the better, e.g. if score column contains p-values) (default: False)")
    p_scr.add_argument("--fimo-ntf-file",
                   dest="fimo_user_ntf_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide FIMO nucleotide frequencies (FIMO option: --bfile) file (default: use internal frequencies file, define which with --fimo-ntf-mode)")
    p_scr.add_argument("--fimo-ntf-mode",
                   dest="fimo_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for FIMO search. 1: use frequencies from human ENSEMBL transcripts (excluding introns, A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_scr.add_argument("--fimo-pval",
                   dest="fimo_pval",
                   type=float,
                   metavar='float',
                   default=0.0005,
                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.0005)")
    p_scr.add_argument("--cmsearch-bs",
                   dest="cmsearch_bs",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="CMSEARCH bit score threshold (CMSEARCH options: -T --incT). The higher the more strict (default: 1.0)")
    p_scr.add_argument("--cmsearch-mode",
                   dest="cmsearch_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set CMSEARCH mode to control strictness of filtering. 1: default setting (CMSEARCH option: --default). 2: max setting (CMSEARCH option: --max), i.e., turn all heuristic filters off, slower and more sensitive / more hits) (default: 1)")
    p_scr.add_argument("--greatest-hits",
                   dest="greatest_hits",
                   default = False,
                   action = "store_true",
                   help = "Keep only best FIMO/CMSEARCH motif hits (i.e., hit with lowest p-value / highest bit score for each motif sequence/site combination). By default, report all hits (default: False)")
    # Sequence k-mers plot.
    p_scr.add_argument("--disable-kmer-pca-plot",
                   dest="disable_kmer_pca_plot",
                   default = False,
                   action = "store_true",
                   help = "Disable input sequences by k-mer content PCA plot (default: False)")
    p_scr.add_argument("--kmer-pca-plot-k",
                   dest="kmer_pca_plot_k",
                   type=int,
                   metavar='int',
                   default=3,
                   help="Set k for k-mer usage in sequences by k-mer content PCA plot (default: 3)")
    p_scr.add_argument("--kmer-pca-plot-no-comp",
                   dest="kmer_pca_plot_no_comp",
                   default = False,
                   action = "store_true",
                   help = "Disable adding sequence complexity as feature to sequences by k-mer content PCA plot (default: False)")
    p_scr.add_argument("--kmer-pca-plot-comp-k",
                   dest="kmer_pca_plot_comp_k",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set k for sequence complexity (Shannon entropy) calculation. 1 == mono-nucleotides, 2 == di-nucleotides (default: 1)")
    p_scr.add_argument("--kmer-pca-plot-no-motifs",
                   dest="kmer_pca_plot_no_motifs",
                   default = False,
                   action = "store_true",
                   help = "Disable adding motif hits information to sequences by k-mer content PCA plot (default: False)")
    p_scr.add_argument("--kmer-plot-k",
                   dest="kmer_plot_k",
                   type=int,
                   metavar='int',
                   default=4,
                   help="Define k for k-mer distribution plots, including top k-mers plot, top vs bottom scoring sites plot, and k-mer variation plot (default: 4)")
    p_scr.add_argument("--set-rbp-id",
                   dest="set_rbp_id",
                   type=str,
                   metavar='str',
                   default = None,
                   help = "Set reference RBP ID to plot motif distances relative to motifs from this RBP (needs to be one of the selected RBP IDs!). Motif plot will be centered on best scoring motif of the RBP for each region")
    p_scr.add_argument("--disable-len-dist-plot",
                   dest="disable_len_dist_plot",
                   default = False,
                   action = "store_true",
                   help = "Disable input sequence length distribution plot in HTML report (default: False)")
    p_scr.add_argument("--enable-upset-plot",
                   dest="enable_upset_plot",
                   default = False,
                   action = "store_true",
                   help = "Enable upset plot in HTML report (default: False)")
    p_scr.add_argument("--upset-plot-min-degree",
                   dest="upset_plot_min_degree",
                   type=int,
                   metavar='int',
                   default=2,
                   help="Upset plot minimum degree parameter (default: 2)")
    p_scr.add_argument("--upset-plot-max-degree",
                   dest="upset_plot_max_degree",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Upset plot maximum degree. By default no maximum degree is set. Useful to look at specific degrees only (together with --upset-plot-min-degree), e.g. 2 or 2 to 3 (default: None)")
    p_scr.add_argument("--upset-plot-min-subset-size",
                   dest="upset_plot_min_subset_size",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Upset plot minimum subset size (default: 5)")
    p_scr.add_argument("--upset-plot-max-subset-rank",
                   dest="upset_plot_max_subset_rank",
                   type=int,
                   metavar='int',
                   default=25,
                   help="Upset plot maximum subset rank to plot. All tied subsets are included (default: 25)")
    p_scr.add_argument("--upset-plot-max-rbp-rank",
                   dest="upset_plot_max_rbp_rank",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Maximum RBP hit region count rank. Set this to limit the amount of RBPs included in upset plot (+ statistic !) to top --upset-plot-max-rbp-rank RBPs. By default all RBPs are included (default: None)")
    p_scr.add_argument("--upset-plot-min-rbp-count",
                   dest="upset_plot_min_rbp_count",
                   type=int,
                   metavar='int',
                   default=0,
                   help="Minimum amount of input sites containing motifs for an RBP in order for the RBP to be included in upset plot (+ statistic !). By default, all RBPs are included, also RBPs without hit regions (default: 0)")
    # Range (in nt) around the --set-rbp-id motif center covered by the motif
    # distance plot. NOTE: help text used to claim a default of 50, while the
    # actual argparse default is 60 — text corrected to match the code.
    p_scr.add_argument("--motif-distance-plot-range",
                   dest="motif_distance_plot_range",
                   type=int,
                   metavar='int',
                   default=60,
                   help="Set range of motif distance plot. I.e., centered on the set RBP (--set-rbp-id) motifs, motifs within minus and plus --motif-distance-plot-range will be plotted (default: 60)")
    p_scr.add_argument("--motif-min-pair-count",
                   dest="motif_min_pair_count",
                   type=int,
                   metavar='int',
                   default=10,
                   help="On single motif level, minimum count of co-occurrences of a motif with set RBP ID (--set-rbp-id) motif to be reported and plotted (default: 10)")
    p_scr.add_argument("--rbp-min-pair-count",
                   dest="rbp_min_pair_count",
                   type=int,
                   metavar='int',
                   default=10,
                   help="On RBP level, minimum amount of co-occurrences of motifs for an RBP ID compared to set RBP ID (--set-rbp-id) motifs to be reported and plotted (default: 10)")
    p_scr.add_argument("--goa",
                   dest="run_goa",
                   default = False,
                   action = "store_true",
                   help = "Run gene ontology (GO) enrichment analysis on genes occupied by --in regions. Requires --gtf (default: False)")
    p_scr.add_argument("--goa-obo-mode",
                   dest="goa_obo_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to obtain GO DAG (directed acyclic graph) obo file. 1: download most recent file from internet,  2: use local file,  3: provide file via --goa-obo-file (default: 1)")
    p_scr.add_argument("--goa-obo-file",
                   dest="goa_obo_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide GO DAG obo file (default: False)")
    p_scr.add_argument("--goa-gene2go-file",
                   dest="goa_gene2go_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide gene ID to GO IDs mapping table (row format: gene_id<tab>go_id1,go_id2). By default, a local file with ENSEMBL gene IDs is used. NOTE that gene IDs need to be compatible with --gtf (default: False)")
    p_scr.add_argument("--goa-pval",
                   dest="goa_pval",
                   type=float,
                   metavar='float',
                   default=0.05,
                   help="GO enrichment analysis p-value threshold (applied on corrected p-value) (default: 0.05)")
    p_scr.add_argument("--goa-cooc-mode",
                   dest="goa_cooc_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define what input regions to consider in GOA, in relation to motif hit (co-)occurrences. 1: Perform GOA on all input regions (no motif (co-)occurrences required). 2: perform GOA only on input regions with motif hits from ANY specified RBP. 3: perform GOA only on input regions with motif hits from ALL specified RBPs. GOA on input regions translates to GOA on the underlying genes (default: 1)")
    p_scr.add_argument("--goa-bg-gene-list",
                   dest="goa_bg_gene_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with gene IDs (one ID per row) to use as background gene list for GOA. NOTE that gene IDs need to be compatible with --gtf (default: False)")
    p_scr.add_argument("--goa-max-child",
                   dest="goa_max_child",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify maximum number of children for a significant GO term to be reported in HTML table, e.g. --goa-max-child 100. This allows filtering out very broad terms (default: None)")
    p_scr.add_argument("--goa-min-depth",
                   dest="goa_min_depth",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify minimum depth number for a significant GO term to be reported in HTML table, e.g. --goa-min-depth 5 (default: None)")
    p_scr.add_argument("--goa-filter-purified",
                   dest="goa_filter_purified",
                   default = False,
                   action = "store_true",
                   help = "Filter out GOA results labeled as purified (i.e., GO terms with significantly lower concentration) in HTML table (default: False)")
    p_scr.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default = False,
                   action = "store_true",
                   help = "Store plot files with absolute paths in HTML report. Default is relative paths (default: False)")
    p_scr.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default = False,
                   action = "store_true",
                   help = "Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_scr.add_argument("--meme-no-check",
                   dest="meme_disable_check",
                   default = False,
                   action = "store_true",
                   help = "Disable MEME version check. Make sure --meme-no-pgc is set if MEME version >= 5.5.4 is installed! (default: False)")
    p_scr.add_argument("--meme-no-pgc",
                   dest="meme_no_pgc",
                   default = False,
                   action = "store_true",
                   help = "Manually set MEME's FIMO --no-pgc option (required for MEME version >= 5.5.4). Make sure that MEME >= 5.5.4 is installed! (default: False)")
    p_scr.add_argument("--plot-motifs",
                   dest="plot_motifs",
                   default = False,
                   action = "store_true",
                   help = "Visualize selected sequence motifs, by outputting sequence logos and motif hit statistics into a separate .html file (default: False)")
    p_scr.add_argument("--top-n-matched",
                   dest="top_n_matched",
                   type=int,
                   metavar='int',
                   default=10,
                   help="Set top n matched sequences to be displayed in motif hit statistics HTML report (create via --plot-motifs) (default: 10)")
    p_scr.add_argument("--plotly-js-mode",
                   dest="plotly_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3, 4, 5, 6, 7],
                   help = "Define how to provide plotly .js file. 1: use online version via \"cdn\" (requires internet connection). 2: link to packaged plotly .js file. 3: copy plotly .js file to plots output folder. 4: include plotly .js code in plotly HTML. 5: put web version link and plotly plot codes into main HTML. 6: put local version link and plotly plot codes in main HTML. 7: put plotly js and plotly plot codes into main HTML! (default: 1)")
    p_scr.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")

    """
    Search motifs in spliced full transcripts.

    """
    p_smr = subparsers.add_parser('searchlongrna',
                                help='Search motifs in spliced full transcripts')
    p_smr.set_defaults(which='searchlongrna')
    # Add required arguments group.
    p_smrm = p_smr.add_argument_group("required arguments")
    # Required arguments for searchlongrna.
    p_smrm.add_argument("--rbps",
                   dest="list_rbps",
                   type=str,
                   metavar='str',
                   nargs='+',
                   required=True,
                   help="List of RBP names to define RBP motifs used for search (--rbps rbp1 rbp2 .. ). To search with all available motifs, use --rbps ALL. NOTE: to search with user-provided motifs, set --rbps USER and provide --user-meme-xml and/or --user-cm. To search only for --regex, set --rbps REGEX")
    p_smrm.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Input GTF file with genomic annotations to extract spliced RNA sequences. By default the most prominent transcript will be extracted and used for each gene. Alternatively, provide a list of expressed transcripts via --tr-list (together with --gtf containing the transcripts). Note that only features on standard chromosomes (1,2,..,X,Y,MT) are currently considered")
    p_smrm.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Genomic sequences file (currently supported formats: FASTA)")
    p_smrm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for searchlongrna.
    p_smr.add_argument("--data-id",
                   dest="data_id",
                   type=str,
                   metavar='str',
                   default = "data_id",
                   help="Dataset ID")
    p_smr.add_argument("--method-id",
                   dest="method_id",
                   type=str,
                   metavar='str',
                   default = "method_id",
                   help="Method ID")
    p_smr.add_argument("--run-id",
                   dest="run_id",
                   type=str,
                   metavar='str',
                   default = "run_id",
                   help="Run ID")
    p_smr.add_argument("--user-rbp-id",
                   dest="user_rbp_id",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide RBP ID belonging to provided sequence or structure motif(s) (mandatory for --rbps USER)")
    p_smr.add_argument("--user-meme-xml",
                   dest="user_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide MEME/DREME XML file containing sequence motif(s) to be used for the run (needs --rbps USER)")
    p_smr.add_argument("--user-cm",
                   dest="user_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide covariance model (.cm) file containing covariance model(s) to be used for the run (needs --rbps USER)")
    p_smr.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_smr.add_argument("--custom-db",
                   dest="custom_db",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database folder. Alternatively, provide single files via --custom-db-meme-xml, --custom-db-cm, --custom-db-info")
    p_smr.add_argument("--custom-db-id",
                   dest="custom_db_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Set ID/name for provided custom motif database via --custom-db  (default: \"custom\")")
    p_smr.add_argument("--custom-db-meme-xml",
                   dest="custom_db_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database MEME/DREME XML file containing sequence motifs")
    p_smr.add_argument("--custom-db-cm",
                   dest="custom_db_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database covariance model (.cm) file containing covariance model(s)")
    p_smr.add_argument("--custom-db-info",
                   dest="custom_db_info",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments")
    p_smr.add_argument("--motifs",
                   dest="motifs_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Provide IDs for motifs of interest (need to be in database and loaded). All other RBP motifs will be discarded (except --regex)")
    p_smr.add_argument("--motif-min-len",
                   dest="motif_min_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_smr.add_argument("--motif-max-len",
                   dest="motif_max_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Maximum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_smr.add_argument("--functions",
                   dest="rbp_functions",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Filter defined RBPs (via --rbps) by their molecular functions (annotations available for most database RBPs). E.g. for RBPs in splicing regulation, set --functions SR, for RBPs in RNA stability & decay plus translation regulation, set --functions RSD TR (see rbpbench info for full function descriptions). NOTE that --regex is not affected by filtering")
    p_smr.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Define regular expression (regex) DNA motif to include in search, e.g. --regex AAACC, --regex 'C[ACGT]AC[AC]', .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    # ID used as RBP ID / database ID for hits of the user-supplied regex.
    # Fixed typo in user-facing help: the referenced option is "--regex"
    # (double dash), not "-regex".
    p_smr.add_argument("--regex-id",
                   dest="regex_id",
                   type=str,
                   metavar='str',
                   default = "regex",
                   help="Set regex ID used as RBP ID and database ID associated to --regex hits (default: \"regex\")")
    p_smr.add_argument("--motif-regex-id",
                   dest="motif_regex_id",
                   default = False,
                   action = "store_true",
                   help="Use --regex-id for motif ID as well. By default, regular expression string is used as motif ID for regex motif hits")
    p_smr.add_argument("--regex-type",
                   dest="regex_type",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set type of supplied --regex string 1: auto-detect type (standard regex or structure pattern). 2: given --regex string is standard regex, e.g. AC[AG]T. 3: given --regex string is structure pattern string, e.g. ((AA(((...)))AA)) (default: 1)")
    p_smr.add_argument("--regex-search-mode",
                   dest="regex_search_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define regex search mode. 1: when motif hit encountered, continue +1 after motif hit start position, 2: when motif hit encountered, continue +1 after motif hit end position. NOTE that structure pattern regex currently always uses mode 1 (default: 1)")
    p_smr.add_argument("--regex-min-gc",
                    dest="regex_min_gc",
                    type=float,
                    metavar='float',
                    default=0.0,
                    help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_smr.add_argument("--regex-max-gu",
                    dest="regex_max_gu",
                    type=float,
                    metavar='float',
                    default=1.0,
                    help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_smr.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_smr.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    p_smr.add_argument("--fimo-ntf-file",
                   dest="fimo_user_ntf_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide FIMO nucleotide frequencies (FIMO option: --bfile) file (default: use internal frequencies file, define which with --fimo-ntf-mode)")
    p_smr.add_argument("--fimo-ntf-mode",
                   dest="fimo_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for FIMO search. 1: use frequencies from human ENSEMBL transcripts (excluding introns, A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_smr.add_argument("--fimo-pval",
                   dest="fimo_pval",
                   type=float,
                   metavar='float',
                   default=0.0005,
                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.0005)")
    p_smr.add_argument("--cmsearch-bs",
                   dest="cmsearch_bs",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="CMSEARCH bit score threshold (CMSEARCH options: -T --incT). The higher the more strict (default: 1.0)")
    p_smr.add_argument("--cmsearch-mode",
                   dest="cmsearch_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set CMSEARCH mode to control strictness of filtering. 1: default setting (CMSEARCH option: --default). 2: max setting (CMSEARCH option: --max), i.e., turn all heuristic filters off, slower and more sensitive / more hits) (default: 1)")
    p_smr.add_argument("--greatest-hits",
                   dest="greatest_hits",
                   default = False,
                   action = "store_true",
                   help = "Keep only best FIMO/CMSEARCH motif hits (i.e., hit with lowest p-value / highest bit score for each motif sequence/site combination). By default, report all hits (default: False)")
    p_smr.add_argument("--tr-list",
                   dest="tr_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with transcript IDs (one ID per row) to define which transcripts to use from --gtf")
    p_smr.add_argument("--gtf-feat-min-overlap",
                   dest="gtf_feat_min_overlap",
                   type=float,
                   metavar='float',
                   default=0.1,
                   help="Minimum amount of overlap required for a motif hit to be assigned to a GTF feature (if less or no overlap, region will be assigned to \"intergenic\") (default: 0.1)")
    p_smr.add_argument("--goa",
                   dest="run_goa_tr",
                   default = False,
                   action = "store_true",
                   help = "Run gene ontology (GO) enrichment analysis on transcripts (i.e., their corresponding genes) with motif hits. Requires --gtf (default: False)")
    p_smr.add_argument("--goa-obo-mode",
                   dest="goa_obo_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to obtain GO DAG (directed acyclic graph) obo file. 1: download most recent file from internet,  2: use local file,  3: provide file via --goa-obo-file (default: 1)")
    p_smr.add_argument("--goa-obo-file",
                   dest="goa_obo_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide GO DAG obo file (default: False)")
    p_smr.add_argument("--goa-gene2go-file",
                   dest="goa_gene2go_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide gene ID to GO IDs mapping table (row format: gene_id<tab>go_id1,go_id2). By default, a local file with ENSEMBL gene IDs is used. NOTE that gene IDs need to be compatible with --gtf (default: False)")
    p_smr.add_argument("--goa-pval",
                   dest="goa_pval",
                   type=float,
                   metavar='float',
                   default=0.05,
                   help="GO enrichment analysis p-value threshold (applied on corrected p-value) (default: 0.05)")
    p_smr.add_argument("--goa-max-child",
                   dest="goa_max_child",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify maximum number of children for a significant GO term to be reported in HTML table, e.g. --goa-max-child 200. This allows filtering out very broad terms (default: None)")
    p_smr.add_argument("--goa-min-depth",
                   dest="goa_min_depth",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify minimum depth number for a significant GO term to be reported in HTML table, e.g. --goa-min-depth 5 (default: None)")
    p_smr.add_argument("--goa-filter-purified",
                   dest="goa_filter_purified",
                   default = False,
                   action = "store_true",
                   help = "Filter out GOA results labeled as purified (i.e., GO terms with significantly lower concentration) in HTML table (default: False)")
    p_smr.add_argument("--goa-only-cooc",
                   dest="goa_only_cooc",
                   default = False,
                   action = "store_true",
                   help = "Only look at regions in GO enrichment analysis which contain motifs by all specified RBPs (default: False)")
    p_smr.add_argument("--goa-rna-region",
                   dest="goa_rna_region",
                   type=int,
                   default=1,
                   choices=[1, 2, 3, 4],
                   help = "Define which (m)RNA region to use for motif hit GO enrichment analysis. 1: whole transcript, 2: only 3'UTR regions, 3: only CDS regions, 4: only 5'UTR regions (default: 1)")
    # Top n matched sequences shown in the motif hit statistics HTML report.
    # Removed stale "(create via --plot-motifs)" cross-reference copied from
    # the 'search' subparser: 'searchlongrna' defines no --plot-motifs option.
    p_smr.add_argument("--top-n-matched",
                   dest="top_n_matched",
                   type=int,
                   metavar='int',
                   default=10,
                   help="Set top n matched sequences to be displayed in motif hit statistics HTML report (default: 10)")
    p_smr.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default = False,
                   action = "store_true",
                   help = "Store plot files with absolute paths in HTML report. Default is relative paths (default: False)")
    p_smr.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default = False,
                   action = "store_true",
                   help = "Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_smr.add_argument("--meme-no-check",
                   dest="meme_disable_check",
                   default = False,
                   action = "store_true",
                   help = "Disable MEME version check. Make sure --meme-no-pgc is set if MEME version >= 5.5.4 is installed! (default: False)")
    p_smr.add_argument("--meme-no-pgc",
                   dest="meme_no_pgc",
                   default = False,
                   action = "store_true",
                   help = "Manually set MEME's FIMO --no-pgc option (required for MEME version >= 5.5.4). Make sure that MEME >= 5.5.4 is installed! (default: False)")
    p_smr.add_argument("--mrna-only",
                   dest="only_mrna",
                   default = False,
                   action = "store_true",
                   help = "Set if only mRNAs should be extracted from --gtf file for motif search and plotting of mRNA region occupancies. To look only at specific mRNAs, use --tr-list and --mrna-only (default: False)")
    p_smr.add_argument("--mrna-norm-mode",
                   dest="mrna_norm_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help = "Define whether to use median or mean mRNA region lengths for plotting. 1: median. 2: mean (default: 1)")
    p_smr.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")

    """
    Check for enriched motifs mode.
    
    """
    p_em = subparsers.add_parser('enmo',
                                help='Check for enriched motifs in input sites')
    p_em.set_defaults(which='enmo')
    # Add required arguments group.
    p_emm = p_em.add_argument_group("required arguments")
    # Required arguments for enmo.
    p_emm.add_argument("--in",
                   dest="in_sites",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Genomic or transcript regions file in BED format. Use these regions together with a generated set of background regions to check for enriched motifs")
    p_emm.add_argument("--rbps",
                   dest="list_rbps",
                   type=str,
                   metavar='str',
                   nargs='+',
                   required=True,
                   help="List of RBP names to define RBP motifs used for search (--rbps rbp1 rbp2 .. ). To search with all available motifs, use --rbps ALL. NOTE: to search with user-provided motifs, set --rbps USER and provide --user-meme-xml and/or --user-cm. To search only for --regex, set --rbps REGEX")
    p_emm.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Input GTF file with genomic region annotations. Used to extract gene or transcript background regions, taking the most prominent transcript regions. Note that only features on standard chromosomes (1,2,..,X,Y,MT) are currently used for annotation")
    p_emm.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Genomic sequences file (currently supported formats: FASTA)")
    p_emm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for enmo.
    p_em.add_argument("--user-rbp-id",
                   dest="user_rbp_id",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide RBP ID belonging to provided sequence or structure motif(s) (mandatory for --rbps USER)")
    p_em.add_argument("--user-meme-xml",
                   dest="user_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide MEME/DREME XML file containing sequence motif(s) to be used for the run (needs --rbps USER)")
    p_em.add_argument("--user-cm",
                   dest="user_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide covariance model (.cm) file containing covariance model(s) to be used for the run (needs --rbps USER)")
    p_em.add_argument("--ext",
                   dest="ext_up_down",
                   type=str,
                   metavar='str',
                   default="0",
                   help="Up- and downstream extension of --in sites in nucleotides (nt). Set e.g. --ext 30 for 30 nt on both sides, or --ext 20,10 for different up- and downstream extension (default: 0)")
    p_em.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_em.add_argument("--custom-db",
                   dest="custom_db",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database folder. Alternatively, provide single files via --custom-db-meme-xml, --custom-db-cm, --custom-db-info")
    p_em.add_argument("--custom-db-id",
                   dest="custom_db_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Set ID/name for provided custom motif database via --custom-db  (default: \"custom\")")
    p_em.add_argument("--custom-db-meme-xml",
                   dest="custom_db_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database MEME/DREME XML file containing sequence motifs")
    p_em.add_argument("--custom-db-cm",
                   dest="custom_db_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database covariance model (.cm) file containing covariance model(s)")
    p_em.add_argument("--custom-db-info",
                   dest="custom_db_info",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments")
    p_em.add_argument("--motifs",
                   dest="motifs_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Provide IDs for motifs of interest (need to be in database and loaded). All other RBP motifs will be discarded (except --regex)")
    p_em.add_argument("--motif-min-len",
                   dest="motif_min_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_em.add_argument("--motif-max-len",
                   dest="motif_max_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Maximum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_em.add_argument("--functions",
                   dest="rbp_functions",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Filter defined RBPs (via --rbps) by their molecular functions (annotations available for most database RBPs). E.g. for RBPs in splicing regulation, set --functions SR, for RBPs in RNA stability & decay plus translation regulation, set --functions RSD TR (see rbpbench info for full function descriptions). NOTE that --regex is not affected by filtering")
    p_em.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Define regular expression (regex) DNA motif to include in search, e.g. --regex AAACC, --regex 'C[ACGT]AC[AC]', .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    # ID used as RBP ID / database ID for --regex motif hits in output tables.
    # Fix: help text referred to "-regex" (single dash); the flag is "--regex".
    p_em.add_argument("--regex-id",
                   dest="regex_id",
                   type=str,
                   metavar='str',
                   default = "regex",
                   help="Set regex ID used as RBP ID and database ID associated to --regex hits (default: \"regex\")")
    p_em.add_argument("--motif-regex-id",
                   dest="motif_regex_id",
                   default = False,
                   action = "store_true",
                   help="Use --regex-id for motif ID as well. By default, regular expression string is used as motif ID for regex motif hits")
    # How to interpret the --regex string (auto-detect vs. forced type).
    # Fix: help text was missing the sentence break before the enumeration
    # ("--regex string 1: ..." -> "--regex string. 1: ...").
    p_em.add_argument("--regex-type",
                   dest="regex_type",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set type of supplied --regex string. 1: auto-detect type (standard regex or structure pattern). 2: given --regex string is standard regex, e.g. AC[AG]T. 3: given --regex string is structure pattern string, e.g. ((AA(((...)))AA)) (default: 1)")
    p_em.add_argument("--regex-search-mode",
                   dest="regex_search_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define regex search mode. 1: when motif hit encountered, continue +1 after motif hit start position, 2: when motif hit encountered, continue +1 after motif hit end position. NOTE that structure pattern regex currently always uses mode 1 (default: 1)")
    p_em.add_argument("--regex-min-gc",
                    dest="regex_min_gc",
                    type=float,
                    metavar='float',
                    default=0.0,
                    help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_em.add_argument("--regex-max-gu",
                    dest="regex_max_gu",
                    type=float,
                    metavar='float',
                    default=1.0,
                    help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_em.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_em.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    p_em.add_argument("--bed-score-col",
                   dest="bed_score_col",
                   type=int,
                   metavar='int',
                   default=5,
                   help="--in BED score column used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)")
    p_em.add_argument("--bed-sc-thr",
                   dest="bed_sc_thr",
                   type = float,
                   metavar='float',
                   default = None,
                   help = "Minimum site score (by default: --in BED column 5, or set via --bed-score-col) for filtering (assuming higher score == better site) (default: None)")
    p_em.add_argument("--bed-sc-thr-rev",
                   dest="bed_sc_thr_rev_filter",
                   default = False,
                   action = "store_true",
                   help = "Reverse --bed-sc-thr filtering (i.e. the lower the better, e.g. if score column contains p-values) (default: False)")
    # Background set options.
    p_em.add_argument("--bg-mode",
                   dest="bg_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define how to generate the background regions dataset 1: depending on type of --in sites (transcript, genomic), sample random regions with same length distribution from transcript or gene regions (based on given --gtf), 2: shuffle --in site sequences (di-nucleotide shuffling) and use these as background (default: 1)")
    p_em.add_argument("--bg-min-size",
                   dest="bg_min_size",
                   type=int,
                   metavar='int',
                   default=5000,
                   help="Minimum size of background set to be used for calculating motif enrichment. If size <= --in set size, use --in set size. If size > --in set size, double the --in set until it is <= size. Only applies for --bg-mode 1. For --bg-mode 2, you can use --bg-shuff-factor (default: 5000)")
    p_em.add_argument("--bg-mask-bed",
                   dest="bg_mask_bed",
                   type=str,
                   metavar='str',
                   help = "Additional BED regions file (6-column format) for masking regions (i.e. no background sites should be extracted from --bg-mask-bed regions)")
    p_em.add_argument("--bg-mask-blacklist",
                   dest="bg_mask_blacklist",
                   default = False,
                   action = "store_true",
                   help="Add ENCODE blacklist regions (hg38) to excluded regions set, i.e. do not sample from these blacklisted regions (default: False)")
    p_em.add_argument("--bg-incl-bed",
                   dest="bg_user_incl_bed",
                   type=str,
                   metavar='str',
                   help = "Supply BED regions file (6-column format) to define from which regions to extract background sites. Make sure that regions are compatible with --in sites (genomic or transcript). By default, representative transcript regions for all genes from --gtf are used for this. Note that if extraction of needed number of background sites from --bg-incl-bed fails, all transcripts will be used in second try (or define which to use with --tr-list). Also note that --ada-sampling only applies to interally generated list of regions")
    p_em.add_argument("--bg-ada-sampling",
                   dest="bg_adaptive_sampling",
                   default = False,
                   action = "store_true",
                   help="If --bg-mode 1 and input sites genomic, instead of random background sampling, sample adjusted to intron ratio of input sites (default: False)")
    p_em.add_argument("--bg-shuff-factor",
                   dest="bg_shuff_factor",
                   type=int,
                   metavar='int',
                   default=1,
                   help="Define number of times the size of the shuffled set (--bg-mode 2 set) should be compared to --in set (default: 1)")
    p_em.add_argument("--bg-shuff-k",
                   dest="bg_shuff_k",
                   type=int,
                   metavar='int',
                   default=2,
                   help="Define k for k-nt shuffling --in set to create background set (default: 2)")
    p_em.add_argument("--random-seed",
                   dest="random_seed",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Set a fixed random seed number (e.g. --random-seed 1) to obtain reproducible sampling results (default: None)")
    p_em.add_argument("--tr-list",
                   dest="tr_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with transcript IDs (one ID per row) to define which transcripts to use from --gtf to extract background sites")
    p_em.add_argument("--gtf-feat-min-overlap",
                   dest="gtf_feat_min_overlap",
                   type=float,
                   metavar='float',
                   default=0.1,
                   help="Minimum amount of overlap required for a region to be assigned to a GTF feature (default: 0.1)")
    p_em.add_argument("--fimo-ntf-file",
                   dest="fimo_user_ntf_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide FIMO nucleotide frequencies (FIMO option: --bfile) file (default: use internal frequencies file, define which with --fimo-ntf-mode)")
    p_em.add_argument("--fimo-ntf-mode",
                   dest="fimo_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for FIMO search. 1: use frequencies from human ENSEMBL transcripts (excluding introns, A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_em.add_argument("--fimo-pval",
                   dest="fimo_pval",
                   type=float,
                   metavar='float',
                   default=0.0005,
                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.0005)")
    p_em.add_argument("--cmsearch-bs",
                   dest="cmsearch_bs",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="CMSEARCH bit score threshold (CMSEARCH options: -T --incT). The higher the more strict (default: 1.0)")
    p_em.add_argument("--cmsearch-mode",
                   dest="cmsearch_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set CMSEARCH mode to control strictness of filtering. 1: default setting (CMSEARCH option: --default). 2: max setting (CMSEARCH option: --max), i.e., turn all heuristic filters off, slower and more sensitive / more hits) (default: 1)")
    p_em.add_argument("--greatest-hits",
                   dest="greatest_hits",
                   default = False,
                   action = "store_true",
                   help = "Keep only best FIMO/CMSEARCH motif hits (i.e., hit with lowest p-value / highest bit score for each motif sequence/site combination). By default, report all hits (default: False)")
    p_em.add_argument("--fisher-mode",
                   dest="fisher_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines Fisher exact test alternative hypothesis for testing motif enrichment in --in sites compared to generated background / control sites. 1: greater, 2: two-sided, 3: less. Setting is used both for motif enrichment and co-occurrence Fisher test (default: 1)")
    p_em.add_argument("--enmo-pval-thr",
                   dest="enmo_pval_thr",
                   type=float,
                   metavar='float',
                   default=0.001,
                   help="P-value threshold for reporting significantly enriched motifs. NOTE that if --enmo-pval-mode Bonferroni is selected, this threshold gets further adjusted by Bonferroni correction (i.e. divided by number of tests). Threshold applies unchanged for BH corrected p-values as well as for disabled correction (default: 0.001)")
    p_em.add_argument("--enmo-pval-mode",
                   dest="enmo_pval_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines multiple testing correction mode for motif enrichment p-values. 1: Benjamini-Hochberg (BH), 2: Bonferroni, 3: no correction (default: 1)")
    # Motif co-occurrence stats.
    p_em.add_argument("--cooc-pval-thr",
                   dest="cooc_pval_thr",
                   type=float,
                   metavar='float',
                   default=0.005,
                   help="Motif co-occurrence p-value threshold for reporting significant motif co-occurrences. NOTE that if --cooc-pval-mode Bonferroni is selected, this threshold gets further adjusted by Bonferroni correction (i.e. divided by number of tests). Threshold applies unchanged for BH corrected p-values as well as for disabled correction (default: 0.005)")
    p_em.add_argument("--cooc-pval-mode",
                   dest="cooc_pval_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines multiple testing correction mode for co-occurrence p-values. 1: Benjamini-Hochberg (BH), 2: Bonferroni, 3: no correction (default: 1)")
    # Secondary filter on top of the co-occurrence p-value threshold: require a
    # minimum mean distance between motif hits of a significant pair.
    # Fixes: typo "signficiant" -> "significant"; ">= 0" -> "> 0" (0 is the
    # disabled default, so the filter only engages for positive values).
    p_em.add_argument("--min-motif-dist",
                   dest="min_motif_dist",
                   type=int,
                   metavar='int',
                   default=0,
                   help="Set minimum mean motif distance for motif pair to be reported significant in motif co-occurrence heatmap plot. By default (value 0), all motif pairs <= set p-value are reported significant. So setting --min-motif-dist > 0 acts as a second filter to show only motif pairs with significant p-values as significant if there is the specified minimum average distance between their motif hits (default: 0)")
    p_em.add_argument("--max-motif-dist",
                   dest="max_motif_dist",
                   type=int,
                   metavar='int',
                   default=20,
                   help="Set maximum motif distance for motif co-occurrence plot statistic inside hover boxes (default: 20)")
    # Similarity filter: drop co-occurring motif pairs that are too similar
    # (similarity measured as -log10 of the TOMTOM p-value).
    # Fix: grammar in help text ("threshold for to filter" -> "threshold to filter").
    p_em.add_argument("--motif-sim-thr",
                   dest="motif_sim_thr",
                   type=float,
                   metavar='float',
                   default=None,
                   help="Set motif pair similarity threshold to filter out significantly co-occurring motifs that are similar to each other. Similarity between motifs is measured in -log10(TOMTOM p-value), so the larger the pair similarity value of two motifs, the more similar they are. E.g., --motif-sim-thr 2 corresponds to TOMTOM p-value of 0.01, to filter out motif pairs > 2 similarity (default: None)")
    p_em.add_argument("--motif-sim-cap",
                   dest="motif_sim_cap",
                   type=float,
                   metavar='float',
                   default=50,
                   help="Cap maximum motif similarity value to given value (default: 50)")
    p_em.add_argument("--motif-sim-norm",
                   dest="motif_sim_norm",
                   default = False,
                   action = "store_true",
                   help="Normalize motif similarities (min-max normalization) for PCA plot (default: False)")
    p_em.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default = False,
                   action = "store_true",
                   help = "Store plot files with absolute paths in HTML files. Default is relative paths (default: False)")
    p_em.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default = False,
                   action = "store_true",
                   help = "Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_em.add_argument("--meme-no-check",
                   dest="meme_disable_check",
                   default = False,
                   action = "store_true",
                   help = "Disable MEME version check. Make sure --meme-no-pgc is set if MEME version >= 5.5.4 is installed! (default: False)")
    p_em.add_argument("--meme-no-pgc",
                   dest="meme_no_pgc",
                   default = False,
                   action = "store_true",
                   help = "Manually set MEME's FIMO --no-pgc option (required for MEME version >= 5.5.4). Make sure that MEME >= 5.5.4 is installed! (default: False)")
    p_em.add_argument("--plotly-js-mode",
                   dest="plotly_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3, 4, 5, 6, 7],
                   help = "Define how to provide plotly .js file. 1: use online version via \"cdn\" (requires internet connection). 2: link to packaged plotly .js file. 3: copy plotly .js file to plots output folder. 4: include plotly .js code in plotly HTML. 5: put web version link and plotly plot codes into main HTML. 6: put local version link and plotly plot codes in main HTML. 7: put plotly js and plotly plot codes into main HTML! (default: 1)")
    p_em.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")

    """
    Check for neighboring motifs mode.

    """
    p_nm = subparsers.add_parser('nemo',
                                help='Check for neighboring motifs in input sites')
    p_nm.set_defaults(which='nemo')
    # Add required arguments group.
    p_nmm = p_nm.add_argument_group("required arguments")
    # Required arguments for nemo.
    p_nmm.add_argument("--in",
                   dest="in_sites",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Genomic or transcript motif sites file in BED format. Use these motif sites as centers to check for significant motifs in proxmimity, i.e., motifs found in context but of motif sites but not overlapping with them (set context size via --ext). Significance is checked compared to generated background regions (see --bg-mode etc.)")
    p_nmm.add_argument("--rbps",
                   dest="list_rbps",
                   type=str,
                   metavar='str',
                   nargs='+',
                   required=True,
                   help="List of RBP names to define RBP motifs used for search (--rbps rbp1 rbp2 .. ). To search with all available motifs, use --rbps ALL. NOTE: to search with user-provided motifs, set --rbps USER and provide --user-meme-xml and/or --user-cm. To search only for --regex, set --rbps REGEX")
    p_nmm.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Input GTF file with genomic region annotations. Used to extract gene or transcript background regions, taking the most prominent transcript regions. Note that only features on standard chromosomes (1,2,..,X,Y,MT) are currently used for annotation")
    p_nmm.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Genomic sequences file (currently supported formats: FASTA)")
    p_nmm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for nemo.
    p_nm.add_argument("--user-rbp-id",
                   dest="user_rbp_id",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide RBP ID belonging to provided sequence or structure motif(s) (mandatory for --rbps USER)")
    p_nm.add_argument("--user-meme-xml",
                   dest="user_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide MEME/DREME XML file containing sequence motif(s) to be used for the run (needs --rbps USER)")
    p_nm.add_argument("--user-cm",
                   dest="user_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide covariance model (.cm) file containing covariance model(s) to be used for the run (needs --rbps USER)")
    p_nm.add_argument("--ext",
                   dest="ext_up_down",
                   type=str,
                   metavar='str',
                   default="30",
                   help="Up- and downstream extension of --in motif sites in nucleotides (nt). Set e.g. --ext 30 for 30 nt on both sides, or --ext 60,0 for only looking at upstream context (default: 30)")
    p_nm.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_nm.add_argument("--custom-db",
                   dest="custom_db",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database folder. Alternatively, provide single files via --custom-db-meme-xml, --custom-db-cm, --custom-db-info")
    p_nm.add_argument("--custom-db-id",
                   dest="custom_db_id",
                   type=str,
                   metavar='str',
                   default = "custom",
                   help = "Set ID/name for provided custom motif database via --custom-db  (default: \"custom\")")
    p_nm.add_argument("--custom-db-meme-xml",
                   dest="custom_db_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database MEME/DREME XML file containing sequence motifs")
    p_nm.add_argument("--custom-db-cm",
                   dest="custom_db_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database covariance model (.cm) file containing covariance model(s)")
    p_nm.add_argument("--custom-db-info",
                   dest="custom_db_info",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database info table file containing RBP ID -> motif ID -> motif type assignments")
    p_nm.add_argument("--motifs",
                   dest="motifs_list",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Provide IDs for motifs of interest (need to be in database and loaded). All other RBP motifs will be discarded (except --regex)")
    p_nm.add_argument("--motif-min-len",
                   dest="motif_min_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_nm.add_argument("--motif-max-len",
                   dest="motif_max_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Maximum MEME/DREME motif length to include in search. By default all selected RBP motifs are used")
    p_nm.add_argument("--functions",
                   dest="rbp_functions",
                   type=str,
                   metavar='str',
                   nargs='+',
                   default=False,
                   help="Filter defined RBPs (via --rbps) by their molecular functions (annotations available for most database RBPs). E.g. for RBPs in splicing regulation, set --functions SR, for RBPs in RNA stability & decay plus translation regulation, set --functions RSD TR (see rbpbench info for full function descriptions). NOTE that --regex is not affected by filtering")
    p_nm.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   default = False,
                   help="Define regular expression (regex) DNA motif to include in search, e.g. --regex AAACC, --regex 'C[ACGT]AC[AC]', .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    # ID used as RBP ID / database ID for --regex motif hits in output tables.
    # Fix: help text referred to "-regex" (single dash); the flag is "--regex".
    p_nm.add_argument("--regex-id",
                   dest="regex_id",
                   type=str,
                   metavar='str',
                   default = "regex",
                   help="Set regex ID used as RBP ID and database ID associated to --regex hits (default: \"regex\")")
    p_nm.add_argument("--motif-regex-id",
                   dest="motif_regex_id",
                   default = False,
                   action = "store_true",
                   help="Use --regex-id for motif ID as well. By default, regular expression string is used as motif ID for regex motif hits")
    # How to interpret the --regex string (auto-detect vs. forced type).
    # Fix: help text was missing the sentence break before the enumeration
    # ("--regex string 1: ..." -> "--regex string. 1: ...").
    p_nm.add_argument("--regex-type",
                   dest="regex_type",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set type of supplied --regex string. 1: auto-detect type (standard regex or structure pattern). 2: given --regex string is standard regex, e.g. AC[AG]T. 3: given --regex string is structure pattern string, e.g. ((AA(((...)))AA)) (default: 1)")
    p_nm.add_argument("--regex-search-mode",
                   dest="regex_search_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define regex search mode. 1: when motif hit encountered, continue +1 after motif hit start position, 2: when motif hit encountered, continue +1 after motif hit end position. NOTE that structure pattern regex currently always uses mode 1 (default: 1)")
    p_nm.add_argument("--regex-min-gc",
                    dest="regex_min_gc",
                    type=float,
                    metavar='float',
                    default=0.0,
                    help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_nm.add_argument("--regex-max-gu",
                    dest="regex_max_gu",
                    type=float,
                    metavar='float',
                    default=1.0,
                    help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_nm.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_nm.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    # BED score column + score threshold filtering options.
    p_nm.add_argument("--bed-score-col",
                   dest="bed_score_col",
                   type=int,
                   metavar='int',
                   default=5,
                   help="--in BED score column used for p-value calculations. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)")
    p_nm.add_argument("--bed-sc-thr",
                   dest="bed_sc_thr",
                   type=float,
                   metavar='float',
                   default=None,
                   help="Minimum site score (by default: --in BED column 5, or set via --bed-score-col) for filtering (assuming higher score == better site) (default: None)")
    p_nm.add_argument("--bed-sc-thr-rev",
                   dest="bed_sc_thr_rev_filter",
                   default=False,
                   action="store_true",
                   help="Reverse --bed-sc-thr filtering (i.e. the lower the better, e.g. if score column contains p-values) (default: False)")
    # Background set options.
    p_nm.add_argument("--bg-mode",
                   dest="bg_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Define how to generate the background regions dataset 1: depending on type of --in sites (transcript, genomic), sample random regions with same length distribution (after applying --ext to input sites) from transcript or gene regions (based on given --gtf), 2: shuffle --in site sequences (di-nucleotide shuffling) and use these as background (default: 1)")
    p_nm.add_argument("--bg-min-size",
                   dest="bg_min_size",
                   type=int,
                   metavar='int',
                   default=5000,
                   help="Minimum size of background set to be used for calculating motif enrichment. If size <= --in set size, use --in set size. If size > --in set size, double the --in set until it is <= size. Only applies for --bg-mode 1. For --bg-mode 2, you can use --bg-shuff-factor (default: 5000)")
    p_nm.add_argument("--bg-mask-bed",
                   dest="bg_mask_bed",
                   type=str,
                   metavar='str',
                   help="Additional BED regions file (6-column format) for masking regions (i.e. no background sites should be extracted from --bg-mask-bed regions)")
    p_nm.add_argument("--bg-mask-blacklist",
                   dest="bg_mask_blacklist",
                   default=False,
                   action="store_true",
                   help="Add ENCODE blacklist regions (hg38) to excluded regions set, i.e. do not sample from these blacklisted regions (default: False)")
    # Typo fixes in help text: "internally" (was "interally") and
    # "--bg-ada-sampling" (the flag actually defined below; was "--ada-sampling").
    p_nm.add_argument("--bg-incl-bed",
                   dest="bg_user_incl_bed",
                   type=str,
                   metavar='str',
                   help="Supply BED regions file (6-column format) to define from which regions to extract background sites. Make sure that regions are compatible with --in sites (genomic or transcript). By default, representative transcript regions for all genes from --gtf are used for this. Note that if extraction of needed number of background sites from --bg-incl-bed fails, all transcripts will be used in second try (or define which to use with --tr-list). Also note that --bg-ada-sampling only applies to internally generated list of regions")
    p_nm.add_argument("--bg-ada-sampling",
                   dest="bg_adaptive_sampling",
                   default=False,
                   action="store_true",
                   help="If --bg-mode 1 and input sites genomic, instead of random background sampling, sample adjusted to intron ratio of input sites (default: False)")

    p_nm.add_argument("--bg-shuff-factor",
                   dest="bg_shuff_factor",
                   type=int,
                   metavar='int',
                   default=1,
                   help="Define number of times the size of the shuffled set (--bg-mode 2 set) should be compared to --in set (default: 1)")
    p_nm.add_argument("--bg-shuff-k",
                   dest="bg_shuff_k",
                   type=int,
                   metavar='int',
                   default=2,
                   help="Define k for k-nt shuffling --in set to create background set (default: 2)")
    p_nm.add_argument("--random-seed",
                   dest="random_seed",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Set a fixed random seed number (e.g. --random-seed 1) to obtain reproducible sampling results (default: None)")
    p_nm.add_argument("--tr-list",
                   dest="tr_list",
                   type=str,
                   metavar='str',
                   default=False,
                   help="Supply file with transcript IDs (one ID per row) to define which transcripts to use from --gtf to extract control regions")
    p_nm.add_argument("--gtf-feat-min-overlap",
                   dest="gtf_feat_min_overlap",
                   type=float,
                   metavar='float',
                   default=0.1,
                   help="Minimum amount of overlap required for a region to be assigned to a GTF feature (default: 0.1)")
    # FIMO / CMSEARCH motif search options.
    p_nm.add_argument("--fimo-ntf-file",
                   dest="fimo_user_ntf_file",
                   type=str,
                   metavar='str',
                   default=False,
                   help="Provide FIMO nucleotide frequencies (FIMO option: --bfile) file (default: use internal frequencies file, define which with --fimo-ntf-mode)")
    p_nm.add_argument("--fimo-ntf-mode",
                   dest="fimo_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for FIMO search. 1: use frequencies from human ENSEMBL transcripts (excluding introns, A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_nm.add_argument("--fimo-pval",
                   dest="fimo_pval",
                   type=float,
                   metavar='float',
                   default=0.0005,
                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.0005)")
    p_nm.add_argument("--cmsearch-bs",
                   dest="cmsearch_bs",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="CMSEARCH bit score threshold (CMSEARCH options: -T --incT). The higher the more strict (default: 1.0)")
    p_nm.add_argument("--cmsearch-mode",
                   dest="cmsearch_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set CMSEARCH mode to control strictness of filtering. 1: default setting (CMSEARCH option: --default). 2: max setting (CMSEARCH option: --max), i.e., turn all heuristic filters off, slower and more sensitive / more hits) (default: 1)")
    p_nm.add_argument("--greatest-hits",
                   dest="greatest_hits",
                   default=False,
                   action="store_true",
                   help="Keep only best FIMO/CMSEARCH motif hits (i.e., hit with lowest p-value / highest bit score for each motif sequence/site combination). By default, report all hits (default: False)")
    # Motif enrichment statistics options.
    p_nm.add_argument("--fisher-mode",
                   dest="fisher_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines Fisher exact test alternative hypothesis for testing motif enrichment in --in sites compared to generated background / control sites. 1: greater, 2: two-sided, 3: less. Setting is used both for motif enrichment and co-occurrence Fisher test (default: 1)")
    p_nm.add_argument("--nemo-pval-thr",
                   dest="nemo_pval_thr",
                   type=float,
                   metavar='float',
                   default=0.001,
                   help="P-value threshold for reporting significantly enriched motifs. NOTE that if --nemo-pval-mode Bonferroni is selected, this threshold gets further adjusted by Bonferroni correction (i.e. divided by number of tests). Threshold applies unchanged for BH corrected p-values as well as for disabled correction (default: 0.001)")
    p_nm.add_argument("--nemo-pval-mode",
                   dest="nemo_pval_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines multiple testing correction mode for motif enrichment p-values. 1: Benjamini-Hochberg (BH), 2: Bonferroni, 3: no correction (default: 1)")
    p_nm.add_argument("--allow-overlaps",
                   dest="allow_overlaps",
                   default=False,
                   action="store_true",
                   help="Allow overlaps of motif hit regions with provided --in sites. By default, motif hits overlapping with --in sites are filtered out (default: False)")
    # Motif co-occurrence stats.
    p_nm.add_argument("--cooc-pval-thr",
                   dest="cooc_pval_thr",
                   type=float,
                   metavar='float',
                   default=0.005,
                   help="Motif co-occurrence p-value threshold for reporting significant motif co-occurrences. NOTE that if --cooc-pval-mode Bonferroni is selected, this threshold gets further adjusted by Bonferroni correction (i.e. divided by number of tests). Threshold applies unchanged for BH corrected p-values as well as for disabled correction (default: 0.005)")
    p_nm.add_argument("--cooc-pval-mode",
                   dest="cooc_pval_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Defines multiple testing correction mode for co-occurrence p-values. 1: Benjamini-Hochberg (BH), 2: Bonferroni, 3: no correction (default: 1)")
    # Typo fix in help text: "significant" (was "signficiant").
    p_nm.add_argument("--min-motif-dist",
                   dest="min_motif_dist",
                   type=int,
                   metavar='int',
                   default=0,
                   help="Set minimum mean motif distance for motif pair to be reported significant in motif co-occurrence heatmap plot. By default (value 0), all motif pairs <= set p-value are reported significant. So setting --min-motif-dist >= 0 acts as a second filter to show only motif pairs with significant p-values as significant if there is the specified minimum average distance between their motif hits (default: 0)")
    p_nm.add_argument("--max-motif-dist",
                   dest="max_motif_dist",
                   type=int,
                   metavar='int',
                   default=20,
                   help="Set maximum motif distance for motif co-occurrence plot statistic inside hover boxes (default: 20)")
    # Grammar fix in help text: "threshold to filter out" (was "threshold for to filter out").
    p_nm.add_argument("--motif-sim-thr",
                   dest="motif_sim_thr",
                   type=float,
                   metavar='float',
                   default=None,
                   help="Set motif pair similarity threshold to filter out significantly co-occurring motifs that are similar to each other. Similarity between motifs is measured in -log10(TOMTOM p-value), so the larger the pair similarity value of two motifs, the more similar they are. E.g., --motif-sim-thr 2 corresponds to TOMTOM p-value of 0.01, to filter out motif pairs > 2 similarity (default: None)")
    p_nm.add_argument("--motif-sim-cap",
                   dest="motif_sim_cap",
                   type=float,
                   metavar='float',
                   default=50,
                   help="Cap maximum motif similarity value to given value (default: 50)")
    p_nm.add_argument("--motif-sim-norm",
                   dest="motif_sim_norm",
                   default=False,
                   action="store_true",
                   help="Normalize motif similarities (min-max normalization) for PCA plot (default: False)")
    # Report / plot options.
    p_nm.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default=False,
                   action="store_true",
                   help="Store plot files with absolute paths in HTML files. Default is relative paths (default: False)")
    p_nm.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default=False,
                   action="store_true",
                   help="Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_nm.add_argument("--meme-no-check",
                   dest="meme_disable_check",
                   default=False,
                   action="store_true",
                   help="Disable MEME version check. Make sure --meme-no-pgc is set if MEME version >= 5.5.4 is installed! (default: False)")
    p_nm.add_argument("--meme-no-pgc",
                   dest="meme_no_pgc",
                   default=False,
                   action="store_true",
                   help="Manually set MEME's FIMO --no-pgc option (required for MEME version >= 5.5.4). Make sure that MEME >= 5.5.4 is installed! (default: False)")
    p_nm.add_argument("--plotly-js-mode",
                   dest="plotly_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3, 4, 5, 6, 7],
                   help="Define how to provide plotly .js file. 1: use online version via \"cdn\" (requires internet connection). 2: link to packaged plotly .js file. 3: copy plotly .js file to plots output folder. 4: include plotly .js code in plotly HTML. 5: put web version link and plotly plot codes into main HTML. 6: put local version link and plotly plot codes in main HTML. 7: put plotly js and plotly plot codes into main HTML! (default: 1)")
    p_nm.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")

    """
    Compare phylogenomic conservation between two sets of genomic sites.

    """
    # 'con' mode: compare conservation scores of two BED region sets.
    p_cn = subparsers.add_parser(
        'con', help='Compare conservation in genomic sites')
    p_cn.set_defaults(which='con')
    # Group containing the mandatory arguments.
    p_cnm = p_cn.add_argument_group("required arguments")
    # Mandatory input / output arguments for con.
    p_cnm.add_argument(
        "--in", dest="in_bed", type=str, metavar='str', required=True,
        help="Genomic sites of interest in BED format")
    p_cnm.add_argument(
        "--ctrl-in", dest="in_control_bed", type=str, metavar='str',
        required=True,
        help="Genomic control sites to compare to --in genomic sites regarding conservation scores in BED format")
    p_cnm.add_argument(
        "--out", dest="out_folder", type=str, metavar='str', required=True,
        help="Results output folder")
    # Optional conservation score inputs and statistics settings.
    p_cn.add_argument(
        "--phastcons", dest="pc_bw", type=str, metavar='str',
        help="Genomic .bigWig file with phastCons conservation scores")
    p_cn.add_argument(
        "--phylop", dest="pp_bw", type=str, metavar='str',
        help="Genomic .bigWig file with phyloP conservation scores")
    p_cn.add_argument(
        "--use-regions", dest="use_regions", default=False,
        action="store_true",
        help="Use genomic regions as --in / --ctrl-in input site IDs instead of BED col4 IDs (default: False)")
    p_cn.add_argument(
        "--no-id-check", dest="no_id_check", default=False,
        action="store_true",
        help="Do not check region IDs, instead overwriting existing regions if they have identical IDs (default: False)")
    p_cn.add_argument(
        "--wrs-mode", dest="wrs_mode", type=int, default=1, choices=[1, 2],
        help="Defines Wilcoxon rank-sum test alternative hypothesis for testing whether --in sites have significantly different average conservation scores compared to --ctrl-in sites. 1: test for higher (greater) scores, 2: test for lower (less) scores (default: 1)")
    # Report / plot options.
    p_cn.add_argument(
        "--plot-abs-paths", dest="plot_abs_paths", default=False,
        action="store_true",
        help="Store plot files with absolute paths in HTML files. Default is relative paths (default: False)")
    p_cn.add_argument(
        "--plot-pdf", dest="plot_pdf", default=False, action="store_true",
        help="Also output .png plots as .pdf in plotting subfolder (default: False)")
    p_cn.add_argument(
        "--sort-js-mode", dest="sort_js_mode", type=int, default=1,
        choices=[1, 2, 3],
        help="Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to output folder. 3: include .js code in HTML (default: 1)")

    """
    Check for sponge transcript sequences.

    """
    # 'sponge' mode: find transcripts with high regex hit densities.
    p_spn = subparsers.add_parser('sponge',
                                help='Check for sponge transcripts')
    p_spn.set_defaults(which='sponge')
    # Add required arguments group.
    p_spnm = p_spn.add_argument_group("required arguments")
    # Required arguments for sponge.
    # Help text fix: completed truncated IUPAC example sentence
    # ("AAARN resol." -> "AAARN resolves to AAA[AG][ACGT]", matching isocomp help).
    p_spnm.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Specify regular expression (regex) DNA motif for which to look for sponge transcripts, e.g. --regex AAACCC, --regex 'C[ACGT]AC[AC]' .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    p_spnm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for sponge.
    # Typo fix in help text: "identify" (was "identifiy").
    p_spn.add_argument("--fasta",
                   dest="in_fasta",
                   type=str,
                   metavar='str',
                   default=False,
                   help="Input FASTA file with transcript sequences to check for --regex matches, to identify sponge transcripts (i.e. sequences with high amounts of regex hits per kilo base (kb) length). Note that sequence IDs have to be unique. Also note that either --fasta or --gtf + --genome need to be supplied, in order to get transcript sequences for sponge testing")
    p_spn.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   default=False,
                   help="Input GTF file with genomic region annotations. Used to extract transcript sequences to check for sponge effects. Note that only features on standard chromosomes (1,2,..,X,Y,MT) are currently used for annotation")
    p_spn.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   default=False,
                   help="Genomic sequences file (currently supported formats: FASTA)")
    p_spn.add_argument("--select-mode",
                   dest="select_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Define what to extract from GTF file, i.e., for which transcripts or parts of transcripts to extract sequences to use for sponge search. 1: use full transcripts from all genes (selecting one representative for each gene). 2: use only mRNA transcripts. 3: use only 3'UTR parts of mRNA transcripts (default: 1)")
    p_spn.add_argument("--tr-list",
                   dest="tr_list",
                   type=str,
                   metavar='str',
                   default=False,
                   help="Supply file with transcript IDs (one ID per row) to define which transcripts to extract from --gtf (overrides representative transcript selection, might not be compatible with --select-mode 2 or 3)")
    p_spn.add_argument("--allow-overlaps",
                   dest="allow_overlaps",
                   default=False,
                   action="store_true",
                   help="Allow overlapping regex hits. By default, search continues +1 after regex hit end position (i.e., not overlapping). NOTE that if --regex is structure pattern, search is currently always overlapping (default: False)")
    # Help text fix: reference actual flag name "--min-spacer-len" (was "--min-spacer").
    p_spn.add_argument("--min-spacer-len",
                   dest="min_spacer_len",
                   type=int,
                   metavar='int',
                   default=0,
                   help="Minimum spacer length between regex hits. By default 0, i.e., hits can also be adjacent. Note that setting --min-spacer-len to > 0 also sets --allow-overlaps (default: 0)")
    p_spn.add_argument("--min-seq-len",
                   dest="min_seq_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum sequence length required for input transcript sequences to be included in search (default: False)")
    p_spn.add_argument("--min-hit-count",
                   dest="min_hit_count",
                   type=int,
                   metavar='int',
                   default=0,
                   help="Minimum regex hit count for a transcript to be included in percentile calculation and output table (default: 0)")
    # Structure pattern regex filtering options.
    p_spn.add_argument("--regex-min-gc",
                   dest="regex_min_gc",
                   type=float,
                   metavar='float',
                   default=0.0,
                   help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_spn.add_argument("--regex-max-gu",
                   dest="regex_max_gu",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_spn.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_spn.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    p_spn.add_argument("--chr-id-style",
                   dest="chr_id_style",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Define to which chromosome ID style to convert chromosome IDs to. 1: do not change chromosome IDs. 2: convert to chr1,chr2,...,chrM style. 3: convert to 1,2,...,MT style (default: 1)")

    """
    Compare regex hit occurrences between transcript isoforms.

    """
    # 'isocomp' mode: compare regex motif hits across transcript isoforms.
    p_iso = subparsers.add_parser('isocomp',
                                help='Compare motif hits on transcript isoforms')
    p_iso.set_defaults(which='isocomp')
    # Add required arguments group.
    p_isom = p_iso.add_argument_group("required arguments")
    # Required arguments for isocomp.
    p_isom.add_argument("--regex",
                   dest="regex",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Specify regular expression (regex) DNA motif for which to check for differences in hit occurrences between transcript isoforms, e.g. --regex AAACCC, --regex 'C[ACGT]AC[AC]' .. IUPAC code is also supported, e.g. AAARN resolves to AAA[AG][ACGT]. Alternatively, supply structure pattern, e.g. AA((((ARA))))AA or CC(((A...R)))CC with variable spacer")
    p_isom.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for isocomp.
    p_iso.add_argument("--fasta",
                   dest="in_fasta",
                   type=str,
                   metavar='str',
                   default=False,
                   help="Input FASTA file with transcript sequences to check for --regex matches. Note that sequence IDs have to be unique, and in format >transcript_id,gene_id for isoform assignment. Also note that either --fasta or --gtf + --genome need to be supplied")
    p_iso.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   default=False,
                   help="Input GTF file with genomic region annotations. Used to extract transcript sequences for isoform comparisons. NOTE that by default only 3'UTR sequences are compared / used for motif search (change via --select-mode). Also note that only features on standard chromosomes (1,2,..,X,Y,MT) are currently used for annotation")
    p_iso.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   default=False,
                   help="Genomic sequences file (currently supported formats: FASTA)")
    p_iso.add_argument("--select-mode",
                   dest="select_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3, 4, 5],
                   help="Define what to extract from GTF file, i.e., for which transcripts or parts of transcripts to extract sequences to use for motif search and isoform comparison. 1: use only 3'UTR parts of mRNA transcripts. 2: use only 5'UTR parts of mRNA transcripts. 3: use full mRNA transcripts (i.e., all mRNA transcripts from GTF). 4: use full non-coding transcripts (i.e., no transcripts from protein-coding genes). 5: use full transcripts, coding AND non-coding (i.e., all transcripts from GTF!) (default: 1)")
    p_iso.add_argument("--allow-overlaps",
                   dest="allow_overlaps",
                   default=False,
                   action="store_true",
                   help="Allow overlapping regex hits. By default, search continues +1 after regex hit end position (i.e., not overlapping). NOTE that if --regex is structure pattern, search is currently always overlapping (default: False)")
    # Help text fix: reference actual flag name "--min-spacer-len" (was "--min-spacer").
    p_iso.add_argument("--min-spacer-len",
                   dest="min_spacer_len",
                   type=int,
                   metavar='int',
                   default=0,
                   help="Minimum spacer length between regex hits. By default 0, i.e., hits can also be adjacent. Note that setting --min-spacer-len to > 0 also sets --allow-overlaps (default: 0)")
    p_iso.add_argument("--min-seq-len",
                   dest="min_seq_len",
                   type=int,
                   metavar='int',
                   default=False,
                   help="Minimum sequence length required for input transcript sequences to be included in search (default: False)")
    # Structure pattern regex filtering options.
    p_iso.add_argument("--regex-min-gc",
                   dest="regex_min_gc",
                   type=float,
                   metavar='float',
                   default=0.0,
                   help="Minimum GC base pair fraction to report structure pattern regex hits (default: 0.0)")
    p_iso.add_argument("--regex-max-gu",
                   dest="regex_max_gu",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="Maximum GU (GT) base pair fraction to report structure pattern regex hits (default: 1.0)")
    p_iso.add_argument("--regex-spacer-min",
                   dest="regex_spacer_min",
                   type=int,
                   metavar='int',
                   default=5,
                   help="Minimum spacer length for structure pattern regex search (default: 5)")
    p_iso.add_argument("--regex-spacer-max",
                   dest="regex_spacer_max",
                   type=int,
                   metavar='int',
                   default=200,
                   help="Maximum spacer length for structure pattern regex search (default: 200)")
    p_iso.add_argument("--chr-id-style",
                   dest="chr_id_style",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Define to which chromosome ID style to convert chromosome IDs to. 1: do not change chromosome IDs. 2: convert to chr1,chr2,...,chrM style. 3: convert to 1,2,...,MT style (default: 1)")

    """
    Run STREME on input sites for motif discovery.

    """
    # 'streme' mode: de novo motif discovery via the MEME suite's STREME.
    p_st = subparsers.add_parser(
        'streme', help='Discover motifs in input sites using STREME')
    p_st.set_defaults(which='streme')
    # Group containing the mandatory arguments.
    p_stm = p_st.add_argument_group("required arguments")
    # Mandatory input / output arguments for streme.
    p_stm.add_argument(
        "--in", dest="in_fa", type=str, metavar='str', required=True,
        help="Provide primary (positive) sequences FASTA file (STREME option: --p)")
    p_stm.add_argument(
        "--out", dest="out_folder", type=str, metavar='str', required=True,
        help="Results output folder")
    # STREME options.
    p_st.add_argument(
        "--neg-in", dest="in_neg_fa", type=str, metavar='str', default=False,
        help="Provide control (negative) sequences FASTA file. By default, shuffled --in positive sequences are used as control sequences (STREME option: --n)")
    p_st.add_argument(
        "--streme-bfile", dest="streme_bfile", type=str, metavar='str',
        default=False,
        help="Provide STREME nucleotide frequencies (STREME option: --bfile) file (default: use internal frequencies file, define which with --streme-ntf-mode)")
    p_st.add_argument(
        "--streme-ntf-mode", dest="streme_ntf_mode", type=int, default=1,
        choices=[1, 2, 3],
        help="Set which internal nucleotide frequencies to use for STREME. 1: use frequencies from human ENSEMBL transcripts (excluding introns, with A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_st.add_argument(
        "--streme-thresh", dest="streme_thresh", type=float, metavar='float',
        default=0.05,
        help="STREME significance threshold (p-value) for reporting enriched motifs (STREME option --thresh) (default: 0.05)")
    p_st.add_argument(
        "--streme-minw", dest="streme_minw", type=int, metavar='int',
        default=6,
        help="Minimum width for motifs (must be >= 3) (STREME option: --minw) (default: 6)")
    p_st.add_argument(
        "--streme-maxw", dest="streme_maxw", type=int, metavar='int',
        default=15,
        help="Maximum width for motifs (must be <= 30) (STREME option: --maxw) (default: 15)")
    p_st.add_argument(
        "--streme-seed", dest="streme_seed", type=int, metavar='int',
        default=0,
        help="Random seed for shuffling sequences (STREME option: --seed) (default: 0)")
    p_st.add_argument(
        "--streme-order", dest="streme_order", type=int, metavar='int',
        default=2,
        help="Estimates an m-order background model for scoring sites and uses an m-order shuffle if creating control sequences from primary sequences. Default for RNA/DNA: 2 (STREME option: --order) (default: 2)")
    p_st.add_argument(
        "--streme-evalue", dest="streme_evalue", default=False,
        action="store_true",
        help="Use E-value threshold instead of p-value (STREME option: --evalue) (default: False)")

    """
    Check if input motif is similar to database motifs using TOMTOM.

    """
    p_to = subparsers.add_parser('tomtom',
                                help='Compare motif(s) with database using TOMOTM')
    p_to.set_defaults(which='tomtom')
    # Add required arguments group.
    p_tom = p_to.add_argument_group("required arguments")
    # Required arguments for tomtom.
    p_tom.add_argument("--in",
                   dest="motif_in",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Provide MEME XML style file path or (regular expression) sequence to search for similar motifs in database. Currently only square bracket containing regexes are supported")
    p_tom.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments.
    p_to.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database used so search for motifs similar to --in. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_to.add_argument("--custom-db",
                   dest="custom_db_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database MEME/DREME XML file containing sequence motifs to search against. By default internal database is used, define which with --motif-db")
    # p_to.add_argument("--custom-db-id",
    #                dest="custom_db_id",
    #                type=str,
    #                metavar='str',
    #                default = "custom",
    #                help = "Set ID/name for provided custom motif database via --custom-db  (default: \"custom\")")
    p_to.add_argument("--regex-mode",
                   dest="regex_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help = "If --in is regex/sequence with format e.g. AC[AT]A, define whether to split regex into single motifs (ACAA, ACTA), or to make one motif out of it. 1: convert to one motif. 2: convert to multiple single motifs (default: 1)")
    p_to.add_argument("--fe-pval",
                   dest="fe_pval_thr",
                   type=float,
                   metavar='float',
                   default=0.5,
                   help="RBP function enrichment p-value threshold (default: 0.5)")
    p_to.add_argument("--fe-mode",
                   dest="fe_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help = "Define whether to calculate function enrichment on RBP or single motif level. 1) RBP level 2) single motif level (default: 1)")

    # TOMTOM options.
    p_to.add_argument("--tomtom-bfile",
                   dest="tomtom_bfile",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide TOMTOM nucleotide frequencies (TOMTOM option: -bfile) file (default: use internal frequencies file, define which with --tomtom-ntf-mode)")
    p_to.add_argument("--tomtom-ntf-mode",
                   dest="tomtom_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for TOMTOM. 1: use frequencies from human ENSEMBL transcripts (excluding introns, with A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_to.add_argument("--tomtom-thresh",
                   dest="tomtom_thresh",
                   type=float,
                   metavar='float',
                   default=0.5,
                   help="TOMTOM q-value threshold (TOMTOM option: -thresh) (default: 0.5)")
    p_to.add_argument("--tomtom-evalue",
                   dest="tomtom_evalue",
                   default = False,
                   action = "store_true",
                   help = "Use E-value threshold instead of q-value (TOMTOM option: -evalue) (default: False)")
    p_to.add_argument("--tomtom-m",
                   dest="tomtom_m",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Use only query motifs with a specified ID, may be repeated (TOMTOM option: -m) file")
    p_to.add_argument("--tomtom-min-overlap",
                   dest="tomtom_min_overlap",
                   type=int,
                   metavar='int',
                   default=1,
                   help="Minimum overlap between query and target (TOMTOM option: -min-overlap) (default: 1)")

    """
    Run GO enrichment analysis on gene list.

    """
    p_goa = subparsers.add_parser('goa',
                                help='Run GO enrichment analysis on gene list')
    p_goa.set_defaults(which='goa')
    # Add required arguments group.
    p_goam = p_goa.add_argument_group("required arguments")
    # Required arguments for goa.
    p_goam.add_argument("--in",
                   dest="in_gene_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with gene IDs (one ID per row) to define which genes to use as target genes in GO enrichment analysis (GOA)")
    p_goam.add_argument("--gtf",
                   dest="in_gtf",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Input GTF file with genomic annotations to extract background genes used for GOA. Note that eventually only genes are used which are present in internal Ensembl gene ID -> GO ID(s) mapping or in provided mapping via --goa-gene2go-file")
    p_goam.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Results output folder")
    # Optional arguments for goa.
    p_goa.add_argument("--goa-obo-mode",
                   dest="goa_obo_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to obtain GO DAG (directed acyclic graph) obo file. 1: download most recent file from internet,  2: use local file,  3: provide file via --goa-obo-file (default: 1)")
    p_goa.add_argument("--goa-obo-file",
                   dest="goa_obo_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide GO DAG obo file (default: False)")
    p_goa.add_argument("--goa-gene2go-file",
                   dest="goa_gene2go_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide gene ID to GO IDs mapping table (row format: gene_id<tab>go_id1,go_id2). By default, a local file with ENSEMBL gene IDs is used. NOTE that gene IDs need to be compatible with --gtf (default: False)")
    p_goa.add_argument("--goa-pval",
                   dest="goa_pval",
                   type=float,
                   metavar='float',
                   default=0.05,
                   help="GO enrichment analysis p-value threshold (applied on corrected p-value) (default: 0.05)")
    p_goa.add_argument("--goa-bg-gene-list",
                   dest="goa_bg_gene_list",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Supply file with gene IDs (one ID per row) to use as background gene list for GOA. NOTE that gene IDs need to be compatible with --gtf (default: False)")
    p_goa.add_argument("--goa-max-child",
                   dest="goa_max_child",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify maximum number of children for a significant GO term to be reported in HTML table, e.g. --goa-max-child 100. This allows filtering out very broad terms (default: None)")
    p_goa.add_argument("--goa-min-depth",
                   dest="goa_min_depth",
                   type=int,
                   metavar='int',
                   default=None,
                   help="Specify minimum depth number for a significant GO term to be reported in HTML table, e.g. --goa-min-depth 5 (default: None)")
    p_goa.add_argument("--goa-filter-purified",
                   dest="goa_filter_purified",
                   default = False,
                   action = "store_true",
                   help = "Filter out GOA results labeled as purified (i.e., GO terms with significantly lower concentration) in HTML table (default: False)")
    p_goa.add_argument("--plot-abs-paths",
                   dest="plot_abs_paths",
                   default = False,
                   action = "store_true",
                   help = "Store plot files with absolute paths in HTML files. Default is relative paths (default: False)")
    p_goa.add_argument("--sort-js-mode",
                   dest="sort_js_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help = "Define how to provide sorttable.js file. 1: link to packaged .js file. 2: copy .js file to plots output folder. 3: include .js code in HTML (default: 1)")

    """
    Optimal extension mode.

    """
    p_oe = subparsers.add_parser('optex',
                                  help='Investigate optimal extension')
    p_oe.set_defaults(which='optex')
    # Add required arguments group.
    p_oem = p_oe.add_argument_group("required arguments")
    # Required arguments.
    p_oem.add_argument("--in",
                   dest="in_sites",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Genomic RBP binding sites (peak regions) file in BED format (also positives + negatives)")
    p_oem.add_argument("--rbp-id",
                   dest="rbp_id",
                   type=str,
                   metavar='str',
                   required=True,
                   help = "Provide RBP ID to define RBP motifs used for search")
    p_oem.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Genomic sequences file (currently supported formats: FASTA)")
    # Optional arguments.
    p_oe.add_argument("--user-meme-xml",
                   dest="user_meme_xml",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide MEME/DREME XML file containing sequence motif(s) to be used as search motifs")
    p_oe.add_argument("--user-cm",
                   dest="user_cm",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide covariance model (.cm) file containing covariance model(s) to be used as search motifs")
    p_oe.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_oe.add_argument("--fimo-ntf-file",
                   dest="fimo_user_ntf_file",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide FIMO nucleotide frequencies (FIMO option: --bfile) file (default: use internal frequencies file, define which with --fimo-ntf-mode)")
    p_oe.add_argument("--fimo-ntf-mode",
                   dest="fimo_ntf_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Set which internal nucleotide frequencies to use for FIMO search. 1: use frequencies from human ENSEMBL transcripts (excluding introns, A most prominent) 2: use frequencies from human ENSEMBL transcripts (including introns, resulting in lower G+C and T most prominent) 3: use uniform frequencies (same for every nucleotide) (default: 1)")
    p_oe.add_argument("--fimo-pval",
                   dest="fimo_pval",
                   type=float,
                   metavar='float',
                   default=0.0005,
                   help="FIMO p-value threshold (FIMO option: --thresh) (default: 0.0005)")
    p_oe.add_argument("--cmsearch-bs",
                   dest="cmsearch_bs",
                   type=float,
                   metavar='float',
                   default=1.0,
                   help="CMSEARCH bit score threshold (CMSEARCH options: -T --incT). The higher the more strict (default: 1.0)")
    p_oe.add_argument("--cmsearch-mode",
                   dest="cmsearch_mode",
                   type=int,
                   default=1,
                   choices=[1, 2],
                   help="Set CMSEARCH mode to control strictness of filtering. 1: default setting (CMSEARCH option: --default). 2: max setting (CMSEARCH option: --max), i.e., turn all heuristic filters off, slower and more sensitive / more hits) (default: 1)")
    p_oe.add_argument("--greatest-hits",
                   dest="greatest_hits",
                   default = False,
                   action = "store_true",
                   help = "Keep only best FIMO/CMSEARCH motif hits (i.e., hit with lowest p-value / highest bit score for each motif sequence/site combination). By default, report all hits (default: False)")
    p_oe.add_argument("--bed-score-col",
                   dest="bed_score_col",
                   type=int,
                   metavar='int',
                   default=5,
                   help="--in BED score column used for p-value calculations and finding optimal extension. BED score can be e.g. log2 fold change or -log10 p-value of the region (default: 5)")
    p_oe.add_argument("--bed-sc-thr",
                   dest="bed_sc_thr",
                   type = float,
                   metavar='float',
                   default = None,
                   help = "Minimum site score (by default: --in BED column 5, or set via --bed-score-col) for filtering (assuming higher score == better site) (default: None)")
    p_oe.add_argument("--bed-sc-thr-rf",
                   dest="bed_sc_thr_rev_filter",
                   default = False,
                   action = "store_true",
                   help = "Reverse --bed-sc-thr filtering (i.e. the lower the better, e.g. if score column contains p-values) (default: False)")
    p_oe.add_argument("--ext-pval",
                   dest="ext_pval",
                   type=float,
                   metavar='float',
                   default=0.001,
                   help="Longest extension p-value (default: 0.05)")
    p_oe.add_argument("--ext-list",
                   dest="ext_list",
                   type=int,
                   metavar='int',
                   nargs='+',
                   help="List of extensions to test (e.g. --ext-list 0 10 20 30 40 50). Internally, all combinations will be tested")
    p_oe.add_argument("--meme-no-check",
                   dest="meme_disable_check",
                   default = False,
                   action = "store_true",
                   help = "Disable MEME version check. Make sure --meme-no-pgc is set if MEME version >= 5.5.4 is installed! (default: False)")
    p_oe.add_argument("--meme-no-pgc",
                   dest="meme_no_pgc",
                   default = False,
                   action = "store_true",
                   help = "Manually set MEME's FIMO --no-pgc option (required for MEME version >= 5.5.4). Make sure that MEME >= 5.5.4 is installed! (default: False)")

    """
    Dist mode.

    """
    p_d = subparsers.add_parser('dist',
                                  help='Plot nt distribution at genomic positions')
    p_d.set_defaults(which='dist')
    # Add required arguments group.
    p_dm = p_d.add_argument_group("required arguments")
    # Required arguments.
    p_dm.add_argument("--in",
                   dest="in_sites",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Genomic RBP binding sites (peak regions) file in BED format (can be single positions or extended regions). Use --cp-mode to define zero position for plotting")
    p_dm.add_argument("--genome",
                   dest="in_genome",
                   type=str,
                   metavar='str',
                   required = True,
                   help = "Genomic sequences file (currently supported formats: FASTA)")
    p_dm.add_argument("--out",
                   dest="out_folder",
                   type=str,
                   metavar='str',
                   required=True,
                   help="Distribution plot results output folder")
    # Optional arguments.
    p_d.add_argument("--cp-mode",
                   dest="cp_mode",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Define which position of --in genomic sites to use as zero position for plotting. 1: upstream end position, 2: center position, 3: downstream end position (default: 1)")
    p_d.add_argument("--ext",
                   dest="ext_up_down",
                   type=int,
                   metavar='int',
                   default=10,
                   help="Up- and downstream extension of defined genomic positions (define via --cp-mode) in nucleotides (nt). Set e.g. --ext 20 for 20 nt on both sides (default: 10)")
    p_d.add_argument("--no-uniq-check",
                   dest="no_uniq_reg_check",
                   default = False,
                   action = "store_true",
                   help = "Disable checking for unique input regions and positions (defined by --cp-mode). By default, duplicate input regions are removed, and encountering identical genomic positions (defined by --cp-mode) for plotting results in an assert error (default: False)")
    p_d.add_argument("--plot-pdf",
                   dest="plot_pdf",
                   default = False,
                   action = "store_true",
                   help = "Plot .pdf (default: .png)")

    """
    Info mode.

    """
    p_i = subparsers.add_parser('info',
                                  help='Inform about motif database content')
    p_i.set_defaults(which='info')
    # Optional arguments.
    p_i.add_argument("--motif-db",
                   dest="motif_db",
                   type=int,
                   default=1,
                   choices=[1, 2, 3],
                   help="Built-in motif database to use. 1: human RBP motifs (257 RBPs, 599 motifs, \"catrapid_omics_v2.1_human_6plus_ext\"), 2: human RBP motifs + 23 ucRBP motifs (277 RBPs, 622 motifs, \"catrapid_omics_v2.1_human_6plus_ext_ucrbps\"), 3: human RBP motifs from Ray et al. 2013 (80 RBPs, 102 motifs, \"ray2013_human_rbps_rnacompete\") (default: 1)")
    p_i.add_argument("--custom-db",
                   dest="custom_db",
                   type=str,
                   metavar='str',
                   default = False,
                   help = "Provide custom motif database folder and print included IDs")

    return p


################################################################################

def specify_motif_db(motif_db_nr, db_path="./"):
    """
    Select a built-in motif database by number.

    Returns a 4-tuple:
    sequence motif database file path (MEME format),
    structure motif database file path (.cm, False if the database has none),
    RBP motif-to-name IDs mapping file path,
    motif database ID string.

    motif_db_nr options (any other value falls back to 1):
    1: catrapid_omics_v2.1_human_6plus_ext
    2: catrapid_omics_v2.1_human_6plus_ext_ucrbps
    3: ray2013_human_rbps_rnacompete

    """

    # Per-database file names + ID string; entry layout:
    # (seq motifs file, str motifs file or False, rbp2ids file, db ID string).
    db1_entry = ("/catRAPID_omics_v2.1_6plus_motifs.weak_rounded.ext.meme",
                 "/str_motifs.cm",
                 "/rbp_motif2name_ids.1.txt",
                 "catrapid_omics_v2.1_human_6plus_ext")
    nr2entry = {
        2: ("/catRAPID_omics_v2.1_6plus_motifs.weak_rounded.ext.ucrbps.meme",
            "/str_motifs.cm",
            "/rbp_motif2name_ids.2.txt",
            "catrapid_omics_v2.1_human_6plus_ext_ucrbps"),
        3: ("/Ray2013_rbp_Homo_sapiens.dna_encoded.meme",
            False,  # Ray 2013 database ships no structure motifs.
            "/rbp_motif2name_ids.3.txt",
            "ray2013_human_rbps_rnacompete"),
    }
    seq_name, str_name, ids_name, motif_db_str = nr2entry.get(motif_db_nr, db1_entry)
    seq_motifs_db_file = db_path + seq_name
    str_motifs_db_file = db_path + str_name if str_name else False
    rbp2ids_file = db_path + ids_name
    return seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str


################################################################################

def get_local_tomtom_sim_file(motif_db_nr, db_path="./"):
    """
    Return local TOMTOM motif pair similarities file path for the selected
    built-in motif database (any motif_db_nr other than 2 or 3 maps to the
    database 1 file).

    """
    nr2sim_file = {
        2: "/catRAPID_omics_v2.1_6plus_motifs.weak_rounded.ext.ucrbps.tomtom_sim.tsv.gz",
        3: "/Ray2013_rbp_Homo_sapiens.dna_encoded.tomtom_sim.tsv.gz",
    }
    db1_sim_file = "/catRAPID_omics_v2.1_6plus_motifs.weak_rounded.ext.tomtom_sim.tsv.gz"
    return db_path + nr2sim_file.get(motif_db_nr, db1_sim_file)


################################################################################

def specify_custom_motif_db(motif_db_path):
    """
    Define a custom motif database from a given folder.

    The folder has to contain an info.txt file plus seq_motifs.meme and/or
    str_motifs.cm. info.txt row format (columns: motif_id, rbp_id,
    motif_type, organism; organism column optional for now):
    ZRANB2_1	ZRANB2	meme_xml	human
    ZRANB2_2	ZRANB2	meme_xml	human
    RF00032	SLBP	cm	human

    Returns the paths of the sequence motifs file, the structure motifs
    file, and the info.txt mapping file inside the folder.

    """
    assert os.path.isdir(motif_db_path), "given --custom-motif-db path \"%s\" is not a valid path" %(motif_db_path)
    # Expected file locations inside the custom database folder.
    seq_motifs_db_file = motif_db_path + "/seq_motifs.meme"
    str_motifs_db_file = motif_db_path + "/str_motifs.cm"
    rbp2ids_file = motif_db_path + "/info.txt"
    # At least one of the two motif files has to be present.
    has_seq_motifs = os.path.exists(seq_motifs_db_file)
    has_str_motifs = os.path.exists(str_motifs_db_file)
    assert has_seq_motifs or has_str_motifs, "custom database folder \"%s\" needs to contain seq_motifs.meme and/or str_motifs.cm file" %(motif_db_path)
    assert os.path.exists(rbp2ids_file), "custom database folder \"%s\" needs to contain info.txt file" %(motif_db_path)
    return seq_motifs_db_file, str_motifs_db_file, rbp2ids_file


################################################################################

def main_search(args):
    """
    Search motifs.

    """

    print("Running for you in SEARCH mode ... ")


    assert os.path.exists(args.in_sites), "--in file \"%s\" not found" % (args.in_sites)
    assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)

    # Check --in BED format.
    if not args.unstranded:
        benchlib.bed_check_format(args.in_sites, param_str="--in")
    # More checks.
    if args.upset_plot_max_degree is not None:
        assert args.upset_plot_max_degree >= args.upset_plot_min_degree, "set --upset-plot-max-degree needs to be >= --upset-plot-min-degree"
    if args.tr_list:
        assert args.in_gtf, "set --tr-list requires --gtf GTF file"
    assert benchlib.boundary_check(args.gtf_feat_min_overlap, 1E-9, 1.0), "set --gtf-feat-min-overlap expected to be >= 1E-9 and <= 1.0"
    assert benchlib.boundary_check(args.gtf_min_mrna_overlap, 1E-9, 1.0), "set --gtf-min-mrna-overlap expected to be >= 1E-9 and <= 1.0"
    assert benchlib.boundary_check(args.gtf_intron_border_len, 1, 1000), "set --gtf-intron-border-eln expected to be >= 1 and <= 1000"
    assert benchlib.boundary_check(args.min_motif_dist, 0, 1000), "set --min-motif-dist expected to be >= 0 and <= 1000"
    assert benchlib.boundary_check(args.kmer_plot_k, 1, 7), "set --kmer-plot-k expected to be >= 1 and <= 7"
    assert benchlib.boundary_check(args.kmer_pca_plot_k, 1, 7), "set --kmer-pca-plot-k expected to be >= 1 and <= 7"


    # if args.in_gtf:
    #     assert args.report, "set --gtf requires --report to be effective"
    assert args.upset_plot_min_rbp_count >= 0, "set --upset-plot-min-rbp-count needs to be >= 0"
    # if args.add_motif_annot:
    #     assert args.in_gtf, "set --add-motif-annot requires --gtf GTF file"
    if args.run_goa:
        assert args.in_gtf, "set --goa requires --gtf GTF file"
        if args.goa_obo_mode == 3:
            assert args.goa_obo_file, "set --goa-obo-mode 3 requires --goa-obo-file"
            assert os.path.exists(args.goa_obo_file), "--goa-obo-file file \"%s\" not found" % (args.goa_obo_file)
        if args.goa_obo_file:
            assert args.goa_obo_mode == 3, "--goa-obo-file requires --goa-obo-mode 3"
        if args.goa_max_child is not None:
            assert args.goa_max_child >= 0, "set --goa-max-child expected to be >= 0"
        if args.goa_min_depth is not None:
            assert args.goa_min_depth >= 0, "set --goa-min-depth expected to be >= 0"

    # Is MEME >= v5 installed?
    if not args.meme_disable_check:
        assert benchlib.is_tool("meme"), "meme not in PATH"
        check, meme_version = benchlib.check_tool_version("meme -version", "5.0")
        assert check, "RBPBench requires meme version >= 5.0 (installed version: %s)" %(meme_version)

    # Check if MEME version is >= 5.5.4 (need to add fimo --no-pgc option to produce same results!).
    fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text"
    if not args.meme_disable_check:
        check, meme_version = benchlib.check_tool_version("meme -version", "5.5.4")
        args.meme_version = meme_version
        if check:
            fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"
    if args.meme_no_pgc:
        fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"

    # GTF check.
    if args.in_gtf:
        assert os.path.exists(args.in_gtf), "--gtf file \"%s\" not found" % (args.in_gtf)

    # Regex check.
    regex_type = "sequence"
    if args.regex:
        if args.regex_type == 1:
            print("Check given --regex type ... ")
            if benchlib.looks_like_structure(args.regex):
                print("Given --regex looks like structure pattern ... ")
                regex_type = "structure"
            else:
                print("Given --regex assumed to be standard regex ... ")
                regex_type = "sequence"
        elif args.regex_type == 2:
            regex_type = "sequence"
        elif args.regex_type == 3:
            regex_type = "structure"
        else:
            assert False, "unexpected --regex-type value set (%d)" %(args.regex_type)
        # If standard regex, check validity.
        if regex_type == "sequence":
            assert benchlib.is_valid_regex(args.regex), "given --regex \"%s\" is not a valid regular expression. Please provide valid expression" % (args.regex)

    # Additional BED regions for annotation.
    if args.add_annot_bed:
        assert os.path.exists(args.add_annot_bed), "--add-annot-bed file \"%s\" not found" % (args.add_annot_bed)
        # Check if BED format.
        benchlib.bed_check_format(args.add_annot_bed, param_str="--add-annot-bed")
        # Check annotation ID.
        args.add_annot_id = benchlib.remove_special_chars_from_str(args.add_annot_id)
        assert args.add_annot_id, "empty string after removing special chars from --add-annot-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"

    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.fimo_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.fimo_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    if args.fimo_user_ntf_file:
        fimo_freqs_file = args.fimo_user_ntf_file
    assert os.path.exists(fimo_freqs_file), "set FIMO nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    Motif database.

    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                            db_path=db_path)
    # Custom motif database: folder given.
    if args.custom_db:
        assert not args.custom_db_meme_xml, "--custom-db folder set incompatible with --custom-db-meme-xml. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_cm, "--custom-db folder set incompatible with --custom-db-cm. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_info, "--custom-db folder set incompatible with --custom-db-info. Provide custom motif database either via folder (--custom-db) or as single files"
        seq_motifs_db_file, str_motifs_db_file, rbp2ids_file = specify_custom_motif_db(args.custom_db)
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
    # Custom motif database: single files given.
    if args.custom_db_meme_xml or args.custom_db_cm or args.custom_db_info:
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
        assert not args.custom_db, "single custom motif database files provided not compatible with --custom-db. Provide custom motif database either via folder (--custom-db) or as single files"
        assert args.custom_db_info, "--custom-db-info needed to define custom motif database"
        rbp2ids_file = args.custom_db_info
        assert args.custom_db_meme_xml or args.custom_db_cm, "--custom-db-meme-xml and/or --custom-db-cm needed to define custom motif database"
        if args.custom_db_meme_xml:
            seq_motifs_db_file = args.custom_db_meme_xml
        else:
            seq_motifs_db_file = ""  # setting to empty string results in os.path.exists -> False.
        if args.custom_db_cm:
            str_motifs_db_file = args.custom_db_cm
        else:
            str_motifs_db_file = ""

    args.motif_db_str = motif_db_str

    """
    Get ID mappings

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")
    name2fids_dic:
    RBP name -> annotated function IDs list, e.g.
    "A1CF" -> ["RM", "RSD", "RE"]

    """
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)

    # Motif ID -> RBP name (== RBP ID) mapping.
    id2name_dic = {}
    for rbp_id in name2ids_dic:
        for motif_id in name2ids_dic[rbp_id]:
            id2name_dic[motif_id] = rbp_id

    """
    Get MEME XML database motif blocks dictionary.

    """
    seq_motif_blocks_dic = {}
    if os.path.exists(seq_motifs_db_file):
        seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file,
                                                           empty_check=True)
    for motif_id in seq_motif_blocks_dic:
        assert motif_id in id2name_dic, "MEME XML motif ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    """
    Get covariance model database motif blocks dictionary.

    """
    str_motif_blocks_dic = {}
    if str_motifs_db_file and os.path.exists(str_motifs_db_file):
        str_motif_blocks_dic = benchlib.read_in_cm_blocks(str_motifs_db_file, 
                                                          empty_check=True)
        for motif_id in str_motif_blocks_dic:
            assert motif_id in id2name_dic, "Covariance model accession ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    # Remove special chars from run ID.
    # Data / method IDs end up in output tables, so restrict them to
    # alphanumerics plus - and _.
    args.data_id = benchlib.remove_special_chars_from_str(args.data_id)
    assert args.data_id, "empty string after removing special chars from --data-id. Please provide alphanumeric string for data ID (- or _ are okay as well)"
    args.method_id = benchlib.remove_special_chars_from_str(args.method_id)
    assert args.method_id, "empty string after removing special chars from --method-id. Please provide alphanumeric string for method ID (- or _ are okay as well)"
    # Run ID definition.
    # Falls back to the literal string "run_id" if --run-id was not given.
    run_id = "run_id"
    if args.run_id:
        run_id = benchlib.remove_special_chars_from_str(args.run_id)
        assert run_id, "empty string after removing special chars from --run-id. Please provide alphanumeric string for run ID (- or _ are okay as well)"
    # else:
    #     random_id = uuid.uuid4()
    #     run_id = str(random_id)

    # hash_len = max(len(run_id), len(args.data_id), len(args.method_id))
    # print("###################" + "#"*hash_len)
    print("Run ID:     ", run_id)
    print("Data ID:    ", args.data_id)
    print("Method ID:  ", args.method_id)
    # print("###################" + "#"*hash_len)

    """
    Output folders + files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    filtered_sites_bed = os.path.join(args.out_folder, 'in_sites.filtered.bed')
    filtered_sites_fa = os.path.join(args.out_folder, 'in_sites.filtered.fa')
    seq_motifs_xml = os.path.join(args.out_folder, 'seq_motifs.xml')
    str_motifs_cm = os.path.join(args.out_folder, 'str_motifs.cm')
    fimo_res_tsv = os.path.join(args.out_folder, 'fimo_results.tsv')
    cmsearch_res_txt = os.path.join(args.out_folder, 'cmsearch_results.txt')

    rbp_stats_out = os.path.join(args.out_folder, "rbp_hit_stats.tsv")
    motif_stats_out = os.path.join(args.out_folder, "motif_hit_stats.tsv")
    # con_res_out_tsv = os.path.join(args.out_folder, "contingency_table_results.tsv")
    settings_file = os.path.join(args.out_folder, "settings.rbpbench_search.out")
    rbp_reg_occ_table_out = os.path.join(args.out_folder, "rbp_region_occupancies.tsv")

    # Output unique motif hits.
    motif_hits_bed_out = os.path.join(args.out_folder, "motif_hits.rbpbench_search.bed")
    # Output matched sequences stats.
    matched_seqs_out = os.path.join(args.out_folder, "matched_seq_stats.tsv")

    # Output RBP co-ooccurrence stats as table.
    cooc_stats_out = os.path.join(args.out_folder, "rbp_cooc_stats.tsv")

    # GOA results.
    goa_results_tsv = os.path.join(args.out_folder, "goa_results.tsv")

    # Addition annotation regions output BED files.
    gene_regions_bed = os.path.join(args.out_folder, "gene_regions.bed")
    promoter_regions_bed = os.path.join(args.out_folder, "promoter_regions.bed")
    # Temp files.
    # random_id = uuid.uuid1()
    # tmp_out_bed = args.out_folder + "/" + str(random_id) + ".filtered_in.bed"
    out_tmp_bed = os.path.join(args.out_folder, "rbp_motif_hit_regions.tmp.bed")
    cmstat_tmp_out = os.path.join(args.out_folder, "cmstat_out.tmp.txt")

    # Delete if existing folder.
    if os.path.exists(fimo_res_tsv):
        os.remove(fimo_res_tsv)
    if os.path.exists(cmsearch_res_txt):
        os.remove(cmsearch_res_txt)


    """
    Load RBP data based on --rbps (+ optionally USER data).

    """

    rbp_in_dic = {}
    for rbp_id in args.list_rbps:
        rbp_in_dic[rbp_id] = 1

    # RBPs for motif search.
    loaded_rbps_dic = {}

    # USER set?
    user_motifs = False
    user_rbp_id = False
    if "USER" in rbp_in_dic:
        user_motifs = True
    else:
        assert not args.user_meme_xml, "--user-meme-xml provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_cm, "--user-cm provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_rbp_id, "--user-rbp-id set but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"

    special_rbp_ids_list = ["USER", "REGEX"]

    # If ALL set, load all RBPs (+ optinally USER).
    if "ALL" in rbp_in_dic:
        if len(rbp_in_dic) == 2:
            assert user_motifs, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        if len(rbp_in_dic) > 2:
            assert False, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        print("--rbps ALL selected. Loading all database motifs ... ")
        for rbp_id in name2ids_dic:
            loaded_rbps_dic[rbp_id] = motif_db_str

    else:
        # Load individual RBPs.
        for rbp_id in rbp_in_dic:
            if rbp_id not in special_rbp_ids_list:
                """
                Check if RBP ID in database.
                Suggest similar RBPs based on string similarity (edit distance).

                """
                if rbp_id not in name2ids_dic:
                    db_rbp_list = []
                    for db_rbp_id in name2ids_dic:
                        db_rbp_list.append(db_rbp_id)
                    pair_dist_dic = benchlib.calc_edit_dist_query_list(rbp_id, db_rbp_list)
                    max_c = 10
                    c = 0
                    suggested_rbps = []
                    for key, value in sorted(pair_dist_dic.items(), key=lambda item: item[1], reverse=False):
                        if c >= max_c:
                            break
                        c += 1
                        suggested_rbps.append(key)
                    suggested_rbps_str = ",".join(suggested_rbps)
                    assert False, "provided --rbps ID %s not in internal motif database (%s). Please provide RBP name present in database. Did you mean (any of) the following database ID(s) (top 10 hits based on string similarity): %s ?" %(rbp_id, motif_db_str, suggested_rbps_str)
                # assert rbp_id in name2ids_dic, "provided --rbps ID %s not in internal motif database. Please provide RBP name present in database" %(rbp_id)
                loaded_rbps_dic[rbp_id] = motif_db_str

    # Motif IDs for search.
    loaded_motif_ids_dic = {}
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            loaded_motif_ids_dic[motif_id] = motif_db_str

    """
    Check and load provided USER data.

    """

    if user_motifs:
        print("--rbps USER selected. Check + load provided USER motifs ... ")
        assert args.user_rbp_id, "--rbps USER demands --user-rbp-id to be set to connect the supplied motif(s) with an RBP ID"
        assert args.user_meme_xml or args.user_cm, "--rbps USER requires a provided sequence or structure motif file (via --user-meme-xml AND/OR --user-cm)"

        # Reformat user_rbp_id. 
        user_rbp_id = benchlib.remove_special_chars_from_str(args.user_rbp_id)
        assert user_rbp_id, "empty string after removing special chars from --user-rbp-id. Please provide alphanumeric string for RBP ID (- or _ are okay as well)"

        assert user_rbp_id not in loaded_rbps_dic, "user RBP ID %s already selected from database. Please deselect respective database RBP ID or provide unique user RBP ID via --user-rbp-id" %(user_rbp_id)
        loaded_rbps_dic[user_rbp_id] = "user"
        # In case user_rbp_id in database, reset motif IDs associated to user_rbp_id.
        name2ids_dic[user_rbp_id] = []
        print("RBP ID for user-supplied motifs:", user_rbp_id)

        user_seq_motif_blocks_dic = {}
        if args.user_meme_xml:
            assert os.path.exists(args.user_meme_xml), "--user-meme-xml file \"%s\" not found" % (args.user_meme_xml)
            user_seq_motif_blocks_dic = benchlib.read_in_xml_motifs(args.user_meme_xml, empty_check=False)
            assert user_seq_motif_blocks_dic, "no motifs read in from provided --user-meme-xml. Make sure to supply sequence motifs in MEME XML format!"
            # Check if motif ID already loaded.
            for acc_id in user_seq_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied MEME XML motif ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change user motif ID to a unique motif ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                seq_motif_blocks_dic[acc_id] = user_seq_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "meme_xml"
                id2name_dic[acc_id] = user_rbp_id

        user_str_motif_blocks_dic = {}
        if args.user_cm:
            assert os.path.exists(args.user_cm), "--user-cm file \"%s\" not found" % (args.user_cm)
            # Check for valid format.
            acc_ids_dic = benchlib.check_cm_file(args.user_cm, cmstat_tmp_out, empty_check=False)
            # Read in covariance model blocks.
            user_str_motif_blocks_dic = benchlib.read_in_cm_blocks(args.user_cm)
            for acc_id in acc_ids_dic:
                assert acc_id in user_str_motif_blocks_dic, "accession ID %s not in blocks dictionary. Please contact developers!" %(acc_id)
            # Check if motif ID already loaded.
            for acc_id in user_str_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied covariance model accession (ACC) ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change to a unique accession ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                str_motif_blocks_dic[acc_id] = user_str_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "cm"
                id2name_dic[acc_id] = user_rbp_id

    """
    Get sequence motif lengths.

    """

    id2len_dic = benchlib.get_seq_motif_lengths(seq_motif_blocks_dic)

    """
    Optionally filter DREME/MEME sequence motifs by length.

    """

    if args.motif_min_len or args.motif_max_len:

        if args.motif_min_len and args.motif_max_len:
            assert args.motif_min_len <= args.motif_max_len, "set --motif-min-len needs to be <= --motif-max-len!"

        print("Filtering sequence motifs by set min/max lengths ... ")

        seq_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, name2ids_dic, c_flt_out = benchlib.filter_dic_by_motif_lengths(
                seq_motif_blocks_dic, str_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, id2name_dic,
                id2len_dic, motif_min_len=args.motif_min_len, motif_max_len=args.motif_max_len
            )

        assert loaded_rbps_dic, "no MEME/DREME sequence motifs left after length filtering. Please adjust length filter range (--motif-min-len, --motif-max-len), RBP selection, or disable length filtering!"

        print("Filtered out %d sequence motifs outside set length range" %(c_flt_out))


    """
    Check if loaded RBP IDs have motifs.

    """
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            found = 0
            if motif_id in seq_motif_blocks_dic:
                found += 1
            if motif_id in str_motif_blocks_dic:
                found += 1
            assert found, "no motifs loaded for RBP ID \"%s\". Please provide the respective motifs file" %(rbp_id)


    """
    If --regex is set:
    Treat regex as sequence motif / fimo type.
    rbp_id: regex, motif_id: regex, motif_db: regex

    """

    regex_id = args.regex_id
    regex = args.regex

    if args.regex:

        if regex_type == "sequence":

            # Remove , ; from given regex, to avoid motif_id format conflicts.
            regex = benchlib.remove_special_chars_from_str(args.regex,
                                                        reg_ex="[ ;]",
                                                        to_upper=False)  # [ :;\(\)]
            
            assert regex, "empty string after removing special chars ( ;) from --regex. Please provide a valid regex with DNA letters"

            # Convert IUPAC codes (if present) in regex to standard regex format.
            regex = benchlib.convert_iupac_in_regex(regex)

        elif regex_type == "structure":

            # Check structure pattern and format.
            regex = benchlib.check_format_str_pattern(args.regex)

        args.regex = regex

        regex_id = benchlib.remove_special_chars_from_str(args.regex_id)

        assert regex_id, "empty string after removing special chars from --regex-id. Please provide alphanumeric string for regex ID (- or _ are okay as well)"
        assert regex_id not in name2ids_dic, "--regex set but a different RBP ID with name \"%s\" was found. Please provide a different RBP ID or --regex-id" %(regex_id)

        args.regex_id = regex_id

        if args.motif_regex_id:
            assert regex_id not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or --regex-id" %(regex_id)

            id2name_dic[regex_id] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex_id] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex_id] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex_id]  # rbp_id -> motif_ids

        else:
            assert regex not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or use --motif-regex-id" %(regex_id)

            id2name_dic[regex] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex]  # rbp_id -> motif_ids

        loaded_rbps_dic[regex_id] = regex_id  # rbp_id -> motif_db_str

        """
        Dictionaries that use motif_id as keys.
        loaded_motif_ids_dic
        name2ids_dic
        id2name_dic
        id2type_dic
        """
    else:
        regex_id = False

    """
    If --set-rbp-id set, check if it is part of loaded RBP IDs.

    """

    if args.set_rbp_id is not None:
        assert args.set_rbp_id in loaded_rbps_dic, "given --set-rbp-id \"%s\" is not part of loaded RBP IDs. Please provide --set-rbp-id that is part of loaded RBPs (user-defined or database RBP ID)" %(args.set_rbp_id)

    """
    Get function ID -> function descriptions mapping.

    """
    fid2desc_file = db_path + "/rbp_function_list.32728246.tsv"
    fid2desc_dic, desc2fid_dic = benchlib.get_fid2desc_mapping(fid2desc_file)
    

    """
    If --motifs sets, filter loaded_rbps_dic + loaded_motif_ids_dic.
    
    """

    if args.motifs_list:

        print("Filtering loaded motifs by provided --motifs ... ")

        motif_fids_dic = {}

        for motif_id in args.motifs_list:
            motif_fids_dic[motif_id] = 1

        filtered_rbps_dic = {}
        filtered_motif_ids_dic = {}
        filtered_name2ids_dic = {}
        for motif_id in motif_fids_dic:
            assert motif_id in id2name_dic, "motif ID \"%s\" provided via --motifs not found in internal motif ID -> RBP ID mapping. Please provide valid motif IDs" %(motif_id)
            assert motif_id in loaded_motif_ids_dic, "motif ID \"%s\" provided via --motifs not found in loaded motifs (via --rbps). Please provide motif IDs that are part of loaded RBPs (can also be caused by motif length filtering)" %(motif_id)
            set_db_str = loaded_motif_ids_dic[motif_id]
            filtered_motif_ids_dic[motif_id] = set_db_str
            rbp_id = id2name_dic[motif_id]
            assert rbp_id in loaded_rbps_dic, "motif ID \"%s\" provided via --motifs not found in loaded RBPs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_rbps_dic[rbp_id]
            filtered_rbps_dic[rbp_id] = set_db_str
            if rbp_id in filtered_name2ids_dic:
                filtered_name2ids_dic[rbp_id].append(motif_id)
            else:
                filtered_name2ids_dic[rbp_id] = [motif_id]

        if args.regex:
            filtered_rbps_dic[regex_id] = regex_id
            if args.motif_regex_id:
                filtered_motif_ids_dic[regex_id] = regex_id
                filtered_name2ids_dic[regex_id] = [regex_id]
            else:
                filtered_motif_ids_dic[regex] = regex_id
                filtered_name2ids_dic[regex_id] = [regex]

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by --motifs: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by --motifs:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by --motifs: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by --motifs:", c_loaded_motif_ids_post)

        assert loaded_motif_ids_dic, "no remaining motifs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        name2ids_dic = filtered_name2ids_dic


    """
    If --functions set, filter loaded_rbps_dic + loaded_motif_ids_dic.

    """
    rbp_fids_dic = {}
    if args.rbp_functions:

        print("Filtering loaded RBPs by provided function IDs ... ")

        for fid in args.rbp_functions:
            rbp_fids_dic[fid] = 1

        # Check if provided function IDs are valid.
        for fid in rbp_fids_dic:
            assert fid in fid2desc_dic, "function ID \"%s\" provided via --functions not found in internal function ID -> description mapping. Please provide valid function IDs (see rbpbench info for a detailed description)" %(fid)

        # Filter loaded_rbps_dic.
        filtered_rbps_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:  # keep regex if set.
                filtered_rbps_dic[regex_id] = regex_id
                continue
            if rbp_id not in name2fids_dic:
                continue
            for fid in rbp_fids_dic:
                if fid in name2fids_dic[rbp_id]:
                    set_db_str = loaded_rbps_dic[rbp_id]
                    filtered_rbps_dic[rbp_id] = set_db_str
                    break

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by functions: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by functions:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided function IDs. Please provide compatible RBPs + function IDs (see rbpbench info for annotated RBP functions)"

        # Filter loaded motif IDs.
        filtered_motif_ids_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:
                if args.motif_regex_id:
                    filtered_motif_ids_dic[regex_id] = regex_id
                else:
                    filtered_motif_ids_dic[regex] = regex_id
            else:
                for motif_id in name2ids_dic[rbp_id]:
                    set_db_str = loaded_motif_ids_dic[motif_id]
                    filtered_motif_ids_dic[motif_id] = set_db_str
        
        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by functions: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by functions:", c_loaded_motif_ids_post)


    """
    If --exp-gene-list set, filter RBPs by gene IDs from list, 
    such that only RBPs for search are kept whose gene IDs are in --exp-gene-list.
    Keep regex motif if set.
  
    """
    exp_gene_ids_dic = {}

    if args.exp_gene_list:

        print("Filter selected RBPs by --exp-gene-list ... ")

        assert os.path.exists(args.exp_gene_list), "given --exp-gene-list file \"%s\" not found" % (args.exp_gene_list)

        exp_gene_ids_dic = benchlib.read_ids_into_dic(args.exp_gene_list,
                                                      check_dic=False)

        assert exp_gene_ids_dic, "no gene IDs read in from --exp-gene-list. Please provide gene IDs in a text file, one ID per line"

        filtered_rbps_dic = {}
        filtered_motif_ids_dic = {}
        filtered_name2ids_dic = {}
        
        for rbp_id in loaded_rbps_dic:

            if rbp_id == regex_id:  # keep regex if set.
                filtered_rbps_dic[regex_id] = regex_id
                if args.motif_regex_id:
                    filtered_motif_ids_dic[regex_id] = regex_id
                    filtered_name2ids_dic[regex_id] = [regex_id]
                else:
                    filtered_motif_ids_dic[regex] = regex_id
                    filtered_name2ids_dic[regex_id] = [regex]
                continue

            if rbp_id not in name2gid_dic:
                continue

            rbp_gene_id = name2gid_dic[rbp_id]

            if rbp_gene_id in exp_gene_ids_dic:
                set_db_str = loaded_rbps_dic[rbp_id]
                filtered_rbps_dic[rbp_id] = set_db_str
                for motif_id in name2ids_dic[rbp_id]:
                    set_db_str = loaded_motif_ids_dic[motif_id]
                    filtered_motif_ids_dic[motif_id] = set_db_str
                filtered_name2ids_dic[rbp_id] = name2ids_dic[rbp_id]

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by --exp-gene-list: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by --exp-gene-list:", c_loaded_rbps_post)

        if args.regex and len(loaded_rbps_dic) == 1:
            print("WARNING: only regex motif left after filtering by --exp-gene-list ...")

        assert loaded_rbps_dic, "no remaining RBPs after filtering by --exp-gene-list. Please provide compatible gene IDs in --exp-gene-list (supported gene ID format: ENSG00000100320, no version numbers!) and at least one RBP via --rbps that is in --exp-gene-list"

        loaded_motif_ids_dic = filtered_motif_ids_dic
        name2ids_dic = filtered_name2ids_dic


    """
    Load RBP data, store in RBP() class.

    """

    # Store motif IDs for search.
    search_rbps_dic = {}
    seq_rbps_dic = {}
    str_rbps_dic = {}
    motif_id2idx_dic = {} # motif ID -> list index.
    args.internal_id = []

    for rbp_id in loaded_rbps_dic:
    
        internal_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
        args.internal_id.append(internal_id)
        rbp = benchlib.RBP(rbp_id, internal_id)

        for motif_id in name2ids_dic[rbp_id]:
        
            assert motif_id in loaded_motif_ids_dic, "motif_id %s not in loaded_motif_ids_dic" %(motif_id)

            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "regex":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "cm":
                rbp.str_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.str_motif_ids) - 1
                rbp.str_motif_hits.append(0)
                str_rbps_dic[rbp_id] = 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

        search_rbps_dic[rbp_id] = rbp


    print("# of RBP IDs for search:    ", len(loaded_rbps_dic))
    print("# of motif IDs for search:  ", len(loaded_motif_ids_dic))

    """
    Get chromosome IDs from --genome.
    """
    print("Get --genome FASTA headers ... ")
    chr_ids_dic = benchlib.get_fasta_headers(args.in_genome)

    """
    Guess chromosome ID style.

    chr_style:
        1: chr1, chr2, ..., chrX, chrM
        2: 1, 2, ... , X, MT

    """
    print("Guess chromosome ID style (based on --genome FASTA headers) ... ")
    chr_style = benchlib.guess_chr_id_style(chr_ids_dic)
    
    """
    Optionally filter --in regions by --exp-gene-list (if --exp-gene-filter set).
    
    """

    in_reg_bed = args.in_sites

    if args.exp_gene_filter:

        assert exp_gene_ids_dic, "--exp-gene-filter set but no gene IDs provided via --exp-gene-list. Please provide gene IDs in a text file, one ID per line"
        assert args.in_gtf, "--exp-gene-filter set but no --gtf file provided. Please provide a GTF file with compatible gene annotations (compatible to --exp-gene-list gene IDs)"
        assert os.path.exists(args.in_gtf), "--gtf file \"%s\" not found" % (args.in_gtf)

        print("Extract --exp-gene-list gene regions from --gtf file ... ")
        
        tmp_gene_reg_bed = os.path.join(args.out_folder, "gene_regions.exp_gene_list.bed")

        c_out_gene_reg = benchlib.gtf_output_gene_regions_to_bed(args.in_gtf, tmp_gene_reg_bed,
                                                                 gids_dic=exp_gene_ids_dic,
                                                                 chr_id_style=chr_style)

        assert c_out_gene_reg, "no gene regions extracted from --gtf file. Please provide a valid GTF file with gene annotations compatible with --exp-gene-list gene IDs"

        print("Filter --in regions by --exp-gene-list gene regions ... ")
        tmp_in_filtered_bed = os.path.join(args.out_folder, "in_sites.exp_gene_list.filtered.bed")

        params = "-u -s"
        if args.exp_gene_filter_mode == 2:  # Keep --in regions not overlapping with --exp-gene-list gene regions.
            params = "-v -s"

        c_in_reg_pre_flt = benchlib.count_lines_in_file(args.in_sites)

        benchlib.bed_intersect_files(args.in_sites, tmp_gene_reg_bed, tmp_in_filtered_bed,
                                     params=params)

        c_in_reg_post_flt = benchlib.count_lines_in_file(tmp_in_filtered_bed)

        print("# --in regions pre-filtering: ", c_in_reg_pre_flt)
        print("# --in regions post-filtering:", c_in_reg_post_flt)

        assert c_in_reg_post_flt, "no --in regions remain after filtering by --exp-gene-list gene regions. Please provide compatible files or change --exp-gene-filter settings"

        in_reg_bed = tmp_in_filtered_bed

    """
    Promoter region definition.
    
    """

    prom_ext_parts = args.prom_ext_up_down.split(",")
    c_prom_ext_parts = len(prom_ext_parts)
    assert c_prom_ext_parts == 2, "invalid --prom-ext argument provided (correct format: --prom-ext 1000,100, i.e., please provide two integers separated by a comma)"

    prom_ext_up = int(prom_ext_parts[0])
    prom_ext_down = int(prom_ext_parts[1])

    assert benchlib.boundary_check(prom_ext_up, 1, 100000), "set promoter upstream extension expected to be >= 1 and <= 100000"
    assert benchlib.boundary_check(prom_ext_down, 0, 100000), "set promoter downstream extension expected to be >= 0 and <= 100000"

    """
    Get extension info for --in regions.
    
    """

    # Process extension info.
    ext_up, ext_down = benchlib.get_ext_parts(args.ext_up_down)

    args.ext_up = ext_up
    args.ext_down = ext_down

    """
    Filter / extend --in genomic regions BED file.

    """

    # Unstranded option.
    if args.unstranded:
        print("WARNING: --unstranded enabled. Using both strands for each --in BED region ... ")
        if ext_up != ext_down:
            print("asymmetric --ext and --unstranded set. Extend plus strand and use corresponding minus strand ... ")

    # Filter / extend --in BED.
    print("Preprocess --in sites ... ")
    reg2sc_dic = {}  # Used to store region scores for Wilcoxon rank-sum test.
    bed_chr_ids_dic = {}
    reg_stats_dic = benchlib.bed_filter_extend_bed(in_reg_bed, filtered_sites_bed,
                                          ext_up=ext_up,
                                          ext_down=ext_down,
                                          remove_dupl=True,
                                          reg2sc_dic=reg2sc_dic,
                                          score_col=args.bed_score_col,
                                          score_thr=args.bed_sc_thr,
                                          score_rev_filter=args.bed_sc_thr_rev_filter,
                                          chr_ids_dic=chr_ids_dic,
                                          bed_chr_ids_dic=bed_chr_ids_dic,
                                          use_region_ids=True,
                                          unstranded=args.unstranded)

    print("# --in regions pre-filtering:  ", reg_stats_dic["c_in"])
    print("# --in regions post-filtering: ", reg_stats_dic["c_out"])
    print("# regions with invalid chr_id: ", reg_stats_dic["c_chr_filter"])
    print("# duplicated regions removed:  ", reg_stats_dic["c_dupl_filter"])
    print("# regions filtered by score:   ", reg_stats_dic["c_sc_thr"])

    assert reg_stats_dic["c_out"], "no --in BED sites remain after chromosome ID (or optionally score) filtering. If caused by invalid chr_id filtering, make sure chromosome IDs in --genome FASTA and --in BED files are compatible (i.e., \"chr1\" vs. \"1\" notation). If --in regions are on transcripts, use rbpbench searchrna"

    """
    Calculate effective size of genomic regions.
    
    """
    print("Calculate effective genomic region size ... ")
    eff_reg_size = benchlib.get_uniq_gen_size(filtered_sites_bed)

    print("Called region length sum:      ", reg_stats_dic["reg_len_sum"])
    print("Effective region length sum:   ", eff_reg_size)

    """
    Get genomic region sequences from --genome.

    Output FASTA header format:
    >chr8:9772198-9772297(+)

    No need to convert sequences to uppercase, as FIMO works on both
    lower- and uppercase (as long as DNA / RNA is set correctly).

    """
    print("Extract sequences from --genome ... ")
    benchlib.bed_extract_sequences_from_fasta(filtered_sites_bed, 
                                              args.in_genome, filtered_sites_fa,
                                              print_warnings=True)


    """
    Get FASTA sequences and sequence lengths.
    """

    out_seqs_dic = benchlib.read_fasta_into_dic(filtered_sites_fa,
                                       dna=True,
                                       all_uc=True,
                                       id_check=True,
                                       empty_check=False,
                                       skip_n_seqs=False)

    assert out_seqs_dic, "no sequences extracted from FASTA file for --in BED sites. Make sure to use compatible FASTA/BED files!"

    # Update reg2sc_dic if out_seqs_dic different (normally if sequence extraction failed for certain regions, should not happen == very rare case).
    if len(reg2sc_dic) != len(out_seqs_dic):
        new_reg2sc_dic = {}
        for seq_id in out_seqs_dic:
            new_reg2sc_dic[seq_id] = reg2sc_dic[seq_id]
        reg2sc_dic = new_reg2sc_dic

    # Get region strands/polarities for motif distance calculations.
    reg2pol_dic = benchlib.extract_pol_from_seq_ids(out_seqs_dic)

    # Effective number of regions used for motif search.
    c_regions = len(out_seqs_dic)
    """
    --unstranded option and not --unstranded-ct

    chr8:9772198-9772297(+)
    """
    if args.unstranded and not args.unstranded_ct:
        # Check if sequences are even.
        assert not len(out_seqs_dic) % 2, "# of --in regions should be an even number since --unstranded is set. Please contact developers"
        c_regions = len(out_seqs_dic) // 2
    
    args.c_regions = c_regions

    # Called region size.
    called_reg_size = 0
    len_list = []
    for seq_id in out_seqs_dic:
        seq_len = len(out_seqs_dic[seq_id])
        called_reg_size += seq_len
        len_list.append(seq_len)

    # Length statistics.
    reg_len_median = statistics.median(len_list)
    reg_len_mean = statistics.mean(len_list)
    reg_len_mean = round(reg_len_mean, 2)
    reg_len_min = min(len_list)
    reg_len_max = max(len_list)

    """
    ====================================
    RUN SEQUENCE MOTIF SEARCH WITH FIMO.
    ====================================
    
    """
    fimo_hits_list = []
    call_dic = {}

    if seq_rbps_dic:

        """
        Print motifs to file.

        """

        print("Output motifs to XML ... ")
        out_str, c_added_motifs = benchlib.blocks_to_xml_string(seq_motif_blocks_dic, loaded_motif_ids_dic)

        benchlib.output_string_to_file(out_str, seq_motifs_xml)


        """
        Run FIMO on sequences + motifs.

        """

        print("Run FIMO ... ")
        benchlib.run_fast_fimo(filtered_sites_fa, seq_motifs_xml, fimo_res_tsv,
                    pval_thr=args.fimo_pval,
                    nt_freqs_file=fimo_freqs_file,
                    call_dic=call_dic,
                    params=fimo_params,
                    error_check=False)

        """
        Read in FIMO hits.

        """

        assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

        print("Read in FIMO results ... ")
        fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                       only_best_hits=args.greatest_hits)

        c_fimo_hits = len(fimo_hits_list)
        print("# of FIMO motif hits:", c_fimo_hits)

        """
        If --regex is set, search for regex hits in sequences (stored in out_seqs_dic).

        """
        if args.regex:

            step_size_one = False
            if args.regex_search_mode == 1:
                step_size_one = True
            elif args.regex_search_mode == 2:
                step_size_one = False
            else:
                assert False, "invalid --regex-search-mode %i set" %(args.regex_search_mode)

            print("Run search for --regex \"%s\" ... " %(regex))
            regex_hits_list = benchlib.get_regex_hits(regex, regex_id, out_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      # use_lookahead=args.regex_lookahead,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=args.motif_regex_id)

            c_regex_hits = len(regex_hits_list)
            print("# of regex hits:", c_regex_hits)

            # Add regex hits to fimo_hits_list.
            fimo_hits_list += regex_hits_list


    """
    =========================================
    RUN STRUCTURE MOTIF SEARCH WITH CMSEARCH.
    =========================================

    """
    cmsearch_hits_list = []

    if str_rbps_dic:
        
        print("Output covariance models to .cm ... ")
        benchlib.output_cm_blocks_to_file(str_motif_blocks_dic, loaded_motif_ids_dic, str_motifs_cm)

        # Run cmsearch.
        print("Run cmsearch ... ")
        cmsh_mode = ""
        if args.cmsearch_mode == 1:
            cmsh_mode = "--default"
        elif args.cmsearch_mode == 2:
            cmsh_mode = "--max"
        else:
            assert False, "invalid --cmsearch-mode %i set" %(args.cmsearch_mode)
        cmsh_params = "-g --tformat fasta --toponly --incT %s -T %s %s" %(args.cmsearch_bs, args.cmsearch_bs, cmsh_mode)
        benchlib.run_cmsearch(filtered_sites_fa, str_motifs_cm, cmsearch_res_txt,
                        error_check=False,
                        call_dic=call_dic,
                        params=cmsh_params) # or add --anytrunc and remove --g
        # Read in hits.
        print("Read in cmsearch results ... ")
        cmsearch_hits_list, c_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                           only_best_hits=args.greatest_hits,
                                                                           check=True)

        print("# of cmsearch motif hits:", c_cms_hits)


    """
    Store for each RBP the regions with motif hits (and hit counts), using
    dictionary of dictionaries regions_with_motifs_dic.
    This tells us how many input regions have motif hits, separated by RBP.
    Also store for each RBP the unique motif hit regions (and hit counts), using
    dictionary of dictionaries unique_motifs_dic.

    regions_with_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'region1': motif_c_region1, 'region2': motif_c_region2}, rbp_id2 -> {'region1': motif_c_region1}}
    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }

    """

    regions_with_motifs_dic = {}
    unique_motifs_dic = {}

    # Store regions with sequence motifs.
    for fh in fimo_hits_list:

        rbp_id = id2name_dic[fh.motif_id]

        if rbp_id in regions_with_motifs_dic:
            # fh.seq_name : FASTA header (== --in genomic sequence region).
            if fh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][fh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][fh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][fh.seq_name] = 1

        fh_str = repr(fh) # genomic motif region string.

        # Unique motif regions for each RBP.
        if rbp_id in unique_motifs_dic:
            if fh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][fh_str] += 1
            else:
                unique_motifs_dic[rbp_id][fh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][fh_str] = 1

    # Store regions with structure motifs.
    for cmsh in cmsearch_hits_list:

        rbp_id = id2name_dic[cmsh.motif_id]

        if rbp_id in regions_with_motifs_dic:
            # cmsh.seq_name : FASTA header (== --in genomic sequence region).
            if cmsh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1

        cmsh_str = repr(cmsh) # genomic motif region string.

        # Unique motif regions for each RBP.
        if rbp_id in unique_motifs_dic:
            if cmsh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][cmsh_str] += 1
            else:
                unique_motifs_dic[rbp_id][cmsh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][cmsh_str] = 1

    """
    Store infos for each RBP in RBP class.

    search_rbps_dic[rbp_id] = rbp_class
    RBP class arguments:
            name: str,
            seq_motif_ids = None,
            str_motif_ids = None,
            c_hit_reg = 0, # # regions with motif hits.
            perc_hit_reg = 0.0, # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
            c_motif_hits = 0, # # motif hits.
            c_uniq_motif_hits = 0, # # unique motif hits.
            c_uniq_motif_nts = 0, # # unique motif nucleotides.
            perc_uniq_motif_nts_eff_reg = 0.0, # % unique motif nts over effective region length.
            perc_uniq_motif_nts_cal_reg = 0.0, # % unique motif nts over called region length.
            uniq_motif_hits_eff_1000nt = 0.0, # unique motif hits per effective 1000 nt.
            uniq_motif_hits_cal_1000nt = 0.0, # unique motif hits per called 1000 nt.
            ks_pval = 1.0, # Kolmogorov-Smirnov (KS) statistic p-value (are higher scoring sites enriched with motifs).
            ks_stat = 0.0,
            organism: Optional[str] = None

    Number of sequences for FIMO / cmsearch:
    c_regions

    """
    # Initialize no-hit region count to all regions for every RBP; overwritten
    # below for RBPs that actually have motif hits.
    for rbp_id in search_rbps_dic:
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions

    for rbp_id in regions_with_motifs_dic:
        # Number of --in regions with RBP motif hits.
        c_hit_reg = len(regions_with_motifs_dic[rbp_id])
        # Count hit regions if --unstranded and not --unstranded-ct.
        if args.unstranded and not args.unstranded_ct:
            c_hit_reg = 0
            seen_ids_dic = {}
            for seq_id in regions_with_motifs_dic[rbp_id]:
                core_id = benchlib.reg_get_core_id(seq_id)
                if core_id not in seen_ids_dic:
                    c_hit_reg += 1
                    seen_ids_dic[core_id] = 1

        # Number of motif hits on --in regions in total.
        c_motif_hits = 0
        for reg_id in regions_with_motifs_dic[rbp_id]:
            c_motif_hits += regions_with_motifs_dic[rbp_id][reg_id]
        search_rbps_dic[rbp_id].c_hit_reg = c_hit_reg
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions - c_hit_reg
        search_rbps_dic[rbp_id].c_motif_hits = c_motif_hits

        # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
        search_rbps_dic[rbp_id].perc_hit_reg = (search_rbps_dic[rbp_id].c_hit_reg / c_regions) * 100

    """
    Get unique motif hits.

    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }
    """

    for rbp_id in unique_motifs_dic:
        c_uniq_motif_hits = len(unique_motifs_dic[rbp_id])
        search_rbps_dic[rbp_id].c_uniq_motif_hits = c_uniq_motif_hits
        # Store individual motif unique hits.
        for motif_str_repr in unique_motifs_dic[rbp_id]:
            motif_id = benchlib.get_motif_id_from_str_repr(motif_str_repr)
            idx = motif_id2idx_dic[motif_id]
            if id2type_dic[motif_id] == "meme_xml":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "regex":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "cm":
                search_rbps_dic[rbp_id].str_motif_hits[idx] += 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

    """
    Number of motif nucleotides over called + effective region size.

    """

    print("Calculate effective motif region sizes for each RBP ... ")
    for rbp_id in unique_motifs_dic:
        # Output unique motif hit regions (sequence or structure) to BED for RBP rbp_id.
        benchlib.output_motif_hits_to_bed(rbp_id, unique_motifs_dic, out_tmp_bed,
                                          one_based_start=True)
        # Calculate effective motif region size.
        eff_motif_reg_size = benchlib.get_uniq_gen_size(out_tmp_bed)

        # Number of unique motif nucleotides.
        search_rbps_dic[rbp_id].c_uniq_motif_nts = eff_motif_reg_size
        # % unique motif nts over effective region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg = (eff_motif_reg_size / eff_reg_size) * 100
        # % unique motif nts over called region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg = (eff_motif_reg_size / called_reg_size) * 100
        # Number of unique motif hits per effective 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (eff_reg_size / 1000)
        # Number of unique motif hits per called 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (called_reg_size / 1000)

    """
    # Print RBP object stats.
    for rbp_id in search_rbps_dic:
        print(search_rbps_dic[rbp_id].__dict__)

    """

    # print(search_rbps_dic["AGGF1"].__dict__)
    print("# --in regions for motif search:", c_regions)
    print("Called genomic region size:     ", called_reg_size)
    print("Effective genomic region size:  ", eff_reg_size)


    """
    Some motif enrichment tests.

    Kolmogorov-Smirnov test:
    conda install -c conda-forge scipy
    from scipy.stats import ks_2samp
    scipy.stats.ks_2samp()
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ks_2samp.html
    Performs the two-sample Kolmogorov-Smirnov test for goodness of fit.    
    This test compares the underlying continuous distributions F(x) and G(x) of 
    two independent samples. See Notes for a description of the available null 
    and alternative hypotheses.

    Changing log2 FC to FC results in same p-values ...

    alternative: two-sided, greater, less
    
    Alternatively:
    Wilcoxon signed-rank test:
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html
    alternative: two-sided, greater, less
    This, however, requires equal sample sizes (x, y).
    Alternatively, use Wilcoxon rank-sum test (Mann-Whitney U test)

    """

    # print("Calculate Kolmogorov-Smirnov (KS) statistics ... ")
    print("Calculate Wilcoxon rank-sum test statistics ... ")

    # Wilcoxon rank-sum test / Mann Whitney U test mode.
    wrs_alt_hypo = "greater"
    if args.wrs_mode == 1:
        wrs_alt_hypo = "greater"
        print("Check if motif-containing regions have significantly higher scores ... ")
    elif args.wrs_mode == 2:
        wrs_alt_hypo = "less"
        print("Check if motif-containing regions have significantly lower scores ... ")
    else:
        assert False, "Invalid Wilcoxon rank-sum (Mann Whitney U) test mode: %i" %(args.wrs_mode)

    # Check if all scores same (e.g. 0).
    reg_scores_dic = {}
    for reg_id in reg2sc_dic:
        reg_scores_dic[reg2sc_dic[reg_id]] = 1
    if len(reg_scores_dic) == 1:
        print("WARNING: all site scores identical. Reported p-values meaningless! (i.e., equal 1.0)")

    wc_pval_dic = {}
    wc_rbc_es_dic = {}
    wc_cl_es_dic = {}

    for rbp_id in regions_with_motifs_dic:
        hit_reg_scores = []
        non_hit_reg_scores = []
        seen_ids_dic = {}
        # NOTE(review): full scan of reg2sc_dic per RBP -> O(#RBPs * #regions);
        # confirm this stays acceptable for very large inputs.
        for reg_id in reg2sc_dic:
            reg_sc = reg2sc_dic[reg_id]  # float value.

            # If --unstranded, count regions only once as hit or no hit.
            if args.unstranded and not args.unstranded_ct:
                core_id = benchlib.reg_get_core_id(reg_id)
                reg_id1 = core_id + "(+)"
                reg_id2 = core_id + "(-)"
                if core_id not in seen_ids_dic:
                    if reg_id1 in regions_with_motifs_dic[rbp_id] or reg_id2 in regions_with_motifs_dic[rbp_id]:
                        hit_reg_scores.append(reg_sc)
                    else:
                        non_hit_reg_scores.append(reg_sc)
                    seen_ids_dic[core_id] = 1

            else:
                if reg_id in regions_with_motifs_dic[rbp_id]:
                    hit_reg_scores.append(reg_sc)
                else:
                    non_hit_reg_scores.append(reg_sc)

        # print("rbp_id:", rbp_id)
        # print("hit_reg_scores:", hit_reg_scores)
        # print("non_hit_reg_scores:", non_hit_reg_scores)
        # print("# hit regions:    ", len(hit_reg_scores))
        # print("# non-hit regions:", len(non_hit_reg_scores))

        # Calculate the KS statistic and p-value
        # ks_stat, ks_pval = ks_2samp(hit_reg_scores, non_hit_reg_scores, alternative="less")

        wc_pval = 1.0
        # Edge case: every region has a motif hit for this RBP; add a dummy
        # (median) score to both groups so the rank-sum test stays defined.
        if not non_hit_reg_scores:
            print("WARNING: all input regions contain %s motifs. Adding dummy scores (median) ... " %(rbp_id))
            dummy_val = statistics.median(hit_reg_scores)
            hit_reg_scores.append(dummy_val)
            non_hit_reg_scores.append(dummy_val)

        # Wilcoxon rank-sum test (Mann-Whitney U test).
        if unique_motifs_dic[rbp_id]:
            wc_stat, wc_pval = mannwhitneyu(hit_reg_scores, non_hit_reg_scores, alternative=wrs_alt_hypo)
            # wc_stat_less, wc_pval_less = mannwhitneyu(hit_reg_scores, non_hit_reg_scores, alternative="less")

            rbc_eff_size, cl_eff_size = benchlib.get_eff_sizes(hit_reg_scores, non_hit_reg_scores, wc_stat,
                                                               round=True, round_n=4)

            # rbc_eff_size = benchlib.calc_rbc_effect_size(c_hit_reg, c_non_hit_reg, wc_stat)
            # cl_eff_size = benchlib.calc_cl_effect_size(c_hit_reg, c_non_hit_reg, wc_stat)

            # print("n1:", len(hit_reg_scores))
            # print("n2:", len(non_hit_reg_scores))
            # print("wc_stat:", wc_stat)
            # print("wc_pval:", wc_pval)
            # print("rbc_eff_size:", rbc_eff_size)

            # Round p-values and effect size.
            wc_pval = benchlib.round_to_n_significant_digits_v2(wc_pval, 4)
            # rbc_eff_size = benchlib.round_to_n_significant_digits_v2(rbc_eff_size, 4)
            # cl_eff_size = benchlib.round_to_n_significant_digits_v2(cl_eff_size, 4)

            #print("hit_reg_scores:", hit_reg_scores)
            #print("non_hit_reg_scores:", non_hit_reg_scores)
            # search_rbps_dic[rbp_id].ks_pval = ks_pval
            search_rbps_dic[rbp_id].wc_pval = wc_pval
            search_rbps_dic[rbp_id].wc_rbc_es = rbc_eff_size
            search_rbps_dic[rbp_id].wc_cl_es = cl_eff_size
            # search_rbps_dic[rbp_id].wc_pval_less = wc_pval_less
            wc_pval_dic[rbp_id] = wc_pval
            wc_rbc_es_dic[rbp_id] = rbc_eff_size
            wc_cl_es_dic[rbp_id] = cl_eff_size
            
        # print("KS p-value (%s):" %(rbp_id), p_value)


    # Print out RBPs sorted by Wilcoxon p-value.
    sorted_wc_pval_dic = dict(sorted(wc_pval_dic.items(), key=lambda item: item[1], reverse=False))
    print("Compact hit stats (RBP ID, # unique hits, Wilcoxon p-value, effect sizes (RBC, CL)):")
    for rbp_id, p_value in sorted_wc_pval_dic.items():
        # print("KS p-value (%s):" %(rbp_id), p_value)
        # print(rbp_id, ":", p_value)
        c_hits = search_rbps_dic[rbp_id].c_uniq_motif_hits
        rbc_eff_size = wc_rbc_es_dic[rbp_id]
        cl_eff_size = wc_cl_es_dic[rbp_id]
        # ks_pval = search_rbps_dic[rbp_id].ks_pval
        # wc_pval_less = search_rbps_dic[rbp_id].wc_pval_less
        # print("RBP:", rbp_id, "# hits:", c_uniq_motif_hits, "KS p-value:", p_value)
        # print("%s\t%i\t%s\t%s" %(rbp_id, c_hits, str(p_value), str(wc_pval_less)))
        print("%s\t%i\t%s (%s, %s)" %(rbp_id, c_hits, str(p_value), str(rbc_eff_size), str(cl_eff_size)))
    print("")
    """
    Output RBP hit stats (ie one row per RBP).

    Output columns:
    rbp_id
    c_regions
    called_reg_size
    effective_reg_size
    c_reg_with_hits
    perc_reg_with_hits
    c_motif_hits
    c_uniq_motif_hits
    c_uniq_motif_nts
    perc_uniq_motif_nts_cal_reg
    perc_uniq_motif_nts_eff_reg
    uniq_motif_hits_cal_1000nt
    uniq_motif_hits_eff_1000nt
    wc_pval
    seq_motif_ids
    seq_motif_hits
    str_motif_ids
    str_motif_hits

    """

    rbp_list = []

    OUTSTATS = open(rbp_stats_out, "w")
    rbp_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\trbp_id\tc_regions\tmean_reg_len\tmedian_reg_len\tmin_reg_len\tmax_reg_len\t"
    rbp_stats_header += "called_reg_size\teffective_reg_size\tc_reg_with_hits\tperc_reg_with_hits\t"
    rbp_stats_header += "c_motif_hits\tc_uniq_motif_hits\tc_uniq_motif_nts\tperc_uniq_motif_nts_cal_reg\tperc_uniq_motif_nts_eff_reg\tuniq_motif_hits_cal_1000nt\t"
    rbp_stats_header += "uniq_motif_hits_eff_1000nt\twc_pval\twc_rbc_eff_size\twc_cl_eff_size\tseq_motif_ids\tseq_motif_hits\tstr_motif_ids\tstr_motif_hits\tinternal_id\n"
    OUTSTATS.write(rbp_stats_header)

    for rbp_id in search_rbps_dic:

        # print(search_rbps_dic[rbp_id].__dict__)
        rbp_list.append(rbp_id)

        motif_db_out = loaded_rbps_dic[rbp_id]

        c_reg_with_hits = search_rbps_dic[rbp_id].c_hit_reg
        perc_reg_with_hits = search_rbps_dic[rbp_id].perc_hit_reg
        c_motif_hits = search_rbps_dic[rbp_id].c_motif_hits
        c_uniq_motif_hits = search_rbps_dic[rbp_id].c_uniq_motif_hits
        c_uniq_motif_nts = search_rbps_dic[rbp_id].c_uniq_motif_nts
        perc_uniq_motif_nts_cal_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg
        perc_uniq_motif_nts_eff_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg
        uniq_motif_hits_cal_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt
        uniq_motif_hits_eff_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt
        wc_pval = search_rbps_dic[rbp_id].wc_pval
        wc_rbc_es = search_rbps_dic[rbp_id].wc_rbc_es
        wc_cl_es = search_rbps_dic[rbp_id].wc_cl_es
        internal_id = search_rbps_dic[rbp_id].internal_id

        seq_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].seq_motif_hits)
        str_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].str_motif_hits)
        seq_motif_ids = ",".join(search_rbps_dic[rbp_id].seq_motif_ids)
        str_motif_ids = ",".join(search_rbps_dic[rbp_id].str_motif_ids)
        if not seq_motif_hits:
            seq_motif_hits = "-"
        if not str_motif_hits:
            str_motif_hits = "-"
        if not seq_motif_ids:
            seq_motif_ids = "-"
        if not str_motif_ids:
            str_motif_ids = "-"

        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += rbp_id + "\t"

        row_str += str(c_regions) + "\t"

        row_str += str(reg_len_mean) + "\t"
        row_str += str(reg_len_median) + "\t"
        row_str += str(reg_len_min) + "\t"
        row_str += str(reg_len_max) + "\t"

        row_str += str(called_reg_size) + "\t"
        row_str += str(eff_reg_size) + "\t"

        row_str += str(c_reg_with_hits) + "\t"
        row_str += str(perc_reg_with_hits) + "\t"

        row_str += str(c_motif_hits) + "\t"
        row_str += str(c_uniq_motif_hits) + "\t"
        row_str += str(c_uniq_motif_nts) + "\t"
        row_str += str(perc_uniq_motif_nts_cal_reg) + "\t"
        row_str += str(perc_uniq_motif_nts_eff_reg) + "\t"
        row_str += str(uniq_motif_hits_cal_1000nt) + "\t"
        row_str += str(uniq_motif_hits_eff_1000nt) + "\t"
        row_str += str(wc_pval) + "\t"
        row_str += str(wc_rbc_es) + "\t"
        row_str += str(wc_cl_es) + "\t"
        row_str += seq_motif_ids + "\t"
        row_str += seq_motif_hits + "\t"
        row_str += str_motif_ids + "\t"
        row_str += str_motif_hits + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    OUTSTATS.close()

    """
    Region ID list.
    
    """
    reg_ids_list = []
    reg_ids_dic = {}
    if args.unstranded and not args.unstranded_ct:
        for seq_id, seq in sorted(out_seqs_dic.items()):
            core_id = benchlib.reg_get_core_id(seq_id)
            reg_ids_list.append(core_id)
            reg_ids_dic[core_id] = 1
    else:
        for seq_id, seq in sorted(out_seqs_dic.items()):
            reg_ids_list.append(seq_id)
            reg_ids_dic[seq_id] = 1

    rbp_list.sort()
    len_rbp_list = len(rbp_list)
    # Store rbp_id -> for each region if hit: 1, else: 0, i.e.: [1,0,0,0,0]
    reg_hits_dic = {}
    add_count = False # Add # of motif hits in region or just add 1 (if False)

    # RBP ID to index mapping.
    rbp2idx_dic = {}
    idx2rbp_dic = {}
    print("Get RBP region occupancies ... ")
    for idx, rbp_id in enumerate(rbp_list):
        rbp2idx_dic[rbp_id] = idx
        idx2rbp_dic[idx] = rbp_id
        # Region has hits yes(1)/no(0).
        hit_list = []
        for reg_id in reg_ids_list:
            if args.unstranded and not args.unstranded_ct:
                reg_id1 = reg_id + "(+)"
                reg_id2 = reg_id + "(-)"
                if rbp_id in regions_with_motifs_dic and (reg_id1 in regions_with_motifs_dic[rbp_id] or reg_id2 in regions_with_motifs_dic[rbp_id]):
                    hit_list.append(1)
                else:
                    hit_list.append(0)
            else:
                if rbp_id in regions_with_motifs_dic and reg_id in regions_with_motifs_dic[rbp_id]:
                    if add_count:
                        hit_list.append(regions_with_motifs_dic[rbp_id][reg_id])
                    else:
                        hit_list.append(1)
                else:
                    hit_list.append(0)
        reg_hits_dic[rbp_id] = hit_list

    """
    Output occupancies.

    reg_hits_dic[rbp_id] = [0,1,0,0, ...]
    reg_ids_list = [reg_id1, reg_id2, ... ]

    rbp2regidx_dic:
        # rbp_id -> 0-based indexes of occupied regions, e.g. [0, 3, 12, 88, 114] 

    """
    rbp2regidx_dic = {}
    OUTOCC = open(rbp_reg_occ_table_out, "w")

    occ_header = r"#region_id \ rbp_id"
    for rbp_id, hit_list in sorted(reg_hits_dic.items()):
        occ_header += "\t%s" %(rbp_id)
        rbp2regidx_dic[rbp_id] = []
        for idx, label in enumerate(hit_list):
            if label:  # if occupied (i.e. 1-label).
                rbp2regidx_dic[rbp_id].append(idx)
    OUTOCC.write("%s\n" %(occ_header))
    for idx, reg_id in enumerate(reg_ids_list):
        occ_row = "%s" %(reg_id)
        for rbp_id, hit_list in sorted(reg_hits_dic.items()):
            occ_row += "\t%i" %(hit_list[idx])
        OUTOCC.write("%s\n" %(occ_row))
    OUTOCC.close()

    # occ_header = "#rbp_id \ region_id"
    # for reg_id in reg_ids_list:
    #     occ_header += "\t%s" %(reg_id)
    # OUTOCC.write("%s\n" %(occ_header))
    # for rbp_id in reg_hits_dic:
    #     rbp2regidx_dic[rbp_id] = []
    #     occ_row = "%s" %(rbp_id)
    #     for idx, label in enumerate(reg_hits_dic[rbp_id]):
    #         occ_row += "\t%i" %(label)
    #         if label:  # if occupied (i.e. 1-label).
    #             rbp2regidx_dic[rbp_id].append(idx)
    #     OUTOCC.write("%s\n" %(occ_row))
    # OUTOCC.close()

    """
    Store RBP binding information for each input region.
    Format region_rbp_binds_dic:
    region_id -> [False, True, False ... ]
    with list number of RBP IDs (len_rbp_list), alphabetically sorted.
    Format region_rbp_motif_pos_dic:
    Region ID -> "motif_id:start_1based:end_1based:p_value/-(bit_score)"
    E.g.
    region_rbp_motif_pos_dic["reg1"] = ["rbp1_m1:98:102:0.01", "rbp1_m1:110:115:0.1", "rbp1_m2:110:115:0.05", ...]
    region2motif_hits_dic:
    region_id -> [motif_hit1, motif_hit2, ...] with motif_hit format: "motif_id:seq_start-seq_end"

    """
    region_rbp_binds_dic = {}
    region_rbp_motif_pos_dic = {}
    region2motif_hits_dic = {}  # For plotting.
    rid2rbpidx2hcp_dic = {}  # region_id -> rbp_idx -> motif hit center position(s)

    # Checks (make sure use_region_ids=True in bed_filter_extend_bed() function).
    for reg_id in out_seqs_dic:
        assert reg_id in reg2sc_dic, "region ID \"%s\" from out_seqs_dic not found in reg2sc_dic" %(reg_id)
        if args.unstranded and not args.unstranded_ct:
            core_id = benchlib.reg_get_core_id(reg_id)  # If --unstranded (and not --unstranded-ct), get core ID to count region as one.
            if core_id not in region_rbp_binds_dic:
                region_rbp_binds_dic[core_id] = [False]*len_rbp_list
                region_rbp_motif_pos_dic[core_id] = []
                rid2rbpidx2hcp_dic[core_id] = {}
        else:
            region_rbp_binds_dic[reg_id] = [False]*len_rbp_list
            region_rbp_motif_pos_dic[reg_id] = []
            region2motif_hits_dic[reg_id] = []
            rid2rbpidx2hcp_dic[reg_id] = {}

    """
    Output motif region stats (1 row for each motif hit).
    Report ALL motif hits,
    plus report how many times one genomic motif hit occurs (uniq_count). 

    """

    OUTSTATS = open(motif_stats_out, "w")

    motif_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\tregion_id\trbp_id\tmotif_id\tchr_id\tgen_s\tgen_e\tstrand\tregion_s\tregion_e\tregion_len\t"
    motif_stats_header += "uniq_count\tfimo_score\tfimo_pval\tcms_score\tcms_eval\tmatched_seq\tinternal_id\n"
    OUTSTATS.write(motif_stats_header)

    # Unique motif regions BED.
    motif_reg_dic = {}
    # Format: match_c_dic[rbp_id][motif_id][matched_seq] = count, counting only unique motif hits.
    match_c_dic = {}

    for rbp_id in search_rbps_dic:
        match_c_dic[rbp_id] = {}
        for motif_id in search_rbps_dic[rbp_id].seq_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}
        for motif_id in search_rbps_dic[rbp_id].str_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}

    # hit_id = "%s:%s-%s(%s)%s" %(cols[7], cols[8], cols[9], cols[10], cols[6])

    # fimo_hits_list also includes regex hits.
    for fh in fimo_hits_list:

        rbp_id = id2name_dic[fh.motif_id]
        region_id = fh.seq_name
        region_len = benchlib.get_length_from_seq_name(fh.seq_name)
        # genomic motif region string.
        fh_str = repr(fh)
        uniq_count = unique_motifs_dic[rbp_id][fh_str]
        # Store binding info of RBP in region.
        rbp_idx = rbp2idx_dic[rbp_id]

        # Motif hit string.
        motif_str = "%s:%i:%i:%s" %(fh.motif_id, fh.start, fh.end, str(fh.pval))
        # What gets displayed in hover box in violin plot.
        # motif_str_plot = "%s,%i-%i,%s" %(fh.motif_id, fh.seq_s, fh.seq_e, str(fh.pval))
        motif_str_plot = "%s:%i-%i" %(fh.motif_id, fh.seq_s, fh.seq_e)
        # Hit type.
        hit_type = fh.hit_type

        # Center position of motif hit.
        motif_hit_s = fh.seq_s - 1
        motif_hit_e = fh.seq_e
        center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)
        
        if args.unstranded and not args.unstranded_ct:
            core_id = benchlib.reg_get_core_id(region_id)
            region_rbp_binds_dic[core_id][rbp_idx] = True
            region_rbp_motif_pos_dic[core_id].append(motif_str)
            region2motif_hits_dic[core_id].append(motif_str_plot)
            if rbp_idx not in rid2rbpidx2hcp_dic[core_id]:
                rid2rbpidx2hcp_dic[core_id][rbp_idx] = [center_pos]
            else:
                rid2rbpidx2hcp_dic[core_id][rbp_idx].append(center_pos)
        else:
            region_rbp_binds_dic[region_id][rbp_idx] = True
            region_rbp_motif_pos_dic[region_id].append(motif_str)
            region2motif_hits_dic[region_id].append(motif_str_plot)
            if rbp_idx not in rid2rbpidx2hcp_dic[region_id]:
                rid2rbpidx2hcp_dic[region_id][rbp_idx] = [center_pos]
            else:
                rid2rbpidx2hcp_dic[region_id][rbp_idx].append(center_pos)

        motif_db_out = loaded_motif_ids_dic[fh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Get matched sequence.
        matched_seq = benchlib.get_matched_seq(fh.seq_name, out_seqs_dic, fh.seq_s, fh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        # Store motif hit as BED.
        hit_id = "%s:%s-%s(%s)%s" %(fh.chr_id, str(fh.start), str(fh.end), fh.strand, fh.motif_id)
        if hit_id not in motif_reg_dic:
            if fh.hit_type == "str_pat":
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.gc_frac), str(fh.gu_frac), matched_seq)
            else:
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.score), str(fh.pval), matched_seq)
            motif_reg_dic[hit_id] = bed_row
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if fh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][fh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][fh.motif_id]:
                match_c_dic[rbp_id][fh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][fh.motif_id][matched_seq] += 1

        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += fh.motif_id + "\t"
        row_str += fh.chr_id + "\t"
        row_str += str(fh.start) + "\t"  # 1-based.
        row_str += str(fh.end) + "\t"
        row_str += fh.strand + "\t"
        row_str += str(fh.seq_s) + "\t"  # 1-based.
        row_str += str(fh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += str(fh.score) + "\t"
        row_str += str(fh.pval) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    # Same processing as the FIMO loop above, but for cmsearch (covariance model) hits:
    # record per-region binding info, write a stats row, and store the unique BED hit.
    for cmsh in cmsearch_hits_list:

        rbp_id = id2name_dic[cmsh.motif_id]
        region_id = cmsh.seq_name
        region_len = benchlib.get_length_from_seq_name(cmsh.seq_name)
        # genomic motif region string.
        cmsh_str = repr(cmsh) 
        uniq_count = unique_motifs_dic[rbp_id][cmsh_str]
        # Store binding info of RBP in region.
        rbp_idx = rbp2idx_dic[rbp_id]

        # Motif hit string (negated score, so that sorting behaves like p-values: lower = better).
        motif_str = "%s:%i:%i:%s" %(cmsh.motif_id, cmsh.start, cmsh.end, str(-1*cmsh.score))
        # motif_str_plot = "%s,%i-%i,%s" %(cmsh.motif_id, cmsh.seq_s, cmsh.seq_e, str(cmsh.score))
        motif_str_plot = "%s:%i-%i" %(cmsh.motif_id, cmsh.seq_s, cmsh.seq_e)

        # Center position of motif hit (seq_s is 1-based, so convert to 0-based start).
        motif_hit_s = cmsh.seq_s - 1
        motif_hit_e = cmsh.seq_e
        center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

        # Record hit under the core region ID for --unstranded runs, else under region ID.
        if args.unstranded and not args.unstranded_ct:
            core_id = benchlib.reg_get_core_id(region_id)
            region_rbp_binds_dic[core_id][rbp_idx] = True
            region_rbp_motif_pos_dic[core_id].append(motif_str)
            region2motif_hits_dic[core_id].append(motif_str_plot)
            if rbp_idx not in rid2rbpidx2hcp_dic[core_id]:
                rid2rbpidx2hcp_dic[core_id][rbp_idx] = [center_pos]
            else:
                rid2rbpidx2hcp_dic[core_id][rbp_idx].append(center_pos)
        else:
            region_rbp_binds_dic[region_id][rbp_idx] = True
            region_rbp_motif_pos_dic[region_id].append(motif_str)
            region2motif_hits_dic[region_id].append(motif_str_plot)
            if rbp_idx not in rid2rbpidx2hcp_dic[region_id]:
                rid2rbpidx2hcp_dic[region_id][rbp_idx] = [center_pos]
            else:
                rid2rbpidx2hcp_dic[region_id][rbp_idx].append(center_pos)

        motif_db_out = loaded_motif_ids_dic[cmsh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Get matched sequence.
        matched_seq = benchlib.get_matched_seq(cmsh.seq_name, out_seqs_dic, cmsh.seq_s, cmsh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        # Store motif hit as BED (first occurrence of each genomic hit ID only).
        hit_id = "%s:%s-%s(%s)%s" %(cmsh.chr_id, str(cmsh.start), str(cmsh.end), cmsh.strand, cmsh.motif_id)
        if hit_id not in motif_reg_dic:
            bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s" %(cmsh.chr_id, cmsh.start-1, cmsh.end, rbp_id, cmsh.motif_id, uniq_count, args.method_id, args.data_id, cmsh.strand, str(cmsh.score), str(cmsh.e_value), matched_seq)
            motif_reg_dic[hit_id] = bed_row
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if cmsh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][cmsh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][cmsh.motif_id]:
                match_c_dic[rbp_id][cmsh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][cmsh.motif_id][matched_seq] += 1

        # Assemble one motif stats table row (fimo_score/fimo_pval columns stay "-" for cmsearch hits).
        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += cmsh.motif_id + "\t"
        row_str += cmsh.chr_id + "\t"
        row_str += str(cmsh.start) + "\t"
        row_str += str(cmsh.end) + "\t"
        row_str += cmsh.strand + "\t"
        row_str += str(cmsh.seq_s) + "\t"
        row_str += str(cmsh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += str(cmsh.score) + "\t"
        row_str += str(cmsh.e_value) + "\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"
        #print("region_id:", region_id)
        #print("evalue:", cmsh.e_value)
        #print(row_str)

        OUTSTATS.write(row_str)

    """
    Output motif hits as BED.

    The motif hits written to motif_hits_bed_out are unique motif hits already. 
    If same hit occurs > 1, this is recorded in BED column 4 with format:
    rbp_id:motif_id;uniq_count;method_id:data_id
    These also include regex hits, as they are part of fimo hits.
    
    """

    OUTBED = open(motif_hits_bed_out, "w")
    for hit_id in motif_reg_dic:
        OUTBED.write("%s\n" %(motif_reg_dic[hit_id]))
    OUTBED.close()


    """
    Output matched sequence counts in table. 

    Formats: 
    match_c_total_dic[rbp_id][motif_id] = count
    match_c_dic[rbp_id][motif_id][matched_seq] = count

    """

    OUTTSV = open(matched_seqs_out, "w")
    OUTTSV.write("rbp_id\tmotif_id\tmatched_seq\tmatch_count\tmatch_perc\n")

    match_c_total_dic = benchlib.get_match_c_total_dic(match_c_dic)

    for rbp_id in match_c_dic:
        for motif_id in match_c_dic[rbp_id]:
            total_c = match_c_total_dic[rbp_id][motif_id]
            for matched_seq in match_c_dic[rbp_id][motif_id]:
                match_c = match_c_dic[rbp_id][motif_id][matched_seq]
                match_perc = 0.0
                if match_c > 0:
                    match_perc = (float(match_c)/float(total_c)) * 100.0
                row_str = "%s\t%s\t%s\t%i\t%.2f\n" %(rbp_id, motif_id, matched_seq, match_c, match_perc)
                OUTTSV.write(row_str)
    OUTTSV.close()


    """
    Significance testing:
    Check for co-occurrences of motifs from different RBPs in regions.
    Use 2x2 contingency tables, and some test for significance,
    e.g. Fisher exact or Chi-squared

    region_rbp_binds_dic format:
    'chr20:62139082-62139128(-)': [False, False, False]
    ...
    rbp2idx_dic
    idx2rbp_dic

    from itertools import combinations
    Number of combinations (draw k from n elements, no order, no repetition)
    Binomialcoefficient: n over k, where n = #RBPs, and k = 2
    rbp_pairs = list(combinations(rbp_list, 2))

    make_contingency_table_2x2(region_labels_dic, idx1, idx2):
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.fisher_exact.html
                   List 1              Not in List 1
    List 2         A                   B
    Not in List 2  C                   D

    table = [[A, B], [C, D]]

    from scipy.stats import chi2_contingency
    stat, p, dof, expected = chi2_contingency(table)
    Note that chi2_contingency does not tolerate zero elements.

    """

    # Store p-values in 2d-list.
    pval_ll = []  # p-values (Fisher exact test).
    or_ll = []  # odds ratios.
    pval_cont_lll = []

    for rbp_id in rbp_list:
        pval_ll.append([1.0]*len_rbp_list)
        pval_cont_lll.append([]*len_rbp_list)
        # pval_cont_lll.append([["1.0","-", "-"]]*len_rbp_list)
        or_ll.append([0.0]*len_rbp_list)

    for i in range(len_rbp_list):
        for j in range(len_rbp_list):
            # Storing [p-value_str, pair_str, table_str, correlation_str].
            pval_cont_lll[i].append(["1.0", "-", "-", "-", "-", "-", "-", "", "", ""])  

    # Add p-values of 1.0 in diagonal.
    # for idx, rbp_id in enumerate(rbp_list):
    #     pval_ll[idx][idx] = 1.0
    #     pval_cont_lll[idx][idx] = 1.0

    rbp_pairs = list(combinations(rbp_list, 2))
    con_pval_dic = {}
    con_table_dic = {}
    pair_str_dic = {}

    # print("rbp2idx_dic:", rbp2idx_dic)
    # print("region_rbp_binds_dic:", region_rbp_binds_dic)
    # print("len(region_rbp_binds_dic):", len(region_rbp_binds_dic))
    # true_c = 0
    # for reg_id in region_rbp_binds_dic:
    #     for label in region_rbp_binds_dic[reg_id]:
    #         if label:
    #             true_c += 1
    # print("# TRUEs:", true_c)

    c_regions_with_hits = 0
    for reg_id in region_rbp_binds_dic:
        reg_hit = False
        for label in region_rbp_binds_dic[reg_id]:
            if label:
                reg_hit = True
        if reg_hit:
            c_regions_with_hits += 1
    print("# regions with hits (all motifs):", c_regions_with_hits)

    fisher_alt_hypo = "greater"
    if args.fisher_mode == 1:
        fisher_alt_hypo = "greater"
        print("Fisher mode = 1, reporting significantly overrepresented co-occurrences ... ")
    elif args.fisher_mode == 2:
        fisher_alt_hypo = "two-sided"
        print("Fisher mode = 2, reporting significantly over- AND underrepresented co-occurrences ... ")
    elif args.fisher_mode == 3:
        fisher_alt_hypo = "less"
        print("Fisher mode = 3, reporting significantly underrepresented co-occurrences ... ")
    else:
        assert False, "Invalid Fisher mode: %i" %(args.fisher_mode)

    p_val_list = []  # Fisher exact test p-values.

    print("Compute motif region co-occurrences between RBP pairs ... ")
    for pair in rbp_pairs:
        pair = list(pair)
        pair.sort()

        idx1 = rbp2idx_dic[pair[0]]
        idx2 = rbp2idx_dic[pair[1]]
        # pair_list = [pair[0], pair[1]]
        # pair_list.sort()
        pair_str = ",".join(pair)
        # pair_str_dic[pair_str] = [pair_list[0], pair_list[1]]
        pair_str_dic[pair_str] = [pair[0], pair[1]]

        # avg_min_dist and perc_close_hits = "-" if no common hit regions.
        table, avg_min_dist, perc_close_hits = benchlib.make_contingency_table_2x2_v2(
                                                       region_rbp_binds_dic, idx1, idx2,
                                                       rid2rbpidx2hcp_dic,
                                                       max_motif_dist=args.max_motif_dist)

        odds_ratio, p_value = fisher_exact(table, alternative=fisher_alt_hypo)

        # if p_value == 0:
        #   p_value = 2.2e-308
        #   print("PVALUE ZERO! (%s, odds_ratio: %s)" %(str(p_value), str(odds_ratio)))
        #   print("pair_str:", pair_str)
        #   print("table_str:", table_str)

        # con_pval_dic[pair_str] = p_value
        con_table_dic[pair_str] = table
        table_str = str(table)

        # print(pair_str, table_str, p_value)
        # print("Pair:", pair_str, "Odds ratio:", odds_ratio,"Fisher p-value:", p_value)

        # Corrected p-value.
        # corr_p_val = p_value * mult_test_corr_factor

        p_value_plotted = p_value
        p_val_list.append(p_value)
        # if p_value > cooc_pval_thr:
        #     p_value_plotted = 1.0

        pval_ll[idx1][idx2] = p_value_plotted
        pval_ll[idx2][idx1] = p_value_plotted
        pval_cont_lll[idx2][idx1][0] = str(p_value)
        pval_cont_lll[idx2][idx1][1] = str(p_value_plotted)
        pval_cont_lll[idx2][idx1][2] = pair_str
        pval_cont_lll[idx2][idx1][3] = table_str
        pval_cont_lll[idx2][idx1][4] = avg_min_dist
        pval_cont_lll[idx2][idx1][5] = perc_close_hits
        
        # pval_cont_lll[idx2][idx1][0] = str(p_value)
        # pval_cont_lll[idx2][idx1][1] = pair_str
        # pval_cont_lll[idx2][idx1][2] = table_str
        or_ll[idx1][idx2] = odds_ratio
        or_ll[idx2][idx1] = odds_ratio

    """
    Multiple testing correction.

    """

    cooc_pval_thr = args.cooc_pval_thr

    if args.cooc_pval_mode == 1:  # BH correction.

        pvals_corrected = false_discovery_control(p_val_list, method='bh')

        for i in range(len(p_val_list)):
            p_val_list[i] = pvals_corrected[i]
        
    elif args.cooc_pval_mode == 2:  # Bonferroni correction.

        # Multiple testing correction factor.
        mult_test_corr_factor = 1
        if len_rbp_list > 1:
            mult_test_corr_factor = (len_rbp_list*(len_rbp_list-1))/2

        cooc_pval_thr = args.cooc_pval_thr / mult_test_corr_factor
        cooc_pval_thr = benchlib.round_to_n_significant_digits_v2(cooc_pval_thr, 4)

    elif args.cooc_pval_mode == 3:  # No correction.

        cooc_pval_thr = args.cooc_pval_thr

    else:
        assert False, "Invalid co-occurrence p-value mode (--cooc-pval-mode) set: %i" %(args.cooc_pval_mode)

    args.cooc_pval_thr = cooc_pval_thr

    # Update + filter p-values.
    COSOUT = open(cooc_stats_out, "w")
    COSOUT.write("rbp_id1\trbp_id2\tc_1and2\tc_only2\tc_only1\tc_not1not2\tcooc_pval\tavg_min_dist\tperc_close_hits_%int\n" %(args.max_motif_dist))

    pv_idx = 0  # Index into p_val_list; pairs are visited in the same order as above.
    c_all_fisher_pval = 0
    c_sig_fisher_pval = 0

    for pair in rbp_pairs:
        pair = list(pair)
        pair.sort()

        idx1 = rbp2idx_dic[pair[0]]
        idx2 = rbp2idx_dic[pair[1]]

        pair_str = ",".join(pair)

        p_value = p_val_list[pv_idx]

        # Round p-values to 4 significant digits.
        p_value = benchlib.round_to_n_significant_digits_v2(p_value, 4)

        p_value_plotted = p_value

        avg_min_dist_str = pval_cont_lll[idx2][idx1][4]
        avg_min_dist = 10000  # Sentinel: "-" (no common hit regions) passes the min-dist filter.
        if avg_min_dist_str != "-":
            avg_min_dist = float(avg_min_dist_str)

        # Filter 1: non-significant p-values are plotted as 1.0 and annotated.
        if p_value > cooc_pval_thr:
            p_value_plotted = 1.0
            pval_cont_lll[idx2][idx1][7] = "(Filter: p-value > %s)<br>" %(str(cooc_pval_thr))
        
        # Filter 2: significant pairs whose motifs sit too close together are also masked.
        if p_value <= cooc_pval_thr and avg_min_dist < args.min_motif_dist:
            p_value_plotted = 1.0
            pval_cont_lll[idx2][idx1][7] = "(Filter: mean minimum motif distance < %i)<br>" %(args.min_motif_dist)

        c_all_fisher_pval += 1
        if p_value <= cooc_pval_thr and avg_min_dist >= args.min_motif_dist:
            c_sig_fisher_pval += 1

        pval_ll[idx1][idx2] = p_value_plotted
        pval_ll[idx2][idx1] = p_value_plotted
        pval_cont_lll[idx2][idx1][0] = str(p_value)
        pval_cont_lll[idx2][idx1][1] = str(p_value_plotted)

        con_pval_dic[pair_str] = p_value

        pv_idx += 1

        # Get stats for output.
        rbp1 = pair[0]
        rbp2 = pair[1]
        con_table = con_table_dic[pair_str]
        perc_close_hits = pval_cont_lll[idx2][idx1][5]

        COSOUT.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %(rbp1, rbp2, str(con_table[0][0]), str(con_table[0][1]), str(con_table[1][0]), str(con_table[1][1]), str(p_value), avg_min_dist_str, perc_close_hits))

    COSOUT.close()

    assert c_all_fisher_pval == len(p_val_list), "Number of p-values (%i) does not match number of RBP pairs (%i)" %(len(p_val_list), len(rbp_pairs))
    # Percentage rounded to 2 digits.
    perc_sig_fisher_pval = 0.0
    if c_all_fisher_pval > 0:
        perc_sig_fisher_pval = round((c_sig_fisher_pval/c_all_fisher_pval)*100, 2)

    print("# of RBP co-occurrence comparisons (calculated p-values in total): %i" %(c_all_fisher_pval))

    # Stash summary stats on args for downstream report generation.
    args.c_all_fisher_pval = c_all_fisher_pval
    args.c_sig_fisher_pval = c_sig_fisher_pval
    args.perc_sig_fisher_pval = perc_sig_fisher_pval

    min_motif_dist_info = ""
    if args.min_motif_dist > 0:
        min_motif_dist_info = " + --min-motif-dist >= %i" %(args.min_motif_dist)

    if args.cooc_pval_mode == 1:
        print("Number of significant co-occurrence p-values (BH corrected%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))
    elif args.cooc_pval_mode == 2:
        print("Number of significant co-occurrence p-values (Bonferroni corrected%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))
    elif args.cooc_pval_mode == 3:
        print("Number of significant co-occurrence p-values (no correction%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))

    """
    Print out RBPs sorted by KS p-value.

    """ 
    sorted_con_pval_dic = dict(sorted(con_pval_dic.items(), key=lambda item: item[1], reverse=False))

    print("Co-occurrence contingency table format: [A, B], [C, D]")
    print("A: RBP1 AND RBP2")
    print("B: NOT RBP1 AND RBP2")
    print("C: RBP1 AND NOT RBP2")
    print("D: NOT RBP1 AND NOT RBP2")

    if args.cooc_pval_mode == 1:
        print("Significance threshold: %s (on Benjamini-Hochberg corrected p-values)" %(str(cooc_pval_thr)))
    elif args.cooc_pval_mode == 2:
        print("Significance threshold: %s (Bonferroni corrected)" %(str(cooc_pval_thr)))
    elif args.cooc_pval_mode == 3:
        print("Significance threshold: %s" %(str(cooc_pval_thr)))
    else:
        assert False, "Invalid co-occurrence p-value mode (--cooc-pval-mode) set: %i" %(args.cooc_pval_mode)

    print("Co-occurrence of motifs (RBP IDs (RBP1,RBP2), # hits, contingency table, Fisher p-value)")

    c_reported = 0

    for pair_str, p_value in sorted_con_pval_dic.items():
        if p_value > cooc_pval_thr:
            break
        c_reported += 1
        rbp1 = pair_str_dic[pair_str][0]
        rbp2 = pair_str_dic[pair_str][1]
        con_table = con_table_dic[pair_str]
        rbp1_hits = search_rbps_dic[rbp1].c_uniq_motif_hits
        rbp2_hits = search_rbps_dic[rbp2].c_uniq_motif_hits
        print("%s\t%i,%i\t%s\t%s" %(pair_str, rbp1_hits, rbp2_hits, str(con_table), str(p_value)))

    if not c_reported:
        print("NO SIGNIFICANT CO-OCCURRENCES FOUND!")

    print("")

    # # Print table to file.
    # benchlib.output_con_table_results(con_res_out_tsv, pval_ll, rbp_list)

    """
    Calcuate correlations.

    Setting add_count = True changes correlations a bit (use motif hit counts 
    instead of 1 for any number of hits)

    """

    print("Calculate correlations ... ")
    # Correlation between RBPs dataframe.
    df = DataFrame(reg_hits_dic, columns=rbp_list)
    df_corr = df.corr(method='pearson')

    for i,rbp_i in enumerate(rbp_list):
        for j,rbp_j in enumerate(rbp_list):
            if j > i:
                pval_ll[i][j] = None

    # Fisher p-value dataframe.
    df_pval = DataFrame(pval_ll, columns=rbp_list, index=rbp_list)

    # Write None to upper-diagonal entries.
    for i,rbp_i in enumerate(rbp_list):
        for j,rbp_j in enumerate(rbp_list):
            if j > i:
                # df_corr.loc[rbp_i][rbp_j] = None  # FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
                df_corr.loc[rbp_i, rbp_j] = None
            else:
                # Round correlation values if != 1.0.
                if df_corr.loc[rbp_i][rbp_j] == 1.0:
                    pval_cont_lll[i][j][6] = str(df_corr.loc[rbp_i][rbp_j])
                else:
                    pval_cont_lll[i][j][6] = "{:.8f}".format(df_corr.loc[rbp_i][rbp_j])

    for i,rbp_i in enumerate(rbp_list):
        for j,rbp_j in enumerate(rbp_list):
            if j > i:
                df_pval.loc[rbp_i, rbp_j] = None

    # for i in range(len(rbp_list)):
    #     for j in range(len(rbp_list)):
    #         if j > i:
    #             df_corr.iloc[i][j] = None

    # for i in range(len(rbp_list)):
    #     for j in range(len(rbp_list)):
    #         if j > i:
    #             df_pval.iloc[i][j] = None

    # print("df_pval:", df_pval)
    # Log transform p-values.
    benchlib.log_tf_df(df_pval, convert_zero_pv=True, rbp_list=rbp_list)
    # print("df_pval:", df_pval)

    # print("df:", df)
    # print("df.corr():", df.corr(method='pearson'))
    # print("reg_hits_dic[SLBP]:", reg_hits_dic["SLBP"])

    # Plot correlations.
    # motif_db
    # import plotly.express as px
    # fig = px.imshow(df_corr)
    # fig.show()


    """
    If --gtf set, get transcript infos and determine RNA region type for 
    each region (select one with biggest overlap). 

    Next steps:
    1)
    Extract most prominent transcripts (or use --tr-list ones).
    2)
    Overlap transcript exons+introns with regions.
    3)
    For exonic hit, further overlapping operations necessary to determine 
    RNA type /region type (ncRNA, CDS, UTR ...).

    """
    reg2annot_dic = {}
    reg_annot_table_file = None
    target_reg_annot_file = None
    # For GO enrichment analysis.
    target_genes_dic = {}  # Store gene IDs covered by regions -> region count.
    background_genes_dic = {}  # Store all gene IDs in GTF file to use as background genes.
    gid2tid_dic = {}
    tid2tio_dic = None
    ei_ol_stats_dic = {}
    add_annot_stats_dic = {}

    if args.in_gtf:  # and c_regions_with_hits:

        reg_annot_table_file = os.path.join(args.out_folder, "region_annotations.tsv")

        # Get gene infos.
        print("Read in gene features from --gtf ... ")
        tr2gid_dic = {}
        tr_types_dic = {}  # Store transcript biotypes in GTF file.
        gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                        tr2gid_dic=tr2gid_dic,
                                                        tr_types_dic=tr_types_dic,
                                                        check_chr_ids_dic=chr_ids_dic,
                                                        chr_style=chr_style,
                                                        empty_check=False)
        assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
        c_gene_infos = len(gid2gio_dic)
        print("# gene features read in from --gtf:", c_gene_infos)

        # All GTF genes serve as the background gene set for GO enrichment.
        for gene_id in gid2gio_dic:
            background_genes_dic[gene_id] = gid2gio_dic[gene_id].gene_name

        # If --tr-list given.
        tr_ids_dic = {}
        if args.tr_list:
            assert os.path.exists(args.tr_list), "given --tr-list file \"%s\" not found" % (args.tr_list)
            tr_ids_dic = benchlib.read_ids_into_dic(args.tr_list,
                                                    check_dic=False)
            assert tr_ids_dic, "no IDs read in from provided --tr-list file. Please provide a valid IDs file (one ID per row)"
            for tr_id in tr_ids_dic:
                assert tr_id in tr2gid_dic, "transcript ID \"%s\" from provided --tr-list file does not appear in --gtf file. Please provide compatible IDs + files" %(tr_id)
                tr_ids_dic[tr_id] = tr2gid_dic[tr_id]
            print("# of transcript IDs (read in from --tr-list): ", len(tr_ids_dic))
        else:
            # Get most prominent transcripts from gene infos.
            tr_ids_dic = benchlib.select_mpts_from_gene_infos(gid2gio_dic,
                                    basic_tag=False,  # do not be strict (only_tsl=False too).
                                    ensembl_canonical_tag=False,
                                    prior_basic_tag=True,  # Prioritize basic tag transcript.
                                    prior_mane_select=True,  # mane select if set trumps all.
                                    prior_lncrna_primary_tag=True,  # for lncRNA genes prioritize gencode primary tagged transcripts (mane select still better but should not occur together for lncRNAs).
                                    only_tsl=False)
            assert tr_ids_dic, "most prominent transcript selection from gene infos failed. Please contact developers"
            print("# of transcript IDs (most prominent transcripts): ", len(tr_ids_dic))

        # Check exon order (return True if minus strand exon 1 is most downstream, not most upstream, which is the correct way).
        print("Check minus-strand exon order in --gtf ... ")
        correct_min_ex_order = benchlib.gtf_check_exon_order(args.in_gtf)
        if correct_min_ex_order:
            print("Correct order encountered ... ")
        else:
            print("Reverse order encountered ... ")
        # Get transcript infos.
        print("Read in transcript infos from --gtf ... ")
        tid2tio_dic = benchlib.gtf_read_in_transcript_infos(args.in_gtf, 
                                                            tr_ids_dic=tr_ids_dic,
                                                            correct_min_ex_order=correct_min_ex_order,
                                                            chr_style=chr_style,
                                                            empty_check=False)

        assert tid2tio_dic, "no transcript infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"

        # (in)sanity checks: selected transcript IDs and read-in infos must agree.
        # Bug fix: the %s placeholders in these messages were never interpolated,
        # so assertion failures printed a literal "%s" instead of the transcript ID.
        for tr_id in tr_ids_dic:
            assert tr_id in tid2tio_dic, "transcript ID %s not in tid2tio_dic" %(tr_id)
        for tr_id in tid2tio_dic:
            assert tr_id in tr_ids_dic, "transcript ID %s not in tr_ids_dic" %(tr_id)

        c_tr_infos = len(tid2tio_dic)
        print("# transcript features read in from --gtf:", c_tr_infos)

        # Extract exon + intron regions of selected transcripts from transcript infos.
        print("Output intron annotations to BED ... ")

        intron_exon_out_bed = os.path.join(args.out_folder, "intron_exon_regions.tmp.bed")

        benchlib.output_transcript_info_intron_exon_to_bed(tid2tio_dic, intron_exon_out_bed,
                                            output_mode=3,  # only introns.
                                            report_counts=True,
                                            add_tr_id=True,  # new reg_id format: intron;ENST000006666
                                            add_numbers=True,  # new reg_id format: intron;ENST000006666;1-2
                                            number_format=1,  # format intron;ENST000006666;1-2
                                            empty_check=False)

        # Custom transcript biotypes for upset plot.
        custom_annot_dic = None
        if args.tr_types_list:
            # Bug fix: custom_annot_dic was left as None here, so the assignment
            # below raised TypeError whenever --tr-types-list was given.
            custom_annot_dic = {}
            for tr_type in args.tr_types_list:
                # Bug fix: interpolate tr_type into the assert message (the %s
                # placeholder was previously never filled in).
                assert tr_type in tr_types_dic, "given transcript biotype ID \"%s\" not found in GTF file" %(tr_type)
                custom_annot_dic[tr_type] = tr_type

        # Append detailed exon annotations (CDS, UTR, transcript biotypes) to intron annotations.
        print("Output exon annotations to BED ... ")
        benchlib.output_exon_annotations(tid2tio_dic, intron_exon_out_bed,
                                         custom_annot_dic=custom_annot_dic,
                                         add_numbers=True,
                                         append=True)

        # Overlap with input regions (same strand, minimum overlap fraction on input regions).
        print("Overlap annotations with input regions ... ")
        params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))
        # params = "-s -wo -f %s -F %s -e" %(str(args.gtf_feat_min_overlap), str(args.gtf_feat_min_overlap))
        overlap_annotations_bed = os.path.join(args.out_folder, "overlap_region_annotations.tmp.bed")

        benchlib.bed_intersect_files(filtered_sites_bed, intron_exon_out_bed, 
                                     overlap_annotations_bed,
                                     params=params)

        # Assign one annotation per region (biggest overlap wins).
        reg2annot_dic = benchlib.get_region_annotations(overlap_annotations_bed,
                                                        tid2tio_dic,
                                                        reg_ids_dic=reg_ids_dic)

        # Output assignments.
        print("Output region annotations ... ")
        
        OUTRAN = open(reg_annot_table_file, "w")
        OUTRAN.write("region_id\tgene_id\tgene_name\ttranscript_id\tregion_annotation\ttranscript_biotype\tborder_dist\tus_ds_label\tannot_reg_len\texon_intron_nr\n")

        tids_with_sites_dic = {}  # transcript IDs with sites.

        # So far reg2annot_dic format: reg_id -> [annot_id, tr_id, border_dist, us_ds_label, annot_reg_len, exon_intron_nr]
        # intergenic: ["intergenic", False, -1, "-", -1, "-"]
        #  annot_reg_len == intron/exon length.
        # exon_intron_nr format: 1-5, 2-6 ...

        for reg_id in reg2annot_dic:
            annot = reg2annot_dic[reg_id][0]
            tr_id = reg2annot_dic[reg_id][1]
            gene_id = "-"
            gene_name = "-"
            tr_biotype = "-"
            # Extend annotation list to indices 6-8 with gene_id, gene_name,
            # tr_biotype placeholders ("-" kept for intergenic regions).
            reg2annot_dic[reg_id].append("-")
            reg2annot_dic[reg_id].append("-")
            reg2annot_dic[reg_id].append("-")
            if tr_id:
                gene_id = tr2gid_dic[tr_id]
                gene_info = gid2gio_dic[gene_id]
                tr_biotype = tid2tio_dic[tr_id].tr_biotype
                gene_name = gene_info.gene_name
                gid2tid_dic[gene_id] = tr_id
                reg2annot_dic[reg_id][6] = gene_id
                reg2annot_dic[reg_id][7] = gene_name
                reg2annot_dic[reg_id][8] = tr_biotype
                # Count regions per covered gene (target genes for GO enrichment).
                if gene_id not in target_genes_dic:
                    target_genes_dic[gene_id] = 1
                else:
                    target_genes_dic[gene_id] += 1
            else:
                tr_id = "-"
            border_dist = str(reg2annot_dic[reg_id][2])  # -1 unless intron annotation.
            us_ds_label = reg2annot_dic[reg_id][3]  # "-" unless intron annotation.
            annot_reg_len = str(reg2annot_dic[reg_id][4])
            exon_intron_nr_c = reg2annot_dic[reg_id][5]  # format: 1-5, 2-6 ...

            OUTRAN.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %(reg_id, gene_id, gene_name, tr_id, annot, tr_biotype, border_dist, us_ds_label, annot_reg_len, exon_intron_nr_c))

            # Count sites per transcript (note: intergenic regions counted under "-").
            if tr_id not in tids_with_sites_dic:
                tids_with_sites_dic[tr_id] = 1
            else:
                tids_with_sites_dic[tr_id] += 1
            
        OUTRAN.close()
    
        """
        Output additional annotations to BED files.

        """

        print("Output gene regions to BED ... ")

        benchlib.output_gene_regions_to_bed(gid2gio_dic, gene_regions_bed,
                                            add_annot_stats_dic=add_annot_stats_dic)

        print("Output putative promoter regions to BED ... ")

        benchlib.output_promoter_regions_to_bed(tid2tio_dic, promoter_regions_bed,
                                        prom_min_tr_len=args.prom_min_tr_len,
                                        prom_mrna_only=args.prom_mrna_only,
                                        prom_both_str=args.prom_both_str,
                                        mrna_biotype_label="protein_coding",
                                        prom_ext_up=prom_ext_up,
                                        prom_ext_down=prom_ext_down,
                                        add_annot_stats_dic=add_annot_stats_dic)


        """
        Overlap additional annotation regions with input regions.

        """

        print("Overlap input regions with gene regions ... ")

        c_outside_genes = benchlib.bed_intersect_files_count_lines(
                                        filtered_sites_bed, gene_regions_bed,
                                        params="-s -v")

        print("# input regions NOT overlapping with gene regions: %i" %(c_outside_genes))

        print("Overlap input regions with promoter regions ... ")

        c_inside_prom = benchlib.bed_intersect_files_count_lines(
                                        filtered_sites_bed, promoter_regions_bed,
                                        params="-s -u")

        print("# input regions overlapping with promoter regions: %i" %(c_inside_prom))

        c_add_annot = 0

        if args.add_annot_bed:

            print("Overlap input regions with --add-annot-bed regions ... ")

            params = "-s -u"  # count overlaps with --add-annot-bed regions.
            if args.add_annot_comp:
                params = "-s -v"  # count non-overlaps with --add-annot-bed regions.

            c_add_annot = benchlib.bed_intersect_files_count_lines(
                                                filtered_sites_bed, args.add_annot_bed,
                                                params=params)

            if args.add_annot_comp:
                print("# input regions NOT overlapping with --add-annot-bed regions: %i" %(c_add_annot))
            else:
                print("# input regions overlapping with --add-annot-bed regions: %i" %(c_add_annot))

        add_annot_stats_dic["c_outside_genes"] = c_outside_genes
        add_annot_stats_dic["c_inside_prom"] = c_inside_prom
        add_annot_stats_dic["c_add_annot"] = c_add_annot
        add_annot_stats_dic["prom_ext_up"] = prom_ext_up
        add_annot_stats_dic["prom_ext_down"] = prom_ext_down

        """
        Get exon+intron+eib overlap counts.

        reg2annot_dic format: reg_id -> 
        [annot_id, tr_id, border_dist, us_ds_label, annot_reg_len, exon_intron_nr, gene_id, gene_name, tr_biotype]
        For intergenic regions:
        ["intergenic", False, -1, "-", -1, "-", "-", "-", "-"]
        For intronic/exonic regions with center outside:
        [annot_id, tr_id, -1, "-", annot_reg_len, exon_intron_nr, gene_id, gene_name, tr_biotype]

        """

        ei_border_len = 50
        args.ei_border_len = ei_border_len

        eib_annot_c_dic = {
            "exonic" : 0,
            "intronic" : 0,
            "intergenic" : 0,
            "eib" : 0,
            "us_ib_dist" : 0,
            "ds_ib_dist" : 0,
            "us_ib" : 0,
            "ds_ib" : 0,
            "first_exon" : 0,
            "last_exon" : 0,
            "single_exon" : 0
        }
        
        for reg_id in reg2annot_dic:
            benchlib.get_eib_annot_c(reg2annot_dic[reg_id], eib_annot_c_dic,
                                     ib_len=args.gtf_intron_border_len,
                                     eib_len=ei_border_len)

        # print("Output exon + intron border regions ... ")
        # intron_exon_border_out_bed = args.out_folder + "/intron_exon_border_regions.tmp.bed"
        # benchlib.exon_intron_border_regions_to_bed(tid2tio_dic, intron_exon_border_out_bed,
        #                                     tr_ids_dic=None,
        #                                     intron_border_len=args.gtf_intron_border_len,
        #                                     ei_border_len=50)

        # # Overlap with input regions.
        # print("Overlap exon + intron regions with input regions ... ")
        # params = "-s -f %s -F %s -e -wb " %(str(args.gtf_eib_min_overlap), str(args.gtf_eib_min_overlap))
        # overlap_ei_regions_bed = args.out_folder + "/overlap_ei_regions.tmp.bed"
        # benchlib.bed_intersect_files(intron_exon_out_bed, filtered_sites_bed,
        #                              overlap_ei_regions_bed,
        #                              params=params)

        # # Get exon + intron overlap counts.
        # c_exon_ol, c_intron_ol = benchlib.get_intron_exon_ol_counts(overlap_ei_regions_bed)
        # print("# exon overlap regions:   %i" %(c_exon_ol))
        # print("# intron overlap regions: %i" %(c_intron_ol))

        # print("Overlap exon + intron border regions with input regions ... ")
        # params = "-s -f %s -F %s -e -wb " %(str(args.gtf_eib_min_overlap), str(args.gtf_eib_min_overlap))
        # overlap_eib_regions_bed = args.out_folder + "/overlap_eib_regions.tmp.bed"
        # benchlib.bed_intersect_files(intron_exon_border_out_bed, filtered_sites_bed,
        #                              overlap_eib_regions_bed,
        #                              params=params)

        # # Get other overlap counts.
        # c_eib_ol, c_us_ib_ol, c_ds_ib_ol = benchlib.get_eib_ol_counts(overlap_eib_regions_bed)

        c_exon_ol = eib_annot_c_dic["exonic"]
        c_intron_ol = eib_annot_c_dic["intronic"]
        c_intergenic = eib_annot_c_dic["intergenic"]
        c_eib_ol = eib_annot_c_dic["eib"]
        c_us_ib_ol = eib_annot_c_dic["us_ib"]
        c_ds_ib_ol = eib_annot_c_dic["ds_ib"]
        c_us_ib_dist_ol = eib_annot_c_dic["us_ib_dist"]
        c_ds_ib_dist_ol = eib_annot_c_dic["ds_ib_dist"]
        c_first_exon = eib_annot_c_dic["first_exon"]
        c_last_exon = eib_annot_c_dic["last_exon"]
        c_single_exon = eib_annot_c_dic["single_exon"]

        exon_intron_ol_stats = benchlib.ExonIntronOverlap("rbpbench_search", c_regions,
                                                          c_exon_sites=c_exon_ol,
                                                          c_intron_sites=c_intron_ol,
                                                          c_intergenic_sites=c_intergenic,
                                                          c_us_ib_sites=c_us_ib_ol,
                                                          c_ds_ib_sites=c_ds_ib_ol,
                                                          c_us_ib_dist_sites=c_us_ib_dist_ol,
                                                          c_ds_ib_dist_sites=c_ds_ib_dist_ol,
                                                          c_eib_sites=c_eib_ol,
                                                          c_first_exon_sites=c_first_exon,
                                                          c_last_exon_sites=c_last_exon,
                                                          c_single_exon_sites=c_single_exon,
                                                          min_overlap=args.gtf_feat_min_overlap,
                                                          intron_border_len=args.gtf_intron_border_len,
                                                          ei_border_len=ei_border_len,
                                                          c_tr_ids=len(tid2tio_dic),
                                                          c_tr_ids_with_sites=len(tids_with_sites_dic))
        
        ei_ol_stats_dic["rbpbench_search"] = exon_intron_ol_stats

        # Optionally restrict GOA target genes by motif hit content:
        #   mode 2: keep genes whose regions have motif hits for ANY RBP,
        #   mode 3: keep genes whose regions have motif hits for ALL RBPs.
        # gid2tid_dic is reset and re-filled by the helper in both modes.
        if args.goa_cooc_mode == 2:
            gid2tid_dic = {}
            new_target_genes_dic = benchlib.get_target_genes_with_rbp_hits(reg2annot_dic, tr2gid_dic, region_rbp_binds_dic,
                                                                           gid2tid_dic=gid2tid_dic,
                                                                           goa_cooc_mode=args.goa_cooc_mode)

            print("Keep only target genes containing regions with motif hits for any RBP ... ")
            print("# of target genes before filtering: %i" %(len(target_genes_dic)))
            print("# of target genes after filtering:  %i" %(len(new_target_genes_dic)))

            target_genes_dic = new_target_genes_dic

        elif args.goa_cooc_mode == 3:
            gid2tid_dic = {}
            new_target_genes_dic = benchlib.get_target_genes_with_rbp_hits(reg2annot_dic, tr2gid_dic, region_rbp_binds_dic,
                                                                           gid2tid_dic=gid2tid_dic,
                                                                           goa_cooc_mode=args.goa_cooc_mode)

            print("Keep only target genes containing regions with motif hits for all RBPs ... ")
            print("# of target genes before filtering: %i" %(len(target_genes_dic)))
            print("# of target genes after filtering:  %i" %(len(new_target_genes_dic)))

            target_genes_dic = new_target_genes_dic


        if args.goa_bg_gene_list:

            # Use a user-supplied background gene set for GOA instead of all
            # --gtf genes; target genes are then restricted to this set.
            # Fix: messages previously referred to "--goa-gb-gene-list"
            # (typo) — the actual option is --goa-bg-gene-list.
            print("Read in background genes from --goa-bg-gene-list ... ")

            assert os.path.exists(args.goa_bg_gene_list), "given --goa-bg-gene-list file \"%s\" not found" % (args.goa_bg_gene_list)

            bg_gene_ids_dic = benchlib.read_ids_into_dic(args.goa_bg_gene_list,
                                                         check_dic=False)

            print("# of gene IDs read in: %i" %(len(bg_gene_ids_dic)))
            print("Filter background genes by --gtf genes ... ")

            # Keep only background gene IDs present in the --gtf annotation.
            new_background_genes_dic = {}
            for gene_id in bg_gene_ids_dic:
                if gene_id in gid2gio_dic:
                    new_background_genes_dic[gene_id] = gid2gio_dic[gene_id].gene_name

            assert new_background_genes_dic, "given --goa-bg-gene-list gene IDs not found in --gtf. Please provide compatible --gtf and --goa-bg-gene-list files"

            print("# of background genes before filtering: %i" %(len(background_genes_dic)))
            print("# of background genes after filtering:  %i" %(len(new_background_genes_dic)))

            background_genes_dic = new_background_genes_dic

            # Target genes must be a subset of the background gene set.
            print("Filter target genes by new background gene list ... ")
            new_target_genes_dic = {}
            for gene_id in target_genes_dic:
                if gene_id in background_genes_dic:
                    new_target_genes_dic[gene_id] = target_genes_dic[gene_id]

            print("# of target genes before filtering: %i" %(len(target_genes_dic)))
            print("# of target genes after filtering:  %i" %(len(new_target_genes_dic)))

            target_genes_dic = new_target_genes_dic

    """
    Get data for mRNA region site occupancy plot.

    """

    mrna_prof_dic = {}

    if args.in_gtf:

        # Get mRNA transcripts, with 5'UTR,CDS,3'UTR lengths list.
        tid2regl_dic = benchlib.get_mrna_region_lengths(tid2tio_dic)

        mrna_exon_out_bed = os.path.join(args.out_folder, "mrna_exon_regions.tmp.bed")
        print("Output mRNA exon regions ... ")
        benchlib.output_transcript_info_intron_exon_to_bed(tid2tio_dic, mrna_exon_out_bed,
                                            tr_ids_dic=tid2regl_dic,
                                            output_mode=2,  # only exon regions.
                                            report_counts=False,
                                            add_tr_id=True,
                                            add_numbers=True,
                                            number_format=2, # format exon;ENST001;1
                                            empty_check=False)

        mrna_exon_sites_overlap_bed = os.path.join(args.out_folder, "mrna_exon_regions.filtered_sites.overlap.tmp.bed")
        print("Overlap mRNA exon regions with input sites ... ")
        # Fraction of B, so -F instead of -f.
        benchlib.bed_intersect_files(mrna_exon_out_bed, filtered_sites_bed, mrna_exon_sites_overlap_bed, params="-s -wb -F %s" %(str(args.gtf_min_mrna_overlap)))

        c_ol_mrna_sites, ol_mrna_tids_dic = benchlib.get_mrna_tids_and_sites(mrna_exon_sites_overlap_bed)

        if c_ol_mrna_sites:
            # Build per-exon position count lists for mRNAs with overlapping
            # sites, then fill them with site overlap counts.
            exon2pcl_dic = benchlib.get_exon_pos_count_list_dic(tid2tio_dic, tr_ids_dic=ol_mrna_tids_dic)
            benchlib.fill_exon_pos_count_lists(mrna_exon_sites_overlap_bed, tid2tio_dic, exon2pcl_dic)

            # Concatenate exon count lists into full spliced-transcript lists.
            tr2pcl_dic = {}
            tid2ol_regl_dic = {}
            for tid in ol_mrna_tids_dic:
                # Fix: message string previously lacked the % argument, so a
                # failure would print the literal "%s" instead of the ID.
                assert tid in tid2regl_dic, "transcript ID %s not in tid2regl_dic" %(tid)
                # Get exon number.
                exon_c = tid2tio_dic[tid].exon_c
                tr2pcl_dic[tid] = []
                for exon_i in range(1, exon_c+1):
                    exon_id = "exon;%s;%i" %(tid, exon_i)
                    tr2pcl_dic[tid] += exon2pcl_dic[exon_id]

                tr_len = tid2tio_dic[tid].tr_length  # spliced transcript length.
                assert tr_len == len(tr2pcl_dic[tid]), "transcript length %i != position count list length %i for transcript ID %s" %(tr_len, len(tr2pcl_dic[tid]), tid)
                tid2ol_regl_dic[tid] = tid2regl_dic[tid]

            c_ol_mrnas = len(tid2ol_regl_dic)
            c_mrnas = len(tid2regl_dic)
            print("# of mRNA transcripts:            %i" %(c_mrnas))
            print("# of mRNA transcripts with sites: %i" %(c_ol_mrnas))

            # Get normalized mRNA region lengths (common plot coordinates
            # for 5'UTR / CDS / 3'UTR across transcripts).
            utr5_len_norm, cds_len_norm, utr3_len_norm, norm_mode = benchlib.get_mrna_reg_norm_len(
                                                        tid2ol_regl_dic,
                                                        mrna_norm_mode=args.mrna_norm_mode)

            # Aggregate position count lists over all overlapping mRNAs.
            utr5_pc_list = [0] * round(utr5_len_norm)
            cds_pc_list = [0] * round(cds_len_norm)
            utr3_pc_list = [0] * round(utr3_len_norm)

            len_norm_utr5_list = len(utr5_pc_list)
            len_norm_cds_list = len(cds_pc_list)
            len_norm_utr3_list = len(utr3_pc_list)

            # For each overlapping mRNA, resample its 5'UTR / CDS / 3'UTR
            # count list parts to the normalized lengths via linear
            # interpolation (np.interp), round to int, and add the values
            # to the aggregate per-region lists.
            for tid in tr2pcl_dic:

                tr_len = tid2tio_dic[tid].tr_length  # spliced transcript length.

                utr5_len = tid2regl_dic[tid][0]
                cds_len = tid2regl_dic[tid][1]
                utr3_len = tid2regl_dic[tid][2]

                # Region lengths must add up to the spliced transcript length.
                reg_len_sum = utr5_len + cds_len + utr3_len
                assert reg_len_sum == len(tr2pcl_dic[tid]), "mRNA region length sum %i != position count list length %i for transcript ID %s" %(reg_len_sum, len(tr2pcl_dic[tid]), tid)

                if utr5_len:
                    utr5_part = tr2pcl_dic[tid][:utr5_len]
                    utr5_orig_pos = np.linspace(0, len(utr5_part) - 1, len(utr5_part))
                    utr5_new_pos = np.linspace(0, len(utr5_part) - 1, len_norm_utr5_list)
                    utr5_new_part = np.interp(utr5_new_pos, utr5_orig_pos, utr5_part)
                    utr5_new_part_int = list(np.round(utr5_new_part).astype(int))
                    assert len(utr5_new_part_int) == len(utr5_pc_list), "length of utr5_new_part_int %i != length of utr5_pc_list %i for transcript ID %s" %(len(utr5_new_part_int), len(utr5_pc_list), tid)
                    for i in range(len_norm_utr5_list):
                        utr5_pc_list[i] += utr5_new_part_int[i]

                if cds_len:
                    cds_part = tr2pcl_dic[tid][utr5_len:utr5_len+cds_len]
                    cds_orig_pos = np.linspace(0, len(cds_part) - 1, len(cds_part))
                    cds_new_pos = np.linspace(0, len(cds_part) - 1, len_norm_cds_list)
                    cds_new_part = np.interp(cds_new_pos, cds_orig_pos, cds_part)
                    cds_new_part_int = list(np.round(cds_new_part).astype(int))
                    assert len(cds_new_part_int) == len(cds_pc_list), "length of cds_new_part_int %i != length of cds_pc_list %i for transcript ID %s" %(len(cds_new_part_int), len(cds_pc_list), tid)
                    for i in range(len_norm_cds_list):
                        cds_pc_list[i] += cds_new_part_int[i]

                if utr3_len:
                    utr3_part = tr2pcl_dic[tid][utr5_len+cds_len:]
                    utr3_orig_pos = np.linspace(0, len(utr3_part) - 1, len(utr3_part))
                    utr3_new_pos = np.linspace(0, len(utr3_part) - 1, len_norm_utr3_list)
                    utr3_new_part = np.interp(utr3_new_pos, utr3_orig_pos, utr3_part)
                    utr3_new_part_int = list(np.round(utr3_new_part).astype(int))
                    assert len(utr3_new_part_int) == len(utr3_pc_list), "length of utr3_new_part_int %i != length of utr3_pc_list %i for transcript ID %s" %(len(utr3_new_part_int), len(utr3_pc_list), tid)
                    for i in range(len_norm_utr3_list):
                        utr3_pc_list[i] += utr3_new_part_int[i]

                # (Former per-position bin-counting implementation removed;
                # replaced by the np.interp resampling above — see VCS
                # history. tr_len above is retained from that version and is
                # currently unused here.)
            c_ol_sites = c_ol_mrna_sites
            c_all_sites = reg_stats_dic["c_out"]

            # Percentage of input regions located on mRNA exon regions.
            perc_ol_sites = 0.0
            if c_ol_sites and c_all_sites:
                perc_ol_sites = round(c_ol_sites / c_all_sites * 100, 1)

            print("# of input regions on mRNAs: %i" %(c_ol_sites))
            print("# of all input regions:      %i" %(c_all_sites))
            print("%% of input regions on mRNAs: %s" %(str(perc_ol_sites)))

            # Bundle profile data for the HTML report occupancy plot.
            mrna_profile = benchlib.MrnaRegionProfile("rbpbench_search", len_norm_utr5_list, len_norm_cds_list, 
                                                      len_norm_utr3_list, norm_mode,
                                                      c_ol_sites=c_ol_sites,
                                                      c_all_sites=c_all_sites,
                                                      c_ol_mrnas=c_ol_mrnas,
                                                      utr5_pc_list=utr5_pc_list, 
                                                      cds_pc_list=cds_pc_list,
                                                      utr3_pc_list=utr3_pc_list)

            mrna_prof_dic["rbpbench_search"] = mrna_profile

    """
    GO enrichment analysis.

    """
    goa_results_df = False
    goa_stats_dic = {}
    propagate_counts = True

    if args.run_goa:

        # Write empty file.
        open(goa_results_tsv, "w").close()

        print("")
        print("GOA enabled (--goa) ... ")

        goa_stats_dic["c_target_genes_pre_filter"] = len(target_genes_dic)
        goa_stats_dic["c_background_genes_pre_filter"] = len(background_genes_dic)
        goa_stats_dic["pval_thr"] = args.goa_pval
        goa_stats_dic["goa_obo_mode"] = args.goa_obo_mode
        goa_stats_dic["propagate_counts"] = propagate_counts
        goa_stats_dic["excluded_terms"] = "-"
        goa_stats_dic["goa_filter_purified"] = args.goa_filter_purified
        goa_stats_dic["goa_max_child"] = args.goa_max_child
        goa_stats_dic["goa_min_depth"] = args.goa_min_depth
        goa_stats_dic["goa_cooc_mode"] = args.goa_cooc_mode

        if target_genes_dic:

            gene_infos_file = benchlib_path + "/content/ensembl_gene_infos.biomart.GRCh38.112.tsv.gz"
            if os.path.exists(gene_infos_file):
                print("Output target region annotations ... ")
                target_reg_annot_file = os.path.join(args.out_folder, "target_region_annotations.tsv")
                
                benchlib.output_target_reg_annot(target_genes_dic, gene_infos_file, target_reg_annot_file,
                                                 gid2tid_dic=gid2tid_dic,
                                                 tid2tio_dic=tid2tio_dic)

            local_gid2go_file = benchlib_path + "/content/ensembl_gene_id2go_ids.biomart.GRCh38.112.tsv.gz"
            local_obo_file = benchlib_path + "/content/go-basic.obo.gz"

            assert os.path.exists(local_gid2go_file), "local gene ID to GO ID file \"%s\" not found" %(local_gid2go_file)
            assert os.path.exists(local_obo_file), "local GO OBO file \"%s\" not found" %(local_obo_file)

            gid2go_file = local_gid2go_file
            if args.goa_gene2go_file:
                gid2go_file = args.goa_gene2go_file
                assert os.path.exists(gid2go_file), "provided --goa-gene2go-file \"%s\" not found" %(gid2go_file)
            goa_obo_file = local_obo_file
            if args.goa_obo_file and args.goa_obo_mode == 3:
                goa_obo_file = args.goa_obo_file
                assert os.path.exists(goa_obo_file), "provided --goa-obo-file \"%s\" not found" %(goa_obo_file)

            # Run GOA.
            goa_results_df = benchlib.run_go_analysis(target_genes_dic, background_genes_dic, 
                                                      gid2go_file, args.out_folder,
                                                      pval_thr=args.goa_pval,
                                                      excluded_terms = [],  # do not exclude any GO terms.
                                                      goa_obo_mode=args.goa_obo_mode,
                                                      propagate_counts=propagate_counts,
                                                      stats_dic=goa_stats_dic,
                                                      store_gene_names=True,
                                                      goa_obo_file=goa_obo_file)

            print("# of enriched (i.e., with significantly higher concentration) GO terms: %i" %(goa_stats_dic["c_sig_go_terms_e"]))
            print("# of purified (i.e., with significantly lower concentration) GO terms:  %i" %(goa_stats_dic["c_sig_go_terms_p"]))

            goa_results_df.to_csv(goa_results_tsv, sep="\t", index=False)
            print("")

        else:
            print("No target genes for GOA (either no --in regions overlapping or through --goa-only-cooc). Skipping GOA ... ")


    """
    Motif annotations if --plot-motifs and --gtf set.

    """

    rbp2motif2annot2c_dic = {}  # rbp_id -> motif_id -> annot -> count

    if args.in_gtf and c_regions_with_hits:

        # Overlap motif hit BED with genomic annotation regions.
        print("Overlap annotations with motif hit regions ... ")

        params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))
        motif_hits_bed_tmp_out = os.path.join(args.out_folder, "motif_hits.tmp.bed")
        benchlib.reformat_to_bed10(motif_hits_bed_out, motif_hits_bed_tmp_out)

        overlap_motif_hit_annotations_bed = os.path.join(args.out_folder, "overlap_motif_hit_annotations.tmp.bed")
        benchlib.bed_intersect_files(motif_hits_bed_tmp_out, intron_exon_out_bed, 
                                     overlap_motif_hit_annotations_bed,
                                     params=params)

        motif_hit2annot_dic = benchlib.get_region_annotations(
                                        overlap_motif_hit_annotations_bed,
                                        tid2tio_dic,
                                        motif_hits=True,
                                        reg_ids_dic=motif_reg_dic)

        for motif_hit in motif_hit2annot_dic:
            # motif_hit format: "chr1:10-15(+)motif_id". Get motif_id.
            # motif_id = motif_hit.split(")")[1]  # Should work since motif_id/regex cannot contain ")".
            motif_id = benchlib.get_motif_id_from_hit_str(motif_hit)
            assert motif_id, "motif_id extraction failed for motif_hit string \"%s\"" %(motif_hit)

            annot = motif_hit2annot_dic[motif_hit][0]
            # tr_id = motif_hit2annot_dic[motif_hit][1]
            rbp_id = id2name_dic[motif_id]
            if rbp_id not in rbp2motif2annot2c_dic:
                rbp2motif2annot2c_dic[rbp_id] = {}
            if motif_id not in rbp2motif2annot2c_dic[rbp_id]:
                rbp2motif2annot2c_dic[rbp_id][motif_id] = {}
            if annot not in rbp2motif2annot2c_dic[rbp_id][motif_id]:
                rbp2motif2annot2c_dic[rbp_id][motif_id][annot] = 1
            else:
                rbp2motif2annot2c_dic[rbp_id][motif_id][annot] += 1

        # Output motif hits BED again, with motif region annotations.
        print("Add motif hit annotations ... ")
        OUTBED = open(motif_hits_bed_out, "w")
        for hit_id in motif_reg_dic:
            annot = "-"
            if hit_id in motif_hit2annot_dic:
                annot = motif_hit2annot_dic[hit_id][0]
            cols = motif_reg_dic[hit_id].split("\t")
            match_seq = cols[11]
            cols_part1 = "\t".join(cols[0:10]) 
            OUTBED.write("%s\t%s\t%s\n" %(cols_part1, annot, match_seq))
        OUTBED.close()

    """
    Get annotation to color dictionary, which is needed for region annotation plots in HTML reports.

    """

    annot2color_dic = {}

    if reg2annot_dic or rbp2motif2annot2c_dic:

        annot_dic = {"3'UTR" : 0, "5'UTR" : 0, "CDS" : 0, "lncRNA" : 0, "intron" : 0, "intergenic" : 0}
        # annot_dic = {}

        if reg2annot_dic:
            for reg_id in reg2annot_dic:
                annot = reg2annot_dic[reg_id][0]
                if annot not in annot_dic:
                    annot_dic[annot] = 1
                else:
                    annot_dic[annot] += 1

        if rbp2motif2annot2c_dic:
            for rbp_id in rbp2motif2annot2c_dic:
                for motif_id in rbp2motif2annot2c_dic[rbp_id]:
                    for annot in rbp2motif2annot2c_dic[rbp_id][motif_id]:
                        if annot not in annot_dic:
                            annot_dic[annot] = 1
                        else:
                            annot_dic[annot] += 1

        if mrna_prof_dic:
            annot_dic["5'UTR"] = 1
            annot_dic["CDS"] = 1
            annot_dic["3'UTR"] = 1

        # hex_colors = get_hex_colors_list(min_len=len(annot_with_hits_dic))
        hex_colors = benchlib.get_hex_colors_list(min_len=len(annot_dic))

        idx = 0
        for annot in sorted(annot_dic, reverse=False):
            # hc = hex_colors[idx]
            # print("Assigning hex color %s to annotation %s ... " %(hc, annot))
            annot2color_dic[annot] = hex_colors[idx]
            idx += 1


    """
    Generate HTML report.
    
    """

    html_report_out = os.path.join(args.out_folder, "report.rbpbench_search.html")
    if args.plot_abs_paths:
        html_report_out = os.path.join(os.path.abspath(args.out_folder), "report.rbpbench_search.html")

    # If HTML file already exists, remove it.
    if os.path.exists(html_report_out):
        os.remove(html_report_out)

    seq_len_df = None
    if not args.disable_len_dist_plot:

        # Sequences dataframe for plotting sequence lengths violin plot.
        sequences = []
        seq_ids = []
        for seq_id in out_seqs_dic:
            seq_ids.append(seq_id)
            sequences.append(out_seqs_dic[seq_id])

        motif_hits = []
        for seq_id in seq_ids:
            # region2motif_hits_dic[seq_id].sort()
            # If list empty, append "-".
            if not region2motif_hits_dic[seq_id]:
                motif_hits.append("-")
            else:
                motif_hits.append(benchlib.join_motif_hits(
                                region2motif_hits_dic[seq_id],
                                motifs_per_line=4,
                                line_break_char="<br>"))
                # motif_hits.append("\n".join(region2motif_hits_dic[seq_id]))

        seq_len_df = DataFrame({
            'Sequence ID': seq_ids,
            'Sequence Length': [len(seq) for seq in sequences],
            'Sequence': [benchlib.insert_line_breaks(seq, line_len=50) for seq in sequences],
            'Motif hits': motif_hits
        })

    plots_subfolder = "html_report_plots"
    benchlib_path = os.path.dirname(benchlib.__file__)

    for reg_id in reg2annot_dic:
        if len(reg2annot_dic[reg_id]) != 9:
            print("!=9 for:", reg_id, reg2annot_dic[reg_id])


    print("Create report ... ")

    benchlib.search_generate_html_report(args,
                                        df_pval, pval_cont_lll,
                                        search_rbps_dic,
                                        id2name_dic, name2ids_dic,
                                        region_rbp_motif_pos_dic,
                                        reg2pol_dic,
                                        benchlib_path,
                                        rbp2regidx_dic,
                                        reg_ids_list,
                                        seq_len_df=seq_len_df,
                                        mrna_prof_dic=mrna_prof_dic,
                                        ei_ol_stats_dic=ei_ol_stats_dic,
                                        seq_motif_blocks_dic=seq_motif_blocks_dic,
                                        reg2annot_dic=reg2annot_dic,
                                        annot2color_dic=annot2color_dic,
                                        goa_results_df=goa_results_df,
                                        goa_stats_dic=goa_stats_dic,
                                        html_report_out=html_report_out,
                                        reg_seq_str="regions",
                                        reg2seq_dic=out_seqs_dic,
                                        reg2sc_dic=reg2sc_dic,
                                        add_annot_stats_dic=add_annot_stats_dic,
                                        plots_subfolder=plots_subfolder)


    """
    Motif plots and motif hit statistics HTML.

    """
    html_motif_plots_out = os.path.join(args.out_folder, "motif_plots.rbpbench_search.html")
    if args.plot_abs_paths:
        html_motif_plots_out = os.path.join(os.path.abspath(args.out_folder), "motif_plots.rbpbench_search.html")
    # If HTML file already exists, remove it.
    if os.path.exists(html_motif_plots_out):
        os.remove(html_motif_plots_out)

    if args.plot_motifs:
        
        print("Generate motif plots HTML ... ")
        
        plots_subfolder = "html_motif_plots"
        benchlib_path = os.path.dirname(benchlib.__file__)

        # Only valid for searchlongrna.
        args.run_goa_tr = False

        benchlib.search_generate_html_motif_plots(args,
                                         search_rbps_dic, seq_motif_blocks_dic, 
                                         str_motif_blocks_dic, 
                                         benchlib_path, loaded_motif_ids_dic,
                                         rbp2motif2annot2c_dic=rbp2motif2annot2c_dic,
                                         annot2color_dic=annot2color_dic,
                                         html_report_out=html_motif_plots_out,
                                         reg_seq_str="regions",
                                         id2pids_dic=id2pids_dic,
                                         id2exp_dic=id2exp_dic,
                                         match_c_dic=match_c_dic,
                                         match_c_total_dic=match_c_total_dic,
                                         plots_subfolder=plots_subfolder)

    """
    Output parameter settings.

    """
    # Output mode settings.
    print("Output parameter settings ... ")
    SETOUT = open(settings_file, "w")
    for arg in vars(args):
        SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
    for call in call_dic:
        SETOUT.write("%s\t%s\n" %(call, call_dic[call]))
    SETOUT.close()

    """
    Take out the trash.

    """
    print("Delete .tmp files ... ")
    if os.path.exists(out_tmp_bed):
        os.remove(out_tmp_bed)
    if os.path.exists(cmstat_tmp_out):
        os.remove(cmstat_tmp_out)

    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    # print("Co-occurrence p-values for each RBP pair .tsv:\n%s" %(con_res_out_tsv))
    print("RBP co-occurrence stats .tsv:\n%s" %(cooc_stats_out))
    print("Filtered input regions .bed:\n%s" %(filtered_sites_bed))
    print("Filtered input regions .fa:\n%s" %(filtered_sites_fa))
    print("Motif hits .bed:\n%s" %(motif_hits_bed_out))
    print("Matched sequence stats .tsv:\n%s" %(matched_seqs_out))
    print("RBP region occupancies .tsv:\n%s" %(rbp_reg_occ_table_out))
    print("RBP hit stats .tsv:\n%s" %(rbp_stats_out))
    print("Motif hit stats .tsv:\n%s" %(motif_stats_out))
    if args.run_goa:
        print("GO enrichment analysis results .tsv:\n%s" %(goa_results_tsv))
    if reg_annot_table_file is not None:
        print("Region annotations .tsv:\n%s" %(reg_annot_table_file))
    if target_reg_annot_file is not None:
        print("Target region annotations .tsv:\n%s" %(target_reg_annot_file))
    if args.plot_motifs:
        print("Motif plots and hit statistics .html:\n%s" %(html_motif_plots_out))
    print("Search report .html:\n%s" %(html_report_out))
    print("")


################################################################################

def main_batch(args):
    """
    Batch search motifs.

    """

    print("Running for you in BATCH mode ... ")


    assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)

    """
    Check input --bed BED files.

    """
    bed_path = False
    bed_list = []

    if len(args.bed_files) == 1:
        if os.path.isdir(args.bed_files[0]):
            bed_path = args.bed_files[0]
            bed_files = benchlib.dir_get_files(bed_path, file_ending="bed")
            assert bed_files, "no BED files (.bed file extension expected) found in given --bed %s folder" %(bed_path)
            for bed_file in bed_files:
                bed_list.append(bed_file)
        elif os.path.isfile(args.bed_files[0]):
            bed_list.append(args.bed_files[0])
        else:
            assert False, "given --bed argument %s is not a file or a folder" %(args.bed_files[0])
    elif len(args.bed_files) > 1:
        for bed_file in args.bed_files:
            if os.path.isfile(bed_file):
                bed_list.append(bed_file)
            else:
                assert False, "given --bed argument %s is not a file. > 1 --bed argument was provided, so each element is expected to be a file" %(bed_file)
        # Demand RBP list to be provided.
        assert args.list_rbp_ids, "single BED files provided via --bed, but --rbp-list is not set. Please provide RBP IDs for each BED file in matching order"
    else:
        assert False, "no --bed arguments supplied"
    assert bed_list, "bed_list empty"

    # Is MEME >= v5 installed?
    if not args.meme_disable_check:
        assert benchlib.is_tool("meme"), "meme not in PATH"
        check, meme_version = benchlib.check_tool_version("meme -version", "5.0")
        assert check, "RBPBench requires meme version >= 5.0 (installed version: %s)" %(meme_version)

    if args.hk_gene_list:
        assert args.in_gtf, "set --goa requires --gtf GTF file"

    if args.run_goa:
        assert args.in_gtf, "set --goa requires --gtf GTF file"
        if args.goa_obo_mode == 3:
            assert args.goa_obo_file, "set --goa-obo-mode 3 requires --goa-obo-file"
            assert os.path.exists(args.goa_obo_file), "--goa-obo-file file \"%s\" not found" % (args.goa_obo_file)
        if args.goa_obo_file:
            assert args.goa_obo_mode == 3, "--goa-obo-file requires --goa-obo-mode 3"
        if args.goa_max_child is not None:
            assert args.goa_max_child >= 0, "set --goa-max-child expected to be >= 0"
        if args.goa_min_depth is not None:
            assert args.goa_min_depth >= 0, "set --goa-min-depth expected to be >= 0"

    # Check if MEME version is >= 5.5.4 (need to add fimo --no-pgc option to produce same results!).
    fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text"
    if not args.meme_disable_check:
        check, meme_version = benchlib.check_tool_version("meme -version", "5.5.4")
        args.meme_version = meme_version
        if check:
            fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"
    if args.meme_no_pgc:
        fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"

    # --report / --gtf checks.
    if args.tr_list:
        assert args.in_gtf, "--tr-list set, but --gtf not set. Please provide --gtf file"
    assert benchlib.boundary_check(args.gtf_feat_min_overlap, 1E-9, 1.0), "set --gtf-feat-min-overlap expected to be >= 1E-9 and <= 1.0"
    assert benchlib.boundary_check(args.gtf_intron_border_len, 1, 1000), "set --gtf-intron-border-eln expected to be >= 1 and <= 1000"
    if args.in_gtf:
        assert os.path.exists(args.in_gtf), "set --gtf file not found"
    assert benchlib.boundary_check(args.kmer_size, 1, 6), "set --kmer-size expected to be >= 1 and <= 6"
    assert benchlib.boundary_check(args.seq_var_kmer_size, 1, 5), "set --seq-var-kmer-size expected to be >= 1 and <= 5"

    # Additional BED regions for annotation.
    if args.add_annot_bed:
        assert os.path.exists(args.add_annot_bed), "--add-annot-bed file \"%s\" not found" % (args.add_annot_bed)
        # Check if BED format.
        benchlib.bed_check_format(args.add_annot_bed, param_str="--add-annot-bed")
        # Check annotation ID.
        args.add_annot_id = benchlib.remove_special_chars_from_str(args.add_annot_id)
        assert args.add_annot_id, "empty string after removing special chars from --add-annot-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"

    """
    Regex checks.

    """

    regex_type = "sequence"
    regex = False

    if args.regex:

        if args.regex_type == 1:
            print("Check given --regex type ... ")
            if benchlib.looks_like_structure(args.regex):
                print("Given --regex looks like structure pattern ... ")
                regex_type = "structure"
            else:
                print("Given --regex assumed to be standard regex ... ")
                regex_type = "sequence"
        elif args.regex_type == 2:
            regex_type = "sequence"
        elif args.regex_type == 3:
            regex_type = "structure"
        else:
            assert False, "unexpected --regex-type value set (%d)" %(args.regex_type)

        if regex_type == "sequence":

            assert benchlib.is_valid_regex(args.regex), "given --regex \"%s\" is not a valid regular expression. Please provide valid expression" % (args.regex)

            # Remove , ; from given regex, to avoid motif_id format conflicts.
            regex = benchlib.remove_special_chars_from_str(args.regex,
                                                        reg_ex="[ ;]",
                                                        to_upper=False)  # [ :;\(\)]
            
            assert regex, "empty string after removing special chars ( ;) from --regex. Please provide a valid regex with DNA letters"

            # Convert IUPAC codes (if present) in regex to standard regex format.
            regex = benchlib.convert_iupac_in_regex(regex)

        elif regex_type == "structure":

            # Check structure pattern and format.
            regex = benchlib.check_format_str_pattern(args.regex)

        args.regex = regex


    """
    Check other list inputs and store.

    """

    rbp_ids_list = []
    if args.list_rbp_ids:
        assert not bed_path, "--rbp-list expects BED files provided in same order with --bed, not as BED folder"
        assert len(args.list_rbp_ids) == len(bed_list), "number of --rbp-list arguments != number of --bed arguments"
        for rbp_id in args.list_rbp_ids:
            # new_rbp_id = benchlib.remove_special_chars_from_str(rbp_id)
            # assert new_rbp_id, "empty string after removing special chars from --rbp-list argument %s. Please provide valid RBP ID (i.e., RBP ID from database)" %(rbp_id)
            rbp_ids_list.append(rbp_id)

    data_ids_list = []
    if args.list_data_ids:
        assert not bed_path, "--data-list expects BED files provided in same order with --bed, not as BED folder"
        assert len(args.list_data_ids) == len(bed_list), "number of --data-list arguments != number of --bed arguments"
        for data_id in args.list_data_ids:
            new_data_id = benchlib.remove_special_chars_from_str(data_id)
            assert new_data_id, "empty string after removing special chars from --data-list argument %s. Please provide alphanumeric string (- or _ are okay as well)" %(data_id)
            data_ids_list.append(new_data_id)
    else:
        args.data_id = benchlib.remove_special_chars_from_str(args.data_id)
        assert args.data_id, "empty string after removing special chars from --data-id. Please provide alphanumeric string for data ID (- or _ are okay as well)"

    method_ids_list = []
    if args.list_method_ids:
        assert not bed_path, "--method-list expects BED files provided in same order with --bed, not as BED folder"
        assert len(args.list_method_ids) == len(bed_list), "number of --method-list arguments != number of --bed arguments"
        for method_id in args.list_method_ids:
            new_method_id = benchlib.remove_special_chars_from_str(method_id)
            assert new_method_id, "empty string after removing special chars from --method-list argument %s. Please provide alphanumeric string (- or _ are okay as well)" %(method_id)
            method_ids_list.append(new_method_id)
    else:
        args.method_id = benchlib.remove_special_chars_from_str(args.method_id)
        assert args.method_id, "empty string after removing special chars from --method-id. Please provide alphanumeric string for method ID (- or _ are okay as well)"

    if not bed_path and len(bed_list) == 1:
        # Check if file is a table file and not a BED file.
        if benchlib.check_table_file(bed_list[0]):
            print("Table file provided via --bed ... ")
            print("Read in table file infos (including RBP, method, data IDs + BED file paths) ... ")
            dataset_list = benchlib.read_in_table_file(bed_list[0])
            rbp_ids_list = []
            method_ids_list = []
            data_ids_list = []
            bed_list = []
            for dataset in dataset_list:
                rbp_ids_list.append(dataset[0])
                method_ids_list.append(dataset[1])
                data_ids_list.append(dataset[2])
                bed_list.append(dataset[3])

    """
    Extracting RBP IDs from BED file names.

    """

    if not rbp_ids_list:
        print("No --rbp-list provided. Extracting RBP IDs from --bed file names ... ")

        for bed_file in bed_list:
            rbp_id = False
            if re.search(r".+\.bed", bed_file):
                m = re.search(r"(.+)\.bed", bed_file)
                fn = m.group(1)
                fnp = fn.split("_")
                rbp_id = fnp[0]
            else:
                assert False, "BED file %s with invalid ending (.bed expected)" %(bed_file)
            assert rbp_id, "no RBP ID extracted from BED file name %s" %(bed_file)
            rbp_ids_list.append(rbp_id)

    assert rbp_ids_list, "no RBP IDs list after pre-processing"
    assert len(rbp_ids_list) == len(bed_list), "number of RBP IDs != number of --bed files"

    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.fimo_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.fimo_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    if args.fimo_user_ntf_file:
        fimo_freqs_file = args.fimo_user_ntf_file
    assert os.path.exists(fimo_freqs_file), "set FIMO nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    Motif database.
    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                                          db_path=db_path)

    # Custom motif database: folder given.
    if args.custom_db:
        assert not args.custom_db_meme_xml, "--custom-db folder set incompatible with --custom-db-meme-xml. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_cm, "--custom-db folder set incompatible with --custom-db-cm. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_info, "--custom-db folder set incompatible with --custom-db-info. Provide custom motif database either via folder (--custom-db) or as single files"
        seq_motifs_db_file, str_motifs_db_file, rbp2ids_file = specify_custom_motif_db(args.custom_db)
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
    # Custom motif database: single files given.
    if args.custom_db_meme_xml or args.custom_db_cm or args.custom_db_info:
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
        assert not args.custom_db, "single custom motif database files provided not compatible with --custom-db. Provide custom motif database either via folder (--custom-db) or as single files"
        assert args.custom_db_info, "--custom-db-info needed to define custom motif database"
        rbp2ids_file = args.custom_db_info
        assert args.custom_db_meme_xml or args.custom_db_cm, "--custom-db-meme-xml and/or --custom-db-cm needed to define custom motif database"
        if args.custom_db_meme_xml:
            seq_motifs_db_file = args.custom_db_meme_xml
        else:
            seq_motifs_db_file = ""  # setting to empty string results in os.path.exists -> False.
        if args.custom_db_cm:
            str_motifs_db_file = args.custom_db_cm
        else:
            str_motifs_db_file = ""

    args.motif_db_str = motif_db_str

    """
    Get ID mappings

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")

    """
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)

    # Motif ID -> RBP name (== RBP ID) mapping.
    id2name_dic = {}
    for rbp_id in name2ids_dic:
        for motif_id in name2ids_dic[rbp_id]:
            id2name_dic[motif_id] = rbp_id


    """
    Get MEME XML database motif blocks dictionary.

    """
    seq_motif_blocks_dic = {}
    if os.path.exists(seq_motifs_db_file):
        seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file,
                                                           empty_check=True)
    for motif_id in seq_motif_blocks_dic:
        assert motif_id in id2name_dic, "MEME XML motif ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    """
    Get covariance model database motif blocks dictionary.

    """
    str_motif_blocks_dic = {}
    if str_motifs_db_file and os.path.exists(str_motifs_db_file):
        str_motif_blocks_dic = benchlib.read_in_cm_blocks(str_motifs_db_file,
                                                          empty_check=True)
        for motif_id in str_motif_blocks_dic:
            assert motif_id in id2name_dic, "Covariance model accession ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    """
    Check if RBP IDs are in database.

    """
    for rbp_id in rbp_ids_list:
        assert rbp_id in name2ids_dic, "given RBP ID \"%s\" not in motif database. Please provide RBP IDs with motifs in database (check available RBPs via rbpbench info)" %(rbp_id)

    for rbp_id in rbp_ids_list:
        for motif_id in name2ids_dic[rbp_id]:
            found = 0
            if motif_id in seq_motif_blocks_dic:
                found += 1
            if motif_id in str_motif_blocks_dic:
                found += 1
            assert found, "no motifs loaded for RBP ID \"%s\". Please provide the respective motifs file" %(rbp_id)

    """
    Run ID definition.

    Use internal_id instead (to connect RBP and motif stats tables)
    import os
    import base64
    base64.urlsafe_b64encode(os.urandom(6)).decode()
    https://stackoverflow.com/questions/13484726/safe-enough-8-character-short-unique-random-string
    
    """

    run_id = "run_id"
    if args.run_id:
        run_id = benchlib.remove_special_chars_from_str(args.run_id)
        assert run_id, "empty string after removing special chars from --run-id. Please provide alphanumeric string for run ID (- or _ are okay as well)"
    # else:
    #     random_id = uuid.uuid4()
    #     run_id = str(random_id)
    print("Run ID:", run_id)

    """
    Output folders + files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    filtered_sites_bed = os.path.join(args.out_folder, "in_sites.filtered.bed")
    filtered_sites_fa = os.path.join(args.out_folder, "in_sites.filtered.fa")
    seq_motifs_xml = os.path.join(args.out_folder, "seq_motifs.xml")
    str_motifs_cm = os.path.join(args.out_folder, "str_motifs.cm")
    fimo_res_tsv = os.path.join(args.out_folder, "fimo_results.tsv")
    cmsearch_res_txt = os.path.join(args.out_folder, "cmsearch_results.txt")

    rbp_stats_out = os.path.join(args.out_folder, "rbp_hit_stats.tsv")
    motif_stats_out = os.path.join(args.out_folder, "motif_hit_stats.tsv")
    # con_res_out_tsv = os.path.join(args.out_folder, "contingency_table_results.tsv")
    settings_file = os.path.join(args.out_folder, "settings.rbpbench_batch.out")

    # Output unique motif hits.
    motif_hits_bed_out = args.out_folder + "/motif_hits.rbpbench_batch.bed"
    # Unique regex hits over all input datasets.
    regex_hits_bed_out = args.out_folder + "/regex_hits.rbpbench_batch.bed"

    # GOA results.
    goa_results_tsv = args.out_folder + "/goa_results.tsv"

    # Addition annotation regions output BED files.
    gene_regions_bed = args.out_folder + "/gene_regions.bed"
    promoter_regions_bed = args.out_folder + "/promoter_regions.bed"

    # Temp files.
    # random_id = uuid.uuid1()
    # tmp_out_bed = args.out_folder + "/" + str(random_id) + ".filtered_in.bed"
    out_tmp_bed = args.out_folder + "/rbp_motif_hit_regions.tmp.bed"
    cmstat_tmp_out = args.out_folder + "/cmstat_out.tmp.txt"

    # Delete if existing folder.
    if os.path.exists(fimo_res_tsv):
        os.remove(fimo_res_tsv)
    if os.path.exists(cmsearch_res_txt):
        os.remove(cmsearch_res_txt)

    """
    Get chromosome IDs from --genome.
    """
    print("Get --genome FASTA headers ... ")
    chr_ids_dic = benchlib.get_fasta_headers(args.in_genome)


    """
    Guess chromosome ID style.

    chr_style:
        1: chr1, chr2, ..., chrX, chrM
        2: 1, 2, ... , X, MT

    """
    print("Guess chromosome ID style (based on --genome FASTA headers) ... ")
    chr_style = benchlib.guess_chr_id_style(chr_ids_dic)

    """
    Region extensions.

    """

    # Process extension info.
    ext_up, ext_down = benchlib.get_ext_parts(args.ext_up_down)

    args.ext_up = ext_up
    args.ext_down = ext_down


    """
    Promoter region definition.
    
    """

    prom_ext_parts = args.prom_ext_up_down.split(",")
    c_prom_ext_parts = len(prom_ext_parts)
    assert c_prom_ext_parts == 2, "invalid --prom-ext argument provided (correct format: --prom-ext 1000,100, i.e., please provide two integers separated by a comma)"

    prom_ext_up = int(prom_ext_parts[0])
    prom_ext_down = int(prom_ext_parts[1])

    assert benchlib.boundary_check(prom_ext_up, 1, 100000), "set promoter upstream extension expected to be >= 1 and <= 100000"
    assert benchlib.boundary_check(prom_ext_down, 0, 100000), "set promoter downstream extension expected to be >= 0 and <= 100000"


    """
    If --gtf file, read in GTF info.

    """
    intron_exon_out_bed = args.out_folder + "/intron_exon_regions.tmp.bed"
    intron_exon_border_out_bed = args.out_folder + "/intron_exon_border_regions.tmp.bed"

    tr_ids_dic = {}  # Store (most prominent / representative) transcript ID -> gene ID.
    all_sets_occ_tr_ids_dic = {}  # Store only transcript IDs which are occupied in any dataset.

    # For gene/representative transcript region occupancy PCA plot.
    id2occ_list_dic = {}  # internal ID -> transcript/gene occupancy labels list.
    occ_mode = 1  # 1: binary occupancy, 2: continuous value occupancy.
    occ_all_tr = False  # Store all transcript occupancy values for PCA plot.
    id2occ_tr_ids_dic = {}  # internal ID -> transcript ID -> occupancy value.
    id2occ_hits_tr_ids_dic = {}  # internal ID -> transcript ID -> occupancy value (only sites with motif hits considered here).
    background_genes_dic = {}  # Store all gene IDs in GTF file to use as background genes.
    id2hk_gene_stats_dic = {}  # internal ID -> housekeeping gene stats dictionary.
    hk_tr_ids_dic = {}  # Store loaded transcript IDs that stem from housekeeping genes.
    gid2gio_dic = None
    tid2tio_dic = None
    add_annot_stats_dic = {}

    if args.in_gtf:
    
        # Get gene infos.
        print("Read in gene features from --gtf ... ")
        tr2gid_dic = {}
        tr_types_dic = {}  # Store transcript biotypes in GTF file.
        gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                    tr2gid_dic=tr2gid_dic,
                                                    tr_types_dic=tr_types_dic,
                                                    check_chr_ids_dic=chr_ids_dic,
                                                    chr_style=chr_style,
                                                    empty_check=False)

        assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
        c_gene_infos = len(gid2gio_dic)
        print("# gene features read in from --gtf:", c_gene_infos)

        for gene_id in gid2gio_dic:
            background_genes_dic[gene_id] = gid2gio_dic[gene_id].gene_name

        # If --tr-list given.
        if args.tr_list:
            assert os.path.exists(args.tr_list), "given --tr-list file \"%s\" not found" % (args.tr_list)
            tr_ids_dic = benchlib.read_ids_into_dic(args.tr_list,
                                                    check_dic=False)
            assert tr_ids_dic, "no IDs read in from provided --tr-list file. Please provide a valid IDs file (one ID per row)"
            for tr_id in tr_ids_dic:
                assert tr_id in tr2gid_dic, "transcript ID \"%s\" from provided --tr-list file does not appear in --gtf file. Please provide compatible IDs + files" %(tr_id)
                tr_ids_dic[tr_id] = tr2gid_dic[tr_id]
            print("# of transcript IDs (read in from --tr-list): ", len(tr_ids_dic))
        else:
            # Get most prominent transcripts from gene infos.
            tr_ids_dic = benchlib.select_mpts_from_gene_infos(gid2gio_dic,
                                    basic_tag=False,  # do not be strict (only_tsl=False too).
                                    ensembl_canonical_tag=False,
                                    prior_basic_tag=True,  # Prioritize basic tag transcript.
                                    prior_mane_select=True,  # mane select if set trumps all.
                                    prior_lncrna_primary_tag=True,
                                    only_tsl=False)
            assert tr_ids_dic, "most prominent transcript selection from gene infos failed. Please contact developers"
            print("# of transcript IDs (most prominent transcripts): ", len(tr_ids_dic))

        # Check exon order (return True if minus strand exon 1 is most downstream, not most upstream, which is the correct way).
        print("Check minus-strand exon order in --gtf ... ")
        correct_min_ex_order = benchlib.gtf_check_exon_order(args.in_gtf)
        if correct_min_ex_order:
            print("Correct order encountered ... ")
        else:
            print("Reverse order encountered ... ")
        # Get transcript infos.
        print("Read in transcript infos from --gtf ... ")
        tid2tio_dic = benchlib.gtf_read_in_transcript_infos(args.in_gtf, 
                                                            tr_ids_dic=tr_ids_dic,
                                                            correct_min_ex_order=correct_min_ex_order,
                                                            chr_style=chr_style,
                                                            empty_check=False)

        assert tid2tio_dic, "no transcript infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"

        # (in)sanity checks.
        for tr_id in tr_ids_dic:
            assert tr_id in tid2tio_dic, "transcript ID %s not in tid2tio_dic"
        for tr_id in tid2tio_dic:
            assert tr_id in tr_ids_dic, "transcript ID %s not in tr_ids_dic"

        c_tr_infos = len(tid2tio_dic)
        print("# transcript features read in from --gtf:", c_tr_infos)

        # If housekeeping genes list given.
        if args.hk_gene_list:

            print("Read in --hk-gene-list ... ")

            assert os.path.exists(args.hk_gene_list), "given --hk-gene-list file \"%s\" not found" % (args.hk_gene_list)

            hk_gene_ids_dic = benchlib.read_ids_into_dic(args.hk_gene_list,
                                                        check_dic=False)

            assert hk_gene_ids_dic, "no gene IDs read in from --hk-gene-list. Please provide gene IDs in a text file, one ID per line"

            print("# of housekeeping gene IDs read in: %i" %(len(hk_gene_ids_dic)))

            c_hkg_in_gtf = 0
            for gid in hk_gene_ids_dic:
                if gid in gid2gio_dic:
                    c_hkg_in_gtf += 1

            assert c_hkg_in_gtf, "none of the provided housekeeping gene IDs found in --gtf. Please provide compatible --gtf and --hk-gene-list files"

            for tid in tid2tio_dic:
                gid = tid2tio_dic[tid].gene_id
                if gid in hk_gene_ids_dic:
                    hk_tr_ids_dic[tid] = gid

            assert hk_tr_ids_dic, "none of the loaded transcript IDs are associated with housekeeping genes. Please provide compatible transcript IDs and --hk-gene-list gene IDs"

            print("# of housekeeping gene-associated transcript IDs: %i" %(len(hk_tr_ids_dic)))


        # Extract exon + intron regions of selected transcripts from transcript infos.
        print("Output intron annotations to BED ... ")
        benchlib.output_transcript_info_intron_exon_to_bed(tid2tio_dic, intron_exon_out_bed,
                                            output_mode=3,  # only introns.
                                            report_counts=True,
                                            add_tr_id=True,  # new reg_id format: intron;ENST000006666
                                            add_numbers=True,  # new reg_id format: intron;ENST000006666;1-2
                                            number_format=1,  # format intron;ENST000006666;1-2
                                            empty_check=False)

        # Custom transcript biotypes for upset plot.
        custom_annot_dic = None
        if args.tr_types_list:
            for tr_type in args.tr_types_list:
                assert tr_type in tr_types_dic, "given transcript biotype ID \"%s\" not found in GTF file"
                custom_annot_dic[tr_type] = tr_type

        # Append detailed exon annotations (CDS, UTR, transcript biotypes) to intron annotations.
        print("Output exon annotations to BED ... ")
        benchlib.output_exon_annotations(tid2tio_dic, intron_exon_out_bed,
                                         custom_annot_dic=custom_annot_dic,
                                         add_numbers=True,
                                         append=True)

        print("Output exon + intron border regions ... ")
        benchlib.exon_intron_border_regions_to_bed(tid2tio_dic, intron_exon_border_out_bed,
                                            tr_ids_dic=None,
                                            intron_border_len=args.gtf_intron_border_len,
                                            ei_border_len=50)

        """
        Output additional annotations to BED files.

        """

        print("Output gene regions to BED ... ")

        benchlib.output_gene_regions_to_bed(gid2gio_dic, gene_regions_bed,
                                            add_annot_stats_dic=add_annot_stats_dic)

        print("Output putative promoter regions to BED ... ")

        benchlib.output_promoter_regions_to_bed(tid2tio_dic, promoter_regions_bed,
                                        prom_min_tr_len=args.prom_min_tr_len,
                                        prom_mrna_only=args.prom_mrna_only,
                                        prom_both_str=args.prom_both_str,
                                        mrna_biotype_label="protein_coding",
                                        prom_ext_up=prom_ext_up,
                                        prom_ext_down=prom_ext_down,
                                        add_annot_stats_dic=add_annot_stats_dic)


        if args.goa_bg_gene_list:

            print("Read in background genes from --goa-gb-gene-list ... ")

            assert os.path.exists(args.goa_bg_gene_list), "given --goa-bg-gene-list file \"%s\" not found" % (args.goa_bg_gene_list)

            bg_gene_ids_dic = benchlib.read_ids_into_dic(args.goa_bg_gene_list,
                                                         check_dic=False)

            print("# of gene IDs read in: %i" %(len(bg_gene_ids_dic)))
            print("Filter background genes by --gtf genes ... ")

            new_background_genes_dic = {}
            for gene_id in bg_gene_ids_dic:
                if gene_id in gid2gio_dic:
                    new_background_genes_dic[gene_id] = gid2gio_dic[gene_id].gene_name
            
            assert new_background_genes_dic, "given --goa-gb-gene-list gene IDs not found in --gtf. Please provide compatible --gtf and --goa-bg-gene-list files"

            print("# of background genes before filtering: %i" %(len(background_genes_dic)))
            print("# of background genes after filtering:  %i" %(len(new_background_genes_dic)))

            background_genes_dic = new_background_genes_dic


    """
    Output files + write headers.

    """
    # RBP batch stats file.
    OUTRBPSTATS = open(rbp_stats_out,"w")
    rbp_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\trbp_id\tc_regions\tmean_reg_len\tmedian_reg_len\tmin_reg_len\tmax_reg_len\t"
    rbp_stats_header += "called_reg_size\teffective_reg_size\tc_reg_with_hits\tperc_reg_with_hits\t"
    rbp_stats_header += "c_motif_hits\tc_uniq_motif_hits\tc_uniq_motif_nts\tperc_uniq_motif_nts_cal_reg\tperc_uniq_motif_nts_eff_reg\tuniq_motif_hits_cal_1000nt\t"
    rbp_stats_header += "uniq_motif_hits_eff_1000nt\twc_pval\twc_rbc_eff_size\twc_cl_eff_size\tseq_motif_ids\tseq_motif_hits\tstr_motif_ids\tstr_motif_hits\tinternal_id\n"
    OUTRBPSTATS.write(rbp_stats_header)

    # Motif batch stats file.
    OUTMTFSTATS = open(motif_stats_out,"w")
    motif_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\tregion_id\trbp_id\tmotif_id\tchr_id\tgen_s\tgen_e\tstrand\tregion_s\tregion_e\tregion_len\t"
    motif_stats_header += "uniq_count\tfimo_score\tfimo_pval\tcms_score\tcms_eval\tmatched_seq\tinternal_id\n"
    OUTMTFSTATS.write(motif_stats_header)

    args.internal_id = []

    # Unique motif regions BED.
    motif_reg_dic = {}
    # Unique regex regions BED.
    regex_reg_dic = {}

    # Unstranded option.
    if args.unstranded:
        print("WARNING: --unstranded enabled. Using both strands for each --in BED region ... ")
        if ext_up != ext_down:
            print("asymmetric --ext and --unstranded set. Extend plus strand and use corresponding minus strand ... ")

    print("Run motif search for each dataset ... ")

    """
    Hypothesis test modes.

    """
    # Wilcoxon rank-sum test / Mann Whitney U test mode.
    wrs_alt_hypo = "greater"
    if args.wrs_mode == 1:
        wrs_alt_hypo = "greater"
        # print("Check if motif-containing regions have significantly higher scores ... ")
    elif args.wrs_mode == 2:
        wrs_alt_hypo = "less"
        # print("Check if motif-containing regions have significantly lower scores ... ")
    else:
        assert False, "Invalid Wilcoxon rank-sum (Mann Whitney U) test mode: %i" %(args.wrs_mode)

    # Fisher exact test mode.
    fisher_alt_hypo = "greater"
    if args.fisher_mode == 1:
        fisher_alt_hypo = "greater"
        # print("Fisher mode = 1, reporting significantly overrepresented co-occurrences ... ")
    elif args.fisher_mode == 2:
        fisher_alt_hypo = "two-sided"
        # print("Fisher mode = 2, reporting significantly over- AND underrepresented co-occurrences ... ")
    elif args.fisher_mode == 3:
        fisher_alt_hypo = "less"
        # print("Fisher mode = 3, reporting significantly underrepresented co-occurrences ... ")
    else:
        assert False, "Invalid Fisher mode: %i" %(args.fisher_mode)

    """
    Run motif search for each BED / RBP ID combination.

    """
    call_dic = {}
    annot_dic = {}  # Store all occurring annotations as keys.
    id2c_regions_dic = {}  # Internal ID to number of sequences / regions in input BED file + numbers with hits, so format: [c_all, c_with_hits]
    id2reg_annot_dic = {}  # Store internal ID to region annotation dictionary (all sites, with and without hits).
    id2hit_reg_annot_dic = {}  # Store internal ID to region annotation dictionary (only sites with hits).
    id2infos_dic = {}  # Store internal ID to list containing RBP ID, data ID, method ID, database ID, BED file path.
    id2motif_enrich_stats_dic = {}  # Store internal ID to RBP motif enrichment stats dictionary.
    id2wc_pval_dic = {}  # Store internal ID to Wilcoxon rank-sum test p-value and effect sizes.
    id2regex_stats_dic = {}  # Store internal ID to regex stats dictionary.
    # id2kmer_dic = {}  # Store internal ID to k-mer dictionary.
    c_all_hits = 0  # Sum up hits for all RBPs.
    kmer_freqs_ll = []  # For PCA plot.
    kmer_list = []  # For PCA plot.
    dataset_ids_list = []  # For PCA plot.
    internal_ids_list = []
    add_motif_db_info = False  # Whether to add the motif database string to the dataset IDs in the plots.
    # dataset_idx = 0  # For PCA plot.
    seq_len_stats_ll = []  # For sequence length stats in report.
    seq_feat_ll = []  # For sequence feature stats inreport.
    seq_var_ll = []  # For sequence variation stats in report.
    seq_var_kmer_l = []  # For sequence variation stats in report.
    ei_ol_stats_dic = {}
    # Dataset IDs to additional annotation stats dictionary.
    dsid2add_annot_stats_dic = {}
    # Settings for all datasets.
    if add_annot_stats_dic:
        dsid2add_annot_stats_dic["general"] = {"c_genes": add_annot_stats_dic["c_genes"], 
                                            "c_promoters": add_annot_stats_dic["c_promoters"], 
                                            "c_filt_min_tr_len": add_annot_stats_dic["c_filt_min_tr_len"], 
                                            "c_filt_mrna_only": add_annot_stats_dic["c_filt_mrna_only"],
                                            "prom_ext_up": prom_ext_up,
                                            "prom_ext_down": prom_ext_down}

    for idx, bed_file in enumerate(bed_list):

        rbp_id = rbp_ids_list[idx]
        bed_file_path = bed_file
        if bed_path:
            bed_file_path = bed_path + "/" + bed_file

        data_id = args.data_id
        if data_ids_list:
            data_id = data_ids_list[idx]
        method_id = args.method_id
        if method_ids_list:
            method_id = method_ids_list[idx]

        # Motif IDs for search.
        loaded_motif_ids_dic = {}
        for motif_id in name2ids_dic[rbp_id]:
            loaded_motif_ids_dic[motif_id] = motif_db_str

        # Store motif IDs for search.
        # search_rbps_dic = {}
        seq_rbps_dic = {}
        str_rbps_dic = {}
        motif_id2idx_dic = {} # motif ID -> list index.

        print("")
        print("RBP ID:        ", rbp_id)
        print("BED file:      ", bed_file)
        print("# of motif IDs:", len(loaded_motif_ids_dic))
        print("Motif IDs:     ", loaded_motif_ids_dic)
        internal_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
        args.internal_id.append(internal_id)
        print("Data ID:       ", data_id)
        print("Method ID:     ", method_id)
        print("Internal ID:   ", internal_id)

        id2infos_dic[internal_id] = [rbp_id, data_id, method_id, motif_db_str, bed_file_path]

        rbp = benchlib.RBP(rbp_id, internal_id)

        for motif_id in name2ids_dic[rbp_id]:
            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            else:
                rbp.str_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.str_motif_ids) - 1
                rbp.str_motif_hits.append(0)
                str_rbps_dic[rbp_id] = 1

        """
        Filter / extend --in genomic regions BED file.

        """
        print("Preprocess --in sites ... ")
        reg2sc_dic = {}
        reg_stats_dic = benchlib.bed_filter_extend_bed(bed_file_path, filtered_sites_bed,
                                            ext_up=ext_up,
                                            ext_down=ext_down,
                                            remove_dupl=True,
                                            reg2sc_dic=reg2sc_dic,
                                            score_col=args.bed_score_col,
                                            score_thr=args.bed_sc_thr,
                                            score_rev_filter=args.bed_sc_thr_rev_filter,
                                            chr_ids_dic=chr_ids_dic,
                                            use_region_ids=True,
                                            unstranded=args.unstranded)

        print("# --in regions pre-filtering:  ", reg_stats_dic["c_in"])
        print("# --in regions post-filtering: ", reg_stats_dic["c_out"])
        print("# regions with invalid chr_id: ", reg_stats_dic["c_chr_filter"])
        print("# duplicated regions removed:  ", reg_stats_dic["c_dupl_filter"])
        print("# regions filtered by score:   ", reg_stats_dic["c_sc_thr"])

        assert reg_stats_dic["c_out"], "no --in BED sites remain after chromosome ID (or optionally score) filtering"

        """
        Calculate effective size of genomic regions.
        
        """
        print("Calculate effective genomic region size ... ")
        eff_reg_size = benchlib.get_uniq_gen_size(filtered_sites_bed)

        print("Called region length sum:      ", reg_stats_dic["reg_len_sum"])
        print("Effective region length sum:   ", eff_reg_size)

        """
        Get genomic region sequences from --genome.

        """
        print("Extract sequences from --genome ... ")
        benchlib.bed_extract_sequences_from_fasta(filtered_sites_bed, 
                                                  args.in_genome, filtered_sites_fa,
                                                  print_warnings=True)

        """
        Get FASTA sequences and sequence lengths.
        """

        out_seqs_dic = benchlib.read_fasta_into_dic(filtered_sites_fa,
                                                    dna=True,
                                                    all_uc=True,
                                                    id_check=True,
                                                    empty_check=False,
                                                    skip_n_seqs=False)

        assert out_seqs_dic, "no sequences extracted from FASTA file for --in BED sites. Make sure to use compatible FASTA/BED files!"

        # Effective number of regions used for motif search.
        c_regions = len(out_seqs_dic)
        if args.unstranded and not args.unstranded_ct:
            # Check if sequences are even.
            assert not len(out_seqs_dic) % 2, "# of --in regions should be an even number since --unstranded is set. Please contact developers"
            c_regions = len(out_seqs_dic) // 2

        # Called region size.
        called_reg_size = 0
        len_list = []
        for seq_id in out_seqs_dic:
            seq_len = len(out_seqs_dic[seq_id])
            called_reg_size += seq_len
            len_list.append(seq_len)

        # Length statistics.
        reg_len_median = statistics.median(len_list)
        reg_len_mean = statistics.mean(len_list)
        reg_len_mean = round(reg_len_mean, 2)
        reg_len_min = min(len_list)
        reg_len_max = max(len_list)

        """
        ====================================
        RUN SEQUENCE MOTIF SEARCH WITH FIMO.
        ====================================

        """
        fimo_hits_list = []

        if seq_rbps_dic:

            """
            Print motifs to file.

            """

            print("Output motifs to XML ... ")
            out_str, c_added_motifs = benchlib.blocks_to_xml_string(seq_motif_blocks_dic, loaded_motif_ids_dic)

            benchlib.output_string_to_file(out_str, seq_motifs_xml)

            """
            Run FIMO on sequences + motifs.

            """

            print("Run FIMO ... ")
            benchlib.run_fast_fimo(filtered_sites_fa, seq_motifs_xml, fimo_res_tsv,
                        pval_thr=args.fimo_pval,
                        nt_freqs_file=fimo_freqs_file,
                        call_dic=call_dic,
                        params=fimo_params,
                        error_check=False)

            """
            Read in FIMO hits.

            """

            assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

            print("Read in FIMO results ... ")
            fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                           only_best_hits=args.greatest_hits)

            c_fimo_hits = len(fimo_hits_list)
            print("# of FIMO motif hits:", c_fimo_hits)


        """
        =========================================
        RUN STRUCTURE MOTIF SEARCH WITH CMSEARCH.
        =========================================

        """
        cmsearch_hits_list = []

        if str_rbps_dic:
            
            print("Output covariance models to .cm ... ")
            benchlib.output_cm_blocks_to_file(str_motif_blocks_dic, loaded_motif_ids_dic, str_motifs_cm)

            # Run cmsearch.
            print("Run cmsearch ... ")
            cmsh_mode = ""
            if args.cmsearch_mode == 1:
                cmsh_mode = "--default"
            elif args.cmsearch_mode == 2:
                cmsh_mode = "--max"
            else:
                assert False, "invalid --cmsearch-mode %i set" %(args.cmsearch_mode)
            cmsh_params = "-g --tformat fasta --toponly --incT %s -T %s %s" %(args.cmsearch_bs, args.cmsearch_bs, cmsh_mode)

            benchlib.run_cmsearch(filtered_sites_fa, str_motifs_cm, cmsearch_res_txt,
                                  error_check=False,
                                  call_dic=call_dic,
                                  params=cmsh_params)
            # Read in hits.
            print("Read in cmsearch results ... ")
            cmsearch_hits_list, c_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                               only_best_hits=args.greatest_hits)

            print("# of cmsearch motif hits:", c_cms_hits)

        """
        ==========================
        ADDITIONAL --regex SEARCH.
        ==========================

        regions_with_regex_dic:
            region -> regex_c_region

        unique_regex_dic:
            regex_region -> c_regex_region
        """

        regions_with_regex_dic = {}
        unique_regex_dic = {}
        # Dictionaries needed for Fisher test.
        region_rbp_binds_dic = {}
        # RBP idx: 0, regex idx: 1
        rid2rbpidx2hcp_dic = {}  # region_id -> rbp_idx -> motif hit center position(s)

        if args.regex:

            step_size_one = False
            if args.regex_search_mode == 1:
                step_size_one = True
            elif args.regex_search_mode == 2:
                step_size_one = False
            else:
                assert False, "invalid --regex-search-mode %i set" %(args.regex_search_mode)

            print("Run search for --regex \"%s\" ... " %(regex))
            regex_hits_list = benchlib.get_regex_hits(regex, regex, out_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=True)

            # Store regions with regex motif hits.
            for rh in regex_hits_list:

                if rh.seq_name in regions_with_regex_dic:
                    regions_with_regex_dic[rh.seq_name] += 1
                else:
                    regions_with_regex_dic[rh.seq_name] = 1 

                rh_str = repr(rh) # genomic regex region string.

                if rh_str in unique_regex_dic:
                    unique_regex_dic[rh_str] += 1
                else:
                    unique_regex_dic[rh_str] = 1

                # Store regex hit as BED.
                hit_id = "%s:%s-%s(%s)" %(rh.chr_id, str(rh.start), str(rh.end), rh.strand)
                if hit_id not in regex_reg_dic:
                    bed_row = "%s\t%i\t%i\t%s\t0\t%s" %(rh.chr_id, rh.start-1, rh.end, regex, rh.strand)
                    regex_reg_dic[hit_id] = bed_row

            # Init dictionaries for Fisher test.
            for reg_id in out_seqs_dic:
                assert reg_id in reg2sc_dic, "region ID \"%s\" from out_seqs_dic not found in reg2sc_dic" %(reg_id)
                if args.unstranded and not args.unstranded_ct:
                    core_id = benchlib.reg_get_core_id(reg_id)  # If --unstranded (and not --unstranded-ct), get core ID to count region as one.
                    if core_id not in region_rbp_binds_dic:
                        region_rbp_binds_dic[core_id] = [False, False]
                        rid2rbpidx2hcp_dic[core_id] = {}
                else:
                    region_rbp_binds_dic[reg_id] = [False, False]
                    rid2rbpidx2hcp_dic[reg_id] = {}

            # Populate dictionaties fimo + cmsearch + regex hits.
            rbp_idx = 0

            for fh in fimo_hits_list:

                region_id = fh.seq_name

                # Center position of motif hit.
                motif_hit_s = fh.seq_s - 1
                motif_hit_e = fh.seq_e
                center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

                if args.unstranded and not args.unstranded_ct:
                    core_id = benchlib.reg_get_core_id(region_id)
                    region_rbp_binds_dic[core_id][rbp_idx] = True

                    if rbp_idx not in rid2rbpidx2hcp_dic[core_id]:
                        rid2rbpidx2hcp_dic[core_id][rbp_idx] = [center_pos]
                    else:
                        rid2rbpidx2hcp_dic[core_id][rbp_idx].append(center_pos)
                else:
                    region_rbp_binds_dic[region_id][rbp_idx] = True

                    if rbp_idx not in rid2rbpidx2hcp_dic[region_id]:
                        rid2rbpidx2hcp_dic[region_id][rbp_idx] = [center_pos]
                    else:
                        rid2rbpidx2hcp_dic[region_id][rbp_idx].append(center_pos)

            for cmsh in cmsearch_hits_list:

                region_id = cmsh.seq_name

                # Center position of motif hit.
                motif_hit_s = cmsh.seq_s - 1
                motif_hit_e = cmsh.seq_e
                center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

                if args.unstranded and not args.unstranded_ct:
                    core_id = benchlib.reg_get_core_id(region_id)
                    region_rbp_binds_dic[core_id][rbp_idx] = True

                    if rbp_idx not in rid2rbpidx2hcp_dic[core_id]:
                        rid2rbpidx2hcp_dic[core_id][rbp_idx] = [center_pos]
                    else:
                        rid2rbpidx2hcp_dic[core_id][rbp_idx].append(center_pos)
                else:
                    region_rbp_binds_dic[region_id][rbp_idx] = True

                    if rbp_idx not in rid2rbpidx2hcp_dic[region_id]:
                        rid2rbpidx2hcp_dic[region_id][rbp_idx] = [center_pos]
                    else:
                        rid2rbpidx2hcp_dic[region_id][rbp_idx].append(center_pos)

            rbp_idx = 1
            for rh in regex_hits_list:

                region_id = rh.seq_name

                # Center position of motif hit.
                motif_hit_s = rh.seq_s - 1
                motif_hit_e = rh.seq_e
                center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

                if args.unstranded and not args.unstranded_ct:
                    core_id = benchlib.reg_get_core_id(region_id)
                    region_rbp_binds_dic[core_id][rbp_idx] = True

                    if rbp_idx not in rid2rbpidx2hcp_dic[core_id]:
                        rid2rbpidx2hcp_dic[core_id][rbp_idx] = [center_pos]
                    else:
                        rid2rbpidx2hcp_dic[core_id][rbp_idx].append(center_pos)
                else:
                    region_rbp_binds_dic[region_id][rbp_idx] = True

                    if rbp_idx not in rid2rbpidx2hcp_dic[region_id]:
                        rid2rbpidx2hcp_dic[region_id][rbp_idx] = [center_pos]
                    else:
                        rid2rbpidx2hcp_dic[region_id][rbp_idx].append(center_pos)


        """
        Store regions with motif hits (and hit counts).
        This tells us, how many input regions have motif hits (+ how many hits).

        Also store the unique motif hit regions (and hit counts).

        regions_with_motifs_dic:
            region -> motif_c_region

        unique_motifs_dic:
            motif_region -> c_motif_region

        """

        regions_with_motifs_dic = {}
        unique_motifs_dic = {}

        # Store regions with sequence motifs.
        for fh in fimo_hits_list:

            if fh.seq_name in regions_with_motifs_dic:
                regions_with_motifs_dic[fh.seq_name] += 1
            else:
                regions_with_motifs_dic[fh.seq_name] = 1 

            fh_str = repr(fh) # genomic motif region string.

            if fh_str in unique_motifs_dic:
                unique_motifs_dic[fh_str] += 1
            else:
                unique_motifs_dic[fh_str] = 1

        # Store regions with structure motifs.
        for cmsh in cmsearch_hits_list:

            if cmsh.seq_name in regions_with_motifs_dic:
                regions_with_motifs_dic[cmsh.seq_name] += 1
            else:
                regions_with_motifs_dic[cmsh.seq_name] = 1 

            cmsh_str = repr(cmsh) # genomic motif region string.

            if cmsh_str in unique_motifs_dic:
                unique_motifs_dic[cmsh_str] += 1
            else:
                unique_motifs_dic[cmsh_str] = 1

        """
        Store infos in RBP object.

        """
        # number of --in regions with RBP motif hits.
        c_hit_reg = len(regions_with_motifs_dic)
        if args.unstranded and not args.unstranded_ct:
            c_hit_reg = 0
            seen_ids_dic = {}
            for seq_id in regions_with_motifs_dic:
                core_id = benchlib.reg_get_core_id(seq_id)
                if core_id not in seen_ids_dic:
                    c_hit_reg += 1
                    seen_ids_dic[core_id] = 1

        # number of motif hits on --in regions in total.
        c_motif_hits = 0
        for reg_id in regions_with_motifs_dic:
            c_motif_hits += regions_with_motifs_dic[reg_id]
        
        rbp.c_hit_reg = c_hit_reg
        rbp.c_motif_hits = c_motif_hits
        # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
        rbp.perc_hit_reg = (rbp.c_hit_reg / c_regions) * 100

        # Store number of regions for internal ID.
        id2c_regions_dic[internal_id] = [c_regions, c_hit_reg]

        """
        Get unique motif hits.

        """
        rbp.c_uniq_motif_hits = len(unique_motifs_dic)
        # Store individual motif unique hits.
        for motif_str_repr in unique_motifs_dic:
            motif_id = benchlib.get_motif_id_from_str_repr(motif_str_repr)
            idx = motif_id2idx_dic[motif_id]
            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_hits[idx] += 1
            else:
                rbp.str_motif_hits[idx] += 1

        """
        Number of motif nucleotides over called + effective region size.

        """

        print("Calculate effective motif region sizes ... ")

        # Output unique motif hit regions (sequence or structure) to BED.
        eff_motif_reg_size = 0
        if unique_motifs_dic:
            benchlib.batch_output_motif_hits_to_bed(unique_motifs_dic, out_tmp_bed,
                                                    one_based_start=True)
            # Calculate effective motif region size.
            eff_motif_reg_size = benchlib.get_uniq_gen_size(out_tmp_bed)

        # Number of unique motif nucleotides.
        rbp.c_uniq_motif_nts = eff_motif_reg_size
        # % unique motif nts over effective region length.
        rbp.perc_uniq_motif_nts_eff_reg = (eff_motif_reg_size / eff_reg_size) * 100
        # % unique motif nts over called region length.
        rbp.perc_uniq_motif_nts_cal_reg = (eff_motif_reg_size / called_reg_size) * 100
        # Number of unique motif hits per effective 1000 nt.
        rbp.uniq_motif_hits_eff_1000nt  = rbp.c_uniq_motif_hits / (eff_reg_size / 1000)
        # Number of unique motif hits per called 1000 nt.
        rbp.uniq_motif_hits_cal_1000nt  = rbp.c_uniq_motif_hits / (called_reg_size / 1000)

        print("# --in regions for motif search:", c_regions)
        print("Called genomic region size:     ", called_reg_size)
        print("Effective genomic region size:  ", eff_reg_size)

        """
        Motif enrichment test:
        Are motifs enriched in higher scoring sites?

        """

        print("Calculate Wilcoxon rank-sum test statistics ... ")

        # Check if all scores same (e.g. 0).
        reg_scores_dic = {}
        for reg_id in reg2sc_dic:
            reg_scores_dic[reg2sc_dic[reg_id]] = 1
        if len(reg_scores_dic) == 1:
            print("WARNING: all site scores identical. Reported p-values meaningless! (i.e., equal 1.0)")

        hit_reg_scores = []
        non_hit_reg_scores = []
        seen_ids_dic = {}

        for reg_id in reg2sc_dic:
            reg_sc = reg2sc_dic[reg_id] # float value.
            # If --unstranded, count regions only once as hit or no hit.
            if args.unstranded and not args.unstranded_ct:
                core_id = benchlib.reg_get_core_id(reg_id)
                reg_id1 = core_id + "(+)"
                reg_id2 = core_id + "(-)"
                if core_id not in seen_ids_dic:
                    if reg_id1 in regions_with_motifs_dic or reg_id2 in regions_with_motifs_dic:
                        hit_reg_scores.append(reg_sc)
                    else:
                        non_hit_reg_scores.append(reg_sc)
                    seen_ids_dic[core_id] = 1
            else:
                if reg_id in regions_with_motifs_dic:
                    hit_reg_scores.append(reg_sc)
                else:
                    non_hit_reg_scores.append(reg_sc)

        wc_pval = 1.0
        wc_rbc_es = 0.0
        wc_cl_es = 0.0
        # In case no regions without motif hits.
        if not non_hit_reg_scores:
            print("WARNING: all input regions contain motifs. Adding dummy scores (median) ... ")
            dummy_val = statistics.median(hit_reg_scores)
            hit_reg_scores.append(dummy_val)
            non_hit_reg_scores.append(dummy_val)

        print("# hit regions:    ", len(hit_reg_scores))
        print("# non-hit regions:", len(non_hit_reg_scores))

        if unique_motifs_dic:
            wc_stat, wc_pval = mannwhitneyu(hit_reg_scores, non_hit_reg_scores, alternative=wrs_alt_hypo)

            wc_pval = benchlib.round_to_n_significant_digits_v2(wc_pval, 4)
            rbp.wc_pval = wc_pval
            # Get effect sizes.
            wc_rbc_es, wc_cl_es = benchlib.get_eff_sizes(hit_reg_scores, non_hit_reg_scores, wc_stat,
                                                         round=True, round_n=4)

            rbp.wc_rbc_es = wc_rbc_es
            rbp.wc_cl_es = wc_cl_es

        print("Compact hit stats (RBP ID, # unique hits, Wilcoxon p-value, effect sizes (RBC, CL)):")
        print("%s\t%i\t%s (%s, %s)" %(rbp_id, rbp.c_uniq_motif_hits, str(wc_pval), str(wc_rbc_es), str(wc_cl_es)))

        # Store RBP Wilcoxon rank-sum test p-value + effect sizes.
        id2wc_pval_dic[internal_id] = [str(wc_pval), str(wc_rbc_es), str(wc_cl_es)]

        """
        Motif enrichment test for regex motifs.
        
        """
        if args.regex:
                
            print("Calculate Wilcoxon rank-sum test statistics for --regex ... ")

            hit_reg_scores = []
            non_hit_reg_scores = []
            seen_ids_dic = {}

            for reg_id in reg2sc_dic:
                reg_sc = reg2sc_dic[reg_id] # float value.
                # If --unstranded, count regions only once as hit or no hit.
                if args.unstranded and not args.unstranded_ct:
                    core_id = benchlib.reg_get_core_id(reg_id)
                    reg_id1 = core_id + "(+)"
                    reg_id2 = core_id + "(-)"
                    if core_id not in seen_ids_dic:
                        if reg_id1 in regions_with_regex_dic or reg_id2 in regions_with_regex_dic:
                            hit_reg_scores.append(reg_sc)
                        else:
                            non_hit_reg_scores.append(reg_sc)
                        seen_ids_dic[core_id] = 1
                else:
                    if reg_id in regions_with_regex_dic:
                        hit_reg_scores.append(reg_sc)
                    else:
                        non_hit_reg_scores.append(reg_sc)

            wc_pval = 1.0
            wc_rbc_es = 0.0
            wc_cl_es = 0.0
            # In case no regions without motif hits.
            if not non_hit_reg_scores:
                print("WARNING: all input regions contain motifs. Adding dummy scores (median) ... ")
                dummy_val = statistics.median(hit_reg_scores)
                hit_reg_scores.append(dummy_val)
                non_hit_reg_scores.append(dummy_val)

            print("# hit regions:    ", len(hit_reg_scores))
            print("# non-hit regions:", len(non_hit_reg_scores))

            c_uniq_regex_hits = len(unique_regex_dic)

            if unique_regex_dic:
                wc_stat, wc_pval = mannwhitneyu(hit_reg_scores, non_hit_reg_scores, alternative=wrs_alt_hypo)
                wc_pval = benchlib.round_to_n_significant_digits_v2(wc_pval, 4)
                # Get effect sizes.
                wc_rbc_es, wc_cl_es = benchlib.get_eff_sizes(hit_reg_scores, non_hit_reg_scores, wc_stat,
                                                             round=True, round_n=4)

            print("Compact hit stats (RBP ID, # unique hits, Wilcoxon p-value, effect sizes (RBC, CL)):")
            print("regex\t%i\t%s (%s, %s)" %(c_uniq_regex_hits, str(wc_pval), str(wc_rbc_es), str(wc_cl_es)))

            c_regex_hit_reg = len(hit_reg_scores)
            c_regex_no_hit_reg = len(non_hit_reg_scores)

            id2regex_stats_dic[internal_id] = [c_regex_hit_reg, c_regex_no_hit_reg, c_uniq_regex_hits, wc_pval, wc_rbc_es, wc_cl_es]

            # Calculate Fisher exact test for regex vs RBP hits.
            print("Calculate Fisher exact test statistics for --regex ... ")

            table, avg_min_dist, perc_close_hits = benchlib.make_contingency_table_2x2_v2(
                                                        region_rbp_binds_dic, 1, 0,
                                                        rid2rbpidx2hcp_dic,
                                                        max_motif_dist=args.max_motif_dist)

            odds_ratio, p_value = fisher_exact(table, alternative=fisher_alt_hypo)

            p_value = benchlib.round_to_n_significant_digits_v2(p_value, 4)

            id2regex_stats_dic[internal_id].append(avg_min_dist)
            id2regex_stats_dic[internal_id].append(perc_close_hits)
            id2regex_stats_dic[internal_id].append(str(table))
            id2regex_stats_dic[internal_id].append(str(p_value))

        """
        Output RBP hit stats (one row per RBP).

        """
        c_reg_with_hits = rbp.c_hit_reg
        perc_reg_with_hits = rbp.perc_hit_reg
        c_motif_hits = rbp.c_motif_hits
        c_uniq_motif_hits = rbp.c_uniq_motif_hits
        c_uniq_motif_nts = rbp.c_uniq_motif_nts
        perc_uniq_motif_nts_cal_reg = rbp.perc_uniq_motif_nts_cal_reg
        perc_uniq_motif_nts_eff_reg = rbp.perc_uniq_motif_nts_eff_reg
        uniq_motif_hits_cal_1000nt = rbp.uniq_motif_hits_cal_1000nt
        uniq_motif_hits_eff_1000nt = rbp.uniq_motif_hits_eff_1000nt
        wc_pval = rbp.wc_pval
        wc_rbc_es = rbp.wc_rbc_es
        wc_cl_es = rbp.wc_cl_es

        seq_motif_hits = ",".join(str(hc) for hc in rbp.seq_motif_hits)
        str_motif_hits = ",".join(str(hc) for hc in rbp.str_motif_hits)
        seq_motif_ids = ",".join(rbp.seq_motif_ids)
        str_motif_ids = ",".join(rbp.str_motif_ids)
        if not seq_motif_hits:
            seq_motif_hits = "-"
        if not str_motif_hits:
            str_motif_hits = "-"
        if not seq_motif_ids:
            seq_motif_ids = "-"
        if not str_motif_ids:
            str_motif_ids = "-"

        row_str = data_id + "\t"
        row_str += method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_str + "\t"
        row_str += rbp_id + "\t"

        row_str += str(c_regions) + "\t"

        row_str += str(reg_len_mean) + "\t"
        row_str += str(reg_len_median) + "\t"
        row_str += str(reg_len_min) + "\t"
        row_str += str(reg_len_max) + "\t"

        row_str += str(called_reg_size) + "\t"
        row_str += str(eff_reg_size) + "\t"

        row_str += str(c_reg_with_hits) + "\t"
        row_str += str(perc_reg_with_hits) + "\t"

        row_str += str(c_motif_hits) + "\t"
        row_str += str(c_uniq_motif_hits) + "\t"
        row_str += str(c_uniq_motif_nts) + "\t"
        row_str += str(perc_uniq_motif_nts_cal_reg) + "\t"
        row_str += str(perc_uniq_motif_nts_eff_reg) + "\t"
        row_str += str(uniq_motif_hits_cal_1000nt) + "\t"
        row_str += str(uniq_motif_hits_eff_1000nt) + "\t"
        row_str += str(wc_pval) + "\t"
        row_str += str(wc_rbc_es) + "\t"
        row_str += str(wc_cl_es) + "\t"
        row_str += seq_motif_ids + "\t"
        row_str += seq_motif_hits + "\t"
        row_str += str_motif_ids + "\t"
        row_str += str_motif_hits + "\t"
        row_str += internal_id + "\n"

        OUTRBPSTATS.write(row_str)

        # For motif enrichment stats table in HTML report.
        id2motif_enrich_stats_dic[internal_id] = [c_reg_with_hits, perc_reg_with_hits, c_uniq_motif_hits, wc_pval, wc_rbc_es, wc_cl_es]

        """
        Output motif region stats (one row per motif hit).
        Report ALL motif hits, plus report how many times one genomic 
        motif hit occurs (uniq_count). 

        """

        for fh in fimo_hits_list:

            region_id = fh.seq_name
            region_len = benchlib.get_length_from_seq_name(fh.seq_name)
            # Genomic motif region string.
            fh_str = repr(fh) 
            uniq_count = unique_motifs_dic[fh_str]

            # Get matched sequence.
            matched_seq = benchlib.get_matched_seq(fh.seq_name, out_seqs_dic, fh.seq_s, fh.seq_e)
            if not matched_seq:
                matched_seq = "-"

            # Store motif hit as BED.
            hit_id = "%s:%s-%s(%s)%s;%s" %(fh.chr_id, str(fh.start), str(fh.end), fh.strand, fh.motif_id, internal_id)
            if hit_id not in motif_reg_dic:
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, method_id, data_id, fh.strand, str(fh.score), str(fh.pval), matched_seq)
                motif_reg_dic[hit_id] = bed_row

            row_str = data_id + "\t"
            row_str += method_id + "\t"
            row_str += run_id + "\t"
            row_str += motif_db_str + "\t"
            row_str += region_id + "\t"
            row_str += rbp_id + "\t"
            row_str += fh.motif_id + "\t"
            row_str += fh.chr_id + "\t"
            row_str += str(fh.start) + "\t"
            row_str += str(fh.end) + "\t"
            row_str += fh.strand + "\t"
            row_str += str(fh.seq_s) + "\t"
            row_str += str(fh.seq_e) + "\t"
            row_str += str(region_len) + "\t"
            row_str += str(uniq_count) + "\t"
            row_str += str(fh.score) + "\t"
            row_str += str(fh.pval) + "\t"
            row_str += "-\t"
            row_str += "-\t"
            row_str += matched_seq + "\t"
            row_str += internal_id + "\n"

            OUTMTFSTATS.write(row_str)

        for cmsh in cmsearch_hits_list:

            region_id = cmsh.seq_name
            region_len = benchlib.get_length_from_seq_name(cmsh.seq_name)
            # Genomic motif region string.
            cmsh_str = repr(cmsh)
            uniq_count = unique_motifs_dic[cmsh_str]

            # Get matched sequence.
            matched_seq = benchlib.get_matched_seq(cmsh.seq_name, out_seqs_dic, cmsh.seq_s, cmsh.seq_e)
            if not matched_seq:
                matched_seq = "-"

            # Store motif hit as BED.
            hit_id = "%s:%s-%s(%s)%s;%s" %(cmsh.chr_id, str(cmsh.start), str(cmsh.end), cmsh.strand, cmsh.motif_id, internal_id)
            if hit_id not in motif_reg_dic:
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s" %(cmsh.chr_id, cmsh.start-1, cmsh.end, rbp_id, cmsh.motif_id, uniq_count, cmsh.strand, method_id, data_id, str(cmsh.score), str(cmsh.e_value), matched_seq)
                motif_reg_dic[hit_id] = bed_row

            row_str = data_id + "\t"
            row_str += method_id + "\t"
            row_str += run_id + "\t"
            row_str += motif_db_str + "\t"
            row_str += region_id + "\t"
            row_str += rbp_id + "\t"
            row_str += cmsh.motif_id + "\t"
            row_str += cmsh.chr_id + "\t"
            row_str += str(cmsh.start) + "\t"
            row_str += str(cmsh.end) + "\t"
            row_str += cmsh.strand + "\t"
            row_str += str(cmsh.seq_s) + "\t"
            row_str += str(cmsh.seq_e) + "\t"
            row_str += str(region_len) + "\t"
            row_str += str(uniq_count) + "\t"
            row_str += "-\t"
            row_str += "-\t"
            row_str += str(cmsh.score) + "\t"
            row_str += str(cmsh.e_value) + "\t"
            row_str += matched_seq + "\t"
            row_str += internal_id + "\n"

            OUTMTFSTATS.write(row_str)

        """
        if --gtf is set, overlap regions with annotations.
        
        """
        c_all_hits += c_hit_reg

        if args.in_gtf:
            print("--gtf file set. Overlap regions with gene annotations ... ")

            # reg_ids_dic needed to complete reg2annot_dic (for regions with no overlaps).
            reg_ids_dic = {}
            if args.unstranded and not args.unstranded_ct:
                for seq_id, seq in sorted(out_seqs_dic.items()):
                    core_id = benchlib.reg_get_core_id(seq_id)
                    reg_ids_dic[core_id] = 1
            else:
                for seq_id, seq in sorted(out_seqs_dic.items()):
                    reg_ids_dic[seq_id] = 1

            # Overlap with input regions.
            print("Overlap annotations with input regions ... ")
            params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))
            overlap_annotations_bed = args.out_folder + "/overlap_annotations.tmp.bed"
            benchlib.bed_intersect_files(filtered_sites_bed, intron_exon_out_bed, 
                                         overlap_annotations_bed,
                                         params=params)
            
            reg2annot_dic = benchlib.get_region_annotations(overlap_annotations_bed,
                                                            tid2tio_dic,
                                                            reg_ids_dic=reg_ids_dic)

            """
            Overlap additional annotation regions with input regions.

            """

            # print("Overlap input regions with gene regions ... ")

            c_outside_genes = benchlib.bed_intersect_files_count_lines(
                                            filtered_sites_bed, gene_regions_bed,
                                            params="-s -v")

            # print("# input regions NOT overlapping with gene regions: %i" %(c_outside_genes))

            # print("Overlap input regions with promoter regions ... ")

            c_inside_prom = benchlib.bed_intersect_files_count_lines(
                                            filtered_sites_bed, promoter_regions_bed,
                                            params="-s -u")

            # print("# input regions overlapping with promoter regions: %i" %(c_inside_prom))

            c_add_annot = 0

            if args.add_annot_bed:

                # print("Overlap input regions with --add-annot-bed regions ... ")

                params = "-s -u"  # count overlaps with --add-annot-bed regions.
                if args.add_annot_comp:
                    params = "-s -v"  # count non-overlaps with --add-annot-bed regions.

                c_add_annot = benchlib.bed_intersect_files_count_lines(
                                                    filtered_sites_bed, args.add_annot_bed,
                                                    params=params)

                # if args.add_annot_comp:
                #     print("# input regions NOT overlapping with --add-annot-bed regions: %i" %(c_add_annot))
                # else:
                #     print("# input regions overlapping with --add-annot-bed regions: %i" %(c_add_annot))

            dsid2add_annot_stats_dic[internal_id] = {"c_outside_genes": c_outside_genes,
                                                     "c_inside_prom": c_inside_prom,
                                                     "c_add_annot": c_add_annot,
                                                     "c_regions": c_regions}

            # reg2annot_dic format right now: 
            # reg_id -> [annot_id, tr_id, border_dist, us_ds_label, annot_reg_len, exon_intron_nr]

            # Store annotations for all regions.
            occ_tr_ids_dic = {}  # Transcript IDs occupied by dataset regions.
            occ_hits_tr_ids_dic = {}  # Transcript IDs occupied by dataset regions with motif hits.
            id2reg_annot_dic[internal_id] = {}
            
            # Count annotation labels + occupied transcripts over ALL regions.
            for reg_id in reg2annot_dic:
                annot = reg2annot_dic[reg_id][0]
                tr_id = reg2annot_dic[reg_id][1]
                # Extend to 9 elements format.
                reg2annot_dic[reg_id].append("-")
                reg2annot_dic[reg_id].append("-")
                reg2annot_dic[reg_id].append("-")

                if tr_id:  # if tr_id is not False (if region does not overlap with transcripts/genes).

                    # Record transcript IDs occupied by all dataset regions.
                    if tr_id in all_sets_occ_tr_ids_dic:
                        all_sets_occ_tr_ids_dic[tr_id] += 1
                    else:
                        all_sets_occ_tr_ids_dic[tr_id] = 1

                    # Record transcript IDs occupied by regions from this specific dataset.
                    if tr_id in occ_tr_ids_dic:
                        occ_tr_ids_dic[tr_id] += 1
                    else:
                        occ_tr_ids_dic[tr_id] = 1

                annot_dic[annot] = 1
                if annot in id2reg_annot_dic[internal_id]:
                    id2reg_annot_dic[internal_id][annot] += 1
                else:
                    id2reg_annot_dic[internal_id][annot] = 1
            
            # Store annotations for regions with hits only.
            id2hit_reg_annot_dic[internal_id] = {}
            for reg_id in regions_with_motifs_dic:
                annot = reg2annot_dic[reg_id][0]
                tr_id = reg2annot_dic[reg_id][1]

                if tr_id:
                    if tr_id in occ_hits_tr_ids_dic:
                        occ_hits_tr_ids_dic[tr_id] += 1
                    else:
                        occ_hits_tr_ids_dic[tr_id] = 1

                if annot in id2hit_reg_annot_dic[internal_id]:
                    id2hit_reg_annot_dic[internal_id][annot] += 1
                else:
                    id2hit_reg_annot_dic[internal_id][annot] = 1

            id2occ_hits_tr_ids_dic[internal_id] = occ_hits_tr_ids_dic

            # Record transcript ID occupancies for all transcripts in --gtf annotation.
            # occ_mode 1: binary occupancy flag; occ_mode 2: occupancy count.
            if occ_all_tr:
                id2occ_list_dic[internal_id] = []
                for tr_id in sorted(tr_ids_dic):
                    if tr_id in occ_tr_ids_dic:
                        if occ_mode == 1:
                            id2occ_list_dic[internal_id].append(1)
                        elif occ_mode == 2:
                            id2occ_list_dic[internal_id].append(occ_tr_ids_dic[tr_id])
                    else:
                        id2occ_list_dic[internal_id].append(0)
            else:
                id2occ_tr_ids_dic[internal_id] = occ_tr_ids_dic

            # Record amount of occupied housekeeping genes (actually transcripts) in internal_id dataset.
            # NOTE(review): when occ_all_tr is True, id2occ_tr_ids_dic[internal_id]
            # was not set above (only the else branch fills it) — this lookup would
            # raise KeyError unless it is populated elsewhere. Verify.
            if id2occ_tr_ids_dic[internal_id] and hk_tr_ids_dic:
                id2hk_gene_stats_dic[internal_id] = [0, 0, 0]
                c_tids = 0
                c_hk_tids = 0
                for tid in id2occ_tr_ids_dic[internal_id]:
                    c_tids += 1
                    if tid in hk_tr_ids_dic:
                        c_hk_tids += 1

                id2hk_gene_stats_dic[internal_id][0] = c_tids
                id2hk_gene_stats_dic[internal_id][1] = c_hk_tids
                if c_tids:
                    id2hk_gene_stats_dic[internal_id][2] = round((c_hk_tids / c_tids) * 100, 2)

            # --unstranded option, count both strands only once.
            # NOTE(review): these inner loops iterate over ALL datasets processed so
            # far and reuse (clobber) the enclosing loop variable internal_id, which
            # is used again further below (e.g. ei_ol_stats_dic, internal_ids_list).
            # They also re-run on every outer iteration, so earlier datasets would
            # be halved repeatedly (the evenness assert should then fire). Looks
            # like this belongs after the outer loop / should use a fresh loop
            # variable — verify.
            if args.unstranded and not args.unstranded_ct:
                for internal_id in id2reg_annot_dic:
                    for annot in id2reg_annot_dic[internal_id]:
                        annot_c = id2reg_annot_dic[internal_id][annot]
                        assert not annot_c % 2, "# of regions with annotation %s should be even since --unstranded is set" %(annot)
                        new_annot_c = annot_c // 2
                        id2reg_annot_dic[internal_id][annot] = new_annot_c
                for internal_id in id2hit_reg_annot_dic:
                    for annot in id2hit_reg_annot_dic[internal_id]:
                        annot_c = id2hit_reg_annot_dic[internal_id][annot]
                        assert not annot_c % 2, "# of regions with annotation %s should be even since --unstranded is set" %(annot)
                        new_annot_c = annot_c // 2
                        id2hit_reg_annot_dic[internal_id][annot] = new_annot_c

            # # Overlap with input regions.
            # print("Overlap exon + intron regions with input regions ... ")
            # params = "-s -f %s -F %s -e -wb " %(str(args.gtf_eib_min_overlap), str(args.gtf_eib_min_overlap))
            # overlap_ei_regions_bed = args.out_folder + "/overlap_ei_regions.tmp.bed"
            # benchlib.bed_intersect_files(intron_exon_out_bed, filtered_sites_bed,
            #                              overlap_ei_regions_bed,
            #                              params=params)

            # # Get exon + intron overlap counts.
            # c_exon_ol, c_intron_ol = benchlib.get_intron_exon_ol_counts(overlap_ei_regions_bed)
            # # print("# exon overlap regions:   %i" %(c_exon_ol))
            # # print("# intron overlap regions: %i" %(c_intron_ol))

            # print("Overlap exon + intron border regions with input regions ... ")
            # params = "-s -f %s -F %s -e -wb " %(str(args.gtf_eib_min_overlap), str(args.gtf_eib_min_overlap))
            # overlap_eib_regions_bed = args.out_folder + "/overlap_eib_regions.tmp.bed"
            # benchlib.bed_intersect_files(intron_exon_border_out_bed, filtered_sites_bed,
            #                              overlap_eib_regions_bed,
            #                              params=params)

            # # Get other overlap counts.
            # c_eib_ol, c_us_ib_ol, c_ds_ib_ol = benchlib.get_eib_ol_counts(overlap_eib_regions_bed)

            """
            Get exon-intron + border overlap statistics from reg2annot_dic.

            """

            # Fixed exon-intron border window size (nt); stored on args so
            # downstream code can read it.
            ei_border_len = 50
            args.ei_border_len = ei_border_len

            # Counters per exon/intron/border category, filled by get_eib_annot_c().
            eib_annot_c_dic = {
                "exonic" : 0,
                "intronic" : 0,
                "intergenic" : 0,
                "eib" : 0,
                "us_ib_dist" : 0,
                "ds_ib_dist" : 0,
                "us_ib" : 0,
                "ds_ib" : 0,
                "first_exon" : 0,
                "last_exon" : 0,
                "single_exon" : 0
            }

            # reg2annot_dic format: 
            # reg_id -> [annot_id, tr_id, border_dist, us_ds_label, annot_reg_len, exon_intron_nr, gene_id, gene_name, tr_biotype]

            for reg_id in reg2annot_dic:
                benchlib.get_eib_annot_c(reg2annot_dic[reg_id], eib_annot_c_dic,
                                        ib_len=args.gtf_intron_border_len,
                                        eib_len=ei_border_len)

            c_exon_ol = eib_annot_c_dic["exonic"]
            c_intron_ol = eib_annot_c_dic["intronic"]
            c_intergenic = eib_annot_c_dic["intergenic"]
            c_eib_ol = eib_annot_c_dic["eib"]
            c_us_ib_ol = eib_annot_c_dic["us_ib"]
            c_ds_ib_ol = eib_annot_c_dic["ds_ib"]
            c_us_ib_dist_ol = eib_annot_c_dic["us_ib_dist"]
            c_ds_ib_dist_ol = eib_annot_c_dic["ds_ib_dist"]
            c_first_exon = eib_annot_c_dic["first_exon"]
            c_last_exon = eib_annot_c_dic["last_exon"]
            c_single_exon = eib_annot_c_dic["single_exon"]

            # Bundle counts into an ExonIntronOverlap stats object for this dataset.
            exon_intron_ol_stats = benchlib.ExonIntronOverlap(internal_id, c_regions,
                                                            c_exon_sites=c_exon_ol,
                                                            c_intron_sites=c_intron_ol,
                                                            c_intergenic_sites=c_intergenic,
                                                            c_us_ib_sites=c_us_ib_ol,
                                                            c_ds_ib_sites=c_ds_ib_ol,
                                                            c_us_ib_dist_sites=c_us_ib_dist_ol,
                                                            c_ds_ib_dist_sites=c_ds_ib_dist_ol,
                                                            c_eib_sites=c_eib_ol,
                                                            c_first_exon_sites=c_first_exon,
                                                            c_last_exon_sites=c_last_exon,
                                                            c_single_exon_sites=c_single_exon,
                                                            min_overlap=args.gtf_feat_min_overlap,
                                                            intron_border_len=args.gtf_intron_border_len,
                                                            ei_border_len=ei_border_len,
                                                            c_tr_ids=len(tid2tio_dic),
                                                            c_tr_ids_with_sites=len(occ_tr_ids_dic))
            
            ei_ol_stats_dic[internal_id] = exon_intron_ol_stats

        # Dataset ID.
        # dataset_idx += 1
        # dataset_id = str(dataset_idx) + "," + rbp_id + "," + motif_db_str + "," + method_id + "," + data_id
        # Human-readable dataset ID; motif DB included only when several DBs are in play.
        if add_motif_db_info:
            dataset_id = rbp_id + "," + motif_db_str + "," + method_id + "," + data_id
        else:
            dataset_id = rbp_id + "," + method_id + "," + data_id

        dataset_ids_list.append(dataset_id)
        internal_ids_list.append(internal_id)

        # Get k-mer frequencies for all regions.
        print("Calculate k-mer frequencies ... ")

        # k-mer frequencies unaffected by --unstranded options.
        kmer_dic = benchlib.seqs_dic_count_kmer_freqs(out_seqs_dic, args.kmer_size, 
                                                      rna=False,
                                                      convert_to_uc=True,
                                                      skip_non_dic_keys=True,  # skip k-mers containing N-bases.
                                                      return_ratios=True)

        # Populate k-mers list (once; same sorted order used for every dataset).
        if not kmer_list:
            for kmer in sorted(kmer_dic):
                kmer_list.append(kmer)

        # Store the k-mer frequencies for later comparison plot in a list, sorted by key.
        kmer_freqs_list = []
        for kmer in sorted(kmer_dic):
            kmer_freqs_list.append(kmer_dic[kmer])
        kmer_freqs_ll.append(kmer_freqs_list)

        print("Get sequence length statistics ... ")

        # Sequence length statistics for out_seqs_dic.
        unstranded_len_stats = False
        if args.unstranded and not args.unstranded_ct:
            # In this case we want to count the two strands of a region as one region.
            unstranded_len_stats = True
        seq_len_stats_l = benchlib.get_sequence_length_statistics(out_seqs_dic,
                                                                  unstranded=unstranded_len_stats)
        # Add dataset ID to seq_len_stats_l.
        seq_len_stats_l.insert(0, dataset_id)
        seq_len_stats_ll.append(seq_len_stats_l)

        # Sequence complexities.
        if not args.no_comp_feat:

            print("Calculate sequence complexities ... ")

            # Calculate sequence complexities (k-mer entropies per sequence).
            seqs_entr_list = benchlib.seqs_dic_calc_entropies(out_seqs_dic,
                                                              k=args.seq_comp_k,
                                                              rna=False)
            mean_complexity = statistics.mean(seqs_entr_list)

            # Get sequence single nucleotide frequencies / counts.
            seqs_ntc_dic = benchlib.seqs_dic_count_nt_freqs(out_seqs_dic, rna=False)

            # Get single nucleotide percentages.
            seqs_ntp_dic = benchlib.ntc_dic_to_ratio_dic(seqs_ntc_dic, perc=True)
            
            # Get bi-nt percentages (AC, AG, AT, CG, CT, GT).
            seqs_bint_perc_dic = benchlib.get_bint_perc_from_ntc_dic(seqs_ntc_dic)

            # Feature row: dataset ID, mean complexity, sorted nt percentages,
            # sorted bi-nt percentages.
            seq_feat_l = [dataset_id, round(mean_complexity, 5)]
            for nt in sorted(seqs_ntp_dic):
                seq_feat_l.append(round(seqs_ntp_dic[nt], 2))
            for bint in sorted(seqs_bint_perc_dic):
                seq_feat_l.append(round(seqs_bint_perc_dic[bint], 2))
            seq_feat_ll.append(seq_feat_l)

        # Sequence variation stats.
        print("Calculate sequence variations statistics ... ")
        # Mode 1: per-k-mer site ratios; Mode 2: per-k-mer coefficient of variation.
        if args.seq_var_feat_mode == 1:

            kmer2site_ratio_dic, avg_site_ratio, pk_rat = benchlib.calculate_k_site_ratios(out_seqs_dic,
                                                                            k=args.seq_var_kmer_size,
                                                                            nucleotides=['A', 'C', 'G', 'T'],
                                                                            only_observed=True)

            seq_var_l = [dataset_id, avg_site_ratio, pk_rat]

            # Populate k-mer header list once (sorted order matches value rows).
            if not seq_var_kmer_l:
                for kmer, site_ratio in sorted(kmer2site_ratio_dic.items()):
                    seq_var_kmer_l.append(kmer)
            
            for kmer, site_ratio in sorted(kmer2site_ratio_dic.items()):
                seq_var_l.append(site_ratio)

            seq_var_ll.append(seq_var_l)

        elif args.seq_var_feat_mode == 2:

            single_cv_dic, avg_cv = benchlib.calculate_k_nucleotide_cv(out_seqs_dic,
                                                                    k=args.seq_var_kmer_size,
                                                                    nucleotides=['A', 'C', 'G', 'T'],
                                                                    only_observed=True)

            seq_var_l = [dataset_id, avg_cv]

            if not seq_var_kmer_l:
                for kmer, single_cv in sorted(single_cv_dic.items()):
                    seq_var_kmer_l.append(kmer)
            
            for kmer, single_cv in sorted(single_cv_dic.items()):
                seq_var_l.append(single_cv)

            seq_var_ll.append(seq_var_l)


    # End of per-dataset loop: close stats tables.
    OUTRBPSTATS.close()
    OUTMTFSTATS.close()

    # Write all collected motif hit BED rows.
    print("Output motif hits ... ")
    OUTBED = open(motif_hits_bed_out, "w")
    for hit_id in motif_reg_dic:
        OUTBED.write("%s\n" %(motif_reg_dic[hit_id]))
    OUTBED.close()

    # format: {'CDS': 66, 'intron': 8, "3'UTR": 7, "5'UTR": 2, .. }
    regex_annot_dic = {}

    if args.regex:
        print("Output regex motif hits ... ")
        OUTREGEX = open(regex_hits_bed_out,"w")
        for hit_id in regex_reg_dic:
            OUTREGEX.write("%s\n" %(regex_reg_dic[hit_id]))
        OUTREGEX.close()

        # Get regex hit region annotations.
        if args.in_gtf:

            print("Get regex hit region annotations ... ")

            # -wao: keep all regex hits, also those without annotation overlap.
            params = "-s -wao -f %s" %(str(args.gtf_feat_min_overlap))
            overlap_regex_hit_annotations_bed = args.out_folder + "/overlap_regex_hit_annotations.tmp.bed"
            benchlib.bed_intersect_files(regex_hits_bed_out, intron_exon_out_bed, 
                                         overlap_regex_hit_annotations_bed,
                                         params=params)

            regex2annot_dic = benchlib.get_regex_hit_region_annotations(overlap_regex_hit_annotations_bed,
                                                                        tid2tio_dic=tid2tio_dic)

            # Count regex hits per annotation label.
            for reg_id in regex2annot_dic:
                annot = regex2annot_dic[reg_id][0]
                if annot in regex_annot_dic:
                    regex_annot_dic[annot] += 1
                else:
                    regex_annot_dic[annot] = 1

    """
    Gene region occupancy stats only for genes occupied by input datasets.
    (i.e. if occ_all_tr == False).
    
    """

    if args.in_gtf and not occ_all_tr:

        for internal_id in id2infos_dic:

            id2occ_list_dic[internal_id] = []

            for tr_id in sorted(all_sets_occ_tr_ids_dic):
                if tr_id in id2occ_tr_ids_dic[internal_id]:
                    if occ_mode == 1:
                        id2occ_list_dic[internal_id].append(1)
                    elif occ_mode == 2:
                        id2occ_list_dic[internal_id].append(id2occ_tr_ids_dic[internal_id][tr_id])
                else:
                    id2occ_list_dic[internal_id].append(0)


    """
    Output gene region occupancy table.
    Infos included: gene ID, gene name, transcript ID, for each dataset the occupancy status 0,1,...
    
    all_sets_occ_tr_ids_dic:
        All occupied transcript IDs (over all input datasets), so a transcript ID in this
        dictionary is occupied by sites of >= 1 input dataset.

    """

    gene_occ_table_out = args.out_folder + "/gene_region_occupancies.tsv"

    if args.in_gtf:

        print("Output gene region occupancies ... ")

        OUTTAB = open(gene_occ_table_out,"w")
        OUTTAB.write("gene_id\tgene_name\ttranscript_id\t%s\n" %("\t".join(dataset_ids_list)))

        for idx, tr_id in enumerate(sorted(all_sets_occ_tr_ids_dic)):
            gene_id = tr2gid_dic[tr_id]
            gene_name = gid2gio_dic[gene_id].gene_name

            occ_list = []
            for internal_id in internal_ids_list:
                occ_list.append(id2occ_list_dic[internal_id][idx])
            
            occ_list_str = "\t".join(str(occ) for occ in occ_list)
            OUTTAB.write("%s\t%s\t%s\t%s\n" %(gene_id, gene_name, tr_id, occ_list_str))

        OUTTAB.close()


    """
    Ouput exon + intron overlap stats.
    
    """
    eib_ol_table_out = args.out_folder + "/exon_intron_overlap_stats.tsv"
    if ei_ol_stats_dic:
        OUTEIB = open(eib_ol_table_out,"w")
        OUTEIB.write("dataset_id\tperc_exons\tperc_introns\tperc_us_ib\tperc_ds_ib\tperc_dist_us_ib\tperc_dist_ds_ib\t"
                     "perc_eib\tperc_first_exon\tperc_last_exon\tperc_single_exon\t"
                     "c_regions\tc_exon_sites\tc_intron_sites\tc_us_ib_sites\tc_ds_ib_sites\tc_dist_us_ib_sites\tc_dist_ds_ib_sites\t"
                     "c_eib_sites\tc_first_exon_sites\tc_last_exon_sites\tc_single_exon_sites\t"
                     "c_tr_ids\tc_tr_ids_with_sites\n")

        for internal_id in ei_ol_stats_dic:
            combined_id = internal_id
            rbp_id = id2infos_dic[internal_id][0]
            data_id = id2infos_dic[internal_id][1]
            method_id = id2infos_dic[internal_id][2]
            database_id = id2infos_dic[internal_id][3]
            if add_motif_db_info:
                combined_id = rbp_id + "," + database_id + "," + method_id + "," + data_id
            else:
                combined_id = rbp_id + "," + method_id + "," + data_id

            ei_ol_stats = ei_ol_stats_dic[internal_id]
            exon_sites_perc = 0.0
            if ei_ol_stats.c_exon_sites and ei_ol_stats.c_input_sites:
                exon_sites_perc = round(ei_ol_stats.c_exon_sites / ei_ol_stats.c_input_sites * 100, 1)
            intron_sites_perc = 0.0
            if ei_ol_stats.c_intron_sites and ei_ol_stats.c_input_sites:
                intron_sites_perc = round(ei_ol_stats.c_intron_sites / ei_ol_stats.c_input_sites * 100, 1)
            us_ib_sites_perc = 0.0
            if ei_ol_stats.c_us_ib_sites and ei_ol_stats.c_input_sites:
                us_ib_sites_perc = round(ei_ol_stats.c_us_ib_sites / ei_ol_stats.c_input_sites * 100, 1)
            ds_ib_sites_perc = 0.0
            if ei_ol_stats.c_ds_ib_sites and ei_ol_stats.c_input_sites:
                ds_ib_sites_perc = round(ei_ol_stats.c_ds_ib_sites / ei_ol_stats.c_input_sites * 100, 1)
            us_ib_dist_sites_perc = 0.0
            if ei_ol_stats.c_us_ib_dist_sites and ei_ol_stats.c_input_sites:
                us_ib_dist_sites_perc = round(ei_ol_stats.c_us_ib_dist_sites / ei_ol_stats.c_input_sites * 100, 1)
            ds_ib_dist_sites_perc = 0.0
            if ei_ol_stats.c_ds_ib_dist_sites and ei_ol_stats.c_input_sites:
                ds_ib_dist_sites_perc = round(ei_ol_stats.c_ds_ib_dist_sites / ei_ol_stats.c_input_sites * 100, 1)
            first_exon_sites_perc = 0.0
            if ei_ol_stats.c_first_exon_sites and ei_ol_stats.c_input_sites:
                first_exon_sites_perc = round(ei_ol_stats.c_first_exon_sites / ei_ol_stats.c_input_sites * 100, 1)
            last_exon_sites_perc = 0.0
            if ei_ol_stats.c_last_exon_sites and ei_ol_stats.c_input_sites:
                last_exon_sites_perc = round(ei_ol_stats.c_last_exon_sites / ei_ol_stats.c_input_sites * 100, 1)
            single_exon_sites_perc = 0.0
            if ei_ol_stats.c_single_exon_sites and ei_ol_stats.c_input_sites:
                single_exon_sites_perc = round(ei_ol_stats.c_single_exon_sites / ei_ol_stats.c_input_sites * 100, 1)
            eib_sites_perc = 0.0
            if ei_ol_stats.c_eib_sites and ei_ol_stats.c_input_sites:
                eib_sites_perc = round(ei_ol_stats.c_eib_sites / ei_ol_stats.c_input_sites * 100, 1)
            # perc_min_overlap = round(args.gtf_feat_min_overlap * 100, 1)

            OUTEIB.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%i\t%i\t%i\t%i\t%i\t%i\t%i\t%i\t%i\t%i\t%i\t%i\t%i\n" %(
                                                                                combined_id, 
                                                                                str(exon_sites_perc), 
                                                                                str(intron_sites_perc), 
                                                                                str(us_ib_sites_perc), 
                                                                                str(ds_ib_sites_perc),
                                                                                str(us_ib_dist_sites_perc),
                                                                                str(ds_ib_dist_sites_perc),
                                                                                str(eib_sites_perc),
                                                                                str(first_exon_sites_perc),
                                                                                str(last_exon_sites_perc),
                                                                                str(single_exon_sites_perc),
                                                                                ei_ol_stats.c_input_sites, 
                                                                                ei_ol_stats.c_exon_sites, 
                                                                                ei_ol_stats.c_intron_sites, 
                                                                                ei_ol_stats.c_us_ib_sites, 
                                                                                ei_ol_stats.c_ds_ib_sites,
                                                                                ei_ol_stats.c_us_ib_dist_sites,
                                                                                ei_ol_stats.c_ds_ib_dist_sites,
                                                                                ei_ol_stats.c_eib_sites,
                                                                                ei_ol_stats.c_first_exon_sites,
                                                                                ei_ol_stats.c_last_exon_sites,
                                                                                ei_ol_stats.c_single_exon_sites,
                                                                                ei_ol_stats.c_tr_ids, 
                                                                                ei_ol_stats.c_tr_ids_with_sites))

        OUTEIB.close()



    """

    Run GOA.

    1)
    Get genes occupied in all datasets for GOA.

    If --goa-only-cooc is set, use id2occ_hits_tr_ids_dic instead of id2occ_tr_ids_dic 
    to get occupied gene list (i.e., only genes with sites containing motif hits instead 
    of genes with sites).

    2)
    Filter target + background genes and run GOA.

    """

    goa_results_df = False
    goa_stats_dic = {}
    propagate_counts = True

    if args.run_goa:

        print("")
        print("Prepare GOA ... ")
        print("Get target genes list for GOA ...")

        c_datasets = len(dataset_ids_list)
        assert c_datasets, "no datasets found to determine genes occupied in all datasets"
        assert len(id2occ_tr_ids_dic) == c_datasets, "number of datasets and id2occ_tr_ids_dic entries do not match"
        assert len(id2occ_hits_tr_ids_dic) == c_datasets, "number of datasets and id2occ_hits_tr_ids_dic entries do not match"

        tid2c_dic = {}
        if args.goa_only_cooc:
            print("--goa-only-cooc set:")
            print("Use only genes covered by regions with motif hits from all datasets ...")
            for internal_id in id2occ_hits_tr_ids_dic:
                for tid in id2occ_hits_tr_ids_dic[internal_id]:
                    if tid in tid2c_dic:
                        tid2c_dic[tid] += 1
                    else:
                        tid2c_dic[tid] = 1
        else:
            print("Use only genes covered by regions from all datasets ...")
            for internal_id in id2occ_tr_ids_dic:
                for tid in id2occ_tr_ids_dic[internal_id]:
                    if tid in tid2c_dic:
                        tid2c_dic[tid] += 1
                    else:
                        tid2c_dic[tid] = 1

        occ_tids_list = []
        for tid in tid2c_dic:
            if tid2c_dic[tid] == c_datasets:
                occ_tids_list.append(tid)

        target_genes_dic = {}
        for tid in occ_tids_list:
            assert tid in tr2gid_dic, "transcript ID %s not found in tr2gid_dic" %(tid)
            gid = tr2gid_dic[tid]
            target_genes_dic[gid] = 1

        if args.goa_only_cooc:
            print("# genes covered by regions with motif hits from all datasets: %i" %(len(target_genes_dic)))
            assert target_genes_dic, "no target genes remain for GOA when accepting only genes covered by regions with motif hits from all datasets"
        else:
            print("# genes covered by regions from all datasets: %i" %(len(target_genes_dic)))
            assert target_genes_dic, "no target genes remain for GOA when accepting only genes covered by regions from all datasets"

        # Check if target genes are part of background gene list.

        print("Filter target genes by background gene list ... ")
        new_target_genes_dic = {}
        for gid in target_genes_dic:
            if gid in background_genes_dic:
                new_target_genes_dic[gid] = target_genes_dic[gid]
    
        print("# of target genes before filtering: %i" %(len(target_genes_dic)))
        print("# of target genes after filtering:  %i" %(len(new_target_genes_dic)))

        assert new_target_genes_dic, "no target genes remaining after filtering by background gene list. If --goa-gb-gene-list was given, please provide compatible --gtf and --goa-bg-gene-list files"
        target_genes_dic = new_target_genes_dic


        """
        GO term enrichment analysis.

        """

        print("")
        print("Run GOA ... ")

        # Write empty file.
        open(goa_results_tsv, "w").close()

        goa_stats_dic["c_target_genes_pre_filter"] = len(target_genes_dic)
        goa_stats_dic["c_background_genes_pre_filter"] = len(background_genes_dic)
        goa_stats_dic["pval_thr"] = args.goa_pval
        goa_stats_dic["goa_obo_mode"] = args.goa_obo_mode
        goa_stats_dic["propagate_counts"] = propagate_counts
        goa_stats_dic["excluded_terms"] = "-"
        goa_stats_dic["goa_filter_purified"] = args.goa_filter_purified
        goa_stats_dic["goa_max_child"] = args.goa_max_child
        goa_stats_dic["goa_min_depth"] = args.goa_min_depth
        goa_stats_dic["goa_only_cooc"] = args.goa_only_cooc

            
        local_gid2go_file = benchlib_path + "/content/ensembl_gene_id2go_ids.biomart.GRCh38.112.tsv.gz"
        local_obo_file = benchlib_path + "/content/go-basic.obo.gz"

        assert os.path.exists(local_gid2go_file), "local gene ID to GO ID file \"%s\" not found" %(local_gid2go_file)
        assert os.path.exists(local_obo_file), "local GO OBO file \"%s\" not found" %(local_obo_file)

        gid2go_file = local_gid2go_file
        if args.goa_gene2go_file:
            gid2go_file = args.goa_gene2go_file
            assert os.path.exists(gid2go_file), "provided --goa-gene2go-file \"%s\" not found" %(gid2go_file)
        goa_obo_file = local_obo_file
        if args.goa_obo_file and args.goa_obo_mode == 3:
            goa_obo_file = args.goa_obo_file
            assert os.path.exists(goa_obo_file), "provided --goa-obo-file \"%s\" not found" %(goa_obo_file)

        # Run GOA.
        goa_results_df = benchlib.run_go_analysis(target_genes_dic, background_genes_dic, 
                                                  gid2go_file, args.out_folder,
                                                  pval_thr=args.goa_pval,
                                                  excluded_terms = [],  # do not exclude any GO terms.
                                                  goa_obo_mode=args.goa_obo_mode,
                                                  propagate_counts=propagate_counts,
                                                  stats_dic=goa_stats_dic,
                                                  store_gene_names=True,
                                                  goa_obo_file=goa_obo_file)

        print("# of enriched (i.e., with significantly higher concentration) GO terms: %i" %(goa_stats_dic["c_sig_go_terms_e"]))
        print("# of purified (i.e., with significantly lower concentration) GO terms:  %i" %(goa_stats_dic["c_sig_go_terms_p"]))

        goa_results_df.to_csv(goa_results_tsv, sep="\t", index=False)
        print("")


    """
    Example id2reg_annot_dic:
    {'1wz3Cd6R': {'CDS': 66, 'intron': 8, "3'UTR": 7, "5'UTR": 2}, 'VoSg9-Mm': {'other (nc)RNA': 1, "3'UTR": 73, 'CDS': 10, 'lncRNA': 1, 'intron': 3}}

    """
    # print("id2infos_dic:")
    # print(id2infos_dic)
    # print("id2reg_annot_dic:")
    # print(id2reg_annot_dic)
    # print("dataset_ids_list:")
    # print(dataset_ids_list)
    # print("kmer_freqs_ll:")
    # print(kmer_freqs_ll)

    html_report_out = args.out_folder + "/" + "report.rbpbench_batch.html"
    if args.plot_abs_paths:
        html_report_out = os.path.abspath(args.out_folder) + "/" + "report.rbpbench_batch.html"

    assert dataset_ids_list, "no dataset IDs found for report creation"
    assert kmer_freqs_ll, "no k-mer frequencies found for report creation"
    assert kmer_list, "no k-mers found for report creation"

    plots_subfolder = "html_report_plots"
    benchlib_path = os.path.dirname(benchlib.__file__)

    gene_occ_cooc_plot = True
    if args.no_occ_heatmap:
        gene_occ_cooc_plot = False
    heatmap_cluster_olo = True
    if args.disable_heatmap_cluster_olo:
        heatmap_cluster_olo = False
    if len(dataset_ids_list) < 2:
        print("Only one input dataset. Disable gene regions similarity heat map ... ")
        gene_occ_cooc_plot = False

    print("Create HTML report ... ")
    benchlib.batch_generate_html_report(args, dataset_ids_list, kmer_list,
                                        kmer_freqs_ll,
                                        id2infos_dic, 
                                        id2reg_annot_dic, 
                                        id2hit_reg_annot_dic,
                                        benchlib_path,
                                        seq_len_stats_ll,
                                        seq_feat_ll=seq_feat_ll,
                                        seq_var_ll=seq_var_ll,
                                        seq_var_kmer_l=seq_var_kmer_l,
                                        html_report_out=html_report_out,
                                        id2hk_gene_stats_dic=id2hk_gene_stats_dic,
                                        id2motif_enrich_stats_dic=id2motif_enrich_stats_dic,
                                        id2regex_stats_dic=id2regex_stats_dic,
                                        regex_annot_dic=regex_annot_dic,
                                        id2occ_list_dic=id2occ_list_dic,
                                        gene_occ_cooc_plot=gene_occ_cooc_plot,
                                        ei_ol_stats_dic=ei_ol_stats_dic,
                                        add_motif_db_info=add_motif_db_info,
                                        heatmap_cluster_olo=heatmap_cluster_olo,
                                        goa_results_df=goa_results_df,
                                        goa_stats_dic=goa_stats_dic,
                                        dsid2add_annot_stats_dic=dsid2add_annot_stats_dic,
                                        plots_subfolder=plots_subfolder)

    """
    All annot:
    annot_dic[annot] = 1
    id2hit_reg_annot_dic[internal_id][annot] += 1
    id2reg_annot_dic[internal_id][annot] += 1
    id2infos_dic[internal_id] = [rbp_id, data_id, method_id, motif_db_str, bed_file_path]

    """

    # Genomic region annotation stats on region containing RBP motif hits.
    rbp_annot_out = args.out_folder + "/rbp_motif_region_annotation_stats.tsv"
    # Genomic region annotation stats on all regions (with or without RBP motif hits).
    all_annot_out = args.out_folder + "/all_region_annotation_stats.tsv"

    if args.in_gtf:

        OUTRBP = open(rbp_annot_out, "w")
        OUTALL = open(all_annot_out, "w")

        # List all annotations encountered in batch run.
        annot_list = []
        for annot in annot_dic:
            annot_list.append(annot)
        # Sort list alphabetically.
        annot_list = sorted(annot_list)

        # Write header.
        header_annot_str = "\t".join(annot_list)
        OUTRBP.write("rbp_id\tmethod_id\tdata_id\tc_hit_regions\tc_all_regions\tperc_hit_reg\twc_pval\twc_rbc_es\twc_cl_es\t%s\tinternal_id\n" %(header_annot_str))
        OUTALL.write("rbp_id\tmethod_id\tdata_id\tc_all_regions\t%s\tinternal_id\n" %(header_annot_str))

        for internal_id in id2reg_annot_dic:

            # All regions in dataset (== total annotation count).
            reg_counts = id2c_regions_dic[internal_id]
            c_all_regions = reg_counts[0]
            c_hit_regions = reg_counts[1]

            id_infos = id2infos_dic[internal_id]

            wc_pval = id2wc_pval_dic[internal_id][0]
            wc_rbc_es = id2wc_pval_dic[internal_id][1]
            wc_cl_es = id2wc_pval_dic[internal_id][2]

            rbp_id = id_infos[0]
            data_id = id_infos[1]
            method_id = id_infos[2]

            if add_motif_db_info:
                rbp_id = id_infos[0]
                motif_db_str = id_infos[1]
                data_id = id_infos[2]
                method_id = id_infos[3]

            id_rbp_annot_list = []
            for annot in annot_list:
                perc_annot = 0.0
                if annot in id2hit_reg_annot_dic[internal_id]:
                    # Calculate percentage of annotation counts over RBP motif hit annotation counts.
                    perc_annot = (id2hit_reg_annot_dic[internal_id][annot] / c_hit_regions) * 100
                id_rbp_annot_list.append(perc_annot)

            perc_rbp_annot_str = "\t".join(str(round(perc, 2)) for perc in id_rbp_annot_list)
            perc_hit_all = (c_hit_regions / c_all_regions) * 100
            perc_hit_all = str(round(perc_hit_all, 2))
            OUTRBP.write("%s\t%s\t%s\t%i\t%i\t%s\t%s\t%s\t%s\t%s\t%s\n" %(rbp_id, method_id, data_id, c_hit_regions, c_all_regions, perc_hit_all, wc_pval, wc_rbc_es, wc_cl_es, perc_rbp_annot_str, internal_id))

            id_all_annot_list = []
            for annot in annot_list:
                perc_annot = 0.0
                if annot in id2reg_annot_dic[internal_id]:
                    # Calculate percentage of annotation counts over all annotation counts.
                    perc_annot = (id2reg_annot_dic[internal_id][annot] / c_all_regions) * 100
                id_all_annot_list.append(perc_annot)

            perc_all_annot_str = "\t".join(str(round(perc, 2)) for perc in id_all_annot_list)
            OUTALL.write("%s\t%s\t%s\t%i\t%s\t%s\n" %(rbp_id, method_id, data_id, c_all_regions, perc_all_annot_str, internal_id))

        OUTRBP.close()
        OUTALL.close()


    """
    Output parameter settings.

    """
    # Output mode settings.
    print("")
    print("Output parameter settings ... ")
    SETOUT = open(settings_file, "w")
    for arg in vars(args):
        SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
    for call in call_dic:
        SETOUT.write("%s\t%s\n" %(call, call_dic[call]))
    SETOUT.close()

    """
    Take out the trash.

    """
    print("Delete .tmp files ... ")
    if os.path.exists(out_tmp_bed):
        os.remove(out_tmp_bed)
    if os.path.exists(cmstat_tmp_out):
        os.remove(cmstat_tmp_out)

    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    print("Motif hits .bed:\n%s" %(motif_hits_bed_out))
    print("RBP hit stats .tsv:\n%s" %(rbp_stats_out))
    print("Motif hit stats .tsv:\n%s" %(motif_stats_out))
    if ei_ol_stats_dic:
        print("Exon + intron overlap stats .tsv:\n%s" %(eib_ol_table_out))
    if args.run_goa:
        print("GO enrichment analysis results .tsv:\n%s" %(goa_results_tsv))
    if args.in_gtf:
        print("RBP motif region annotation stats .tsv:\n%s" %(rbp_annot_out))
        print("All region annotation stats .tsv:\n%s" %(all_annot_out))
        print("Gene region occupancies .tsv:\n%s" %(gene_occ_table_out))

    print("Batch report .html:\n%s" %(html_report_out))
    print("")


################################################################################

def main_searchregex(args):
    """
    Regex motif search mode.

    Search input sequences (FASTA) or genomic regions (BED + --genome FASTA)
    for hits of one or more regular expressions. Regexes can be plain
    sequence regexes (IUPAC codes get expanded) or structure patterns
    (dispatched to a separate structure search). Results are written as
    BED files into args.out_folder:
      - motif_hits.rbpbench_searchregex.bed : unique motif hits
        (columns: region/chr, start, end, regex ID, score 0, strand,
        matched sequence, regex, plus two extra columns that for
        structure patterns hold GC / GU base pair fractions, "-" otherwise).
      - regions_with_hit_counts.bed : one line per input region/sequence
        with its total hit count (over all regexes) in BED column 5.

    Parameters:
        args: argparse.Namespace from the searchregex subparser. Attributes
            used here include in_file, regex, out_folder, in_genome,
            ext_up_down, header_id, make_uniq_headers, regex_search_mode,
            regex_spacer_min, regex_spacer_max, regex_max_gu, regex_min_gc,
            add_zero_hits.

    Raises:
        AssertionError: on missing input files, invalid regexes, invalid
            --regex-search-mode, unsupported --in format, or empty results.

    """

    print("Running for you in SEARCHREGEX mode ... ")

    assert os.path.exists(args.in_file), "--in file \"%s\" not found" % (args.in_file)

    regex_check_dic = {}

    # --regex can be either a file containing several regexes (ID -> regex)
    # or a single regex string (stored under the generic ID "regex").
    if os.path.isfile(args.regex):

        regex_check_dic = benchlib.get_regexes_from_file(args.regex)

    else:

        # assert benchlib.is_valid_regex(args.regex), "invalid --regex \"%s\" given. Please provide a valid regex string" % (args.regex)

        regex_check_dic["regex"] = args.regex

    assert regex_check_dic, "no regexes found from --regex input"

    # Check regexes.
    print("Checking given regex(es) ... ")

    regex_dic = {}

    for regex_id in regex_check_dic:

        regex = regex_check_dic[regex_id]

        # Structure patterns and sequence regexes are validated/normalized
        # by different benchlib helpers.
        if benchlib.looks_like_structure(regex):

            print("Given regex \"%s\" looks like structure pattern ... " % regex)

            regex = benchlib.check_format_str_pattern(regex)

        else:

            print("Given regex \"%s\" assumed to be standard regex ... " % regex)

            assert benchlib.is_valid_regex(regex), "given regex \"%s\" is not a valid regular expression. Please provide valid expression" % (regex)

            # Expand IUPAC nucleotide codes (N, R, Y, ...) into regex character classes.
            regex = benchlib.convert_iupac_in_regex(regex)
        
        regex_dic[regex_id] = regex

    assert regex_dic, "no remaining regexes after regex checks"

    # Output files.
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)
    in_seqs_fa = args.out_folder + "/in_sequences.fa"
    in_reg_bed = args.out_folder + "/in_regions.bed"
    motif_hits_bed_out = args.out_folder + "/motif_hits.rbpbench_searchregex.bed"
    regions_with_hit_counts_bed_out = args.out_folder + "/regions_with_hit_counts.bed"

    """
    Check if --in is BED or FASTA.
    Get sequences dictionary accordingly.
    
    """

    # With --make-uniq-headers the IDs are regenerated, so skip ID checks.
    id_check = True
    if args.make_uniq_headers:
        id_check = False

    in_seqs_dic = {}
    bed_reg_dic = {}
    is_bed = False

    if benchlib.fasta_check_format(args.in_file):

        print("--in input file is FASTA. Read in sequences ... ")

        in_seqs_dic = benchlib.read_fasta_into_dic(args.in_file,
                                        dna=True,
                                        all_uc=True,
                                        id_check=id_check,
                                        empty_check=False,
                                        new_header_id=args.header_id,
                                        make_uniq_headers=args.make_uniq_headers,
                                        report=1,
                                        skip_n_seqs=False)

        assert in_seqs_dic, "no sequences read in from --in FASTA file. Make sure to provide a FASTA formatted file with DNA or RNA sequences"

    elif benchlib.bed_check_format(args.in_file, asserts=False):

        print("--in input file is BED. Extract region sequences from --genome FASTA file ... ")

        if not args.in_genome:
            assert False, "--in BED file provided, but no --genome FASTA file provided. Please provide a FASTA file with genomic sequences"
        assert os.path.exists(args.in_genome), "--genome FASTA file \"%s\" not found" % (args.in_genome)

        is_bed = True

        # Process extension info (upstream/downstream region extension).
        ext_up, ext_down = benchlib.get_ext_parts(args.ext_up_down)

        # Validate/extend regions and write them to in_reg_bed; returns
        # reg_id -> [chr_id, reg_s, reg_e, reg_pol] (coordinates as strings).
        bed_reg_dic = benchlib.bed_check_ids_output_bed(args.in_file, in_reg_bed,
                                                        id_check=id_check,
                                                        ext_up=ext_up,
                                                        ext_down=ext_down,
                                                        new_header_id=args.header_id,
                                                        make_uniq_headers=args.make_uniq_headers)

        # Remove index file if it exists to avoid using old index.
        # NOTE(review): this removes the index of the OUTPUT FASTA
        # (in_seqs_fa + ".fai"); presumably the stale index of the --genome
        # FASTA was meant — confirm against benchlib.bed_extract_sequences_from_fasta.
        fasta_index_file = in_seqs_fa + ".fai"
        if os.path.exists(fasta_index_file):
            os.remove(fasta_index_file)

        benchlib.bed_extract_sequences_from_fasta(in_reg_bed, args.in_genome, in_seqs_fa,
                                                  add_param="-name",  # new FASTA header format: bed_col4_id::chr21:45528055-45528135(-)
                                                  print_warnings=True)

        in_seqs_dic = benchlib.read_fasta_into_dic(in_seqs_fa,
                                                   dna=True,
                                                   all_uc=True,
                                                   name_bed=True,  # Makes sure that in_seqs_dic keys are BED column 4 IDs.
                                                   id_check=True,
                                                   empty_check=False,
                                                   skip_n_seqs=False)
        
        assert in_seqs_dic, "no sequences extracted from --in BED file. Make sure to provide a BED file with genomic regions and a compatible FASTA file with genomic sequences"

    else:
        assert False, "--in input file format not supported. Provide either FASTA or BED file"

    assert in_seqs_dic, "no sequences read in from --in FASTA file. Make sure to provide a FASTA formatted file with DNA or RNA sequences"


    """
    Search for regex motif hits in sequence dictionary.
    
    hits_dic format:
    {'seq1': [[0, 2, 'AT'], [4, 6, 'AG']]}

    bed_reg_dic[reg_id] = [chr_id, reg_s, reg_e, reg_pol]

    """
    # Search mode 1: advance one position per step (overlapping hits allowed);
    # mode 2: continue after each hit (non-overlapping hits).
    step_size_one = False
    if args.regex_search_mode == 1:
        step_size_one = True
    elif args.regex_search_mode == 2:
        step_size_one = False
    else:
        assert False, "invalid --regex-search-mode %i set" %(args.regex_search_mode)


    """
    Loop over provided regexes and search for hits in sequences.

    """

    # NOTE: c_all_hits accumulates over ALL regexes, so the per-regex
    # "# regex motif hits" print below reports the running total.
    c_all_hits = 0

    MOTIFOUT = open(motif_hits_bed_out, "w")

    # seq_id -> total hit count (summed over all regexes).
    seqids2hit_c_dic = {}

    for regex_id in regex_dic:

        regex = regex_dic[regex_id]
        
        print("Search for regex \"%s\" hits (ID: %s) ..." %(regex, regex_id))

        # Determine regex type.
        regex_type = "sequence"
        if benchlib.looks_like_structure(regex):
            regex_type = "structure"

        # Run regex search based on type.
        if regex_type == "sequence":

            hits_dic = benchlib.search_regex_in_seqs_dic(regex, in_seqs_dic,
                                                        step_size_one=step_size_one,
                                                        case_sensitive=True)

        else:  # structure pattern assumed.

            hits_dic = benchlib.search_str_pat_in_seqs_dic(regex, in_seqs_dic,
                                        step_size_one=step_size_one,
                                        regex_spacer_min=args.regex_spacer_min,
                                        regex_spacer_max=args.regex_spacer_max,
                                        regex_max_gu=args.regex_max_gu,
                                        regex_min_gc=args.regex_min_gc)

        for hit in hits_dic:
            c_all_hits += len(hits_dic[hit])

        # print("")
        if is_bed:
            print("# input regions:     ", len(in_seqs_dic))
            print("# regex motif hits:  ", c_all_hits)
        else:
            print("# input sequences:   ", len(in_seqs_dic))
            print("# regex motif hits:  ", c_all_hits)

        """
        Output files.
        
        BED sequence ID format:
        chr20:62139082-62139128(-)

        """

        # Deduplicate hits with identical coordinates within this regex
        # (reset per regex, so identical coordinates from different
        # regexes are still reported).
        seen_motif_sites_dic = {}

        # Extra BED columns; only filled for structure pattern hits.
        add_info1 = "-"
        add_info2 = "-"

        for seq_id in hits_dic:

            c_hits = len(hits_dic[seq_id])

            if seq_id in seqids2hit_c_dic:
                seqids2hit_c_dic[seq_id] += c_hits
            else:
                seqids2hit_c_dic[seq_id] = c_hits

            for hit_info in hits_dic[seq_id]:  # for each hit on sequence with seq_id.
                start = hit_info[0]
                end = hit_info[1]
                seq = hit_info[2]

                if regex_type == "structure":

                    add_info1 = hit_info[6]  # GC base pair fraction.
                    add_info2 = hit_info[7]  # GU base pair fraction.

                if is_bed:

                    # Map the hit's sequence-local coordinates back to
                    # genomic coordinates via the region's coordinates.
                    gen_reg_coords = bed_reg_dic[seq_id]
                    reg_id = gen_reg_coords[0] + ":" + gen_reg_coords[1] + "-" + gen_reg_coords[2] + "(" + gen_reg_coords[3] + ")"

                    gen_motif_coords = benchlib.get_genomic_coords_from_seq_name(reg_id, start, end,
                                                                                one_based_start=False)

                    chr_id = gen_motif_coords[0]
                    gen_motif_s = gen_motif_coords[1]
                    gen_motif_e = gen_motif_coords[2]
                    strand = gen_motif_coords[3]

                    motif_site_str = "%s:%i-%i(%s)" %(chr_id, gen_motif_s, gen_motif_e, strand)
                    if motif_site_str in seen_motif_sites_dic:
                        continue
                    else:
                        seen_motif_sites_dic[motif_site_str] = 1
                        MOTIFOUT.write("%s\t%i\t%i\t%s\t0\t%s\t%s\t%s\t%s\t%s\n" %(chr_id, gen_motif_s, gen_motif_e, regex_id, strand, seq, regex, add_info1, add_info2))

                else:

                    # FASTA input: report hit in sequence-local coordinates,
                    # plus strand (always "+").
                    motif_site_str = "%s:%i-%i" %(seq_id, start, end)
                    if motif_site_str in seen_motif_sites_dic:
                        continue
                    else:
                        seen_motif_sites_dic[motif_site_str] = 1
                        MOTIFOUT.write("%s\t%s\t%s\t%s\t0\t+\t%s\t%s\t%s\t%s\n" %(seq_id, start, end, regex_id, seq, regex, add_info1, add_info2))

        print("# unique regex hits: ", len(seen_motif_sites_dic))

    MOTIFOUT.close()

    """
    Output regions with hit counts.

    """

    REGIONOUT = open(regions_with_hit_counts_bed_out, "w")

    for seq_id in seqids2hit_c_dic:

        c_hits = seqids2hit_c_dic[seq_id]
        len_seq = len(in_seqs_dic[seq_id])

        if is_bed:

            gen_reg_coords = bed_reg_dic[seq_id]
            chr_id = gen_reg_coords[0]
            reg_s = gen_reg_coords[1]
            reg_e = gen_reg_coords[2]
            strand = gen_reg_coords[3]
            REGIONOUT.write("%s\t%s\t%s\t%s\t%i\t%s\n" %(chr_id, reg_s, reg_e, seq_id, c_hits, strand))

        else:

            REGIONOUT.write("%s\t0\t%i\t%s\t%i\t+\n" %(seq_id, len_seq, seq_id, c_hits))

    """
    If regions with zero hits should also be added to REGIONOUT.

    """

    if args.add_zero_hits:
        
        print("Also add regions with zero hits ... ")

        for seq_id in in_seqs_dic:
            if seq_id not in seqids2hit_c_dic:
                if is_bed:
                    gen_reg_coords = bed_reg_dic[seq_id]
                    chr_id = gen_reg_coords[0]
                    reg_s = gen_reg_coords[1]
                    reg_e = gen_reg_coords[2]
                    strand = gen_reg_coords[3]
                    REGIONOUT.write("%s\t%s\t%s\t%s\t0\t%s\n" %(chr_id, reg_s, reg_e, seq_id, strand))

                else:
                    len_seq = len(in_seqs_dic[seq_id])
                    REGIONOUT.write("%s\t0\t%i\t%s\t0\t+\n" %(seq_id, len_seq, seq_id))

    REGIONOUT.close()

    if not seqids2hit_c_dic:
        print("No regex motif hits found in --in regions / sequences")

    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Regex motif hits .bed:\n%s" %(motif_hits_bed_out))
    print("Sequences / regions with hit counts .bed:\n%s" %(regions_with_hit_counts_bed_out))
    print("")


################################################################################

def main_searchseq(args):
    """
    Sequence motifs search.

    """

    print("Running for you in SEARCHSEQ mode ... ")


    assert os.path.exists(args.in_seqs), "--in FASTA file \"%s\" not found" % (args.in_seqs)
    assert benchlib.boundary_check(args.profiles_k, 1, 6), "set --profiles-k expected to be >= 1 and <= 6"

    # Is MEME >= v5 installed?
    if not args.meme_disable_check:
        assert benchlib.is_tool("meme"), "meme not in PATH"
        check, meme_version = benchlib.check_tool_version("meme -version", "5.0")
        assert check, "RBPBench requires meme version >= 5.0 (installed version: %s)" %(meme_version)

    # Check if MEME version is >= 5.5.4 (need to add fimo --no-pgc option to produce same results!).
    fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text"
    if not args.meme_disable_check:
        check, meme_version = benchlib.check_tool_version("meme -version", "5.5.4")
        args.meme_version = meme_version
        if check:
            fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"
    if args.meme_no_pgc:
        fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"

    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.fimo_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.fimo_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    if args.fimo_user_ntf_file:
        fimo_freqs_file = args.fimo_user_ntf_file
    assert os.path.exists(fimo_freqs_file), "set FIMO nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    # Regex check.
    regex_type = "sequence"
    if args.regex:
        if args.regex_type == 1:
            print("Check given --regex type ... ")
            if benchlib.looks_like_structure(args.regex):
                print("Given --regex looks like structure pattern ... ")
                regex_type = "structure"
            else:
                print("Given --regex assumed to be standard regex ... ")
                regex_type = "sequence"
        elif args.regex_type == 2:
            regex_type = "sequence"
        elif args.regex_type == 3:
            regex_type = "structure"
        else:
            assert False, "unexpected --regex-type value set (%d)" %(args.regex_type)
        # If standard regex, check validity.
        if regex_type == "sequence":
            assert benchlib.is_valid_regex(args.regex), "given --regex \"%s\" is not a valid regular expression. Please provide valid expression" % (args.regex)

    """
    Motif database.
    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                                          db_path=db_path)
    # Custom motif database: folder given.
    if args.custom_db:
        assert not args.custom_db_meme_xml, "--custom-db folder set incompatible with --custom-db-meme-xml. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_cm, "--custom-db folder set incompatible with --custom-db-cm. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_info, "--custom-db folder set incompatible with --custom-db-info. Provide custom motif database either via folder (--custom-db) or as single files"
        seq_motifs_db_file, str_motifs_db_file, rbp2ids_file = specify_custom_motif_db(args.custom_db)
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
    # Custom motif database: single files given.
    if args.custom_db_meme_xml or args.custom_db_cm or args.custom_db_info:
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
        assert not args.custom_db, "single custom motif database files provided not compatible with --custom-db. Provide custom motif database either via folder (--custom-db) or as single files"
        assert args.custom_db_info, "--custom-db-info needed to define custom motif database"
        rbp2ids_file = args.custom_db_info
        assert args.custom_db_meme_xml or args.custom_db_cm, "--custom-db-meme-xml and/or --custom-db-cm needed to define custom motif database"
        if args.custom_db_meme_xml:
            seq_motifs_db_file = args.custom_db_meme_xml
        else:
            seq_motifs_db_file = ""  # setting to empty string results in os.path.exists -> False.
        if args.custom_db_cm:
            str_motifs_db_file = args.custom_db_cm
        else:
            str_motifs_db_file = ""

    args.motif_db_str = motif_db_str

    """
    Get ID mappings

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")

    """
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)

    # Motif ID -> RBP name (== RBP ID) mapping.
    id2name_dic = {}
    for rbp_id in name2ids_dic:
        for motif_id in name2ids_dic[rbp_id]:
            id2name_dic[motif_id] = rbp_id

    """
    Get MEME XML database motif blocks dictionary.

    """
    seq_motif_blocks_dic = {}
    if os.path.exists(seq_motifs_db_file):
        seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file,
                                                           empty_check=True)
    for motif_id in seq_motif_blocks_dic:
        assert motif_id in id2name_dic, "MEME XML motif ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    """
    Get covariance model database motif blocks dictionary.

    """
    str_motif_blocks_dic = {}
    if str_motifs_db_file and os.path.exists(str_motifs_db_file):
        str_motif_blocks_dic = benchlib.read_in_cm_blocks(str_motifs_db_file, 
                                                          empty_check=True)
        for motif_id in str_motif_blocks_dic:
            assert motif_id in id2name_dic, "Covariance model accession ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    # Remove special chars from run ID.
    args.data_id = benchlib.remove_special_chars_from_str(args.data_id)
    assert args.data_id, "empty string after removing special chars from --data-id. Please provide alphanumeric string for data ID (- or _ are okay as well)"
    args.method_id = benchlib.remove_special_chars_from_str(args.method_id)
    assert args.method_id, "empty string after removing special chars from --method-id. Please provide alphanumeric string for method ID (- or _ are okay as well)"
    # Run ID definition.
    run_id = "run_id"
    # if args.run_id:
    #     run_id = benchlib.remove_special_chars_from_str(args.run_id)
    #     assert run_id, "empty string after removing special chars from --run-id. Please provide alphanumeric string for run ID (- or _ are okay as well)"
    # else:
    #     random_id = uuid.uuid4()
    #     run_id = str(random_id)

    # hash_len = max(len(run_id), len(args.data_id), len(args.method_id))
    # print("###################" + "#"*hash_len)
    print("Run ID:     ", run_id)
    print("Data ID:    ", args.data_id)
    print("Method ID:  ", args.method_id)
    # print("###################" + "#"*hash_len)

    """
    Output folders + files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    # filtered_sites_bed = args.out_folder + "/in_sites.filtered.bed"
    filtered_seqs_fa = args.out_folder + "/in_sequences.filtered.fa"
    seq_motifs_xml = args.out_folder + "/seq_motifs.xml"
    str_motifs_cm = args.out_folder + "/str_motifs.cm"
    fimo_res_tsv = args.out_folder + "/fimo_results.tsv"
    cmsearch_res_txt = args.out_folder + "/cmsearch_results.txt"

    rbp_stats_out = args.out_folder + "/rbp_hit_stats.tsv"
    motif_stats_out = args.out_folder + "/motif_hit_stats.tsv"
    # con_res_out_tsv = args.out_folder + "/contingency_table_results.tsv"
    settings_file = args.out_folder + "/settings.rbpbench_searchseq.out"
    rbp_reg_occ_table_out = args.out_folder + "/rbp_region_occupancies.tsv"

    # Output unique motif hits.
    motif_hits_bed_out = args.out_folder + "/motif_hits.rbpbench_searchseq.bed"
    # Output matched sequences stats.
    matched_seqs_out = args.out_folder + "/matched_seq_stats.tsv"
    # Profile output files.
    hit_profiles_out = args.out_folder + "/hit_profiles.tsv"
    kmer_profiles_out = args.out_folder + "/kmer_profiles.tsv"

    # Temp files.
    # random_id = uuid.uuid1()
    # tmp_out_bed = args.out_folder + "/" + str(random_id) + ".filtered_in.bed"
    out_tmp_bed = args.out_folder + "/rbp_motif_hit_regions.tmp.bed"
    cmstat_tmp_out = args.out_folder + "/cmstat_out.tmp.txt"

    # Delete result files left over from a previous run, if present.
    if os.path.exists(fimo_res_tsv):
        os.remove(fimo_res_tsv)
    if os.path.exists(cmsearch_res_txt):
        os.remove(cmsearch_res_txt)


    """
    Load RBP data based on --rbps (+ optionally USER data).

    """

    rbp_in_dic = {}
    for rbp_id in args.list_rbps:
        rbp_in_dic[rbp_id] = 1

    # RBPs for motif search.
    loaded_rbps_dic = {}

    # USER set?
    user_motifs = False
    user_rbp_id = False
    if "USER" in rbp_in_dic:
        user_motifs = True
    else:
        assert not args.user_meme_xml, "--user-meme-xml provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_cm, "--user-cm provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_rbp_id, "--user-rbp-id set but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"

    special_rbp_ids_list = ["USER", "REGEX"]

    # If ALL set, load all RBPs (+ optionally USER).
    if "ALL" in rbp_in_dic:
        if len(rbp_in_dic) == 2:
            assert user_motifs, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        if len(rbp_in_dic) > 2:
            assert False, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        print("--rbps ALL selected. Loading all database motifs ... ")
        for rbp_id in name2ids_dic:
            loaded_rbps_dic[rbp_id] = motif_db_str

    else:
        # Load individual RBPs.
        for rbp_id in rbp_in_dic:
            if rbp_id not in special_rbp_ids_list:
                """
                Check if RBP ID in database.
                Suggest similar RBPs based on string similarity (edit distance).

                """
                if rbp_id not in name2ids_dic:
                    db_rbp_list = []
                    for db_rbp_id in name2ids_dic:
                        db_rbp_list.append(db_rbp_id)
                    pair_dist_dic = benchlib.calc_edit_dist_query_list(rbp_id, db_rbp_list)
                    max_c = 10
                    c = 0
                    suggested_rbps = []
                    for key, value in sorted(pair_dist_dic.items(), key=lambda item: item[1], reverse=False):
                        if c >= max_c:
                            break
                        c += 1
                        suggested_rbps.append(key)
                    suggested_rbps_str = ",".join(suggested_rbps)
                    assert False, "provided --rbps ID %s not in internal motif database (%s). Please provide RBP name present in database. Did you mean (any of) the following database ID(s) (top 10 hits based on string similarity): %s ?" %(rbp_id, motif_db_str, suggested_rbps_str)
                # assert rbp_id in name2ids_dic, "provided --rbps ID %s not in internal motif database. Please provide RBP name present in database" %(rbp_id)
                loaded_rbps_dic[rbp_id] = motif_db_str

    # Motif IDs for search.
    loaded_motif_ids_dic = {}
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            loaded_motif_ids_dic[motif_id] = motif_db_str

    """
    Check and load provided USER data.

    """

    if user_motifs:
        print("--rbps USER selected. Check + load provided USER motifs ... ")
        assert args.user_rbp_id, "--rbps USER demands --user-rbp-id to be set to connect the supplied motif(s) with an RBP ID"
        assert args.user_meme_xml or args.user_cm, "--rbps USER requires a provided sequence or structure motif file (via --user-meme-xml AND/OR --user-cm)"

        # Reformat user_rbp_id. 
        user_rbp_id = benchlib.remove_special_chars_from_str(args.user_rbp_id)
        assert user_rbp_id, "empty string after removing special chars from --user-rbp-id. Please provide alphanumeric string for RBP ID (- or _ are okay as well)"

        assert user_rbp_id not in loaded_rbps_dic, "user RBP ID %s already selected from database. Please deselect respective database RBP ID or provide unique user RBP ID via --user-rbp-id" %(user_rbp_id)
        loaded_rbps_dic[user_rbp_id] = "user"
        # In case user_rbp_id in database, reset motif IDs associated to user_rbp_id.
        name2ids_dic[user_rbp_id] = []
        print("RBP ID for user-supplied motifs:", user_rbp_id)

        user_seq_motif_blocks_dic = {}
        if args.user_meme_xml:
            assert os.path.exists(args.user_meme_xml), "--user-meme-xml file \"%s\" not found" % (args.user_meme_xml)
            user_seq_motif_blocks_dic = benchlib.read_in_xml_motifs(args.user_meme_xml, empty_check=False)
            assert user_seq_motif_blocks_dic, "no motifs read in from provided --user-meme-xml. Make sure to supply sequence motifs in MEME XML format!"
            # Check if motif ID already loaded.
            for acc_id in user_seq_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied MEME XML motif ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change user motif ID to a unique motif ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                seq_motif_blocks_dic[acc_id] = user_seq_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "meme_xml"
                id2name_dic[acc_id] = user_rbp_id

        user_str_motif_blocks_dic = {}
        if args.user_cm:
            assert os.path.exists(args.user_cm), "--user-cm file \"%s\" not found" % (args.user_cm)
            # Check for valid format.
            acc_ids_dic = benchlib.check_cm_file(args.user_cm, cmstat_tmp_out, empty_check=False)
            # Read in covariance model blocks.
            user_str_motif_blocks_dic = benchlib.read_in_cm_blocks(args.user_cm)
            for acc_id in acc_ids_dic:
                assert acc_id in user_str_motif_blocks_dic, "accession ID %s not in blocks dictionary. Please contact developers!" %(acc_id)
            # Check if motif ID already loaded.
            for acc_id in user_str_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied covariance model accession (ACC) ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change to a unique accession ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                str_motif_blocks_dic[acc_id] = user_str_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "cm"
                id2name_dic[acc_id] = user_rbp_id

    """
    Get sequence motif lengths.

    """

    id2len_dic = benchlib.get_seq_motif_lengths(seq_motif_blocks_dic)

    """
    Optionally filter DREME/MEME sequence motifs by length.

    """

    if args.motif_min_len or args.motif_max_len:

        if args.motif_min_len and args.motif_max_len:
            assert args.motif_min_len <= args.motif_max_len, "set --motif-min-len needs to be <= --motif-max-len!"

        print("Filtering sequence motifs by set min/max lengths ... ")

        seq_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, name2ids_dic, c_flt_out = benchlib.filter_dic_by_motif_lengths(
                seq_motif_blocks_dic, str_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, id2name_dic,
                id2len_dic, motif_min_len=args.motif_min_len, motif_max_len=args.motif_max_len
            )

        assert loaded_rbps_dic, "no MEME/DREME sequence motifs left after length filtering. Please adjust length filter range (--motif-min-len, --motif-max-len), RBP selection, or disable length filtering!"

        print("Filtered out %d sequence motifs outside set length range" %(c_flt_out))


    """
    Check if loaded RBP IDs have motifs.

    """
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            found = 0
            if motif_id in seq_motif_blocks_dic:
                found += 1
            if motif_id in str_motif_blocks_dic:
                found += 1
            assert found, "no motifs loaded for RBP ID \"%s\". Please provide the respective motifs file" %(rbp_id)



    """
    If --regex is set:
    Treat regex as sequence motif / fimo type.
    rbp_id: regex, motif_id: regex, motif_db: regex

    """

    regex_id = args.regex_id
    regex = args.regex

    if args.regex:

        if regex_type == "sequence":

            # Remove , ; from given regex, to avoid motif_id format conflicts.
            regex = benchlib.remove_special_chars_from_str(args.regex,
                                                        reg_ex="[ ;]",
                                                        to_upper=False)  # [ :;\(\)]
            
            assert regex, "empty string after removing special chars ( ;) from --regex. Please provide a valid regex with DNA letters"

            # Convert IUPAC codes (if present) in regex to standard regex format.
            regex = benchlib.convert_iupac_in_regex(regex)

        elif regex_type == "structure":

            # Check structure pattern and format.
            regex = benchlib.check_format_str_pattern(args.regex)

        args.regex = regex

        regex_id = benchlib.remove_special_chars_from_str(args.regex_id)

        assert regex_id, "empty string after removing special chars from --regex-id. Please provide alphanumeric string for regex ID (- or _ are okay as well)"
        assert regex_id not in name2ids_dic, "--regex set but a different RBP ID with name \"%s\" was found. Please provide a different RBP ID or --regex-id" %(regex_id)

        args.regex_id = regex_id

        if args.motif_regex_id:
            assert regex_id not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or --regex-id" %(regex_id)

            id2name_dic[regex_id] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex_id] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex_id] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex_id]  # rbp_id -> motif_ids

        else:
            assert regex not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or use --motif-regex-id" %(regex_id)

            id2name_dic[regex] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex]  # rbp_id -> motif_ids

        loaded_rbps_dic[regex_id] = regex_id  # rbp_id -> motif_db_str

        """
        Dictionaries that use motif_id as keys.
        loaded_motif_ids_dic
        name2ids_dic
        id2name_dic
        id2type_dic
        """
    else:
        regex_id = False



    """
    Get function ID -> function descriptions mapping.

    """
    fid2desc_file = db_path + "/rbp_function_list.32728246.tsv"
    fid2desc_dic, desc2fid_dic = benchlib.get_fid2desc_mapping(fid2desc_file)


    """
    If --motifs sets, filter loaded_rbps_dic + loaded_motif_ids_dic.
    
    """

    if args.motifs_list:

        print("Filtering loaded motifs by provided --motifs ... ")

        motif_fids_dic = {}

        for motif_id in args.motifs_list:
            motif_fids_dic[motif_id] = 1

        filtered_rbps_dic = {}
        filtered_motif_ids_dic = {}
        filtered_name2ids_dic = {}
        for motif_id in motif_fids_dic:
            assert motif_id in id2name_dic, "motif ID \"%s\" provided via --motifs not found in internal motif ID -> RBP ID mapping. Please provide valid motif IDs" %(motif_id)
            assert motif_id in loaded_motif_ids_dic, "motif ID \"%s\" provided via --motifs not found in loaded motifs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_motif_ids_dic[motif_id]
            filtered_motif_ids_dic[motif_id] = set_db_str
            rbp_id = id2name_dic[motif_id]
            assert rbp_id in loaded_rbps_dic, "motif ID \"%s\" provided via --motifs not found in loaded RBPs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_rbps_dic[rbp_id]
            filtered_rbps_dic[rbp_id] = set_db_str
            if rbp_id in filtered_name2ids_dic:
                filtered_name2ids_dic[rbp_id].append(motif_id)
            else:
                filtered_name2ids_dic[rbp_id] = [motif_id]

        if args.regex:
            filtered_rbps_dic[regex_id] = regex_id
            if args.motif_regex_id:
                filtered_motif_ids_dic[regex_id] = regex_id
                filtered_name2ids_dic[regex_id] = [regex_id]
            else:
                filtered_motif_ids_dic[regex] = regex_id
                filtered_name2ids_dic[regex_id] = [regex]

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by --motifs: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by --motifs:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by --motifs: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by --motifs:", c_loaded_motif_ids_post)

        assert loaded_motif_ids_dic, "no remaining motifs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        name2ids_dic = filtered_name2ids_dic


    """
    If --functions set, filter loaded_rbps_dic + loaded_motif_ids_dic.

    """
    rbp_fids_dic = {}
    if args.rbp_functions:

        print("Filtering loaded RBPs by provided function IDs ... ")

        for fid in args.rbp_functions:
            rbp_fids_dic[fid] = 1

        # Check if provided function IDs are valid.
        for fid in rbp_fids_dic:
            assert fid in fid2desc_dic, "function ID \"%s\" provided via --functions not found in internal function ID -> description mapping. Please provide valid function IDs (see rbpbench info for a detailed description)" %(fid)

        # Filter loaded_rbps_dic.
        filtered_rbps_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:  # keep regex if set.
                filtered_rbps_dic[regex_id] = regex_id
                continue
            if rbp_id not in name2fids_dic:
                continue
            for fid in rbp_fids_dic:
                if fid in name2fids_dic[rbp_id]:
                    set_db_str = loaded_rbps_dic[rbp_id]
                    filtered_rbps_dic[rbp_id] = set_db_str
                    break

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by functions: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by functions:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided function IDs. Please provide compatible RBPs + function IDs (see rbpbench info for annotated RBP functions)"

        # Filter loaded motif IDs.
        filtered_motif_ids_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:
                if args.motif_regex_id:
                    filtered_motif_ids_dic[regex_id] = regex_id
                else:
                    filtered_motif_ids_dic[regex] = regex_id
            else:
                for motif_id in name2ids_dic[rbp_id]:
                    set_db_str = loaded_motif_ids_dic[motif_id]
                    filtered_motif_ids_dic[motif_id] = set_db_str
        

        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by functions: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by functions:", c_loaded_motif_ids_post)



    """
    Load RBP data, store in RBP() class.

    """

    # Store motif IDs for search.
    search_rbps_dic = {}
    seq_rbps_dic = {}
    str_rbps_dic = {}
    motif_id2idx_dic = {} # motif ID -> list index.
    args.internal_id = []

    for rbp_id in loaded_rbps_dic:
    
        internal_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
        args.internal_id.append(internal_id)
        rbp = benchlib.RBP(rbp_id, internal_id)

        for motif_id in name2ids_dic[rbp_id]:
        
            assert motif_id in loaded_motif_ids_dic, "motif_id %s not in loaded_motif_ids_dic" %(motif_id)

            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "regex":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "cm":
                rbp.str_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.str_motif_ids) - 1
                rbp.str_motif_hits.append(0)
                str_rbps_dic[rbp_id] = 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

        search_rbps_dic[rbp_id] = rbp


    print("# of RBP IDs for search:    ", len(loaded_rbps_dic))
    print("# of motif IDs for search:  ", len(loaded_motif_ids_dic))

    """
    Read in sequences.
    """
    id_check = True
    if args.make_uniq_headers:
        id_check = False
    
    print("Read in sequences from --in FASTA ... ")
    
    in_seqs_dic = benchlib.read_fasta_into_dic(args.in_seqs,
                                       dna=True,
                                       all_uc=True,
                                       id_check=id_check,
                                       empty_check=False,
                                       new_header_id=args.header_id,
                                       make_uniq_headers=args.make_uniq_headers,
                                       report=1,
                                       remove_regex=r"[ :\(\)]",
                                       skip_n_seqs=False)

    assert in_seqs_dic, "no sequences read in from --in FASTA file. Make sure to provide a FASTA formatted file with DNA or RNA sequences"

    print("# of read-in sequences:   ", len(in_seqs_dic))

    if args.min_seq_len or args.max_seq_len:
        if args.min_seq_len and args.max_seq_len:
            assert args.min_seq_len <= args.max_seq_len, "set --min-seq-length should be <= --max-seq-length"
        
        print("Filtering input sequences by set min/max lengths ... ")
        # print("# of input sequences pre-filtering:  ", len(in_seqs_dic))

        filtered_seqs_dic = {}
        for seq_id, seq in in_seqs_dic.items():
            seq_len = len(seq)
            if args.min_seq_len and not args.max_seq_len:
                if seq_len >= args.min_seq_len:
                    filtered_seqs_dic[seq_id] = seq
            elif args.max_seq_len and not args.min_seq_len:
                if seq_len <= args.max_seq_len:
                    filtered_seqs_dic[seq_id] = seq
            else:  # both min and max set.
                if seq_len >= args.min_seq_len and seq_len <= args.max_seq_len:
                    filtered_seqs_dic[seq_id] = seq

        assert filtered_seqs_dic, "no sequences remaining after filtering by set min/max lengths ... "
        print("# of remaining sequences: ", len(filtered_seqs_dic))
        in_seqs_dic = filtered_seqs_dic



    if args.profiles_seq_id:
        assert args.profiles_seq_id in in_seqs_dic, "specified --profiles-seq-id is not a sequence ID found in input sequences (or filtered out by min/max length?). Please provide a compatible sequence ID"

    # Output sequences to FASTA.
    benchlib.fasta_output_dic(in_seqs_dic, filtered_seqs_fa,
                              split=True)


    # Effective number of regions used for motif search.
    c_regions = len(in_seqs_dic)

    # Called region size.
    called_reg_size = 0
    len_list = []
    for seq_id in in_seqs_dic:
        seq_len = len(in_seqs_dic[seq_id])
        called_reg_size += seq_len
        len_list.append(seq_len)

    # Length statistics.
    reg_len_median = statistics.median(len_list)
    reg_len_mean = statistics.mean(len_list)
    reg_len_mean = round(reg_len_mean, 2)
    reg_len_min = min(len_list)
    reg_len_max = max(len_list)

    print("Mean sequence length:     ", reg_len_mean)
    print("Median sequence length:   ", reg_len_median)
    print("Minimum sequence length:  ", reg_len_min)
    print("Maximum sequence length:  ", reg_len_max)


    """
    ====================================
    RUN SEQUENCE MOTIF SEARCH WITH FIMO.
    ====================================

    Only executed if sequence motifs (MEME XML) were selected (seq_rbps_dic).
    Optionally also collects regex hits (--regex) into the same hits list.

    """
    # Sequence motif hits (FIMO hits, plus regex hits appended below if --regex set).
    fimo_hits_list = []
    # Passed to benchlib runners to record call information — see benchlib.
    call_dic = {}

    if seq_rbps_dic:

        """
        Print motifs to file.

        """

        # Write the selected sequence motifs as MEME XML (FIMO input).
        print("Output motifs to XML ... ")
        out_str, c_added_motifs = benchlib.blocks_to_xml_string(seq_motif_blocks_dic, loaded_motif_ids_dic)

        benchlib.output_string_to_file(out_str, seq_motifs_xml)


        """
        Run FIMO on sequences + motifs.

        """

        print("Run FIMO ... ")
        benchlib.run_fast_fimo(filtered_seqs_fa, seq_motifs_xml, fimo_res_tsv,
                    pval_thr=args.fimo_pval,
                    nt_freqs_file=fimo_freqs_file,
                    call_dic=call_dic,
                    params=fimo_params,
                    error_check=False)

        """
        Read in FIMO hits.

        """

        # error_check=False above means failures surface here as a missing output file.
        assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

        print("Read in FIMO results ... ")
        fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                       only_best_hits=args.greatest_hits,
                                                       seq_based=True)

        c_fimo_hits = len(fimo_hits_list)
        print("# of FIMO motif hits:", c_fimo_hits)


        """
        If --regex is set, search for regex hits in sequences (stored in in_seqs_dic).

        """
        if args.regex:

            # --regex-search-mode 1: move over sequence with step size 1;
            # mode 2: continue search after the end of each hit.
            step_size_one = False
            if args.regex_search_mode == 1:
                step_size_one = True
            elif args.regex_search_mode == 2:
                step_size_one = False
            else:
                assert False, "invalid --regex-search-mode %i set" %(args.regex_search_mode)

            print("Run search for --regex \"%s\" ... " %(regex))
            regex_hits_list = benchlib.get_regex_hits(regex, regex_id, in_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      regex_type=regex_type,
                                                      seq_based=True,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=args.motif_regex_id)

            c_regex_hits = len(regex_hits_list)
            print("# of regex hits:", c_regex_hits)

            # Add regex hits to fimo_hits_list, so downstream processing
            # treats sequence motif hits and regex hits uniformly.
            fimo_hits_list += regex_hits_list

    """
    =========================================
    RUN STRUCTURE MOTIF SEARCH WITH CMSEARCH.
    =========================================

    Only executed if structure motifs (covariance models) were selected
    (str_rbps_dic).

    """
    cmsearch_hits_list = []

    if str_rbps_dic:

        # Write the selected covariance models to one .cm file (cmsearch input).
        print("Output covariance models to .cm ... ")
        benchlib.output_cm_blocks_to_file(str_motif_blocks_dic, loaded_motif_ids_dic, str_motifs_cm)

        # Run cmsearch.
        print("Run cmsearch ... ")
        # --cmsearch-mode 1: cmsearch default filtering; mode 2: --max (no filtering).
        cmsh_mode = ""
        if args.cmsearch_mode == 1:
            cmsh_mode = "--default"
        elif args.cmsearch_mode == 2:
            cmsh_mode = "--max"
        else:
            assert False, "invalid --cmsearch-mode %i set" %(args.cmsearch_mode)
        # -g: global alignment mode, --toponly: search given strand only,
        # --incT/-T: inclusion/reporting bit score thresholds (--cmsearch-bs).
        cmsh_params = "-g --tformat fasta --toponly --incT %s -T %s %s" %(args.cmsearch_bs, args.cmsearch_bs, cmsh_mode)

        benchlib.run_cmsearch(filtered_seqs_fa, str_motifs_cm, cmsearch_res_txt,
                        error_check=False,
                        call_dic=call_dic,
                        params=cmsh_params)  # or add --anytrunc and remove --g
        # Read in hits.
        print("Read in cmsearch results ... ")
        cmsearch_hits_list, c_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                           seq_based=True,
                                                                           only_best_hits=args.greatest_hits,
                                                                           check=True)

        print("# of cmsearch motif hits:", c_cms_hits)

    """
    Store for each RBP the regions with motif hits (and hit counts), using
    dictionary of dictionaries regions_with_motifs_dic.
    This tells us, how many input regions have motif hits, separated by RBP.
    Also store for each RBP the unique motif hit regions (and hit counts), using
    dictionary of dictionaries unique_motifs_dic.

    regions_with_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'region1': motif_c_region1, 'region2': motif_c_region2}, rbp_id2 -> {'region1': motif_c_region1}}
    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }

    """

    regions_with_motifs_dic = {}
    unique_motifs_dic = {}

    # Store regions with sequence motifs.
    for fh in fimo_hits_list:

        # Map motif ID to its RBP ID.
        rbp_id = id2name_dic[fh.motif_id]

        # Count hits per input region, grouped by RBP.
        if rbp_id in regions_with_motifs_dic:
            # fh.seq_name : FASTA header (== --in genomic sequence region).
            if fh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][fh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][fh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][fh.seq_name] = 1

        fh_str = repr(fh) # genomic motif region string.

        # Unique motif regions for each RBP (keyed by the hit's repr string;
        # the count records how often the same genomic motif region occurs).
        if rbp_id in unique_motifs_dic:
            if fh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][fh_str] += 1
            else:
                unique_motifs_dic[rbp_id][fh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][fh_str] = 1

        # (Per-hit motif stats are written to file further below.)


    # Store regions with structure motifs (same bookkeeping as for
    # sequence motif hits above).
    for cmsh in cmsearch_hits_list:

        rbp_id = id2name_dic[cmsh.motif_id]

        if rbp_id in regions_with_motifs_dic:
            # cmsh.seq_name : FASTA header (== --in genomic sequence region).
            if cmsh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1

        cmsh_str = repr(cmsh) # genomic motif region string.

        # Unique motif regions for each RBP.
        if rbp_id in unique_motifs_dic:
            if cmsh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][cmsh_str] += 1
            else:
                unique_motifs_dic[rbp_id][cmsh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][cmsh_str] = 1

    """
    Store infos for each RBP in RBP class.

    search_rbps_dic[rbp_id] = rbp_class
    RBP class arguments:
            name: str,
            seq_motif_ids = None,
            str_motif_ids = None,
            c_hit_reg = 0, # # regions with motif hits.
            perc_hit_reg = 0.0, # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
            c_motif_hits = 0, # # motif hits.
            c_uniq_motif_hits = 0, # # unique motif hits.
            c_uniq_motif_nts = 0, # # unique motif nucleotides.
            perc_uniq_motif_nts_eff_reg = 0.0, # % unique motif nts over effective region length.
            perc_uniq_motif_nts_cal_reg = 0.0, # % unique motif nts over called region length.
            uniq_motif_hits_eff_1000nt = 0.0, # unique motif hits per effective 1000 nt.
            uniq_motif_hits_cal_1000nt = 0.0, # unique motif hits per called 1000 nt.
            ks_pval = 1.0, # Kolmogorov-Smirnov (KS) statistic p-value (are higher scoring sites enriched with motifs).
            ks_stat = 0.0,
            organism: Optional[str] = None

    Number of sequences for FIMO / cmsearch: 
    c_regions

    """
    # Set number of no-hit regions. RBPs without any hits keep
    # c_no_hit_reg == c_regions (they do not appear in regions_with_motifs_dic).
    for rbp_id in search_rbps_dic:
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions

    for rbp_id in regions_with_motifs_dic:
        # Number of --in regions with RBP motif hits.
        c_hit_reg = len(regions_with_motifs_dic[rbp_id])

        # Number of motif hits on --in regions in total.
        c_motif_hits = 0
        for reg_id in regions_with_motifs_dic[rbp_id]:
            c_motif_hits += regions_with_motifs_dic[rbp_id][reg_id]
        search_rbps_dic[rbp_id].c_hit_reg = c_hit_reg
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions - c_hit_reg
        search_rbps_dic[rbp_id].c_motif_hits = c_motif_hits

        # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
        search_rbps_dic[rbp_id].perc_hit_reg = (search_rbps_dic[rbp_id].c_hit_reg / c_regions) * 100

    """
    Get unique motif hits.

    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }
    """

    for rbp_id in unique_motifs_dic:
        # Number of distinct motif hit regions for this RBP.
        c_uniq_motif_hits = len(unique_motifs_dic[rbp_id])
        search_rbps_dic[rbp_id].c_uniq_motif_hits = c_uniq_motif_hits
        # Store individual motif unique hits, incrementing the per-motif
        # counter at the motif's index (seq + regex motifs share the
        # seq_motif_hits list; cm motifs go into str_motif_hits).
        for motif_str_repr in unique_motifs_dic[rbp_id]:
            motif_id = benchlib.get_motif_id_from_str_repr(motif_str_repr)
            idx = motif_id2idx_dic[motif_id]
            if id2type_dic[motif_id] == "meme_xml":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "regex":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "cm":
                search_rbps_dic[rbp_id].str_motif_hits[idx] += 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

    """
    Number of motif nucleotides over called + effective region size.

    """

    # Effective region size same as called for sequences (searchseq).
    eff_reg_size = called_reg_size

    print("Calculate effective motif region sizes for each RBP ... ")
    for rbp_id in unique_motifs_dic:
        # Output unique motif hit regions (sequence or structure) to BED for RBP rbp_id.
        benchlib.output_motif_hits_to_bed(rbp_id, unique_motifs_dic, out_tmp_bed,
                                          one_based_start=True)
        # Calculate effective motif region size (merged/unique genomic coverage).
        eff_motif_reg_size = benchlib.get_uniq_gen_size(out_tmp_bed)

        # Number of unique motif nucleotides.
        search_rbps_dic[rbp_id].c_uniq_motif_nts = eff_motif_reg_size
        # % unique motif nts over effective region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg = (eff_motif_reg_size / eff_reg_size) * 100
        # % unique motif nts over called region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg = (eff_motif_reg_size / called_reg_size) * 100
        # Number of unique motif hits per effective 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (eff_reg_size / 1000)
        # Number of unique motif hits per called 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (called_reg_size / 1000)


    print("# --in sequences for motif search:", c_regions)
    print("Total sequence length:            ", called_reg_size)


    """
    Output RBP hit stats (ie one row per RBP).

    Output columns:
    rbp_id
    c_regions
    called_reg_size
    effective_reg_size
    c_reg_with_hits
    perc_reg_with_hits
    c_motif_hits
    c_uniq_motif_hits
    c_uniq_motif_nts
    perc_uniq_motif_nts_cal_reg
    perc_uniq_motif_nts_eff_reg
    uniq_motif_hits_cal_1000nt
    uniq_motif_hits_eff_1000nt
    wc_pval
    seq_motif_ids
    seq_motif_hits
    str_motif_ids
    str_motif_hits

    """

    rbp_list = []

    OUTSTATS = open(rbp_stats_out, "w")
    rbp_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\trbp_id\tc_regions\tmean_reg_len\tmedian_reg_len\tmin_reg_len\tmax_reg_len\t"
    rbp_stats_header += "called_reg_size\teffective_reg_size\tc_reg_with_hits\tperc_reg_with_hits\t"
    rbp_stats_header += "c_motif_hits\tc_uniq_motif_hits\tc_uniq_motif_nts\tperc_uniq_motif_nts_cal_reg\tperc_uniq_motif_nts_eff_reg\tuniq_motif_hits_cal_1000nt\t"
    rbp_stats_header += "uniq_motif_hits_eff_1000nt\twc_pval\twc_rbc_eff_size\twc_cl_eff_size\tseq_motif_ids\tseq_motif_hits\tstr_motif_ids\tstr_motif_hits\tinternal_id\n"
    OUTSTATS.write(rbp_stats_header)

    for rbp_id in search_rbps_dic:

        # print(search_rbps_dic[rbp_id].__dict__)
        rbp_list.append(rbp_id)

        motif_db_out = loaded_rbps_dic[rbp_id]

        c_reg_with_hits = search_rbps_dic[rbp_id].c_hit_reg
        perc_reg_with_hits = search_rbps_dic[rbp_id].perc_hit_reg
        c_motif_hits = search_rbps_dic[rbp_id].c_motif_hits
        c_uniq_motif_hits = search_rbps_dic[rbp_id].c_uniq_motif_hits
        c_uniq_motif_nts = search_rbps_dic[rbp_id].c_uniq_motif_nts
        perc_uniq_motif_nts_cal_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg
        perc_uniq_motif_nts_eff_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg
        uniq_motif_hits_cal_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt
        uniq_motif_hits_eff_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt
        # wc_pval = search_rbps_dic[rbp_id].wc_pval
        # wc_rbc_es = search_rbps_dic[rbp_id].wc_rbc_es
        # wc_cl_es = search_rbps_dic[rbp_id].wc_cl_es
        # Wilcoxon columns are not computed in this mode -> "-" placeholders.
        wc_pval = "-"
        wc_rbc_es = "-"
        wc_cl_es = "-"

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Comma-separated per-motif hit counts + IDs; "-" if RBP has no
        # motifs of that type.
        seq_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].seq_motif_hits)
        str_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].str_motif_hits)
        seq_motif_ids = ",".join(search_rbps_dic[rbp_id].seq_motif_ids)
        str_motif_ids = ",".join(search_rbps_dic[rbp_id].str_motif_ids)
        if not seq_motif_hits:
            seq_motif_hits = "-"
        if not str_motif_hits:
            str_motif_hits = "-"
        if not seq_motif_ids:
            seq_motif_ids = "-"
        if not str_motif_ids:
            str_motif_ids = "-"

        # Assemble the tab-separated row in header column order.
        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += rbp_id + "\t"

        row_str += str(c_regions) + "\t"

        row_str += str(reg_len_mean) + "\t"
        row_str += str(reg_len_median) + "\t"
        row_str += str(reg_len_min) + "\t"
        row_str += str(reg_len_max) + "\t"

        row_str += str(called_reg_size) + "\t"
        row_str += str(eff_reg_size) + "\t"

        row_str += str(c_reg_with_hits) + "\t"
        row_str += str(perc_reg_with_hits) + "\t"

        row_str += str(c_motif_hits) + "\t"
        row_str += str(c_uniq_motif_hits) + "\t"
        row_str += str(c_uniq_motif_nts) + "\t"
        row_str += str(perc_uniq_motif_nts_cal_reg) + "\t"
        row_str += str(perc_uniq_motif_nts_eff_reg) + "\t"
        row_str += str(uniq_motif_hits_cal_1000nt) + "\t"
        row_str += str(uniq_motif_hits_eff_1000nt) + "\t"
        row_str += str(wc_pval) + "\t"
        row_str += str(wc_rbc_es) + "\t"
        row_str += str(wc_cl_es) + "\t"
        row_str += seq_motif_ids + "\t"
        row_str += seq_motif_hits + "\t"
        row_str += str_motif_ids + "\t"
        row_str += str_motif_hits + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    OUTSTATS.close()

    """
    Region ID list.

    """
    # Sorted region IDs define a stable column/row order for the
    # occupancy table below.
    reg_ids_list = []
    reg_ids_dic = {}
    for seq_id, seq in sorted(in_seqs_dic.items()):
        reg_ids_list.append(seq_id)
        reg_ids_dic[seq_id] = 1

    rbp_list.sort()
    len_rbp_list = len(rbp_list)
    # Store rbp_id -> for each region if hit: 1, else: 0, i.e.: [1,0,0,0,0]
    reg_hits_dic = {}
    add_count = False # Add # of motif hits in region or just add 1 (if False)

    # RBP ID to index mapping (alphabetically sorted rbp_list order).
    rbp2idx_dic = {}
    idx2rbp_dic = {}
    print("Get RBP region occupancies ... ")
    for idx, rbp_id in enumerate(rbp_list):
        rbp2idx_dic[rbp_id] = idx
        idx2rbp_dic[idx] = rbp_id
        # Region has hits yes(1)/no(0).
        hit_list = []
        for reg_id in reg_ids_list:
            if rbp_id in regions_with_motifs_dic and reg_id in regions_with_motifs_dic[rbp_id]:
                if add_count:
                    hit_list.append(regions_with_motifs_dic[rbp_id][reg_id])
                else:
                    hit_list.append(1)
            else:
                hit_list.append(0)
        reg_hits_dic[rbp_id] = hit_list

    """
    Output occupancies.

    reg_hits_dic[rbp_id] = [0,1,0,0, ...]
    reg_ids_list = [reg_id1, reg_id2, ... ]

    rbp2regidx_dic:
        # rbp_id -> 0-based indexes of occupied regions, e.g. [0, 3, 12, 88, 114] 

    """
    rbp2regidx_dic = {}
    OUTOCC = open(rbp_reg_occ_table_out,"w")

    # Header: "#region_id \ rbp_id" followed by one column per RBP.
    occ_header = r"#region_id \ rbp_id"
    for rbp_id, hit_list in sorted(reg_hits_dic.items()):
        occ_header += "\t%s" %(rbp_id)
        rbp2regidx_dic[rbp_id] = []
        for idx, label in enumerate(hit_list):
            if label:  # if occupied (i.e. 1-label).
                rbp2regidx_dic[rbp_id].append(idx)
    OUTOCC.write("%s\n" %(occ_header))
    # One row per region, one 0/1 column per RBP (sorted RBP order,
    # matching the header).
    for idx, reg_id in enumerate(reg_ids_list):
        occ_row = "%s" %(reg_id)
        for rbp_id, hit_list in sorted(reg_hits_dic.items()):
            occ_row += "\t%i" %(hit_list[idx])
        OUTOCC.write("%s\n" %(occ_row))
    OUTOCC.close()

    """
    Store RBP binding information for each input region.
    Format region_rbp_binds_dic:
    region_id -> [False, True, False ... ] 
    with list number of RBP IDs (len_rbp_list), alphabetically sorted.
    Format region_rbp_motif_pos_dic:
    Region ID -> "motif_id,start_1based,end_1based,p_value/-(bit_score)"
    E.g.
    region_rbp_motif_pos_dic["reg1"] = ["rbp1_m1:98:102:0.01", "rbp1_m1:110:115:0.1", "rbp1_m2:110:115:0.05"]

    """
    region_rbp_binds_dic = {}
    region_rbp_motif_pos_dic = {}

    # Initialize all regions as unbound / without motif positions;
    # filled in the per-hit loops further below.
    for reg_id in in_seqs_dic:
        # assert reg_id in reg2sc_dic, "region ID \"%s\" from in_seqs_dic not found in reg2sc_dic" %(reg_id)
        region_rbp_binds_dic[reg_id] = [False]*len_rbp_list
        region_rbp_motif_pos_dic[reg_id] = []

    """
    Init hit profiles for each sequence ID.
    I.e., for each sequence ID, store motif hit (or RBP hit) occurrences (0 if not hit).

    """

    pid_list = []  # profile IDs == motif IDs (on motif level or RBP level).
    pid2idx_dic = {}  # profile ID -> idx dictionary.
    # profile_kmer_ids_list = []

    # --profiles-level 1: one profile column per RBP;
    # --profiles-level 2: one column per "rbp_id;motif_id" combination.
    idx = 0
    for rbp_id in search_rbps_dic:
        if args.profiles_level == 1:  # on RBP hit level.
            pid_list.append(rbp_id)
            pid2idx_dic[rbp_id] = idx
            idx += 1
        elif args.profiles_level == 2:  # on individual motif hit level.
            for motif_id in search_rbps_dic[rbp_id].seq_motif_ids:
                rbp_motif_id = rbp_id + ";" + motif_id
                pid_list.append(rbp_motif_id)
                pid2idx_dic[rbp_motif_id] = idx
                idx += 1
            for motif_id in search_rbps_dic[rbp_id].str_motif_ids:
                rbp_motif_id = rbp_id + ";" + motif_id
                pid_list.append(rbp_motif_id)
                pid2idx_dic[rbp_motif_id] = idx
                idx += 1
        else:
            assert False, "invalid hit profile level %i set. Please set --profiles-level to 1 or 2" %(args.profiles_level)

    # seqid2feat_dic = {}

    # for seq_id in in_seqs_dic:
    #     seq_feat = benchlib.SeqFeat(seq_id, in_seqs_dic[seq_id], k=args.profiles_k)
    #     for profile_id in profile_ids_list:
    #         seq_feat.hit_profile.append(0)
    #     seqid2feat_dic[seq_id] = seq_feat
    #     if not profile_kmer_ids_list:
    #         for kmer in seq_feat.kmer_perc:
    #             profile_kmer_ids_list.append(kmer)

    # get_kmer_counts_dic(self.seq, 1, rna=False, count_norm_mode=2, empty_check=False)  # get percentages.

    nr_motifs = len(pid_list)  # number of search motifs / rbps == number of hit counts.
    nr_kmers = 4 ** args.profiles_k
    nr_seqs = len(in_seqs_dic)

    # Dense k-mer percentage matrix (float32), one row per sequence.
    kmer_mat = np.zeros((nr_seqs, nr_kmers), dtype=np.float32)

    # Dense motif hit matrix (fill later), one row per sequence, one
    # column per profile ID.
    motif_mat = np.zeros((nr_seqs, nr_motifs), dtype=np.float32)

    # Get kmer to index dictionary.
    from itertools import product
    # Dictionary format: {'AA': 0, 'AC': 1, 'AG': 2, ... }
    kmer2idx_dic = {"".join(p): idx for idx, p in enumerate(product("ACGT", repeat=args.profiles_k))}

    # seqfeats = []
    seqid2feat_dic = {}  # seq_id -> SeqFeat object with per-sequence stats.
    seqid2idx_dic = {}  # seq_id to matrix index.

    # for seq_id in in_seqs_dic:
    # Sorted iteration fixes the matrix row order (matrix row idx == rank
    # of seq_id in sorted order).
    for idx, seq_id in enumerate(sorted(in_seqs_dic)):
        seq = in_seqs_dic[seq_id].upper()
        seq_len = len(seq)

        # Mono-nt percentages.
        mono_nt_perc = benchlib.get_kmer_counts_dic(seq, 1,   # get mono-nt percentages.
                                                    rna=False,
                                                    count_norm_mode=2,
                                                    empty_check=False)

        a_perc = mono_nt_perc.get("A", 0.0)
        c_perc = mono_nt_perc.get("C", 0.0)
        g_perc = mono_nt_perc.get("G", 0.0)
        t_perc = mono_nt_perc.get("T", 0.0)

        # Mono-nt percentage string.
        mono_nt_perc_str = benchlib.get_kmer_perc_str(mono_nt_perc)

        # Mono-nt counts (count_norm_mode=1 -> raw counts; see benchlib).
        mono_nt_c = benchlib.get_kmer_counts_dic(seq, 1, 
                                        rna=False, 
                                        count_norm_mode=1, 
                                        empty_check=False)
        # GC percentage.
        gc_perc = benchlib.calc_seq_gc_cont(mono_nt_c, get_perc=True)

        # Entropy (0.0 for empty sequences), rounded for reporting.
        entropy = benchlib.seq_calc_entropy(seq, rna=False, k=1) if seq else 0.0
        entropy = round(entropy, 6)

        # Get k-mer percentages dictionary from sequence.
        kmer_perc_dic = benchlib.get_kmer_counts_dic(seq, 
                                                     args.profiles_k, 
                                                     rna=False, 
                                                     count_norm_mode=2, 
                                                     empty_check=False)

        row = kmer_mat[idx]  # Gets view into row i (not a copy!).
        non_zero = 0
        for kmer, val in kmer_perc_dic.items():
            if val:
                row[kmer2idx_dic[kmer]] = float(val)
                non_zero += 1

        seq_feat = benchlib.SeqFeat(
            seq_id=seq_id,
            seq_len=seq_len,
            gc_perc=gc_perc,
            a_perc=a_perc,
            c_perc=c_perc,
            g_perc=g_perc,
            t_perc=t_perc,
            entropy=entropy,
            c_hits=0,  # Add motif hit counts later.
            c_non_zero_k=non_zero,  # Number of appearing k-mers (== non-zero percentage k-mers).
            mono_nt_perc_str=mono_nt_perc_str,
        )

        seqid2feat_dic[seq_id] = seq_feat
        seqid2idx_dic[seq_id] = idx

    """
    Output motif region stats (1 row for each motif hit).
    Report ALL motif hits,
    plus report how many times one genomic motif hit occurs (uniq_count). 

    """

    OUTSTATS = open(motif_stats_out,"w")

    motif_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\tregion_id\trbp_id\tmotif_id\tchr_id\tgen_s\tgen_e\tstrand\tregion_s\tregion_e\tregion_len\t"
    motif_stats_header += "uniq_count\tfimo_score\tfimo_pval\tcms_score\tcms_eval\tmatched_seq\tinternal_id\n"
    OUTSTATS.write(motif_stats_header)

    # Unique motif regions BED (hit_id -> BED row string, deduplicated).
    motif_reg_dic = {}
    # rbp_id -> motif_id -> matched sequence -> count (unique hits only).
    match_c_dic = {}

    # Pre-create the nested dictionaries for every known RBP motif.
    for rbp_id in search_rbps_dic:
        match_c_dic[rbp_id] = {}
        for motif_id in search_rbps_dic[rbp_id].seq_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}
        for motif_id in search_rbps_dic[rbp_id].str_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}

    # hit_id = "%s:%s-%s(%s)%s" %(cols[7], cols[8], cols[9], cols[10], cols[6])

    for fh in fimo_hits_list:

        rbp_id = id2name_dic[fh.motif_id]
        region_id = fh.seq_name
        region_len = len(in_seqs_dic[region_id])
        # genomic motif region string.
        fh_str = repr(fh)
        uniq_count = unique_motifs_dic[rbp_id][fh_str]
        # Store binding info of RBP in region.
        rbp_idx = rbp2idx_dic[rbp_id]

        # Motif hit string (motif_id:start:end:p-value, 1-based coords).
        motif_str = "%s:%i:%i:%s" %(fh.motif_id, fh.start, fh.end, str(fh.pval))
        # # Center position of motif hit.
        # motif_hit_s = fh.start - 1
        # motif_hit_e = fh.end
        # center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)        

        region_rbp_binds_dic[region_id][rbp_idx] = True
        region_rbp_motif_pos_dic[region_id].append(motif_str)

        motif_db_out = loaded_motif_ids_dic[fh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Get matched sequence ("-" if it cannot be extracted).
        matched_seq = benchlib.get_matched_seq(fh.seq_name, in_seqs_dic, fh.seq_s, fh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        rbp_motif_id = "%s;%s" %(rbp_id, fh.motif_id)

        # Store motif hit as BED. The block below only runs once per
        # unique genomic hit (hit_id), so BED rows, match counts and
        # profile matrix counts are based on unique hits.
        hit_id = "%s:%s-%s(%s)%s" %(fh.chr_id, str(fh.start), str(fh.end), fh.strand, fh.motif_id)
        if hit_id not in motif_reg_dic:
            # Regex hits (hit_type "str_pat") carry GC/GU fractions instead
            # of a FIMO score/p-value -> different BED row layout.
            if fh.hit_type == "str_pat":
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.gc_frac), str(fh.gu_frac), matched_seq)
            else:
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.score), str(fh.pval), matched_seq)
            # bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.score), str(fh.pval), matched_seq)
            motif_reg_dic[hit_id] = bed_row
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if fh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][fh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][fh.motif_id]:
                match_c_dic[rbp_id][fh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][fh.motif_id][matched_seq] += 1
            # Record motif hit counts in matrix.
            if args.profiles_level == 1:
                # motif_mat = np.zeros((nr_seqs, nr_motifs), dtype=np.float32)
                motif_mat[seqid2idx_dic[region_id], pid2idx_dic[rbp_id]] += 1
                # seqid2feat_dic[region_id].hit_profile[pid2idx_dic[rbp_id]] += 1
            elif args.profiles_level == 2:
                motif_mat[seqid2idx_dic[region_id], pid2idx_dic[rbp_motif_id]] += 1
                # seqid2feat_dic[region_id].hit_profile[pid2idx_dic[rbp_motif_id]] += 1
            # Increment total hit count.
            seqid2feat_dic[region_id].c_hits += 1

        # Tab-separated stats row (written for EVERY hit, not only unique ones).
        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += fh.motif_id + "\t"
        row_str += fh.chr_id + "\t"
        row_str += str(fh.start) + "\t"  # 1-based.
        row_str += str(fh.end) + "\t"
        row_str += fh.strand + "\t"
        row_str += str(fh.seq_s) + "\t"  # 1-based.
        row_str += str(fh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += str(fh.score) + "\t"
        row_str += str(fh.pval) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    # Same bookkeeping as for the FIMO hits above, for structure
    # (cmsearch) motif hits. cmsearch hits report bit score + E-value
    # instead of FIMO score + p-value.
    for cmsh in cmsearch_hits_list:

        rbp_id = id2name_dic[cmsh.motif_id]
        region_id = cmsh.seq_name
        region_len = len(in_seqs_dic[region_id])
        # genomic motif region string.
        cmsh_str = repr(cmsh) 
        uniq_count = unique_motifs_dic[rbp_id][cmsh_str]
        # Store binding info of RBP in region.
        rbp_idx = rbp2idx_dic[rbp_id]

        # Motif hit string (negated bit score in the p-value slot, so that
        # lower values mean better hits, as with p-values).
        motif_str = "%s:%i:%i:%s" %(cmsh.motif_id, cmsh.start, cmsh.end, str(-1*cmsh.score))
        # # Center position of motif hit.
        # motif_hit_s = cmsh.start - 1
        # motif_hit_e = cmsh.end
        # center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

        region_rbp_binds_dic[region_id][rbp_idx] = True
        region_rbp_motif_pos_dic[region_id].append(motif_str)

        motif_db_out = loaded_motif_ids_dic[cmsh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Get matched sequence ("-" if it cannot be extracted).
        matched_seq = benchlib.get_matched_seq(cmsh.seq_name, in_seqs_dic, cmsh.seq_s, cmsh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        rbp_motif_id = "%s;%s" %(rbp_id, cmsh.motif_id)

        # Store motif hit as BED (only once per unique genomic hit_id).
        hit_id = "%s:%s-%s(%s)%s" %(cmsh.chr_id, str(cmsh.start), str(cmsh.end), cmsh.strand, cmsh.motif_id)
        if hit_id not in motif_reg_dic:
            bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s" %(cmsh.chr_id, cmsh.start-1, cmsh.end, rbp_id, cmsh.motif_id, uniq_count, args.method_id, args.data_id, cmsh.strand, str(cmsh.score), str(cmsh.e_value), matched_seq)
            motif_reg_dic[hit_id] = bed_row
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if cmsh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][cmsh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][cmsh.motif_id]:
                match_c_dic[rbp_id][cmsh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][cmsh.motif_id][matched_seq] += 1
            # Record motif hit counts in matrix.
            if args.profiles_level == 1:
                motif_mat[seqid2idx_dic[region_id], pid2idx_dic[rbp_id]] += 1
            elif args.profiles_level == 2:
                motif_mat[seqid2idx_dic[region_id], pid2idx_dic[rbp_motif_id]] += 1
            # Increment total hit count.
            seqid2feat_dic[region_id].c_hits += 1

        # Tab-separated stats row (written for EVERY hit). FIMO columns
        # get "-", cms_score/cms_eval carry the cmsearch values.
        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += cmsh.motif_id + "\t"
        row_str += cmsh.chr_id + "\t"
        row_str += str(cmsh.start) + "\t"
        row_str += str(cmsh.end) + "\t"
        row_str += cmsh.strand + "\t"
        row_str += str(cmsh.seq_s) + "\t"
        row_str += str(cmsh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += str(cmsh.score) + "\t"
        row_str += str(cmsh.e_value) + "\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"
        #print("region_id:", region_id)
        #print("evalue:", cmsh.e_value)
        #print(row_str)

        OUTSTATS.write(row_str)

    OUTSTATS.close()

    """
    Output unique motif hits as BED.
    """

    OUTBED = open(motif_hits_bed_out, "w")
    for hit_id in motif_reg_dic:
        OUTBED.write("%s\n" %(motif_reg_dic[hit_id]))
    OUTBED.close()

    """
    Normalize profile hit counts.

    Motif hit count matrix definition:
    motif_mat = np.zeros((nr_seqs, nr_motifs), dtype=np.float32)

    --profiles-norm 1: hits per 1000 nt of the sequence.
    --profiles-norm 2: binarize (1 if any hit, else 0).
    Other values leave raw counts unchanged.

    """

    for seq_id in seqid2feat_dic:
        # profile_list = seqid2feat_dic[seq_id].hit_profile
        seq_len = seqid2feat_dic[seq_id].seq_len
        row = motif_mat[seqid2idx_dic[seq_id]]  # Fix hit counts row for specific sequence.

        for j in range(nr_motifs):

                hit_c = row[j]

                if args.profiles_norm == 1:  # Normalize to hits per 1000 nt.
                    if hit_c > 0:
                        row[j] = (float(hit_c) / float(seq_len)) * 1000.0 if seq_len > 0 else 0.0
                    else:
                        row[j] = 0.0
                elif args.profiles_norm == 2:  # Set count to 1 if hits, else 0.
                    if hit_c > 0:
                        row[j] = 1
                    else:
                        row[j] = 0


    """
    Output motif hit and k-mer profile values.
    """

    # Hit profile table: one row per sequence, profile values in pid_list order.
    OUTHITP = open(hit_profiles_out, "w")
    motif_ids_str = ";".join(pid_list)
    OUTHITP.write("seq_id\tseq_len\tseq_entropy\tA_perc\tC_perc\tG_perc\tT_perc\tc_hits\t%s\n" %(motif_ids_str))
    # k-mer profile table: one row per sequence, k-mer percentages in
    # kmer2idx_dic order.
    OUTKMERP = open(kmer_profiles_out, "w")
    kmer_ids_str = ";".join(kmer2idx_dic.keys())
    OUTKMERP.write("seq_id\tseq_len\tseq_entropy\tA_perc\tC_perc\tG_perc\tT_perc\tk\t%s\n" %(kmer_ids_str))

    for seq_id in seqid2feat_dic:
        seq_feat = seqid2feat_dic[seq_id]
        seq_len = seq_feat.seq_len
        seq_entropy = seq_feat.entropy
        c_hits = seq_feat.c_hits
        kmer_k = args.profiles_k

        # Row index of sequence in motif_mat and kmer_mat.
        seq_idx = seqid2idx_dic[seq_id]

        kmer_row = kmer_mat[seq_idx]

        kmer_profile_str_list = []
        for kmer in kmer2idx_dic:
            kmer_idx = kmer2idx_dic[kmer]
            kmer_perc = kmer_row[kmer_idx]
            # Format non-zero percentages with 4 decimals.
            if kmer_perc > 0:
                kmer_perc = "%.4f" %(kmer_perc)
            kmer_profile_str_list.append(str(kmer_perc))

        motif_row = motif_mat[seq_idx]

        hit_profile_str_list = []
        for pid in pid2idx_dic:
            motif_idx = pid2idx_dic[pid]
            motif_hit_val = motif_row[motif_idx]
            if motif_hit_val > 0:
                motif_hit_val = "%.4f" %(motif_hit_val)
            hit_profile_str_list.append(str(motif_hit_val))

        # hit_profile_list = seq_feat.hit_profile  # motif hit profile.
        # kmer_profile_dic = seq_feat.kmer_perc  # k-mer percentages profile.
        # hit_profile_str_list = []
        # for hv in hit_profile_list:
        #     if hv > 0:
        #         hv = "%.4f" %(hv)
        #     hit_profile_str_list.append(str(hv))
        
        # # hit_profile_str = ";".join(str(hv) for hv in hit_profile_list)
        # kmer_profile_str_list = []
        # for kmer in kmer2idx_dic:
        #     kmer_perc = kmer_profile_dic[kmer]
        #     if kmer_perc > 0:
        #         kmer_perc = "%.4f" %(kmer_perc)
        #     kmer_profile_str_list.append(str(kmer_perc))

        hit_profile_str = ";".join(hit_profile_str_list)
        kmer_profile_str = ";".join(kmer_profile_str_list)
        # kmer_profile_str = ";".join(str(kmer_profile_dic[kmer]) for kmer in profile_kmer_ids_list)
        perc_A = seq_feat.a_perc
        perc_C = seq_feat.c_perc
        perc_G = seq_feat.g_perc
        perc_T = seq_feat.t_perc
        row_str = "%s\t%i\t%.4f\t%.2f\t%.2f\t%.2f\t%.2f\t%i\t%s\n" %(seq_id, seq_len, seq_entropy, perc_A, perc_C, perc_G, perc_T, c_hits, hit_profile_str)
        OUTHITP.write(row_str)
        row_str = "%s\t%i\t%.4f\t%.2f\t%.2f\t%.2f\t%.2f\t%i\t%s\n" %(seq_id, seq_len, seq_entropy, perc_A, perc_C, perc_G, perc_T, kmer_k, kmer_profile_str)
        OUTKMERP.write(row_str)
    OUTHITP.close()
    OUTKMERP.close()

    """
    Output matched sequence counts in table. 

    Formats: 
    match_c_total_dic[rbp_id][motif_id] = count
    match_c_dic[rbp_id][motif_id][matched_seq] = count

    """

    OUTTSV = open(matched_seqs_out, "w")
    OUTTSV.write("rbp_id\tmotif_id\tmatched_seq\tmatch_count\tmatch_perc\n")

    match_c_total_dic = benchlib.get_match_c_total_dic(match_c_dic)

    for rbp_id in match_c_dic:
        for motif_id in match_c_dic[rbp_id]:
            total_c = match_c_total_dic[rbp_id][motif_id]
            for matched_seq in match_c_dic[rbp_id][motif_id]:
                match_c = match_c_dic[rbp_id][motif_id][matched_seq]
                match_perc = 0.0
                if match_c > 0:
                    match_perc = (float(match_c)/float(total_c)) * 100.0
                row_str = "%s\t%s\t%s\t%i\t%.2f\n" %(rbp_id, motif_id, matched_seq, match_c, match_perc)
                OUTTSV.write(row_str)
    OUTTSV.close()


    """
    Generate HTML report (if --profiles set).

    """

    benchlib_path = os.path.dirname(benchlib.__file__)

    html_report_out = args.out_folder + "/" + "report.rbpbench_searchseq.html"
    if args.plot_abs_paths:
        html_report_out = os.path.abspath(args.out_folder) + "/" + "report.rbpbench_searchseq.html"

    if args.profiles and seqid2feat_dic:

        if os.path.exists(html_report_out):
            os.remove(html_report_out)

        plots_subfolder = "html_report_plots"

        print("Create report containing profiles ... ")

        benchlib.searchseq_generate_html_report(args,
                                                seqid2feat_dic,
                                                seqid2idx_dic,
                                                motif_mat,
                                                kmer_mat,
                                                benchlib_path,
                                                html_report_out=html_report_out,
                                                c_search_rbps=len(loaded_rbps_dic),
                                                c_search_motifs=len(loaded_motif_ids_dic),
                                                plots_subfolder=plots_subfolder)



    """
    Motif plots and motif hit statistics HTML.

    """
    html_motif_plots_out = args.out_folder + "/" + "motif_plots.rbpbench_searchseq.html"
    if args.plot_abs_paths:
        html_motif_plots_out = os.path.abspath(args.out_folder) + "/" + "motif_plots.rbpbench_searchseq.html"

    if args.plot_motifs:
        
        print("Generate motif plots HTML ... ")
        
        plots_subfolder = "html_motif_plots"
        
        # args.run_goa = False
        args.bed_sc_thr = None
        args.c_regions = c_regions
        args.ext_up = None
        args.ext_down = None
        # args.regex_id = False
        # Only valid for searchlongrna.
        args.run_goa_tr = False

        benchlib.search_generate_html_motif_plots(args,
                                         search_rbps_dic, seq_motif_blocks_dic, 
                                         str_motif_blocks_dic, 
                                         benchlib_path, loaded_motif_ids_dic,
                                         html_report_out=html_motif_plots_out,
                                         rbpbench_mode="searchseq --plot-motifs",
                                         reg_seq_str="sequences",
                                         id2pids_dic=id2pids_dic,
                                         id2exp_dic=id2exp_dic,
                                         match_c_dic=match_c_dic,
                                         match_c_total_dic=match_c_total_dic,
                                         plots_subfolder=plots_subfolder)


    """
    Output parameter settings.

    """
    # Output mode settings.
    print("Output parameter settings ... ")
    SETOUT = open(settings_file, "w")
    for arg in vars(args):
        SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
    for call in call_dic:
        SETOUT.write("%s\t%s\n" %(call, call_dic[call]))
    SETOUT.close()

    """
    Take out the trash.

    """
    print("Delete .tmp files ... ")
    if os.path.exists(out_tmp_bed):
        os.remove(out_tmp_bed)
    if os.path.exists(cmstat_tmp_out):
        os.remove(cmstat_tmp_out)


    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    # print("Co-occurrence p-values for each RBP pair .tsv:\n%s" %(con_res_out_tsv))
    # print("Filtered input regions .bed:\n%s" %(filtered_sites_bed))
    print("Filtered input sequences .fa:\n%s" %(filtered_seqs_fa))
    print("Motif hits .bed:\n%s" %(motif_hits_bed_out))
    print("Matched sequence stats .tsv:\n%s" %(matched_seqs_out))
    print("RBP region occupancies .tsv:\n%s" %(rbp_reg_occ_table_out))
    print("RBP hit stats .tsv:\n%s" %(rbp_stats_out))
    print("Motif hit stats .tsv:\n%s" %(motif_stats_out))
    # if reg_annot_table_file is not None:
    #     print("Region annotations .tsv:\n%s" %(reg_annot_table_file))
    if args.plot_motifs:
        print("Motif plots and hit statistics .html:\n%s" %(html_motif_plots_out))
    # if args.report:
    #     print("Search report .html:\n%s" %(html_report_out))
    print("")


################################################################################

def main_searchlong(args):
    """
    Motif search in long genomic regions.

    """

    print("Running for you in SEARCHLONG mode ... ")

    assert os.path.exists(args.in_file), "--in file \"%s\" not found" % (args.in_file)
    assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)

    if args.tr_list:
        assert args.in_gtf, "set --tr-list requires --gtf GTF file"
    assert benchlib.boundary_check(args.gtf_feat_min_overlap, 1E-9, 1.0), "set --gtf-feat-min-overlap expected to be >= 1E-9 and <= 1.0"
    # if args.in_gtf:
    #     assert args.plot_motifs, "set --gtf requires --plot-motifs"

    # Is MEME >= v5 installed?
    if not args.meme_disable_check:
        assert benchlib.is_tool("meme"), "meme not in PATH"
        check, meme_version = benchlib.check_tool_version("meme -version", "5.0")
        assert check, "RBPBench requires meme version >= 5.0 (installed version: %s)" %(meme_version)

    # Check if MEME version is >= 5.5.4 (need to add fimo --no-pgc option to produce same results!).
    fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text"
    if not args.meme_disable_check:
        check, meme_version = benchlib.check_tool_version("meme -version", "5.5.4")
        args.meme_version = meme_version
        if check:
            fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"
    if args.meme_no_pgc:
        fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"

    # Regex check.
    regex_type = "sequence"
    if args.regex:
        if args.regex_type == 1:
            print("Check given --regex type ... ")
            if benchlib.looks_like_structure(args.regex):
                print("Given --regex looks like structure pattern ... ")
                regex_type = "structure"
            else:
                print("Given --regex assumed to be standard regex ... ")
                regex_type = "sequence"
        elif args.regex_type == 2:
            regex_type = "sequence"
        elif args.regex_type == 3:
            regex_type = "structure"
        else:
            assert False, "unexpected --regex-type value set (%d)" %(args.regex_type)
        # If standard regex, check validity.
        if regex_type == "sequence":
            assert benchlib.is_valid_regex(args.regex), "given --regex \"%s\" is not a valid regular expression. Please provide valid expression" % (args.regex)


    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.fimo_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.fimo_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    if args.fimo_user_ntf_file:
        fimo_freqs_file = args.fimo_user_ntf_file
    assert os.path.exists(fimo_freqs_file), "set FIMO nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    Motif database.
    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                            db_path=db_path)
    # Custom motif database: folder given.
    if args.custom_db:
        assert not args.custom_db_meme_xml, "--custom-db folder set incompatible with --custom-db-meme-xml. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_cm, "--custom-db folder set incompatible with --custom-db-cm. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_info, "--custom-db folder set incompatible with --custom-db-info. Provide custom motif database either via folder (--custom-db) or as single files"
        seq_motifs_db_file, str_motifs_db_file, rbp2ids_file = specify_custom_motif_db(args.custom_db)
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
    # Custom motif database: single files given.
    if args.custom_db_meme_xml or args.custom_db_cm or args.custom_db_info:
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
        assert not args.custom_db, "single custom motif database files provided not compatible with --custom-db. Provide custom motif database either via folder (--custom-db) or as single files"
        assert args.custom_db_info, "--custom-db-info needed to define custom motif database"
        rbp2ids_file = args.custom_db_info
        assert args.custom_db_meme_xml or args.custom_db_cm, "--custom-db-meme-xml and/or --custom-db-cm needed to define custom motif database"
        if args.custom_db_meme_xml:
            seq_motifs_db_file = args.custom_db_meme_xml
        else:
            seq_motifs_db_file = ""  # setting to empty string results in os.path.exists -> False.
        if args.custom_db_cm:
            str_motifs_db_file = args.custom_db_cm
        else:
            str_motifs_db_file = ""

    args.motif_db_str = motif_db_str

    """
    Get ID mappings

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")

    """
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)

    # Motif ID -> RBP name (== RBP ID) mapping.
    id2name_dic = {}
    for rbp_id in name2ids_dic:
        for motif_id in name2ids_dic[rbp_id]:
            id2name_dic[motif_id] = rbp_id

    """
    Get MEME XML database motif blocks dictionary.

    """
    seq_motif_blocks_dic = {}
    if os.path.exists(seq_motifs_db_file):
        seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file,
                                                           empty_check=True)
    for motif_id in seq_motif_blocks_dic:
        assert motif_id in id2name_dic, "MEME XML motif ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    """
    Get covariance model database motif blocks dictionary.

    """
    str_motif_blocks_dic = {}
    if str_motifs_db_file and os.path.exists(str_motifs_db_file):
        str_motif_blocks_dic = benchlib.read_in_cm_blocks(str_motifs_db_file, 
                                                          empty_check=True)
        for motif_id in str_motif_blocks_dic:
            assert motif_id in id2name_dic, "Covariance model accession ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    # Remove special chars from run ID.
    args.data_id = benchlib.remove_special_chars_from_str(args.data_id)
    assert args.data_id, "empty string after removing special chars from --data-id. Please provide alphanumeric string for data ID (- or _ are okay as well)"
    args.method_id = benchlib.remove_special_chars_from_str(args.method_id)
    assert args.method_id, "empty string after removing special chars from --method-id. Please provide alphanumeric string for method ID (- or _ are okay as well)"
    # Run ID definition.
    run_id = "run_id"
    if args.run_id:
        run_id = benchlib.remove_special_chars_from_str(args.run_id)
        assert run_id, "empty string after removing special chars from --run-id. Please provide alphanumeric string for run ID (- or _ are okay as well)"

    print("Run ID:     ", run_id)
    print("Data ID:    ", args.data_id)
    print("Method ID:  ", args.method_id)


    """
    Output folders + files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    filtered_sites_bed = args.out_folder + "/in_sites.filtered.bed"
    filtered_sites_fa = args.out_folder + "/in_sites.filtered.fa"
    seq_motifs_xml = args.out_folder + "/seq_motifs.xml"
    str_motifs_cm = args.out_folder + "/str_motifs.cm"
    fimo_res_tsv = args.out_folder + "/fimo_results.tsv"
    cmsearch_res_txt = args.out_folder + "/cmsearch_results.txt"

    rbp_stats_out = args.out_folder + "/rbp_hit_stats.tsv"
    motif_stats_out = args.out_folder + "/motif_hit_stats.tsv"
    # con_res_out_tsv = args.out_folder + "/contingency_table_results.tsv"
    settings_file = args.out_folder + "/settings.rbpbench_searchlong.out"
    # rbp_reg_occ_table_out = args.out_folder + "/rbp_region_occupancies.tsv"

    # Output unique motif hits.
    motif_hits_bed_out = args.out_folder + "/motif_hits.rbpbench_searchlong.bed"
    # Output matched sequences stats.
    matched_seqs_out = args.out_folder + "/matched_seq_stats.tsv"

    out_tmp_bed = args.out_folder + "/rbp_motif_hit_regions.tmp.bed"
    cmstat_tmp_out = args.out_folder + "/cmstat_out.tmp.txt"

    # Delete existing result files from a previous run (FIMO/cmsearch outputs are appended to).
    if os.path.exists(fimo_res_tsv):
        os.remove(fimo_res_tsv)
    if os.path.exists(cmsearch_res_txt):
        os.remove(cmsearch_res_txt)


    """
    Load RBP data based on --rbps (+ optionally USER data).

    """

    rbp_in_dic = {}
    for rbp_id in args.list_rbps:
        rbp_in_dic[rbp_id] = 1

    # RBPs for motif search.
    loaded_rbps_dic = {}

    # USER set?
    user_motifs = False
    user_rbp_id = False
    if "USER" in rbp_in_dic:
        user_motifs = True
    else:
        assert not args.user_meme_xml, "--user-meme-xml provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_cm, "--user-cm provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_rbp_id, "--user-rbp-id set but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"

    special_rbp_ids_list = ["USER", "REGEX"]

    # If ALL set, load all RBPs (+ optionally USER).
    if "ALL" in rbp_in_dic:
        if len(rbp_in_dic) == 2:
            assert user_motifs, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        if len(rbp_in_dic) > 2:
            assert False, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        print("--rbps ALL selected. Loading all database motifs ... ")
        for rbp_id in name2ids_dic:
            loaded_rbps_dic[rbp_id] = motif_db_str

    else:
        # Load individual RBPs.
        for rbp_id in rbp_in_dic:
            if rbp_id not in special_rbp_ids_list:
                """
                Check if RBP ID in database.
                Suggest similar RBPs based on string similarity (edit distance).

                """
                if rbp_id not in name2ids_dic:
                    db_rbp_list = []
                    for db_rbp_id in name2ids_dic:
                        db_rbp_list.append(db_rbp_id)
                    pair_dist_dic = benchlib.calc_edit_dist_query_list(rbp_id, db_rbp_list)
                    max_c = 10
                    c = 0
                    suggested_rbps = []
                    for key, value in sorted(pair_dist_dic.items(), key=lambda item: item[1], reverse=False):
                        if c >= max_c:
                            break
                        c += 1
                        suggested_rbps.append(key)
                    suggested_rbps_str = ",".join(suggested_rbps)
                    assert False, "provided --rbps ID %s not in internal motif database (%s). Please provide RBP name present in database. Did you mean (any of) the following database ID(s) (top 10 hits based on string similarity): %s ?" %(rbp_id, motif_db_str, suggested_rbps_str)
                # assert rbp_id in name2ids_dic, "provided --rbps ID %s not in internal motif database. Please provide RBP name present in database" %(rbp_id)
                loaded_rbps_dic[rbp_id] = motif_db_str

    # Motif IDs for search.
    loaded_motif_ids_dic = {}
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            loaded_motif_ids_dic[motif_id] = motif_db_str

    """
    Check and load provided USER data.

    """

    if user_motifs:
        print("--rbps USER selected. Check + load provided USER motifs ... ")
        assert args.user_rbp_id, "--rbps USER demands --user-rbp-id to be set to connect the supplied motif(s) with an RBP ID"
        assert args.user_meme_xml or args.user_cm, "--rbps USER requires a provided sequence or structure motif file (via --user-meme-xml AND/OR --user-cm)"

        # Reformat user_rbp_id. 
        user_rbp_id = benchlib.remove_special_chars_from_str(args.user_rbp_id)
        assert user_rbp_id, "empty string after removing special chars from --user-rbp-id. Please provide alphanumeric string for RBP ID (- or _ are okay as well)"

        assert user_rbp_id not in loaded_rbps_dic, "user RBP ID %s already selected from database. Please deselect respective database RBP ID or provide unique user RBP ID via --user-rbp-id" %(user_rbp_id)
        loaded_rbps_dic[user_rbp_id] = "user"
        # In case user_rbp_id in database, reset motif IDs associated to user_rbp_id.
        name2ids_dic[user_rbp_id] = []
        print("RBP ID for user-supplied motifs:", user_rbp_id)

        user_seq_motif_blocks_dic = {}
        if args.user_meme_xml:
            assert os.path.exists(args.user_meme_xml), "--user-meme-xml file \"%s\" not found" % (args.user_meme_xml)
            user_seq_motif_blocks_dic = benchlib.read_in_xml_motifs(args.user_meme_xml, empty_check=False)
            assert user_seq_motif_blocks_dic, "no motifs read in from provided --user-meme-xml. Make sure to supply sequence motifs in MEME XML format!"
            # Check if motif ID already loaded.
            for acc_id in user_seq_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied MEME XML motif ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change user motif ID to a unique motif ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                seq_motif_blocks_dic[acc_id] = user_seq_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "meme_xml"
                id2name_dic[acc_id] = user_rbp_id

        user_str_motif_blocks_dic = {}
        if args.user_cm:
            assert os.path.exists(args.user_cm), "--user-cm file \"%s\" not found" % (args.user_cm)
            # Check for valid format.
            acc_ids_dic = benchlib.check_cm_file(args.user_cm, cmstat_tmp_out, empty_check=False)
            # Read in covariance model blocks.
            user_str_motif_blocks_dic = benchlib.read_in_cm_blocks(args.user_cm)
            for acc_id in acc_ids_dic:
                assert acc_id in user_str_motif_blocks_dic, "accession ID %s not in blocks dictionary. Please contact developers!" %(acc_id)
            # Check if motif ID already loaded.
            for acc_id in user_str_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied covariance model accession (ACC) ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change to a unique accession ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                str_motif_blocks_dic[acc_id] = user_str_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "cm"
                id2name_dic[acc_id] = user_rbp_id


    """
    Get sequence motif lengths.

    """

    id2len_dic = benchlib.get_seq_motif_lengths(seq_motif_blocks_dic)

    """
    Optionally filter DREME/MEME sequence motifs by length.

    """

    if args.motif_min_len or args.motif_max_len:

        if args.motif_min_len and args.motif_max_len:
            assert args.motif_min_len <= args.motif_max_len, "set --motif-min-len needs to be <= --motif-max-len!"

        print("Filtering sequence motifs by set min/max lengths ... ")

        seq_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, name2ids_dic, c_flt_out = benchlib.filter_dic_by_motif_lengths(
                seq_motif_blocks_dic, str_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, id2name_dic,
                id2len_dic, motif_min_len=args.motif_min_len, motif_max_len=args.motif_max_len
            )

        assert loaded_rbps_dic, "no MEME/DREME sequence motifs left after length filtering. Please adjust length filter range (--motif-min-len, --motif-max-len), RBP selection, or disable length filtering!"

        print("Filtered out %d sequence motifs outside set length range" %(c_flt_out))


    """
    Check if loaded RBP IDs have motifs.

    """
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            found = 0
            if motif_id in seq_motif_blocks_dic:
                found += 1
            if motif_id in str_motif_blocks_dic:
                found += 1
            assert found, "no motifs loaded for RBP ID \"%s\". Please provide the respective motifs file" %(rbp_id)


    """
    If --regex is set:
    Treat regex as sequence motif / fimo type.
    rbp_id: regex, motif_id: regex, motif_db: regex
    
    """

    regex_id = args.regex_id
    regex = args.regex
    
    if args.regex:

        if regex_type == "sequence":

            # Remove , ; from given regex, to avoid motif_id format conflicts.
            regex = benchlib.remove_special_chars_from_str(args.regex,
                                                        reg_ex="[ ;]",
                                                        to_upper=False)  # [ :;\(\)]
            
            assert regex, "empty string after removing special chars ( ;) from --regex. Please provide a valid regex with DNA letters"

            # Convert IUPAC codes (if present) in regex to standard regex format.
            regex = benchlib.convert_iupac_in_regex(regex)

        elif regex_type == "structure":

            # Check structure pattern and format.
            regex = benchlib.check_format_str_pattern(args.regex)

        args.regex = regex

        regex_id = benchlib.remove_special_chars_from_str(args.regex_id)

        assert regex_id, "empty string after removing special chars from --regex-id. Please provide alphanumeric string for regex ID (- or _ are okay as well)"
        assert regex_id not in name2ids_dic, "--regex set but a different RBP ID with name \"%s\" was found. Please provide a different RBP ID or --regex-id" %(regex_id)

        args.regex_id = regex_id

        if args.motif_regex_id:
            assert regex_id not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or --regex-id" %(regex_id)

            id2name_dic[regex_id] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex_id] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex_id] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex_id]  # rbp_id -> motif_ids

        else:
            assert regex not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or use --motif-regex-id" %(regex_id)

            id2name_dic[regex] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex]  # rbp_id -> motif_ids

        loaded_rbps_dic[regex_id] = regex_id  # rbp_id -> motif_db_str

    else:
        regex_id = False


    """
    Get function ID -> function descriptions mapping.

    """
    # RBP function ID -> description mapping file shipped with the motif DB.
    fid2desc_file = db_path + "/rbp_function_list.32728246.tsv"
    fid2desc_dic, desc2fid_dic = benchlib.get_fid2desc_mapping(fid2desc_file)

    """
    If --motifs set, filter loaded_rbps_dic + loaded_motif_ids_dic,
    keeping only the user-selected motif IDs (and the RBPs they belong to).

    """

    if args.motifs_list:

        print("Filtering loaded motifs by provided --motifs ... ")

        # Deduplicate user-provided motif IDs.
        motif_fids_dic = {}

        for motif_id in args.motifs_list:
            motif_fids_dic[motif_id] = 1

        filtered_rbps_dic = {}
        filtered_motif_ids_dic = {}
        filtered_name2ids_dic = {}  # RBP ID -> list of its kept motif IDs.
        for motif_id in motif_fids_dic:
            assert motif_id in id2name_dic, "motif ID \"%s\" provided via --motifs not found in internal motif ID -> RBP ID mapping. Please provide valid motif IDs" %(motif_id)
            assert motif_id in loaded_motif_ids_dic, "motif ID \"%s\" provided via --motifs not found in loaded motifs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_motif_ids_dic[motif_id]
            filtered_motif_ids_dic[motif_id] = set_db_str
            rbp_id = id2name_dic[motif_id]
            assert rbp_id in loaded_rbps_dic, "motif ID \"%s\" provided via --motifs not found in loaded RBPs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_rbps_dic[rbp_id]
            filtered_rbps_dic[rbp_id] = set_db_str
            if rbp_id in filtered_name2ids_dic:
                filtered_name2ids_dic[rbp_id].append(motif_id)
            else:
                filtered_name2ids_dic[rbp_id] = [motif_id]

        # A set --regex survives --motifs filtering. Key choice mirrors the
        # --functions filter below: with --motif-regex-id the regex is keyed
        # by its motif ID, otherwise by the regex string itself.
        if args.regex:
            filtered_rbps_dic[regex_id] = regex_id
            if args.motif_regex_id:
                filtered_motif_ids_dic[regex_id] = regex_id
                filtered_name2ids_dic[regex_id] = [regex_id]
            else:
                filtered_motif_ids_dic[regex] = regex_id
                filtered_name2ids_dic[regex_id] = [regex]

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by --motifs: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by --motifs:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by --motifs: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by --motifs:", c_loaded_motif_ids_post)

        assert loaded_motif_ids_dic, "no remaining motifs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        name2ids_dic = filtered_name2ids_dic

    """
    If --functions set, filter loaded_rbps_dic + loaded_motif_ids_dic.

    """
    rbp_fids_dic = {}
    if args.rbp_functions:

        print("Filtering loaded RBPs by provided function IDs ... ")

        for fid in args.rbp_functions:
            rbp_fids_dic[fid] = 1

        # Check if provided function IDs are valid.
        for fid in rbp_fids_dic:
            assert fid in fid2desc_dic, "function ID \"%s\" provided via --functions not found in internal function ID -> description mapping. Please provide valid function IDs (see rbpbench info for a detailed description)" %(fid)

        # Filter loaded_rbps_dic.
        filtered_rbps_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:  # keep regex if set.
                filtered_rbps_dic[regex_id] = regex_id
                continue
            if rbp_id not in name2fids_dic:
                continue
            for fid in rbp_fids_dic:
                if fid in name2fids_dic[rbp_id]:
                    set_db_str = loaded_rbps_dic[rbp_id]
                    filtered_rbps_dic[rbp_id] = set_db_str
                    break

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by functions: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by functions:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided function IDs. Please provide compatible RBPs + function IDs (see rbpbench info for annotated RBP functions)"

        # Filter loaded motif IDs.
        filtered_motif_ids_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:
                if args.motif_regex_id:
                    filtered_motif_ids_dic[regex_id] = regex_id
                else:
                    filtered_motif_ids_dic[regex] = regex_id
            else:
                for motif_id in name2ids_dic[rbp_id]:
                    set_db_str = loaded_motif_ids_dic[motif_id]
                    filtered_motif_ids_dic[motif_id] = set_db_str
        
        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by functions: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by functions:", c_loaded_motif_ids_post)


    """
    Load RBP data, store in RBP() class.

    """

    # Store motif IDs for search.
    search_rbps_dic = {}
    seq_rbps_dic = {}
    str_rbps_dic = {}
    motif_id2idx_dic = {} # motif ID -> list index.
    args.internal_id = []

    for rbp_id in loaded_rbps_dic:
    
        internal_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
        args.internal_id.append(internal_id)
        rbp = benchlib.RBP(rbp_id, internal_id)

        for motif_id in name2ids_dic[rbp_id]:
        
            assert motif_id in loaded_motif_ids_dic, "motif_id %s not in loaded_motif_ids_dic" %(motif_id)

            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "regex":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "cm":
                rbp.str_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.str_motif_ids) - 1
                rbp.str_motif_hits.append(0)
                str_rbps_dic[rbp_id] = 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

        search_rbps_dic[rbp_id] = rbp


    print("# of RBP IDs for search:    ", len(loaded_rbps_dic))
    print("# of motif IDs for search:  ", len(loaded_motif_ids_dic))

    """
    Get chromosome IDs from --genome.
    """
    print("Get --genome FASTA headers ... ")
    chr_ids_dic = benchlib.get_fasta_headers(args.in_genome)

    """
    Guess chromosome ID style.

    chr_style:
        1: chr1, chr2, ..., chrX, chrM
        2: 1, 2, ... , X, MT

    """
    print("Guess chromosome ID style (based on --genome FASTA headers) ... ")
    chr_style = benchlib.guess_chr_id_style(chr_ids_dic)

    """
    Find out what --in file is: BED or transcript IDs list.
    
    """

    search_tr_ids_dic = {}
    in_bed_file = False

    if not benchlib.bed_check_format(args.in_file, asserts=False):

        assert args.in_gtf, "--in file is not a BED file so a GTF file (--gtf) is required for transcript ID mapping"
        search_tr_ids_dic = benchlib.read_ids_into_dic(args.in_file,
                                                       check_dic=False)
        assert search_tr_ids_dic, "no transcript IDs read in from provided --in file. Please provide a valid IDs file (one transcript ID per row)"
        assert not args.tr_list, "--tr-list set but --in file is already a list of transcript IDs. Please provide --in BED file in case of --tr-list"

    else:
        in_bed_file = args.in_file

    """
    If --gtf given, get transcript infos.

    """

    tid2tio_dic = None
    tr2gid_dic = {}
    tr_types_dic = {}  # Store transcript biotypes in GTF file.

    if args.in_gtf:

        # Get gene infos.
        print("Read in gene features from --gtf ... ")
        
        gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                      tr2gid_dic=tr2gid_dic,
                                                      tr_types_dic=tr_types_dic,
                                                      check_chr_ids_dic=chr_ids_dic,
                                                      chr_style=chr_style,
                                                      empty_check=False)
        assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
        assert tr2gid_dic, "no transcript -> gene ID mapping read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
        assert tr_types_dic, "no transcript biotypes read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"

        c_gene_infos = len(gid2gio_dic)
        print("# gene features read in from --gtf:", c_gene_infos)

        if search_tr_ids_dic:
            for tr_id in search_tr_ids_dic:
                assert tr_id in tr2gid_dic, "transcript ID \"%s\" from --in file does not appear in --gtf file. Check if transcript IDs in --gtf include version numbers (change IDs), or provide compatible files" %(tr_id)

        # If --tr-list given.
        tr_ids_dic = {}
        if args.tr_list:
            assert os.path.exists(args.tr_list), "given --tr-list file \"%s\" not found" % (args.tr_list)
            tr_ids_dic = benchlib.read_ids_into_dic(args.tr_list,
                                                    check_dic=False)
            assert tr_ids_dic, "no IDs read in from provided --tr-list file. Please provide a valid IDs file (one ID per row)"
            for tr_id in tr_ids_dic:
                assert tr_id in tr2gid_dic, "transcript ID \"%s\" from provided --tr-list file does not appear in --gtf file. Please provide compatible IDs + files" %(tr_id)
                tr_ids_dic[tr_id] = tr2gid_dic[tr_id]
            print("# of transcript IDs (read in from --tr-list): ", len(tr_ids_dic))
        else:
            if not search_tr_ids_dic:
                # Get most prominent transcripts from gene infos.
                tr_ids_dic = benchlib.select_mpts_from_gene_infos(gid2gio_dic,
                                        basic_tag=False,  # do not be strict (only_tsl=False too).
                                        ensembl_canonical_tag=False,
                                        prior_basic_tag=True,  # Prioritize basic tag transcript.
                                        prior_mane_select=True,  # mane select if set trumps all.
                                        prior_lncrna_primary_tag=True,
                                        only_tsl=False)
                assert tr_ids_dic, "most prominent transcript selection from gene infos failed. Please contact developers"
                print("# of transcript IDs (most prominent transcripts): ", len(tr_ids_dic))
            else:
                tr_ids_dic = search_tr_ids_dic  # Only transcript IDs / annotations needed for search.

        # Check exon order (return True if minus strand exon 1 is most downstream, not most upstream, which is the correct way).
        print("Check minus-strand exon order in --gtf ... ")
        correct_min_ex_order = benchlib.gtf_check_exon_order(args.in_gtf)
        if correct_min_ex_order:
            print("Correct order encountered ... ")
        else:
            print("Reverse order encountered ... ")
        # Get transcript infos.
        print("Read in transcript infos from --gtf ... ")
        tid2tio_dic = benchlib.gtf_read_in_transcript_infos(args.in_gtf, 
                                                            tr_ids_dic=tr_ids_dic,
                                                            correct_min_ex_order=correct_min_ex_order,
                                                            chr_style=chr_style,
                                                            empty_check=False)

        assert tid2tio_dic, "no transcript infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"

        # (in)sanity checks: tr_ids_dic and tid2tio_dic must hold the same
        # transcript IDs. The original assert messages contained a "%s"
        # placeholder but never applied the % operator, so failures printed
        # the literal "%s" instead of the offending ID — fixed here.
        for tr_id in tr_ids_dic:
            assert tr_id in tid2tio_dic, "transcript ID %s not in tid2tio_dic" %(tr_id)
        for tr_id in tid2tio_dic:
            assert tr_id in tr_ids_dic, "transcript ID %s not in tr_ids_dic" %(tr_id)

        c_tr_infos = len(tid2tio_dic)
        print("# transcript features read in from --gtf:", c_tr_infos)

        # Output transcript regions as BED file (used as --in BED downstream
        # when --in was a transcript IDs list).
        if search_tr_ids_dic:
            print("Output --in transcript IDs as transcript regions to BED ... ")
            in_bed_file = args.out_folder + "/transcript_regions.searchlong.tmp.bed"
            # File handle.
            OUTBED = open(in_bed_file, "w")
            # Output transcript regions.
            for tr_id in search_tr_ids_dic:
                chr_id = tid2tio_dic[tr_id].chr_id
                tr_s = tid2tio_dic[tr_id].tr_s - 1  # 1-based -> 0-based BED start.
                tr_e = tid2tio_dic[tr_id].tr_e
                tr_pol = tid2tio_dic[tr_id].tr_pol
                OUTBED.write("%s\t%i\t%i\t%s\t0\t%s\n" %(chr_id, tr_s, tr_e, tr_id, tr_pol))
            OUTBED.close()
        
    """
    Filter / extend --in genomic regions BED file.

    """

    ext_up = 0
    ext_down = 0
    bed_score_col = 5

    # Filter / extend --in BED.
    print("Preprocess --in sites ... ")
    reg2sc_dic = {}  # Used to store region scores for Wilcoxon rank-sum test.
    reg2pol_dic = {}
    bed_chr_ids_dic = {}
    reg_stats_dic = benchlib.bed_filter_extend_bed(in_bed_file, filtered_sites_bed,
                                          ext_up=ext_up,
                                          ext_down=ext_down,
                                          remove_dupl=True,
                                          reg2sc_dic=reg2sc_dic,
                                          reg2pol_dic=reg2pol_dic,
                                          score_col=bed_score_col,
                                          chr_ids_dic=chr_ids_dic,
                                          bed_chr_ids_dic=bed_chr_ids_dic,
                                          use_region_ids=True,
                                          unstranded=False)

    print("# --in regions pre-filtering:  ", reg_stats_dic["c_in"])
    print("# --in regions post-filtering: ", reg_stats_dic["c_out"])
    print("# regions with invalid chr_id: ", reg_stats_dic["c_chr_filter"])
    print("# duplicated regions removed:  ", reg_stats_dic["c_dupl_filter"])

    assert reg_stats_dic["c_out"], "no --in regions remain after chromosome ID filtering. If caused by invalid chr_id filtering, make sure chromosome IDs in --genome FASTA and --in BED files (or GTF if --in is transcript IDs file) are compatible (i.e., \"chr1\" vs. \"1\" notation)"

    """
    Calculate effective size of genomic regions.
    
    """
    print("Calculate effective genomic region size ... ")
    eff_reg_size = benchlib.get_uniq_gen_size(filtered_sites_bed)

    print("Called region length sum:      ", reg_stats_dic["reg_len_sum"])
    print("Effective region length sum:   ", eff_reg_size)


    """
    Get genomic region sequences from --genome.

    Output FASTA header format:
    >chr8:9772198-9772297(+)

    No need to convert sequences to uppercase, as FIMO works on both 
    lower- and uppercase (as long as DNA / RNA is set correct).

    """
    print("Extract sequences from --genome ... ")
    benchlib.bed_extract_sequences_from_fasta(filtered_sites_bed, 
                                              args.in_genome, filtered_sites_fa,
                                              print_warnings=True)

    """
    Get FASTA sequences and sequence lengths.
    """

    out_seqs_dic = benchlib.read_fasta_into_dic(filtered_sites_fa,
                                       dna=True,
                                       all_uc=True,
                                       id_check=True,
                                       empty_check=False,
                                       skip_n_seqs=False)

    assert out_seqs_dic, "no sequences extracted from FASTA file for --in BED regions. Make sure to use compatible FASTA/BED files!"

    # Get region strands/polarities for motif distance calculations.
    reg2pol_dic = benchlib.extract_pol_from_seq_ids(out_seqs_dic)

    # Effective number of regions used for motif search.
    c_regions = len(out_seqs_dic)
    args.c_regions = c_regions

    # Called region size.
    called_reg_size = 0
    len_list = []
    for seq_id in out_seqs_dic:
        seq_len = len(out_seqs_dic[seq_id])
        called_reg_size += seq_len
        len_list.append(seq_len)

    # Length statistics.
    reg_len_median = statistics.median(len_list)
    reg_len_mean = statistics.mean(len_list)
    reg_len_mean = round(reg_len_mean, 2)
    reg_len_min = min(len_list)
    reg_len_max = max(len_list)

    """
    ====================================
    RUN SEQUENCE MOTIF SEARCH WITH FIMO.
    ====================================
    
    """
    fimo_hits_list = []
    call_dic = {}

    if seq_rbps_dic:

        """
        Print motifs to file.

        """

        print("Output motifs to XML ... ")
        out_str, c_added_motifs = benchlib.blocks_to_xml_string(seq_motif_blocks_dic, loaded_motif_ids_dic)

        benchlib.output_string_to_file(out_str, seq_motifs_xml)


        """
        Run FIMO on sequences + motifs.

        """

        print("Run FIMO ... ")
        benchlib.run_fast_fimo(filtered_sites_fa, seq_motifs_xml, fimo_res_tsv,
                    pval_thr=args.fimo_pval,
                    nt_freqs_file=fimo_freqs_file,
                    call_dic=call_dic,
                    params=fimo_params,
                    error_check=False)

        """
        Read in FIMO hits.

        """

        assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

        print("Read in FIMO results ... ")
        fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                       only_best_hits=args.greatest_hits)

        c_fimo_hits = len(fimo_hits_list)
        print("# of FIMO motif hits:", c_fimo_hits)

        """
        If --regex is set, search for regex hits in sequences (stored in out_seqs_dic).

        """
        if args.regex:

            step_size_one = False
            if args.regex_search_mode == 1:
                step_size_one = True
            elif args.regex_search_mode == 2:
                step_size_one = False
            else:
                assert False, "invalid --regex-search-mode %i set" %(args.regex_search_mode)

            print("Run search for --regex \"%s\" ... " %(regex))
            regex_hits_list = benchlib.get_regex_hits(regex, regex_id, out_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=args.motif_regex_id)


            c_regex_hits = len(regex_hits_list)
            print("# of regex hits:", c_regex_hits)

            # Add regex hits to fimo_hits_list.
            fimo_hits_list += regex_hits_list


    """
    =========================================
    RUN STRUCTURE MOTIF SEARCH WITH CMSEARCH.
    =========================================

    """
    cmsearch_hits_list = []

    if str_rbps_dic:
        
        print("Output covariance models to .cm ... ")
        benchlib.output_cm_blocks_to_file(str_motif_blocks_dic, loaded_motif_ids_dic, str_motifs_cm)

        # Run cmsearch.
        print("Run cmsearch ... ")
        cmsh_mode = ""
        if args.cmsearch_mode == 1:
            cmsh_mode = "--default"
        elif args.cmsearch_mode == 2:
            cmsh_mode = "--max"
        else:
            assert False, "invalid --cmsearch-mode %i set" %(args.cmsearch_mode)
        cmsh_params = "-g --tformat fasta --toponly --incT %s -T %s %s" %(args.cmsearch_bs, args.cmsearch_bs, cmsh_mode)

        benchlib.run_cmsearch(filtered_sites_fa, str_motifs_cm, cmsearch_res_txt,
                        error_check=False,
                        call_dic=call_dic,
                        params=cmsh_params) # or add --anytrunc and remove --g
        # Read in hits.
        print("Read in cmsearch results ... ")
        cmsearch_hits_list, c_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                           only_best_hits=args.greatest_hits,
                                                                           check=True)

        print("# of cmsearch motif hits:", c_cms_hits)


    """
    Store for each RBP the regions with motif hits (and hit counts), using
    dictionary of dictionaries regions_with_motifs_dic.
    This tells us, how many input regions have motif hits, separated by RBP.
    Also store for each RBP the unique motif hit regions (and hit counts), using
    dictionary of dictionaries unique_motifs_dic.

    regions_with_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'region1': motif_c_region1, 'region2': motif_c_region2}, rbp_id2 -> {'region1': motif_c_region1}}
    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }

    """

    regions_with_motifs_dic = {}
    unique_motifs_dic = {}

    # Store regions with sequence motifs.
    for fh in fimo_hits_list:

        rbp_id = id2name_dic[fh.motif_id]

        if rbp_id in regions_with_motifs_dic:
            # fh.seq_name : FASTA header (== --in genomic sequence region).
            if fh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][fh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][fh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][fh.seq_name] = 1

        fh_str = repr(fh) # genomic motif region string.

        # Unique motif regions for each RBP.
        if rbp_id in unique_motifs_dic:
            if fh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][fh_str] += 1
            else:
                unique_motifs_dic[rbp_id][fh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][fh_str] = 1

    # Store regions with structure motifs.
    for cmsh in cmsearch_hits_list:

        rbp_id = id2name_dic[cmsh.motif_id]

        if rbp_id in regions_with_motifs_dic:
            # cmsh.seq_name : FASTA header (== --in genomic sequence region).
            if cmsh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1

        cmsh_str = repr(cmsh) # genomic motif region string.

        # Unique motif regions for each RBP.
        if rbp_id in unique_motifs_dic:
            if cmsh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][cmsh_str] += 1
            else:
                unique_motifs_dic[rbp_id][cmsh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][cmsh_str] = 1

    """
    Store infos for each RBP in RBP class.

    search_rbps_dic[rbp_id] = rbp_class
    RBP class arguments:
            name: str,
            seq_motif_ids = None,
            str_motif_ids = None,
            c_hit_reg = 0, # # regions with motif hits.
            perc_hit_reg = 0.0, # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
            c_motif_hits = 0, # # motif hits.
            c_uniq_motif_hits = 0, # # unique motif hits.
            c_uniq_motif_nts = 0, # # unique motif nucleotides.
            perc_uniq_motif_nts_eff_reg = 0.0, # % unique motif nts over effective region length.
            perc_uniq_motif_nts_cal_reg = 0.0, # % unique motif nts over called region length.
            uniq_motif_hits_eff_1000nt = 0.0, # unique motif hits per effective 1000 nt.
            uniq_motif_hits_cal_1000nt = 0.0, # unique motif hits per called 1000 nt.
            ks_pval = 1.0, # Kolmogorov-Smirnov (KS) statistic p-value (are higher scoring sites enriched with motifs).
            ks_stat = 0.0,
            organism: Optional[str] = None

    Number of sequences for FIMO / cmsearch: 
    c_regions

    """
    # Set number of no-hit regions.
    for rbp_id in search_rbps_dic:
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions

    for rbp_id in regions_with_motifs_dic:
        # Number of --in regions with RBP motif hits.
        c_hit_reg = len(regions_with_motifs_dic[rbp_id])

        # Number of motif hits on --in regions in total.
        c_motif_hits = 0
        for reg_id in regions_with_motifs_dic[rbp_id]:
            c_motif_hits += regions_with_motifs_dic[rbp_id][reg_id]
        search_rbps_dic[rbp_id].c_hit_reg = c_hit_reg
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions - c_hit_reg
        search_rbps_dic[rbp_id].c_motif_hits = c_motif_hits

        # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
        search_rbps_dic[rbp_id].perc_hit_reg = (search_rbps_dic[rbp_id].c_hit_reg / c_regions) * 100

    """
    Get unique motif hits.

    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }
    """

    for rbp_id in unique_motifs_dic:
        c_uniq_motif_hits = len(unique_motifs_dic[rbp_id])
        search_rbps_dic[rbp_id].c_uniq_motif_hits = c_uniq_motif_hits
        # Store individual motif unique hits.
        for motif_str_repr in unique_motifs_dic[rbp_id]:
            motif_id = benchlib.get_motif_id_from_str_repr(motif_str_repr)
            idx = motif_id2idx_dic[motif_id]
            if id2type_dic[motif_id] == "meme_xml":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "regex":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "cm":
                search_rbps_dic[rbp_id].str_motif_hits[idx] += 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

    """
    Number of motif nucleotides over called + effective region size.

    """

    print("Calculate effective motif region sizes for each RBP ... ")
    for rbp_id in unique_motifs_dic:
        # Output unique motif hit regions (sequence or structure) to BED for RBP rbp_id.
        benchlib.output_motif_hits_to_bed(rbp_id, unique_motifs_dic, out_tmp_bed,
                                          one_based_start=True)
        # Calculate effective motif region size.
        eff_motif_reg_size = benchlib.get_uniq_gen_size(out_tmp_bed)

        # Number of unique motif nucleotides.
        search_rbps_dic[rbp_id].c_uniq_motif_nts = eff_motif_reg_size
        # % unique motif nts over effective region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg = (eff_motif_reg_size / eff_reg_size) * 100
        # % unique motif nts over called region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg = (eff_motif_reg_size / called_reg_size) * 100
        # Number of unique motif hits per effective 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (eff_reg_size / 1000)
        # Number of unique motif hits per called 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (called_reg_size / 1000)

    """
    # Print RBP object stats.
    for rbp_id in search_rbps_dic:
        print(search_rbps_dic[rbp_id].__dict__)

    """

    # print(search_rbps_dic["AGGF1"].__dict__)
    print("# --in regions for motif search:", c_regions)
    # print("Called genomic region size:     ", called_reg_size)
    # print("Effective genomic region size:  ", eff_reg_size)


    """
    Output RBP hit stats (ie one row per RBP).

    Output clowns:
    rbp_id
    c_regions
    called_reg_size
    effective_reg_size
    c_reg_with_hits
    perc_reg_with_hits
    c_motif_hits
    c_uniq_motif_hits
    c_uniq_motif_nts
    perc_uniq_motif_nts_cal_reg
    perc_uniq_motif_nts_eff_reg
    uniq_motif_hits_cal_1000nt
    uniq_motif_hits_eff_1000nt
    wc_pval
    seq_motif_ids
    seq_motif_hits
    str_motif_ids
    str_motif_hits

    """

    rbp_list = []

    OUTSTATS = open(rbp_stats_out, "w")
    rbp_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\trbp_id\tc_regions\tmean_reg_len\tmedian_reg_len\tmin_reg_len\tmax_reg_len\t"
    rbp_stats_header += "called_reg_size\teffective_reg_size\tc_reg_with_hits\tperc_reg_with_hits\t"
    rbp_stats_header += "c_motif_hits\tc_uniq_motif_hits\tc_uniq_motif_nts\tperc_uniq_motif_nts_cal_reg\tperc_uniq_motif_nts_eff_reg\tuniq_motif_hits_cal_1000nt\t"
    rbp_stats_header += "uniq_motif_hits_eff_1000nt\twc_pval\twc_rbc_eff_size\twc_cl_eff_size\tseq_motif_ids\tseq_motif_hits\tstr_motif_ids\tstr_motif_hits\tinternal_id\n"
    OUTSTATS.write(rbp_stats_header)

    for rbp_id in search_rbps_dic:

        # print(search_rbps_dic[rbp_id].__dict__)
        rbp_list.append(rbp_id)

        motif_db_out = loaded_rbps_dic[rbp_id]

        c_reg_with_hits = search_rbps_dic[rbp_id].c_hit_reg
        perc_reg_with_hits = search_rbps_dic[rbp_id].perc_hit_reg
        c_motif_hits = search_rbps_dic[rbp_id].c_motif_hits
        c_uniq_motif_hits = search_rbps_dic[rbp_id].c_uniq_motif_hits
        c_uniq_motif_nts = search_rbps_dic[rbp_id].c_uniq_motif_nts
        perc_uniq_motif_nts_cal_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg
        perc_uniq_motif_nts_eff_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg
        uniq_motif_hits_cal_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt
        uniq_motif_hits_eff_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt
        wc_pval = "-"
        wc_rbc_es = "-"
        wc_cl_es = "-"
        internal_id = search_rbps_dic[rbp_id].internal_id

        seq_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].seq_motif_hits)
        str_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].str_motif_hits)
        seq_motif_ids = ",".join(search_rbps_dic[rbp_id].seq_motif_ids)
        str_motif_ids = ",".join(search_rbps_dic[rbp_id].str_motif_ids)
        if not seq_motif_hits:
            seq_motif_hits = "-"
        if not str_motif_hits:
            str_motif_hits = "-"
        if not seq_motif_ids:
            seq_motif_ids = "-"
        if not str_motif_ids:
            str_motif_ids = "-"

        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += rbp_id + "\t"

        row_str += str(c_regions) + "\t"

        row_str += str(reg_len_mean) + "\t"
        row_str += str(reg_len_median) + "\t"
        row_str += str(reg_len_min) + "\t"
        row_str += str(reg_len_max) + "\t"

        row_str += str(called_reg_size) + "\t"
        row_str += str(eff_reg_size) + "\t"

        row_str += str(c_reg_with_hits) + "\t"
        row_str += str(perc_reg_with_hits) + "\t"

        row_str += str(c_motif_hits) + "\t"
        row_str += str(c_uniq_motif_hits) + "\t"
        row_str += str(c_uniq_motif_nts) + "\t"
        row_str += str(perc_uniq_motif_nts_cal_reg) + "\t"
        row_str += str(perc_uniq_motif_nts_eff_reg) + "\t"
        row_str += str(uniq_motif_hits_cal_1000nt) + "\t"
        row_str += str(uniq_motif_hits_eff_1000nt) + "\t"
        row_str += wc_pval + "\t"
        row_str += wc_rbc_es + "\t"
        row_str += wc_cl_es + "\t"
        row_str += seq_motif_ids + "\t"
        row_str += seq_motif_hits + "\t"
        row_str += str_motif_ids + "\t"
        row_str += str_motif_hits + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    OUTSTATS.close()


    """
    Output motif region stats (1 row for each motif hit).
    Report ALL motif hits,
    plus report how many times one genomic motif hit occurs (uniq_count). 

    """

    OUTSTATS = open(motif_stats_out,"w")

    motif_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\tregion_id\trbp_id\tmotif_id\tchr_id\tgen_s\tgen_e\tstrand\tregion_s\tregion_e\tregion_len\t"
    motif_stats_header += "uniq_count\tfimo_score\tfimo_pval\tcms_score\tcms_eval\tmatched_seq\tinternal_id\n"
    OUTSTATS.write(motif_stats_header)

    # Unique motif regions BED.
    motif_reg_dic = {}
    match_c_dic = {}

    for rbp_id in search_rbps_dic:
        match_c_dic[rbp_id] = {}
        for motif_id in search_rbps_dic[rbp_id].seq_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}
        for motif_id in search_rbps_dic[rbp_id].str_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}

    for fh in fimo_hits_list:

        rbp_id = id2name_dic[fh.motif_id]
        region_id = fh.seq_name
        region_len = benchlib.get_length_from_seq_name(fh.seq_name)
        # genomic motif region string.
        fh_str = repr(fh)
        uniq_count = unique_motifs_dic[rbp_id][fh_str]

        motif_db_out = loaded_motif_ids_dic[fh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Get matched sequence.
        matched_seq = benchlib.get_matched_seq(fh.seq_name, out_seqs_dic, fh.seq_s, fh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        # Store motif hit as BED.
        hit_id = "%s:%s-%s(%s)%s" %(fh.chr_id, str(fh.start), str(fh.end), fh.strand, fh.motif_id)
        if hit_id not in motif_reg_dic:
            if fh.hit_type == "str_pat":
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.gc_frac), str(fh.gu_frac), matched_seq)
            else:
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.score), str(fh.pval), matched_seq)
            motif_reg_dic[hit_id] = bed_row
            # bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.score), str(fh.pval), matched_seq)
            # motif_reg_dic[hit_id] = bed_row
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if fh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][fh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][fh.motif_id]:
                match_c_dic[rbp_id][fh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][fh.motif_id][matched_seq] += 1

        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += fh.motif_id + "\t"
        row_str += fh.chr_id + "\t"
        row_str += str(fh.start) + "\t"  # 1-based.
        row_str += str(fh.end) + "\t"
        row_str += fh.strand + "\t"
        row_str += str(fh.seq_s) + "\t"  # 1-based.
        row_str += str(fh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += str(fh.score) + "\t"
        row_str += str(fh.pval) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    for cmsh in cmsearch_hits_list:

        rbp_id = id2name_dic[cmsh.motif_id]
        region_id = cmsh.seq_name
        region_len = benchlib.get_length_from_seq_name(cmsh.seq_name)
        # genomic motif region string.
        cmsh_str = repr(cmsh) 
        uniq_count = unique_motifs_dic[rbp_id][cmsh_str]

        motif_db_out = loaded_motif_ids_dic[cmsh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Get matched sequence.
        matched_seq = benchlib.get_matched_seq(cmsh.seq_name, out_seqs_dic, cmsh.seq_s, cmsh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        # Store motif hit as BED.
        hit_id = "%s:%s-%s(%s)%s" %(cmsh.chr_id, str(cmsh.start), str(cmsh.end), cmsh.strand, cmsh.motif_id)
        if hit_id not in motif_reg_dic:
            bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s" %(cmsh.chr_id, cmsh.start-1, cmsh.end, rbp_id, cmsh.motif_id, uniq_count, args.method_id, args.data_id, cmsh.strand, str(cmsh.score), str(cmsh.e_value), matched_seq)
            motif_reg_dic[hit_id] = bed_row
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if cmsh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][cmsh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][cmsh.motif_id]:
                match_c_dic[rbp_id][cmsh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][cmsh.motif_id][matched_seq] += 1

        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += cmsh.motif_id + "\t"
        row_str += cmsh.chr_id + "\t"
        row_str += str(cmsh.start) + "\t"
        row_str += str(cmsh.end) + "\t"
        row_str += cmsh.strand + "\t"
        row_str += str(cmsh.seq_s) + "\t"
        row_str += str(cmsh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += str(cmsh.score) + "\t"
        row_str += str(cmsh.e_value) + "\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"
        #print("region_id:", region_id)
        #print("evalue:", cmsh.e_value)
        #print(row_str)

        OUTSTATS.write(row_str)

    OUTSTATS.close()

    """
    Output motif hits as BED.

    The motif hits written to motif_hits_bed_out are unique motif hits already. 
    If same hit occurs > 1, this is recorded in BED column 4 with format:
    rbp_id:motif_id;uniq_count;method_id:data_id
    These also include regex hits, as they are part of fimo hits.
    
    """

    OUTBED = open(motif_hits_bed_out, "w")
    for hit_id in motif_reg_dic:
        OUTBED.write("%s\n" %(motif_reg_dic[hit_id]))
    OUTBED.close()


    """
    Output matched sequence counts in table. 

    Formats:
    match_c_total_dic[rbp_id][motif_id] = count
    match_c_dic[rbp_id][motif_id][matched_seq] = count

    """

    OUTTSV = open(matched_seqs_out, "w")
    OUTTSV.write("rbp_id\tmotif_id\tmatched_seq\tmatch_count\tmatch_perc\n")

    match_c_total_dic = benchlib.get_match_c_total_dic(match_c_dic)

    for rbp_id in match_c_dic:
        for motif_id in match_c_dic[rbp_id]:
            total_c = match_c_total_dic[rbp_id][motif_id]
            for matched_seq in match_c_dic[rbp_id][motif_id]:
                match_c = match_c_dic[rbp_id][motif_id][matched_seq]
                match_perc = 0.0
                if match_c > 0:
                    match_perc = (float(match_c)/float(total_c)) * 100.0
                row_str = "%s\t%s\t%s\t%i\t%.2f\n" %(rbp_id, motif_id, matched_seq, match_c, match_perc)
                OUTTSV.write(row_str)
    OUTTSV.close()


    """
    Motif annotations if --plot-motifs and --gtf set.

    """

    rbp2motif2annot2c_dic = {}  # rbp_id -> motif_id -> annot -> count
    rbp2motif2annot2normc_dic = {}  # rbp_id -> motif_id -> annot -> norm_count
    args.eff_in_reg_size = 0

    if args.in_gtf:

        # Extract exon + intron regions of selected transcripts from transcript infos.
        print("Output intron annotations to BED ... ")
        intron_exon_out_bed = args.out_folder + "/intron_exon_regions.tmp.bed"
        benchlib.output_transcript_info_intron_exon_to_bed(tid2tio_dic, intron_exon_out_bed,
                                            output_mode=3,  # only introns.
                                            report_counts=True,
                                            add_tr_id=True,
                                            empty_check=False)

        # Custom transcript biotypes for upset plot.
        # NOTE(review): stays None unless --tr-types is given; downstream
        # output_exon_annotations() presumably treats None as "no custom
        # biotypes" — keep that default.
        custom_annot_dic = None
        if args.tr_types_list:
            # Bug fix: was left as None, so the item assignment below raised
            # "TypeError: 'NoneType' object does not support item assignment".
            custom_annot_dic = {}
            for tr_type in args.tr_types_list:
                # Bug fix: interpolate the offending biotype ID into the message
                # (the "%s" placeholder was never filled in).
                assert tr_type in tr_types_dic, "given transcript biotype ID \"%s\" not found in GTF file" %(tr_type)
                custom_annot_dic[tr_type] = tr_type

        # Append detailed exon annotations (CDS, UTR, transcript biotypes) to intron annotations.
        print("Output exon annotations to BED ... ")
        benchlib.output_exon_annotations(tid2tio_dic, intron_exon_out_bed,
                                         custom_annot_dic=custom_annot_dic,
                                         append=True)

        motif_hits_bed_tmp_out = args.out_folder + "/motif_hits.tmp.bed"
        benchlib.reformat_to_bed10(motif_hits_bed_out, motif_hits_bed_tmp_out)

        # Overlap motif hit BED with genomic annotation regions.
        print("Overlap annotations with motif hit regions ... ")

        # -wao also captures motif hit regions without annotation overlap.
        # use compatible function get_motif_hit_region_annotations().
        params = "-s -wao -f %s" %(str(args.gtf_feat_min_overlap))
        overlap_motif_hit_annotations_bed = args.out_folder + "/overlap_motif_hit_annotations.tmp.bed"
        benchlib.bed_intersect_files(motif_hits_bed_tmp_out, intron_exon_out_bed, 
                                     overlap_motif_hit_annotations_bed,
                                     params=params)

        motif_hit2annot_dic = benchlib.get_motif_hit_region_annotations(overlap_motif_hit_annotations_bed,
                                                                        tid2tio_dic=tid2tio_dic)
        # motif_hit2annot_dic = benchlib.get_region_annotations(
        #                                     overlap_motif_hit_annotations_bed,
        #                                     motif_hits=True,
        #                                     reg_ids_dic=motif_reg_dic)

        # # Overwrite motif_hits_bed_out, adding annotation column.
        # OUTBED = open(motif_hits_bed_out,"w")
        # with open(overlap_motif_hit_annotations_bed) as f:
        #     for line in f:
        #         cols = line.strip().split("\t")
        #         annot_str = cols[13]
        #         annot = "intergenic"
        #         if annot_str != ".":
        #             annot = annot_str.split(";")[0]
                
        #         OUTBED.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %(cols[0], cols[1], cols[2], cols[3], cols[4], cols[5], cols[6], cols[7], cols[8], cols[9], annot))

        # f.closed
        # OUTBED.close()

        # Output motif hits BED again, with motif region annotations.
        OUTBED = open(motif_hits_bed_out, "w")
        for hit_id in motif_reg_dic:
            annot = "-"
            if hit_id in motif_hit2annot_dic:
                annot = motif_hit2annot_dic[hit_id][0]
            cols = motif_reg_dic[hit_id].split("\t")
            match_seq = cols[11]
            cols_part1 = "\t".join(cols[0:10]) 
            OUTBED.write("%s\t%s\t%s\n" %(cols_part1, annot, match_seq))
        OUTBED.close()

        for motif_hit in motif_hit2annot_dic:
            # motif_hit format: "chr1:10-15(+)motif_id". Get motif_id.
            motif_id = motif_hit.split(")")[1]
            annot = motif_hit2annot_dic[motif_hit][0]
            # tr_id = motif_hit2annot_dic[motif_hit][1]
            rbp_id = id2name_dic[motif_id]
            if rbp_id not in rbp2motif2annot2c_dic:
                rbp2motif2annot2c_dic[rbp_id] = {}
            if motif_id not in rbp2motif2annot2c_dic[rbp_id]:
                rbp2motif2annot2c_dic[rbp_id][motif_id] = {}
            if annot not in rbp2motif2annot2c_dic[rbp_id][motif_id]:
                rbp2motif2annot2c_dic[rbp_id][motif_id][annot] = 1
            else:
                rbp2motif2annot2c_dic[rbp_id][motif_id][annot] += 1

        """
        Get normalized rbp2motif2annot2c_dic (rbp2motif2annot2normc_dic)
        I.e., the count gets normalized by the total annot region length in the 
        filtered_sites_bed. This way we can better see whether motifs tend to occur
        more often in certain annotated regions.
        Note that overlap is done with filtered_sites_bed vs intron_exon_out_bed,
        counting all feature overlaps, whereas in rbp2motif2annot2c_dic the best 
        overlapping annotation is determined inside get_motif_hit_region_annotations.
        So a motif hit can only overlap with one annotation for the rbp2motif2annot2c_dic
        counts, whereas the filtered_sites_bed can overlap with different annotations
        (all the annotations in intron_exon_out_bed).

        eff_in_reg_size:
            Effective input regions size (i.e., overlapping regions merged).

        """

        print("Get normalized annotation counts ... ")
        rbp2motif2annot2normc_dic, eff_in_reg_size = benchlib.get_normalized_annot_counts(
                                        filtered_sites_bed, intron_exon_out_bed,
                                        rbp2motif2annot2c_dic, reg2pol_dic, 
                                        args.out_folder)

        args.eff_in_reg_size = eff_in_reg_size

    """
    Get annotation to color dictionary, which is needed for region annotation plots in HTML reports.

    """

    # Annotation -> hex color mapping, used by region annotation plots in the
    # HTML report. Stays empty if no annotation counts were collected.
    annot2color_dic = {}

    if rbp2motif2annot2c_dic:

        # Seed with the standard annotation categories so they always get a
        # (stable) color, then count occurrences of every annotation seen.
        annot_dic = {"3'UTR" : 0, "5'UTR" : 0, "CDS" : 0, "lncRNA" : 0, "intron" : 0, "intergenic" : 0}

        # Cleanup: the inner "if rbp2motif2annot2c_dic:" re-check was redundant
        # (this branch is only entered when the dict is non-empty).
        for rbp_id in rbp2motif2annot2c_dic:
            for motif_id in rbp2motif2annot2c_dic[rbp_id]:
                for annot in rbp2motif2annot2c_dic[rbp_id][motif_id]:
                    if annot not in annot_dic:
                        annot_dic[annot] = 1
                    else:
                        annot_dic[annot] += 1

        hex_colors = benchlib.get_hex_colors_list(min_len=len(annot_dic))

        # Assign colors in alphabetical annotation order so the mapping is
        # deterministic across runs.
        for idx, annot in enumerate(sorted(annot_dic)):
            annot2color_dic[annot] = hex_colors[idx]


    """
    Motif plots and motif hit statistics HTML.

    """
    # Bug fix: the --plot-abs-paths branch used the file name
    # "motif_plots.rbpbench_search.html" while the default used
    # "motif_plots.rbpbench_searchlong.html", so the exists/remove check and
    # the reported output path diverged depending on an unrelated flag.
    # Use the searchlong name consistently (this is searchlong mode output).
    html_motif_plots_out = args.out_folder + "/" + "motif_plots.rbpbench_searchlong.html"
    if args.plot_abs_paths:
        html_motif_plots_out = os.path.abspath(args.out_folder) + "/" + "motif_plots.rbpbench_searchlong.html"
    # If HTML file already exists, remove it.
    if os.path.exists(html_motif_plots_out):
        os.remove(html_motif_plots_out)

    # if args.plot_motifs:

    print("Generate motif plots HTML ... ")
    
    plots_subfolder = "html_motif_plots"
    benchlib_path = os.path.dirname(benchlib.__file__)

    # args.run_goa = False
    args.bed_sc_thr = None
    args.c_regions = c_regions
    args.ext_up = None
    args.ext_down = None
    # Only valid for searchlongrna.
    args.run_goa_tr = False

    benchlib.search_generate_html_motif_plots(args,
                                        search_rbps_dic, seq_motif_blocks_dic, 
                                        str_motif_blocks_dic,
                                        benchlib_path, loaded_motif_ids_dic,
                                        rbp2motif2annot2c_dic=rbp2motif2annot2c_dic,
                                        rbp2motif2annot2normc_dic=rbp2motif2annot2normc_dic,
                                        annot2color_dic=annot2color_dic,
                                        html_report_out=html_motif_plots_out,
                                        rbpbench_mode="searchlong --plot-motifs",
                                        reg_seq_str="regions",
                                        id2pids_dic=id2pids_dic,
                                        id2exp_dic=id2exp_dic,
                                        match_c_dic=match_c_dic,
                                        match_c_total_dic=match_c_total_dic,
                                        plots_subfolder=plots_subfolder)

    """
    Output parameter settings.

    """
    # Output mode settings.
    print("Output parameter settings ... ")
    SETOUT = open(settings_file, "w")
    for arg in vars(args):
        SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
    for call in call_dic:
        SETOUT.write("%s\t%s\n" %(call, call_dic[call]))
    SETOUT.close()

    """
    Take out the trash.

    """
    print("Delete .tmp files ... ")
    if os.path.exists(out_tmp_bed):
        os.remove(out_tmp_bed)
    if os.path.exists(cmstat_tmp_out):
        os.remove(cmstat_tmp_out)


    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    # print("Co-occurrence p-values for each RBP pair .tsv:\n%s" %(con_res_out_tsv))
    print("Filtered input regions .bed:\n%s" %(filtered_sites_bed))
    print("Filtered input regions .fa:\n%s" %(filtered_sites_fa))
    print("Motif hits .bed:\n%s" %(motif_hits_bed_out))
    print("Matched sequence stats .tsv:\n%s" %(matched_seqs_out))
    # print("RBP region occupancies .tsv:\n%s" %(rbp_reg_occ_table_out))
    print("RBP hit stats .tsv:\n%s" %(rbp_stats_out))
    print("Motif hit stats .tsv:\n%s" %(motif_stats_out))
    # if reg_annot_table_file is not None:
    #     print("Region annotations .tsv:\n%s" %(reg_annot_table_file))
    # if args.plot_motifs:
    print("Motif plots and hit statistics .html:\n%s" %(html_motif_plots_out))
    # if args.report:
    #     print("Search report .html:\n%s" %(html_report_out))
    print("")


################################################################################

def main_searchrna(args):
    """
    Motif search in spliced transcript sites and check motif co-occurrences.

    """

    print("Running for you in SEARCHRNA mode ... ")

    assert os.path.exists(args.in_sites), "--in file \"%s\" not found" % (args.in_sites)
    assert os.path.exists(args.in_gtf), "--gtf file \"%s\" not found" % (args.in_gtf)
    assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)
    assert benchlib.boundary_check(args.min_motif_dist, 0, 1000), "set --min-motif-dist expected to be >= 0 and <= 1000"

    if args.run_goa:
        if args.goa_obo_mode == 3:
            assert args.goa_obo_file, "set --goa-obo-mode 3 requires --goa-obo-file"
            assert os.path.exists(args.goa_obo_file), "--goa-obo-file file \"%s\" not found" % (args.goa_obo_file)
        if args.goa_obo_file:
            assert args.goa_obo_mode == 3, "--goa-obo-file requires --goa-obo-mode 3"
        if args.goa_max_child is not None:
            assert args.goa_max_child >= 0, "set --goa-max-child expected to be >= 0"
        if args.goa_min_depth is not None:
            assert args.goa_min_depth >= 0, "set --goa-min-depth expected to be >= 0"

    # Is MEME >= v5 installed?
    if not args.meme_disable_check:
        assert benchlib.is_tool("meme"), "meme not in PATH"
        check, meme_version = benchlib.check_tool_version("meme -version", "5.0")
        assert check, "RBPBench requires meme version >= 5.0 (installed version: %s)" %(meme_version)

    # Check if MEME version is >= 5.5.4 (need to add fimo --no-pgc option to produce same results!).
    fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text"
    if not args.meme_disable_check:
        check, meme_version = benchlib.check_tool_version("meme -version", "5.5.4")
        args.meme_version = meme_version
        if check:
            fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"
    if args.meme_no_pgc:
        fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"

    # Regex check.
    regex_type = "sequence"
    if args.regex:
        if args.regex_type == 1:
            print("Check given --regex type ... ")
            if benchlib.looks_like_structure(args.regex):
                print("Given --regex looks like structure pattern ... ")
                regex_type = "structure"
            else:
                print("Given --regex assumed to be standard regex ... ")
                regex_type = "sequence"
        elif args.regex_type == 2:
            regex_type = "sequence"
        elif args.regex_type == 3:
            regex_type = "structure"
        else:
            assert False, "unexpected --regex-type value set (%d)" %(args.regex_type)
        # If standard regex, check validity.
        if regex_type == "sequence":
            assert benchlib.is_valid_regex(args.regex), "given --regex \"%s\" is not a valid regular expression. Please provide valid expression" % (args.regex)

    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.fimo_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.fimo_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    if args.fimo_user_ntf_file:
        fimo_freqs_file = args.fimo_user_ntf_file
    assert os.path.exists(fimo_freqs_file), "set FIMO nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    Motif database.
    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                            db_path=db_path)
    # Custom motif database: folder given.
    if args.custom_db:
        assert not args.custom_db_meme_xml, "--custom-db folder set incompatible with --custom-db-meme-xml. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_cm, "--custom-db folder set incompatible with --custom-db-cm. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_info, "--custom-db folder set incompatible with --custom-db-info. Provide custom motif database either via folder (--custom-db) or as single files"
        seq_motifs_db_file, str_motifs_db_file, rbp2ids_file = specify_custom_motif_db(args.custom_db)
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
    # Custom motif database: single files given.
    if args.custom_db_meme_xml or args.custom_db_cm or args.custom_db_info:
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
        assert not args.custom_db, "single custom motif database files provided not compatible with --custom-db. Provide custom motif database either via folder (--custom-db) or as single files"
        assert args.custom_db_info, "--custom-db-info needed to define custom motif database"
        rbp2ids_file = args.custom_db_info
        assert args.custom_db_meme_xml or args.custom_db_cm, "--custom-db-meme-xml and/or --custom-db-cm needed to define custom motif database"
        if args.custom_db_meme_xml:
            seq_motifs_db_file = args.custom_db_meme_xml
        else:
            seq_motifs_db_file = ""  # setting to empty string results in os.path.exists -> False.
        if args.custom_db_cm:
            str_motifs_db_file = args.custom_db_cm
        else:
            str_motifs_db_file = ""

    args.motif_db_str = motif_db_str

    """
    Get ID mappings

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")

    """
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)
    
    # Motif ID -> RBP name (== RBP ID) mapping.
    id2name_dic = {}
    for rbp_id in name2ids_dic:
        for motif_id in name2ids_dic[rbp_id]:
            id2name_dic[motif_id] = rbp_id

    """
    Get MEME XML database motif blocks dictionary.

    """
    seq_motif_blocks_dic = {}
    if os.path.exists(seq_motifs_db_file):
        seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file,
                                                           empty_check=True)
    for motif_id in seq_motif_blocks_dic:
        assert motif_id in id2name_dic, "MEME XML motif ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    """
    Get covariance model database motif blocks dictionary.

    """
    str_motif_blocks_dic = {}
    if str_motifs_db_file and os.path.exists(str_motifs_db_file):
        str_motif_blocks_dic = benchlib.read_in_cm_blocks(str_motifs_db_file, 
                                                          empty_check=True)
        for motif_id in str_motif_blocks_dic:
            assert motif_id in id2name_dic, "Covariance model accession ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    # Remove special chars from run ID.
    args.data_id = benchlib.remove_special_chars_from_str(args.data_id)
    assert args.data_id, "empty string after removing special chars from --data-id. Please provide alphanumeric string for data ID (- or _ are okay as well)"
    args.method_id = benchlib.remove_special_chars_from_str(args.method_id)
    assert args.method_id, "empty string after removing special chars from --method-id. Please provide alphanumeric string for method ID (- or _ are okay as well)"
    # Run ID definition.
    run_id = "run_id"
    if args.run_id:
        run_id = benchlib.remove_special_chars_from_str(args.run_id)
        assert run_id, "empty string after removing special chars from --run-id. Please provide alphanumeric string for run ID (- or _ are okay as well)"
    # else:
    #     random_id = uuid.uuid4()
    #     run_id = str(random_id)

    # hash_len = max(len(run_id), len(args.data_id), len(args.method_id))
    # print("###################" + "#"*hash_len)
    print("Run ID:     ", run_id)
    print("Data ID:    ", args.data_id)
    print("Method ID:  ", args.method_id)
    # print("###################" + "#"*hash_len)

    """
    Output folders + files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    tr_seqs_fa = args.out_folder + "/transcript_sequences.fa"
    filtered_sites_bed = args.out_folder + "/in_sites.filtered.bed"
    filtered_sites_fa = args.out_folder + "/in_sites.filtered.fa"
    seq_motifs_xml = args.out_folder + "/seq_motifs.xml"
    str_motifs_cm = args.out_folder + "/str_motifs.cm"
    fimo_res_tsv = args.out_folder + "/fimo_results.tsv"
    cmsearch_res_txt = args.out_folder + "/cmsearch_results.txt"

    rbp_stats_out = args.out_folder + "/rbp_hit_stats.tsv"
    motif_stats_out = args.out_folder + "/motif_hit_stats.tsv"
    # con_res_out_tsv = args.out_folder + "/contingency_table_results.tsv"
    settings_file = args.out_folder + "/settings.rbpbench_searchrna.out"
    rbp_reg_occ_table_out = args.out_folder + "/rbp_region_occupancies.tsv"
    # Output RBP co-ooccurrence stats as table.
    cooc_stats_out = args.out_folder + "/rbp_cooc_stats.tsv"

    # GOA results.
    goa_results_tsv = args.out_folder + "/goa_results.tsv"

    # Output unique motif hits.
    motif_hits_bed_out = args.out_folder + "/motif_hits.rbpbench_searchrna.bed"
    # Output matched sequences stats.
    matched_seqs_out = args.out_folder + "/matched_seq_stats.tsv"

    # Temp files.
    # random_id = uuid.uuid1()
    # tmp_out_bed = args.out_folder + "/" + str(random_id) + ".filtered_in.bed"
    out_tmp_bed = args.out_folder + "/rbp_motif_hit_regions.tmp.bed"
    cmstat_tmp_out = args.out_folder + "/cmstat_out.tmp.txt"

    # Delete if existing folder.
    if os.path.exists(fimo_res_tsv):
        os.remove(fimo_res_tsv)
    if os.path.exists(cmsearch_res_txt):
        os.remove(cmsearch_res_txt)


    """
    Load RBP data based on --rbps (+ optionally USER data).

    """

    rbp_in_dic = {}
    for rbp_id in args.list_rbps:
        rbp_in_dic[rbp_id] = 1

    # RBPs for motif search.
    loaded_rbps_dic = {}

    # USER set?
    user_motifs = False
    user_rbp_id = False
    if "USER" in rbp_in_dic:
        user_motifs = True
    else:
        assert not args.user_meme_xml, "--user-meme-xml provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_cm, "--user-cm provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_rbp_id, "--user-rbp-id set but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"

    special_rbp_ids_list = ["USER", "REGEX"]

    # If ALL set, load all database RBPs (+ optionally USER).
    if "ALL" in rbp_in_dic:
        # With ALL, the only other allowed entry is USER.
        # Typo fix in the two user-facing messages: "optinally" -> "optionally".
        if len(rbp_in_dic) == 2:
            assert user_motifs, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optionally USER) --rbps RBP1 RBP2 ... USER"
        if len(rbp_in_dic) > 2:
            assert False, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optionally USER) --rbps RBP1 RBP2 ... USER"
        print("--rbps ALL selected. Loading all database motifs ... ")
        for rbp_id in name2ids_dic:
            loaded_rbps_dic[rbp_id] = motif_db_str

    else:
        # Load individual RBPs.
        for rbp_id in rbp_in_dic:
            if rbp_id not in special_rbp_ids_list:
                """
                Check if RBP ID in database.
                Suggest similar RBPs based on string similarity (edit distance).

                """
                if rbp_id not in name2ids_dic:
                    db_rbp_list = []
                    for db_rbp_id in name2ids_dic:
                        db_rbp_list.append(db_rbp_id)
                    pair_dist_dic = benchlib.calc_edit_dist_query_list(rbp_id, db_rbp_list)
                    max_c = 10
                    c = 0
                    suggested_rbps = []
                    for key, value in sorted(pair_dist_dic.items(), key=lambda item: item[1], reverse=False):
                        if c >= max_c:
                            break
                        c += 1
                        suggested_rbps.append(key)
                    suggested_rbps_str = ",".join(suggested_rbps)
                    assert False, "provided --rbps ID %s not in internal motif database (%s). Please provide RBP name present in database. Did you mean (any of) the following database ID(s) (top 10 hits based on string similarity): %s ?" %(rbp_id, motif_db_str, suggested_rbps_str)
                # assert rbp_id in name2ids_dic, "provided --rbps ID %s not in internal motif database. Please provide RBP name present in database" %(rbp_id)
                loaded_rbps_dic[rbp_id] = motif_db_str

    # Motif IDs for search.
    loaded_motif_ids_dic = {}
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            loaded_motif_ids_dic[motif_id] = motif_db_str

    """
    Check and load provided USER data.

    """

    if user_motifs:
        print("--rbps USER selected. Check + load provided USER motifs ... ")
        assert args.user_rbp_id, "--rbps USER demands --user-rbp-id to be set to connect the supplied motif(s) with an RBP ID"
        assert args.user_meme_xml or args.user_cm, "--rbps USER requires a provided sequence or structure motif file (via --user-meme-xml AND/OR --user-cm)"

        # Reformat user_rbp_id. 
        user_rbp_id = benchlib.remove_special_chars_from_str(args.user_rbp_id)
        assert user_rbp_id, "empty string after removing special chars from --user-rbp-id. Please provide alphanumeric string for RBP ID (- or _ are okay as well)"

        assert user_rbp_id not in loaded_rbps_dic, "user RBP ID %s already selected from database. Please deselect respective database RBP ID or provide unique user RBP ID via --user-rbp-id" %(user_rbp_id)
        loaded_rbps_dic[user_rbp_id] = "user"
        # In case user_rbp_id in database, reset motif IDs associated to user_rbp_id.
        name2ids_dic[user_rbp_id] = []
        print("RBP ID for user-supplied motifs:", user_rbp_id)

        user_seq_motif_blocks_dic = {}
        if args.user_meme_xml:
            assert os.path.exists(args.user_meme_xml), "--user-meme-xml file \"%s\" not found" % (args.user_meme_xml)
            user_seq_motif_blocks_dic = benchlib.read_in_xml_motifs(args.user_meme_xml, empty_check=False)
            assert user_seq_motif_blocks_dic, "no motifs read in from provided --user-meme-xml. Make sure to supply sequence motifs in MEME XML format!"
            # Check if motif ID already loaded.
            for acc_id in user_seq_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied MEME XML motif ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change user motif ID to a unique motif ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                seq_motif_blocks_dic[acc_id] = user_seq_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "meme_xml"
                id2name_dic[acc_id] = user_rbp_id

        user_str_motif_blocks_dic = {}
        if args.user_cm:
            assert os.path.exists(args.user_cm), "--user-cm file \"%s\" not found" % (args.user_cm)
            # Check for valid format.
            acc_ids_dic = benchlib.check_cm_file(args.user_cm, cmstat_tmp_out, empty_check=False)
            # Read in covariance model blocks.
            user_str_motif_blocks_dic = benchlib.read_in_cm_blocks(args.user_cm)
            for acc_id in acc_ids_dic:
                assert acc_id in user_str_motif_blocks_dic, "accession ID %s not in blocks dictionary. Please contact developers!" %(acc_id)
            # Check if motif ID already loaded.
            for acc_id in user_str_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied covariance model accession (ACC) ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change to a unique accession ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                str_motif_blocks_dic[acc_id] = user_str_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "cm"
                id2name_dic[acc_id] = user_rbp_id


    """
    Get sequence motif lengths.

    """

    id2len_dic = benchlib.get_seq_motif_lengths(seq_motif_blocks_dic)

    """
    Optionally filter DREME/MEME sequence motifs by length.

    """

    if args.motif_min_len or args.motif_max_len:

        if args.motif_min_len and args.motif_max_len:
            assert args.motif_min_len <= args.motif_max_len, "set --motif-min-len needs to be <= --motif-max-len!"

        print("Filtering sequence motifs by set min/max lengths ... ")

        seq_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, name2ids_dic, c_flt_out = benchlib.filter_dic_by_motif_lengths(
                seq_motif_blocks_dic, str_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, id2name_dic,
                id2len_dic, motif_min_len=args.motif_min_len, motif_max_len=args.motif_max_len
            )

        assert loaded_rbps_dic, "no MEME/DREME sequence motifs left after length filtering. Please adjust length filter range (--motif-min-len, --motif-max-len), RBP selection, or disable length filtering!"

        print("Filtered out %d sequence motifs outside set length range" %(c_flt_out))


    """
    Check if loaded RBP IDs have motifs.

    """
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            found = 0
            if motif_id in seq_motif_blocks_dic:
                found += 1
            if motif_id in str_motif_blocks_dic:
                found += 1
            assert found, "no motifs loaded for RBP ID \"%s\". Please provide the respective motifs file" %(rbp_id)


    """
    If --regex is set:
    Treat regex as sequence motif / fimo type.
    rbp_id: regex, motif_id: regex, motif_db: regex

    """

    regex_id = args.regex_id
    regex = args.regex
    
    if args.regex:

        if regex_type == "sequence":

            # Remove , ; from given regex, to avoid motif_id format conflicts.
            regex = benchlib.remove_special_chars_from_str(args.regex,
                                                        reg_ex="[ ;]",
                                                        to_upper=False)  # [ :;\(\)]
            
            assert regex, "empty string after removing special chars ( ;) from --regex. Please provide a valid regex with DNA letters"

            # Convert IUPAC codes (if present) in regex to standard regex format.
            regex = benchlib.convert_iupac_in_regex(regex)

        elif regex_type == "structure":

            # Check structure pattern and format.
            regex = benchlib.check_format_str_pattern(args.regex)

        args.regex = regex

        regex_id = benchlib.remove_special_chars_from_str(args.regex_id)

        assert regex_id, "empty string after removing special chars from --regex-id. Please provide alphanumeric string for regex ID (- or _ are okay as well)"
        assert regex_id not in name2ids_dic, "--regex set but a different RBP ID with name \"%s\" was found. Please provide a different RBP ID or --regex-id" %(regex_id)

        args.regex_id = regex_id

        if args.motif_regex_id:
            assert regex_id not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or --regex-id" %(regex_id)

            id2name_dic[regex_id] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex_id] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex_id] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex_id]  # rbp_id -> motif_ids

        else:
            assert regex not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or use --motif-regex-id" %(regex_id)

            id2name_dic[regex] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex]  # rbp_id -> motif_ids

        loaded_rbps_dic[regex_id] = regex_id  # rbp_id -> motif_db_str

        """
        Dictionaries that use motif_id as keys.
        loaded_motif_ids_dic
        name2ids_dic
        id2name_dic
        id2type_dic
        """
    else:
        regex_id = False

    """
    If --set-rbp-id set, check if it is part of loaded RBP IDs.

    """

    if args.set_rbp_id is not None:
        assert args.set_rbp_id in loaded_rbps_dic, "given --set-rbp-id \"%s\" is not part of loaded RBP IDs. Please provide --set-rbp-id that is part of loaded RBPs (user-defined or database RBP ID)" %(args.set_rbp_id)



    """
    Get function ID -> function descriptions mapping.

    """
    fid2desc_file = db_path + "/rbp_function_list.32728246.tsv"
    fid2desc_dic, desc2fid_dic = benchlib.get_fid2desc_mapping(fid2desc_file)

    """
    If --motifs sets, filter loaded_rbps_dic + loaded_motif_ids_dic.
    
    """

    if args.motifs_list:

        print("Filtering loaded motifs by provided --motifs ... ")

        motif_fids_dic = {}

        for motif_id in args.motifs_list:
            motif_fids_dic[motif_id] = 1

        filtered_rbps_dic = {}
        filtered_motif_ids_dic = {}
        filtered_name2ids_dic = {}
        for motif_id in motif_fids_dic:
            assert motif_id in id2name_dic, "motif ID \"%s\" provided via --motifs not found in internal motif ID -> RBP ID mapping. Please provide valid motif IDs" %(motif_id)
            assert motif_id in loaded_motif_ids_dic, "motif ID \"%s\" provided via --motifs not found in loaded motifs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_motif_ids_dic[motif_id]
            filtered_motif_ids_dic[motif_id] = set_db_str
            rbp_id = id2name_dic[motif_id]
            assert rbp_id in loaded_rbps_dic, "motif ID \"%s\" provided via --motifs not found in loaded RBPs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_rbps_dic[rbp_id]
            filtered_rbps_dic[rbp_id] = set_db_str
            if rbp_id in filtered_name2ids_dic:
                filtered_name2ids_dic[rbp_id].append(motif_id)
            else:
                filtered_name2ids_dic[rbp_id] = [motif_id]

        if args.regex:
            filtered_rbps_dic[regex_id] = regex_id
            if args.motif_regex_id:
                filtered_motif_ids_dic[regex_id] = regex_id
                filtered_name2ids_dic[regex_id] = [regex_id]
            else:
                filtered_motif_ids_dic[regex] = regex_id
                filtered_name2ids_dic[regex_id] = [regex]

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by --motifs: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by --motifs:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by --motifs: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by --motifs:", c_loaded_motif_ids_post)

        assert loaded_motif_ids_dic, "no remaining motifs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        name2ids_dic = filtered_name2ids_dic


    """
    If --functions set, filter loaded_rbps_dic + loaded_motif_ids_dic.

    """
    rbp_fids_dic = {}
    if args.rbp_functions:

        print("Filtering loaded RBPs by provided function IDs ... ")

        for fid in args.rbp_functions:
            rbp_fids_dic[fid] = 1

        # Check if provided function IDs are valid.
        for fid in rbp_fids_dic:
            assert fid in fid2desc_dic, "function ID \"%s\" provided via --functions not found in internal function ID -> description mapping. Please provide valid function IDs (see rbpbench info for a detailed description)" %(fid)

        # Filter loaded_rbps_dic.
        filtered_rbps_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:  # keep regex if set.
                filtered_rbps_dic[regex_id] = regex_id
                continue
            if rbp_id not in name2fids_dic:
                continue
            for fid in rbp_fids_dic:
                if fid in name2fids_dic[rbp_id]:
                    set_db_str = loaded_rbps_dic[rbp_id]
                    filtered_rbps_dic[rbp_id] = set_db_str
                    break

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by functions: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by functions:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided function IDs. Please provide compatible RBPs + function IDs (see rbpbench info for annotated RBP functions)"

        # Filter loaded motif IDs.
        filtered_motif_ids_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:
                if args.motif_regex_id:
                    filtered_motif_ids_dic[regex_id] = regex_id
                else:
                    filtered_motif_ids_dic[regex] = regex_id
            else:
                for motif_id in name2ids_dic[rbp_id]:
                    set_db_str = loaded_motif_ids_dic[motif_id]
                    filtered_motif_ids_dic[motif_id] = set_db_str
        
        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by functions: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by functions:", c_loaded_motif_ids_post)




    """
    Load RBP data, store in RBP() class.

    """

    # Store motif IDs for search.
    search_rbps_dic = {}
    seq_rbps_dic = {}
    str_rbps_dic = {}
    motif_id2idx_dic = {} # motif ID -> list index.
    args.internal_id = []

    for rbp_id in loaded_rbps_dic:
    
        internal_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
        args.internal_id.append(internal_id)
        rbp = benchlib.RBP(rbp_id, internal_id)

        for motif_id in name2ids_dic[rbp_id]:
        
            assert motif_id in loaded_motif_ids_dic, "motif_id %s not in loaded_motif_ids_dic" %(motif_id)

            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "regex":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "cm":
                rbp.str_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.str_motif_ids) - 1
                rbp.str_motif_hits.append(0)
                str_rbps_dic[rbp_id] = 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

        search_rbps_dic[rbp_id] = rbp


    print("# of RBP IDs for search:    ", len(loaded_rbps_dic))
    print("# of motif IDs for search:  ", len(loaded_motif_ids_dic))

    """
    Get chromosome IDs from --genome.
    """
    print("Get --genome FASTA headers ... ")
    chr_ids_dic = benchlib.get_fasta_headers(args.in_genome)

    """
    Guess chromosome ID style.

    chr_style:
        1: chr1, chr2, ..., chrX, chrM
        2: 1, 2, ... , X, MT

    """
    print("Guess chromosome ID style (based on --genome FASTA headers) ... ")
    chr_style = benchlib.guess_chr_id_style(chr_ids_dic)
    
    """
    Get transcript IDs from --in transcript sites (column 1).
    
    """

    tr_ids_dic = benchlib.bed_read_chr_ids_dic(args.in_sites)

    assert tr_ids_dic, "--in transcript sites BED seems to be empty (no column 1 transcript IDs read in). Make sure to provide valid BED file"


    """
    Read in gene infos from --gtf.

    """

    print("Read in gene features from --gtf ... ")
    tr2gid_dic = {}
    tr_types_dic = {}  # Store transcript biotypes in GTF file.
    gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                    tr2gid_dic=tr2gid_dic,
                                                    tr_types_dic=tr_types_dic,
                                                    check_chr_ids_dic=chr_ids_dic,
                                                    chr_style=chr_style,
                                                    empty_check=False)
    assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
    c_gene_infos = len(gid2gio_dic)
    print("# gene features read in from --gtf:", c_gene_infos)

    # Check exon order (return True if minus strand exon 1 is most downstream, not most upstream, which is the correct way).
    print("Check minus-strand exon order in --gtf ... ")
    correct_min_ex_order = benchlib.gtf_check_exon_order(args.in_gtf)
    if correct_min_ex_order:
        print("Correct order encountered ... ")
    else:
        print("Reverse order encountered ... ")

    # Get transcript infos.
    print("Read in transcript infos from --gtf ... ")
    tid2tio_dic = benchlib.gtf_read_in_transcript_infos(args.in_gtf, 
                                                        tr_ids_dic=tr_ids_dic,
                                                        correct_min_ex_order=correct_min_ex_order,
                                                        chr_style=chr_style,
                                                        empty_check=False)

    assert tid2tio_dic, "no transcript infos read in from --gtf. Please provide a GTF file compatible with --in transcript sites BED file (i.e. transcript IDs from --in need to be in --gtf as well)"

    # (in)sanity checks: after filtering, the --in transcript IDs and the GTF
    # transcript infos must match one-to-one.
    for tr_id in tr_ids_dic:
        # Fix: supply tr_id to the format string (previously no % argument was
        # passed, so the message printed a literal "%s").
        assert tr_id in tid2tio_dic, "transcript ID %s from --in BED file not found in --gtf GTF file. Please provide a compatible GTF file" %(tr_id)
    for tr_id in tid2tio_dic:
        assert tr_id in tr_ids_dic, "transcript ID %s not in tr_ids_dic. Please contact developers" %(tr_id)

    c_tr_infos = len(tid2tio_dic)
    print("# transcript features read in from --gtf:", c_tr_infos)

    # Get transcript sequences.
    print("Extract transcript sequences ... ")
    tr_seqs_dic = benchlib.get_transcript_sequences_from_gtf(tid2tio_dic, args.in_genome,
                                                             tr_ids_dic=tr_ids_dic,
                                                             tmp_out_folder=args.out_folder)

    # Output sequences to FASTA.
    print("Output transcript sequences to FASTA ... ")
    benchlib.fasta_output_dic(tr_seqs_dic, tr_seqs_fa,
                              split=True)

    # Transcript sequence lengths.
    # tr_seq_len_dic: transcript ID -> transcript sequence length.
    tr_seq_len_dic = {}
    for tr_id in tr_seqs_dic:
        tr_seq_len_dic[tr_id] = len(tr_seqs_dic[tr_id])

    """
    GO enrichment analysis.

    """

    goa_results_df = False
    goa_stats_dic = {}
    propagate_counts = True
    target_reg_annot_file = None

    if args.run_goa:

        target_genes_dic = {}  # Store gene IDs covered by regions -> region count.
        background_genes_dic = {}  # Store all gene IDs in GTF file to use as background genes.

        # Get background gene IDs.
        for gene_id in gid2gio_dic:
            background_genes_dic[gene_id] = gid2gio_dic[gene_id].gene_name

        gid2tid_dic = {}

        # Get target gene IDs.
        for tid in tr_ids_dic:
            if tid in tr2gid_dic:
                gene_id = tr2gid_dic[tid]
                gid2tid_dic[gene_id] = tid
                target_genes_dic[gene_id] = 1


        # If only target genes with RBP motifs hits for all RBPs should be considered.
        if args.goa_cooc_mode == 2 or args.goa_cooc_mode == 3:

            gid2tid_dic = {}
            reg2annot_dic = {}
            for reg_id in region_rbp_binds_dic:
                # Region ID has format: ENST00000270722:686-752(+). Extract first part of ID.
                tr_id = reg_id.split(":")[0]
                reg2annot_dic[reg_id] = [False, tr_id]
            
            new_target_genes_dic = benchlib.get_target_genes_with_rbp_hits(reg2annot_dic, tr2gid_dic, region_rbp_binds_dic,
                                                                           gid2tid_dic=gid2tid_dic,
                                                                           goa_cooc_mode=args.goa_cooc_mode)

            if args.goa_cooc_mode == 2:
                print("Keep only target genes containing regions with motif hits for any RBP ... ")
            elif args.goa_cooc_mode == 3:
                print("Keep only target genes containing regions with motif hits for all RBPs ... ")
            
            print("# of target genes before filtering: %i" %(len(target_genes_dic)))
            print("# of target genes after filtering:  %i" %(len(new_target_genes_dic)))

            target_genes_dic = new_target_genes_dic


        # Optionally restrict the GOA background gene set to a user-provided
        # gene ID list (and re-filter target genes accordingly).
        if args.goa_bg_gene_list:

            # Fix: messages previously misspelled the option as
            # "--goa-gb-gene-list"; the actual option is --goa-bg-gene-list.
            print("Read in background genes from --goa-bg-gene-list ... ")

            assert os.path.exists(args.goa_bg_gene_list), "given --goa-bg-gene-list file \"%s\" not found" % (args.goa_bg_gene_list)

            bg_gene_ids_dic = benchlib.read_ids_into_dic(args.goa_bg_gene_list,
                                                    check_dic=False)

            print("# of gene IDs read in: %i" %(len(bg_gene_ids_dic)))
            print("Filter background genes by --gtf genes ... ")

            # Keep only background genes that are present in the --gtf genes.
            new_background_genes_dic = {}
            for gene_id in bg_gene_ids_dic:
                if gene_id in gid2gio_dic:
                    new_background_genes_dic[gene_id] = gid2gio_dic[gene_id].gene_name
            
            assert new_background_genes_dic, "given --goa-bg-gene-list gene IDs not found in --gtf. Please provide compatible --gtf and --goa-bg-gene-list files"

            print("# of background genes before filtering: %i" %(len(background_genes_dic)))
            print("# of background genes after filtering:  %i" %(len(new_background_genes_dic)))

            background_genes_dic = new_background_genes_dic

            # Target genes must be a subset of the background genes.
            print("Filter target genes by new background gene list ... ")
            new_target_genes_dic = {}
            for gene_id in target_genes_dic:
                if gene_id in background_genes_dic:
                    new_target_genes_dic[gene_id] = target_genes_dic[gene_id]
            
            print("# of target genes before filtering: %i" %(len(target_genes_dic)))
            print("# of target genes after filtering:  %i" %(len(new_target_genes_dic)))

            target_genes_dic = new_target_genes_dic

            assert target_genes_dic, "no target genes left after filtering by --goa-bg-gene-list. Please provide compatible --in regions + gene list"


        print("")
        print("GOA enabled (--goa) ... ")

        goa_stats_dic["c_target_genes_pre_filter"] = len(target_genes_dic)
        goa_stats_dic["c_background_genes_pre_filter"] = len(background_genes_dic)
        goa_stats_dic["pval_thr"] = args.goa_pval
        goa_stats_dic["goa_obo_mode"] = args.goa_obo_mode
        goa_stats_dic["propagate_counts"] = propagate_counts
        goa_stats_dic["excluded_terms"] = "-"
        goa_stats_dic["goa_filter_purified"] = args.goa_filter_purified
        goa_stats_dic["goa_max_child"] = args.goa_max_child
        goa_stats_dic["goa_min_depth"] = args.goa_min_depth
        goa_stats_dic["goa_cooc_mode"] = args.goa_cooc_mode

        gene_infos_file = benchlib_path + "/content/ensembl_gene_infos.biomart.GRCh38.112.tsv.gz"
        if os.path.exists(gene_infos_file):
            print("Output target region annotations ... ")
            target_reg_annot_file = args.out_folder + "/target_region_annotations.tsv"
            
            benchlib.output_target_reg_annot(target_genes_dic, gene_infos_file, target_reg_annot_file,
                                             gid2tid_dic=gid2tid_dic,
                                             tid2tio_dic=tid2tio_dic)

        local_gid2go_file = benchlib_path + "/content/ensembl_gene_id2go_ids.biomart.GRCh38.112.tsv.gz"
        local_obo_file = benchlib_path + "/content/go-basic.obo.gz"

        assert os.path.exists(local_gid2go_file), "local gene ID to GO ID file \"%s\" not found" %(local_gid2go_file)
        assert os.path.exists(local_obo_file), "local GO OBO file \"%s\" not found" %(local_obo_file)

        gid2go_file = local_gid2go_file
        if args.goa_gene2go_file:
            gid2go_file = args.goa_gene2go_file
            assert os.path.exists(gid2go_file), "provided --goa-gene2go-file \"%s\" not found" %(gid2go_file)
        goa_obo_file = local_obo_file
        if args.goa_obo_file and args.goa_obo_mode == 3:
            goa_obo_file = args.goa_obo_file
            assert os.path.exists(goa_obo_file), "provided --goa-obo-file \"%s\" not found" %(goa_obo_file)

        # Run GOA.
        goa_results_df = benchlib.run_go_analysis(target_genes_dic, background_genes_dic, 
                                                  gid2go_file, args.out_folder,
                                                  pval_thr=args.goa_pval,
                                                  excluded_terms = [],  # do not exclude any GO terms.
                                                  goa_obo_mode=args.goa_obo_mode,
                                                  propagate_counts=propagate_counts,
                                                  stats_dic=goa_stats_dic,
                                                  store_gene_names=True,
                                                  goa_obo_file=goa_obo_file)

        print("# of enriched (i.e., with significantly higher concentration) GO terms: %i" %(goa_stats_dic["c_sig_go_terms_e"]))
        print("# of purified (i.e., with significantly lower concentration) GO terms:  %i" %(goa_stats_dic["c_sig_go_terms_p"]))

        goa_results_df.to_csv(goa_results_tsv, sep="\t", index=False)
        print("")


    """
    Filter / extend --in transcript regions BED file.

    """

    # Process extension info.
    ext_parts = args.ext_up_down.split(",")
    c_ext_parts = len(ext_parts)
    ext_up = 0
    ext_down = 0
    if c_ext_parts == 1:
        ext_up = int(ext_parts[0])
        ext_down = int(ext_parts[0])
    elif c_ext_parts == 2:
        ext_up = int(ext_parts[0])
        ext_down = int(ext_parts[1]) 
    else:
        assert False, "invalid --ext argument provided (correct format: --ext 10 OR --ext 20,10)"

    args.ext_up = ext_up
    args.ext_down = ext_down

    # Filter / extend --in transcript sites BED.
    print("Preprocess --in sites ... ")
    bed_tr_ids_dic = {}  # Record transcript IDs (column 1).
    reg_stats_dic = benchlib.bed_filter_extend_bed(args.in_sites, filtered_sites_bed,
                                          ext_up=ext_up,
                                          ext_down=ext_down,
                                          remove_dupl=True,  # Yes also for transcript sites, remove same sites.
                                          reg2sc_dic=None,  # Don't need scores.
                                          score_col=args.bed_score_col,
                                          score_thr=args.bed_sc_thr,
                                          score_rev_filter=args.bed_sc_thr_rev_filter,
                                          chr_ids_dic=None,  # We don't want to filter by chromosome IDs in chr_ids_dic.
                                          bed_chr_ids_dic=bed_tr_ids_dic,
                                          use_region_ids=False,
                                          transcript_sites=True,
                                          chr_len_dic=tr_seq_len_dic,
                                          unstranded=False)

    print("# --in regions pre-filtering:  ", reg_stats_dic["c_in"])
    print("# --in regions post-filtering: ", reg_stats_dic["c_out"])
    print("# regions with invalid chr_id: ", reg_stats_dic["c_chr_filter"])
    print("# duplicated regions removed:  ", reg_stats_dic["c_dupl_filter"])
    print("# regions filtered by score:   ", reg_stats_dic["c_sc_thr"])

    assert reg_stats_dic["c_out"], "no --in BED sites after filtering. Make sure to provide a valid BED file with transcript sites"


    """
    Calculate effective size of genomic regions.

    """
    print("Calculate effective genomic region size ... ")
    eff_reg_size = benchlib.get_uniq_gen_size(filtered_sites_bed)

    print("Called region length sum:      ", reg_stats_dic["reg_len_sum"])
    print("Effective region length sum:   ", eff_reg_size)

    """
    Check if old index file still there.
    
    If still there, bedtools getfasta will use old one, will complain and 
    possibly not extracting sequences:
    Warning: the index file is older than the FASTA file.
    WARNING. chromosome (ENST00000270722) was not found in the FASTA file. Skipping.
    ...

    """
    fasta_index_file = tr_seqs_fa + ".fai"
    if os.path.exists(fasta_index_file):
        os.remove(fasta_index_file)

    """
    Get transcript region sequences from tr_seqs_fa.

    Output FASTA header format should be:
    >tr_id:10-20(+)

    """

    print("Extract sequences from transcripts ... ")
    benchlib.bed_extract_sequences_from_fasta(filtered_sites_bed, 
                                              tr_seqs_fa, filtered_sites_fa,
                                              print_warnings=True)


    """
    Get FASTA sequences and sequence lengths.

    out_seqs_dic should be in the format:
    tr_id:10-20(+) -> (extended) site sequence 

    """

    out_seqs_dic = benchlib.read_fasta_into_dic(filtered_sites_fa,
                                       dna=True,
                                       all_uc=True,
                                       id_check=True,
                                       empty_check=False,
                                       skip_n_seqs=False)

    assert out_seqs_dic, "no sequences extracted from FASTA file for --in BED sites. Make sure to use compatible FASTA/BED files!"

    # Effective number of regions used for motif search.
    c_regions = len(out_seqs_dic)
    args.c_regions = c_regions

    # Called region size.
    called_reg_size = 0
    len_list = []
    reg2pol_dic = {}
    for seq_id in out_seqs_dic:
        seq_len = len(out_seqs_dic[seq_id])
        called_reg_size += seq_len
        len_list.append(seq_len)
        reg2pol_dic[seq_id] = "+"

    # Length statistics.
    reg_len_median = statistics.median(len_list)
    reg_len_mean = statistics.mean(len_list)
    reg_len_mean = round(reg_len_mean, 2)
    reg_len_min = min(len_list)
    reg_len_max = max(len_list)


    """
    ====================================
    RUN SEQUENCE MOTIF SEARCH WITH FIMO.
    ====================================

    """
    fimo_hits_list = []  # Sequence motif hits (FIMO, plus regex hits if --regex set).
    call_dic = {}  # Collects external tool call info (passed to run_fast_fimo / run_cmsearch).

    if seq_rbps_dic:

        """
        Print motifs to file.

        """

        print("Output motifs to XML ... ")
        out_str, c_added_motifs = benchlib.blocks_to_xml_string(seq_motif_blocks_dic, loaded_motif_ids_dic)

        benchlib.output_string_to_file(out_str, seq_motifs_xml)


        """
        Run FIMO on sequences + motifs.

        """

        print("Run FIMO ... ")
        benchlib.run_fast_fimo(filtered_sites_fa, seq_motifs_xml, fimo_res_tsv,
                    pval_thr=args.fimo_pval,
                    nt_freqs_file=fimo_freqs_file,
                    call_dic=call_dic,
                    params=fimo_params,
                    error_check=False)

        """
        Read in FIMO hits.

        """

        assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

        print("Read in FIMO results ... ")
        fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                       only_best_hits=args.greatest_hits)

        c_fimo_hits = len(fimo_hits_list)
        print("# of FIMO motif hits:", c_fimo_hits)

        """
        If --regex is set, search for regex hits in sequences (stored in out_seqs_dic).

        """
        if args.regex:

            # --regex-search-mode 1 -> step_size_one=True, mode 2 -> False;
            # presumably controls the search window step in get_regex_hits()
            # (overlapping vs non-overlapping matches) — see benchlib.
            step_size_one = False
            if args.regex_search_mode == 1:
                step_size_one = True
            elif args.regex_search_mode == 2:
                step_size_one = False
            else:
                assert False, "invalid --regex-search-mode %i set" %(args.regex_search_mode)

            print("Run search for --regex \"%s\" ... " %(regex))
            regex_hits_list = benchlib.get_regex_hits(regex, regex_id, out_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=args.motif_regex_id)

            c_regex_hits = len(regex_hits_list)
            print("# of regex hits:", c_regex_hits)

            # Add regex hits to fimo_hits_list, so downstream stats treat
            # regex hits like any other sequence motif hit.
            fimo_hits_list += regex_hits_list


    """
    =========================================
    RUN STRUCTURE MOTIF SEARCH WITH CMSEARCH.
    =========================================

    """
    cmsearch_hits_list = []  # Structure motif (covariance model) hits.

    if str_rbps_dic:

        print("Output covariance models to .cm ... ")
        benchlib.output_cm_blocks_to_file(str_motif_blocks_dic, loaded_motif_ids_dic, str_motifs_cm)

        # Run cmsearch.
        print("Run cmsearch ... ")
        # --cmsearch-mode 1 -> default filtering, 2 -> --max (no filtering,
        # slower but more sensitive).
        cmsh_mode = ""
        if args.cmsearch_mode == 1:
            cmsh_mode = "--default"
        elif args.cmsearch_mode == 2:
            cmsh_mode = "--max"
        else:
            assert False, "invalid --cmsearch-mode %i set" %(args.cmsearch_mode)
        # -g: glocal alignment mode; --toponly: search top (plus) strand only;
        # --incT/-T: bit score inclusion + reporting thresholds (args.cmsearch_bs).
        cmsh_params = "-g --tformat fasta --toponly --incT %s -T %s %s" %(args.cmsearch_bs, args.cmsearch_bs, cmsh_mode)

        benchlib.run_cmsearch(filtered_sites_fa, str_motifs_cm, cmsearch_res_txt,
                        error_check=False,
                        call_dic=call_dic,
                        params=cmsh_params) # or add --anytrunc and remove --g
        # Read in hits.
        print("Read in cmsearch results ... ")
        cmsearch_hits_list, c_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                           only_best_hits=args.greatest_hits,
                                                                           check=True)

        print("# of cmsearch motif hits:", c_cms_hits)


    """
    Store for each RBP the regions with motif hits (and hit counts), using
    dictionary of dictionaries regions_with_motifs_dic.
    This tells us, how many input regions have motif hits, separated by RBP.
    Also store for each RBP the unique motif hit regions (and hit counts), using
    dictionary of dictionaries unique_motifs_dic.

    regions_with_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'region1': motif_c_region1, 'region2': motif_c_region2}, rbp_id2 -> {'region1': motif_c_region1}}
    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }

    """

    regions_with_motifs_dic = {}
    unique_motifs_dic = {}

    # Store regions with sequence motifs (FIMO + regex hits).
    for fh in fimo_hits_list:

        # Map motif ID to its RBP ID.
        rbp_id = id2name_dic[fh.motif_id]

        # Count motif hits per input region, separated by RBP.
        if rbp_id in regions_with_motifs_dic:
            # fh.seq_name : FASTA header (== --in genomic sequence region).
            if fh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][fh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][fh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][fh.seq_name] = 1

        fh_str = repr(fh) # genomic motif region string.

        # Unique motif regions for each RBP: identical hit strings are
        # deduplicated via the dictionary key, with occurrence count as value.
        if rbp_id in unique_motifs_dic:
            if fh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][fh_str] += 1
            else:
                unique_motifs_dic[rbp_id][fh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][fh_str] = 1

    # Store regions with structure motifs (cmsearch hits), same bookkeeping
    # as for the sequence motif hits above.
    for cmsh in cmsearch_hits_list:

        rbp_id = id2name_dic[cmsh.motif_id]

        if rbp_id in regions_with_motifs_dic:
            # cmsh.seq_name : FASTA header (== --in genomic sequence region).
            if cmsh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1

        cmsh_str = repr(cmsh) # genomic motif region string.

        # Unique motif regions for each RBP.
        if rbp_id in unique_motifs_dic:
            if cmsh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][cmsh_str] += 1
            else:
                unique_motifs_dic[rbp_id][cmsh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][cmsh_str] = 1

    """
    Store infos for each RBP in RBP class.

    search_rbps_dic[rbp_id] = rbp_class
    RBP class arguments:
            name: str,
            seq_motif_ids = None,
            str_motif_ids = None,
            c_hit_reg = 0, # # regions with motif hits.
            perc_hit_reg = 0.0, # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
            c_motif_hits = 0, # # motif hits.
            c_uniq_motif_hits = 0, # # unique motif hits.
            c_uniq_motif_nts = 0, # # unique motif nucleotides.
            perc_uniq_motif_nts_eff_reg = 0.0, # % unique motif nts over effective region length.
            perc_uniq_motif_nts_cal_reg = 0.0, # % unique motif nts over called region length.
            uniq_motif_hits_eff_1000nt = 0.0, # unique motif hits per effective 1000 nt.
            uniq_motif_hits_cal_1000nt = 0.0, # unique motif hits per called 1000 nt.
            ks_pval = 1.0, # Kolmogorov-Smirnov (KS) statistic p-value (are higher scoring sites enriched with motifs).
            ks_stat = 0.0,
            organism: Optional[str] = None

    Number of sequences for FIMO / cmsearch:
    c_regions

    """
    # Set number of no-hit regions. Default: all regions; corrected below
    # for RBPs that do have hit regions.
    for rbp_id in search_rbps_dic:
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions

    for rbp_id in regions_with_motifs_dic:
        # Number of --in regions with RBP motif hits.
        c_hit_reg = len(regions_with_motifs_dic[rbp_id])

        # Number of motif hits on --in regions in total.
        c_motif_hits = 0
        for reg_id in regions_with_motifs_dic[rbp_id]:
            c_motif_hits += regions_with_motifs_dic[rbp_id][reg_id]
        search_rbps_dic[rbp_id].c_hit_reg = c_hit_reg
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions - c_hit_reg
        search_rbps_dic[rbp_id].c_motif_hits = c_motif_hits

        # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
        search_rbps_dic[rbp_id].perc_hit_reg = (search_rbps_dic[rbp_id].c_hit_reg / c_regions) * 100

    """
    Get unique motif hits.

    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }
    """

    for rbp_id in unique_motifs_dic:
        c_uniq_motif_hits = len(unique_motifs_dic[rbp_id])
        search_rbps_dic[rbp_id].c_uniq_motif_hits = c_uniq_motif_hits
        # Store individual motif unique hits, indexed per motif ID
        # (meme_xml + regex -> sequence motif counts, cm -> structure motif counts).
        for motif_str_repr in unique_motifs_dic[rbp_id]:
            motif_id = benchlib.get_motif_id_from_str_repr(motif_str_repr)
            idx = motif_id2idx_dic[motif_id]
            if id2type_dic[motif_id] == "meme_xml":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "regex":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "cm":
                search_rbps_dic[rbp_id].str_motif_hits[idx] += 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

    """
    Number of motif nucleotides over called + effective region size.

    """

    print("Calculate effective motif region sizes for each RBP ... ")
    for rbp_id in unique_motifs_dic:
        # Output unique motif hit regions (sequence or structure) to BED for RBP rbp_id.
        benchlib.output_motif_hits_to_bed(rbp_id, unique_motifs_dic, out_tmp_bed,
                                          one_based_start=True)
        # Calculate effective motif region size (unique genomic motif nt count).
        eff_motif_reg_size = benchlib.get_uniq_gen_size(out_tmp_bed)

        # Number of unique motif nucleotides.
        search_rbps_dic[rbp_id].c_uniq_motif_nts = eff_motif_reg_size
        # % unique motif nts over effective region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg = (eff_motif_reg_size / eff_reg_size) * 100
        # % unique motif nts over called region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg = (eff_motif_reg_size / called_reg_size) * 100
        # Number of unique motif hits per effective 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (eff_reg_size / 1000)
        # Number of unique motif hits per called 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (called_reg_size / 1000)

    """
    # Print RBP object stats.
    for rbp_id in search_rbps_dic:
        print(search_rbps_dic[rbp_id].__dict__)

    """

    # print(search_rbps_dic["AGGF1"].__dict__)
    print("# --in regions for motif search:", c_regions)
    # print("Called genomic region size:     ", called_reg_size)
    # print("Effective genomic region size:  ", eff_reg_size)


    """
    Output RBP hit stats (ie one row per RBP).

    Output columns:
    rbp_id
    c_regions
    called_reg_size
    effective_reg_size
    c_reg_with_hits
    perc_reg_with_hits
    c_motif_hits
    c_uniq_motif_hits
    c_uniq_motif_nts
    perc_uniq_motif_nts_cal_reg
    perc_uniq_motif_nts_eff_reg
    uniq_motif_hits_cal_1000nt
    uniq_motif_hits_eff_1000nt
    wc_pval
    seq_motif_ids
    seq_motif_hits
    str_motif_ids
    str_motif_hits

    """

    rbp_list = []  # RBP IDs with stats rows (reused below for pairwise analysis).

    OUTSTATS = open(rbp_stats_out, "w")
    rbp_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\trbp_id\tc_regions\tmean_reg_len\tmedian_reg_len\tmin_reg_len\tmax_reg_len\t"
    rbp_stats_header += "called_reg_size\teffective_reg_size\tc_reg_with_hits\tperc_reg_with_hits\t"
    rbp_stats_header += "c_motif_hits\tc_uniq_motif_hits\tc_uniq_motif_nts\tperc_uniq_motif_nts_cal_reg\tperc_uniq_motif_nts_eff_reg\tuniq_motif_hits_cal_1000nt\t"
    rbp_stats_header += "uniq_motif_hits_eff_1000nt\twc_pval\twc_rbc_eff_size\twc_cl_eff_size\tseq_motif_ids\tseq_motif_hits\tstr_motif_ids\tstr_motif_hits\tinternal_id\n"
    OUTSTATS.write(rbp_stats_header)

    for rbp_id in search_rbps_dic:

        # print(search_rbps_dic[rbp_id].__dict__)
        rbp_list.append(rbp_id)

        motif_db_out = loaded_rbps_dic[rbp_id]

        c_reg_with_hits = search_rbps_dic[rbp_id].c_hit_reg
        perc_reg_with_hits = search_rbps_dic[rbp_id].perc_hit_reg
        c_motif_hits = search_rbps_dic[rbp_id].c_motif_hits
        c_uniq_motif_hits = search_rbps_dic[rbp_id].c_uniq_motif_hits
        c_uniq_motif_nts = search_rbps_dic[rbp_id].c_uniq_motif_nts
        perc_uniq_motif_nts_cal_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg
        perc_uniq_motif_nts_eff_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg
        uniq_motif_hits_cal_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt
        uniq_motif_hits_eff_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt
        wc_pval = search_rbps_dic[rbp_id].wc_pval
        wc_rbc_es = search_rbps_dic[rbp_id].wc_rbc_es
        wc_cl_es = search_rbps_dic[rbp_id].wc_cl_es
        internal_id = search_rbps_dic[rbp_id].internal_id

        # Comma-separated motif ID / hit count lists; "-" placeholder if the
        # RBP has no motifs of the respective type.
        seq_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].seq_motif_hits)
        str_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].str_motif_hits)
        seq_motif_ids = ",".join(search_rbps_dic[rbp_id].seq_motif_ids)
        str_motif_ids = ",".join(search_rbps_dic[rbp_id].str_motif_ids)
        if not seq_motif_hits:
            seq_motif_hits = "-"
        if not str_motif_hits:
            str_motif_hits = "-"
        if not seq_motif_ids:
            seq_motif_ids = "-"
        if not str_motif_ids:
            str_motif_ids = "-"

        # Assemble tab-separated stats row (order matches rbp_stats_header).
        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += rbp_id + "\t"

        row_str += str(c_regions) + "\t"

        row_str += str(reg_len_mean) + "\t"
        row_str += str(reg_len_median) + "\t"
        row_str += str(reg_len_min) + "\t"
        row_str += str(reg_len_max) + "\t"

        row_str += str(called_reg_size) + "\t"
        row_str += str(eff_reg_size) + "\t"

        row_str += str(c_reg_with_hits) + "\t"
        row_str += str(perc_reg_with_hits) + "\t"

        row_str += str(c_motif_hits) + "\t"
        row_str += str(c_uniq_motif_hits) + "\t"
        row_str += str(c_uniq_motif_nts) + "\t"
        row_str += str(perc_uniq_motif_nts_cal_reg) + "\t"
        row_str += str(perc_uniq_motif_nts_eff_reg) + "\t"
        row_str += str(uniq_motif_hits_cal_1000nt) + "\t"
        row_str += str(uniq_motif_hits_eff_1000nt) + "\t"
        row_str += str(wc_pval) + "\t"
        row_str += str(wc_rbc_es) + "\t"
        row_str += str(wc_cl_es) + "\t"
        row_str += seq_motif_ids + "\t"
        row_str += seq_motif_hits + "\t"
        row_str += str_motif_ids + "\t"
        row_str += str_motif_hits + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    OUTSTATS.close()

    """
    Region ID list.

    """
    # Region IDs sorted by FASTA header ID (fixed order for occupancy lists).
    reg_ids_list = []
    reg_ids_dic = {}
    for seq_id, seq in sorted(out_seqs_dic.items()):
        reg_ids_list.append(seq_id)
        reg_ids_dic[seq_id] = 1

    rbp_list.sort()
    len_rbp_list = len(rbp_list)
    # Store rbp_id -> for each region if hit: 1, else: 0, i.e.: [1,0,0,0,0]
    reg_hits_dic = {}
    add_count = False # Add # of motif hits in region or just add 1 (if False)

    # RBP ID to index mapping.
    rbp2idx_dic = {}
    idx2rbp_dic = {}
    print("Get RBP region occupancies ... ")
    for idx, rbp_id in enumerate(rbp_list):
        rbp2idx_dic[rbp_id] = idx
        idx2rbp_dic[idx] = rbp_id
        # Region has hits yes(1)/no(0).
        hit_list = []
        for reg_id in reg_ids_list:
            if rbp_id in regions_with_motifs_dic and reg_id in regions_with_motifs_dic[rbp_id]:
                if add_count:
                    hit_list.append(regions_with_motifs_dic[rbp_id][reg_id])
                else:
                    hit_list.append(1)
            else:
                hit_list.append(0)
        reg_hits_dic[rbp_id] = hit_list

    """
    Output occupancies.

    reg_hits_dic[rbp_id] = [0,1,0,0, ...]
    reg_ids_list = [reg_id1, reg_id2, ... ]

    rbp2regidx_dic:
        # rbp_id -> 0-based indexes of occupied regions, e.g. [0, 3, 12, 88, 114]

    """
    rbp2regidx_dic = {}
    OUTOCC = open(rbp_reg_occ_table_out,"w")

    # Table header: region ID column followed by one column per RBP.
    occ_header = r"#region_id \ rbp_id"
    for rbp_id, hit_list in sorted(reg_hits_dic.items()):
        occ_header += "\t%s" %(rbp_id)
        rbp2regidx_dic[rbp_id] = []
        for idx, label in enumerate(hit_list):
            if label:  # if occupied (i.e. 1-label).
                rbp2regidx_dic[rbp_id].append(idx)
    OUTOCC.write("%s\n" %(occ_header))
    # One row per region with 0/1 occupancy value per RBP column.
    for idx, reg_id in enumerate(reg_ids_list):
        occ_row = "%s" %(reg_id)
        for rbp_id, hit_list in sorted(reg_hits_dic.items()):
            occ_row += "\t%i" %(hit_list[idx])
        OUTOCC.write("%s\n" %(occ_row))
    OUTOCC.close()

    """
    Store RBP binding information for each input region.
    Format region_rbp_binds_dic:
    region_id -> [False, True, False ... ]
    with list number of RBP IDs (len_rbp_list), alphabetically sorted.
    Format region_rbp_motif_pos_dic:
    Region ID -> "motif_id,start_1based,end_1based,p_value/-(bit_score)"
    E.g.
    region_rbp_motif_pos_dic["reg1"] = ["rbp1_m1:98:102:0.01", "rbp1_m1:110:115:0.1", ...]
    region2motif_hits_dic:
    region_id -> [motif_hit1, motif_hit2, ...] with motif_hit format: "motif_id:seq_start,seq_end,p_value/-(bit_score)"

    """
    region_rbp_binds_dic = {}
    region_rbp_motif_pos_dic = {}
    region2motif_hits_dic = {}  # For plotting.
    rid2rbpidx2hcp_dic = {}  # region_id -> rbp_idx -> motif hit center position(s)

    # Checks (make sure use_region_ids=True in bed_filter_extend_bed() function).
    # Initialize empty binding info for every input region; filled by the
    # FIMO / cmsearch hit loops below.
    for reg_id in out_seqs_dic:
        # assert reg_id in reg2sc_dic, "region ID \"%s\" from out_seqs_dic not found in reg2sc_dic" %(reg_id)
        region_rbp_binds_dic[reg_id] = [False]*len_rbp_list
        region_rbp_motif_pos_dic[reg_id] = []
        region2motif_hits_dic[reg_id] = []
        rid2rbpidx2hcp_dic[reg_id] = {}

    """
    Output motif region stats (1 row for each motif hit).
    Report ALL motif hits,
    plus report how many times one transcript motif hit occurs (uniq_count).

    """

    OUTSTATS = open(motif_stats_out,"w")

    motif_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\tregion_id\trbp_id\tmotif_id\tchr_id\tgen_s\tgen_e\tstrand\tregion_s\tregion_e\tregion_len\t"
    motif_stats_header += "uniq_count\tfimo_score\tfimo_pval\tcms_score\tcms_eval\tmatched_seq\tinternal_id\n"
    OUTSTATS.write(motif_stats_header)

    # Unique motif regions BED.
    motif_reg_dic = {}
    # rbp_id -> motif_id -> matched sequence -> count.
    match_c_dic = {}

    for rbp_id in search_rbps_dic:
        match_c_dic[rbp_id] = {}
        for motif_id in search_rbps_dic[rbp_id].seq_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}
        for motif_id in search_rbps_dic[rbp_id].str_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}

    # hit_id = "%s:%s-%s(%s)%s" %(cols[7], cols[8], cols[9], cols[10], cols[6])

    # One stats row per sequence motif (FIMO/regex) hit; also fills the
    # per-region binding dictionaries initialized above.
    for fh in fimo_hits_list:

        rbp_id = id2name_dic[fh.motif_id]
        region_id = fh.seq_name
        region_len = benchlib.get_length_from_seq_name(fh.seq_name)
        # genomic motif region string.
        fh_str = repr(fh)
        uniq_count = unique_motifs_dic[rbp_id][fh_str]
        # Store binding info of RBP in region.
        rbp_idx = rbp2idx_dic[rbp_id]

        # Motif hit string.
        motif_str = "%s:%i:%i:%s" %(fh.motif_id, fh.start, fh.end, str(fh.pval))
        # motif_str_plot = "%s,%i-%i,%s" %(fh.motif_id, fh.seq_s, fh.seq_e, str(fh.pval))
        motif_str_plot = "%s:%i-%i" %(fh.motif_id, fh.seq_s, fh.seq_e)

        # Center position of motif hit (start converted to 0-based first).
        motif_hit_s = fh.seq_s - 1
        motif_hit_e = fh.seq_e
        center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

        region_rbp_binds_dic[region_id][rbp_idx] = True
        region_rbp_motif_pos_dic[region_id].append(motif_str)
        region2motif_hits_dic[region_id].append(motif_str_plot)
        if rbp_idx not in rid2rbpidx2hcp_dic[region_id]:
            rid2rbpidx2hcp_dic[region_id][rbp_idx] = [center_pos]
        else:
            rid2rbpidx2hcp_dic[region_id][rbp_idx].append(center_pos)

        motif_db_out = loaded_motif_ids_dic[fh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Get matched sequence.
        matched_seq = benchlib.get_matched_seq(fh.seq_name, out_seqs_dic, fh.seq_s, fh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        # Store motif hit as BED (once per unique genomic hit) and count its
        # matched sequence.
        hit_id = "%s:%s-%s(%s)%s" %(fh.chr_id, str(fh.start), str(fh.end), fh.strand, fh.motif_id)
        if hit_id not in motif_reg_dic:
            # str_pat hits: score/p-value columns set to -1.0, GC + GU
            # fractions written instead; other hits carry FIMO score/p-value.
            if fh.hit_type == "str_pat":
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.gc_frac), str(fh.gu_frac), matched_seq)
            else:
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.score), str(fh.pval), matched_seq)
            # bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t-\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.score), str(fh.pval), matched_seq)
            motif_reg_dic[hit_id] = bed_row
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if fh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][fh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][fh.motif_id]:
                match_c_dic[rbp_id][fh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][fh.motif_id][matched_seq] += 1

        # Assemble tab-separated row (order matches motif_stats_header;
        # cms_score/cms_eval set to "-" for sequence motif hits).
        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += fh.motif_id + "\t"
        row_str += fh.chr_id + "\t"
        row_str += str(fh.start) + "\t"  # 1-based.
        row_str += str(fh.end) + "\t"
        row_str += fh.strand + "\t"
        row_str += str(fh.seq_s) + "\t"  # 1-based.
        row_str += str(fh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += str(fh.score) + "\t"
        row_str += str(fh.pval) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    # One stats row per structure motif (cmsearch) hit; mirrors the sequence
    # motif loop above, with cmsearch score/E-value in the cms_* columns.
    for cmsh in cmsearch_hits_list:

        rbp_id = id2name_dic[cmsh.motif_id]
        region_id = cmsh.seq_name
        region_len = benchlib.get_length_from_seq_name(cmsh.seq_name)
        # genomic motif region string.
        cmsh_str = repr(cmsh)
        uniq_count = unique_motifs_dic[rbp_id][cmsh_str]
        # Store binding info of RBP in region.
        rbp_idx = rbp2idx_dic[rbp_id]

        # Motif hit string (negated bit score written in the p-value slot).
        motif_str = "%s:%i:%i:%s" %(cmsh.motif_id, cmsh.start, cmsh.end, str(-1*cmsh.score))
        # motif_str_plot = "%s,%i-%i,%s" %(cmsh.motif_id, cmsh.seq_s, cmsh.seq_e, str(cmsh.score))
        motif_str_plot = "%s:%i-%i" %(cmsh.motif_id, cmsh.seq_s, cmsh.seq_e)

        # Center position of motif hit.
        motif_hit_s = cmsh.seq_s - 1
        motif_hit_e = cmsh.seq_e
        center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

        region_rbp_binds_dic[region_id][rbp_idx] = True
        region_rbp_motif_pos_dic[region_id].append(motif_str)
        region2motif_hits_dic[region_id].append(motif_str_plot)
        if rbp_idx not in rid2rbpidx2hcp_dic[region_id]:
            rid2rbpidx2hcp_dic[region_id][rbp_idx] = [center_pos]
        else:
            rid2rbpidx2hcp_dic[region_id][rbp_idx].append(center_pos)

        motif_db_out = loaded_motif_ids_dic[cmsh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Get matched sequence.
        matched_seq = benchlib.get_matched_seq(cmsh.seq_name, out_seqs_dic, cmsh.seq_s, cmsh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        # Store motif hit as BED (once per unique genomic hit).
        hit_id = "%s:%s-%s(%s)%s" %(cmsh.chr_id, str(cmsh.start), str(cmsh.end), cmsh.strand, cmsh.motif_id)
        if hit_id not in motif_reg_dic:
            bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s" %(cmsh.chr_id, cmsh.start-1, cmsh.end, rbp_id, cmsh.motif_id, uniq_count, args.method_id, args.data_id, cmsh.strand, str(cmsh.score), str(cmsh.e_value), matched_seq)
            motif_reg_dic[hit_id] = bed_row
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if cmsh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][cmsh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][cmsh.motif_id]:
                match_c_dic[rbp_id][cmsh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][cmsh.motif_id][matched_seq] += 1

        # Assemble tab-separated row (fimo_score/fimo_pval set to "-").
        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += cmsh.motif_id + "\t"
        row_str += cmsh.chr_id + "\t"
        row_str += str(cmsh.start) + "\t"
        row_str += str(cmsh.end) + "\t"
        row_str += cmsh.strand + "\t"
        row_str += str(cmsh.seq_s) + "\t"
        row_str += str(cmsh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += str(cmsh.score) + "\t"
        row_str += str(cmsh.e_value) + "\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"
        #print("region_id:", region_id)
        #print("evalue:", cmsh.e_value)
        #print(row_str)

        OUTSTATS.write(row_str)

    OUTSTATS.close()

    """
    Output motif hits as BED.

    The motif hits written to motif_hits_bed_out are unique motif hits already.
    If same hit occurs > 1, this is recorded in BED column 4 with format:
    rbp_id:motif_id;uniq_count;method_id:data_id
    These also include regex hits, as they are part of fimo hits.

    """

    OUTBED = open(motif_hits_bed_out, "w")
    for hit_id in motif_reg_dic:
        OUTBED.write("%s\n" %(motif_reg_dic[hit_id]))
    OUTBED.close()


    """
    Output matched sequence counts in table.

    Formats:
    match_c_total_dic[rbp_id][motif_id] = count
    match_c_dic[rbp_id][motif_id][matched_seq] = count

    """

    OUTTSV = open(matched_seqs_out, "w")
    OUTTSV.write("rbp_id\tmotif_id\tmatched_seq\tmatch_count\tmatch_perc\n")

    # Total matched sequence counts per RBP + motif (for percentages).
    match_c_total_dic = benchlib.get_match_c_total_dic(match_c_dic)

    for rbp_id in match_c_dic:
        for motif_id in match_c_dic[rbp_id]:
            total_c = match_c_total_dic[rbp_id][motif_id]
            for matched_seq in match_c_dic[rbp_id][motif_id]:
                match_c = match_c_dic[rbp_id][motif_id][matched_seq]
                # Percentage of this matched sequence over all matches of the motif.
                match_perc = 0.0
                if match_c > 0:
                    match_perc = (float(match_c)/float(total_c)) * 100.0
                row_str = "%s\t%s\t%s\t%i\t%.2f\n" %(rbp_id, motif_id, matched_seq, match_c, match_perc)
                OUTTSV.write(row_str)
    OUTTSV.close()


    """
    Significance testing:
    Check for co-occurrences of motifs from different RBPs in regions.
    Use 2x2 contingency tables, and some test for significance,
    e.g. Fisher exact or Chi-squared

    region_rbp_binds_dic format:
    'chr20:62139082-62139128(-)': [False, False, False]
    ...
    rbp2idx_dic
    idx2rbp_dic

    from itertools import combinations
    Number of combinations (draw k from n elements, no order, no repetition)
    Binomialcoefficient: n over k, where n = #RBPs, and k = 2
    rbp_pairs = list(combinations(rbp_list, 2))

    make_contingency_table_2x2(region_labels_dic, idx1, idx2):
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.fisher_exact.html
                   List 1              Not in List 1
    List 2         A                   B
    Not in List 2  C                   D

    table = [[A, B], [C, D]]

    from scipy.stats import chi2_contingency
    stat, p, dof, expected = chi2_contingency(table)
    Note that chi2_contingency does not tolerate zero elements.

    """

    # Store p-values in 2d-list.
    pval_ll = []  # p-values (Fisher exact test).
    or_ll = []  # odds ratios.
    pval_cont_lll = []  # Per pair: p-value, plotted p-value, pair, table + distance strings.

    for rbp_id in rbp_list:
        pval_ll.append([1.0]*len_rbp_list)
        # NOTE(review): []*len_rbp_list evaluates to just [] — each row is
        # filled with len_rbp_list entries in the loop below.
        pval_cont_lll.append([]*len_rbp_list)
        # pval_cont_lll.append([["1.0","-", "-"]]*len_rbp_list)
        or_ll.append([0.0]*len_rbp_list)

    for i in range(len_rbp_list):
        for j in range(len_rbp_list):
            # Storing [p-value_str, pair_str, table_str, correlation_str].
            pval_cont_lll[i].append(["1.0", "-", "-", "-", "-", "-", "-", "", "", ""])

    # Add p-values of 1.0 in diagonal.
    # for idx, rbp_id in enumerate(rbp_list):
    #     pval_ll[idx][idx] = 1.0
    #     pval_cont_lll[idx][idx] = 1.0

    # All unordered RBP pairs (n over 2 combinations).
    rbp_pairs = list(combinations(rbp_list, 2))
    con_pval_dic = {}
    con_table_dic = {}
    pair_str_dic = {}

    # print("rbp2idx_dic:", rbp2idx_dic)
    # print("region_rbp_binds_dic:", region_rbp_binds_dic)
    # print("len(region_rbp_binds_dic):", len(region_rbp_binds_dic))
    # true_c = 0
    # for reg_id in region_rbp_binds_dic:
    #     for label in region_rbp_binds_dic[reg_id]:
    #         if label:
    #             true_c += 1
    # print("# TRUEs:", true_c)

    # Count regions with >= 1 motif hit from any RBP.
    c_regions_with_hits = 0
    for reg_id in region_rbp_binds_dic:
        reg_hit = False
        for label in region_rbp_binds_dic[reg_id]:
            if label:
                reg_hit = True
        if reg_hit:
            c_regions_with_hits += 1
    print("# regions with hits (all motifs):", c_regions_with_hits)

    # Select Fisher exact test alternative hypothesis via --fisher-mode.
    fisher_alt_hypo = "greater"
    if args.fisher_mode == 1:
        fisher_alt_hypo = "greater"
        print("Fisher mode = 1, reporting significantly overrepresented co-occurrences ... ")
    elif args.fisher_mode == 2:
        fisher_alt_hypo = "two-sided"
        print("Fisher mode = 2, reporting significantly over- AND underrepresented co-occurrences ... ")
    elif args.fisher_mode == 3:
        fisher_alt_hypo = "less"
        print("Fisher mode = 3, reporting significantly underrepresented co-occurrences ... ")
    else:
        assert False, "Invalid Fisher mode: %i" %(args.fisher_mode)

    p_val_list = []  # Fisher exact test p-values.

    print("Compute motif region co-occurrences between RBP pairs ... ")
    for pair in rbp_pairs:
        pair = list(pair)
        pair.sort()

        idx1 = rbp2idx_dic[pair[0]]
        idx2 = rbp2idx_dic[pair[1]]
        # pair_list = [pair[0], pair[1]]
        # pair_list.sort()
        pair_str = ",".join(pair)
        # pair_str_dic[pair_str] = [pair_list[0], pair_list[1]]
        pair_str_dic[pair_str] = [pair[0], pair[1]]

        # avg_min_dist and perc_close_hits = "-" if no common hit regions.
        table, avg_min_dist, perc_close_hits = benchlib.make_contingency_table_2x2_v2(
                                                       region_rbp_binds_dic, idx1, idx2,
                                                       rid2rbpidx2hcp_dic,
                                                       max_motif_dist=args.max_motif_dist)

        # Fisher exact test on the 2x2 co-occurrence table.
        odds_ratio, p_value = fisher_exact(table, alternative=fisher_alt_hypo)

        # if p_value == 0:
        #   p_value = 2.2e-308
        #   print("PVALUE ZERO! (%s, odds_ratio: %s)" %(str(p_value), str(odds_ratio)))
        #   print("pair_str:", pair_str)
        #   print("table_str:", table_str)

        # con_pval_dic[pair_str] = p_value
        con_table_dic[pair_str] = table
        table_str = str(table)

        # print(pair_str, table_str, p_value)
        # print("Pair:", pair_str, "Odds ratio:", odds_ratio,"Fisher p-value:", p_value)

        # Corrected p-value.
        # corr_p_val = p_value * mult_test_corr_factor

        p_value_plotted = p_value
        p_val_list.append(p_value)
        # if p_value > cooc_pval_thr:
        #     p_value_plotted = 1.0

        # Fill symmetric p-value matrix; pair details stored in the lower
        # triangle ([idx2][idx1], since idx1 < idx2 for the sorted pair).
        pval_ll[idx1][idx2] = p_value_plotted
        pval_ll[idx2][idx1] = p_value_plotted
        pval_cont_lll[idx2][idx1][0] = str(p_value)
        pval_cont_lll[idx2][idx1][1] = str(p_value_plotted)
        pval_cont_lll[idx2][idx1][2] = pair_str
        pval_cont_lll[idx2][idx1][3] = table_str
        pval_cont_lll[idx2][idx1][4] = avg_min_dist
        pval_cont_lll[idx2][idx1][5] = perc_close_hits

        # pval_cont_lll[idx2][idx1][0] = str(p_value)
        # pval_cont_lll[idx2][idx1][1] = pair_str
        # pval_cont_lll[idx2][idx1][2] = table_str
        or_ll[idx1][idx2] = odds_ratio
        or_ll[idx2][idx1] = odds_ratio

    """
    Multiple testing correction.

    """

    cooc_pval_thr = args.cooc_pval_thr

    if args.cooc_pval_mode == 1:  # BH correction.

        pvals_corrected = false_discovery_control(p_val_list, method='bh')

        for i in range(len(p_val_list)):
            p_val_list[i] = pvals_corrected[i]

    elif args.cooc_pval_mode == 2:  # Bonferroni correction.

        # Multiple testing correction factor.
        mult_test_corr_factor = 1
        if len_rbp_list > 1:
            mult_test_corr_factor = (len_rbp_list*(len_rbp_list-1))/2

        cooc_pval_thr = args.cooc_pval_thr / mult_test_corr_factor
        cooc_pval_thr = benchlib.round_to_n_significant_digits_v2(cooc_pval_thr, 4)

    elif args.cooc_pval_mode == 3:  # No correction.

        cooc_pval_thr = args.cooc_pval_thr

    else:
        assert False, "Invalid co-occurrence p-value mode (--cooc-pval-mode) set: %i" %(args.cooc_pval_mode)

    args.cooc_pval_thr = cooc_pval_thr

    # Update + filter p-values.

    pv_idx = 0
    c_all_fisher_pval = 0
    c_sig_fisher_pval = 0

    COSOUT = open(cooc_stats_out, "w")
    COSOUT.write("rbp_id1\trbp_id2\tc_1and2\tc_only2\tc_only1\tc_not1not2\tcooc_pval\tavg_min_dist\tperc_close_hits_%int\n" %(args.max_motif_dist))

    for pair in rbp_pairs:
        pair = list(pair)
        pair.sort()

        idx1 = rbp2idx_dic[pair[0]]
        idx2 = rbp2idx_dic[pair[1]]

        pair_str = ",".join(pair)

        p_value = p_val_list[pv_idx]

        p_value = benchlib.round_to_n_significant_digits_v2(p_value, 4)

        p_value_plotted = p_value

        avg_min_dist_str = pval_cont_lll[idx2][idx1][4]
        avg_min_dist = 10000
        if avg_min_dist_str != "-":
            avg_min_dist = float(avg_min_dist_str)

        if p_value > cooc_pval_thr:
            p_value_plotted = 1.0
            pval_cont_lll[idx2][idx1][7] = "(Filter: p-value > %s)<br>" %(str(cooc_pval_thr))
        
        if p_value <= cooc_pval_thr and avg_min_dist < args.min_motif_dist:
            p_value_plotted = 1.0
            pval_cont_lll[idx2][idx1][7] = "(Filter: mean minimum motif distance < %i)<br>" %(args.min_motif_dist)

        c_all_fisher_pval += 1
        if p_value <= cooc_pval_thr and avg_min_dist >= args.min_motif_dist:
            c_sig_fisher_pval += 1

        pval_ll[idx1][idx2] = p_value_plotted
        pval_ll[idx2][idx1] = p_value_plotted
        pval_cont_lll[idx2][idx1][0] = str(p_value)
        pval_cont_lll[idx2][idx1][1] = str(p_value_plotted)

        con_pval_dic[pair_str] = p_value

        pv_idx += 1

        # Get stats for output.
        rbp1 = pair[0]
        rbp2 = pair[1]
        con_table = con_table_dic[pair_str]
        perc_close_hits = pval_cont_lll[idx2][idx1][5]

        COSOUT.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %(rbp1, rbp2, str(con_table[0][0]), str(con_table[0][1]), str(con_table[1][0]), str(con_table[1][1]), str(p_value), avg_min_dist_str, perc_close_hits))

    COSOUT.close()

    assert c_all_fisher_pval == len(p_val_list), "Number of p-values (%i) does not match number of RBP pairs (%i)" %(len(p_val_list), len(rbp_pairs))
    perc_sig_fisher_pval = 0.0
    if c_all_fisher_pval > 0:
        perc_sig_fisher_pval = round((c_sig_fisher_pval/c_all_fisher_pval)*100, 2)

    print("# of RBP co-occurrence comparisons (calculated p-values in total): %i" %(c_all_fisher_pval))

    min_motif_dist_info = ""
    if args.min_motif_dist > 0:
        min_motif_dist_info = " + --min-motif-dist >= %i" %(args.min_motif_dist)

    args.c_all_fisher_pval = c_all_fisher_pval
    args.c_sig_fisher_pval = c_sig_fisher_pval
    args.perc_sig_fisher_pval = perc_sig_fisher_pval

    if args.cooc_pval_mode == 1:
        print("Number of significant co-occurrence p-values (BH corrected%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))
    elif args.cooc_pval_mode == 2:
        print("Number of significant co-occurrence p-values (Bonferroni corrected%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))
    elif args.cooc_pval_mode == 3:
        print("Number of significant co-occurrence p-values (no correction%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))

    """
    Print out RBPs sorted by KS p-value.

    """ 
    sorted_con_pval_dic = dict(sorted(con_pval_dic.items(), key=lambda item: item[1], reverse=False))

    print("Co-occurrence contingency table format: [A, B], [C, D]")
    print("A: RBP1 AND RBP2")
    print("B: NOT RBP1 AND RBP2")
    print("C: RBP1 AND NOT RBP2")
    print("D: NOT RBP1 AND NOT RBP2")

    if args.cooc_pval_mode == 1:
        print("Significance threshold: %s (on Benjamini-Hochberg corrected p-values)" %(str(cooc_pval_thr)))
    elif args.cooc_pval_mode == 2:
        print("Significance threshold: %s (Bonferroni corrected)" %(str(cooc_pval_thr)))
    elif args.cooc_pval_mode == 3:
        print("Significance threshold: %s" %(str(cooc_pval_thr)))
    else:
        assert False, "Invalid co-occurrence p-value mode (--cooc-pval-mode) set: %i" %(args.cooc_pval_mode)

    print("Co-occurrence of motifs (RBP IDs (RBP1,RBP2), # hits, contingency table, Fisher p-value)")

    c_reported = 0

    for pair_str, p_value in sorted_con_pval_dic.items():
        if p_value > cooc_pval_thr:
            break
        c_reported += 1
        rbp1 = pair_str_dic[pair_str][0]
        rbp2 = pair_str_dic[pair_str][1]
        con_table = con_table_dic[pair_str]
        rbp1_hits = search_rbps_dic[rbp1].c_uniq_motif_hits
        rbp2_hits = search_rbps_dic[rbp2].c_uniq_motif_hits
        print("%s\t%i,%i\t%s\t%s" %(pair_str, rbp1_hits, rbp2_hits, str(con_table), str(p_value)))

    if not c_reported:
        print("NO SIGNIFICANT CO-OCCURRENCES FOUND!")

    print("")

    # # Print table to file.
    # benchlib.output_con_table_results(con_res_out_tsv, pval_ll, rbp_list)

    """
    For HTML report, calculate correlations too.

    Setting add_count = True changes correlations a bit (use motif hit counts 
    instead of 1 for any number of hits)

    """

    print("Calculate correlations ... ")
    # Correlation between RBPs dataframe.
    df = DataFrame(reg_hits_dic, columns=rbp_list)
    df_corr = df.corr(method='pearson')

    for i,rbp_i in enumerate(rbp_list):
        for j,rbp_j in enumerate(rbp_list):
            if j > i:
                pval_ll[i][j] = None

    # Fisher p-value dataframe.
    df_pval = DataFrame(pval_ll, columns=rbp_list, index=rbp_list)

    # Write None to upper-diagonal entries.
    for i,rbp_i in enumerate(rbp_list):
        for j,rbp_j in enumerate(rbp_list):
            if j > i:
                # df_corr.loc[rbp_i][rbp_j] = None  # FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!
                df_corr.loc[rbp_i, rbp_j] = None
            else:
                # Round correlation values if != 1.0.
                if df_corr.loc[rbp_i][rbp_j] == 1.0:
                    pval_cont_lll[i][j][6] = str(df_corr.loc[rbp_i][rbp_j])
                else:
                    pval_cont_lll[i][j][6] = "{:.8f}".format(df_corr.loc[rbp_i][rbp_j])

    for i,rbp_i in enumerate(rbp_list):
        for j,rbp_j in enumerate(rbp_list):
            if j > i:
                df_pval.loc[rbp_i, rbp_j] = None

    benchlib.log_tf_df(df_pval, convert_zero_pv=True, rbp_list=rbp_list)


    """
    Generate HTML report.
    
    """

    html_report_out = args.out_folder + "/" + "report.rbpbench_searchrna.html"
    if args.plot_abs_paths:
        html_report_out = os.path.abspath(args.out_folder) + "/" + "report.rbpbench_searchrna.html"

    # If HTML file already exists, remove it.
    if os.path.exists(html_report_out):
        os.remove(html_report_out)

    # Sequence lengths dataframe.
    seq_len_df = None
    if not args.disable_len_dist_plot:

        # Sequences dataframe for plotting sequence lengths violin plot.
        sequences = []
        seq_ids = []
        for seq_id in out_seqs_dic:
            seq_ids.append(seq_id)
            sequences.append(out_seqs_dic[seq_id])

        motif_hits = []
        for seq_id in seq_ids:
            # region2motif_hits_dic[seq_id].sort()
            # If list empty, append "-".
            if not region2motif_hits_dic[seq_id]:
                motif_hits.append("-")
            else:
                motif_hits.append(benchlib.join_motif_hits(
                                region2motif_hits_dic[seq_id],
                                motifs_per_line=3,
                                line_break_char="<br>"))
                # motif_hits.append("\n".join(region2motif_hits_dic[seq_id]))

        seq_len_df = DataFrame({
            'Sequence ID': seq_ids,
            'Sequence Length': [len(seq) for seq in sequences],
            'Sequence': [benchlib.insert_line_breaks(seq, line_len=50) for seq in sequences],
            'Motif hits': motif_hits
        })


    plots_subfolder = "html_report_plots"
    benchlib_path = os.path.dirname(benchlib.__file__)

    # Disable some functions from rbpbench search.
    args.disable_kmer_tb_plot = True
    args.disable_kmer_var_plot = True

    print("Create report ... ")
    benchlib.search_generate_html_report(args,
                                    df_pval, pval_cont_lll,
                                    search_rbps_dic,
                                    id2name_dic, name2ids_dic,
                                    region_rbp_motif_pos_dic,
                                    reg2pol_dic,
                                    benchlib_path,
                                    rbp2regidx_dic,
                                    reg_ids_list,
                                    seq_len_df=seq_len_df,
                                    seq_motif_blocks_dic=seq_motif_blocks_dic,
                                    reg2annot_dic=False,  # no need.
                                    annot2color_dic=False,  # no need.
                                    goa_results_df=goa_results_df,
                                    goa_stats_dic=goa_stats_dic,
                                    html_report_out=html_report_out,
                                    rbpbench_mode="searchrna",
                                    reg_seq_str="regions",
                                    reg2seq_dic=out_seqs_dic,
                                    disable_motif_enrich_table=True,
                                    plots_subfolder=plots_subfolder)


    """
    Motif plots and motif hit statistics HTML.

    """
    html_motif_plots_out = args.out_folder + "/" + "motif_plots.rbpbench_searchrna.html"
    if args.plot_abs_paths:
        html_motif_plots_out = os.path.abspath(args.out_folder) + "/" + "motif_plots.rbpbench_searchrna.html"
    # If HTML file already exists, remove it.
    if os.path.exists(html_motif_plots_out):
        os.remove(html_motif_plots_out)

    if args.plot_motifs:
        
        print("Generate motif plots HTML ... ")
        
        plots_subfolder = "html_motif_plots"
        benchlib_path = os.path.dirname(benchlib.__file__)

        # Only valid for searchlongrna.
        args.run_goa_tr = False

        benchlib.search_generate_html_motif_plots(args,
                                         search_rbps_dic, seq_motif_blocks_dic, 
                                         str_motif_blocks_dic,
                                         benchlib_path, loaded_motif_ids_dic,
                                         rbp2motif2annot2c_dic=False,  # no need.
                                         annot2color_dic=False,  # no need.
                                         html_report_out=html_motif_plots_out,
                                         rbpbench_mode="searchrna --plot-motifs",
                                         reg_seq_str="regions",
                                         id2pids_dic=id2pids_dic,
                                         id2exp_dic=id2exp_dic,
                                         match_c_dic=match_c_dic,
                                         match_c_total_dic=match_c_total_dic,
                                         plots_subfolder=plots_subfolder)

    """
    Output parameter settings.

    """
    # Output mode settings.
    print("Output parameter settings ... ")
    SETOUT = open(settings_file, "w")
    for arg in vars(args):
        SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
    for call in call_dic:
        SETOUT.write("%s\t%s\n" %(call, call_dic[call]))
    SETOUT.close()

    """
    Take out the trash.

    """
    print("Delete .tmp files ... ")
    if os.path.exists(out_tmp_bed):
        os.remove(out_tmp_bed)
    if os.path.exists(cmstat_tmp_out):
        os.remove(cmstat_tmp_out)


    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    # print("Co-occurrence p-values for each RBP pair .tsv:\n%s" %(con_res_out_tsv))
    print("RBP co-occurrence stats .tsv:\n%s" %(cooc_stats_out))
    print("Filtered input regions .bed:\n%s" %(filtered_sites_bed))
    print("Filtered input regions .fa:\n%s" %(filtered_sites_fa))
    print("Motif hits .bed:\n%s" %(motif_hits_bed_out))
    print("Matched sequence stats .tsv:\n%s" %(matched_seqs_out))
    print("RBP region occupancies .tsv:\n%s" %(rbp_reg_occ_table_out))
    print("RBP hit stats .tsv:\n%s" %(rbp_stats_out))
    print("Motif hit stats .tsv:\n%s" %(motif_stats_out))
    if target_reg_annot_file is not None:
        print("Target region annotation .tsv:\n%s" %(target_reg_annot_file))
    if args.run_goa:
        print("GO enrichment analysis results .tsv:\n%s" %(goa_results_tsv))
    if args.plot_motifs:
        print("Motif plots and hit statistics .html:\n%s" %(html_motif_plots_out))
    print("Search report .html:\n%s" %(html_report_out))
    print("")


################################################################################

def main_searchlongrna(args):
    """
    Motif search in spliced full transcripts.

    """

    print("Running for you in SEARCHLONGRNA mode ... ")

    assert os.path.exists(args.in_gtf), "--gtf file \"%s\" not found" % (args.in_gtf)
    assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)

    assert benchlib.boundary_check(args.gtf_feat_min_overlap, 1E-9, 1.0), "set --gtf-feat-min-overlap expected to be >= 1E-9 and <= 1.0"

    if args.run_goa_tr:
        assert args.in_gtf, "set --goa requires --gtf GTF file"
        if args.goa_obo_mode == 3:
            assert args.goa_obo_file, "set --goa-obo-mode 3 requires --goa-obo-file"
            assert os.path.exists(args.goa_obo_file), "--goa-obo-file file \"%s\" not found" % (args.goa_obo_file)
        if args.goa_obo_file:
            assert args.goa_obo_mode == 3, "--goa-obo-file requires --goa-obo-mode 3"
        if args.goa_max_child is not None:
            assert args.goa_max_child >= 0, "set --goa-max-child expected to be >= 0"
        if args.goa_min_depth is not None:
            assert args.goa_min_depth >= 0, "set --goa-min-depth expected to be >= 0"

    # Is MEME >= v5 installed?
    if not args.meme_disable_check:
        assert benchlib.is_tool("meme"), "meme not in PATH"
        check, meme_version = benchlib.check_tool_version("meme -version", "5.0")
        assert check, "RBPBench requires meme version >= 5.0 (installed version: %s)" %(meme_version)

    # Check if MEME version is >= 5.5.4 (need to add fimo --no-pgc option to produce same results!).
    fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text"
    if not args.meme_disable_check:
        check, meme_version = benchlib.check_tool_version("meme -version", "5.5.4")
        args.meme_version = meme_version
        if check:
            fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"
    if args.meme_no_pgc:
        fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"

    # Regex check.
    regex_type = "sequence"
    if args.regex:
        if args.regex_type == 1:
            print("Check given --regex type ... ")
            if benchlib.looks_like_structure(args.regex):
                print("Given --regex looks like structure pattern ... ")
                regex_type = "structure"
            else:
                print("Given --regex assumed to be standard regex ... ")
                regex_type = "sequence"
        elif args.regex_type == 2:
            regex_type = "sequence"
        elif args.regex_type == 3:
            regex_type = "structure"
        else:
            assert False, "unexpected --regex-type value set (%d)" %(args.regex_type)
        # If standard regex, check validity.
        if regex_type == "sequence":
            assert benchlib.is_valid_regex(args.regex), "given --regex \"%s\" is not a valid regular expression. Please provide valid expression" % (args.regex)

    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.fimo_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.fimo_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    if args.fimo_user_ntf_file:
        fimo_freqs_file = args.fimo_user_ntf_file
    assert os.path.exists(fimo_freqs_file), "set FIMO nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    Motif database.
    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                            db_path=db_path)
    # Custom motif database: folder given.
    if args.custom_db:
        assert not args.custom_db_meme_xml, "--custom-db folder set incompatible with --custom-db-meme-xml. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_cm, "--custom-db folder set incompatible with --custom-db-cm. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_info, "--custom-db folder set incompatible with --custom-db-info. Provide custom motif database either via folder (--custom-db) or as single files"
        seq_motifs_db_file, str_motifs_db_file, rbp2ids_file = specify_custom_motif_db(args.custom_db)
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
    # Custom motif database: single files given.
    if args.custom_db_meme_xml or args.custom_db_cm or args.custom_db_info:
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
        assert not args.custom_db, "single custom motif database files provided not compatible with --custom-db. Provide custom motif database either via folder (--custom-db) or as single files"
        assert args.custom_db_info, "--custom-db-info needed to define custom motif database"
        rbp2ids_file = args.custom_db_info
        assert args.custom_db_meme_xml or args.custom_db_cm, "--custom-db-meme-xml and/or --custom-db-cm needed to define custom motif database"
        if args.custom_db_meme_xml:
            seq_motifs_db_file = args.custom_db_meme_xml
        else:
            seq_motifs_db_file = ""  # setting to empty string results in os.path.exists -> False.
        if args.custom_db_cm:
            str_motifs_db_file = args.custom_db_cm
        else:
            str_motifs_db_file = ""

    args.motif_db_str = motif_db_str

    """
    Get ID mappings

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")

    """
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)

    # Motif ID -> RBP name (== RBP ID) mapping.
    id2name_dic = {}
    for rbp_id in name2ids_dic:
        for motif_id in name2ids_dic[rbp_id]:
            id2name_dic[motif_id] = rbp_id

    """
    Get MEME XML database motif blocks dictionary.

    """
    seq_motif_blocks_dic = {}
    if os.path.exists(seq_motifs_db_file):
        seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file,
                                                           empty_check=True)
    for motif_id in seq_motif_blocks_dic:
        assert motif_id in id2name_dic, "MEME XML motif ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    """
    Get covariance model database motif blocks dictionary.

    """
    str_motif_blocks_dic = {}
    if str_motifs_db_file and os.path.exists(str_motifs_db_file):
        str_motif_blocks_dic = benchlib.read_in_cm_blocks(str_motifs_db_file, 
                                                          empty_check=True)
        for motif_id in str_motif_blocks_dic:
            assert motif_id in id2name_dic, "Covariance model accession ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    # Remove special chars from run ID.
    args.data_id = benchlib.remove_special_chars_from_str(args.data_id)
    assert args.data_id, "empty string after removing special chars from --data-id. Please provide alphanumeric string for data ID (- or _ are okay as well)"
    args.method_id = benchlib.remove_special_chars_from_str(args.method_id)
    assert args.method_id, "empty string after removing special chars from --method-id. Please provide alphanumeric string for method ID (- or _ are okay as well)"
    # Run ID definition.
    run_id = "run_id"
    if args.run_id:
        run_id = benchlib.remove_special_chars_from_str(args.run_id)
        assert run_id, "empty string after removing special chars from --run-id. Please provide alphanumeric string for run ID (- or _ are okay as well)"

    print("Run ID:     ", run_id)
    print("Data ID:    ", args.data_id)
    print("Method ID:  ", args.method_id)


    """
    Output folders + files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    tr_seqs_fa = args.out_folder + "/transcript_sequences.fa"
    mrna_regions_bed = args.out_folder + "/mrna_regions.bed"  # UTR CDS regions on mRNAs (i.e. transcript context).
    seq_motifs_xml = args.out_folder + "/seq_motifs.xml"
    str_motifs_cm = args.out_folder + "/str_motifs.cm"
    fimo_res_tsv = args.out_folder + "/fimo_results.tsv"
    cmsearch_res_txt = args.out_folder + "/cmsearch_results.txt"

    rbp_stats_out = args.out_folder + "/rbp_hit_stats.tsv"
    motif_stats_out = args.out_folder + "/motif_hit_stats.tsv"
    # con_res_out_tsv = args.out_folder + "/contingency_table_results.tsv"
    settings_file = args.out_folder + "/settings.rbpbench_searchlongrna.out"
    # rbp_reg_occ_table_out = args.out_folder + "/rbp_region_occupancies.tsv"

    # Output unique motif hits.
    motif_hits_bed_out = args.out_folder + "/motif_hits.rbpbench_searchlongrna.bed"
    # Output matched sequences stats.
    matched_seqs_out = args.out_folder + "/matched_seq_stats.tsv"

    # GOA results.
    goa_results_tsv = args.out_folder + "/goa_results_on_motif_hit_transcripts.tsv"

    out_tmp_bed = args.out_folder + "/rbp_motif_hit_regions.tmp.bed"
    cmstat_tmp_out = args.out_folder + "/cmstat_out.tmp.txt"

    # Delete if existing folder.
    if os.path.exists(fimo_res_tsv):
        os.remove(fimo_res_tsv)
    if os.path.exists(cmsearch_res_txt):
        os.remove(cmsearch_res_txt)


    """
    Load RBP data based on --rbps (+ optionally USER data).

    """

    rbp_in_dic = {}
    for rbp_id in args.list_rbps:
        rbp_in_dic[rbp_id] = 1

    # RBPs for motif search.
    loaded_rbps_dic = {}

    # USER set?
    user_motifs = False
    user_rbp_id = False
    if "USER" in rbp_in_dic:
        user_motifs = True
    else:
        assert not args.user_meme_xml, "--user-meme-xml provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_cm, "--user-cm provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_rbp_id, "--user-rbp-id set but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"

    special_rbp_ids_list = ["USER", "REGEX"]

    # If ALL set, load all RBPs (+ optionally USER).
    if "ALL" in rbp_in_dic:
        if len(rbp_in_dic) == 2:
            assert user_motifs, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        if len(rbp_in_dic) > 2:
            assert False, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        print("--rbps ALL selected. Loading all database motifs ... ")
        for rbp_id in name2ids_dic:
            loaded_rbps_dic[rbp_id] = motif_db_str

    else:
        # Load individual RBPs.
        for rbp_id in rbp_in_dic:
            if rbp_id not in special_rbp_ids_list:
                """
                Check if RBP ID in database.
                Suggest similar RBPs based on string similarity (edit distance).

                """
                if rbp_id not in name2ids_dic:
                    db_rbp_list = []
                    for db_rbp_id in name2ids_dic:
                        db_rbp_list.append(db_rbp_id)
                    pair_dist_dic = benchlib.calc_edit_dist_query_list(rbp_id, db_rbp_list)
                    max_c = 10
                    c = 0
                    suggested_rbps = []
                    for key, value in sorted(pair_dist_dic.items(), key=lambda item: item[1], reverse=False):
                        if c >= max_c:
                            break
                        c += 1
                        suggested_rbps.append(key)
                    suggested_rbps_str = ",".join(suggested_rbps)
                    assert False, "provided --rbps ID %s not in internal motif database (%s). Please provide RBP name present in database. Did you mean (any of) the following database ID(s) (top 10 hits based on string similarity): %s ?" %(rbp_id, motif_db_str, suggested_rbps_str)
                # assert rbp_id in name2ids_dic, "provided --rbps ID %s not in internal motif database. Please provide RBP name present in database" %(rbp_id)
                loaded_rbps_dic[rbp_id] = motif_db_str

    # Motif IDs for search.
    loaded_motif_ids_dic = {}
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            loaded_motif_ids_dic[motif_id] = motif_db_str

    """
    Check and load provided USER data.

    """

    if user_motifs:
        print("--rbps USER selected. Check + load provided USER motifs ... ")
        assert args.user_rbp_id, "--rbps USER demands --user-rbp-id to be set to connect the supplied motif(s) with an RBP ID"
        assert args.user_meme_xml or args.user_cm, "--rbps USER requires a provided sequence or structure motif file (via --user-meme-xml AND/OR --user-cm)"

        # Reformat user_rbp_id. 
        user_rbp_id = benchlib.remove_special_chars_from_str(args.user_rbp_id)
        assert user_rbp_id, "empty string after removing special chars from --user-rbp-id. Please provide alphanumeric string for RBP ID (- or _ are okay as well)"

        assert user_rbp_id not in loaded_rbps_dic, "user RBP ID %s already selected from database. Please deselect respective database RBP ID or provide unique user RBP ID via --user-rbp-id" %(user_rbp_id)
        loaded_rbps_dic[user_rbp_id] = "user"
        # In case user_rbp_id in database, reset motif IDs associated to user_rbp_id.
        name2ids_dic[user_rbp_id] = []
        print("RBP ID for user-supplied motifs:", user_rbp_id)

        user_seq_motif_blocks_dic = {}
        if args.user_meme_xml:
            assert os.path.exists(args.user_meme_xml), "--user-meme-xml file \"%s\" not found" % (args.user_meme_xml)
            # Read in user-supplied sequence motifs from --user-meme-xml
            # (MEME XML format). empty_check=False: emptiness asserted below.
            user_seq_motif_blocks_dic = benchlib.read_in_xml_motifs(args.user_meme_xml, empty_check=False)
            assert user_seq_motif_blocks_dic, "no motifs read in from provided --user-meme-xml. Make sure to supply sequence motifs in MEME XML format!"
            # Check if motif ID already loaded.
            for acc_id in user_seq_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied MEME XML motif ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change user motif ID to a unique motif ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                seq_motif_blocks_dic[acc_id] = user_seq_motif_blocks_dic[acc_id]
                # Register motif under the user RBP ID (user_rbp_id set earlier,
                # outside this section).
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "meme_xml"
                id2name_dic[acc_id] = user_rbp_id

        # User-supplied structure motifs (covariance models) via --user-cm.
        user_str_motif_blocks_dic = {}
        if args.user_cm:
            assert os.path.exists(args.user_cm), "--user-cm file \"%s\" not found" % (args.user_cm)
            # Check for valid format.
            acc_ids_dic = benchlib.check_cm_file(args.user_cm, cmstat_tmp_out, empty_check=False)
            # Read in covariance model blocks.
            user_str_motif_blocks_dic = benchlib.read_in_cm_blocks(args.user_cm)
            # Every accession reported by cmstat must have a parsed CM block.
            for acc_id in acc_ids_dic:
                assert acc_id in user_str_motif_blocks_dic, "accession ID %s not in blocks dictionary. Please contact developers!" %(acc_id)
            # Check if motif ID already loaded.
            for acc_id in user_str_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied covariance model accession (ACC) ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change to a unique accession ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                str_motif_blocks_dic[acc_id] = user_str_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "cm"
                id2name_dic[acc_id] = user_rbp_id

    """
    Get sequence motif lengths.

    """

    id2len_dic = benchlib.get_seq_motif_lengths(seq_motif_blocks_dic)

    """
    Optionally filter DREME/MEME sequence motifs by length.

    """

    if args.motif_min_len or args.motif_max_len:

        if args.motif_min_len and args.motif_max_len:
            assert args.motif_min_len <= args.motif_max_len, "set --motif-min-len needs to be <= --motif-max-len!"

        print("Filtering sequence motifs by set min/max lengths ... ")

        seq_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, name2ids_dic, c_flt_out = benchlib.filter_dic_by_motif_lengths(
                seq_motif_blocks_dic, str_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, id2name_dic,
                id2len_dic, motif_min_len=args.motif_min_len, motif_max_len=args.motif_max_len
            )

        assert loaded_rbps_dic, "no MEME/DREME sequence motifs left after length filtering. Please adjust length filter range (--motif-min-len, --motif-max-len), RBP selection, or disable length filtering!"

        print("Filtered out %d sequence motifs outside set length range" %(c_flt_out))


    """
    Check if loaded RBP IDs have motifs.

    """
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            found = 0
            if motif_id in seq_motif_blocks_dic:
                found += 1
            if motif_id in str_motif_blocks_dic:
                found += 1
            assert found, "no motifs loaded for RBP ID \"%s\". Please provide the respective motifs file" %(rbp_id)


    """
    If --regex is set:
    Treat regex as sequence motif / fimo type.
    rbp_id: regex, motif_id: regex, motif_db: regex
    
    """

    regex_id = args.regex_id
    regex = args.regex
    
    if args.regex:

        if regex_type == "sequence":

            # Remove , ; from given regex, to avoid motif_id format conflicts.
            regex = benchlib.remove_special_chars_from_str(args.regex,
                                                        reg_ex="[ ;]",
                                                        to_upper=False)  # [ :;\(\)]
            
            assert regex, "empty string after removing special chars ( ;) from --regex. Please provide a valid regex with DNA letters"

            # Convert IUPAC codes (if present) in regex to standard regex format.
            regex = benchlib.convert_iupac_in_regex(regex)

        elif regex_type == "structure":

            # Check structure pattern and format.
            regex = benchlib.check_format_str_pattern(args.regex)

        args.regex = regex

        regex_id = benchlib.remove_special_chars_from_str(args.regex_id)

        assert regex_id, "empty string after removing special chars from --regex-id. Please provide alphanumeric string for regex ID (- or _ are okay as well)"
        assert regex_id not in name2ids_dic, "--regex set but a different RBP ID with name \"%s\" was found. Please provide a different RBP ID or --regex-id" %(regex_id)

        args.regex_id = regex_id

        if args.motif_regex_id:
            assert regex_id not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or --regex-id" %(regex_id)

            id2name_dic[regex_id] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex_id] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex_id] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex_id]  # rbp_id -> motif_ids

        else:
            assert regex not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or use --motif-regex-id" %(regex_id)

            id2name_dic[regex] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex]  # rbp_id -> motif_ids

        loaded_rbps_dic[regex_id] = regex_id  # rbp_id -> motif_db_str

    else:
        regex_id = False

    """
    Get function ID -> function descriptions mapping.

    """
    fid2desc_file = db_path + "/rbp_function_list.32728246.tsv"
    fid2desc_dic, desc2fid_dic = benchlib.get_fid2desc_mapping(fid2desc_file)
    

    """
    If --motifs sets, filter loaded_rbps_dic + loaded_motif_ids_dic.
    
    """

    if args.motifs_list:

        print("Filtering loaded motifs by provided --motifs ... ")

        motif_fids_dic = {}

        for motif_id in args.motifs_list:
            motif_fids_dic[motif_id] = 1

        filtered_rbps_dic = {}
        filtered_motif_ids_dic = {}
        filtered_name2ids_dic = {}
        for motif_id in motif_fids_dic:
            assert motif_id in id2name_dic, "motif ID \"%s\" provided via --motifs not found in internal motif ID -> RBP ID mapping. Please provide valid motif IDs" %(motif_id)
            assert motif_id in loaded_motif_ids_dic, "motif ID \"%s\" provided via --motifs not found in loaded motifs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_motif_ids_dic[motif_id]
            filtered_motif_ids_dic[motif_id] = set_db_str
            rbp_id = id2name_dic[motif_id]
            assert rbp_id in loaded_rbps_dic, "motif ID \"%s\" provided via --motifs not found in loaded RBPs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_rbps_dic[rbp_id]
            filtered_rbps_dic[rbp_id] = set_db_str
            if rbp_id in filtered_name2ids_dic:
                filtered_name2ids_dic[rbp_id].append(motif_id)
            else:
                filtered_name2ids_dic[rbp_id] = [motif_id]

        if args.regex:
            filtered_rbps_dic[regex_id] = regex_id
            if args.motif_regex_id:
                filtered_motif_ids_dic[regex_id] = regex_id
                filtered_name2ids_dic[regex_id] = [regex_id]
            else:
                filtered_motif_ids_dic[regex] = regex_id
                filtered_name2ids_dic[regex_id] = [regex]

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by --motifs: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by --motifs:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by --motifs: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by --motifs:", c_loaded_motif_ids_post)

        assert loaded_motif_ids_dic, "no remaining motifs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        name2ids_dic = filtered_name2ids_dic

    """
    If --functions set, filter loaded_rbps_dic + loaded_motif_ids_dic.

    """
    rbp_fids_dic = {}
    if args.rbp_functions:

        print("Filtering loaded RBPs by provided function IDs ... ")

        for fid in args.rbp_functions:
            rbp_fids_dic[fid] = 1

        # Check if provided function IDs are valid.
        for fid in rbp_fids_dic:
            assert fid in fid2desc_dic, "function ID \"%s\" provided via --functions not found in internal function ID -> description mapping. Please provide valid function IDs (see rbpbench info for a detailed description)" %(fid)

        # Filter loaded_rbps_dic.
        filtered_rbps_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:  # keep regex if set.
                filtered_rbps_dic[regex_id] = regex_id
                continue
            if rbp_id not in name2fids_dic:
                continue
            for fid in rbp_fids_dic:
                if fid in name2fids_dic[rbp_id]:
                    set_db_str = loaded_rbps_dic[rbp_id]
                    filtered_rbps_dic[rbp_id] = set_db_str
                    break

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by functions: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by functions:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided function IDs. Please provide compatible RBPs + function IDs (see rbpbench info for annotated RBP functions)"

        # Filter loaded motif IDs.
        filtered_motif_ids_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:
                if args.motif_regex_id:
                    filtered_motif_ids_dic[regex_id] = regex_id
                else:
                    filtered_motif_ids_dic[regex] = regex_id
            else:
                for motif_id in name2ids_dic[rbp_id]:
                    set_db_str = loaded_motif_ids_dic[motif_id]
                    filtered_motif_ids_dic[motif_id] = set_db_str
        
        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by functions: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by functions:", c_loaded_motif_ids_post)


    """
    Load RBP data, store in RBP() class.

    """

    # Store motif IDs for search.
    search_rbps_dic = {}
    seq_rbps_dic = {}
    str_rbps_dic = {}
    motif_id2idx_dic = {} # motif ID -> list index.
    args.internal_id = []

    for rbp_id in loaded_rbps_dic:
    
        internal_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
        args.internal_id.append(internal_id)
        rbp = benchlib.RBP(rbp_id, internal_id)

        for motif_id in name2ids_dic[rbp_id]:
        
            assert motif_id in loaded_motif_ids_dic, "motif_id %s not in loaded_motif_ids_dic" %(motif_id)

            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "regex":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "cm":
                rbp.str_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.str_motif_ids) - 1
                rbp.str_motif_hits.append(0)
                str_rbps_dic[rbp_id] = 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

        search_rbps_dic[rbp_id] = rbp


    print("# of RBP IDs for search:    ", len(loaded_rbps_dic))
    print("# of motif IDs for search:  ", len(loaded_motif_ids_dic))

    """
    Get chromosome IDs from --genome.
    """
    print("Get --genome FASTA headers ... ")
    chr_ids_dic = benchlib.get_fasta_headers(args.in_genome)

    """
    Guess chromosome ID style.

    chr_style:
        1: chr1, chr2, ..., chrX, chrM
        2: 1, 2, ... , X, MT

    """
    print("Guess chromosome ID style (based on --genome FASTA headers) ... ")
    chr_style = benchlib.guess_chr_id_style(chr_ids_dic)


    """
    Read in gene infos from --gtf.

    """

    # reg_annot_table_file = args.out_folder + "/" + "region_annotations.tsv"

    print("Read in gene features from --gtf ... ")
    tr2gid_dic = {}
    tr_types_dic = {}  # Store transcript biotypes in GTF file.
    gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                    tr2gid_dic=tr2gid_dic,
                                                    tr_types_dic=tr_types_dic,
                                                    check_chr_ids_dic=chr_ids_dic,
                                                    chr_style=chr_style,
                                                    empty_check=False)
    assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
    c_gene_infos = len(gid2gio_dic)
    print("# gene features read in from --gtf:", c_gene_infos)

    # Get most prominent transcripts or if --tr-list is set, read in transcript IDs.
    tr_ids_dic = {}  # transcript ID -> gene ID (--tr-list) or selection info (MPT).
    if args.tr_list:
        assert os.path.exists(args.tr_list), "given --tr-list file \"%s\" not found" % (args.tr_list)
        tr_ids_dic = benchlib.read_ids_into_dic(args.tr_list,
                                                check_dic=False)
        assert tr_ids_dic, "no IDs read in from provided --tr-list file. Please provide a valid IDs file (one ID per row)"
        # Map every listed transcript ID to its gene ID; IDs must be present
        # in the GTF read in above.
        for tr_id in tr_ids_dic:
            assert tr_id in tr2gid_dic, "transcript ID \"%s\" from provided --tr-list file does not appear in --gtf file. Please provide compatible IDs + files" %(tr_id)
            tr_ids_dic[tr_id] = tr2gid_dic[tr_id]
        print("# of transcript IDs (read in from --tr-list): ", len(tr_ids_dic))
    else:
        # Get most prominent transcripts from gene infos.
        print("Select most prominent transcript (MPT) for each gene ... ")
        tr_ids_dic = benchlib.select_mpts_from_gene_infos(gid2gio_dic,
                                basic_tag=False,  # do not be strict (only_tsl=False too).
                                ensembl_canonical_tag=False,
                                prior_basic_tag=True,  # Prioritize basic tag transcript.
                                prior_mane_select=True,  # mane select if set trumps all.
                                prior_lncrna_primary_tag=True,
                                only_tsl=False)
        assert tr_ids_dic, "most prominent transcript selection from gene infos failed. Please contact developers"
        print("# of transcript IDs (most prominent transcripts): ", len(tr_ids_dic))

    # Check minus-strand exon order in the GTF. True means exon 1 of a
    # minus-strand transcript is the most downstream exon (not the most
    # upstream one), which is the correct ordering.
    print("Check minus-strand exon order in --gtf ... ")
    correct_min_ex_order = benchlib.gtf_check_exon_order(args.in_gtf)
    order_note = "Correct order encountered ... " if correct_min_ex_order else "Reverse order encountered ... "
    print(order_note)

    # Get transcript infos for the selected transcript IDs.
    print("Read in transcript infos from --gtf ... ")
    tid2tio_dic = benchlib.gtf_read_in_transcript_infos(args.in_gtf,
                                                        tr_ids_dic=tr_ids_dic,
                                                        correct_min_ex_order=correct_min_ex_order,
                                                        chr_style=chr_style,
                                                        empty_check=False)

    assert tid2tio_dic, "no transcript infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"

    # (in)sanity checks: tr_ids_dic and tid2tio_dic must hold identical ID sets.
    # Fix: the original assert messages contained a %s placeholder without a
    # format argument, so a failure printed the literal "%s" instead of the
    # offending transcript ID.
    for tr_id in tr_ids_dic:
        assert tr_id in tid2tio_dic, "transcript ID %s not in tid2tio_dic" %(tr_id)
    for tr_id in tid2tio_dic:
        assert tr_id in tr_ids_dic, "transcript ID %s not in tr_ids_dic" %(tr_id)

    c_tr_infos = len(tid2tio_dic)
    print("# transcript features read in from --gtf:", c_tr_infos)

    # If --mrna-only, only select mRNAs, which also triggers mRNA region occupancy plots generation.
    tid2regl_dic = {}  # transcript ID -> [5'UTR len, CDS len, 3'UTR len].
    if args.only_mrna:
        # Get mRNA region lengths (from transcript IDs with CDS feature).
        print("Get mRNA region lengths ... ")
        # tid2tio_dic contains infos for transcripts in tr_ids_dic (i.e. either MPT or --tr-list defined ones).
        tid2regl_dic = benchlib.get_mrna_region_lengths(tid2tio_dic)
        assert tid2regl_dic, "tid2regl_dic empty. If --tr-list was set, this means none of supplied transcript IDs contain a CDS. If --tr-list was not set, this means that none of the MPTs contain a CDS. In this case please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE) or contact developers"
        c_mrna_tids = len(tid2regl_dic)
        if args.tr_list:
            print("# mRNA transcripts (containing CDS) from --tr-list:", c_mrna_tids)
        else:
            print("# mRNA transcripts (containing CDS, out of MPT selected set):", c_mrna_tids)

        assert c_mrna_tids, "no mRNA transcripts (containing CDS) found in --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE). Alternatively, if --tr-list was given, make sure that the list contains mRNA transcripts, or do not set --mrna-only"

        # Output mRNA regions (5'UTR CDS 3'UTR) to BED.
        # NOTE(review): mrna_regions_bed output path is defined earlier,
        # outside this section.
        print("Output mRNA regions to BED ... ")
        benchlib.output_mrna_regions_to_bed(tid2regl_dic, mrna_regions_bed)
    
        # Assign mRNA transcripts as transcripts to extract sequences for.
        tr_ids_dic = tid2regl_dic
        
        
    # Get transcript sequences.
    print("Extract transcript sequences ... ")
    # transcript ID -> spliced transcript sequence (extracted from --genome).
    tr_seqs_dic = benchlib.get_transcript_sequences_from_gtf(tid2tio_dic, args.in_genome,
                                                             tr_ids_dic=tr_ids_dic,
                                                             tmp_out_folder=args.out_folder)

    # (in)sanity check lengths: the three mRNA region lengths must add up to
    # the extracted sequence length.
    if args.only_mrna:
        for tr_id in tid2regl_dic:
            total_len = tid2regl_dic[tr_id][0] + tid2regl_dic[tr_id][1] + tid2regl_dic[tr_id][2]
            assert total_len == len(tr_seqs_dic[tr_id]), "mRNA region lengths do not match extracted sequence lengths. Please contact developers"

    # Init mRNA region position counts for plotting.
    norm_mrna_reg_dic = {}
    mrna_reg_occ_dic = {}

    if args.only_mrna:

        # Normalized (median or mean, depending on --mrna-norm-mode) lengths
        # of the three mRNA regions.
        utr5_len_norm, cds_len_norm, utr3_len_norm, norm_mode = benchlib.get_mrna_reg_norm_len(
                                                    tid2regl_dic,
                                                    mrna_norm_mode=args.mrna_norm_mode)

        if args.mrna_norm_mode == 1:
            print("Median lengths of mRNA regions:")
        elif args.mrna_norm_mode == 2:
            print("Mean lengths of mRNA regions:")

        print("5'UTR = ", utr5_len_norm)
        print("CDS   = ", cds_len_norm)
        print("3'UTR = ", utr3_len_norm)

        norm_mrna_reg_dic = {"5'UTR": utr5_len_norm, "CDS": cds_len_norm, "3'UTR": utr3_len_norm, "mode": norm_mode, "c_mrna_seqs": c_mrna_tids}

        # Zero-initialized per-position occupancy lists (one list per mRNA
        # region) for every RBP ID and for each of its motif IDs.
        for rbp_id, rbp in search_rbps_dic.items():
            for occ_id in [rbp_id] + rbp.seq_motif_ids + rbp.str_motif_ids:
                mrna_reg_occ_dic[occ_id] = {
                    "5'UTR": [0] * round(utr5_len_norm),
                    "CDS": [0] * round(cds_len_norm),
                    "3'UTR": [0] * round(utr3_len_norm),
                }


    # Output sequences to FASTA.
    print("Output transcript sequences to FASTA ... ")
    benchlib.fasta_output_dic(tr_seqs_dic, tr_seqs_fa,
                              split=True)

    # Number of transcript sequences used for motif search.
    c_regions = len(tr_seqs_dic)

    # Region lengths + total (called) region size.
    len_list = [len(seq) for seq in tr_seqs_dic.values()]
    called_reg_size = sum(len_list)

    # Effective region size equals the called size here — presumably because
    # transcript sequences cannot overlap each other (TODO confirm).
    eff_reg_size = called_reg_size

    # Length statistics.
    reg_len_median = statistics.median(len_list)
    reg_len_mean = round(statistics.mean(len_list), 2)
    reg_len_min = min(len_list)
    reg_len_max = max(len_list)


    """
    ====================================
    RUN SEQUENCE MOTIF SEARCH WITH FIMO.
    ====================================
    
    """
    fimo_hits_list = []
    call_dic = {}

    if seq_rbps_dic:

        """
        Print motifs to file.

        """

        print("Output motifs to XML ... ")
        out_str, c_added_motifs = benchlib.blocks_to_xml_string(seq_motif_blocks_dic, loaded_motif_ids_dic)

        benchlib.output_string_to_file(out_str, seq_motifs_xml)


        """
        Run FIMO on sequences + motifs.

        """

        print("Run FIMO ... ")
        benchlib.run_fast_fimo(tr_seqs_fa, seq_motifs_xml, fimo_res_tsv,
                    pval_thr=args.fimo_pval,
                    nt_freqs_file=fimo_freqs_file,
                    call_dic=call_dic,
                    params=fimo_params,
                    error_check=False)

        """
        Read in FIMO hits.

        """

        assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

        print("Read in FIMO results ... ")
        fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                       only_best_hits=args.greatest_hits,
                                                       seq_based=True)

        c_fimo_hits = len(fimo_hits_list)
        print("# of FIMO motif hits:", c_fimo_hits)

        """
        If --regex is set, search for regex hits in sequences (stored in out_seqs_dic).

        """
        if args.regex:

            step_size_one = False
            if args.regex_search_mode == 1:
                step_size_one = True
            elif args.regex_search_mode == 2:
                step_size_one = False
            else:
                assert False, "invalid --regex-search-mode %i set" %(args.regex_search_mode)

            print("Run search for --regex \"%s\" ... " %(regex))
            regex_hits_list = benchlib.get_regex_hits(regex, regex_id, tr_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      seq_based=True,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=args.motif_regex_id)

            c_regex_hits = len(regex_hits_list)
            print("# of regex hits:", c_regex_hits)

            # Add regex hits to fimo_hits_list.
            fimo_hits_list += regex_hits_list


    """
    =========================================
    RUN STRUCTURE MOTIF SEARCH WITH CMSEARCH.
    =========================================

    """
    cmsearch_hits_list = []

    if str_rbps_dic:
        
        print("Output covariance models to .cm ... ")
        benchlib.output_cm_blocks_to_file(str_motif_blocks_dic, loaded_motif_ids_dic, str_motifs_cm)

        # Run cmsearch.
        print("Run cmsearch ... ")
        cmsh_mode = ""
        if args.cmsearch_mode == 1:
            cmsh_mode = "--default"
        elif args.cmsearch_mode == 2:
            cmsh_mode = "--max"
        else:
            assert False, "invalid --cmsearch-mode %i set" %(args.cmsearch_mode)
        cmsh_params = "-g --tformat fasta --toponly --incT %s -T %s %s" %(args.cmsearch_bs, args.cmsearch_bs, cmsh_mode)

        benchlib.run_cmsearch(tr_seqs_fa, str_motifs_cm, cmsearch_res_txt,
                        error_check=False,
                        call_dic=call_dic,
                        params=cmsh_params) # or add --anytrunc and remove --g
        # Read in hits.
        print("Read in cmsearch results ... ")
        cmsearch_hits_list, c_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                           seq_based=True,
                                                                           only_best_hits=args.greatest_hits,
                                                                           check=True)

        print("# of cmsearch motif hits:", c_cms_hits)


    """
    Store for each RBP the regions with motif hits (and hit counts), using
    dictionary of dictionaries regions_with_motifs_dic.
    This tells us, how many input regions have motif hits, separated by RBP.
    Also store for each RBP the unique motif hit regions (and hit counts), using
    dictionary of dictionaries unique_motifs_dic.

    regions_with_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'region1': motif_c_region1, 'region2': motif_c_region2}, rbp_id2 -> {'region1': motif_c_region1}}
    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }

    """

    regions_with_motifs_dic = {}
    unique_motifs_dic = {}

    # Store regions with sequence motifs.
    for fh in fimo_hits_list:

        rbp_id = id2name_dic[fh.motif_id]

        if rbp_id in regions_with_motifs_dic:
            # fh.seq_name : FASTA header (== --in genomic sequence region).
            if fh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][fh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][fh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][fh.seq_name] = 1

        fh_str = repr(fh) # genomic motif region string.

        # Unique motif regions for each RBP.
        if rbp_id in unique_motifs_dic:
            if fh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][fh_str] += 1
            else:
                unique_motifs_dic[rbp_id][fh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][fh_str] = 1

    # Store regions with structure motifs.
    for cmsh in cmsearch_hits_list:

        rbp_id = id2name_dic[cmsh.motif_id]

        if rbp_id in regions_with_motifs_dic:
            # cmsh.seq_name : FASTA header (== --in genomic sequence region).
            if cmsh.seq_name in regions_with_motifs_dic[rbp_id]:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] += 1
            else:
                regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1
        else:
            regions_with_motifs_dic[rbp_id] = {}
            regions_with_motifs_dic[rbp_id][cmsh.seq_name] = 1

        cmsh_str = repr(cmsh) # genomic motif region string.

        # Unique motif regions for each RBP.
        if rbp_id in unique_motifs_dic:
            if cmsh_str in unique_motifs_dic[rbp_id]:
                unique_motifs_dic[rbp_id][cmsh_str] += 1
            else:
                unique_motifs_dic[rbp_id][cmsh_str] = 1
        else:
            unique_motifs_dic[rbp_id] = {}
            unique_motifs_dic[rbp_id][cmsh_str] = 1

    """
    Store infos for each RBP in RBP class.

    search_rbps_dic[rbp_id] = rbp_class
    RBP class arguments:
            name: str,
            seq_motif_ids = None,
            str_motif_ids = None,
            c_hit_reg = 0, # # regions with motif hits.
            perc_hit_reg = 0.0, # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
            c_motif_hits = 0, # # motif hits.
            c_uniq_motif_hits = 0, # # unique motif hits.
            c_uniq_motif_nts = 0, # # unique motif nucleotides.
            perc_uniq_motif_nts_eff_reg = 0.0, # % unique motif nts over effective region length.
            perc_uniq_motif_nts_cal_reg = 0.0, # % unique motif nts over called region length.
            uniq_motif_hits_eff_1000nt = 0.0, # unique motif hits per effective 1000 nt.
            uniq_motif_hits_cal_1000nt = 0.0, # unique motif hits per called 1000 nt.
            ks_pval = 1.0, # Kolmogorov-Smirnov (KS) statistic p-value (are higher scoring sites enriched with motifs).
            ks_stat = 0.0,
            organism: Optional[str] = None

    Number of sequences for FIMO / cmsearch: 
    c_regions

    """
    # Set number of no-hit regions.
    for rbp_id in search_rbps_dic:
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions

    for rbp_id in regions_with_motifs_dic:
        # Number of --in regions with RBP motif hits.
        c_hit_reg = len(regions_with_motifs_dic[rbp_id])

        # Number of motif hits on --in regions in total.
        c_motif_hits = 0
        for reg_id in regions_with_motifs_dic[rbp_id]:
            c_motif_hits += regions_with_motifs_dic[rbp_id][reg_id]
        search_rbps_dic[rbp_id].c_hit_reg = c_hit_reg
        search_rbps_dic[rbp_id].c_no_hit_reg = c_regions - c_hit_reg
        search_rbps_dic[rbp_id].c_motif_hits = c_motif_hits

        # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
        search_rbps_dic[rbp_id].perc_hit_reg = (search_rbps_dic[rbp_id].c_hit_reg / c_regions) * 100

    """
    Get unique motif hits.

    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }
    """

    for rbp_id in unique_motifs_dic:
        c_uniq_motif_hits = len(unique_motifs_dic[rbp_id])
        search_rbps_dic[rbp_id].c_uniq_motif_hits = c_uniq_motif_hits
        # Store individual motif unique hits.
        for motif_str_repr in unique_motifs_dic[rbp_id]:
            motif_id = benchlib.get_motif_id_from_str_repr(motif_str_repr)
            idx = motif_id2idx_dic[motif_id]
            if id2type_dic[motif_id] == "meme_xml":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "regex":
                search_rbps_dic[rbp_id].seq_motif_hits[idx] += 1
            elif id2type_dic[motif_id] == "cm":
                search_rbps_dic[rbp_id].str_motif_hits[idx] += 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

    """
    Number of motif nucleotides over called + effective region size.

    """

    print("Calculate effective motif region sizes for each RBP ... ")
    for rbp_id in unique_motifs_dic:
        # Output unique motif hit regions (sequence or structure) to BED for RBP rbp_id.
        benchlib.output_motif_hits_to_bed(rbp_id, unique_motifs_dic, out_tmp_bed,
                                          one_based_start=True)
        # Effective motif region size — presumably the number of distinct
        # genomic nucleotides covered by the RBP's motif hits (helper name
        # suggests overlapping hits are counted once) — TODO confirm in benchlib.
        eff_motif_reg_size = benchlib.get_uniq_gen_size(out_tmp_bed)

        # Number of unique motif nucleotides.
        search_rbps_dic[rbp_id].c_uniq_motif_nts = eff_motif_reg_size
        # % unique motif nts over effective region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg = (eff_motif_reg_size / eff_reg_size) * 100
        # % unique motif nts over called region length.
        search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg = (eff_motif_reg_size / called_reg_size) * 100
        # Number of unique motif hits per effective 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (eff_reg_size / 1000)
        # Number of unique motif hits per called 1000 nt.
        search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt  = search_rbps_dic[rbp_id].c_uniq_motif_hits / (called_reg_size / 1000)

    """
    # Print RBP object stats.
    for rbp_id in search_rbps_dic:
        print(search_rbps_dic[rbp_id].__dict__)

    """

    # print(search_rbps_dic["AGGF1"].__dict__)
    print("# --in regions for motif search:", c_regions)
    print("Called genomic region size:     ", called_reg_size)
    print("Effective genomic region size:  ", eff_reg_size)


    """
    Output RBP hit stats (ie one row per RBP).

    Output columns:
    rbp_id
    c_regions
    called_reg_size
    effective_reg_size
    c_reg_with_hits
    perc_reg_with_hits
    c_motif_hits
    c_uniq_motif_hits
    c_uniq_motif_nts
    perc_uniq_motif_nts_cal_reg
    perc_uniq_motif_nts_eff_reg
    uniq_motif_hits_cal_1000nt
    uniq_motif_hits_eff_1000nt
    wc_pval
    seq_motif_ids
    seq_motif_hits
    str_motif_ids
    str_motif_hits

    """

    rbp_list = []

    OUTSTATS = open(rbp_stats_out, "w")
    rbp_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\trbp_id\tc_regions\tmean_reg_len\tmedian_reg_len\tmin_reg_len\tmax_reg_len\t"
    rbp_stats_header += "called_reg_size\teffective_reg_size\tc_reg_with_hits\tperc_reg_with_hits\t"
    rbp_stats_header += "c_motif_hits\tc_uniq_motif_hits\tc_uniq_motif_nts\tperc_uniq_motif_nts_cal_reg\tperc_uniq_motif_nts_eff_reg\tuniq_motif_hits_cal_1000nt\t"
    rbp_stats_header += "uniq_motif_hits_eff_1000nt\twc_pval\twc_rbc_eff_size\twc_cl_eff_size\tseq_motif_ids\tseq_motif_hits\tstr_motif_ids\tstr_motif_hits\tinternal_id\n"
    OUTSTATS.write(rbp_stats_header)

    for rbp_id in search_rbps_dic:

        # print(search_rbps_dic[rbp_id].__dict__)
        rbp_list.append(rbp_id)

        motif_db_out = loaded_rbps_dic[rbp_id]

        c_reg_with_hits = search_rbps_dic[rbp_id].c_hit_reg
        perc_reg_with_hits = search_rbps_dic[rbp_id].perc_hit_reg
        c_motif_hits = search_rbps_dic[rbp_id].c_motif_hits
        c_uniq_motif_hits = search_rbps_dic[rbp_id].c_uniq_motif_hits
        c_uniq_motif_nts = search_rbps_dic[rbp_id].c_uniq_motif_nts
        perc_uniq_motif_nts_cal_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_cal_reg
        perc_uniq_motif_nts_eff_reg = search_rbps_dic[rbp_id].perc_uniq_motif_nts_eff_reg
        uniq_motif_hits_cal_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_cal_1000nt
        uniq_motif_hits_eff_1000nt = search_rbps_dic[rbp_id].uniq_motif_hits_eff_1000nt
        wc_pval = "-"
        wc_rbc_es = "-"
        wc_cl_es = "-"
        internal_id = search_rbps_dic[rbp_id].internal_id

        seq_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].seq_motif_hits)
        str_motif_hits = ",".join(str(hc) for hc in search_rbps_dic[rbp_id].str_motif_hits)
        seq_motif_ids = ",".join(search_rbps_dic[rbp_id].seq_motif_ids)
        str_motif_ids = ",".join(search_rbps_dic[rbp_id].str_motif_ids)
        if not seq_motif_hits:
            seq_motif_hits = "-"
        if not str_motif_hits:
            str_motif_hits = "-"
        if not seq_motif_ids:
            seq_motif_ids = "-"
        if not str_motif_ids:
            str_motif_ids = "-"

        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += rbp_id + "\t"

        row_str += str(c_regions) + "\t"

        row_str += str(reg_len_mean) + "\t"
        row_str += str(reg_len_median) + "\t"
        row_str += str(reg_len_min) + "\t"
        row_str += str(reg_len_max) + "\t"

        row_str += str(called_reg_size) + "\t"
        row_str += str(eff_reg_size) + "\t"

        row_str += str(c_reg_with_hits) + "\t"
        row_str += str(perc_reg_with_hits) + "\t"

        row_str += str(c_motif_hits) + "\t"
        row_str += str(c_uniq_motif_hits) + "\t"
        row_str += str(c_uniq_motif_nts) + "\t"
        row_str += str(perc_uniq_motif_nts_cal_reg) + "\t"
        row_str += str(perc_uniq_motif_nts_eff_reg) + "\t"
        row_str += str(uniq_motif_hits_cal_1000nt) + "\t"
        row_str += str(uniq_motif_hits_eff_1000nt) + "\t"
        row_str += wc_pval + "\t"
        row_str += wc_rbc_es + "\t"
        row_str += wc_cl_es + "\t"
        row_str += seq_motif_ids + "\t"
        row_str += seq_motif_hits + "\t"
        row_str += str_motif_ids + "\t"
        row_str += str_motif_hits + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    OUTSTATS.close()


    """
    Output motif region stats (1 row for each motif hit).
    Report ALL motif hits,
    plus report how many times one genomic motif hit occurs (uniq_count). 

    """

    tr_ids_with_hits_dic = {}
    # RBP ID to transcript ID dictionary. Effectively stores which transcript IDs have motif hits for each RBP.
    rbp2tr_id_dic = {}
    for rbp_id in loaded_rbps_dic:
        rbp2tr_id_dic[rbp_id] = {}

    motif_hit2annot_dic = {}

    OUTSTATS = open(motif_stats_out,"w")

    motif_stats_header = "data_id\tmethod_id\trun_id\tmotif_db\tregion_id\trbp_id\tmotif_id\tchr_id\tgen_s\tgen_e\tstrand\tregion_s\tregion_e\tregion_len\t"
    motif_stats_header += "uniq_count\tfimo_score\tfimo_pval\tcms_score\tcms_eval\tmatched_seq\tinternal_id\n"
    OUTSTATS.write(motif_stats_header)

    # Unique motif regions BED.
    motif_reg_dic = {}
    match_c_dic = {}

    for rbp_id in search_rbps_dic:
        match_c_dic[rbp_id] = {}
        for motif_id in search_rbps_dic[rbp_id].seq_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}
        for motif_id in search_rbps_dic[rbp_id].str_motif_ids:
            match_c_dic[rbp_id][motif_id] = {}

    for fh in fimo_hits_list:

        rbp_id = id2name_dic[fh.motif_id]
        region_id = fh.seq_name
        region_len = len(tr_seqs_dic[region_id])
        # genomic motif region string.
        fh_str = repr(fh)
        uniq_count = unique_motifs_dic[rbp_id][fh_str]

        motif_db_out = loaded_motif_ids_dic[fh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Motif hit ID.
        hit_id = "%s:%s-%s(%s)%s" %(fh.chr_id, str(fh.start), str(fh.end), fh.strand, fh.motif_id)

        tr_id = fh.chr_id

        annot = False

        if args.only_mrna:

            # Center position of motif hit.
            motif_hit_s = fh.start - 1
            motif_hit_e = fh.end
            
            center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)  # 1-based center position.

            utr5_len = tid2regl_dic[tr_id][0]
            cds_len = tid2regl_dic[tr_id][1]
            utr3_len = tid2regl_dic[tr_id][2]

            utr5_e = utr5_len
            cds_s = utr5_e
            cds_e = cds_s + cds_len
            utr3_s = cds_e
            utr3_e = utr3_s + utr3_len

            occ_pos_seen_dic = {}  # Check if a position already received a count.

            annot = ["intergenic", False]

            if center_pos <= utr5_e:  # In 5'UTR.

                occ_pos = round((center_pos / utr5_len) * utr5_len_norm)
                if occ_pos > 0:
                    occ_pos -= 1

                occ_pos_str = "utr5;%i" %(occ_pos)
                if occ_pos_str not in occ_pos_seen_dic:
                    mrna_reg_occ_dic[rbp_id]["5'UTR"][occ_pos] += 1
                    mrna_reg_occ_dic[fh.motif_id]["5'UTR"][occ_pos] += 1
                    occ_pos_seen_dic[occ_pos_str] = 1

                # mrna_reg_occ_dic[rbp_id]["5'UTR"][occ_pos] += 1
                # mrna_reg_occ_dic[fh.motif_id]["5'UTR"][occ_pos] += 1

                annot = ["5'UTR", tr_id]

            elif center_pos > utr5_e and center_pos <= cds_e:  # In CDS.

                occ_pos = round(((center_pos - utr5_e) / cds_len) * cds_len_norm)
                if occ_pos > 0:
                    occ_pos -= 1

                occ_pos_str = "cds;%i" %(occ_pos)
                if occ_pos_str not in occ_pos_seen_dic:
                    mrna_reg_occ_dic[rbp_id]["CDS"][occ_pos] += 1
                    mrna_reg_occ_dic[fh.motif_id]["CDS"][occ_pos] += 1
                    occ_pos_seen_dic[occ_pos_str] = 1

                # mrna_reg_occ_dic[rbp_id]["CDS"][occ_pos] += 1
                # mrna_reg_occ_dic[fh.motif_id]["CDS"][occ_pos] += 1

                annot = ["CDS", tr_id]
            
            elif center_pos > cds_e and center_pos <= utr3_e:  # In 3'UTR.

                occ_pos = round(((center_pos - cds_e) / utr3_len) * utr3_len_norm)
                if occ_pos > 0:
                    occ_pos -= 1

                occ_pos_str = "utr3;%i" %(occ_pos)
                if occ_pos_str not in occ_pos_seen_dic:
                    mrna_reg_occ_dic[rbp_id]["3'UTR"][occ_pos] += 1
                    mrna_reg_occ_dic[fh.motif_id]["3'UTR"][occ_pos] += 1
                    occ_pos_seen_dic[occ_pos_str] = 1

                # mrna_reg_occ_dic[rbp_id]["3'UTR"][occ_pos] += 1
                # mrna_reg_occ_dic[fh.motif_id]["3'UTR"][occ_pos] += 1

                annot = ["3'UTR", tr_id]

            else:
                assert False, "center position %i not in 5'UTR, CDS or 3'UTR of transcript ID %s (transcript length: %i)" %(center_pos, tr_id, len(tr_seqs_dic[tr_id]))

            motif_hit2annot_dic[hit_id] = annot

        # Store transcript IDs with motif hits.
        if args.goa_rna_region == 1:
            if tr_id in tr_ids_with_hits_dic:
                tr_ids_with_hits_dic[tr_id] += 1
            else:
                tr_ids_with_hits_dic[tr_id] = 1
            if tr_id in rbp2tr_id_dic[rbp_id]:
                rbp2tr_id_dic[rbp_id][tr_id] += 1
            else:
                rbp2tr_id_dic[rbp_id][tr_id] = 1
            
        elif args.goa_rna_region == 2:
            if annot[0] == "3'UTR":
                if tr_id in tr_ids_with_hits_dic:
                    tr_ids_with_hits_dic[tr_id] += 1
                else:
                    tr_ids_with_hits_dic[tr_id] = 1
                if tr_id in rbp2tr_id_dic[rbp_id]:
                    rbp2tr_id_dic[rbp_id][tr_id] += 1
                else:
                    rbp2tr_id_dic[rbp_id][tr_id] = 1

        elif args.goa_rna_region == 3:
            if annot[0] == "CDS":
                if tr_id in tr_ids_with_hits_dic:
                    tr_ids_with_hits_dic[tr_id] += 1
                else:
                    tr_ids_with_hits_dic[tr_id] = 1
                if tr_id in rbp2tr_id_dic[rbp_id]:
                    rbp2tr_id_dic[rbp_id][tr_id] += 1
                else:
                    rbp2tr_id_dic[rbp_id][tr_id] = 1

        elif args.goa_rna_region == 4:
            if annot[0] == "5'UTR":
                if tr_id in tr_ids_with_hits_dic:
                    tr_ids_with_hits_dic[tr_id] += 1
                else:
                    tr_ids_with_hits_dic[tr_id] = 1
                if tr_id in rbp2tr_id_dic[rbp_id]:
                    rbp2tr_id_dic[rbp_id][tr_id] += 1
                else:
                    rbp2tr_id_dic[rbp_id][tr_id] = 1

        # Get matched sequence.
        matched_seq = benchlib.get_matched_seq(fh.seq_name, tr_seqs_dic, fh.seq_s, fh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        # Store motif hit as BED.
        if hit_id not in motif_reg_dic:
            annot_str = "-"
            if annot:
                annot_str = annot[0]
            if fh.hit_type == "str_pat":
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.gc_frac), str(fh.gu_frac), annot_str, matched_seq)
            else:
                bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t-\t%s\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.score), str(fh.pval), annot_str, matched_seq)
            # bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t%s\t%s\t-1.0\t-1.0\t%s\t%s" %(fh.chr_id, fh.start-1, fh.end, rbp_id, fh.motif_id, uniq_count, args.method_id, args.data_id, fh.strand, str(fh.score), str(fh.pval), annot_str, matched_seq)
            motif_reg_dic[hit_id] = bed_row
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if fh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][fh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][fh.motif_id]:
                match_c_dic[rbp_id][fh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][fh.motif_id][matched_seq] += 1

        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += fh.motif_id + "\t"
        row_str += fh.chr_id + "\t"
        row_str += str(fh.start) + "\t"  # 1-based.
        row_str += str(fh.end) + "\t"
        row_str += fh.strand + "\t"
        row_str += str(fh.seq_s) + "\t"  # 1-based.
        row_str += str(fh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += str(fh.score) + "\t"
        row_str += str(fh.pval) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    for cmsh in cmsearch_hits_list:

        # One motif hit stats row per cmsearch (covariance model) hit. This
        # loop mirrors the FIMO hit loop above, but fills the cms score /
        # e-value columns instead of the FIMO score / p-value columns.
        rbp_id = id2name_dic[cmsh.motif_id]
        region_id = cmsh.seq_name
        region_len = len(tr_seqs_dic[region_id])
        # Genomic motif region string (key into unique motif hits dictionary).
        cmsh_str = repr(cmsh)
        uniq_count = unique_motifs_dic[rbp_id][cmsh_str]

        motif_db_out = loaded_motif_ids_dic[cmsh.motif_id]

        internal_id = search_rbps_dic[rbp_id].internal_id

        # Motif hit ID.
        hit_id = "%s:%s-%s(%s)%s" %(cmsh.chr_id, str(cmsh.start), str(cmsh.end), cmsh.strand, cmsh.motif_id)

        # BUGFIX: use cmsh.chr_id here. The original read fh.chr_id, i.e. the
        # loop variable left over from the preceding FIMO loop (undefined if
        # there were no FIMO hits), assigning wrong transcript IDs to cms hits.
        tr_id = cmsh.chr_id

        annot = False

        if args.only_mrna:

            # Center position of motif hit (1-based).
            motif_hit_s = cmsh.start - 1
            motif_hit_e = cmsh.end
            center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

            # 5'UTR, CDS, 3'UTR lengths of transcript tr_id.
            utr5_len = tid2regl_dic[tr_id][0]
            cds_len = tid2regl_dic[tr_id][1]
            utr3_len = tid2regl_dic[tr_id][2]

            # Region end coordinates on the transcript.
            utr5_e = utr5_len
            cds_s = utr5_e
            cds_e = cds_s + cds_len
            utr3_s = cds_e
            utr3_e = utr3_s + utr3_len

            annot = ["intergenic", False]

            if center_pos <= utr5_e:  # In 5'UTR.

                occ_pos = round((center_pos / utr5_len) * utr5_len_norm)
                # Shift to 0-based list index (consistent with the FIMO loop;
                # without this, occ_pos == utr5_len_norm raises IndexError).
                if occ_pos > 0:
                    occ_pos -= 1

                mrna_reg_occ_dic[rbp_id]["5'UTR"][occ_pos] += 1
                mrna_reg_occ_dic[cmsh.motif_id]["5'UTR"][occ_pos] += 1

                annot = ["5'UTR", tr_id]

            elif center_pos > utr5_e and center_pos <= cds_e:  # In CDS.

                occ_pos = round(((center_pos - utr5_e) / cds_len) * cds_len_norm)
                # 0-based index shift (see 5'UTR branch).
                if occ_pos > 0:
                    occ_pos -= 1

                mrna_reg_occ_dic[rbp_id]["CDS"][occ_pos] += 1
                mrna_reg_occ_dic[cmsh.motif_id]["CDS"][occ_pos] += 1

                annot = ["CDS", tr_id]

            elif center_pos > cds_e and center_pos <= utr3_e:  # In 3'UTR.

                occ_pos = round(((center_pos - cds_e) / utr3_len) * utr3_len_norm)
                # 0-based index shift (see 5'UTR branch).
                if occ_pos > 0:
                    occ_pos -= 1

                mrna_reg_occ_dic[rbp_id]["3'UTR"][occ_pos] += 1
                mrna_reg_occ_dic[cmsh.motif_id]["3'UTR"][occ_pos] += 1

                annot = ["3'UTR", tr_id]

            else:
                assert False, "center position %i not in 5'UTR, CDS or 3'UTR of transcript ID %s (transcript length: %i)" %(center_pos, tr_id, len(tr_seqs_dic[tr_id]))

            motif_hit2annot_dic[hit_id] = annot

        # Store transcript IDs with motif hits, depending on --goa-rna-region
        # (1: any hit, 2: only 3'UTR hits, 3: only CDS hits, 4: only 5'UTR hits).
        count_tr_hit = False
        if args.goa_rna_region == 1:
            count_tr_hit = True
        elif args.goa_rna_region == 2:
            count_tr_hit = annot[0] == "3'UTR"
        elif args.goa_rna_region == 3:
            count_tr_hit = annot[0] == "CDS"
        elif args.goa_rna_region == 4:
            count_tr_hit = annot[0] == "5'UTR"

        if count_tr_hit:
            tr_ids_with_hits_dic[tr_id] = tr_ids_with_hits_dic.get(tr_id, 0) + 1
            rbp2tr_id_dic[rbp_id][tr_id] = rbp2tr_id_dic[rbp_id].get(tr_id, 0) + 1

        # Get matched sequence.
        matched_seq = benchlib.get_matched_seq(cmsh.seq_name, tr_seqs_dic, cmsh.seq_s, cmsh.seq_e)
        if not matched_seq:
            matched_seq = "-"

        # Store motif hit as BED (unique hits only; duplicate occurrence count
        # is recorded via uniq_count inside BED column 4).
        if hit_id not in motif_reg_dic:
            annot_str = "-"
            if annot:
                annot_str = annot[0]
            # CONSISTENCY FIX: added the "-" placeholder column (column 11) so
            # cms hit rows carry the same 13 columns as the FIMO hit rows above
            # (columns 7-8: FIMO score/p-value placeholders, 9-10: cms
            # score/e-value, 11: "-", 12: annotation, 13: matched sequence).
            bed_row = "%s\t%i\t%i\t%s:%s;%i;%s:%s\t0\t%s\t-1.0\t-1.0\t%s\t%s\t-\t%s\t%s" %(cmsh.chr_id, cmsh.start-1, cmsh.end, rbp_id, cmsh.motif_id, uniq_count, args.method_id, args.data_id, cmsh.strand, str(cmsh.score), str(cmsh.e_value), annot_str, matched_seq)
            motif_reg_dic[hit_id] = bed_row
            # Count matched sequence occurrences per RBP + motif ID.
            if rbp_id not in match_c_dic:
                match_c_dic[rbp_id] = {}
            if cmsh.motif_id not in match_c_dic[rbp_id]:
                match_c_dic[rbp_id][cmsh.motif_id] = {}
            if matched_seq not in match_c_dic[rbp_id][cmsh.motif_id]:
                match_c_dic[rbp_id][cmsh.motif_id][matched_seq] = 0
            match_c_dic[rbp_id][cmsh.motif_id][matched_seq] += 1

        # Motif hit stats table row (FIMO score/p-value columns set to "-").
        row_str = args.data_id + "\t"
        row_str += args.method_id + "\t"
        row_str += run_id + "\t"
        row_str += motif_db_out + "\t"
        row_str += region_id + "\t"
        row_str += rbp_id + "\t"
        row_str += cmsh.motif_id + "\t"
        row_str += cmsh.chr_id + "\t"
        row_str += str(cmsh.start) + "\t"  # 1-based.
        row_str += str(cmsh.end) + "\t"
        row_str += cmsh.strand + "\t"
        row_str += str(cmsh.seq_s) + "\t"  # 1-based.
        row_str += str(cmsh.seq_e) + "\t"
        row_str += str(region_len) + "\t"
        row_str += str(uniq_count) + "\t"
        row_str += "-\t"
        row_str += "-\t"
        row_str += str(cmsh.score) + "\t"
        row_str += str(cmsh.e_value) + "\t"
        row_str += matched_seq + "\t"
        row_str += internal_id + "\n"

        OUTSTATS.write(row_str)

    OUTSTATS.close()

    """
    Output motif hits as BED.

    The motif hits written to motif_hits_bed_out are unique motif hits already. 
    If same hit occurs > 1, this is recorded in BED column 4 with format:
    rbp_id:motif_id;uniq_count;method_id:data_id
    These also include regex hits, as they are part of fimo hits.
    
    """

    OUTBED = open(motif_hits_bed_out, "w")
    for hit_id in motif_reg_dic:
        OUTBED.write("%s\n" %(motif_reg_dic[hit_id]))
    OUTBED.close()


    """
    Output matched sequence counts in table. 

    Formats: 
    match_c_total_dic[rbp_id][motif_id] = count
    match_c_dic[rbp_id][motif_id][matched_seq] = count

    """

    OUTTSV = open(matched_seqs_out, "w")
    OUTTSV.write("rbp_id\tmotif_id\tmatched_seq\tmatch_count\tmatch_perc\n")

    match_c_total_dic = benchlib.get_match_c_total_dic(match_c_dic)

    for rbp_id in match_c_dic:
        for motif_id in match_c_dic[rbp_id]:
            total_c = match_c_total_dic[rbp_id][motif_id]
            for matched_seq in match_c_dic[rbp_id][motif_id]:
                match_c = match_c_dic[rbp_id][motif_id][matched_seq]
                match_perc = 0.0
                if match_c > 0:
                    match_perc = (float(match_c)/float(total_c)) * 100.0
                row_str = "%s\t%s\t%s\t%i\t%.2f\n" %(rbp_id, motif_id, matched_seq, match_c, match_perc)
                OUTTSV.write(row_str)
    OUTTSV.close()


    """
    GO enrichment analysis.

    """

    # Further filter tr_ids_with_hits_dic such that only transcript IDs with hits from all specified RBPs are selected for GOA.
    if args.goa_only_cooc:
        print("Filter transcripts by only keeping ones with hits from all specified RBPs ...")
        print("# of transcripts with hits before filtering:", len(tr_ids_with_hits_dic))

        assert len(search_rbps_dic) == len(loaded_rbps_dic), "number of search RBPs (%i) does not match number of loaded RBPs (%i)" %(len(search_rbps_dic), len(loaded_rbps_dic))

        # Expected count.
        exp_count = len(loaded_rbps_dic)
        tids_to_remove = []

        for tid in tr_ids_with_hits_dic:
            tid_count = 0
            for rbp_id in rbp2tr_id_dic:
                if tid in rbp2tr_id_dic[rbp_id]:
                    tid_count += 1
            if tid_count < exp_count:
                tids_to_remove.append(tid)
        
        for tid in tids_to_remove:
            tr_ids_with_hits_dic.pop(tid)

        print("# of transcripts with hits after filtering: ", len(tr_ids_with_hits_dic))


    goa_results_df = False
    goa_stats_dic = {}
    propagate_counts = True

    if args.run_goa_tr:

        # Write empty file.
        open(goa_results_tsv, "w").close()

        print("")
        print("GOA enabled (--goa) ... ")

        target_genes_dic = {}
        background_genes_dic = {}

        # Get background gene IDs.
        for gene_id in gid2gio_dic:
            background_genes_dic[gene_id] = gid2gio_dic[gene_id].gene_name

        # Get target gene IDs.
        for tid in tr_ids_with_hits_dic:
            if tid in tr2gid_dic:
                gene_id = tr2gid_dic[tid]
                target_genes_dic[gene_id] = 1

        goa_stats_dic["c_target_genes_pre_filter"] = len(target_genes_dic)
        goa_stats_dic["c_background_genes_pre_filter"] = len(background_genes_dic)
        goa_stats_dic["pval_thr"] = args.goa_pval
        goa_stats_dic["goa_obo_mode"] = args.goa_obo_mode
        goa_stats_dic["propagate_counts"] = propagate_counts
        goa_stats_dic["excluded_terms"] = "-"
        goa_stats_dic["goa_only_cooc"] = args.goa_only_cooc
        goa_stats_dic["goa_max_child"] = args.goa_max_child
        goa_stats_dic["goa_min_depth"] = args.goa_min_depth
        goa_stats_dic["goa_filter_purified"] = args.goa_filter_purified

        if tr_ids_with_hits_dic:  # If there are motif hits in transcript regions.

            if args.goa_rna_region == 1:
                print("Hits on transcripts found. Running GOA on genes of transcripts with hits ... ")
            elif args.goa_rna_region == 2:
                print("Hits in 3'UTR regions found. Running GOA on genes of transcripts with 3'UTR hits ... ")
            elif args.goa_rna_region == 3:
                print("Hits in CDS regions found. Running GOA on genes of transcripts with CDS hits ... ")
            elif args.goa_rna_region == 4:
                print("Hits in 5'UTR regions found. Running GOA on genes of transcripts with 5'UTR hits ... ")

            local_gid2go_file = benchlib_path + "/content/ensembl_gene_id2go_ids.biomart.GRCh38.112.tsv.gz"
            local_obo_file = benchlib_path + "/content/go-basic.obo.gz"

            assert os.path.exists(local_gid2go_file), "local gene ID to GO ID file \"%s\" not found" %(local_gid2go_file)
            assert os.path.exists(local_obo_file), "local GO OBO file \"%s\" not found" %(local_obo_file)

            gid2go_file = local_gid2go_file
            if args.goa_gene2go_file:
                gid2go_file = args.goa_gene2go_file
                assert os.path.exists(gid2go_file), "provided --goa-gene2go-file \"%s\" not found" %(gid2go_file)
            goa_obo_file = local_obo_file
            if args.goa_obo_file and args.goa_obo_mode == 3:
                goa_obo_file = args.goa_obo_file
                assert os.path.exists(goa_obo_file), "provided --goa-obo-file \"%s\" not found" %(goa_obo_file)

            # Run GOA.
            goa_results_df = benchlib.run_go_analysis(target_genes_dic, background_genes_dic, 
                                                      gid2go_file, args.out_folder,
                                                      pval_thr=args.goa_pval,
                                                      excluded_terms = [],  # do not exclude any GO terms.
                                                      goa_obo_mode=args.goa_obo_mode,
                                                      propagate_counts=propagate_counts,
                                                      stats_dic=goa_stats_dic,
                                                      store_gene_names=True,
                                                      goa_obo_file=goa_obo_file)

            print("# of enriched (i.e., with significantly higher concentration) GO terms: %i" %(goa_stats_dic["c_sig_go_terms_e"]))
            print("# of purified (i.e., with significantly lower concentration) GO terms:  %i" %(goa_stats_dic["c_sig_go_terms_p"]))

            goa_results_df.to_csv(goa_results_tsv, sep="\t", index=False)
            print("")

        else:
            print("No transcripts with motif hits. Skipping GOA ... ")


    """
    Motif annotations (mRNA regions only).

    """

    rbp2motif2annot2c_dic = {}  # rbp_id -> motif_id -> annot -> count

    for motif_hit in motif_hit2annot_dic:

        # motif_hit format: "chr1:10-15(+)motif_id". Extract motif ID via
        # benchlib helper (works since motif_id/regex cannot contain ")").
        motif_id = benchlib.get_motif_id_from_hit_str(motif_hit)
        assert motif_id, "motif_id extraction failed for motif_hit string \"%s\"" %(motif_hit)
        # Annotation label stored as first list element (e.g. "5'UTR", "CDS",
        # "3'UTR"); second element is the transcript ID (unused here).
        annot = motif_hit2annot_dic[motif_hit][0]
        rbp_id = id2name_dic[motif_id]
        # Count annotation occurrences per RBP and per motif ID.
        if rbp_id not in rbp2motif2annot2c_dic:
            rbp2motif2annot2c_dic[rbp_id] = {}
        if motif_id not in rbp2motif2annot2c_dic[rbp_id]:
            rbp2motif2annot2c_dic[rbp_id][motif_id] = {}
        if annot not in rbp2motif2annot2c_dic[rbp_id][motif_id]:
            rbp2motif2annot2c_dic[rbp_id][motif_id][annot] = 1
        else:
            rbp2motif2annot2c_dic[rbp_id][motif_id][annot] += 1

    """
    Get annotation to color dictionary, which is needed for region annotation plots in HTML reports.

    """

    annot2color_dic = {}

    # Fixed base set of annotation categories; counts are filled in below and
    # any extra annotation labels encountered are added on the fly.
    annot_dic = {"3'UTR" : 0, "5'UTR" : 0, "CDS" : 0, "intron" : 0, "intergenic" : 0}

    if rbp2motif2annot2c_dic:

        # NOTE: the original nested a second, identical
        # `if rbp2motif2annot2c_dic:` check here — removed as redundant.
        # Count annotation occurrences over all RBPs and motifs.
        for rbp_id in rbp2motif2annot2c_dic:
            for motif_id in rbp2motif2annot2c_dic[rbp_id]:
                for annot in rbp2motif2annot2c_dic[rbp_id][motif_id]:
                    if annot not in annot_dic:
                        annot_dic[annot] = 1
                    else:
                        annot_dic[annot] += 1

        # One hex color per annotation, assigned in sorted annotation order so
        # the annotation -> color mapping is deterministic across runs.
        hex_colors = benchlib.get_hex_colors_list(min_len=len(annot_dic))

        for idx, annot in enumerate(sorted(annot_dic)):
            annot2color_dic[annot] = hex_colors[idx]


    """
    Motif plots and motif hit statistics HTML.


    """
    
    args.bed_sc_thr = None
    args.c_regions = c_regions
    args.ext_up = None
    args.ext_down = None

    html_motif_plots_out = args.out_folder + "/" + "motif_plots.rbpbench_searchlongrna.html"
    if args.plot_abs_paths:
        html_motif_plots_out = os.path.abspath(args.out_folder) + "/" + "motif_plots.rbpbench_searchlongrna.html"
    # If HTML file already exists, remove it.
    if os.path.exists(html_motif_plots_out):
        os.remove(html_motif_plots_out)

    print("Generate motif plots HTML ... ")
    
    plots_subfolder = "html_motif_plots"
    benchlib_path = os.path.dirname(benchlib.__file__)

    benchlib.search_generate_html_motif_plots(args,
                                        search_rbps_dic, seq_motif_blocks_dic, 
                                        str_motif_blocks_dic,
                                        benchlib_path, loaded_motif_ids_dic,
                                        rbp2motif2annot2c_dic=rbp2motif2annot2c_dic,
                                        annot2color_dic=annot2color_dic,
                                        mrna_reg_occ_dic=mrna_reg_occ_dic,
                                        norm_mrna_reg_dic=norm_mrna_reg_dic,
                                        html_report_out=html_motif_plots_out,
                                        rbpbench_mode="searchlongrna",
                                        reg_seq_str="regions",
                                        goa_results_tsv="goa_results_on_motif_hit_transcripts.tsv",
                                        goa_results_df=goa_results_df,
                                        goa_stats_dic=goa_stats_dic,
                                        id2pids_dic=id2pids_dic,
                                        id2exp_dic=id2exp_dic,
                                        match_c_dic=match_c_dic,
                                        match_c_total_dic=match_c_total_dic,
                                        plots_subfolder=plots_subfolder)

    """
    Output parameter settings.

    """
    # Output mode settings.
    print("Output parameter settings ... ")
    SETOUT = open(settings_file, "w")
    for arg in vars(args):
        SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
    for call in call_dic:
        SETOUT.write("%s\t%s\n" %(call, call_dic[call]))
    SETOUT.close()

    """
    Take out the trash.

    """
    print("Delete .tmp files ... ")
    if os.path.exists(out_tmp_bed):
        os.remove(out_tmp_bed)
    if os.path.exists(cmstat_tmp_out):
        os.remove(cmstat_tmp_out)


    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    print("Transcript sequences .fa:\n%s" %(tr_seqs_fa))
    if args.only_mrna:
        print("mRNA regions .bed:\n%s" %(mrna_regions_bed))
    print("Motif hits .bed:\n%s" %(motif_hits_bed_out))
    print("Matched sequence stats .tsv:\n%s" %(matched_seqs_out))
    # print("RBP region occupancies .tsv:\n%s" %(rbp_reg_occ_table_out))
    print("RBP hit stats .tsv:\n%s" %(rbp_stats_out))
    print("Motif hit stats .tsv:\n%s" %(motif_stats_out))
    if args.run_goa_tr:
        print("Motif hit GO enrichment analysis results .tsv:\n%s" %(goa_results_tsv))
    # if reg_annot_table_file is not None:
    #     print("Region annotations .tsv:\n%s" %(reg_annot_table_file))
    print("Motif plots and hit statistics .html:\n%s" %(html_motif_plots_out))
    # if args.report:
    #     print("Search report .html:\n%s" %(html_report_out))
    print("")


################################################################################

def main_enmo(args):
    """
    Check for enriched motifs.

    """

    print("Running for you in ENMO mode ... ")


    assert os.path.exists(args.in_sites), "--in file \"%s\" not found" % (args.in_sites)
    assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)
    assert os.path.exists(args.in_gtf), "--gtf file \"%s\" not found" % (args.in_gtf)

    # Check --in BED format.
    benchlib.bed_check_format(args.in_sites, param_str="--in")
    if args.bg_user_incl_bed:
        assert os.path.exists(args.bg_user_incl_bed), "--bg-incl-bed \"%s\" not found" % (args.bg_user_incl_bed)
        benchlib.bed_check_format(args.bg_user_incl_bed, param_str="--bg-incl-bed")
    if args.bg_mask_bed:
        assert os.path.exists(args.bg_mask_bed), "--bg-mask-bed \"%s\" not found" % (args.bg_mask_bed)
        benchlib.bed_check_format(args.bg_mask_bed, param_str="--bg-mask-bed")

    # Is MEME >= v5 installed?
    if not args.meme_disable_check:
        assert benchlib.is_tool("meme"), "meme not in PATH"
        check, meme_version = benchlib.check_tool_version("meme -version", "5.0")
        assert check, "RBPBench requires meme version >= 5.0 (installed version: %s)" %(meme_version)

    # Check if MEME version is >= 5.5.4 (need to add fimo --no-pgc option to produce same results!).
    fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-qvalue"
    if not args.meme_disable_check:
        check, meme_version = benchlib.check_tool_version("meme -version", "5.5.4")
        args.meme_version = meme_version
        if check:
            fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-qvalue --no-pgc"
    if args.meme_no_pgc:
        fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-qvalue --no-pgc"

    # Regex check.
    regex_type = "sequence"
    if args.regex:
        if args.regex_type == 1:
            print("Check given --regex type ... ")
            if benchlib.looks_like_structure(args.regex):
                print("Given --regex looks like structure pattern ... ")
                regex_type = "structure"
            else:
                print("Given --regex assumed to be standard regex ... ")
                regex_type = "sequence"
        elif args.regex_type == 2:
            regex_type = "sequence"
        elif args.regex_type == 3:
            regex_type = "structure"
        else:
            assert False, "unexpected --regex-type value set (%d)" %(args.regex_type)
        # If standard regex, check validity.
        if regex_type == "sequence":
            assert benchlib.is_valid_regex(args.regex), "given --regex \"%s\" is not a valid regular expression. Please provide valid expression" % (args.regex)

    # Fixed seed number?
    import random
    if args.random_seed is not None:
        random.seed(args.random_seed)

    # Limit inputs.
    assert benchlib.boundary_check(args.bg_shuff_factor, 1, 20), "set --bg-shuff-factor expected to be >= 1 and <= 20"
    assert benchlib.boundary_check(args.bg_shuff_k, 1, 5), "set --bg-shuff-k expected to be >= 1 and <= 5"
    assert benchlib.boundary_check(args.bg_min_size, 1, 200000), "set --bg-min-size expected to be >= 1 and <= 200000"
    if args.motif_sim_thr is not None:
        assert benchlib.boundary_check(args.motif_sim_thr, 0.0, 1000), "set --motif-sim-thr expected to be >= 0 and <= 1000"
    assert benchlib.boundary_check(args.motif_sim_cap, 0.5, 500), "set --motif-sim-cap expected to be >= 0.5 and <= 500"

    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.fimo_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.fimo_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    if args.fimo_user_ntf_file:
        fimo_freqs_file = args.fimo_user_ntf_file
    assert os.path.exists(fimo_freqs_file), "set FIMO nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    ENCODE blacklist.

    https://github.com/Boyle-Lab/Blacklist/
    hg38-blacklist.v2.bed 
    """
    blacklist_bed = db_path + "/hg38-blacklist.v2.6col.bed"

    """
    Motif database.
    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                            db_path=db_path)

    motif_pair2sim_dic = {}
    print("Read in motif similarities ... ")
    tomtom_sim_file = get_local_tomtom_sim_file(args.motif_db, db_path=db_path)
    motif_pair2sim_dic = benchlib.read_in_tomtom_sim_results(tomtom_sim_file,
                                                             motif_sim_cap=args.motif_sim_cap)
    print("Read in %i motif similarities ... " %(len(motif_pair2sim_dic)))

    # Custom motif database: folder given.
    if args.custom_db:
        assert not args.custom_db_meme_xml, "--custom-db folder set incompatible with --custom-db-meme-xml. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_cm, "--custom-db folder set incompatible with --custom-db-cm. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_info, "--custom-db folder set incompatible with --custom-db-info. Provide custom motif database either via folder (--custom-db) or as single files"
        seq_motifs_db_file, str_motifs_db_file, rbp2ids_file = specify_custom_motif_db(args.custom_db)
        if os.path.exists(seq_motifs_db_file):
            print("Calculate motif similarities for supplied custom motif database ... ")
            motif_pair2sim_dic = benchlib.calc_tomtom_sim(seq_motifs_db_file, args.out_folder,
                                                          motif_sim_cap=args.motif_sim_cap)
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
    # Custom motif database: single files given.
    if args.custom_db_meme_xml or args.custom_db_cm or args.custom_db_info:
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
        assert not args.custom_db, "single custom motif database files provided not compatible with --custom-db. Provide custom motif database either via folder (--custom-db) or as single files"
        assert args.custom_db_info, "--custom-db-info needed to define custom motif database"
        rbp2ids_file = args.custom_db_info
        assert args.custom_db_meme_xml or args.custom_db_cm, "--custom-db-meme-xml and/or --custom-db-cm needed to define custom motif database"
        if args.custom_db_meme_xml:
            seq_motifs_db_file = args.custom_db_meme_xml
            if os.path.exists(seq_motifs_db_file):
                print("Calculate motif similarities for supplied custom motif database ... ")
                motif_pair2sim_dic = benchlib.calc_tomtom_sim(seq_motifs_db_file, args.out_folder,
                                                              motif_sim_cap=args.motif_sim_cap)
        else:
            seq_motifs_db_file = ""  # setting to empty string results in os.path.exists -> False.
        if args.custom_db_cm:
            str_motifs_db_file = args.custom_db_cm
        else:
            str_motifs_db_file = ""

    args.motif_db_str = motif_db_str

    """
    Get ID mappings

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")

    """
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)

    # Motif ID -> RBP name (== RBP ID) mapping.
    id2name_dic = {}
    for rbp_id in name2ids_dic:
        for motif_id in name2ids_dic[rbp_id]:
            id2name_dic[motif_id] = rbp_id

    """
    Get MEME XML database motif blocks dictionary.

    """
    seq_motif_blocks_dic = {}
    if os.path.exists(seq_motifs_db_file):
        seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file,
                                                           empty_check=True)
    for motif_id in seq_motif_blocks_dic:
        assert motif_id in id2name_dic, "MEME XML motif ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    motif2conseq_dic = {}
    if seq_motif_blocks_dic:
        print("Get consensus sequences for sequence motifs ... ")
        for motif_id in seq_motif_blocks_dic:
            consensus_seq = benchlib.get_consensus_motif_from_seq_block(seq_motif_blocks_dic[motif_id])
            motif2conseq_dic[motif_id] = consensus_seq

    """
    Get covariance model database motif blocks dictionary.

    """
    str_motif_blocks_dic = {}
    if str_motifs_db_file and os.path.exists(str_motifs_db_file):
        str_motif_blocks_dic = benchlib.read_in_cm_blocks(str_motifs_db_file, 
                                                          empty_check=True)
        for motif_id in str_motif_blocks_dic:
            assert motif_id in id2name_dic, "Covariance model accession ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    """
    Output folders + files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    f1_sites_bed = args.out_folder + "/in_sites.filtered.tmp.bed"
    filtered_sites_bed = args.out_folder + "/in_sites.filtered.bed"
    filtered_sites_fa = args.out_folder + "/in_sites.filtered.fa"
    seq_motifs_xml = args.out_folder + "/seq_motifs.xml"
    str_motifs_cm = args.out_folder + "/str_motifs.cm"
    fimo_res_tsv = args.out_folder + "/fimo_results.tsv"
    cmsearch_res_txt = args.out_folder + "/cmsearch_results.txt"

    chr_len_file = args.out_folder + "/" + "reference_lengths.out"
    mrna_regions_bed = args.out_folder + "/mrna_regions.bed"

    tr_seqs_fa = args.out_folder + "/transcript_sequences.fa"
    bg_excl_bed = args.out_folder + "/background_excluded_regions.bed"
    bg_incl_bed = args.out_folder + "/background_included_regions.bed"
    bg_shuffle_in_bed = args.out_folder + "/" + "background_shuffle_in.bed"

    # con_res_out_tsv = args.out_folder + "/contingency_table_results.tsv"
    settings_file = args.out_folder + "/settings.rbpbench_enmo.out"

    # Motif enrichment stats table.
    enmo_stats_out = args.out_folder + "/motif_enrichment_stats.tsv"

    # Motif co-ooccurrence stats table.
    cooc_stats_out = args.out_folder + "/motif_cooc_stats.tsv"

    # Temp files.
    # random_id = uuid.uuid1()
    # tmp_out_bed = args.out_folder + "/" + str(random_id) + ".filtered_in.bed"
    cmstat_tmp_out = args.out_folder + "/cmstat_out.tmp.txt"

    # Delete if existing folder.
    if os.path.exists(fimo_res_tsv):
        os.remove(fimo_res_tsv)
    if os.path.exists(cmsearch_res_txt):
        os.remove(cmsearch_res_txt)


    """
    Load RBP data based on --rbps (+ optionally USER data).

    """

    rbp_in_dic = {}
    for rbp_id in args.list_rbps:
        rbp_in_dic[rbp_id] = 1

    # RBPs for motif search.
    loaded_rbps_dic = {}

    # USER set?
    user_motifs = False
    user_rbp_id = False
    if "USER" in rbp_in_dic:
        user_motifs = True
    else:
        assert not args.user_meme_xml, "--user-meme-xml provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_cm, "--user-cm provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_rbp_id, "--user-rbp-id set but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"

    special_rbp_ids_list = ["USER", "REGEX"]

    # If ALL set, load all RBPs (+ optinally USER).
    if "ALL" in rbp_in_dic:
        if len(rbp_in_dic) == 2:
            assert user_motifs, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        if len(rbp_in_dic) > 2:
            assert False, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        print("--rbps ALL selected. Loading all database motifs ... ")
        for rbp_id in name2ids_dic:
            loaded_rbps_dic[rbp_id] = motif_db_str

    else:
        # Load individual RBPs.
        for rbp_id in rbp_in_dic:
            if rbp_id not in special_rbp_ids_list:
                """
                Check if RBP ID in database.
                Suggest similar RBPs based on string similarity (edit distance).

                """
                if rbp_id not in name2ids_dic:
                    db_rbp_list = []
                    for db_rbp_id in name2ids_dic:
                        db_rbp_list.append(db_rbp_id)
                    pair_dist_dic = benchlib.calc_edit_dist_query_list(rbp_id, db_rbp_list)
                    max_c = 10
                    c = 0
                    suggested_rbps = []
                    for key, value in sorted(pair_dist_dic.items(), key=lambda item: item[1], reverse=False):
                        if c >= max_c:
                            break
                        c += 1
                        suggested_rbps.append(key)
                    suggested_rbps_str = ",".join(suggested_rbps)
                    assert False, "provided --rbps ID %s not in internal motif database (%s). Please provide RBP name present in database. Did you mean (any of) the following database ID(s) (top 10 hits based on string similarity): %s ?" %(rbp_id, motif_db_str, suggested_rbps_str)
                # assert rbp_id in name2ids_dic, "provided --rbps ID %s not in internal motif database. Please provide RBP name present in database" %(rbp_id)
                loaded_rbps_dic[rbp_id] = motif_db_str

    # Motif IDs for search.
    loaded_motif_ids_dic = {}
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            loaded_motif_ids_dic[motif_id] = motif_db_str

    """
    Check and load provided USER data.

    """

    if user_motifs:
        print("--rbps USER selected. Check + load provided USER motifs ... ")
        assert args.user_rbp_id, "--rbps USER demands --user-rbp-id to be set to connect the supplied motif(s) with an RBP ID"
        assert args.user_meme_xml or args.user_cm, "--rbps USER requires a provided sequence or structure motif file (via --user-meme-xml AND/OR --user-cm)"

        # Reformat user_rbp_id. 
        user_rbp_id = benchlib.remove_special_chars_from_str(args.user_rbp_id)
        assert user_rbp_id, "empty string after removing special chars from --user-rbp-id. Please provide alphanumeric string for RBP ID (- or _ are okay as well)"

        assert user_rbp_id not in loaded_rbps_dic, "user RBP ID %s already selected from database. Please deselect respective database RBP ID or provide unique user RBP ID via --user-rbp-id" %(user_rbp_id)
        loaded_rbps_dic[user_rbp_id] = "user"
        # In case user_rbp_id in database, reset motif IDs associated to user_rbp_id.
        name2ids_dic[user_rbp_id] = []
        print("RBP ID for user-supplied motifs:", user_rbp_id)

        user_seq_motif_blocks_dic = {}
        if args.user_meme_xml:
            assert os.path.exists(args.user_meme_xml), "--user-meme-xml file \"%s\" not found" % (args.user_meme_xml)
            user_seq_motif_blocks_dic = benchlib.read_in_xml_motifs(args.user_meme_xml, empty_check=False)
            assert user_seq_motif_blocks_dic, "no motifs read in from provided --user-meme-xml. Make sure to supply sequence motifs in MEME XML format!"
            # Check if motif ID already loaded.
            for acc_id in user_seq_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied MEME XML motif ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change user motif ID to a unique motif ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                seq_motif_blocks_dic[acc_id] = user_seq_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "meme_xml"
                id2name_dic[acc_id] = user_rbp_id

        user_str_motif_blocks_dic = {}
        if args.user_cm:
            assert os.path.exists(args.user_cm), "--user-cm file \"%s\" not found" % (args.user_cm)
            # Check for valid format.
            acc_ids_dic = benchlib.check_cm_file(args.user_cm, cmstat_tmp_out, empty_check=False)
            # Read in covariance model blocks.
            user_str_motif_blocks_dic = benchlib.read_in_cm_blocks(args.user_cm)
            for acc_id in acc_ids_dic:
                assert acc_id in user_str_motif_blocks_dic, "accession ID %s not in blocks dictionary. Please contact developers!" %(acc_id)
            # Check if motif ID already loaded.
            for acc_id in user_str_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied covariance model accession (ACC) ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change to a unique accession ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                str_motif_blocks_dic[acc_id] = user_str_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "cm"
                id2name_dic[acc_id] = user_rbp_id


    """
    Get sequence motif lengths.

    """

    id2len_dic = benchlib.get_seq_motif_lengths(seq_motif_blocks_dic)

    """
    Optionally filter DREME/MEME sequence motifs by length.

    """

    if args.motif_min_len or args.motif_max_len:

        if args.motif_min_len and args.motif_max_len:
            assert args.motif_min_len <= args.motif_max_len, "set --motif-min-len needs to be <= --motif-max-len!"

        print("Filtering sequence motifs by set min/max lengths ... ")

        seq_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, name2ids_dic, c_flt_out = benchlib.filter_dic_by_motif_lengths(
                seq_motif_blocks_dic, str_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, id2name_dic,
                id2len_dic, motif_min_len=args.motif_min_len, motif_max_len=args.motif_max_len
            )

        assert loaded_rbps_dic, "no MEME/DREME sequence motifs left after length filtering. Please adjust length filter range (--motif-min-len, --motif-max-len), RBP selection, or disable length filtering!"

        print("Filtered out %d sequence motifs outside set length range" %(c_flt_out))


    """
    Check if loaded RBP IDs have motifs.

    """
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            found = 0
            if motif_id in seq_motif_blocks_dic:
                found += 1
            if motif_id in str_motif_blocks_dic:
                found += 1
            assert found, "no motifs loaded for RBP ID \"%s\". Please provide the respective motifs file" %(rbp_id)


    """
    If --regex is set:
    Treat regex as sequence motif / fimo type.
    rbp_id: regex, motif_id: regex, motif_db: regex

    """

    regex_id = args.regex_id
    regex = args.regex
    
    if args.regex:

        if regex_type == "sequence":

            # Remove , ; from given regex, to avoid motif_id format conflicts.
            regex = benchlib.remove_special_chars_from_str(args.regex,
                                                        reg_ex="[ ;]",
                                                        to_upper=False)  # [ :;\(\)]
            
            assert regex, "empty string after removing special chars ( ;) from --regex. Please provide a valid regex with DNA letters"

            # Convert IUPAC codes (if present) in regex to standard regex format.
            regex = benchlib.convert_iupac_in_regex(regex)

        elif regex_type == "structure":

            # Check structure pattern and format.
            regex = benchlib.check_format_str_pattern(args.regex)

        args.regex = regex

        regex_id = benchlib.remove_special_chars_from_str(args.regex_id)

        assert regex_id, "empty string after removing special chars from --regex-id. Please provide alphanumeric string for regex ID (- or _ are okay as well)"
        assert regex_id not in name2ids_dic, "--regex set but a different RBP ID with name \"%s\" was found. Please provide a different RBP ID or --regex-id" %(regex_id)

        args.regex_id = regex_id

        if args.motif_regex_id:
            assert regex_id not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or --regex-id" %(regex_id)

            id2name_dic[regex_id] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex_id] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex_id] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex_id]  # rbp_id -> motif_ids

        else:
            assert regex not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or use --motif-regex-id" %(regex_id)

            id2name_dic[regex] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex]  # rbp_id -> motif_ids

        loaded_rbps_dic[regex_id] = regex_id  # rbp_id -> motif_db_str

        """
        Dictionaries that use motif_id as keys.
        loaded_motif_ids_dic
        name2ids_dic
        id2name_dic
        id2type_dic
        """
    else:
        regex_id = False


    """
    Get function ID -> function descriptions mapping.

    """
    fid2desc_file = db_path + "/rbp_function_list.32728246.tsv"
    fid2desc_dic, desc2fid_dic = benchlib.get_fid2desc_mapping(fid2desc_file)


    """
    If --motifs sets, filter loaded_rbps_dic + loaded_motif_ids_dic.

    """

    if args.motifs_list:

        print("Filtering loaded motifs by provided --motifs ... ")

        motif_fids_dic = {}

        for motif_id in args.motifs_list:
            motif_fids_dic[motif_id] = 1

        filtered_rbps_dic = {}
        filtered_motif_ids_dic = {}
        filtered_name2ids_dic = {}
        for motif_id in motif_fids_dic:
            assert motif_id in id2name_dic, "motif ID \"%s\" provided via --motifs not found in internal motif ID -> RBP ID mapping. Please provide valid motif IDs" %(motif_id)
            assert motif_id in loaded_motif_ids_dic, "motif ID \"%s\" provided via --motifs not found in loaded motifs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_motif_ids_dic[motif_id]
            filtered_motif_ids_dic[motif_id] = set_db_str
            rbp_id = id2name_dic[motif_id]
            assert rbp_id in loaded_rbps_dic, "motif ID \"%s\" provided via --motifs not found in loaded RBPs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_rbps_dic[rbp_id]
            filtered_rbps_dic[rbp_id] = set_db_str
            if rbp_id in filtered_name2ids_dic:
                filtered_name2ids_dic[rbp_id].append(motif_id)
            else:
                filtered_name2ids_dic[rbp_id] = [motif_id]

        if args.regex:
            filtered_rbps_dic[regex_id] = regex_id
            if args.motif_regex_id:
                filtered_motif_ids_dic[regex_id] = regex_id
                filtered_name2ids_dic[regex_id] = [regex_id]
            else:
                filtered_motif_ids_dic[regex] = regex_id
                filtered_name2ids_dic[regex_id] = [regex]

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by --motifs: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by --motifs:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by --motifs: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by --motifs:", c_loaded_motif_ids_post)

        assert loaded_motif_ids_dic, "no remaining motifs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        name2ids_dic = filtered_name2ids_dic


    """
    If --functions set, filter loaded_rbps_dic + loaded_motif_ids_dic.

    """
    rbp_fids_dic = {}
    if args.rbp_functions:

        print("Filtering loaded RBPs by provided function IDs ... ")

        for fid in args.rbp_functions:
            rbp_fids_dic[fid] = 1

        # Check if provided function IDs are valid.
        for fid in rbp_fids_dic:
            assert fid in fid2desc_dic, "function ID \"%s\" provided via --functions not found in internal function ID -> description mapping. Please provide valid function IDs (see rbpbench info for a detailed description)" %(fid)

        # Filter loaded_rbps_dic.
        filtered_rbps_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:  # keep regex if set.
                filtered_rbps_dic[regex_id] = regex_id
                continue
            if rbp_id not in name2fids_dic:
                continue
            for fid in rbp_fids_dic:
                if fid in name2fids_dic[rbp_id]:
                    set_db_str = loaded_rbps_dic[rbp_id]
                    filtered_rbps_dic[rbp_id] = set_db_str
                    break

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by functions: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by functions:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided function IDs. Please provide compatible RBPs + function IDs (see rbpbench info for annotated RBP functions)"

        # Filter loaded motif IDs.
        filtered_motif_ids_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:
                if args.motif_regex_id:
                    filtered_motif_ids_dic[regex_id] = regex_id
                else:
                    filtered_motif_ids_dic[regex] = regex_id
            else:
                for motif_id in name2ids_dic[rbp_id]:
                    set_db_str = loaded_motif_ids_dic[motif_id]
                    filtered_motif_ids_dic[motif_id] = set_db_str
        
        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by functions: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by functions:", c_loaded_motif_ids_post)


    """
    Load RBP data, store in RBP() class.

    """

    # Store motif IDs for search.
    search_rbps_dic = {}
    seq_rbps_dic = {}
    str_rbps_dic = {}
    motif_id2idx_dic = {} # motif ID -> list index.
    args.internal_id = []

    for rbp_id in loaded_rbps_dic:
    
        internal_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
        args.internal_id.append(internal_id)
        rbp = benchlib.RBP(rbp_id, internal_id)

        for motif_id in name2ids_dic[rbp_id]:
        
            assert motif_id in loaded_motif_ids_dic, "motif_id %s not in loaded_motif_ids_dic" %(motif_id)

            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "regex":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "cm":
                rbp.str_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.str_motif_ids) - 1
                rbp.str_motif_hits.append(0)
                str_rbps_dic[rbp_id] = 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

        search_rbps_dic[rbp_id] = rbp

    print("# of RBP IDs for search:    ", len(loaded_rbps_dic))
    print("# of motif IDs for search:  ", len(loaded_motif_ids_dic))


    """
    Get chromosome IDs + sizes from --genome.
    """

    # Get chromosome sizes.
    chr_ids_dic = {}
    chr_len_dic = {}
    print("Get --genome chromosome IDs + sizes ... ")
    benchlib.genome_fasta_get_chr_sizes_file(args.in_genome, chr_len_file,
                                             check_ids=True,
                                             seq_len_dic=chr_len_dic)
    for chr_id in chr_len_dic:
        chr_ids_dic[chr_id] = 1

    """
    Guess chromosome ID style.

    chr_style:
        1: chr1, chr2, ..., chrX, chrM
        2: 1, 2, ... , X, MT

    """
    print("Guess chromosome ID style (based on --genome FASTA headers) ... ")
    chr_style = benchlib.guess_chr_id_style(chr_ids_dic)

    """
    Get chromosome or transcript IDs from --in sites (column 1).
    +
    Determine type of input sites (genomic or transcript sites).

    """

    chr_ids_in_dic = benchlib.bed_read_chr_ids_dic(args.in_sites)

    assert chr_ids_in_dic, "--in sites BED seems to be empty (no column 1 chromosome IDs read in). Make sure to provide valid BED file"

    genomic_sites_input = False
    for chr_id in chr_ids_in_dic:
        if chr_id in chr_ids_dic:
            genomic_sites_input = True
        break

    args.genomic_sites_input = genomic_sites_input

    if genomic_sites_input:
        print("--in site seem to be genomic sites. Checking ... ")
        for chr_id in chr_ids_in_dic:
            assert chr_id in chr_ids_dic, "chromosome ID %s from --in sites BED file not found in --genome FASTA file. Please provide compatible --in and --genome files"

    """
    Read in gene infos from --gtf.

    """

    print("Read in gene features from --gtf ... ")
    tr2gid_dic = {}
    gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                  tr2gid_dic=tr2gid_dic,
                                                  check_chr_ids_dic=chr_ids_dic,
                                                  chr_style=chr_style,
                                                  empty_check=False)
    assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
    c_gene_infos = len(gid2gio_dic)
    print("# gene features read in from --gtf:", c_gene_infos)

    if not genomic_sites_input:
        print("--in site seem to be transcript sites. Checking ... ")
        for tr_id in chr_ids_in_dic:
            assert tr_id in tr2gid_dic, "transcript ID %s from --in sites BED file not found in --gtf file. Please provide compatible --in and --gtf files" %(tr_id)

    # Get most prominent transcripts or if --tr-list is set, read in transcript IDs.
    tr_ids_dic = {}
    if args.tr_list:
        assert os.path.exists(args.tr_list), "given --tr-list file \"%s\" not found" % (args.tr_list)
        tr_ids_dic = benchlib.read_ids_into_dic(args.tr_list,
                                                check_dic=False)
        assert tr_ids_dic, "no IDs read in from provided --tr-list file. Please provide a valid IDs file (one ID per row)"
        for tr_id in tr_ids_dic:
            assert tr_id in tr2gid_dic, "transcript ID \"%s\" from provided --tr-list file does not appear in --gtf file. Please provide compatible IDs + files" %(tr_id)
            tr_ids_dic[tr_id] = tr2gid_dic[tr_id]
        print("# of transcript IDs (read in from --tr-list): ", len(tr_ids_dic))
    else:
        # Get most prominent transcripts from gene infos.
        print("Select most prominent transcript (MPT) for each gene ... ")
        tr_ids_dic = benchlib.select_mpts_from_gene_infos(gid2gio_dic,
                                basic_tag=False,  # do not be strict (only_tsl=False too).
                                ensembl_canonical_tag=False,
                                prior_basic_tag=True,  # Prioritize basic tag transcript.
                                prior_mane_select=True,  # mane select if set trumps all.
                                prior_lncrna_primary_tag=True,
                                only_tsl=False)
        assert tr_ids_dic, "most prominent transcript selection from gene infos failed. Please contact developers"
        print("# of transcript IDs (most prominent transcripts): ", len(tr_ids_dic))

    # Add transcript IDs from --in sites to tr_ids_dic.
    if not genomic_sites_input:
        for tr_id in chr_ids_in_dic:
            tr_ids_dic[tr_id] = 1

    # Check exon order (return True if minus strand exon 1 is most downstream, not most upstream, which is the correct way).
    print("Check minus-strand exon order in --gtf ... ")
    correct_min_ex_order = benchlib.gtf_check_exon_order(args.in_gtf)
    if correct_min_ex_order:
        print("Correct order encountered ... ")
    else:
        print("Reverse order encountered ... ")

    # Get transcript infos.
    print("Read in transcript infos from --gtf ... ")
    tid2tio_dic = benchlib.gtf_read_in_transcript_infos(args.in_gtf,
                                                        tr_ids_dic=tr_ids_dic,
                                                        correct_min_ex_order=correct_min_ex_order,
                                                        chr_style=chr_style,
                                                        empty_check=False)

    assert tid2tio_dic, "no transcript infos read in from --gtf. Please contact developers"

    # Calculate intron lengths.
    total_tr_intron_len = 0
    total_tr_exon_len = 0

    # Get transcript lengths.
    tr_len_dic = {}

    for tid in tid2tio_dic:
        tr_len_dic[tid] = tid2tio_dic[tid].tr_length
        total_tr_intron_len += tid2tio_dic[tid].total_intron_len
        total_tr_exon_len += tid2tio_dic[tid].tr_length

    total_tr_len = total_tr_intron_len + total_tr_exon_len

    total_tr_intron_ratio = round(total_tr_intron_len / total_tr_len, 6)
    total_tr_exon_ratio = round(total_tr_exon_len / total_tr_len, 6)

    print("Total transcript intron length:", total_tr_intron_len)
    print("Total transcript exon length:  ", total_tr_exon_len)
    print("Total transcript intron ratio: ", total_tr_intron_ratio)


    """
    Filter / extend --in regions BED file.

    """

    # Process extension info.
    ext_parts = args.ext_up_down.split(",")
    c_ext_parts = len(ext_parts)
    ext_up = 0
    ext_down = 0
    if c_ext_parts == 1:
        ext_up = int(ext_parts[0])
        ext_down = int(ext_parts[0])
    elif c_ext_parts == 2:
        ext_up = int(ext_parts[0])
        ext_down = int(ext_parts[1]) 
    else:
        assert False, "invalid --ext argument provided (correct format: --ext 10 OR --ext 20,10)"

    args.ext_up = ext_up
    args.ext_down = ext_down

    # Filter / extend --in BED.
    print("Preprocess --in sites ... ")

    feat_len_dic = {}
    if genomic_sites_input:
        feat_len_dic = chr_len_dic
    else:
        feat_len_dic = tr_len_dic


    reg_stats_dic = benchlib.bed_filter_extend_bed(args.in_sites, f1_sites_bed,
                                                   ext_up=ext_up,
                                                   ext_down=ext_down,
                                                   remove_dupl=True,
                                                   score_col=args.bed_score_col,
                                                   score_thr=args.bed_sc_thr,
                                                   score_rev_filter=args.bed_sc_thr_rev_filter,
                                                   chr_len_dic=feat_len_dic,
                                                   use_region_ids=True)

    print("# --in regions pre-filtering:  ", reg_stats_dic["c_in"])
    print("# --in regions post-filtering: ", reg_stats_dic["c_out"])
    print("# regions with invalid chr_id: ", reg_stats_dic["c_chr_filter"])
    print("# duplicated regions removed:  ", reg_stats_dic["c_dupl_filter"])
    print("# regions filtered by score:   ", reg_stats_dic["c_sc_thr"])

    assert reg_stats_dic["c_out"], "no --in BED sites remain after chromosome ID (or optionally score) filtering. If caused by invalid chr_id filtering, make sure chromosome IDs in --genome FASTA and --in BED files are compatible (i.e., \"chr1\" vs. \"1\" notation). If --in regions are on transcripts, use rbpbench searchrna"


    fasta_index_file = tr_seqs_fa + ".fai"
    if os.path.exists(fasta_index_file):
        os.remove(fasta_index_file)

    pos_seqs_dic = {}
    tr_seqs_dic = {}

    if genomic_sites_input:

        print("Extract --in sequences from --genome ... ")
        benchlib.bed_extract_sequences_from_fasta(f1_sites_bed, 
                                                args.in_genome, filtered_sites_fa,
                                                print_warnings=True)

        print("Read in sequences ... ")
        pos_seqs_dic = benchlib.read_fasta_into_dic(filtered_sites_fa,
                                        dna=True,
                                        all_uc=True,
                                        id_check=True,
                                        empty_check=False,
                                        skip_n_seqs=False)

        # Filter BED file to discard regions for which no sequence was extracted.
        benchlib.bed_filter_by_seqs_dic(pos_seqs_dic, f1_sites_bed, filtered_sites_bed)

    else:

        # Get transcript sequences.
        print("Extract transcript sequences ... ")
        tr_seqs_dic = benchlib.get_transcript_sequences_from_gtf(tid2tio_dic, args.in_genome,
                                                                 tr_ids_dic=tr_ids_dic,
                                                                 tmp_out_folder=args.out_folder)

        print("Output transcript sequences to FASTA ... ")
        benchlib.fasta_output_dic(tr_seqs_dic, tr_seqs_fa,
                                  split=True)

        print("Extract site sequences from transcript sequences ... ")
        benchlib.bed_extract_sequences_from_fasta(f1_sites_bed, 
                                                  tr_seqs_fa, filtered_sites_fa,
                                                  print_warnings=True)

        print("Read in sequences ... ")
        pos_seqs_dic = benchlib.read_fasta_into_dic(filtered_sites_fa,
                                        dna=True,
                                        all_uc=True,
                                        id_check=True,
                                        empty_check=False,
                                        skip_n_seqs=False)

        # Filter BED file to discard regions for which no sequence was extracted.
        benchlib.bed_filter_by_seqs_dic(pos_seqs_dic, f1_sites_bed, filtered_sites_bed)

        # # Transcript sequence lengths.
        # tr_seq_len_dic = {}
        # for tr_id in tr_seqs_dic:
        #     tr_seq_len_dic[tr_id] = len(tr_seqs_dic[tr_id])

    """
    Define regions to exclude (i.e. no background sites sampled from these regions).
    
    """

    # Collect masking files and merge them to BED.
    print("Collect regions for masking (== no negatives from these regions) ... ")
    mask_files_list = []
    mask_files_list.append(filtered_sites_bed)
    if args.bg_mask_bed:
        mask_files_list.append(args.bg_mask_bed)
    if args.bg_mask_blacklist:
        mask_files_list.append(blacklist_bed)
    benchlib.merge_files(mask_files_list, bg_excl_bed)


    """
    Make shuffle input BED file bg_shuffle_in_bed.

    """

    # Generate shuffle file.
    shuffle_list = []
    c_pos = len(pos_seqs_dic)
    c_pos_total = 0
    if args.bg_mode == 1:
        if c_pos >= args.bg_min_size:
            shuffle_list.append(filtered_sites_bed)
            c_pos_total = c_pos
        else:
            while c_pos_total < args.bg_min_size:
                shuffle_list.append(filtered_sites_bed)
                c_pos_total += c_pos
    elif args.bg_mode == 2:
        shuffle_list.append(filtered_sites_bed)
        c_pos_total = c_pos
    else:
        assert False, "invalid --bg-mode set"

    benchlib.merge_files(shuffle_list, bg_shuffle_in_bed)

    print("# random negative regions to sample: ", c_pos_total)





    """
    Get input / positive genomic region annotations.

    """

    pos_reg_ids_list = []
    pos_reg_ids_dic = {}
    for seq_id, seq in sorted(pos_seqs_dic.items()):
        pos_reg_ids_list.append(seq_id)
        pos_reg_ids_dic[seq_id] = 1

    intron_exon_out_bed = args.out_folder + "/intron_exon_regions.tmp.bed"
    overlap_annotations_bed = args.out_folder + "/overlap_region_annotations.tmp.bed"

    pos_reg2annot_dic = {}
    tid2regl_dic = {}

    if args.bg_mode == 1:

        if genomic_sites_input:

            """
            reg2annot_dic[reg_id][0] = annot_string
            reg2annot_dic[reg_id][1] = tr_id

            """

            print("Get genomic region annotations ... ")
            # Extract exon + intron regions of selected transcripts from transcript infos.
            print("Output intron annotations to BED ... ")
            benchlib.output_transcript_info_intron_exon_to_bed(tid2tio_dic, intron_exon_out_bed,
                                                output_mode=3,  # only introns.
                                                report_counts=True,
                                                add_tr_id=True,  # new reg_id format: intron;ENST000006666
                                                add_numbers=True,  # new reg_id format: intron;ENST000006666;1-2
                                                number_format=1,  # format intron;ENST000006666;1-2
                                                empty_check=False)

            # Append detailed exon annotations (CDS, UTR, transcript biotypes) to intron annotations.
            print("Output exon annotations to BED ... ")
            benchlib.output_exon_annotations(tid2tio_dic, intron_exon_out_bed,
                                             add_numbers=True,
                                             append=True)

            # Overlap with input regions.
            print("Overlap annotations with input regions ... ")
            params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))

            benchlib.bed_intersect_files(filtered_sites_bed, intron_exon_out_bed, 
                                         overlap_annotations_bed,
                                         params=params)
            pos_reg2annot_dic = benchlib.get_region_annotations(overlap_annotations_bed,
                                                                tid2tio_dic=tid2tio_dic,
                                                                reg_ids_dic=pos_reg_ids_dic)

        else:

            for reg_id in pos_reg_ids_dic:
                # Get tr_id from reg_id format: ENST00000663363:36-136(+)
                tr_id = reg_id.split(":")[0]
                pos_reg2annot_dic[reg_id] = ["ncRNA", tr_id]

            # Get mRNA region lengths.
            tid2regl_dic = benchlib.get_mrna_region_lengths(tid2tio_dic)

            if tid2regl_dic:
                print("Get mRNA region annotations ... ")

                print("# mRNA transcripts (containing CDS):", len(tid2regl_dic))

                benchlib.output_mrna_regions_to_bed(tid2regl_dic, mrna_regions_bed)

                # Overlap with input regions.
                print("Overlap mRNA annotations with input regions ... ")
                params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))
                benchlib.bed_intersect_files(filtered_sites_bed, mrna_regions_bed, 
                                            overlap_annotations_bed,
                                            params=params)
                pos_reg2annot_dic = benchlib.get_mrna_region_annotations_v2(overlap_annotations_bed,
                                                                            reg_ids_dic=pos_reg_ids_dic)

        # Approx. intron / non-intron lengths in positive set.
        pos_intron_sites_c = 0
        pos_non_intron_sites_c = 0
        intergenic_as_intron = True
        intron_labels = ["intron"]
        if intergenic_as_intron:
            intron_labels.append("intergenic")

        for reg_id in pos_reg2annot_dic:
            # seq_len = len(pos_seqs_dic[reg_id])
            if pos_reg2annot_dic[reg_id][0] in intron_labels:
                pos_intron_sites_c += 1
            else:
                pos_non_intron_sites_c += 1

        pos_sites_c = len(pos_reg2annot_dic)

        # Intron ratio.
        pos_intron_ratio = round(pos_intron_sites_c / pos_sites_c, 6)
        pos_exon_ratio = round(pos_non_intron_sites_c / pos_sites_c, 6)

        print("# input sites with intron (+intergenic) annotations:", pos_intron_sites_c)
        print("# input sites with non-intron annotations:          ", pos_non_intron_sites_c)
        print("Input set intron ratio:                             ", pos_intron_ratio)

        # Do we need more intronic regions for adaptive sampling?
        more_introns = False
        if pos_intron_ratio > total_tr_intron_ratio:
            more_introns = True

        """
        Write regions to BED from where to sample background sites (incl_bed).

        """

        min_intron_len = 100
        min_exon_len = 100
        max_exon_ratio = 0.5  # exon ratio to aim for.

        OUTBED = open(bg_incl_bed, "w")
        print("Output regions to sample background sites from ... ")

        if genomic_sites_input:
            # Output genomic transcript regions with introns.
            for tr_id in tid2tio_dic:
                chr_id = tid2tio_dic[tr_id].chr_id
                tr_s = tid2tio_dic[tr_id].tr_s - 1
                tr_e = tid2tio_dic[tr_id].tr_e
                tr_pol = tid2tio_dic[tr_id].tr_pol
                OUTBED.write("%s\t%i\t%i\t%s\t0\t%s\n" %(chr_id, tr_s, tr_e, tr_id, tr_pol))

            if args.bg_adaptive_sampling:

                print("Adaptive sampling enabled ... ")

                # Bug fix: 'random' is not among this file's top-level imports,
                # so random.shuffle() would raise a NameError here. Import it
                # locally and use a dedicated Random instance seeded with
                # --random-seed, consistent with the seed= passed to the
                # benchlib sampling calls (also makes the shuffle reproducible).
                import random
                rng = random.Random(args.random_seed)

                # Shuffle transcript order so added intron/exon regions are not
                # biased towards the GTF input order.
                tr_ids_list = list(tid2tio_dic.keys())
                rng.shuffle(tr_ids_list)

                if more_introns:
                    # Positives are more intronic than the background transcript
                    # regions: append intron regions until the background intron
                    # length approaches the positive set's intron ratio.
                    target_intron_len = int(total_tr_len * pos_intron_ratio)
                    current_intron_len = total_tr_intron_len

                    for tr_id in tr_ids_list:
                        tio = tid2tio_dic[tr_id]

                        # Loop over intron regions (1-based start -> BED 0-based).
                        for intron in tio.intron_coords:
                            intron_s = intron[0] - 1
                            intron_e = intron[1]
                            intron_len = intron_e - intron_s
                            if intron_len < min_intron_len:
                                continue  # skip very short introns.
                            if current_intron_len < target_intron_len:
                                chr_id = tio.chr_id
                                OUTBED.write("%s\t%i\t%i\t%s\t0\t%s\n" %(chr_id, intron_s, intron_e, tr_id, tio.tr_pol))
                                current_intron_len += intron_len
                            else:
                                break

                    total_tr_intron_len = current_intron_len

                else:
                    # Positives are more exonic: append exon regions, capping
                    # the targeted exon ratio at max_exon_ratio.
                    target_exon_len = int(total_tr_len * pos_exon_ratio)
                    if pos_exon_ratio > max_exon_ratio:  # max exon ratio to be aimed for in --bg-ada-sampling.
                        target_exon_len = int(total_tr_len * max_exon_ratio)
                    current_exon_len = total_tr_exon_len

                    for tr_id in tr_ids_list:
                        tio = tid2tio_dic[tr_id]

                        # Loop over exon regions (1-based start -> BED 0-based).
                        for exon in tio.exon_coords:
                            exon_s = exon[0] - 1
                            exon_e = exon[1]
                            exon_len = exon_e - exon_s
                            if exon_len < min_exon_len:
                                continue  # skip very short exons.
                            if current_exon_len < target_exon_len:
                                chr_id = tio.chr_id
                                OUTBED.write("%s\t%i\t%i\t%s\t0\t%s\n" %(chr_id, exon_s, exon_e, tr_id, tio.tr_pol))
                                current_exon_len += exon_len
                            else:
                                break

                    total_tr_exon_len = current_exon_len

                # Report the updated background intron/exon composition.
                new_total_tr_len = total_tr_intron_len + total_tr_exon_len
                new_total_tr_intron_ratio = round(total_tr_intron_len / new_total_tr_len, 6)

                print("New total background intron length:", total_tr_intron_len)
                print("New total background exon length:  ", total_tr_exon_len)
                print("New total background intron ratio: ", new_total_tr_intron_ratio)

        else:
            # Output transcript regions (chromosome id = transcript ID).
            # Each full transcript becomes one samplable background region.
            for tr_id in tr_len_dic:
                tr_len = tr_len_dic[tr_id]
                OUTBED.write("%s\t0\t%i\t%s\t0\t+\n" %(tr_id, tr_len, tr_id))

            # Overwrite chromosome lengths dictionary with transcript lengths.
            chr_len_dic = tr_len_dic
            # Overwrite chromsome lengths file with transcript lengths, so the
            # negatives sampling below clips to transcript boundaries.
            benchlib.output_chromosome_lengths_file(tr_len_dic, chr_len_file)

        OUTBED.close()


    """
    Create random negatives / background / control set.

    bg_excl_bed:
        Background excluded regions BED
    bg_incl_bed:
        Background included regions BED

    """
    
    neg_seqs_dic = {}
    neg_seqs_tmp_bed = args.out_folder + "/background_sequences.tmp.bed"
    neg_seqs_bed = args.out_folder + "/background_sequences.bed"
    neg_seqs_fa = args.out_folder + "/background_sequences.fa"

    if args.bg_mode == 1:

        print("Background mode 1 ... ")

        print("Extract random background regions ... ")

        check = False

        if args.bg_user_incl_bed:
            print("Use regions provided via --bg-incl-bed ... ")
            check = benchlib.bed_generate_random_negatives(bg_shuffle_in_bed, chr_len_file, neg_seqs_tmp_bed,
                                                        incl_bed=args.bg_user_incl_bed,
                                                        excl_bed=bg_excl_bed,
                                                        seed=args.random_seed)
            if not check:
                print("--bg-incl-bed regions not sufficient to sample required number of background sites. Try with more regions ... ")

                check = benchlib.bed_generate_random_negatives(bg_shuffle_in_bed, chr_len_file, neg_seqs_tmp_bed,
                                                            incl_bed=bg_incl_bed,
                                                            excl_bed=bg_excl_bed,
                                                            seed=args.random_seed)

        else:
            check = benchlib.bed_generate_random_negatives(bg_shuffle_in_bed, chr_len_file, neg_seqs_tmp_bed,
                                                        incl_bed=bg_incl_bed,
                                                        excl_bed=bg_excl_bed,
                                                        seed=args.random_seed)

        if not check:
            print("Less than requested # of background sites sampled. Possible solutions: use lower --bg-min-size, use more transcripts (if --tr-list is set), or use less strict masking (if --bg-mask-bed is set)")
            sys.exit()  

        seqs_fa = args.in_genome
        if not genomic_sites_input:
            seqs_fa = tr_seqs_fa
        
        """
        For genomic regions resulting IDs look like:
        chr22:20977398-20977456(+)
        tr regions work as well, ID format is:
        ENST00000663363:36-136(+)

        """

        print("Extract background regions from FASTA ... ")
        benchlib.bed_extract_sequences_from_fasta(neg_seqs_tmp_bed, 
                                                  seqs_fa, neg_seqs_fa,
                                                  print_warnings=True)

        print("Read in background sequences ... ")
        neg_seqs_dic = benchlib.read_fasta_into_dic(neg_seqs_fa,
                                                    dna=True,
                                                    all_uc=True,
                                                    id_check=True, # This will fail probably, need genomic regions.
                                                    empty_check=False,
                                                    skip_n_seqs=False)

        assert neg_seqs_dic, "no background sequences read in. Please contact developers"

        print("# of extracted background sequences: ", len(neg_seqs_dic))

        # Remove N-containing sequences from neg_seqs_dic.
        c_n_removed = 0
        print("Remove N-containing background sequences ... ")
        for seq_id in list(neg_seqs_dic.keys()):
            if "N" in neg_seqs_dic[seq_id]:
                c_n_removed += 1
                del neg_seqs_dic[seq_id]
        print("# of N-containing background sequences removed: ", c_n_removed)

        # Filter BED file to discard regions for which no sequence was extracted.
        benchlib.bed_filter_by_seqs_dic(neg_seqs_dic, neg_seqs_tmp_bed, neg_seqs_bed)

    elif args.bg_mode == 2:

        print("Prepare sequences to shuffle (factor = %i) ... " %(args.bg_shuff_factor))
        seqs2shuffle_fa = args.out_folder + "/sequences_to_shuffle.fa"
        OUTSHUFA = open(seqs2shuffle_fa, "w")
        seq_idx = 0
        for idx in range(args.bg_shuff_factor):
            for seq_id in pos_seqs_dic:
                new_seq_id = "shuff_%i" %(seq_idx)
                seq_idx += 1
                OUTSHUFA.write(">%s\n%s\n" %(new_seq_id, pos_seqs_dic[seq_id]))
        OUTSHUFA.close()

        # Di-nucleotide shuffling of positive sequences.
        print("Shuffle sequences (k = %i) ... " %(args.bg_shuff_k))

        benchlib.run_k_nt_shuffling(seqs2shuffle_fa, neg_seqs_fa,
                                    kmer_size=args.bg_shuff_k,
                                    params="-dna",
                                    tag="",  # add nothing to sequence IDs in seqs2shuffle_fa.
                                    error_check=True,
                                    seed=args.random_seed)

        print("Read in background sequences ... ")
        neg_seqs_dic = benchlib.read_fasta_into_dic(neg_seqs_fa,
                                                    dna=True,
                                                    all_uc=True,
                                                    id_check=True, # This will fail probably, need genomic regions.
                                                    empty_check=False,
                                                    skip_n_seqs=False)

        assert neg_seqs_dic, "no shuffled background sequences read in. Please contact developers"

        print("# of shuffled background sequences: ", len(neg_seqs_dic))


    print("Plot sequence lengths distributions ... ")

    pos_len_list = [len(seq) for seq in pos_seqs_dic.values()]
    neg_len_list = [len(seq) for seq in neg_seqs_dic.values()]

    len_dist_plot_out = args.out_folder + "/sequence_lengths_distribution.png"
    benchlib.plot_seq_len_distr(pos_len_list, neg_len_list, len_dist_plot_out,
                                label1='Input sequences',
                                label2='Background sequences',
                                density=True)


    args.c_input_sites = len(pos_seqs_dic)
    args.c_bg_sites = len(neg_seqs_dic)

    """
    Get genomic / transcript region annotations for background regions.

    """

    neg_reg_ids_list = []
    neg_reg_ids_dic = {}
    for seq_id, seq in sorted(neg_seqs_dic.items()):
        neg_reg_ids_list.append(seq_id)
        neg_reg_ids_dic[seq_id] = 1

    neg_reg2annot_dic = {}

    if args.bg_mode == 1 and genomic_sites_input:

        print("Overlap annotations with background regions ... ")
        params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))

        benchlib.bed_intersect_files(neg_seqs_bed, intron_exon_out_bed, 
                                     overlap_annotations_bed,
                                     params=params)
        neg_reg2annot_dic = benchlib.get_region_annotations(overlap_annotations_bed,
                                                            tid2tio_dic=tid2tio_dic,
                                                            reg_ids_dic=neg_reg_ids_dic)


    elif args.bg_mode == 1 and not genomic_sites_input:

        for reg_id in neg_reg_ids_dic:
            tr_id = reg_id.split(":")[0]
            neg_reg2annot_dic[reg_id] = ["ncRNA", tr_id]

        if tid2regl_dic:

            print("Overlap mRNA annotations with background regions ... ")
            params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))

            benchlib.bed_intersect_files(neg_seqs_bed, mrna_regions_bed, 
                                         overlap_annotations_bed,
                                         params=params)
            neg_reg2annot_dic = benchlib.get_mrna_region_annotations_v2(overlap_annotations_bed,
                                                                        reg_ids_dic=neg_reg_ids_dic)



    reg_annot_table_file = False
    annot2color_dic = {}

    if args.bg_mode == 1:

        print("Output input region annotations ... ")
        
        reg_annot_table_file = args.out_folder + "/" + "input_region_annotations.tsv"

        OUTRAN = open(reg_annot_table_file, "w")
        OUTRAN.write("region_id\tgene_id\tgene_name\ttranscript_id\tregion_annotation\ttranscript_biotype\n")

        for reg_id in pos_reg2annot_dic:
            annot = pos_reg2annot_dic[reg_id][0]
            tr_id = pos_reg2annot_dic[reg_id][1]
            gene_id = "-"
            gene_name = "-"
            tr_biotype = "-"
            if tr_id:
                gene_id = tr2gid_dic[tr_id]
                gene_info = gid2gio_dic[gene_id]
                tr_biotype = tid2tio_dic[tr_id].tr_biotype
                gene_name = gene_info.gene_name
            else:
                tr_id = "-"
            OUTRAN.write("%s\t%s\t%s\t%s\t%s\t%s\n" %(reg_id, gene_id, gene_name, tr_id, annot, tr_biotype))
        OUTRAN.close()

        """
        Get annotation to color dictionary, which is needed for region annotation plots in HTML reports.

        """
        assert pos_reg2annot_dic, "--bg-mode 1 set, but no input region annotations found. Please contact developers"
        assert neg_reg2annot_dic, "--bg-mode 1 set, but no background region annotations found. Please contact developers"

        annot_dic = {"3'UTR" : 0, "5'UTR" : 0, "CDS" : 0, "lncRNA" : 0, "intron" : 0, "intergenic" : 0}

        for reg_id in pos_reg2annot_dic:
            annot = pos_reg2annot_dic[reg_id][0]
            if annot not in annot_dic:
                annot_dic[annot] = 1
            else:
                annot_dic[annot] += 1
        for reg_id in neg_reg2annot_dic:
            annot = neg_reg2annot_dic[reg_id][0]
            if annot not in annot_dic:
                annot_dic[annot] = 1
            else:
                annot_dic[annot] += 1

        hex_colors = benchlib.get_hex_colors_list(min_len=len(annot_dic))

        idx = 0
        for annot in sorted(annot_dic, reverse=False):
            # hc = hex_colors[idx]
            # print("Assigning hex color %s to annotation %s ... " %(hc, annot))
            annot2color_dic[annot] = hex_colors[idx]
            idx += 1


    """
    Get FIMO + REGEX hits.

    """

    call_dic = {}
    pos_fimo_hits_list = []
    neg_fimo_hits_list = []

    # For shuffled negatives, IDs do not contain coordinates (e.g. shuff_52).
    seq_based = False
    if args.bg_mode == 2:
        seq_based = True

    if seq_rbps_dic:

        out_str, c_added_motifs = benchlib.blocks_to_xml_string(seq_motif_blocks_dic, loaded_motif_ids_dic)
        # Write collected sequence motif blocks to MEME XML format (FIMO input).
        benchlib.output_string_to_file(out_str, seq_motifs_xml)

        print("Run FIMO on input regions ... ")
        # Scan the (filtered) input region sequences for sequence motif hits.
        benchlib.run_fast_fimo(filtered_sites_fa, seq_motifs_xml, fimo_res_tsv,
                               pval_thr=args.fimo_pval,
                               nt_freqs_file=fimo_freqs_file,
                               call_dic=call_dic,
                               params=fimo_params,
                               error_check=False)

        assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

        # Parse FIMO TSV results into a list of hit objects.
        pos_fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                           only_best_hits=args.greatest_hits)

        c_pos_fimo_hits = len(pos_fimo_hits_list)
        print("# of FIMO motif hits in input regions:", c_pos_fimo_hits)

        print("Run FIMO on background regions ... ")
        # Same scan on the background (negative) sequences. NOTE: fimo_res_tsv
        # is overwritten, so the input-region results must already have been
        # read in (done above).
        benchlib.run_fast_fimo(neg_seqs_fa, seq_motifs_xml, fimo_res_tsv,
                               pval_thr=args.fimo_pval,
                               nt_freqs_file=fimo_freqs_file,
                               params=fimo_params,
                               error_check=False)

        assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

        neg_fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv, 
                                                           seq_based=seq_based,
                                                           only_best_hits=args.greatest_hits)

        c_neg_fimo_hits = len(neg_fimo_hits_list)
        print("# of FIMO motif hits in background regions:", c_neg_fimo_hits)

        if args.regex:

            # Map --regex-search-mode to the step_size_one flag (1 -> True,
            # 2 -> False); any other value is rejected.
            step_size_one = False
            if args.regex_search_mode == 1:
                step_size_one = True
            elif args.regex_search_mode == 2:
                step_size_one = False
            else:
                assert False, "invalid --regex-search-mode %i set" %(args.regex_search_mode)

            print("Run search for --regex \"%s\" ... " %(regex))
            # Regex motif search on input region sequences.
            pos_regex_hits_list = benchlib.get_regex_hits(regex, regex_id, pos_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=args.motif_regex_id)

            c_pos_regex_hits = len(pos_regex_hits_list)
            print("# of regex hits in input regions:", c_pos_regex_hits)

            # Add regex hits to fimo_hits_list (downstream code treats regex
            # hits like FIMO sequence motif hits).
            pos_fimo_hits_list += pos_regex_hits_list

            # Regex motif search on background sequences.
            neg_regex_hits_list = benchlib.get_regex_hits(regex, regex_id, neg_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      seq_based=seq_based,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=args.motif_regex_id)

            c_neg_regex_hits = len(neg_regex_hits_list)
            print("# of regex hits in background regions:", c_neg_regex_hits)

            # Add regex hits to fimo_hits_list.
            neg_fimo_hits_list += neg_regex_hits_list

    """
    Get CMSEARCH hits.

    """

    pos_cmsearch_hits_list = []
    neg_cmsearch_hits_list = []

    cmsh_mode = ""
    if args.cmsearch_mode == 1:
        cmsh_mode = "--default"
    elif args.cmsearch_mode == 2:
        cmsh_mode = "--max"
    else:
        assert False, "invalid --cmsearch-mode %i set" %(args.cmsearch_mode)
    cmsh_params = "-g --tformat fasta --toponly --incT %s -T %s %s" %(args.cmsearch_bs, args.cmsearch_bs, cmsh_mode)

    if str_rbps_dic:
        
        print("Output covariance models to .cm ... ")
        benchlib.output_cm_blocks_to_file(str_motif_blocks_dic, loaded_motif_ids_dic, str_motifs_cm)

        print("Run cmsearch on input regions ... ")
        benchlib.run_cmsearch(filtered_sites_fa, str_motifs_cm, cmsearch_res_txt,
                        error_check=False,
                        call_dic=call_dic,
                        params=cmsh_params)

        pos_cmsearch_hits_list, c_pos_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                            only_best_hits=args.greatest_hits,
                                                                            check=True)

        print("# of cmsearch motif hits in input regions:", c_pos_cms_hits)

        print("Run cmsearch on background regions ... ")
        benchlib.run_cmsearch(neg_seqs_fa, str_motifs_cm, cmsearch_res_txt,
                        error_check=False,
                        params=cmsh_params)

        neg_cmsearch_hits_list, c_neg_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt, 
                                                                            seq_based=seq_based,
                                                                            only_best_hits=args.greatest_hits,
                                                                            check=True)

        print("# of cmsearch motif hits in background regions:", c_neg_cms_hits)



    """
    Store for each motif ID the positive / input regions with motif hits (and hit counts), 
    using dictionary of dictionaries regions_with_motifs_dic.

    regions_with_motifs_dic:
        Dictionary of dictionaries, format:
        {motif_id1 -> {'region1': motif_c_region1, 'region2': motif_c_region2}, motif_id2 -> {'region1': motif_c_region1}}

    """

    regions_with_motifs_dic = {}

    for fh in pos_fimo_hits_list:

        motif_id = fh.motif_id
        rbp_id = id2name_dic[motif_id]

        if motif_id in regions_with_motifs_dic:
            # fh.seq_name : FASTA header (== --in genomic sequence region).
            if fh.seq_name in regions_with_motifs_dic[motif_id]:
                regions_with_motifs_dic[motif_id][fh.seq_name] += 1
            else:
                regions_with_motifs_dic[motif_id][fh.seq_name] = 1
        else:
            regions_with_motifs_dic[motif_id] = {}
            regions_with_motifs_dic[motif_id][fh.seq_name] = 1

    # Store regions with structure motifs.
    for cmsh in pos_cmsearch_hits_list:

        motif_id = cmsh.motif_id
        # rbp_id is unused below; the lookup also raises KeyError for unknown motif IDs.
        rbp_id = id2name_dic[motif_id]

        # BUGFIX: this previously tested "rbp_id in regions_with_motifs_dic",
        # but the dictionary is keyed by motif IDs. Since RBP names are not
        # motif IDs, the else branch fired on every hit, resetting the motif's
        # region-count dict and keeping only the last hit (count 1).
        if motif_id in regions_with_motifs_dic:
            # cmsh.seq_name : FASTA header (== --in genomic sequence region).
            if cmsh.seq_name in regions_with_motifs_dic[motif_id]:
                regions_with_motifs_dic[motif_id][cmsh.seq_name] += 1
            else:
                regions_with_motifs_dic[motif_id][cmsh.seq_name] = 1
        else:
            regions_with_motifs_dic[motif_id] = {}
            regions_with_motifs_dic[motif_id][cmsh.seq_name] = 1



    """
    Get motif to region hit counts.

    pos_hits_dic / neg_hits_dic format: 
    motif_id -> region_id -> # of hits in input regions

    """
    pos_hits_dic = {}
    neg_hits_dic = {}
    found_motif_ids_dic = {}

    for fh in pos_fimo_hits_list:

        motif_id = fh.motif_id
        rbp_id = id2name_dic[motif_id]
        # fh.seq_name : FASTA header (== --in genomic sequence region).
        reg_id = fh.seq_name

        if motif_id in pos_hits_dic:
            if reg_id in pos_hits_dic[motif_id]:
                pos_hits_dic[motif_id][reg_id] += 1
            else:
                pos_hits_dic[motif_id][reg_id] = 1
        else:
            pos_hits_dic[motif_id] = {}
            pos_hits_dic[motif_id][reg_id] = 1

        if motif_id in found_motif_ids_dic:
            found_motif_ids_dic[motif_id] += 1
        else:
            found_motif_ids_dic[motif_id] = 1

    for fh in neg_fimo_hits_list:
            
        motif_id = fh.motif_id
        rbp_id = id2name_dic[motif_id]
        reg_id = fh.seq_name

        if motif_id in neg_hits_dic:
            if reg_id in neg_hits_dic[motif_id]:
                neg_hits_dic[motif_id][reg_id] += 1
            else:
                neg_hits_dic[motif_id][reg_id] = 1
        else:
            neg_hits_dic[motif_id] = {}
            neg_hits_dic[motif_id][reg_id] = 1

        if motif_id in found_motif_ids_dic:
            found_motif_ids_dic[motif_id] += 1
        else:
            found_motif_ids_dic[motif_id] = 1

    for cmsh in pos_cmsearch_hits_list:
            
        motif_id = cmsh.motif_id
        rbp_id = id2name_dic[motif_id]
        reg_id = cmsh.seq_name

        if motif_id in pos_hits_dic:
            if reg_id in pos_hits_dic[motif_id]:
                pos_hits_dic[motif_id][reg_id] += 1
            else:
                pos_hits_dic[motif_id][reg_id] = 1
        else:
            pos_hits_dic[motif_id] = {}
            pos_hits_dic[motif_id][reg_id] = 1

        if motif_id in found_motif_ids_dic:
            found_motif_ids_dic[motif_id] += 1
        else:
            found_motif_ids_dic[motif_id] = 1

    for cmsh in neg_cmsearch_hits_list:
            
        motif_id = cmsh.motif_id
        rbp_id = id2name_dic[motif_id]
        reg_id = cmsh.seq_name

        if motif_id in neg_hits_dic:
            if reg_id in neg_hits_dic[motif_id]:
                neg_hits_dic[motif_id][reg_id] += 1
            else:
                neg_hits_dic[motif_id][reg_id] = 1
        else:
            neg_hits_dic[motif_id] = {}
            neg_hits_dic[motif_id][reg_id] = 1

        if motif_id in found_motif_ids_dic:
            found_motif_ids_dic[motif_id] += 1
        else:
            found_motif_ids_dic[motif_id] = 1

    """
    Get motif enrichment stats.

    """

    fisher_alt_hypo = "greater"
    if args.fisher_mode == 1:
        fisher_alt_hypo = "greater"
        print("Fisher mode = 1, reporting significantly overrepresented motifs + co-occurrences ... ")
    elif args.fisher_mode == 2:
        fisher_alt_hypo = "two-sided"
        print("Fisher mode = 2, reporting significantly over- AND underrepresented motifs + co-occurrences ... ")
    elif args.fisher_mode == 3:
        fisher_alt_hypo = "less"
        print("Fisher mode = 3, reporting significantly underrepresented motifs + co-occurrences ... ")
    else:
        assert False, "Invalid Fisher mode: %i" %(args.fisher_mode)

    c_pos_regions = len(pos_seqs_dic)
    c_neg_regions = len(neg_seqs_dic)
    p_val_list = []
    motif_ids_list = []
    motif2con_table_dic = {}
    len_motif_ids = len(found_motif_ids_dic)
    motif_enrich_stats_dic = {}  # Store EnmoStats stats objects.

    if not found_motif_ids_dic:
        print("No motifs found in any regions (input or background). Exiting ... ")
        sys.exit()

    for motif_id in sorted(found_motif_ids_dic.keys()):

        motif_ids_list.append(motif_id)

        rbp_id = id2name_dic[motif_id]
        motif_type = id2type_dic[motif_id]

        motif_enrich_stats = benchlib.EnmoStats(motif_id, rbp_id)
        motif_enrich_stats.motif_type = motif_type
        motif_enrich_stats.c_pos_regions = c_pos_regions
        motif_enrich_stats.c_neg_regions = c_neg_regions

        # Set consensus sequence.
        conseq = "-"
        if id2type_dic[motif_id] == "meme_xml":
            conseq = motif2conseq_dic[motif_id]
        elif id2type_dic[motif_id] == "cm":
            conseq = "-"
        elif id2type_dic[motif_id] == "regex":
            conseq = regex
        else:
            assert False, "Invalid motif type: %s" %(id2type_dic[motif_id])
        motif_enrich_stats.consensus_seq = conseq

        c_pos_hits = 0
        c_pos_hit_regions = 0

        if motif_id in pos_hits_dic:
            for reg_id in pos_hits_dic[motif_id]:
                c_pos_hits += pos_hits_dic[motif_id][reg_id]
                c_pos_hit_regions += 1

        c_neg_hits = 0
        c_neg_hit_regions = 0

        if motif_id in neg_hits_dic:
            for reg_id in neg_hits_dic[motif_id]:
                c_neg_hits += neg_hits_dic[motif_id][reg_id]
                c_neg_hit_regions += 1

        motif_enrich_stats.c_pos_hits = c_pos_hits
        motif_enrich_stats.c_neg_hits = c_neg_hits
        motif_enrich_stats.c_pos_hit_regions = c_pos_hit_regions
        motif_enrich_stats.c_neg_hit_regions = c_neg_hit_regions

        # print("Motif %s (%s) %i input hit regions (%i hits) and %i negative hit regions (%i hits) ... " %(motif_id, rbp_id, c_pos_hit_regions, c_pos_hits, c_neg_hit_regions, c_neg_hits))

        """
        DREME style test for motif enrichment:
        A: The number of input sequences that contain the motif.
        B: The number of input sequences that do not contain the motif.
        C: The number of background sequences that contain the motif.
        D: The number of background sequences that do not contain the motif.
        """

        con_table = [[c_pos_hit_regions, c_pos_regions - c_pos_hit_regions],
                     [c_neg_hit_regions, c_neg_regions - c_neg_hit_regions]]

        odds_ratio, p_value = fisher_exact(con_table, alternative=fisher_alt_hypo)

        p_val_list.append(p_value)
        table_str = str(con_table)
        motif2con_table_dic[motif_id] = con_table

        motif_enrich_stats.con_table = table_str
        motif_enrich_stats.fisher_pval = p_value
        motif_enrich_stats.fisher_corr_mode = args.enmo_pval_mode
        motif_enrich_stats.fisher_alt_hyp_mode = args.fisher_mode

        motif_enrich_stats_dic[motif_id] = motif_enrich_stats

    """
    Multiple testing correction.

    """

    enmo_pval_thr = args.enmo_pval_thr

    if args.enmo_pval_mode == 1:  # BH correction.

        pvals_corrected = false_discovery_control(p_val_list, method='bh')

        for i in range(len(p_val_list)):
            p_val_list[i] = pvals_corrected[i]

    elif args.enmo_pval_mode == 2:  # Bonferroni correction.

        # Multiple testing correction factor.
        mult_test_corr_factor = 1
        if len_motif_ids > 1:
            mult_test_corr_factor = len_motif_ids

        enmo_pval_thr = args.enmo_pval_thr / mult_test_corr_factor
        enmo_pval_thr = benchlib.round_to_n_significant_digits_v2(enmo_pval_thr, 4)

    elif args.enmo_pval_mode == 3:  # No correction.

        enmo_pval_thr = args.enmo_pval_thr

    else:
        assert False, "Invalid motif enrichment p-value mode (--enmo-pval-mode) set: %i" %(args.enmo_pval_mode)

    args.enmo_pval_thr_corr = enmo_pval_thr


    # Update + filter p-values, and write per-motif enrichment stats table.

    c_all_fisher_pval = 0
    c_sig_fisher_pval = 0
    sig_motif_ids_list = []  # significant motif IDs list.

    # Use a context manager so the output file is closed even if an
    # exception occurs while writing (original code used open()/close()).
    with open(enmo_stats_out, "w") as COSOUT:
        COSOUT.write("motif_id\trbp_id\tc_in_fg\tc_not_in_fg\tc_in_bg\tc_not_in_bg\tfisher_pval\tfisher_pval_corr\n")

        for idx, motif_id in enumerate(motif_ids_list):

            rbp_id = id2name_dic[motif_id]
            con_table = motif2con_table_dic[motif_id]
            # Corrected p-value (raw if --enmo-pval-mode disabled correction).
            p_value_corr = p_val_list[idx]
            # Contingency table layout: [[in_fg, not_in_fg], [in_bg, not_in_bg]].
            c_in_fg = con_table[0][0]
            c_not_in_fg = con_table[0][1]
            c_in_bg = con_table[1][0]
            c_not_in_bg = con_table[1][1]

            p_value_corr = benchlib.round_to_n_significant_digits_v2(p_value_corr, 4)

            # Store rounded p-values back into the stats objects.
            motif_enrich_stats_dic[motif_id].fisher_pval_corr = p_value_corr
            p_value = motif_enrich_stats_dic[motif_id].fisher_pval
            p_value = benchlib.round_to_n_significant_digits_v2(p_value, 4)
            motif_enrich_stats_dic[motif_id].fisher_pval = p_value

            # Motif is significant if corrected p-value passes the threshold.
            if p_value_corr <= enmo_pval_thr:
                c_sig_fisher_pval += 1
                sig_motif_ids_list.append(motif_id)
            c_all_fisher_pval += 1

            COSOUT.write("%s\t%s\t%i\t%i\t%i\t%i\t%s\t%s\n" %(motif_id, rbp_id, c_in_fg, c_not_in_fg, c_in_bg, c_not_in_bg, str(p_value), str(p_value_corr)))



    """
    Calculate co-occurrence statistics for significantly enriched motif IDs.


    """

    sig_motif_ids_list.sort()
    len_motif_list = len(sig_motif_ids_list)

    print("# of significant motifs: %i" %(len_motif_list))

    reg_ids_list = []
    reg_ids_dic = {}
    for seq_id, seq in sorted(pos_seqs_dic.items()):
        reg_ids_list.append(seq_id)
        reg_ids_dic[seq_id] = 1

    df_pval = False
    pval_ll = []
    pval_cont_lll = []

    region_motif_binds_dic = {}
    rid2mtfidx2hcp_dic = {}  # region_id -> motif_id_idx -> motif hit center position(s)

    if sig_motif_ids_list:

        print("Calculate co-occurrence stats for significant motifs ... ")

        for reg_id in pos_seqs_dic:
            region_motif_binds_dic[reg_id] = [False]*len_motif_list
            rid2mtfidx2hcp_dic[reg_id] = {}

        reg_hits_dic = {}
        add_count = False
        mtf2idx_dic = {}
        idx2mtf_dic = {}

        print("Get motif region occupancies ... ")

        for idx, motif_id in enumerate(sig_motif_ids_list):
            mtf2idx_dic[motif_id] = idx
            idx2mtf_dic[idx] = motif_id

            # Region has hits yes(1)/no(0).
            hit_list = []
            for reg_id in reg_ids_list:

                if motif_id in regions_with_motifs_dic and reg_id in regions_with_motifs_dic[motif_id]:
                    if add_count:
                        hit_list.append(regions_with_motifs_dic[motif_id][reg_id])
                    else:
                        hit_list.append(1)
                else:
                    hit_list.append(0)
            reg_hits_dic[motif_id] = hit_list

        # Loop over positive hits: record which significant motifs bind each
        # input region and where their hit centers lie.

        for fh in pos_fimo_hits_list:

            motif_id = fh.motif_id

            # Only significant motifs take part in co-occurrence stats.
            if motif_id not in sig_motif_ids_list:
                continue

            rbp_id = id2name_dic[motif_id]
            region_id = fh.seq_name

            # region_len = benchlib.get_length_from_seq_name(fh.seq_name)
            # # genomic motif region string.
            # fh_str = repr(fh)
            # uniq_count = unique_motifs_dic[rbp_id][fh_str]
            mtf_idx = mtf2idx_dic[motif_id]

            # # Motif hit string.
            # motif_str = "%s:%i:%i:%s" %(motif_id, fh.start, fh.end, str(fh.pval))
            # # What gets displayed in hover box in violin plot.
            # # motif_str_plot = "%s,%i-%i,%s" %(fh.motif_id, fh.seq_s, fh.seq_e, str(fh.pval))
            # motif_str_plot = "%s:%i-%i" %(motif_id, fh.seq_s, fh.seq_e)

            # Center position of motif hit.
            # NOTE(review): the -1 suggests fh.seq_s is 1-based — confirm in benchlib.
            motif_hit_s = fh.seq_s - 1
            motif_hit_e = fh.seq_e
            center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

            # Mark region as bound by this motif.
            region_motif_binds_dic[region_id][mtf_idx] = True

            # region_rbp_motif_pos_dic[region_id].append(motif_str)
            # region2motif_hits_dic[region_id].append(motif_str_plot)

            # Collect hit center positions per region + motif index.
            if mtf_idx not in rid2mtfidx2hcp_dic[region_id]:
                rid2mtfidx2hcp_dic[region_id][mtf_idx] = [center_pos]
            else:
                rid2mtfidx2hcp_dic[region_id][mtf_idx].append(center_pos)


        # Same bookkeeping for structure motif (cmsearch) hits.
        for cmsh in pos_cmsearch_hits_list:

            motif_id = cmsh.motif_id

            if motif_id not in sig_motif_ids_list:
                continue

            rbp_id = id2name_dic[motif_id]
            region_id = cmsh.seq_name

            mtf_idx = mtf2idx_dic[motif_id]

            # # Motif hit string.
            # motif_str = "%s:%i:%i:%s" %(cmsh.motif_id, cmsh.start, cmsh.end, str(-1*cmsh.score))
            # # motif_str_plot = "%s,%i-%i,%s" %(cmsh.motif_id, cmsh.seq_s, cmsh.seq_e, str(cmsh.score))
            # motif_str_plot = "%s:%i-%i" %(cmsh.motif_id, cmsh.seq_s, cmsh.seq_e)

            # Center position of motif hit.
            motif_hit_s = cmsh.seq_s - 1
            motif_hit_e = cmsh.seq_e
            center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)

            region_motif_binds_dic[region_id][mtf_idx] = True

            if mtf_idx not in rid2mtfidx2hcp_dic[region_id]:
                rid2mtfidx2hcp_dic[region_id][mtf_idx] = [center_pos]
            else:
                rid2mtfidx2hcp_dic[region_id][mtf_idx].append(center_pos)

        # Create list of lists for co-occurrence heatmap.
        # pval_ll[i][j]: plotted p-value for motif pair (i, j), init 1.0.
        # pval_cont_lll[i][j]: per-pair info list (filled below).
        for _ in sig_motif_ids_list:
            pval_ll.append([1.0]*len_motif_list)
            # NOTE: original code appended "[]*len_motif_list", which is just
            # an empty list ([] repeated is still []); the inner lists are
            # actually filled by the nested loop below.
            pval_cont_lll.append([])

        for i in range(len_motif_list):
            for j in range(len_motif_list):
                # Storing [p-value_str, plotted_p-value_str, pair_str, table_str,
                # avg_min_dist, perc_close_hits, correlation_str, filter_note,
                # motif1_info, motif2_info]. Must be a fresh list per cell so
                # cells do not share references.
                pval_cont_lll[i].append(["1.0", "-", "-", "-", "-", "-", "-", "", "", ""])  

        # All unordered pairs of significant motifs.
        motif_pairs = list(combinations(sig_motif_ids_list, 2))
        con_pval_dic = {}  # pair_str -> co-occurrence Fisher p-value.
        con_table_dic = {}  # pair_str -> 2x2 contingency table.
        pair_str_dic = {}  # pair_str -> [motif_id1, motif_id2].

        # # Needed ?
        # c_regions_with_hits = 0
        # for reg_id in region_motif_binds_dic:
        #     reg_hit = False
        #     for label in region_motif_binds_dic[reg_id]:
        #         if label:
        #             reg_hit = True
        #     if reg_hit:
        #         c_regions_with_hits += 1
        # # print("# regions with hits (all motifs):", c_regions_with_hits)

        p_val_list = []  # Fisher exact test p-values.

        print("Compute co-occurrences between significant motif pairs ... ")
        for pair in motif_pairs:
            pair = list(pair)
            pair.sort()

            # sig_motif_ids_list is sorted, so after pair.sort() idx1 < idx2;
            # detailed pair info goes into the lower triangle [idx2][idx1].
            idx1 = mtf2idx_dic[pair[0]]
            idx2 = mtf2idx_dic[pair[1]]

            pair_str = ",".join(pair)
            # pair_str_dic[pair_str] = [pair_list[0], pair_list[1]]
            pair_str_dic[pair_str] = [pair[0], pair[1]]

            # avg_min_dist and perc_close_hits = "-" if no common hit regions.
            table, avg_min_dist, perc_close_hits = benchlib.make_contingency_table_2x2_v2(
                                                        region_motif_binds_dic, idx1, idx2,
                                                        rid2mtfidx2hcp_dic,
                                                        max_motif_dist=args.max_motif_dist)

            odds_ratio, p_value = fisher_exact(table, alternative=fisher_alt_hypo)

            con_table_dic[pair_str] = table
            table_str = str(table)

            p_value_plotted = p_value
            p_val_list.append(p_value)

            # Symmetric plotted p-value; per-pair details lower triangle only.
            pval_ll[idx1][idx2] = p_value_plotted
            pval_ll[idx2][idx1] = p_value_plotted
            pval_cont_lll[idx2][idx1][0] = str(p_value)
            pval_cont_lll[idx2][idx1][1] = str(p_value_plotted)
            pval_cont_lll[idx2][idx1][2] = pair_str
            pval_cont_lll[idx2][idx1][3] = table_str
            pval_cont_lll[idx2][idx1][4] = avg_min_dist
            pval_cont_lll[idx2][idx1][5] = perc_close_hits
        
            # rbp_id1 = rbp_id = id2name_dic[pair[0]]
            # rbp_id2 = rbp_id = id2name_dic[pair[1]]

            # motif1_path = benchlib_path + "/content/motif_plots/%s.%s.png" %(rbp_id1, pair[0])
            # motif2_path = benchlib_path + "/content/motif_plots/%s.%s.png" %(rbp_id2, pair[1])

            # with open(motif1_path, "rb") as image_file:
            #     encoded_image1 = base64.b64encode(image_file.read()).decode()
            # motif1_encoded = f"data:image/png;base64,{encoded_image1}"
            # with open(motif2_path, "rb") as image_file:
            #     encoded_image2 = base64.b64encode(image_file.read()).decode()
            # motif2_encoded = f"data:image/png;base64,{encoded_image2}"

            # if os.path.exists(motif1_path):
            #     pval_cont_lll[idx2][idx1][8] = "<img src=\"%s\" width=\"50px\"><br>" %(motif1_encoded)
            # if os.path.exists(motif2_path):
            #     pval_cont_lll[idx2][idx1][9] = "<img src=\"%s\" width=\"50px\"><br>" %(motif2_encoded)

        """
        Multiple testing correction.

        """

        cooc_pval_thr = args.cooc_pval_thr

        if args.cooc_pval_mode == 1:  # BH correction.

            pvals_corrected = false_discovery_control(p_val_list, method='bh')

            for i in range(len(p_val_list)):
                p_val_list[i] = pvals_corrected[i]
        
        elif args.cooc_pval_mode == 2:  # Bonferroni correction.

            # Multiple testing correction factor.
            mult_test_corr_factor = 1
            if len_motif_list > 1:
                mult_test_corr_factor = (len_motif_list*(len_motif_list-1))/2

            cooc_pval_thr = args.cooc_pval_thr / mult_test_corr_factor
            cooc_pval_thr = benchlib.round_to_n_significant_digits_v2(cooc_pval_thr, 4)

        elif args.cooc_pval_mode == 3:  # No correction.

            cooc_pval_thr = args.cooc_pval_thr

        else:
            assert False, "Invalid co-occurrence p-value mode (--cooc-pval-mode) set: %i" %(args.cooc_pval_mode)

        args.cooc_pval_thr = cooc_pval_thr

        # Update + filter p-values, write per-pair co-occurrence stats table.
        # NOTE(review): consider a with-statement for file handling here.
        COSOUT = open(cooc_stats_out, "w")
        COSOUT.write("motif_id1\tmotif_id2\tc_1and2\tc_only2\tc_only1\tc_not1not2\tcooc_pval\tavg_min_dist\tperc_close_hits_%int\n" %(args.max_motif_dist))

        pv_idx = 0  # Index into p_val_list (same order as motif_pairs).
        c_all_fisher_pval = 0
        c_sig_fisher_pval = 0
        perc_sig_fisher_pval = 0.0
        # Pre-set report stats on args (updated with final values below).
        args.c_all_fisher_pval = c_all_fisher_pval
        args.c_sig_fisher_pval = c_sig_fisher_pval
        args.perc_sig_fisher_pval = perc_sig_fisher_pval

        for pair in motif_pairs:
            pair = list(pair)
            pair.sort()

            idx1 = mtf2idx_dic[pair[0]]
            idx2 = mtf2idx_dic[pair[1]]

            pair_str = ",".join(pair)

            # Corrected p-value (depending on --cooc-pval-mode).
            p_value = p_val_list[pv_idx]

            # Round p-values to 4 significant digits.
            p_value = benchlib.round_to_n_significant_digits_v2(p_value, 4)

            p_value_plotted = p_value

            # Mean minimum distance between the two motifs' hit centers;
            # "-" if the pair shares no hit regions (then treated as far).
            avg_min_dist_str = pval_cont_lll[idx2][idx1][4]
            avg_min_dist = 10000
            if avg_min_dist_str != "-":
                avg_min_dist = float(avg_min_dist_str)

            # Get sequence motif consensus sequences.
            conseq1 = "-"
            if id2type_dic[pair[0]] == "meme_xml":
                conseq1 = "Consensus: " + motif2conseq_dic[pair[0]]
            elif id2type_dic[pair[0]] == "cm":
                conseq1 = "Structure motif"
            elif id2type_dic[pair[0]] == "regex":
                conseq1 = "Regex: " + regex
            else:
                assert False, "Invalid motif type: %s" %(id2type_dic[pair[0]])
            conseq2 = "-"
            if id2type_dic[pair[1]] == "meme_xml":
                conseq2 = "Consensus: " + motif2conseq_dic[pair[1]]
            elif id2type_dic[pair[1]] == "cm":
                conseq2 = "Structure motif"
            elif id2type_dic[pair[1]] == "regex":
                conseq2 = "Regex: " + regex
            else:
                assert False, "Invalid motif type: %s" %(id2type_dic[pair[1]])

            # Filters: non-significant p-value, motifs too close on average,
            # or (optionally) too-similar sequence motifs. Filtered pairs get
            # plotted p-value 1.0 plus a note in the hover info.
            if p_value > cooc_pval_thr:
                p_value_plotted = 1.0
                pval_cont_lll[idx2][idx1][7] = "(Filter: p-value > %s)<br>" %(str(cooc_pval_thr))
            
            if p_value <= cooc_pval_thr and avg_min_dist < args.min_motif_dist:
                p_value_plotted = 1.0
                pval_cont_lll[idx2][idx1][7] = "(Filter: mean minimum motif distance < %i)<br>" %(args.min_motif_dist)

            if args.motif_sim_thr is not None:
                if id2type_dic[pair[0]] == "meme_xml" and id2type_dic[pair[1]] == "meme_xml":
                    assert pair_str in motif_pair2sim_dic, "No similarity score found for motif pair \"%s\"" %(pair_str)
                    sim_score = motif_pair2sim_dic[pair_str]
                    if sim_score > args.motif_sim_thr:
                        p_value_plotted = 1.0
                        pval_cont_lll[idx2][idx1][7] = "(Filter: motif similarity > %s)<br>" %(str(args.motif_sim_thr))

            c_all_fisher_pval += 1
            if p_value <= cooc_pval_thr and avg_min_dist >= args.min_motif_dist:
                c_sig_fisher_pval += 1

            # Store (possibly filtered) plotted p-value + corrected p-value.
            pval_ll[idx1][idx2] = p_value_plotted
            pval_ll[idx2][idx1] = p_value_plotted
            pval_cont_lll[idx2][idx1][0] = str(p_value)
            pval_cont_lll[idx2][idx1][1] = str(p_value_plotted)
            pval_cont_lll[idx2][idx1][8] = conseq1 + "<br>"
            pval_cont_lll[idx2][idx1][9] = conseq2 + "<br>"

            con_pval_dic[pair_str] = p_value

            pv_idx += 1

            # Get stats for output.
            mtf1 = pair[0]
            mtf2 = pair[1]
            con_table = con_table_dic[pair_str]
            perc_close_hits = pval_cont_lll[idx2][idx1][5]

            COSOUT.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %(mtf1, mtf2, str(con_table[0][0]), str(con_table[0][1]), str(con_table[1][0]), str(con_table[1][1]), str(p_value), avg_min_dist_str, perc_close_hits))

        COSOUT.close()

        # Sanity check: one p-value computed per motif pair.
        assert c_all_fisher_pval == len(p_val_list), "Number of p-values (%i) does not match number of motif pairs (%i)" %(len(p_val_list), len(motif_pairs))
        # Percentage rounded to 2 digits.
        perc_sig_fisher_pval = 0.0
        if c_all_fisher_pval > 0:
            perc_sig_fisher_pval = round((c_sig_fisher_pval/c_all_fisher_pval)*100, 2)

        print("# of motif co-occurrence comparisons (calculated p-values in total): %i" %(c_all_fisher_pval))

        # Store final counts on args for the HTML report.
        args.c_all_fisher_pval = c_all_fisher_pval
        args.c_sig_fisher_pval = c_sig_fisher_pval
        args.perc_sig_fisher_pval = perc_sig_fisher_pval

        # Mention the distance filter in the summary only if it is active.
        min_motif_dist_info = ""
        if args.min_motif_dist > 0:
            min_motif_dist_info = " + --min-motif-dist >= %i" %(args.min_motif_dist)

        if args.cooc_pval_mode == 1:
            print("Number of significant co-occurrence p-values (BH corrected%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))
        elif args.cooc_pval_mode == 2:
            print("Number of significant co-occurrence p-values (Bonferroni corrected%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))
        elif args.cooc_pval_mode == 3:
            print("Number of significant co-occurrence p-values (no correction%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))

        """
        Print out RBPs sorted by KS p-value.

        """ 
        sorted_con_pval_dic = dict(sorted(con_pval_dic.items(), key=lambda item: item[1], reverse=False))

        print("Co-occurrence contingency table format: [A, B], [C, D]")
        print("A: Motif1 AND Motif2")
        print("B: NOT Motif1 AND Motif2")
        print("C: Motif1 AND NOT Motif2")
        print("D: NOT Motif1 AND NOT Motif2")

        if args.cooc_pval_mode == 1:
            print("Significance threshold: %s (on Benjamini-Hochberg corrected p-values)" %(str(cooc_pval_thr)))
        elif args.cooc_pval_mode == 2:
            print("Significance threshold: %s (Bonferroni corrected)" %(str(cooc_pval_thr)))
        elif args.cooc_pval_mode == 3:
            print("Significance threshold: %s" %(str(cooc_pval_thr)))
        else:
            assert False, "Invalid co-occurrence p-value mode (--cooc-pval-mode) set: %i" %(args.cooc_pval_mode)

        print("Co-occurrence of motifs (Motif IDs (Motif1,Motif2), contingency table, Fisher p-value)")

        c_reported = 0

        for pair_str, p_value in sorted_con_pval_dic.items():
            if p_value > cooc_pval_thr:
                break
            c_reported += 1
            mtf1 = pair_str_dic[pair_str][0]
            mtf2 = pair_str_dic[pair_str][1]
            con_table = con_table_dic[pair_str]
            print("%s\t%s\t%s" %(pair_str, str(con_table), str(p_value)))

        if not c_reported:
            print("NO SIGNIFICANT CO-OCCURRENCES FOUND!")

        print("")

        print("Calculate correlations ... ")

        df = DataFrame(reg_hits_dic, columns=sig_motif_ids_list)
        df_corr = df.corr(method='pearson')

        for i,mtf_i in enumerate(sig_motif_ids_list):
            for j,mtf_j in enumerate(sig_motif_ids_list):
                if j > i:
                    pval_ll[i][j] = None

        # Fisher p-value dataframe.
        df_pval = DataFrame(pval_ll, columns=sig_motif_ids_list, index=sig_motif_ids_list)

        # Write None to upper-diagonal entries.
        for i,mtf_i in enumerate(sig_motif_ids_list):
            for j,mtf_j in enumerate(sig_motif_ids_list):
                if j > i:
                    df_corr.loc[mtf_i, mtf_j] = None
                else:
                    # Round correlation values if != 1.0.
                    if df_corr.loc[mtf_i][mtf_j] == 1.0:
                        pval_cont_lll[i][j][6] = str(df_corr.loc[mtf_i][mtf_j])
                    else:
                        pval_cont_lll[i][j][6] = "{:.8f}".format(df_corr.loc[mtf_i][mtf_j])

        # Add NoneS to table.
        for i,mtf_i in enumerate(sig_motif_ids_list):
            for j,mtf_j in enumerate(sig_motif_ids_list):
                if j > i:
                    df_pval.loc[mtf_i, mtf_j] = None

        # -log10 p-value transformation.
        benchlib.log_tf_df(df_pval, convert_zero_pv=True, rbp_list=sig_motif_ids_list)



    """
    make html report.
    Check additional options?
    
    
class EnmoStats:

    def __init__(self,
                 motif_id: str,
                 rbp_id: str,
                 c_pos_hit_regions = 0,
                 c_neg_hit_regions = 0,
                 c_pos_regions = 0,
                 c_neg_regions = 0,
                 c_pos_hits = 0,
                 c_neg_hits = 0,
                 cont_table = "-",
                 fisher_pval = 1.0,
                 fisher_pval_corr = 1.0,
                 fisher_corr_mode = 1,  # 1: BH, 2: Bonferroni, 3: no correction
                 fisher_alt_hyp_mode = 1,  # Alternative hypothesis mode, 1: greater, 2: two-sided, 3: less
                 motif_type="meme_xml",
                 logo_png_file = False) -> None:

    
            rbp_stats = RBPStats(internal_id, cols[0], cols[1], cols[2], cols[3])
            rbp_stats.rbp_id = cols[4]
            rbp_stats.c_regions = int(cols[5])
            rbp_stats.mean_reg_len = float(cols[6])


            for other_motif_id, pair_c in sorted(pc_dic.items(), key=lambda item: item[1], reverse=True):

                if pair_c >= motif_min_pair_count:

                    # Plot motif (if sequence motif).
                    plot_str = "-"
                    if other_motif_id in seq_motif_blocks_dic:

                        other_rbp_id = id2name_dic[other_motif_id]
                        motif_plot = "%s.%s.png" %(other_rbp_id, other_motif_id)
                        motif_plot_out = plots_out_folder + "/" + motif_plot
                        plot_path = plots_folder + "/" + motif_plot

                        # Check if motif in motif database folder.
                        if motif_db_str:
                            db_motif_path = benchlib_path + "/content/%s_motif_plots/%s" %(motif_db_str, motif_plot)
                            if os.path.exists(db_motif_path):
                                shutil.copy(db_motif_path, motif_plot_out)

                        if not os.path.exists(motif_plot_out):
                            create_motif_plot(other_motif_id, seq_motif_blocks_dic,
                                              motif_plot_out)

                        plot_str = '<image src = "' + plot_path + '" width="300px"></image>'

                    # else:
                    #     print("Motif ID %s not in seq_motif_blocks_dic ... " %(other_motif_id))

                    # mdtext += "| %s | %s | %s | %i | %i | %i |\n" %(motif_id, other_motif_id, plot_str, pair_c, in_dic[other_motif_id], out_dic[other_motif_id])

                    mdtext += '<tr>' + "\n"
                    mdtext += "<td>" + motif_id + "</td>\n"
                    mdtext += "<td>" + other_motif_id + "</td>\n"
                    mdtext += "<td>" + plot_str + "</td>\n"
                    mdtext += "<td>" + str(pair_c) + "</td>\n"
                    mdtext += "<td>" + str(in_dic[other_motif_id]) + "</td>\n"
                    mdtext += "<td>" + str(out_dic[other_motif_id]) + "</td>\n"
                    mdtext += '</tr>' + "\n"


    Store for each RBP the regions with motif hits (and hit counts), using
    dictionary of dictionaries regions_with_motifs_dic.
    This tells us, how many input regions have motif hits, separated by RBP.
    Also store for each RBP the unique motif hit regions (and hit counts), using
    dictionary of dictionaries unique_motifs_dic.

    regions_with_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'region1': motif_c_region1, 'region2': motif_c_region2}, rbp_id2 -> {'region1': motif_c_region1}}
    unique_motifs_dic:
        Dictionary of dictionaries, format:
        {rbp_id1 -> {'motif_region1': c_motif_region1, 'motif_region2': c_motif_region2}, rbp_id2 -> .. }

    """



    html_report_out = args.out_folder + "/" + "report.rbpbench_enmo.html"
    if args.plot_abs_paths:
        html_report_out = os.path.abspath(args.out_folder) + "/" + "report.rbpbench_enmo.html"
    # If HTML file already exists, remove it.
    if os.path.exists(html_report_out):
        os.remove(html_report_out)

    plots_subfolder = "html_report_plots"

    print("Create report ... ")
    benchlib.enmo_generate_html_report(args, motif_enrich_stats_dic, 
                                       seq_motif_blocks_dic,
                                       benchlib_path,
                                       df_pval=df_pval,
                                       pval_cont_lll=pval_cont_lll,
                                       motif_pair2sim_dic=motif_pair2sim_dic,
                                       pos_seqs_dic=pos_seqs_dic,
                                       neg_seqs_dic=neg_seqs_dic,
                                       pos_reg2annot_dic=pos_reg2annot_dic,
                                       neg_reg2annot_dic=neg_reg2annot_dic,
                                       annot2color_dic=annot2color_dic,
                                       rbpbench_mode="enmo",
                                       html_report_out=html_report_out,
                                       plots_subfolder=plots_subfolder)


    """
    Output parameter settings.

    """
    # Output mode settings.
    print("Output parameter settings ... ")
    SETOUT = open(settings_file, "w")
    for arg in vars(args):
        SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
    for call in call_dic:
        SETOUT.write("%s\t%s\n" %(call, call_dic[call]))
    SETOUT.close()

    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    print("Motif enrichment stats .tsv:\n%s" %(enmo_stats_out))
    print("Motif co-occurrence stats .tsv:\n%s" %(cooc_stats_out))

    print("Filtered input regions .bed:\n%s" %(filtered_sites_bed))
    print("Filtered input regions .fa:\n%s" %(filtered_sites_fa))
    print("Background regions .bed:\n%s" %(neg_seqs_bed))
    print("Background regions .fa:\n%s" %(neg_seqs_fa))

    if reg_annot_table_file:
        print("Input region annotations .tsv:\n%s" %(reg_annot_table_file))
    print("Motif enrichment report .html:\n%s" %(html_report_out))
    print("")


################################################################################

def main_nemo(args):
    """
    Run search for significantly enriched neighboring motifs.

    """

    print("Running for you in NEMO mode ... ")

    assert os.path.exists(args.in_sites), "--in file \"%s\" not found" % (args.in_sites)
    assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)
    assert os.path.exists(args.in_gtf), "--gtf file \"%s\" not found" % (args.in_gtf)

    # Check --in BED format.
    benchlib.bed_check_format(args.in_sites, param_str="--in")
    if args.bg_user_incl_bed:
        assert os.path.exists(args.bg_user_incl_bed), "--bg-incl-bed \"%s\" not found" % (args.bg_user_incl_bed)
        benchlib.bed_check_format(args.bg_user_incl_bed, param_str="--bg-incl-bed")
    if args.bg_mask_bed:
        assert os.path.exists(args.bg_mask_bed), "--bg-mask-bed \"%s\" not found" % (args.bg_mask_bed)
        benchlib.bed_check_format(args.bg_mask_bed, param_str="--bg-mask-bed")

    # Is MEME >= v5 installed?
    if not args.meme_disable_check:
        assert benchlib.is_tool("meme"), "meme not in PATH"
        check, meme_version = benchlib.check_tool_version("meme -version", "5.0")
        assert check, "RBPBench requires meme version >= 5.0 (installed version: %s)" %(meme_version)

    # Check if MEME version is >= 5.5.4 (need to add fimo --no-pgc option to produce same results!).
    fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text"
    if not args.meme_disable_check:
        check, meme_version = benchlib.check_tool_version("meme -version", "5.5.4")
        args.meme_version = meme_version
        if check:
            fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"
    if args.meme_no_pgc:
        fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"

    # Regex check.
    regex_type = "sequence"
    if args.regex:
        if args.regex_type == 1:
            print("Check given --regex type ... ")
            if benchlib.looks_like_structure(args.regex):
                print("Given --regex looks like structure pattern ... ")
                regex_type = "structure"
            else:
                print("Given --regex assumed to be standard regex ... ")
                regex_type = "sequence"
        elif args.regex_type == 2:
            regex_type = "sequence"
        elif args.regex_type == 3:
            regex_type = "structure"
        else:
            assert False, "unexpected --regex-type value set (%d)" %(args.regex_type)
        # If standard regex, check validity.
        if regex_type == "sequence":
            assert benchlib.is_valid_regex(args.regex), "given --regex \"%s\" is not a valid regular expression. Please provide valid expression" % (args.regex)

    # Fixed seed number?
    import random
    if args.random_seed is not None:
        random.seed(args.random_seed)

    # Limit inputs.
    assert benchlib.boundary_check(args.bg_shuff_factor, 1, 20), "set --bg-shuff-factor expected to be >= 1 and <= 20"
    assert benchlib.boundary_check(args.bg_shuff_k, 1, 5), "set --bg-shuff-k expected to be >= 1 and <= 5"
    assert benchlib.boundary_check(args.bg_min_size, 1, 200000), "set --bg-min-size expected to be >= 1 and <= 200000"
    if args.motif_sim_thr is not None:
        assert benchlib.boundary_check(args.motif_sim_thr, 0.0, 1000), "set --motif-sim-thr expected to be >= 0 and <= 1000"
    assert benchlib.boundary_check(args.motif_sim_cap, 0.5, 500), "set --motif-sim-cap expected to be >= 0.5 and <= 500"

    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.fimo_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.fimo_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    if args.fimo_user_ntf_file:
        fimo_freqs_file = args.fimo_user_ntf_file
    assert os.path.exists(fimo_freqs_file), "set FIMO nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    ENCODE blacklist.

    https://github.com/Boyle-Lab/Blacklist/
    hg38-blacklist.v2.bed
    """
    blacklist_bed = db_path + "/hg38-blacklist.v2.6col.bed"

    """
    Motif database.
    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                            db_path=db_path)

    motif_pair2sim_dic = {}
    print("Read in motif similarities ... ")
    tomtom_sim_file = get_local_tomtom_sim_file(args.motif_db, db_path=db_path)
    motif_pair2sim_dic = benchlib.read_in_tomtom_sim_results(tomtom_sim_file,
                                                             motif_sim_cap=args.motif_sim_cap)
    print("Read in %i motif similarities ... " %(len(motif_pair2sim_dic)))

    # Custom motif database: folder given.
    if args.custom_db:
        assert not args.custom_db_meme_xml, "--custom-db folder set incompatible with --custom-db-meme-xml. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_cm, "--custom-db folder set incompatible with --custom-db-cm. Provide custom motif database either via folder (--custom-db) or as single files"
        assert not args.custom_db_info, "--custom-db folder set incompatible with --custom-db-info. Provide custom motif database either via folder (--custom-db) or as single files"
        seq_motifs_db_file, str_motifs_db_file, rbp2ids_file = specify_custom_motif_db(args.custom_db)
        if os.path.exists(seq_motifs_db_file):
            print("Calculate motif similarities for supplied custom motif database ... ")
            motif_pair2sim_dic = benchlib.calc_tomtom_sim(seq_motifs_db_file, args.out_folder,
                                                          motif_sim_cap=args.motif_sim_cap)
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
    # Custom motif database: single files given.
    if args.custom_db_meme_xml or args.custom_db_cm or args.custom_db_info:
        args.custom_db_id = benchlib.remove_special_chars_from_str(args.custom_db_id)
        assert args.custom_db_id, "empty string after removing special chars from --custom-db-id. Please provide alphanumeric string for custom motif database ID (- or _ are okay as well)"
        motif_db_str = args.custom_db_id
        assert not args.custom_db, "single custom motif database files provided not compatible with --custom-db. Provide custom motif database either via folder (--custom-db) or as single files"
        assert args.custom_db_info, "--custom-db-info needed to define custom motif database"
        rbp2ids_file = args.custom_db_info
        assert args.custom_db_meme_xml or args.custom_db_cm, "--custom-db-meme-xml and/or --custom-db-cm needed to define custom motif database"
        if args.custom_db_meme_xml:
            seq_motifs_db_file = args.custom_db_meme_xml
            if os.path.exists(seq_motifs_db_file):
                print("Calculate motif similarities for supplied custom motif database ... ")
                motif_pair2sim_dic = benchlib.calc_tomtom_sim(seq_motifs_db_file, args.out_folder,
                                                              motif_sim_cap=args.motif_sim_cap)
        else:
            seq_motifs_db_file = ""  # setting to empty string results in os.path.exists -> False.
        if args.custom_db_cm:
            str_motifs_db_file = args.custom_db_cm
        else:
            str_motifs_db_file = ""

    args.motif_db_str = motif_db_str

    """
    Get ID mappings

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")

    """
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)

    # Motif ID -> RBP name (== RBP ID) mapping.
    id2name_dic = {}
    for rbp_id in name2ids_dic:
        for motif_id in name2ids_dic[rbp_id]:
            id2name_dic[motif_id] = rbp_id

    """
    Get MEME XML database motif blocks dictionary.

    """
    seq_motif_blocks_dic = {}
    if os.path.exists(seq_motifs_db_file):
        seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file,
                                                           empty_check=True)
    for motif_id in seq_motif_blocks_dic:
        assert motif_id in id2name_dic, "MEME XML motif ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    motif2conseq_dic = {}
    if seq_motif_blocks_dic:
        print("Get consensus sequences for sequence motifs ... ")
        for motif_id in seq_motif_blocks_dic:
            consensus_seq = benchlib.get_consensus_motif_from_seq_block(seq_motif_blocks_dic[motif_id])
            motif2conseq_dic[motif_id] = consensus_seq

    """
    Get covariance model database motif blocks dictionary.

    """
    str_motif_blocks_dic = {}
    if str_motifs_db_file and os.path.exists(str_motifs_db_file):
        str_motif_blocks_dic = benchlib.read_in_cm_blocks(str_motifs_db_file, 
                                                          empty_check=True)
        for motif_id in str_motif_blocks_dic:
            assert motif_id in id2name_dic, "Covariance model accession ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

    """
    Output folders + files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    f1_sites_bed = args.out_folder + "/in_sites.filtered.tmp.bed"
    filtered_sites_bed = args.out_folder + "/in_sites.filtered.bed"
    filtered_sites_fa = args.out_folder + "/in_sites.filtered.fa"
    seq_motifs_xml = args.out_folder + "/seq_motifs.xml"
    str_motifs_cm = args.out_folder + "/str_motifs.cm"
    fimo_res_tsv = args.out_folder + "/fimo_results.tsv"
    cmsearch_res_txt = args.out_folder + "/cmsearch_results.txt"

    chr_len_file = args.out_folder + "/" + "reference_lengths.out"
    mrna_regions_bed = args.out_folder + "/mrna_regions.bed"

    tr_seqs_fa = args.out_folder + "/transcript_sequences.fa"
    bg_excl_bed = args.out_folder + "/background_excluded_regions.bed"
    bg_incl_bed = args.out_folder + "/background_included_regions.bed"
    bg_shuffle_in_bed = args.out_folder + "/" + "background_shuffle_in.bed"

    settings_file = args.out_folder + "/settings.rbpbench_nemo.out"

    # Motif enrichment stats table.
    nemo_stats_out = args.out_folder + "/motif_enrichment_stats.tsv"
    # Motif co-occurrence stats table.
    cooc_stats_out = args.out_folder + "/motif_cooc_stats.tsv"

    cmstat_tmp_out = args.out_folder + "/cmstat_out.tmp.txt"

    # Delete if existing folder.
    if os.path.exists(fimo_res_tsv):
        os.remove(fimo_res_tsv)
    if os.path.exists(cmsearch_res_txt):
        os.remove(cmsearch_res_txt)


    """
    Load RBP data based on --rbps (+ optionally USER data).

    """

    rbp_in_dic = {}
    for rbp_id in args.list_rbps:
        rbp_in_dic[rbp_id] = 1

    # RBPs for motif search.
    loaded_rbps_dic = {}

    # USER set?
    user_motifs = False
    user_rbp_id = False
    if "USER" in rbp_in_dic:
        user_motifs = True
    else:
        assert not args.user_meme_xml, "--user-meme-xml provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_cm, "--user-cm provided but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"
        assert not args.user_rbp_id, "--user-rbp-id set but --rbps USER not set. Please add USER to --rbps list to search for user-supplied motifs"

    special_rbp_ids_list = ["USER", "REGEX"]

    # If ALL set, load all RBPs (+ optionally USER).
    if "ALL" in rbp_in_dic:
        if len(rbp_in_dic) == 2:
            assert user_motifs, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        if len(rbp_in_dic) > 2:
            assert False, "set --rbps ALL, --rbps ALL USER, or individual RBPs (+ optinally USER) --rbps RBP1 RBP2 ... USER"
        print("--rbps ALL selected. Loading all database motifs ... ")
        for rbp_id in name2ids_dic:
            loaded_rbps_dic[rbp_id] = motif_db_str

    else:
        # Load individual RBPs.
        for rbp_id in rbp_in_dic:
            if rbp_id not in special_rbp_ids_list:
                """
                Check if RBP ID in database.
                Suggest similar RBPs based on string similarity (edit distance).

                """
                if rbp_id not in name2ids_dic:
                    db_rbp_list = []
                    for db_rbp_id in name2ids_dic:
                        db_rbp_list.append(db_rbp_id)
                    pair_dist_dic = benchlib.calc_edit_dist_query_list(rbp_id, db_rbp_list)
                    max_c = 10
                    c = 0
                    suggested_rbps = []
                    for key, value in sorted(pair_dist_dic.items(), key=lambda item: item[1], reverse=False):
                        if c >= max_c:
                            break
                        c += 1
                        suggested_rbps.append(key)
                    suggested_rbps_str = ",".join(suggested_rbps)
                    assert False, "provided --rbps ID %s not in internal motif database (%s). Please provide RBP name present in database. Did you mean (any of) the following database ID(s) (top 10 hits based on string similarity): %s ?" %(rbp_id, motif_db_str, suggested_rbps_str)
                # assert rbp_id in name2ids_dic, "provided --rbps ID %s not in internal motif database. Please provide RBP name present in database" %(rbp_id)
                loaded_rbps_dic[rbp_id] = motif_db_str

    # Motif IDs for search.
    loaded_motif_ids_dic = {}
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            loaded_motif_ids_dic[motif_id] = motif_db_str

    """
    Check and load provided USER data.

    """

    if user_motifs:
        print("--rbps USER selected. Check + load provided USER motifs ... ")
        assert args.user_rbp_id, "--rbps USER demands --user-rbp-id to be set to connect the supplied motif(s) with an RBP ID"
        assert args.user_meme_xml or args.user_cm, "--rbps USER requires a provided sequence or structure motif file (via --user-meme-xml AND/OR --user-cm)"

        # Reformat user_rbp_id. 
        user_rbp_id = benchlib.remove_special_chars_from_str(args.user_rbp_id)
        assert user_rbp_id, "empty string after removing special chars from --user-rbp-id. Please provide alphanumeric string for RBP ID (- or _ are okay as well)"

        assert user_rbp_id not in loaded_rbps_dic, "user RBP ID %s already selected from database. Please deselect respective database RBP ID or provide unique user RBP ID via --user-rbp-id" %(user_rbp_id)
        loaded_rbps_dic[user_rbp_id] = "user"
        # In case user_rbp_id in database, reset motif IDs associated to user_rbp_id.
        name2ids_dic[user_rbp_id] = []
        print("RBP ID for user-supplied motifs:", user_rbp_id)

        user_seq_motif_blocks_dic = {}
        if args.user_meme_xml:
            assert os.path.exists(args.user_meme_xml), "--user-meme-xml file \"%s\" not found" % (args.user_meme_xml)
            user_seq_motif_blocks_dic = benchlib.read_in_xml_motifs(args.user_meme_xml, empty_check=False)
            assert user_seq_motif_blocks_dic, "no motifs read in from provided --user-meme-xml. Make sure to supply sequence motifs in MEME XML format!"
            # Check if motif ID already loaded.
            for acc_id in user_seq_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied MEME XML motif ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change user motif ID to a unique motif ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                seq_motif_blocks_dic[acc_id] = user_seq_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "meme_xml"
                id2name_dic[acc_id] = user_rbp_id

        user_str_motif_blocks_dic = {}
        if args.user_cm:
            assert os.path.exists(args.user_cm), "--user-cm file \"%s\" not found" % (args.user_cm)
            # Check for valid format.
            acc_ids_dic = benchlib.check_cm_file(args.user_cm, cmstat_tmp_out, empty_check=False)
            # Read in covariance model blocks.
            user_str_motif_blocks_dic = benchlib.read_in_cm_blocks(args.user_cm)
            for acc_id in acc_ids_dic:
                assert acc_id in user_str_motif_blocks_dic, "accession ID %s not in blocks dictionary. Please contact developers!" %(acc_id)
            # Check if motif ID already loaded.
            for acc_id in user_str_motif_blocks_dic:
                assert acc_id not in loaded_motif_ids_dic, "user-supplied covariance model accession (ACC) ID %s already selected, i.e., ID is already associated with selected database RBP %s. Please change to a unique accession ID or deselect the respective RBP" %(acc_id, id2name_dic[acc_id])
                loaded_motif_ids_dic[acc_id] = "user"
                # Add user to database blocks (overwrite if same ID encountered).
                str_motif_blocks_dic[acc_id] = user_str_motif_blocks_dic[acc_id]
                name2ids_dic[user_rbp_id].append(acc_id)
                id2type_dic[acc_id] = "cm"
                id2name_dic[acc_id] = user_rbp_id


    """
    Get sequence motif lengths.

    """

    id2len_dic = benchlib.get_seq_motif_lengths(seq_motif_blocks_dic)

    """
    Optionally filter DREME/MEME sequence motifs by length.

    """

    if args.motif_min_len or args.motif_max_len:

        if args.motif_min_len and args.motif_max_len:
            assert args.motif_min_len <= args.motif_max_len, "set --motif-min-len needs to be <= --motif-max-len!"

        print("Filtering sequence motifs by set min/max lengths ... ")

        seq_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, name2ids_dic, c_flt_out = benchlib.filter_dic_by_motif_lengths(
                seq_motif_blocks_dic, str_motif_blocks_dic, loaded_rbps_dic, loaded_motif_ids_dic, id2name_dic,
                id2len_dic, motif_min_len=args.motif_min_len, motif_max_len=args.motif_max_len
            )

        assert loaded_rbps_dic, "no MEME/DREME sequence motifs left after length filtering. Please adjust length filter range (--motif-min-len, --motif-max-len), RBP selection, or disable length filtering!"

        print("Filtered out %d sequence motifs outside set length range" %(c_flt_out))


    """
    Check if loaded RBP IDs have motifs.

    """
    for rbp_id in loaded_rbps_dic:
        for motif_id in name2ids_dic[rbp_id]:
            found = 0
            if motif_id in seq_motif_blocks_dic:
                found += 1
            if motif_id in str_motif_blocks_dic:
                found += 1
            assert found, "no motifs loaded for RBP ID \"%s\". Please provide the respective motifs file" %(rbp_id)


    """
    If --regex is set:
    Treat regex as sequence motif / fimo type.
    rbp_id: regex, motif_id: regex, motif_db: regex

    """

    regex_id = args.regex_id
    regex = args.regex
    
    if args.regex:

        if regex_type == "sequence":

            # Remove , ; from given regex, to avoid motif_id format conflicts.
            regex = benchlib.remove_special_chars_from_str(args.regex,
                                                        reg_ex="[ ;]",
                                                        to_upper=False)  # [ :;\(\)]
            
            assert regex, "empty string after removing special chars ( ;) from --regex. Please provide a valid regex with DNA letters"

            # Convert IUPAC codes (if present) in regex to standard regex format.
            regex = benchlib.convert_iupac_in_regex(regex)

        elif regex_type == "structure":

            # Check structure pattern and format.
            regex = benchlib.check_format_str_pattern(args.regex)

        args.regex = regex

        regex_id = benchlib.remove_special_chars_from_str(args.regex_id)

        assert regex_id, "empty string after removing special chars from --regex-id. Please provide alphanumeric string for regex ID (- or _ are okay as well)"
        assert regex_id not in name2ids_dic, "--regex set but a different RBP ID with name \"%s\" was found. Please provide a different RBP ID or --regex-id" %(regex_id)

        args.regex_id = regex_id

        if args.motif_regex_id:
            assert regex_id not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or --regex-id" %(regex_id)

            id2name_dic[regex_id] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex_id] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex_id] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex_id]  # rbp_id -> motif_ids

        else:
            assert regex not in id2type_dic, "--regex set but a different motif ID with name \"%s\" was found. Please provide a different motif ID or use --motif-regex-id" %(regex_id)

            id2name_dic[regex] = regex_id    # motif_id -> rbp_id
            id2type_dic[regex] = "regex"  # motif_id -> motif type string; motif type string can be regex, cm, meme_xml
            loaded_motif_ids_dic[regex] = regex_id  # motif_id -> motif_db_str
            name2ids_dic[regex_id] = [regex]  # rbp_id -> motif_ids

        loaded_rbps_dic[regex_id] = regex_id  # rbp_id -> motif_db_str

        """
        Dictionaries that use motif_id as keys.
        loaded_motif_ids_dic
        name2ids_dic
        id2name_dic
        id2type_dic
        """
    else:
        regex_id = False


    """
    Get function ID -> function descriptions mapping.

    """
    fid2desc_file = db_path + "/rbp_function_list.32728246.tsv"
    fid2desc_dic, desc2fid_dic = benchlib.get_fid2desc_mapping(fid2desc_file)


    """
    If --motifs is set, filter loaded_rbps_dic + loaded_motif_ids_dic.
    
    """

    if args.motifs_list:

        print("Filtering loaded motifs by provided --motifs ... ")

        motif_fids_dic = {}

        for motif_id in args.motifs_list:
            motif_fids_dic[motif_id] = 1

        filtered_rbps_dic = {}
        filtered_motif_ids_dic = {}
        filtered_name2ids_dic = {}
        for motif_id in motif_fids_dic:
            assert motif_id in id2name_dic, "motif ID \"%s\" provided via --motifs not found in internal motif ID -> RBP ID mapping. Please provide valid motif IDs" %(motif_id)
            assert motif_id in loaded_motif_ids_dic, "motif ID \"%s\" provided via --motifs not found in loaded motifs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_motif_ids_dic[motif_id]
            filtered_motif_ids_dic[motif_id] = set_db_str
            rbp_id = id2name_dic[motif_id]
            assert rbp_id in loaded_rbps_dic, "motif ID \"%s\" provided via --motifs not found in loaded RBPs (via --rbps). Please provide motif IDs that are part of loaded RBPs" %(motif_id)
            set_db_str = loaded_rbps_dic[rbp_id]
            filtered_rbps_dic[rbp_id] = set_db_str
            if rbp_id in filtered_name2ids_dic:
                filtered_name2ids_dic[rbp_id].append(motif_id)
            else:
                filtered_name2ids_dic[rbp_id] = [motif_id]

        if args.regex:
            filtered_rbps_dic[regex_id] = regex_id
            if args.motif_regex_id:
                filtered_motif_ids_dic[regex_id] = regex_id
                filtered_name2ids_dic[regex_id] = [regex_id]
            else:
                filtered_motif_ids_dic[regex] = regex_id
                filtered_name2ids_dic[regex_id] = [regex]

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by --motifs: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by --motifs:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by --motifs: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by --motifs:", c_loaded_motif_ids_post)

        assert loaded_motif_ids_dic, "no remaining motifs after filtering by provided motif IDs (via --motifs). Please provide compatible RBPs + motif IDs"

        name2ids_dic = filtered_name2ids_dic


    """
    If --functions set, filter loaded_rbps_dic + loaded_motif_ids_dic.

    """
    rbp_fids_dic = {}
    if args.rbp_functions:

        print("Filtering loaded RBPs by provided function IDs ... ")

        for fid in args.rbp_functions:
            rbp_fids_dic[fid] = 1

        # Check if provided function IDs are valid.
        for fid in rbp_fids_dic:
            assert fid in fid2desc_dic, "function ID \"%s\" provided via --functions not found in internal function ID -> description mapping. Please provide valid function IDs (see rbpbench info for a detailed description)" %(fid)

        # Filter loaded_rbps_dic.
        filtered_rbps_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:  # keep regex if set.
                filtered_rbps_dic[regex_id] = regex_id
                continue
            if rbp_id not in name2fids_dic:
                continue
            for fid in rbp_fids_dic:
                if fid in name2fids_dic[rbp_id]:
                    set_db_str = loaded_rbps_dic[rbp_id]
                    filtered_rbps_dic[rbp_id] = set_db_str
                    break

        c_loaded_rbps_pre = len(loaded_rbps_dic)
        c_loaded_rbps_post = len(filtered_rbps_dic)
        loaded_rbps_dic = filtered_rbps_dic

        print("# RBPs pre-filtering by functions: ", c_loaded_rbps_pre)
        print("# RBPs post-filtering by functions:", c_loaded_rbps_post)

        assert loaded_rbps_dic, "no remaining RBPs after filtering by provided function IDs. Please provide compatible RBPs + function IDs (see rbpbench info for annotated RBP functions)"

        # Filter loaded motif IDs.
        filtered_motif_ids_dic = {}
        for rbp_id in loaded_rbps_dic:
            if rbp_id == regex_id:
                if args.motif_regex_id:
                    filtered_motif_ids_dic[regex_id] = regex_id
                else:
                    filtered_motif_ids_dic[regex] = regex_id
            else:
                for motif_id in name2ids_dic[rbp_id]:
                    set_db_str = loaded_motif_ids_dic[motif_id]
                    filtered_motif_ids_dic[motif_id] = set_db_str
        
        c_loaded_motif_ids_pre = len(loaded_motif_ids_dic)
        c_loaded_motif_ids_post = len(filtered_motif_ids_dic)
        loaded_motif_ids_dic = filtered_motif_ids_dic

        print("# motif IDs pre-filtering by functions: ", c_loaded_motif_ids_pre)
        print("# motif IDs post-filtering by functions:", c_loaded_motif_ids_post)



    """
    Load RBP data, store in RBP() class.

    """

    # Store motif IDs for search.
    search_rbps_dic = {}
    seq_rbps_dic = {}
    str_rbps_dic = {}
    motif_id2idx_dic = {} # motif ID -> list index.
    args.internal_id = []

    for rbp_id in loaded_rbps_dic:
    
        internal_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
        args.internal_id.append(internal_id)
        rbp = benchlib.RBP(rbp_id, internal_id)

        for motif_id in name2ids_dic[rbp_id]:
        
            assert motif_id in loaded_motif_ids_dic, "motif_id %s not in loaded_motif_ids_dic" %(motif_id)

            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "regex":
                rbp.seq_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
                rbp.seq_motif_hits.append(0)
                seq_rbps_dic[rbp_id] = 1
            elif id2type_dic[motif_id] == "cm":
                rbp.str_motif_ids.append(motif_id)
                motif_id2idx_dic[motif_id] = len(rbp.str_motif_ids) - 1
                rbp.str_motif_hits.append(0)
                str_rbps_dic[rbp_id] = 1
            else:
                assert False, "unknown motif type (\"%s\") set for motif_id %s" %(id2type_dic[motif_id], motif_id)

        search_rbps_dic[rbp_id] = rbp

    print("# of RBP IDs for search:    ", len(loaded_rbps_dic))
    print("# of motif IDs for search:  ", len(loaded_motif_ids_dic))


    """
    Get chromosome IDs + sizes from --genome.
    """

    # Get chromosome sizes.
    chr_ids_dic = {}
    chr_len_dic = {}
    print("Get --genome chromosome IDs + sizes ... ")
    benchlib.genome_fasta_get_chr_sizes_file(args.in_genome, chr_len_file,
                                             check_ids=True,
                                             seq_len_dic=chr_len_dic)
    for chr_id in chr_len_dic:
        chr_ids_dic[chr_id] = 1

    """
    Guess chromosome ID style.

    chr_style:
        1: chr1, chr2, ..., chrX, chrM
        2: 1, 2, ... , X, MT

    """
    print("Guess chromosome ID style (based on --genome FASTA headers) ... ")
    chr_style = benchlib.guess_chr_id_style(chr_ids_dic)

    """
    Get chromosome or transcript IDs from --in sites (column 1).
    +
    Determine type of input sites (genomic or transcript sites).

    """

    chr_ids_in_dic = benchlib.bed_read_chr_ids_dic(args.in_sites)

    assert chr_ids_in_dic, "--in sites BED seems to be empty (no column 1 chromosome IDs read in). Make sure to provide valid BED file"

    genomic_sites_input = False
    for chr_id in chr_ids_in_dic:
        if chr_id in chr_ids_dic:
            genomic_sites_input = True
        break

    args.genomic_sites_input = genomic_sites_input

    # For genomic input, verify that every chromosome ID appearing in --in
    # is actually present in the --genome FASTA.
    if genomic_sites_input:
        print("--in site seem to be genomic sites. Checking ... ")
        for chr_id in chr_ids_in_dic:
            # Bug fix: the message string contained a "%s" placeholder but the
            # % operator was never applied, so the offending chromosome ID was
            # not shown. Format it like the sibling asserts in this file.
            assert chr_id in chr_ids_dic, "chromosome ID %s from --in sites BED file not found in --genome FASTA file. Please provide compatible --in and --genome files" %(chr_id)

    """
    Read in gene infos from --gtf.

    """

    print("Read in gene features from --gtf ... ")
    tr2gid_dic = {}
    gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                  tr2gid_dic=tr2gid_dic,
                                                  check_chr_ids_dic=chr_ids_dic,
                                                  chr_style=chr_style,
                                                  empty_check=False)
    assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
    c_gene_infos = len(gid2gio_dic)
    print("# gene features read in from --gtf:", c_gene_infos)

    if not genomic_sites_input:
        print("--in site seem to be transcript sites. Checking ... ")
        for tr_id in chr_ids_in_dic:
            assert tr_id in tr2gid_dic, "transcript ID %s from --in sites BED file not found in --gtf file. Please provide compatible --in and --gtf files" %(tr_id)

    # Get most prominent transcripts or if --tr-list is set, read in transcript IDs.
    tr_ids_dic = {}
    if args.tr_list:
        assert os.path.exists(args.tr_list), "given --tr-list file \"%s\" not found" % (args.tr_list)
        tr_ids_dic = benchlib.read_ids_into_dic(args.tr_list,
                                                check_dic=False)
        assert tr_ids_dic, "no IDs read in from provided --tr-list file. Please provide a valid IDs file (one ID per row)"
        for tr_id in tr_ids_dic:
            assert tr_id in tr2gid_dic, "transcript ID \"%s\" from provided --tr-list file does not appear in --gtf file. Please provide compatible IDs + files" %(tr_id)
            # Store transcript ID -> gene ID mapping as dictionary value.
            tr_ids_dic[tr_id] = tr2gid_dic[tr_id]
        print("# of transcript IDs (read in from --tr-list): ", len(tr_ids_dic))
    else:
        # Get most prominent transcripts from gene infos.
        print("Select most prominent transcript (MPT) for each gene ... ")
        tr_ids_dic = benchlib.select_mpts_from_gene_infos(gid2gio_dic,
                                basic_tag=False,  # do not be strict (only_tsl=False too).
                                ensembl_canonical_tag=False,
                                prior_basic_tag=True,  # Prioritize basic tag transcript.
                                prior_mane_select=True,  # mane select if set trumps all.
                                prior_lncrna_primary_tag=True,
                                only_tsl=False)
        assert tr_ids_dic, "most prominent transcript selection from gene infos failed. Please contact developers"
        print("# of transcript IDs (most prominent transcripts): ", len(tr_ids_dic))

    # Add transcript IDs from --in sites to tr_ids_dic.
    if not genomic_sites_input:
        for tr_id in chr_ids_in_dic:
            tr_ids_dic[tr_id] = 1

    # Check exon order (return True if minus strand exon 1 is most downstream, not most upstream, which is the correct way).
    print("Check minus-strand exon order in --gtf ... ")
    correct_min_ex_order = benchlib.gtf_check_exon_order(args.in_gtf)
    if correct_min_ex_order:
        print("Correct order encountered ... ")
    else:
        print("Reverse order encountered ... ")

    # Get transcript infos.
    print("Read in transcript infos from --gtf ... ")
    tid2tio_dic = benchlib.gtf_read_in_transcript_infos(args.in_gtf,
                                                        tr_ids_dic=tr_ids_dic,
                                                        correct_min_ex_order=correct_min_ex_order,
                                                        chr_style=chr_style,
                                                        empty_check=False)

    assert tid2tio_dic, "no transcript infos read in from --gtf. Please contact developers"

    # Calculate intron lengths.
    total_tr_intron_len = 0
    total_tr_exon_len = 0

    # Get transcript lengths.
    tr_len_dic = {}  # transcript ID -> (spliced) transcript length.

    for tid in tid2tio_dic:
        tr_len_dic[tid] = tid2tio_dic[tid].tr_length
        total_tr_intron_len += tid2tio_dic[tid].total_intron_len
        # tr_length (exonic length) counts towards the exon total.
        total_tr_exon_len += tid2tio_dic[tid].tr_length

    # Intron/exon length ratios over all selected transcripts.
    # NOTE(review): assumes total_tr_len > 0, i.e. read-in transcripts have
    # non-zero lengths (tid2tio_dic asserted non-empty above) — verify.
    total_tr_len = total_tr_intron_len + total_tr_exon_len

    total_tr_intron_ratio = round(total_tr_intron_len / total_tr_len, 6)
    total_tr_exon_ratio = round(total_tr_exon_len / total_tr_len, 6)

    print("Total transcript intron length:", total_tr_intron_len)
    print("Total transcript exon length:  ", total_tr_exon_len)
    print("Total transcript intron ratio: ", total_tr_intron_ratio)

    """
    Filter / extend --in regions BED file.

    """

    # Process extension info.
    ext_parts = args.ext_up_down.split(",")
    c_ext_parts = len(ext_parts)
    ext_up = 0
    ext_down = 0
    if c_ext_parts == 1:
        ext_up = int(ext_parts[0])
        ext_down = int(ext_parts[0])
    elif c_ext_parts == 2:
        ext_up = int(ext_parts[0])
        ext_down = int(ext_parts[1]) 
    else:
        assert False, "invalid --ext argument provided (correct format: --ext 10 OR --ext 20,10)"

    max_ext = max(ext_up, ext_down)
    assert max_ext > 0, "--ext <= 0. Please provide a positive value to define context size to check for neighboring motifs"  

    print("--in extension upstream == %i, downstream == %i ... " %(ext_up, ext_down))

    args.ext_up = ext_up
    args.ext_down = ext_down

    # Filter / extend --in BED.
    print("Preprocess --in sites ... ")

    # Store original (ie before extending) region info (chr_id, start, end, strand) for each region ID.
    core_reg_dic = {}

    feat_len_dic = {}
    if genomic_sites_input:
        feat_len_dic = chr_len_dic
    else:
        feat_len_dic = tr_len_dic

    reg_stats_dic = benchlib.bed_filter_extend_bed(args.in_sites, f1_sites_bed,
                                                   ext_up=ext_up,
                                                   ext_down=ext_down,
                                                   remove_dupl=True,
                                                   score_col=args.bed_score_col,
                                                   score_thr=args.bed_sc_thr,
                                                   new_reg_ids=True,
                                                   core_reg_id="pos",
                                                   core_reg_dic=core_reg_dic,
                                                   score_rev_filter=args.bed_sc_thr_rev_filter,
                                                   chr_len_dic=feat_len_dic,
                                                   use_region_ids=False)

    print("# --in regions pre-filtering:  ", reg_stats_dic["c_in"])
    print("# --in regions post-filtering: ", reg_stats_dic["c_out"])
    print("# regions with invalid chr_id: ", reg_stats_dic["c_chr_filter"])
    print("# duplicated regions removed:  ", reg_stats_dic["c_dupl_filter"])
    print("# regions filtered by score:   ", reg_stats_dic["c_sc_thr"])

    assert reg_stats_dic["c_out"], "no --in BED sites remain after chromosome ID (or optionally score) filtering. If caused by invalid chr_id filtering, make sure chromosome IDs in --genome FASTA and --in BED files are compatible (i.e., \"chr1\" vs. \"1\" notation). If --in regions are on transcripts, use rbpbench searchrna"

    # Check if all core regions have same length.
    core_reg_len_list = []
    for reg_id in core_reg_dic:
        core_reg_len_list.append(core_reg_dic[reg_id][2] - core_reg_dic[reg_id][1])
    core_reg_len_set = set(core_reg_len_list)
    if len(core_reg_len_set) != 1:
        print("WARNING: --in sites differ in length. For optimal results provide same length sites (e.g. motif hit sites or single positions)")
    max_core_len = max(core_reg_len_set)

    # Define motif hit context plot length.
    from math import ceil
    max_dist_plot_range = max_ext + ceil(max_core_len/2)
    dist_plot_range = range(-max_dist_plot_range, max_dist_plot_range+1)

    print("Maximum --in site length: ", max_core_len)
    print("Motif distance plot range:", max_dist_plot_range)


    fasta_index_file = tr_seqs_fa + ".fai"
    if os.path.exists(fasta_index_file):
        os.remove(fasta_index_file)

    pos_seqs_dic = {}
    tr_seqs_dic = {}

    if genomic_sites_input:

        print("Extract --in sequences from --genome ... ")
        benchlib.bed_extract_sequences_from_fasta(f1_sites_bed, 
                                                args.in_genome, filtered_sites_fa,
                                                add_param="-name",
                                                print_warnings=True)

        print("Read in sequences ... ")
        pos_seqs_dic = benchlib.read_fasta_into_dic(filtered_sites_fa,
                                        dna=True,
                                        all_uc=True,
                                        id_check=True,
                                        name_bed=True,
                                        empty_check=False,
                                        skip_n_seqs=False)

        # Filter BED file to discard regions for which no sequence was extracted.
        benchlib.bed_filter_by_seqs_dic(pos_seqs_dic, f1_sites_bed, filtered_sites_bed,
                                        use_col4_id=True)

    else:

        # Get transcript sequences.
        print("Extract transcript sequences ... ")
        tr_seqs_dic = benchlib.get_transcript_sequences_from_gtf(tid2tio_dic, args.in_genome,
                                                                 tr_ids_dic=tr_ids_dic,
                                                                 tmp_out_folder=args.out_folder)

        print("Output transcript sequences to FASTA ... ")
        benchlib.fasta_output_dic(tr_seqs_dic, tr_seqs_fa,
                                  split=True)

        print("Extract site sequences from transcript sequences ... ")
        benchlib.bed_extract_sequences_from_fasta(f1_sites_bed, 
                                                  tr_seqs_fa, filtered_sites_fa,
                                                  add_param="-name",
                                                  print_warnings=True)

        print("Read in sequences ... ")
        pos_seqs_dic = benchlib.read_fasta_into_dic(filtered_sites_fa,
                                        dna=True,
                                        all_uc=True,
                                        id_check=True,
                                        name_bed=True,
                                        empty_check=False,
                                        skip_n_seqs=False)

        # Filter BED file to discard regions for which no sequence was extracted.
        benchlib.bed_filter_by_seqs_dic(pos_seqs_dic, f1_sites_bed, filtered_sites_bed,
                                        use_col4_id=True)

        # # Transcript sequence lengths.
        # tr_seq_len_dic = {}
        # for tr_id in tr_seqs_dic:
        #     tr_seq_len_dic[tr_id] = len(tr_seqs_dic[tr_id])


    """
    Overwrite filtered_sites_fa, since it still contains header IDs with format:
    pos1::chr22:49824553-49824749(-)

    """

    benchlib.fasta_output_dic(pos_seqs_dic, filtered_sites_fa,
                              split=True)

    """
    Store positive region ID -> region string (format: chr1:100-200(+)) mapping.
    
    Note that region lengths should be identical to extracted sequence lengths, because:
    bed_filter_extend_bed gets chr_len_dic argument, so specified regions to extract sequences 
    for cannot extend over provided FASTA sequences in --genome.

    """

    pos_reg_dic, pos_len_dic = benchlib.bed_get_region_str_len_dic(filtered_sites_bed)
    assert pos_reg_dic, "no positive region ID -> region string mappings read in from filtered sites BED file. Please contact developers"
    for reg_id in pos_reg_dic:
        assert reg_id in pos_seqs_dic, "region ID %s not found in pos_seqs_dic. Please contact developers" %(reg_id)
        assert pos_len_dic[reg_id] == len(pos_seqs_dic[reg_id]), "region ID %s sequence length mismatch between BED and FASTA. Please contact developers" %(reg_id)
    for reg_id in pos_seqs_dic:
        assert reg_id in pos_reg_dic, "region ID %s not found in pos_reg_dic. Please contact developers" %(reg_id)

    # Get relative coordinates of core motif site region.
    core_rel_reg_dic = benchlib.bed_get_core_rel_reg_dic(core_reg_dic, filtered_sites_bed)

    """
    Define regions to exclude (i.e. no background sites sampled from these regions).
    
    """

    # Collect masking files and merge them to BED.
    print("Collect regions for masking (== no negatives from these regions) ... ")
    mask_files_list = []
    mask_files_list.append(filtered_sites_bed)
    if args.bg_mask_bed:
        mask_files_list.append(args.bg_mask_bed)
    if args.bg_mask_blacklist:
        mask_files_list.append(blacklist_bed)
    benchlib.merge_files(mask_files_list, bg_excl_bed)


    """
    Make shuffle input BED file bg_shuffle_in_bed.

    """

    # Generate shuffle file.
    shuffle_list = []
    c_pos = len(pos_seqs_dic)
    c_pos_total = 0
    if args.bg_mode == 1:
        if c_pos >= args.bg_min_size:
            shuffle_list.append(filtered_sites_bed)
            c_pos_total = c_pos
        else:
            while c_pos_total < args.bg_min_size:
                shuffle_list.append(filtered_sites_bed)
                c_pos_total += c_pos
    elif args.bg_mode == 2:
        shuffle_list.append(filtered_sites_bed)
        c_pos_total = c_pos
    else:
        assert False, "invalid --bg-mode set"

    benchlib.merge_pos_bed_files(shuffle_list, bg_shuffle_in_bed,
                                 core_neg_id="neg")
    
    # benchlib.merge_files(shuffle_list, bg_shuffle_in_bed)

    print("# random negative regions to sample: ", c_pos_total)

    """
    Get input / positive genomic region annotations.

    """

    pos_reg_ids_list = []
    pos_reg_ids_dic = {}
    for seq_id, seq in sorted(pos_seqs_dic.items()):
        pos_reg_ids_list.append(seq_id)
        pos_reg_ids_dic[seq_id] = 1

    intron_exon_out_bed = args.out_folder + "/intron_exon_regions.tmp.bed"
    overlap_annotations_bed = args.out_folder + "/overlap_region_annotations.tmp.bed"

    pos_reg2annot_dic = {}
    tid2regl_dic = {}

    if args.bg_mode == 1:

        if genomic_sites_input:

            """
            reg2annot_dic[reg_id][0] = annot_string
            reg2annot_dic[reg_id][1] = tr_id

            """

            print("Get genomic region annotations ... ")
            # Extract exon + intron regions of selected transcripts from transcript infos.
            print("Output intron annotations to BED ... ")
            benchlib.output_transcript_info_intron_exon_to_bed(tid2tio_dic, intron_exon_out_bed,
                                                output_mode=3,  # only introns.
                                                report_counts=True,
                                                add_tr_id=True,  # new reg_id format: intron;ENST000006666
                                                add_numbers=True,  # new reg_id format: intron;ENST000006666;1-2 ADDED
                                                number_format=1,  # format intron;ENST000006666;1-2 ADDED
                                                empty_check=False)
        
            # Append detailed exon annotations (CDS, UTR, transcript biotypes) to intron annotations.
            print("Output exon annotations to BED ... ")
            benchlib.output_exon_annotations(tid2tio_dic, intron_exon_out_bed,
                                             add_numbers=True,
                                             append=True)

            # Overlap with input regions.
            print("Overlap annotations with input regions ... ")
            params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))

            benchlib.bed_intersect_files(filtered_sites_bed, intron_exon_out_bed, 
                                         overlap_annotations_bed,
                                         params=params)
            pos_reg2annot_dic = benchlib.get_region_annotations(overlap_annotations_bed,
                                                                tid2tio_dic=tid2tio_dic,
                                                                reg_ids_dic=pos_reg_ids_dic)

        else:

            for reg_id in pos_reg_ids_dic:
                # Get tr_id from reg_id format: ENST00000663363:36-136(+)
                tr_id = reg_id.split(":")[0]
                pos_reg2annot_dic[reg_id] = ["ncRNA", tr_id]

            # Get mRNA region lengths.
            tid2regl_dic = benchlib.get_mrna_region_lengths(tid2tio_dic)

            if tid2regl_dic:
                print("Get mRNA region annotations ... ")

                print("# mRNA transcripts (containing CDS):", len(tid2regl_dic))

                benchlib.output_mrna_regions_to_bed(tid2regl_dic, mrna_regions_bed)

                # Overlap with input regions.
                print("Overlap mRNA annotations with input regions ... ")
                params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))
                benchlib.bed_intersect_files(filtered_sites_bed, mrna_regions_bed, 
                                             overlap_annotations_bed,
                                             params=params)
                pos_reg2annot_dic = benchlib.get_mrna_region_annotations_v2(overlap_annotations_bed,
                                                                            reg_ids_dic=pos_reg_ids_dic)

        # Approx. intron / non-intron lengths in positive set.
        pos_intron_sites_c = 0
        pos_non_intron_sites_c = 0
        intergenic_as_intron = False
        intron_labels = ["intron"]
        if intergenic_as_intron:
            intron_labels.append("intergenic")

        for reg_id in pos_reg2annot_dic:
            # seq_len = len(pos_seqs_dic[reg_id])
            if pos_reg2annot_dic[reg_id][0] in intron_labels:
                pos_intron_sites_c += 1
            else:
                pos_non_intron_sites_c += 1

        pos_sites_c = len(pos_reg2annot_dic)

        # Intron ratio.
        pos_intron_ratio = round(pos_intron_sites_c / pos_sites_c, 6)
        pos_exon_ratio = round(pos_non_intron_sites_c / pos_sites_c, 6)

        print("# input sites with intron (+intergenic) annotations:", pos_intron_sites_c)
        print("# input sites with non-intron annotations:          ", pos_non_intron_sites_c)
        print("Input set intron ratio:                             ", pos_intron_ratio)

        # Do we need more intronic regions for adaptive sampling?
        more_introns = False
        if pos_intron_ratio > total_tr_intron_ratio:
            more_introns = True

        """
        Write regions to BED from where to sample background sites (incl_bed).

        """

        min_intron_len = 100
        min_exon_len = 100
        max_exon_ratio = 0.5  # exon ratio to aim for.

        OUTBED = open(bg_incl_bed, "w")
        print("Output regions to sample background sites from ... ")

        if genomic_sites_input:
            # Output genomic transcript regions with introns.
            for tr_id in tid2tio_dic:
                chr_id = tid2tio_dic[tr_id].chr_id
                tr_s = tid2tio_dic[tr_id].tr_s - 1
                tr_e = tid2tio_dic[tr_id].tr_e
                tr_pol = tid2tio_dic[tr_id].tr_pol
                OUTBED.write("%s\t%i\t%i\t%s\t0\t%s\n" %(chr_id, tr_s, tr_e, tr_id, tr_pol))

            if args.bg_adaptive_sampling:
                # `random` is not among this file's top-level imports, so
                # random.shuffle() below would raise a NameError when
                # adaptive sampling is enabled. Import it here at function
                # scope (same pattern as the `from math import ceil` above).
                import random

                print("Adaptive sampling enabled ... ")

                # Shuffle transcript order so extra intron/exon regions added
                # below are drawn without positional bias.
                tr_ids_list = list(tid2tio_dic.keys())
                random.shuffle(tr_ids_list)

                # Adaptive sampling: extend the background include regions so
                # the background intron/exon composition approaches the
                # composition observed in the positive set.
                if more_introns:
                    # Positives are more intronic than the transcriptome:
                    # add intron regions until the target intron length is met.
                    target_intron_len = int(total_tr_len * pos_intron_ratio)
                    current_intron_len = total_tr_intron_len

                    for tr_id in tr_ids_list:
                        tio = tid2tio_dic[tr_id]

                        # Loop over intron regions.
                        for intron in tio.intron_coords:
                            intron_s = intron[0] - 1  # start - 1: BED 0-based start.
                            intron_e = intron[1]
                            intron_len = intron_e - intron_s
                            if intron_len < min_intron_len:
                                continue
                            if current_intron_len < target_intron_len:
                                chr_id = tio.chr_id
                                OUTBED.write("%s\t%i\t%i\t%s\t0\t%s\n" %(chr_id, intron_s, intron_e, tr_id, tio.tr_pol))
                                current_intron_len += intron_len
                            else:
                                break

                    total_tr_intron_len = current_intron_len

                else:
                    # Positives are more exonic: add exon regions, capping the
                    # targeted exon ratio at max_exon_ratio.
                    target_exon_len = int(total_tr_len * pos_exon_ratio)
                    if pos_exon_ratio > max_exon_ratio:  # max exon ratio to be aimed for in --bg-ada-sampling.
                        target_exon_len = int(total_tr_len * max_exon_ratio)
                    current_exon_len = total_tr_exon_len

                    for tr_id in tr_ids_list:
                        tio = tid2tio_dic[tr_id]

                        # Loop over exon regions.
                        for exon in tio.exon_coords:
                            exon_s = exon[0] - 1  # start - 1: BED 0-based start.
                            exon_e = exon[1]
                            exon_len = exon_e - exon_s
                            if exon_len < min_exon_len:
                                continue
                            if current_exon_len < target_exon_len:
                                chr_id = tio.chr_id
                                OUTBED.write("%s\t%i\t%i\t%s\t0\t%s\n" %(chr_id, exon_s, exon_e, tr_id, tio.tr_pol))
                                current_exon_len += exon_len
                            else:
                                break

                    total_tr_exon_len = current_exon_len

                # Report the updated background composition.
                new_total_tr_len = total_tr_intron_len + total_tr_exon_len
                new_total_tr_intron_ratio = round(total_tr_intron_len / new_total_tr_len, 6)

                print("New total background intron length:", total_tr_intron_len)
                print("New total background exon length:  ", total_tr_exon_len)
                print("New total background intron ratio: ", new_total_tr_intron_ratio)

        else:
            # Output transcript regions (chromosome id = transcript ID).
            for tr_id in tr_len_dic:
                tr_len = tr_len_dic[tr_id]
                OUTBED.write("%s\t0\t%i\t%s\t0\t+\n" %(tr_id, tr_len, tr_id))

            # Overwrite chromosome lengths dictionary with transcript lengths.
            chr_len_dic = tr_len_dic
            # Overwrite chromosome lengths file with transcript lengths.
            benchlib.output_chromosome_lengths_file(tr_len_dic, chr_len_file)

        OUTBED.close()


    """
    Create random negatives / background / control set.

    bg_excl_bed:
        Background excluded regions BED
    bg_incl_bed:
        Background included regions BED

    """
    
    neg_seqs_dic = {}
    neg_seqs_tmp_bed = args.out_folder + "/background_sequences.tmp.bed"
    neg_seqs_bed = args.out_folder + "/background_sequences.bed"
    neg_seqs_fa = args.out_folder + "/background_sequences.fa"
    neg_reg_dic = {}  # only needed for --bg-mode 1.


    if args.bg_mode == 1:

        print("Background mode 1 ... ")

        print("Extract random background regions ... ")

        check = False

        if args.bg_user_incl_bed:
            print("Use regions provided via --bg-incl-bed to sample background sites from ... ")
            check = benchlib.bed_generate_random_negatives(bg_shuffle_in_bed, chr_len_file, neg_seqs_tmp_bed,
                                                        incl_bed=args.bg_user_incl_bed,
                                                        excl_bed=bg_excl_bed,
                                                        seed=args.random_seed)
            if not check:
                print("--bg-incl-bed regions not sufficient to sample required number of background sites. Try with more regions ... ")

                check = benchlib.bed_generate_random_negatives(bg_shuffle_in_bed, chr_len_file, neg_seqs_tmp_bed,
                                                            incl_bed=bg_incl_bed,
                                                            excl_bed=bg_excl_bed,
                                                            seed=args.random_seed)

        else:
            check = benchlib.bed_generate_random_negatives(bg_shuffle_in_bed, chr_len_file, neg_seqs_tmp_bed,
                                                        incl_bed=bg_incl_bed,
                                                        excl_bed=bg_excl_bed,
                                                        seed=args.random_seed)

        # check is False if fewer than the requested number of background sites
        # could be sampled; abort with advice rather than continue with a
        # too-small background set.
        if not check:
            print("Less than requested # of background sites sampled. Possible solutions: use lower --bg-min-size, use more transcripts (if --tr-list is set), or use less strict masking (if --bg-mask-bed is set)")
            sys.exit()  

        # Sequence source: genome FASTA for genomic input sites, transcript
        # FASTA otherwise.
        seqs_fa = args.in_genome
        if not genomic_sites_input:
            seqs_fa = tr_seqs_fa
        
        """
        For genomic regions resulting IDs look like:
        chr22:20977398-20977456(+)
        tr regions work as well, ID format is:
        ENST00000663363:36-136(+)

        """

        print("Extract background regions from FASTA ... ")
        benchlib.bed_extract_sequences_from_fasta(neg_seqs_tmp_bed, 
                                                  seqs_fa, neg_seqs_fa,
                                                  add_param="-name",
                                                  print_warnings=True)

        print("Read in background sequences ... ")
        neg_seqs_dic = benchlib.read_fasta_into_dic(neg_seqs_fa,
                                                    dna=True,
                                                    all_uc=True,
                                                    id_check=True, # This will fail probably, need genomic regions.
                                                    name_bed=True,
                                                    empty_check=False,
                                                    skip_n_seqs=False)

        assert neg_seqs_dic, "no background sequences read in. Please contact developers"

        print("# of extracted background sequences: ", len(neg_seqs_dic))

        # Remove N-containing sequences from neg_seqs_dic.
        # Iterate over a key snapshot (list(...)) since entries are deleted
        # during iteration.
        c_n_removed = 0
        print("Remove N-containing background sequences ... ")
        for seq_id in list(neg_seqs_dic.keys()):
            if "N" in neg_seqs_dic[seq_id]:
                c_n_removed += 1
                del neg_seqs_dic[seq_id]
        print("# of N-containing background sequences removed: ", c_n_removed)

        # Filter BED file to discard regions for which no sequence was extracted
        # (or which were removed above), keeping BED and FASTA in sync.
        benchlib.bed_filter_by_seqs_dic(neg_seqs_dic, neg_seqs_tmp_bed, neg_seqs_bed,
                                        use_col4_id=True)

        # Sanity checks: BED regions and FASTA sequences must be a 1:1 match,
        # with identical lengths per region ID.
        neg_reg_dic, neg_len_dic = benchlib.bed_get_region_str_len_dic(neg_seqs_bed)
        assert neg_reg_dic, "no negative region ID -> region string mappings read in from filtered sites BED file. Please contact developers"
        for reg_id in neg_reg_dic:
            assert reg_id in neg_seqs_dic, "region ID %s not found in neg_seqs_dic. Please contact developers" %(reg_id)
            assert neg_len_dic[reg_id] == len(neg_seqs_dic[reg_id]), "region ID %s sequence length mismatch between BED and FASTA. Please contact developers" %(reg_id)
        for reg_id in neg_seqs_dic:
            assert reg_id in neg_reg_dic, "region ID %s not found in neg_reg_dic. Please contact developers" %(reg_id)

    elif args.bg_mode == 2:

        # --bg-mode 2: generate negatives by k-nt shuffling of the positive
        # sequences. Each positive is written --bg-shuff-factor times, with
        # ";neg<set_idx>" appended to its ID to keep the copies distinct.
        print("Prepare sequences to shuffle (factor = %i) ... " %(args.bg_shuff_factor))
        seqs2shuffle_fa = args.out_folder + "/sequences_to_shuffle.fa"
        OUTSHUFA = open(seqs2shuffle_fa, "w")
        for idx in range(args.bg_shuff_factor):
            set_idx = idx + 1
            for seq_id in pos_seqs_dic:
                new_seq_id = seq_id + ";" + "neg" + str(set_idx)
                OUTSHUFA.write(">%s\n%s\n" %(new_seq_id, pos_seqs_dic[seq_id]))
        OUTSHUFA.close()

        # Di-nucleotide shuffling of positive sequences.
        print("Shuffle sequences (k = %i) ... " %(args.bg_shuff_k))

        benchlib.run_k_nt_shuffling(seqs2shuffle_fa, neg_seqs_fa,
                                    kmer_size=args.bg_shuff_k,
                                    params="-dna",
                                    tag="",  # add nothing to sequence IDs in seqs2shuffle_fa.
                                    error_check=True,
                                    seed=args.random_seed)

        print("Read in background sequences ... ")
        neg_seqs_dic = benchlib.read_fasta_into_dic(neg_seqs_fa,
                                                    dna=True,
                                                    all_uc=True,
                                                    id_check=True, # This will fail probably, need genomic regions.
                                                    empty_check=False,
                                                    skip_n_seqs=False)

        assert neg_seqs_dic, "no shuffled background sequences read in. Please contact developers"

        print("# of shuffled background sequences: ", len(neg_seqs_dic))

    """
    Overwrite neg_seqs_fa, since it still contains header IDs with format:
    pos1;neg1::chr22:28163785-28163981(-)

    """

    benchlib.fasta_output_dic(neg_seqs_dic, neg_seqs_fa,
                              split=True)


    print("Plot sequence lengths distributions ... ")

    pos_len_list = [len(seq) for seq in pos_seqs_dic.values()]
    neg_len_list = [len(seq) for seq in neg_seqs_dic.values()]

    len_dist_plot_out = args.out_folder + "/sequence_lengths_distribution.png"
    benchlib.plot_seq_len_distr(pos_len_list, neg_len_list, len_dist_plot_out,
                                label1='Input sequences',
                                label2='Background sequences',
                                density=True)


    args.c_input_sites = len(pos_seqs_dic)
    args.c_bg_sites = len(neg_seqs_dic)

    """
    Compare sequence lengths of positives with its sampled negatives.

    If these counts are very different (should not be), maybe the the 
    filter_out_neg_center_motif_hits() filtering has to be adapted.
    
    """
    c_same_len = 0
    c_diff_len = 0

    for seq_id in neg_seqs_dic:

        pos_seq_id = seq_id.split(";")[0]

        if len(pos_seqs_dic[pos_seq_id]) == len(neg_seqs_dic[seq_id]):
            c_same_len += 1
        else:
            c_diff_len += 1

    print("# of negatives with length == positives length:", c_same_len)
    print("# of negatives with length != positives length:", c_diff_len)



    """
    Get genomic / transcript region annotations for background regions.

    """

    neg_reg_ids_list = []
    neg_reg_ids_dic = {}
    for seq_id, seq in sorted(neg_seqs_dic.items()):
        neg_reg_ids_list.append(seq_id)
        neg_reg_ids_dic[seq_id] = 1

    neg_reg2annot_dic = {}

    if args.bg_mode == 1 and genomic_sites_input:

        print("Overlap annotations with background regions ... ")
        params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))

        benchlib.bed_intersect_files(neg_seqs_bed, intron_exon_out_bed, 
                                     overlap_annotations_bed,
                                     params=params)
        neg_reg2annot_dic = benchlib.get_region_annotations(overlap_annotations_bed,
                                                            tid2tio_dic=tid2tio_dic,
                                                            reg_ids_dic=neg_reg_ids_dic)


    elif args.bg_mode == 1 and not genomic_sites_input:

        for reg_id in neg_reg_ids_dic:
            tr_id = reg_id.split(":")[0]
            neg_reg2annot_dic[reg_id] = ["ncRNA", tr_id]

        if tid2regl_dic:

            print("Overlap mRNA annotations with background regions ... ")
            params = "-s -wo -f %s" %(str(args.gtf_feat_min_overlap))

            benchlib.bed_intersect_files(neg_seqs_bed, mrna_regions_bed, 
                                         overlap_annotations_bed,
                                         params=params)
            neg_reg2annot_dic = benchlib.get_mrna_region_annotations_v2(overlap_annotations_bed,
                                                                        reg_ids_dic=neg_reg_ids_dic)



    reg_annot_table_file = False
    annot2color_dic = {}

    if args.bg_mode == 1:

        # Write per-region annotation table (TSV) for the input regions.
        print("Output input region annotations ... ")
        
        reg_annot_table_file = args.out_folder + "/" + "input_region_annotations.tsv"

        OUTRAN = open(reg_annot_table_file, "w")
        OUTRAN.write("region_id\tgene_id\tgene_name\ttranscript_id\tregion_annotation\ttranscript_biotype\n")

        for reg_id in pos_reg2annot_dic:
            annot = pos_reg2annot_dic[reg_id][0]
            tr_id = pos_reg2annot_dic[reg_id][1]
            # "-" placeholders for regions without an assigned transcript
            # (e.g. intergenic).
            gene_id = "-"
            gene_name = "-"
            tr_biotype = "-"
            if tr_id:
                gene_id = tr2gid_dic[tr_id]
                gene_info = gid2gio_dic[gene_id]
                tr_biotype = tid2tio_dic[tr_id].tr_biotype
                gene_name = gene_info.gene_name
            else:
                tr_id = "-"
            OUTRAN.write("%s\t%s\t%s\t%s\t%s\t%s\n" %(reg_id, gene_id, gene_name, tr_id, annot, tr_biotype))
        OUTRAN.close()

        """
        Get annotation to color dictionary, which is needed for region annotation plots in HTML reports.

        """
        assert pos_reg2annot_dic, "--bg-mode 1 set, but no input region annotations found. Please contact developers"
        assert neg_reg2annot_dic, "--bg-mode 1 set, but no background region annotations found. Please contact developers"

        # Seed with the standard annotation classes (count 0) so they always
        # get a color, then count occurrences over both region sets.
        annot_dic = {"3'UTR" : 0, "5'UTR" : 0, "CDS" : 0, "lncRNA" : 0, "intron" : 0, "intergenic" : 0}

        for reg_id in pos_reg2annot_dic:
            annot = pos_reg2annot_dic[reg_id][0]
            if annot not in annot_dic:
                annot_dic[annot] = 1
            else:
                annot_dic[annot] += 1
        for reg_id in neg_reg2annot_dic:
            annot = neg_reg2annot_dic[reg_id][0]
            if annot not in annot_dic:
                annot_dic[annot] = 1
            else:
                annot_dic[annot] += 1

        hex_colors = benchlib.get_hex_colors_list(min_len=len(annot_dic))

        # Assign colors in sorted annotation order so coloring is
        # deterministic across runs.
        idx = 0
        for annot in sorted(annot_dic, reverse=False):
            annot2color_dic[annot] = hex_colors[idx]
            idx += 1

    """
    Get FIMO + REGEX hits.

    """

    call_dic = {}
    pos_fimo_hits_list = []
    neg_fimo_hits_list = []

    # For shuffled negatives, IDs do not contain coordinates (e.g. shuff_52).
    seq_based = False
    if args.bg_mode == 2:
        seq_based = True

    if seq_rbps_dic:

        out_str, c_added_motifs = benchlib.blocks_to_xml_string(seq_motif_blocks_dic, loaded_motif_ids_dic)
        benchlib.output_string_to_file(out_str, seq_motifs_xml)

        print("Run FIMO on input regions ... ")
        benchlib.run_fast_fimo(filtered_sites_fa, seq_motifs_xml, fimo_res_tsv,
                               pval_thr=args.fimo_pval,
                               nt_freqs_file=fimo_freqs_file,
                               call_dic=call_dic,
                               params=fimo_params,
                               error_check=False)

        assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

        pos_fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                           only_best_hits=args.greatest_hits,
                                                           reg_dic=pos_reg_dic)

        c_pos_fimo_hits = len(pos_fimo_hits_list)
        print("# of FIMO motif hits in input regions:", c_pos_fimo_hits)

        print("Run FIMO on background regions ... ")
        benchlib.run_fast_fimo(neg_seqs_fa, seq_motifs_xml, fimo_res_tsv,
                               pval_thr=args.fimo_pval,
                               nt_freqs_file=fimo_freqs_file,
                               params=fimo_params,
                               error_check=False)

        assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

        neg_fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                           reg_dic=neg_reg_dic,  # only applies if seq_based False.
                                                           only_best_hits=args.greatest_hits,
                                                           seq_based=seq_based)

        c_neg_fimo_hits = len(neg_fimo_hits_list)
        print("# of FIMO motif hits in background regions:", c_neg_fimo_hits)

        if args.regex:

            step_size_one = False
            if args.regex_search_mode == 1:
                step_size_one = True
            elif args.regex_search_mode == 2:
                step_size_one = False
            else:
                assert False, "invalid --regex-search-mode %i set" %(args.regex_search_mode)

            print("Run search for --regex \"%s\" ... " %(regex))
            pos_regex_hits_list = benchlib.get_regex_hits(regex, regex_id, pos_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      reg_dic=pos_reg_dic,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=args.motif_regex_id)

            c_pos_regex_hits = len(pos_regex_hits_list)
            print("# of regex hits in input regions:", c_pos_regex_hits)

            # Add regex hits to fimo_hits_list.
            pos_fimo_hits_list += pos_regex_hits_list

            neg_regex_hits_list = benchlib.get_regex_hits(regex, regex_id, neg_seqs_dic,
                                                      step_size_one=step_size_one,
                                                      reg_dic=neg_reg_dic,
                                                      seq_based=seq_based,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu,
                                                      use_motif_regex_id=args.motif_regex_id)

            c_neg_regex_hits = len(neg_regex_hits_list)
            print("# of regex hits in background regions:", c_neg_regex_hits)

            # Add regex hits to fimo_hits_list.
            neg_fimo_hits_list += neg_regex_hits_list


    # Filter out hits overlapping with center motif sites (ie --in sites provided).
    print("Remove FIMO/REGEX hits overlapping with center motif sites ... ")
    flt_pos_fimo_hits_list = benchlib.filter_out_center_motif_hits(pos_fimo_hits_list, core_rel_reg_dic,
                                                                   allow_overlaps=args.allow_overlaps)
    flt_neg_fimo_hits_list = benchlib.filter_out_neg_center_motif_hits(neg_fimo_hits_list, core_rel_reg_dic,
                                                                       allow_overlaps=args.allow_overlaps)

    print("# positive FIMO/REGEX hits before filtering:", len(pos_fimo_hits_list))
    print("# positive FIMO/REGEX hits after filtering: ", len(flt_pos_fimo_hits_list))
    print("# negative FIMO/REGEX hits before filtering:", len(neg_fimo_hits_list))
    print("# negative FIMO/REGEX hits after filtering: ", len(flt_neg_fimo_hits_list))

    """
    Get CMSEARCH hits.

    """

    pos_cmsearch_hits_list = []
    neg_cmsearch_hits_list = []

    cmsh_mode = ""
    if args.cmsearch_mode == 1:
        cmsh_mode = "--default"
    elif args.cmsearch_mode == 2:
        cmsh_mode = "--max"
    else:
        assert False, "invalid --cmsearch-mode %i set" %(args.cmsearch_mode)
    cmsh_params = "-g --tformat fasta --toponly --incT %s -T %s %s" %(args.cmsearch_bs, args.cmsearch_bs, cmsh_mode)

    if str_rbps_dic:
        
        print("Output covariance models to .cm ... ")
        benchlib.output_cm_blocks_to_file(str_motif_blocks_dic, loaded_motif_ids_dic, str_motifs_cm)

        print("Run cmsearch on input regions ... ")
        benchlib.run_cmsearch(filtered_sites_fa, str_motifs_cm, cmsearch_res_txt,
                        error_check=False,
                        call_dic=call_dic,
                        params=cmsh_params)

        pos_cmsearch_hits_list, c_pos_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                            reg_dic=pos_reg_dic,
                                                                            only_best_hits=args.greatest_hits,
                                                                            check=True)

        print("# of cmsearch motif hits in input regions:", c_pos_cms_hits)

        print("Run cmsearch on background regions ... ")
        benchlib.run_cmsearch(neg_seqs_fa, str_motifs_cm, cmsearch_res_txt,
                        error_check=False,
                        params=cmsh_params)

        neg_cmsearch_hits_list, c_neg_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                            reg_dic=neg_reg_dic,
                                                                            seq_based=seq_based,
                                                                            only_best_hits=args.greatest_hits,
                                                                            check=True)

        print("# of cmsearch motif hits in background regions:", c_neg_cms_hits)


    # Filter out hits overlapping with center motif sites (ie --in sites provided).
    print("Remove CMSEARCH hits overlapping with center motif sites ... ")
    flt_pos_cmsearch_hits_list = benchlib.filter_out_center_motif_hits(pos_cmsearch_hits_list, core_rel_reg_dic,
                                                                       allow_overlaps=args.allow_overlaps)
    flt_neg_cmsearch_hits_list = benchlib.filter_out_neg_center_motif_hits(neg_cmsearch_hits_list, core_rel_reg_dic,
                                                                           allow_overlaps=args.allow_overlaps)

    print("# positive CMSEARCH hits before filtering:", len(pos_cmsearch_hits_list))
    print("# positive CMSEARCH hits after filtering: ", len(flt_pos_cmsearch_hits_list))
    print("# negative CMSEARCH hits before filtering:", len(neg_cmsearch_hits_list))
    print("# negative CMSEARCH hits after filtering: ", len(flt_neg_cmsearch_hits_list))

    """
    Store for each motif ID the positive / input regions with motif hits (and hit counts), 
    using dictionary of dictionaries regions_with_motifs_dic.

    regions_with_motifs_dic:
        Dictionary of dictionaries, format:
        {motif_id1 -> {'region1': motif_c_region1, 'region2': motif_c_region2}, motif_id2 -> {'region1': motif_c_region1}}

    """

    regions_with_motifs_dic = {}

    for fh in flt_pos_fimo_hits_list:

        motif_id = fh.motif_id
        rbp_id = id2name_dic[motif_id]

        if motif_id in regions_with_motifs_dic:
            # fh.seq_name : FASTA header (== --in genomic sequence region).
            if fh.seq_name in regions_with_motifs_dic[motif_id]:
                regions_with_motifs_dic[motif_id][fh.seq_name] += 1
            else:
                regions_with_motifs_dic[motif_id][fh.seq_name] = 1
        else:
            regions_with_motifs_dic[motif_id] = {}
            regions_with_motifs_dic[motif_id][fh.seq_name] = 1

    # Store regions with structure motifs.
    for cmsh in flt_pos_cmsearch_hits_list:

        motif_id = cmsh.motif_id
        rbp_id = id2name_dic[motif_id]

        if rbp_id in regions_with_motifs_dic:
            # cmsh.seq_name : FASTA header (== --in genomic sequence region).
            if cmsh.seq_name in regions_with_motifs_dic[motif_id]:
                regions_with_motifs_dic[motif_id][cmsh.seq_name] += 1
            else:
                regions_with_motifs_dic[motif_id][cmsh.seq_name] = 1
        else:
            regions_with_motifs_dic[motif_id] = {}
            regions_with_motifs_dic[motif_id][cmsh.seq_name] = 1



    """
    Get motif to region hit counts.

    pos_hits_dic / neg_hits_dic format: 
    motif_id -> region_id -> # of hits in input regions

    """
    pos_hits_dic = {}
    neg_hits_dic = {}
    found_motif_ids_dic = {}
    pos_avg_center_dist_dic = {}
    neg_avg_center_dist_dic = {}
    pos_dist_c_dic = {}  # For each encountered center distance, count number of occurrences.
    neg_dist_c_dic = {}

    for fh in flt_pos_fimo_hits_list:

        motif_id = fh.motif_id
        rbp_id = id2name_dic[motif_id]
        # fh.seq_name : FASTA header (== --in genomic sequence region).
        reg_id = fh.seq_name

        cedi = fh.center_dist

        if motif_id in pos_avg_center_dist_dic:
            pos_avg_center_dist_dic[motif_id] += cedi
            if cedi in pos_dist_c_dic[motif_id]:
                pos_dist_c_dic[motif_id][cedi] += 1
            else:
                pos_dist_c_dic[motif_id][cedi] = 1
        else:
            pos_avg_center_dist_dic[motif_id] = cedi
            pos_dist_c_dic[motif_id] = {}
            pos_dist_c_dic[motif_id][cedi] = 1

        if motif_id in pos_hits_dic:
            if reg_id in pos_hits_dic[motif_id]:
                pos_hits_dic[motif_id][reg_id] += 1
            else:
                pos_hits_dic[motif_id][reg_id] = 1
        else:
            pos_hits_dic[motif_id] = {}
            pos_hits_dic[motif_id][reg_id] = 1

        if motif_id in found_motif_ids_dic:
            found_motif_ids_dic[motif_id] += 1
        else:
            found_motif_ids_dic[motif_id] = 1

    for fh in flt_neg_fimo_hits_list:
            
        motif_id = fh.motif_id
        rbp_id = id2name_dic[motif_id]
        reg_id = fh.seq_name

        cedi = fh.center_dist

        if motif_id in neg_avg_center_dist_dic:
            neg_avg_center_dist_dic[motif_id] += cedi
            if cedi in neg_dist_c_dic[motif_id]:
                neg_dist_c_dic[motif_id][cedi] += 1
            else:
                neg_dist_c_dic[motif_id][cedi] = 1
        else:
            neg_avg_center_dist_dic[motif_id] = cedi
            neg_dist_c_dic[motif_id] = {}
            neg_dist_c_dic[motif_id][cedi] = 1

        if motif_id in neg_hits_dic:
            if reg_id in neg_hits_dic[motif_id]:
                neg_hits_dic[motif_id][reg_id] += 1
            else:
                neg_hits_dic[motif_id][reg_id] = 1
        else:
            neg_hits_dic[motif_id] = {}
            neg_hits_dic[motif_id][reg_id] = 1

        if motif_id in found_motif_ids_dic:
            found_motif_ids_dic[motif_id] += 1
        else:
            found_motif_ids_dic[motif_id] = 1

    for cmsh in flt_pos_cmsearch_hits_list:
            
        motif_id = cmsh.motif_id
        rbp_id = id2name_dic[motif_id]
        reg_id = cmsh.seq_name

        cedi = cmsh.center_dist

        if motif_id in pos_avg_center_dist_dic:
            pos_avg_center_dist_dic[motif_id] += cedi
            if cedi in pos_dist_c_dic[motif_id]:
                pos_dist_c_dic[motif_id][cedi] += 1
            else:
                pos_dist_c_dic[motif_id][cedi] = 1
        else:
            pos_avg_center_dist_dic[motif_id] = cedi
            pos_dist_c_dic[motif_id] = {}
            pos_dist_c_dic[motif_id][cedi] = 1

        if motif_id in pos_hits_dic:
            if reg_id in pos_hits_dic[motif_id]:
                pos_hits_dic[motif_id][reg_id] += 1
            else:
                pos_hits_dic[motif_id][reg_id] = 1
        else:
            pos_hits_dic[motif_id] = {}
            pos_hits_dic[motif_id][reg_id] = 1

        if motif_id in found_motif_ids_dic:
            found_motif_ids_dic[motif_id] += 1
        else:
            found_motif_ids_dic[motif_id] = 1

    for cmsh in flt_neg_cmsearch_hits_list:
            
        motif_id = cmsh.motif_id
        rbp_id = id2name_dic[motif_id]
        reg_id = cmsh.seq_name

        cedi = cmsh.center_dist

        if motif_id in neg_avg_center_dist_dic:
            neg_avg_center_dist_dic[motif_id] += cedi
            if cedi in neg_dist_c_dic[motif_id]:
                neg_dist_c_dic[motif_id][cedi] += 1
            else:
                neg_dist_c_dic[motif_id][cedi] = 1
        else:
            neg_avg_center_dist_dic[motif_id] = cedi
            neg_dist_c_dic[motif_id] = {}
            neg_dist_c_dic[motif_id][cedi] = 1

        if motif_id in neg_hits_dic:
            if reg_id in neg_hits_dic[motif_id]:
                neg_hits_dic[motif_id][reg_id] += 1
            else:
                neg_hits_dic[motif_id][reg_id] = 1
        else:
            neg_hits_dic[motif_id] = {}
            neg_hits_dic[motif_id][reg_id] = 1

        if motif_id in found_motif_ids_dic:
            found_motif_ids_dic[motif_id] += 1
        else:
            found_motif_ids_dic[motif_id] = 1

    """
    Get motif enrichment stats.

    """

    fisher_alt_hypo = "greater"
    if args.fisher_mode == 1:
        fisher_alt_hypo = "greater"
        print("Fisher mode = 1, reporting significantly overrepresented motifs + co-occurrences ... ")
    elif args.fisher_mode == 2:
        fisher_alt_hypo = "two-sided"
        print("Fisher mode = 2, reporting significantly over- AND underrepresented motifs + co-occurrences ... ")
    elif args.fisher_mode == 3:
        fisher_alt_hypo = "less"
        print("Fisher mode = 3, reporting significantly underrepresented motifs + co-occurrences ... ")
    else:
        assert False, "Invalid Fisher mode: %i" %(args.fisher_mode)

    c_pos_regions = len(pos_seqs_dic)
    c_neg_regions = len(neg_seqs_dic)
    p_val_list = []
    motif_ids_list = []
    motif2con_table_dic = {}
    # motif2dist_plot_pos_dic = {}  # positional motif hit counts over input sites for each motif ID. 
    len_motif_ids = len(found_motif_ids_dic)
    motif_enrich_stats_dic = {}  # Store NemoStats stats objects.

    if not found_motif_ids_dic:
        print("No motifs found in any regions (input or background). Exiting ... ")
        sys.exit()

    # Per-motif enrichment statistics: positional bias (Wilcoxon rank-sum on
    # upstream vs downstream hit counts) + DREME-style Fisher exact test.
    for motif_id in sorted(found_motif_ids_dic.keys()):

        motif_ids_list.append(motif_id)

        rbp_id = id2name_dic[motif_id]
        motif_type = id2type_dic[motif_id]

        motif_enrich_stats = benchlib.NemoStats(motif_id, rbp_id)
        motif_enrich_stats.motif_type = motif_type
        motif_enrich_stats.c_pos_regions = c_pos_regions
        motif_enrich_stats.c_neg_regions = c_neg_regions

        # Set consensus sequence.
        conseq = "-"
        if id2type_dic[motif_id] == "meme_xml":
            conseq = motif2conseq_dic[motif_id]
        elif id2type_dic[motif_id] == "cm":
            conseq = "-"  # covariance models have no single consensus string here.
        elif id2type_dic[motif_id] == "regex":
            conseq = regex
        else:
            assert False, "Invalid motif type: %s" %(id2type_dic[motif_id])
        motif_enrich_stats.consensus_seq = conseq

        c_pos_hits = 0
        c_pos_hit_regions = 0

        # Positional hit counts over the plotted distance window; positions
        # without hits stay 0. dist_plot_range is defined earlier in this
        # function (outside this view).
        dist_plot_counts_dic = {dist: 0 for dist in dist_plot_range}

        if motif_id in pos_hits_dic:
            for reg_id in pos_hits_dic[motif_id]:
                c_pos_hits += pos_hits_dic[motif_id][reg_id]
                c_pos_hit_regions += 1

            # pos_avg_center_dist_dic holds summed distances; divide by hit count.
            motif_enrich_stats.pos_set_avg_center_dist = pos_avg_center_dist_dic[motif_id] / c_pos_hits

            # Get center distance position with maximum count from pos_dist_c_dic[motif_id], where key is position and value is count.
            pos_set_max_center_dist = max(pos_dist_c_dic[motif_id], key=pos_dist_c_dic[motif_id].get)
            motif_enrich_stats.pos_set_max_center_dist = pos_set_max_center_dist
            motif_enrich_stats.pos_set_max_center_dist_c = pos_dist_c_dic[motif_id][pos_set_max_center_dist]
        
            # Make positional motif hit counts dictionary (input sites only) for every motif ID.
            for dist in pos_dist_c_dic[motif_id]:
                if dist in dist_plot_counts_dic:
                    dist_plot_counts_dic[dist] = pos_dist_c_dic[motif_id][dist]

        # Split counts into upstream (dist < 0) and downstream (dist > 0);
        # the center position (dist == 0) is excluded from the test.
        upstream_counts = []
        downstream_counts = []
        for dist in dist_plot_counts_dic:
            if dist < 0:
                upstream_counts.append(dist_plot_counts_dic[dist])
            elif dist > 0:
                downstream_counts.append(dist_plot_counts_dic[dist])

        motif_enrich_stats.dist_plot_counts_dic = dist_plot_counts_dic

        # Wilcoxon rank-sum test two-sided.
        wrs_test_stat, wrs_pval = mannwhitneyu(upstream_counts, downstream_counts, alternative="two-sided")
        wrs_pval = benchlib.round_to_n_significant_digits_v2(wrs_pval, 4)
        motif_enrich_stats.wrs_pval_two_sided = wrs_pval
        motif_enrich_stats.wrs_test_stat_two_sided = wrs_test_stat
        # Wilcoxon rank-sum test greater.
        wrs_test_stat, wrs_pval = mannwhitneyu(upstream_counts, downstream_counts, alternative="greater")
        wrs_pval = benchlib.round_to_n_significant_digits_v2(wrs_pval, 4)
        motif_enrich_stats.wrs_pval_greater = wrs_pval
        motif_enrich_stats.wrs_test_stat_greater = wrs_test_stat
        # Wilcoxon rank-sum test less.
        wrs_test_stat, wrs_pval = mannwhitneyu(upstream_counts, downstream_counts, alternative="less")
        wrs_pval = benchlib.round_to_n_significant_digits_v2(wrs_pval, 4)
        motif_enrich_stats.wrs_pval_less = wrs_pval
        motif_enrich_stats.wrs_test_stat_less = wrs_test_stat

        # 2DO: Get effect sizes for every wc_pval.

        c_neg_hits = 0
        c_neg_hit_regions = 0

        if motif_id in neg_hits_dic:
            for reg_id in neg_hits_dic[motif_id]:
                c_neg_hits += neg_hits_dic[motif_id][reg_id]
                c_neg_hit_regions += 1

            motif_enrich_stats.neg_set_avg_center_dist = neg_avg_center_dist_dic[motif_id] / c_neg_hits

            neg_set_max_center_dist = max(neg_dist_c_dic[motif_id], key=neg_dist_c_dic[motif_id].get)
            motif_enrich_stats.neg_set_max_center_dist = neg_set_max_center_dist
            motif_enrich_stats.neg_set_max_center_dist_c = neg_dist_c_dic[motif_id][neg_set_max_center_dist]

        motif_enrich_stats.c_pos_hits = c_pos_hits
        motif_enrich_stats.c_neg_hits = c_neg_hits
        motif_enrich_stats.c_pos_hit_regions = c_pos_hit_regions
        motif_enrich_stats.c_neg_hit_regions = c_neg_hit_regions

        """
        DREME style test for motif enrichment:
        A: The number of input sequences that contain the motif.
        B: The number of input sequences that do not contain the motif.
        C: The number of background sequences that contain the motif.
        D: The number of background sequences that do not contain the motif.
        """

        con_table = [[c_pos_hit_regions, c_pos_regions - c_pos_hit_regions],
                     [c_neg_hit_regions, c_neg_regions - c_neg_hit_regions]]

        odds_ratio, p_value = fisher_exact(con_table, alternative=fisher_alt_hypo)

        p_val_list.append(p_value)
        table_str = str(con_table)
        motif2con_table_dic[motif_id] = con_table

        motif_enrich_stats.con_table = table_str
        motif_enrich_stats.fisher_pval = p_value
        motif_enrich_stats.fisher_corr_mode = args.nemo_pval_mode
        motif_enrich_stats.fisher_alt_hyp_mode = args.fisher_mode

        motif_enrich_stats_dic[motif_id] = motif_enrich_stats

    """
    Multiple testing correction.

    """

    nemo_pval_thr = args.nemo_pval_thr

    if args.nemo_pval_mode == 1:  # BH correction.

        pvals_corrected = false_discovery_control(p_val_list, method='bh')

        for i in range(len(p_val_list)):
            p_val_list[i] = pvals_corrected[i]

    elif args.nemo_pval_mode == 2:  # Bonferroni correction.

        # Multiple testing correction factor.
        mult_test_corr_factor = 1
        if len_motif_ids > 1:
            mult_test_corr_factor = len_motif_ids

        nemo_pval_thr = args.nemo_pval_thr / mult_test_corr_factor
        nemo_pval_thr = benchlib.round_to_n_significant_digits_v2(nemo_pval_thr, 4)

    elif args.nemo_pval_mode == 3:  # No correction.

        nemo_pval_thr = args.nemo_pval_thr

    else:
        assert False, "Invalid motif enrichment p-value mode (--nemo-pval-mode) set: %i" %(args.nemo_pval_mode)

    args.nemo_pval_thr_corr = nemo_pval_thr


    # Update + filter p-values.

    c_all_fisher_pval = 0
    c_sig_fisher_pval = 0
    sig_motif_ids_list = []  # significant motif IDs list.

    COSOUT = open(nemo_stats_out, "w")
    COSOUT.write("motif_id\trbp_id\tc_in_fg\tc_not_in_fg\tc_in_bg\tc_not_in_bg\tpos_avg_dist\tpos_max_dist\tpos_max_dist_c\tneg_avg_dist\tneg_max_dist\tneg_max_dist_c\tfisher_pval\tfisher_pval_corr\n")

    for idx, motif_id in enumerate(motif_ids_list):

        rbp_id = id2name_dic[motif_id]
        con_table = motif2con_table_dic[motif_id]
        p_value_corr = p_val_list[idx]
        c_in_fg = con_table[0][0]
        c_not_in_fg = con_table[0][1]
        c_in_bg = con_table[1][0]
        c_not_in_bg = con_table[1][1]
        
        p_value_corr = benchlib.round_to_n_significant_digits_v2(p_value_corr, 4)

        motif_enrich_stats_dic[motif_id].fisher_pval_corr = p_value_corr
        p_value = motif_enrich_stats_dic[motif_id].fisher_pval
        p_value = benchlib.round_to_n_significant_digits_v2(p_value, 4)
        motif_enrich_stats_dic[motif_id].fisher_pval = p_value

        pos_set_avg_center_dist = "-"
        neg_set_avg_center_dist = "-"
        if motif_id in pos_hits_dic:
            pos_set_avg_center_dist = str(round(motif_enrich_stats_dic[motif_id].pos_set_avg_center_dist, 1))
        if motif_id in neg_hits_dic:
            neg_set_avg_center_dist = str(round(motif_enrich_stats_dic[motif_id].neg_set_avg_center_dist, 1))
        pos_set_max_center_dist = str(motif_enrich_stats_dic[motif_id].pos_set_max_center_dist)
        neg_set_max_center_dist = str(motif_enrich_stats_dic[motif_id].neg_set_max_center_dist)
        pos_set_max_center_dist_c = str(motif_enrich_stats_dic[motif_id].pos_set_max_center_dist_c)
        neg_set_max_center_dist_c = str(motif_enrich_stats_dic[motif_id].neg_set_max_center_dist_c)

        if p_value_corr <= nemo_pval_thr:
            c_sig_fisher_pval += 1
            sig_motif_ids_list.append(motif_id)
        c_all_fisher_pval += 1

        COSOUT.write("%s\t%s\t%i\t%i\t%i\t%i\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %(motif_id, rbp_id, c_in_fg, c_not_in_fg, c_in_bg, c_not_in_bg, pos_set_avg_center_dist, pos_set_max_center_dist, pos_set_max_center_dist_c, neg_set_avg_center_dist, neg_set_max_center_dist, neg_set_max_center_dist_c, str(p_value), str(p_value_corr)))

    COSOUT.close()



    """
    Calculate co-occurrence statistics for significantly enriched motif IDs.


    """

    sig_motif_ids_list.sort()
    len_motif_list = len(sig_motif_ids_list)

    print("# of significant motifs: %i" %(len_motif_list))

    reg_ids_list = []
    reg_ids_dic = {}
    for seq_id, seq in sorted(pos_seqs_dic.items()):
        reg_ids_list.append(seq_id)
        reg_ids_dic[seq_id] = 1

    df_pval = False
    pval_ll = []
    pval_cont_lll = []

    region_motif_binds_dic = {}
    rid2mtfidx2hcp_dic = {}  # region_id -> motif_id_idx -> motif hit center position(s)

    if sig_motif_ids_list:

        print("Calculate co-occurrence stats for significant motifs ... ")

        for reg_id in pos_seqs_dic:
            region_motif_binds_dic[reg_id] = [False]*len_motif_list
            rid2mtfidx2hcp_dic[reg_id] = {}

        reg_hits_dic = {}
        add_count = False
        mtf2idx_dic = {}
        idx2mtf_dic = {}

        print("Get motif region occupancies ... ")

        for idx, motif_id in enumerate(sig_motif_ids_list):
            mtf2idx_dic[motif_id] = idx
            idx2mtf_dic[idx] = motif_id

            # Region has hits yes(1)/no(0).
            hit_list = []
            for reg_id in reg_ids_list:

                if motif_id in regions_with_motifs_dic and reg_id in regions_with_motifs_dic[motif_id]:
                    if add_count:
                        hit_list.append(regions_with_motifs_dic[motif_id][reg_id])
                    else:
                        hit_list.append(1)
                else:
                    hit_list.append(0)
            reg_hits_dic[motif_id] = hit_list

        # Loop over positive hits.

        for fh in flt_pos_fimo_hits_list:

            motif_id = fh.motif_id

            if motif_id not in sig_motif_ids_list:
                continue

            rbp_id = id2name_dic[motif_id]
            region_id = fh.seq_name

            # region_len = benchlib.get_length_from_seq_name(fh.seq_name)
            # # genomic motif region string.
            # fh_str = repr(fh)
            # uniq_count = unique_motifs_dic[rbp_id][fh_str]
            mtf_idx = mtf2idx_dic[motif_id]

            # # Motif hit string.
            # motif_str = "%s:%i:%i:%s" %(motif_id, fh.start, fh.end, str(fh.pval))
            # # What gets displayed in hover box in violin plot.
            # # motif_str_plot = "%s,%i-%i,%s" %(fh.motif_id, fh.seq_s, fh.seq_e, str(fh.pval))
            # motif_str_plot = "%s:%i-%i" %(motif_id, fh.seq_s, fh.seq_e)

            # Center position of motif hit.
            motif_hit_s = fh.seq_s - 1
            motif_hit_e = fh.seq_e
            center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)
            
            region_motif_binds_dic[region_id][mtf_idx] = True
            
            # region_rbp_motif_pos_dic[region_id].append(motif_str)
            # region2motif_hits_dic[region_id].append(motif_str_plot)

            if mtf_idx not in rid2mtfidx2hcp_dic[region_id]:
                rid2mtfidx2hcp_dic[region_id][mtf_idx] = [center_pos]
            else:
                rid2mtfidx2hcp_dic[region_id][mtf_idx].append(center_pos)


        for cmsh in flt_pos_cmsearch_hits_list:

            motif_id = cmsh.motif_id

            if motif_id not in sig_motif_ids_list:
                continue

            rbp_id = id2name_dic[motif_id]
            region_id = cmsh.seq_name

            mtf_idx = mtf2idx_dic[motif_id]

            # # Motif hit string.
            # motif_str = "%s:%i:%i:%s" %(cmsh.motif_id, cmsh.start, cmsh.end, str(-1*cmsh.score))
            # # motif_str_plot = "%s,%i-%i,%s" %(cmsh.motif_id, cmsh.seq_s, cmsh.seq_e, str(cmsh.score))
            # motif_str_plot = "%s:%i-%i" %(cmsh.motif_id, cmsh.seq_s, cmsh.seq_e)

            # Center position of motif hit.
            motif_hit_s = cmsh.seq_s - 1
            motif_hit_e = cmsh.seq_e
            center_pos = benchlib.get_center_position(motif_hit_s, motif_hit_e)
            
            region_motif_binds_dic[region_id][mtf_idx] = True
            
            if mtf_idx not in rid2mtfidx2hcp_dic[region_id]:
                rid2mtfidx2hcp_dic[region_id][mtf_idx] = [center_pos]
            else:
                rid2mtfidx2hcp_dic[region_id][mtf_idx].append(center_pos)

        # Create list of lists for co-occurrence heatmap.
        for motif_id in sig_motif_ids_list:
            pval_ll.append([1.0]*len_motif_list)
            pval_cont_lll.append([]*len_motif_list)

        for i in range(len_motif_list):
            for j in range(len_motif_list):
                # Storing [p-value_str, pair_str, table_str, correlation_str].
                pval_cont_lll[i].append(["1.0", "-", "-", "-", "-", "-", "-", "", "", ""])  

        motif_pairs = list(combinations(sig_motif_ids_list, 2))
        con_pval_dic = {}
        con_table_dic = {}
        pair_str_dic = {}

        # # Needed ?
        # c_regions_with_hits = 0
        # for reg_id in region_motif_binds_dic:
        #     reg_hit = False
        #     for label in region_motif_binds_dic[reg_id]:
        #         if label:
        #             reg_hit = True
        #     if reg_hit:
        #         c_regions_with_hits += 1
        # # print("# regions with hits (all motifs):", c_regions_with_hits)

        p_val_list = []  # Fisher exact test p-values.

        print("Compute co-occurrences between significant motif pairs ... ")
        for pair in motif_pairs:
            pair = list(pair)
            pair.sort()

            idx1 = mtf2idx_dic[pair[0]]
            idx2 = mtf2idx_dic[pair[1]]

            pair_str = ",".join(pair)
            # pair_str_dic[pair_str] = [pair_list[0], pair_list[1]]
            pair_str_dic[pair_str] = [pair[0], pair[1]]

            # avg_min_dist and perc_close_hits = "-" if no common hit regions.
            table, avg_min_dist, perc_close_hits = benchlib.make_contingency_table_2x2_v2(
                                                        region_motif_binds_dic, idx1, idx2,
                                                        rid2mtfidx2hcp_dic,
                                                        max_motif_dist=args.max_motif_dist)

            odds_ratio, p_value = fisher_exact(table, alternative=fisher_alt_hypo)

            con_table_dic[pair_str] = table
            table_str = str(table)

            p_value_plotted = p_value
            p_val_list.append(p_value)

            pval_ll[idx1][idx2] = p_value_plotted
            pval_ll[idx2][idx1] = p_value_plotted
            pval_cont_lll[idx2][idx1][0] = str(p_value)
            pval_cont_lll[idx2][idx1][1] = str(p_value_plotted)
            pval_cont_lll[idx2][idx1][2] = pair_str
            pval_cont_lll[idx2][idx1][3] = table_str
            pval_cont_lll[idx2][idx1][4] = avg_min_dist
            pval_cont_lll[idx2][idx1][5] = perc_close_hits
        

        """
        Multiple testing correction.

        """

        cooc_pval_thr = args.cooc_pval_thr

        if args.cooc_pval_mode == 1:  # BH correction.

            pvals_corrected = false_discovery_control(p_val_list, method='bh')

            for i in range(len(p_val_list)):
                p_val_list[i] = pvals_corrected[i]
        
        elif args.cooc_pval_mode == 2:  # Bonferroni correction.

            # Multiple testing correction factor.
            mult_test_corr_factor = 1
            if len_motif_list > 1:
                mult_test_corr_factor = (len_motif_list*(len_motif_list-1))/2

            cooc_pval_thr = args.cooc_pval_thr / mult_test_corr_factor
            cooc_pval_thr = benchlib.round_to_n_significant_digits_v2(cooc_pval_thr, 4)

        elif args.cooc_pval_mode == 3:  # No correction.

            cooc_pval_thr = args.cooc_pval_thr

        else:
            assert False, "Invalid co-occurrence p-value mode (--cooc-pval-mode) set: %i" %(args.cooc_pval_mode)

        args.cooc_pval_thr = cooc_pval_thr

        # Update + filter p-values.
        COSOUT = open(cooc_stats_out, "w")
        COSOUT.write("motif_id1\tmotif_id2\tc_1and2\tc_only2\tc_only1\tc_not1not2\tcooc_pval\tavg_min_dist\tperc_close_hits_%int\n" %(args.max_motif_dist))

        pv_idx = 0
        c_all_fisher_pval = 0
        c_sig_fisher_pval = 0
        perc_sig_fisher_pval = 0.0
        args.c_all_fisher_pval = c_all_fisher_pval
        args.c_sig_fisher_pval = c_sig_fisher_pval
        args.perc_sig_fisher_pval = perc_sig_fisher_pval

        for pair in motif_pairs:
            pair = list(pair)
            pair.sort()

            idx1 = mtf2idx_dic[pair[0]]
            idx2 = mtf2idx_dic[pair[1]]

            pair_str = ",".join(pair)

            p_value = p_val_list[pv_idx]

            # Round p-values to 4 significant digits.
            p_value = benchlib.round_to_n_significant_digits_v2(p_value, 4)

            p_value_plotted = p_value

            avg_min_dist_str = pval_cont_lll[idx2][idx1][4]
            avg_min_dist = 10000
            if avg_min_dist_str != "-":
                avg_min_dist = float(avg_min_dist_str)

            # Get sequence motif consensus sequences.
            conseq1 = "-"
            if id2type_dic[pair[0]] == "meme_xml":
                conseq1 = "Consensus: " + motif2conseq_dic[pair[0]]
            elif id2type_dic[pair[0]] == "cm":
                conseq1 = "Structure motif"
            elif id2type_dic[pair[0]] == "regex":
                conseq1 = "Regex: " + regex
            else:
                assert False, "Invalid motif type: %s" %(id2type_dic[pair[0]])
            conseq2 = "-"
            if id2type_dic[pair[1]] == "meme_xml":
                conseq2 = "Consensus: " + motif2conseq_dic[pair[1]]
            elif id2type_dic[pair[1]] == "cm":
                conseq2 = "Structure motif"
            elif id2type_dic[pair[1]] == "regex":
                conseq2 = "Regex: " + regex
            else:
                assert False, "Invalid motif type: %s" %(id2type_dic[pair[1]])

            if p_value > cooc_pval_thr:
                p_value_plotted = 1.0
                pval_cont_lll[idx2][idx1][7] = "(Filter: p-value > %s)<br>" %(str(cooc_pval_thr))
            
            if p_value <= cooc_pval_thr and avg_min_dist < args.min_motif_dist:
                p_value_plotted = 1.0
                pval_cont_lll[idx2][idx1][7] = "(Filter: mean minimum motif distance < %i)<br>" %(args.min_motif_dist)

            if args.motif_sim_thr is not None:
                if id2type_dic[pair[0]] == "meme_xml" and id2type_dic[pair[1]] == "meme_xml":
                    assert pair_str in motif_pair2sim_dic, "No similarity score found for motif pair \"%s\"" %(pair_str)
                    sim_score = motif_pair2sim_dic[pair_str]
                    if sim_score > args.motif_sim_thr:
                        p_value_plotted = 1.0
                        pval_cont_lll[idx2][idx1][7] = "(Filter: motif similarity > %s)<br>" %(str(args.motif_sim_thr))

            c_all_fisher_pval += 1
            if p_value <= cooc_pval_thr and avg_min_dist >= args.min_motif_dist:
                c_sig_fisher_pval += 1

            pval_ll[idx1][idx2] = p_value_plotted
            pval_ll[idx2][idx1] = p_value_plotted
            pval_cont_lll[idx2][idx1][0] = str(p_value)
            pval_cont_lll[idx2][idx1][1] = str(p_value_plotted)
            pval_cont_lll[idx2][idx1][8] = conseq1 + "<br>"
            pval_cont_lll[idx2][idx1][9] = conseq2 + "<br>"

            con_pval_dic[pair_str] = p_value

            pv_idx += 1

            # Get stats for output.
            mtf1 = pair[0]
            mtf2 = pair[1]
            con_table = con_table_dic[pair_str]
            perc_close_hits = pval_cont_lll[idx2][idx1][5]

            COSOUT.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %(mtf1, mtf2, str(con_table[0][0]), str(con_table[0][1]), str(con_table[1][0]), str(con_table[1][1]), str(p_value), avg_min_dist_str, perc_close_hits))

        COSOUT.close()

        assert c_all_fisher_pval == len(p_val_list), "Number of p-values (%i) does not match number of motif pairs (%i)" %(len(p_val_list), len(motif_pairs))
        # Percentage rounded to 2 digits.
        perc_sig_fisher_pval = 0.0
        if c_all_fisher_pval > 0:
            perc_sig_fisher_pval = round((c_sig_fisher_pval/c_all_fisher_pval)*100, 2)

        print("# of motif co-occurrence comparisons (calculated p-values in total): %i" %(c_all_fisher_pval))

        args.c_all_fisher_pval = c_all_fisher_pval
        args.c_sig_fisher_pval = c_sig_fisher_pval
        args.perc_sig_fisher_pval = perc_sig_fisher_pval

        min_motif_dist_info = ""
        if args.min_motif_dist > 0:
            min_motif_dist_info = " + --min-motif-dist >= %i" %(args.min_motif_dist)

        if args.cooc_pval_mode == 1:
            print("Number of significant co-occurrence p-values (BH corrected%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))
        elif args.cooc_pval_mode == 2:
            print("Number of significant co-occurrence p-values (Bonferroni corrected%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))
        elif args.cooc_pval_mode == 3:
            print("Number of significant co-occurrence p-values (no correction%s): %i (%.2f%%)" %(min_motif_dist_info, c_sig_fisher_pval, perc_sig_fisher_pval))

        """
        Print out RBPs sorted by KS p-value.

        """ 
        sorted_con_pval_dic = dict(sorted(con_pval_dic.items(), key=lambda item: item[1], reverse=False))

        print("Co-occurrence contingency table format: [A, B], [C, D]")
        print("A: Motif1 AND Motif2")
        print("B: NOT Motif1 AND Motif2")
        print("C: Motif1 AND NOT Motif2")
        print("D: NOT Motif1 AND NOT Motif2")

        if args.cooc_pval_mode == 1:
            print("Significance threshold: %s (on Benjamini-Hochberg corrected p-values)" %(str(cooc_pval_thr)))
        elif args.cooc_pval_mode == 2:
            print("Significance threshold: %s (Bonferroni corrected)" %(str(cooc_pval_thr)))
        elif args.cooc_pval_mode == 3:
            print("Significance threshold: %s" %(str(cooc_pval_thr)))
        else:
            assert False, "Invalid co-occurrence p-value mode (--cooc-pval-mode) set: %i" %(args.cooc_pval_mode)

        print("Co-occurrence of motifs (Motif IDs (Motif1,Motif2), contingency table, Fisher p-value)")

        c_reported = 0

        for pair_str, p_value in sorted_con_pval_dic.items():
            if p_value > cooc_pval_thr:
                break
            c_reported += 1
            mtf1 = pair_str_dic[pair_str][0]
            mtf2 = pair_str_dic[pair_str][1]
            con_table = con_table_dic[pair_str]
            print("%s\t%s\t%s" %(pair_str, str(con_table), str(p_value)))

        if not c_reported:
            print("NO SIGNIFICANT CO-OCCURRENCES FOUND!")

        print("")

        print("Calculate correlations ... ")

        df = DataFrame(reg_hits_dic, columns=sig_motif_ids_list)
        df_corr = df.corr(method='pearson')

        for i,mtf_i in enumerate(sig_motif_ids_list):
            for j,mtf_j in enumerate(sig_motif_ids_list):
                if j > i:
                    pval_ll[i][j] = None

        # Fisher p-value dataframe.
        df_pval = DataFrame(pval_ll, columns=sig_motif_ids_list, index=sig_motif_ids_list)

        # Write None to upper-diagonal entries.
        for i,mtf_i in enumerate(sig_motif_ids_list):
            for j,mtf_j in enumerate(sig_motif_ids_list):
                if j > i:
                    df_corr.loc[mtf_i, mtf_j] = None
                else:
                    # Round correlation values if != 1.0.
                    if df_corr.loc[mtf_i][mtf_j] == 1.0:
                        pval_cont_lll[i][j][6] = str(df_corr.loc[mtf_i][mtf_j])
                    else:
                        pval_cont_lll[i][j][6] = "{:.8f}".format(df_corr.loc[mtf_i][mtf_j])

        # Add NoneS to table.
        for i,mtf_i in enumerate(sig_motif_ids_list):
            for j,mtf_j in enumerate(sig_motif_ids_list):
                if j > i:
                    df_pval.loc[mtf_i, mtf_j] = None

        # -log10 p-value transformation.
        benchlib.log_tf_df(df_pval, convert_zero_pv=True, rbp_list=sig_motif_ids_list)


    """
    Make html report.

    """

    html_report_out = args.out_folder + "/" + "report.rbpbench_nemo.html"
    if args.plot_abs_paths:
        html_report_out = os.path.abspath(args.out_folder) + "/" + "report.rbpbench_nemo.html"
    # If HTML file already exists, remove it.
    if os.path.exists(html_report_out):
        os.remove(html_report_out)

    plots_subfolder = "html_report_plots"

    print("Create report ... ")
    benchlib.nemo_generate_html_report(args, motif_enrich_stats_dic, 
                                       seq_motif_blocks_dic,
                                       benchlib_path,
                                       df_pval=df_pval,
                                       pval_cont_lll=pval_cont_lll,
                                       motif_pair2sim_dic=motif_pair2sim_dic,
                                       pos_seqs_dic=pos_seqs_dic,
                                       neg_seqs_dic=neg_seqs_dic,
                                       pos_reg2annot_dic=pos_reg2annot_dic,
                                       neg_reg2annot_dic=neg_reg2annot_dic,
                                       annot2color_dic=annot2color_dic,
                                       rbpbench_mode="nemo",
                                       html_report_out=html_report_out,
                                       plots_subfolder=plots_subfolder)


    """
    Output parameter settings.

    """
    # Output mode settings.
    print("Output parameter settings ... ")
    SETOUT = open(settings_file, "w")
    for arg in vars(args):
        SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
    for call in call_dic:
        SETOUT.write("%s\t%s\n" %(call, call_dic[call]))
    SETOUT.close()

    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    print("Neighboring motif enrichment stats .tsv:\n%s" %(nemo_stats_out))
    print("Motif co-occurrence stats .tsv:\n%s" %(cooc_stats_out))

    print("Filtered input regions .bed:\n%s" %(filtered_sites_bed))
    print("Filtered input regions .fa:\n%s" %(filtered_sites_fa))
    print("Background regions .bed:\n%s" %(neg_seqs_bed))
    print("Background regions .fa:\n%s" %(neg_seqs_fa))

    if reg_annot_table_file:
        print("Input region annotations .tsv:\n%s" %(reg_annot_table_file))
    print("Neighboring motif enrichment report .html:\n%s" %(html_report_out))
    print("")


################################################################################

def _resolve_con_track(bw_arg, out_folder, opt_name, track_label):
    """Resolve a conservation score bigWig argument to a local file path.

    bw_arg:
        Value of --phastcons / --phylop: a local file path or an HTTP(S) URL.
    out_folder:
        Output folder to download the track into if bw_arg is a URL.
    opt_name:
        Command line option name used in error messages (e.g. "phastcons").
    track_label:
        Track label used for progress messages and the local file name
        (downloads to <out_folder>/<track_label>.bw), e.g. "phastCons".

    Returns the local bigWig file path.
    Raises AssertionError if the resolved file does not exist.
    """
    if bw_arg.startswith("http"):
        print("Downloading %s conservation scores to --out folder (this might take some time!) ... " % (track_label))
        local_path = os.path.join(out_folder, "%s.bw" % (track_label))
        # NOTE(review): os.system with an interpolated URL does not check the
        # wget exit code and is shell-injection prone for untrusted input;
        # consider subprocess.run(["wget", "-q", bw_arg, "-O", local_path],
        # check=True). Kept for compatibility; the assert below at least
        # catches a missing output file.
        os.system("wget -q %s -O %s" % (bw_arg, local_path))
        bw_arg = local_path

    assert os.path.exists(bw_arg), "given --%s file \"%s\" not found" % (opt_name, bw_arg)
    return bw_arg


def main_con(args):
    """
    Compare conservation in two sets of genomic sites.

    Reads the --bed and --control-bed regions, resolves the phastCons /
    phyloP bigWig tracks (downloading them into --out if URLs were given),
    compares average conservation scores between the two region sets via
    benchlib.compare_conservation_scores(), and writes score tables, the
    run settings file, and an HTML report into args.out_folder.

    args:
        argparse namespace of the "con" mode. Uses args.in_bed,
        args.in_control_bed, args.out_folder, args.pc_bw, args.pp_bw,
        args.no_id_check, args.use_regions. args.pc_bw / args.pp_bw are
        rewritten in place to local file paths if URLs were supplied.
    """

    print("Running CON for you ... ")

    assert os.path.exists(args.in_bed), "given --bed file \"%s\" not found" % (args.in_bed)
    assert os.path.exists(args.in_control_bed), "given --control-bed file \"%s\" not found" % (args.in_control_bed)

    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    # Library path (location of the installed benchlib module; used by the
    # report generation to find bundled assets).
    benchlib_path = os.path.dirname(benchlib.__file__)

    print("Reading in BED regions  ... ")

    # Read in regions of interest.
    in_regions_dic = benchlib.bed_read_in_regions(args.in_bed,
                                         no_id_check=args.no_id_check,
                                         use_regions_as_ids=args.use_regions)

    print("# --bed regions: %i" % (len(in_regions_dic)))

    # Read in control regions.
    control_regions_dic = benchlib.bed_read_in_regions(args.in_control_bed,
                                              no_id_check=args.no_id_check,
                                              use_regions_as_ids=args.use_regions)

    print("# --control-bed regions: %i" % (len(control_regions_dic)))

    # At least one conservation score track is required.
    if args.pc_bw is None and args.pp_bw is None:
        print("No conservation scores provided! Please provide --phastcons and/or --phylop files/links")
        # Exit non-zero: missing required input is an error condition.
        # (Previously exit(0), which masked the failure from pipelines.)
        sys.exit(1)

    # Resolve the tracks, downloading them into --out if URLs were provided.
    if args.pc_bw is not None:
        args.pc_bw = _resolve_con_track(args.pc_bw, args.out_folder,
                                        "phastcons", "phastCons")

    if args.pp_bw is not None:
        args.pp_bw = _resolve_con_track(args.pp_bw, args.out_folder,
                                        "phylop", "phyloP")

    """
    Extract scores and create HTML report.

    """
    html_report_out = "report.rbpbench_con.html"
    in_con_sc_name = "in_regions.avg_con_sc.tsv"
    ctrl_con_sc_name = "control_regions.avg_con_sc.tsv"

    benchlib.compare_conservation_scores(args,
                                in_regions_dic, control_regions_dic, 
                                benchlib_path,
                                pc_bw=args.pc_bw,
                                pp_bw=args.pp_bw,
                                html_report_out=html_report_out,
                                in_con_sc_name=in_con_sc_name,
                                ctrl_con_sc_name=ctrl_con_sc_name,
                                pc_plot_name="phastCons_scores.png",
                                pp_plot_name="phyloP_scores.png")

    """
    Output parameter settings.

    """
    settings_file = os.path.join(args.out_folder, "settings.rbpbench_con.out")

    # Output mode settings. Context manager ensures the settings file is
    # closed even if a write fails.
    print("Output parameter settings ... ")
    with open(settings_file, "w") as setout:
        for arg in vars(args):
            setout.write("%s\t%s\n" %(arg, str(getattr(args, arg))))

    in_con_sc_path = os.path.join(args.out_folder, in_con_sc_name)
    ctrl_con_sc_path = os.path.join(args.out_folder, ctrl_con_sc_name)
    html_report_path = os.path.join(args.out_folder, html_report_out)

    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Input sites scores .tsv:\n%s" %(in_con_sc_path))
    print("Control sites scores .tsv:\n%s" %(ctrl_con_sc_path))
    print("Conservation scores comparison report .html:\n%s" %(html_report_path))
    print("")


################################################################################

def main_sponge(args):
    """
    Check for sponge transcript sequences given a regex.

    Search transcript sequences (either extracted via --gtf + --genome, or
    read in directly from --fasta) for regex hits (sequence or structure
    pattern), rank transcripts by motif hits per kb, and output:
    the ranked transcripts table (.tsv), a hit distribution plot (.png),
    the motif hits in BED format, and the used parameter settings.

    args:
        argparse Namespace with sponge mode settings (re-parsed below).

    """

    print("Running SPONGE for you ... ")

    # NOTE(review): args is re-parsed here, overwriting the namespace passed
    # in — appears intentional (main_isocomp does the same); confirm.
    parser = setup_argument_parser()
    args = parser.parse_args()

    # Check regex: structure pattern vs. standard sequence regex
    # (IUPAC codes in a sequence regex get converted to regex form).
    regex = args.regex
    regex_type = "sequence"

    if benchlib.looks_like_structure(regex):

        print("Given regex \"%s\" looks like structure pattern ... " % regex)
        regex = benchlib.check_format_str_pattern(regex)
        regex_type = "structure"

    else:

        print("Given regex \"%s\" assumed to be standard regex ... " % regex)
        assert benchlib.is_valid_regex(regex), "given regex \"%s\" is not a valid regular expression. Please provide valid expression" % (regex)
        regex = benchlib.convert_iupac_in_regex(regex)
        regex_type = "sequence"

    # Transcript sequences come either from --gtf + --genome (extracted)
    # or directly from --fasta; the two options are mutually exclusive.
    assert args.in_genome or args.in_gtf or args.in_fasta, "Please provide either --genome, --gtf or --fasta file to extract transcript sequences for sponge testing"

    if args.in_genome:
        assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)
        assert args.in_gtf, "set --genome requires --gtf file to extract transcript sequences"
        assert not args.in_fasta, "set --genome collides with set --fasta file, since transcript sequences are extracted from --gtf and --genome if set. Please provide either --fasta or --gtf + --genome to supply transcript sequences for sponge testing"
    if args.in_gtf:
        assert os.path.exists(args.in_gtf), "--gtf file \"%s\" not found" % (args.in_gtf)
        # Fixed assert message (previously said "set --genome requires --gtf").
        assert args.in_genome, "set --gtf requires --genome file to extract transcript sequences"
        assert not args.in_fasta, "set --gtf collides with set --fasta file, since transcript sequences are extracted from --gtf and --genome if set. Please provide either --fasta or --gtf + --genome to supply transcript sequences for sponge testing"
    if args.in_fasta:
        assert os.path.exists(args.in_fasta), "--fasta file \"%s\" not found" % (args.in_fasta)
        assert not args.in_gtf and not args.in_genome, "set --fasta collides with set --gtf / --genome, since transcript sequences are extracted from --fasta and not from --gtf / --genome. Please provide either --fasta or --gtf + --genome to define transcript sequences for sponge testing"

    # Output files.
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)
    percentile_ranks_out = os.path.join(args.out_folder, "sponge_transcripts_ranked.tsv")
    hit_dist_plot_out = os.path.join(args.out_folder, "hit_distribution.png")
    tr_seqs_fa = os.path.join(args.out_folder, "transcript_seqs.fa")  # Transcript sequences (spliced) in FASTA format.
    settings_file = os.path.join(args.out_folder, "settings.rbpbench_sponge.out")
    hits_bed_out = os.path.join(args.out_folder, "motif_hits.rbpbench_sponge.bed")  # Sponge hits in BED format.

    tr2gid_dic = {}  # transcript ID -> gene ID.
    tr2type_dic = {}  # transcript ID -> transcript biotype.
    gn2type_dic = {}  # gene ID -> gene biotype.
    tr2gn_dic = {}  # transcript ID -> gene name.
    tr_seqs_dic = {}  # transcript ID -> transcript sequence.
    # Transcript ID -> mRNA region annotation dictionary.
    tr2reg_dic = {}
    # mRNA transcript region lengths (CDS + UTRs).
    tid2regl_dic = {}

    if args.in_gtf and args.in_genome:

        print("Extract transcript sequences from --gtf and --genome ... ")

        print("Read in gene features from --gtf ... ")

        gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                    tr2gid_dic=tr2gid_dic,
                                                    chr_style=args.chr_id_style,
                                                    empty_check=False)

        assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
        c_gene_infos = len(gid2gio_dic)
        print("# gene features read in from --gtf:", c_gene_infos)

        # Get transcript ID -> gene name mapping.
        for tr_id in tr2gid_dic:
            gene_id = tr2gid_dic[tr_id]
            gene_name = gid2gio_dic[gene_id].gene_name
            gene_biotype = gid2gio_dic[gene_id].gene_biotype
            tr2gn_dic[tr_id] = gene_name
            gn2type_dic[gene_id] = gene_biotype

        tr_ids_dic = {}

        if args.tr_list:  # if custom transcript list is provided, skip MPT selection.

            tr_ids_dic = benchlib.read_ids_into_dic(args.tr_list,
                                                    check_dic=False)
            assert tr_ids_dic, "no IDs read in from provided --tr-list file. Please provide a valid IDs file (one ID per row)"
            for tr_id in tr_ids_dic:
                assert tr_id in tr2gid_dic, "transcript ID \"%s\" from provided --tr-list file does not appear in --gtf file (or if --gene-ids-list supplied not in resulting subset). Please provide compatible settings" %(tr_id)
                tr_ids_dic[tr_id] = tr2gid_dic[tr_id]
            print("# of transcript IDs (read in from --tr-list): ", len(tr_ids_dic))

        else:  # MPT selection.

            # Get most prominent transcripts from gene infos.
            print("Select most prominent transcript (MPT) for each gene ... ")
            tr_ids_dic = benchlib.select_mpts_from_gene_infos(gid2gio_dic,
                                    basic_tag=False,  # do not be strict (only_tsl=False too).
                                    ensembl_canonical_tag=False,
                                    prior_basic_tag=True,  # Prioritize basic tag transcript.
                                    prior_mane_select=True,  # mane select if set trumps all.
                                    prior_lncrna_primary_tag=True,
                                    only_tsl=False)

            assert tr_ids_dic, "most prominent transcript selection from gene infos failed. Please contact developers"
            print("# of transcript IDs (most prominent transcripts): ", len(tr_ids_dic))

        # Check exon order (return True if minus strand exon 1 is most downstream, not most upstream, which is the correct way).
        print("Check minus-strand exon order in --gtf ... ")
        correct_min_ex_order = benchlib.gtf_check_exon_order(args.in_gtf)
        if correct_min_ex_order:
            print("Correct order encountered ... ")
        else:
            print("Reverse order encountered ... ")

        # Get transcript infos.
        print("Read in transcript infos from --gtf ... ")
        tid2tio_dic = benchlib.gtf_read_in_transcript_infos(args.in_gtf, 
                                                            tr_ids_dic=tr_ids_dic,
                                                            correct_min_ex_order=correct_min_ex_order,
                                                            chr_style=args.chr_id_style,
                                                            empty_check=False)

        assert tid2tio_dic, "no transcript infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"

        # (in)sanity checks. Fixed assert messages (transcript ID was never
        # substituted into the "%s" placeholder before).
        for tr_id in tr_ids_dic:
            assert tr_id in tid2tio_dic, "transcript ID %s not in tid2tio_dic" % (tr_id)
        for tr_id in tid2tio_dic:
            assert tr_id in tr_ids_dic, "transcript ID %s not in tr_ids_dic" % (tr_id)
            tr_biotype = tid2tio_dic[tr_id].tr_biotype
            tr2type_dic[tr_id] = tr_biotype

        c_tr_infos = len(tid2tio_dic)
        print("# transcript features read in from --gtf:", c_tr_infos)

        print("Get mRNA region lengths ... ")
        tid2regl_dic = benchlib.get_mrna_region_lengths(tid2tio_dic)

        # If only mRNA transcripts (or 3'UTR parts of it) should be used.
        if args.select_mode == 2 or args.select_mode == 3:

            print("Use only mRNA transcripts for sponge search ... ")
            tr_ids_dic = {}
            for tr_id in tid2regl_dic:
                tr_ids_dic[tr_id] = 1  # Use all mRNA transcripts.

            assert tr_ids_dic, "no mRNA transcripts found in --gtf. Please provide a valid/compatible GTF file, and if --tr-list is set, make sure it contains mRNA transcripts"

        """
        Output transcript sequences to FASTA file.

        """

        # Get transcript sequences.
        print("Extract transcript sequences ... ")
        tr_seqs_dic = benchlib.get_transcript_sequences_from_gtf(tid2tio_dic, args.in_genome,
                                                                 tr_ids_dic=tr_ids_dic,
                                                                 tmp_out_folder=args.out_folder)

        # Output sequences to FASTA.
        if args.select_mode == 1:
            print("Output transcript sequences to FASTA ... ")
        elif args.select_mode == 2:
            print("Output mRNA transcript sequences to FASTA ... ")
        elif args.select_mode == 3:
            print("Output mRNA 3'UTR sequences to FASTA ... ")

        if args.select_mode == 3:

            # Update tr_seqs_dic to only contain 3'UTR parts.
            tr_seqs_dic = benchlib.get_mrna_region_seqs_from_dic(tr_seqs_dic, tid2regl_dic, 
                                                                 mrna_reg_id="utr3")
            # Add region annotations.
            for tid in tr_seqs_dic:
                tr2reg_dic[tid] = "3'UTR"

        # Output sequences to FASTA.
        benchlib.fasta_output_dic(tr_seqs_dic, tr_seqs_fa,
                                tr2gid_dic=tr2gid_dic,  # add gene ID to header.
                                tr2gn_dic=tr2gn_dic,  # add gene name to header.
                                tr2reg_dic=tr2reg_dic,
                                to_upper=True,  # convert sequences to upper case.
                                split_size=60,  # split sequences into lines of 60 characters.
                                split=True)

    else:

        """
        if --fasta supplied:
        Get transcript sequences for sponge search from FASTA file.

        """

        tr_seqs_dic = benchlib.read_fasta_into_dic(args.in_fasta,
                                            dna=True,
                                            all_uc=True,
                                            id_check=True,
                                            empty_check=False,
                                            skip_n_seqs=False)

    """
    Search for regex hits in transcript sequences.

    """

    if tr_seqs_dic:

        # Spacer constraints only make sense with overlapping hit search.
        if args.min_spacer_len > 0:
            args.allow_overlaps = True

        result_df = benchlib.sponge_search_regex_hits(tr_seqs_dic, regex,
                                                      min_seq_len=args.min_seq_len,
                                                      min_spacing=args.min_spacer_len,
                                                      min_hit_count=args.min_hit_count,
                                                      step_size_one=args.allow_overlaps,
                                                      tr2gid_dic=tr2gid_dic,
                                                      tr2gn_dic=tr2gn_dic,
                                                      tr2reg_dic=tr2reg_dic,
                                                      tr2type_dic=tr2type_dic,
                                                      gn2type_dic=gn2type_dic,
                                                      tid2regl_dic=tid2regl_dic,
                                                      hits_bed_out=hits_bed_out,
                                                      regex_type=regex_type,
                                                      regex_spacer_min=args.regex_spacer_min,
                                                      regex_spacer_max=args.regex_spacer_max,
                                                      regex_min_gc=args.regex_min_gc,
                                                      regex_max_gu=args.regex_max_gu)

        total_entries = len(result_df)

        # Print top hits.
        if total_entries > 0:
            print("")
            print("Top 10 sequences with highest # motif hits per kb:")
            print(result_df[["transcript_id", "gene_name", "transcript_length", "hit_count", "hits_per_kb", "percentile_rank"]].head(10))
            print("")

        # Hit statistics (guard against division by zero on empty results).
        zero_hits = (result_df["hits_per_kb"] == 0).sum()
        hit_entries = total_entries - zero_hits
        perc_hits = (hit_entries / total_entries) * 100.0 if total_entries > 0 else 0.0
        perc_zero_hits = (zero_hits / total_entries) * 100.0 if total_entries > 0 else 0.0
        print("# sequences for regex search: %i" % (total_entries))
        print("# sequences with hits:        %i (%.2f%%)" % (hit_entries, perc_hits))
        print("# sequences with zero hits:   %i (%.2f%%)" % (zero_hits, perc_zero_hits))

        result_df.to_csv(percentile_ranks_out, sep='\t', index=False)
        benchlib.plot_hit_distribution(result_df, save_path=hit_dist_plot_out)

    else:
        print("WARNING: No transcript sequences available for regex search! Please provide valid --fasta or --gtf + --genome files")

    """
    Output parameter settings.

    """

    # Output mode settings (context manager ensures file gets closed).
    print("Output parameter settings ... ")
    with open(settings_file, "w") as setout:
        for arg in vars(args):
            setout.write("%s\t%s\n" %(arg, str(getattr(args, arg))))

    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Hit distribution .png:\n%s" %(hit_dist_plot_out))
    print("Regex hits in transcript sequences .bed:\n%s" %(hits_bed_out))
    print("Transcript sequences ranked by hits per kb .tsv:\n%s" %(percentile_ranks_out))
    print("")


################################################################################

def main_isocomp(args):
    """
    Compare regex hit occurrences between transcript isoforms.

    Collect transcript isoforms per gene (from --gtf + --genome, or from a
    --fasta file with >transcript_id,gene_id headers), search their
    sequences for regex hits (sequence or structure pattern), and compare
    hit occurrences between the isoforms of each gene. Outputs: compared
    isoforms table (.tsv), motif hits BED file, and used parameter settings.

    args:
        argparse Namespace with isocomp mode settings (re-parsed below).

    """

    print("Running ISOCOMP for you ... ")

    # NOTE(review): args is re-parsed here, overwriting the namespace passed
    # in — appears intentional (main_sponge does the same); confirm.
    parser = setup_argument_parser()
    args = parser.parse_args()

    # Check regex: structure pattern vs. standard sequence regex
    # (IUPAC codes in a sequence regex get converted to regex form).
    regex = args.regex
    regex_type = "sequence"

    if benchlib.looks_like_structure(regex):

        print("Given regex \"%s\" looks like structure pattern ... " % regex)
        regex = benchlib.check_format_str_pattern(regex)
        regex_type = "structure"

    else:

        print("Given regex \"%s\" assumed to be standard regex ... " % regex)
        assert benchlib.is_valid_regex(regex), "given regex \"%s\" is not a valid regular expression. Please provide valid expression" % (regex)
        regex = benchlib.convert_iupac_in_regex(regex)
        regex_type = "sequence"

    # Transcript sequences come either from --gtf + --genome (extracted)
    # or directly from --fasta; the two options are mutually exclusive.
    assert args.in_genome or args.in_gtf or args.in_fasta, "Please provide either --genome, --gtf or --fasta file to extract transcript sequences for sponge testing"

    if args.in_genome:
        assert os.path.exists(args.in_genome), "--genome file \"%s\" not found" % (args.in_genome)
        assert args.in_gtf, "set --genome requires --gtf file to extract transcript sequences"
        assert not args.in_fasta, "set --genome collides with set --fasta file, since transcript sequences are extracted from --gtf and --genome if set. Please provide either --fasta or --gtf + --genome to supply transcript sequences for isoform comparison"
    if args.in_gtf:
        assert os.path.exists(args.in_gtf), "--gtf file \"%s\" not found" % (args.in_gtf)
        # Fixed assert message (previously said "set --genome requires --gtf").
        assert args.in_genome, "set --gtf requires --genome file to extract transcript sequences"
        assert not args.in_fasta, "set --gtf collides with set --fasta file, since transcript sequences are extracted from --gtf and --genome if set. Please provide either --fasta or --gtf + --genome to supply transcript sequences for isoform comparison"
    if args.in_fasta:
        assert os.path.exists(args.in_fasta), "--fasta file \"%s\" not found" % (args.in_fasta)
        assert not args.in_gtf and not args.in_genome, "set --fasta collides with set --gtf / --genome, since transcript sequences are extracted from --fasta and not from --gtf / --genome. Please provide either --fasta or --gtf + --genome to define transcript sequences for isoform comparison"
        assert benchlib.fasta_check_header_format(args.in_fasta), "--fasta file \"%s\" has invalid header format. Please provide a valid FASTA file with following format: >transcript_id,gene_id" % (args.in_fasta)

    # Output files.
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    compared_isoforms_out = os.path.join(args.out_folder, "compared_isoforms.tsv")
    tr_seqs_fa = os.path.join(args.out_folder, "transcript_seqs.fa")  # Transcript sequences (spliced) in FASTA format.
    settings_file = os.path.join(args.out_folder, "settings.rbpbench_isocomp.out")
    hits_bed_out = os.path.join(args.out_folder, "motif_hits.rbpbench_isocomp.bed")  # regex hits on isoforms in BED format.

    tr2gid_dic = {}  # transcript ID -> gene ID.
    tr2type_dic = {}  # transcript ID -> transcript biotype.
    gn2type_dic = {}  # gene ID -> gene biotype.
    tr2gn_dic = {}  # transcript ID -> gene name.
    tr_seqs_dic = {}  # transcript ID -> transcript sequence.
    # Transcript ID -> mRNA region annotation dictionary.
    tr2reg_dic = {}
    # mRNA transcript region lengths (CDS + UTRs).
    tid2regl_dic = {}
    gid2iso_dic = {}  # gene ID -> isoform infos (transcript IDs, gene name, biotype, etc.).
    tr_ids_dic = {}  # transcript IDs used for isoform comparison.

    if args.in_gtf and args.in_genome:

        print("Extract transcript sequences from --gtf and --genome ... ")

        print("Read in gene features from --gtf ... ")

        gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                    tr2gid_dic=tr2gid_dic,
                                                    chr_style=args.chr_id_style,
                                                    empty_check=False)

        assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"
        c_gene_infos = len(gid2gio_dic)
        print("# gene features read in from --gtf:", c_gene_infos)

        # Get transcript ID -> gene name mapping.
        for tr_id in tr2gid_dic:
            gene_id = tr2gid_dic[tr_id]
            gene_name = gid2gio_dic[gene_id].gene_name
            gene_biotype = gid2gio_dic[gene_id].gene_biotype
            tr2gn_dic[tr_id] = gene_name
            gn2type_dic[gene_id] = gene_biotype

        # Check exon order (return True if minus strand exon 1 is most downstream, not most upstream, which is the correct way).
        print("Check minus-strand exon order in --gtf ... ")
        correct_min_ex_order = benchlib.gtf_check_exon_order(args.in_gtf)
        if correct_min_ex_order:
            print("Correct order encountered ... ")
        else:
            print("Reverse order encountered ... ")

        # Get transcript infos.
        print("Read in transcript infos from --gtf ... ")
        tid2tio_dic = benchlib.gtf_read_in_transcript_infos(args.in_gtf, 
                                                            tr_ids_dic=tr2gid_dic,  # read in all transcripts.
                                                            correct_min_ex_order=correct_min_ex_order,
                                                            chr_style=args.chr_id_style,
                                                            empty_check=False)

        assert tid2tio_dic, "no transcript infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"

        # Fixed assert message (transcript ID was never substituted into
        # the "%s" placeholder before).
        for tr_id in tid2tio_dic:
            assert tr_id in tr2gid_dic, "transcript ID %s not in tr2gid_dic" % (tr_id)
            tr_biotype = tid2tio_dic[tr_id].tr_biotype
            tr2type_dic[tr_id] = tr_biotype

        c_tr_infos = len(tid2tio_dic)
        print("# transcript features read in from --gtf:", c_tr_infos)

        print("Get mRNA region lengths ... ")
        tid2regl_dic = benchlib.get_mrna_region_lengths(tid2tio_dic)

        if args.select_mode not in (4, 5):  # Only mRNA transcripts (i.e., mRNA transcripts or parts of it).

            print("Process mRNA transcript infos ... ")

            for tid in tid2regl_dic:
                utr5_len = tid2regl_dic[tid][0]
                cds_len = tid2regl_dic[tid][1]
                utr3_len = tid2regl_dic[tid][2]
                tr_length = utr5_len + cds_len + utr3_len
                assert tr_length == tid2tio_dic[tid].tr_length, "Transcript length mismatch for %s: %i != %i" % (tid, tr_length, tid2tio_dic[tid].tr_length)

                region_id = "Full"  # full mRNA, select_mode = 3.

                if args.select_mode == 1:  # 3'UTR.
                    if utr3_len > 0:
                        region_id = "3'UTR"
                    else:
                        continue
                    tr_length = utr3_len

                if args.select_mode == 2:  # 5'UTR.
                    if utr5_len > 0:
                        region_id = "5'UTR"
                    else:
                        continue
                    tr_length = utr5_len

                if args.min_seq_len:
                    if tr_length < args.min_seq_len:
                        continue

                gene_id = tr2gid_dic[tid]
                tr_biotype = tid2tio_dic[tid].tr_biotype
                tr_ids_dic[tid] = 1  # Use this transcript for isoform comparison.
                tr2reg_dic[tid] = region_id

                if gene_id not in gid2iso_dic:

                    gene_name = gid2gio_dic[gene_id].gene_name
                    gene_biotype = gid2gio_dic[gene_id].gene_biotype
                    gene_isocomp_infos = benchlib.GeneIsoComp(gene_id, gene_name, gene_biotype)
                    gid2iso_dic[gene_id] = gene_isocomp_infos

                gid2iso_dic[gene_id].tr_ids.append(tid)
                gid2iso_dic[gene_id].tr_biotypes.append(tr_biotype)
                gid2iso_dic[gene_id].tr_lengths.append(tr_length)
                gid2iso_dic[gene_id].tr_regions.append(region_id)  # region ID can be: "5'UTR", "CDS", "3'UTR", "Full" (Full can be both ncRNA and mRNA).
                gid2iso_dic[gene_id].tr_hit_counts.append(0)  # Initialize hit counts.
                gid2iso_dic[gene_id].tr_hit_counts_kb.append(0.0)  # Initialize hit counts per kb.

        else:  # If all non-coding transcripts or all transcripts should be used.

            if args.select_mode == 4:
                print("Process all non-coding transcript infos ... ")
            elif args.select_mode == 5:
                print("Process all transcript infos ... ")
            else:
                assert False, "Invalid --select-mode for isoform comparison: %i" % (args.select_mode)

            for tid in tid2tio_dic:

                gene_id = tr2gid_dic[tid]
                gene_biotype = gid2gio_dic[gene_id].gene_biotype
                tr_biotype = tid2tio_dic[tid].tr_biotype
                if args.select_mode == 4:
                    if gene_biotype == "protein_coding":
                        continue  # Skip protein-coding genes / all their transcripts.
                tr_length = tid2tio_dic[tid].tr_length
                if args.min_seq_len:
                    if tr_length < args.min_seq_len:
                        continue  # Skip transcripts that are shorter than the minimum sequence length.

                tr_ids_dic[tid] = 1  # Use this transcript for isoform comparison.
                tr2reg_dic[tid] = "Full"

                if gene_id not in gid2iso_dic:

                    gene_name = gid2gio_dic[gene_id].gene_name

                    gene_isocomp_infos = benchlib.GeneIsoComp(gene_id, gene_name, gene_biotype)
                    gid2iso_dic[gene_id] = gene_isocomp_infos

                gid2iso_dic[gene_id].tr_ids.append(tid)
                gid2iso_dic[gene_id].tr_biotypes.append(tr_biotype)
                gid2iso_dic[gene_id].tr_lengths.append(tr_length)
                gid2iso_dic[gene_id].tr_regions.append("Full")  # region ID can be: "5'UTR", "CDS", "3'UTR", "Full" (Full can be both ncRNA and mRNA).
                gid2iso_dic[gene_id].tr_hit_counts.append(0)  # Initialize hit counts.
                gid2iso_dic[gene_id].tr_hit_counts_kb.append(0.0)  # Initialize hit counts per kb.


        assert tr_ids_dic, "no transcript IDs selected for isoform comparison. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE) and select a valid --select-mode"

        """
        Output transcript sequences to FASTA file.

        """

        # Get full-length transcript sequences for transcripts that should be used for isoform comparison (IDs stored in tr_ids_dic).
        print("Extract transcript sequences ... ")
        tr_seqs_dic = benchlib.get_transcript_sequences_from_gtf(tid2tio_dic, args.in_genome,
                                                                 tr_ids_dic=tr_ids_dic,
                                                                 tmp_out_folder=args.out_folder)

        if args.select_mode == 1:

            # Update tr_seqs_dic to only contain 3'UTR parts.
            print("Use only 3'UTR sequences ... ")
            tr_seqs_dic = benchlib.get_mrna_region_seqs_from_dic(tr_seqs_dic, tid2regl_dic, 
                                                                 mrna_reg_id="utr3")  

        elif args.select_mode == 2:

            # Update tr_seqs_dic to only contain 5'UTR parts.
            print("Use only 5'UTR sequences ... ")
            tr_seqs_dic = benchlib.get_mrna_region_seqs_from_dic(tr_seqs_dic, tid2regl_dic,
                                                                    mrna_reg_id="utr5")

        else:
            print("Use full transcript sequences ... ")

        # Output sequences to FASTA. Fixed mode messages: mode 4 is
        # non-coding transcripts, mode 5 (previously missing here) is all.
        if args.select_mode == 1:
            print("Output 3'UTR sequences to FASTA ... ")
        elif args.select_mode == 2:
            print("Output 5'UTR sequences to FASTA ... ")
        elif args.select_mode == 3:
            print("Output full mRNA sequences to FASTA ... ")
        elif args.select_mode == 4:
            print("Output all non-coding transcript sequences to FASTA ... ")
        elif args.select_mode == 5:
            print("Output all transcript sequences to FASTA ... ")

        # Output sequences to FASTA.
        benchlib.fasta_output_dic(tr_seqs_dic, tr_seqs_fa,
                                tr2gid_dic=tr2gid_dic,  # add gene ID to header.
                                tr2gn_dic=tr2gn_dic,  # add gene name to header.
                                tr2reg_dic=tr2reg_dic,
                                to_upper=True,  # convert sequences to upper case.
                                split_size=60,  # split sequences into lines of 60 characters.
                                split=True)

    else:

        """
        if --fasta supplied:
        Get transcript sequences for sponge search from FASTA file.

        """

        in_seqs_dic = benchlib.read_fasta_into_dic(args.in_fasta,
                                            dna=True,
                                            all_uc=True,
                                            id_check=True,
                                            empty_check=False,
                                            skip_n_seqs=False)

        tr_seqs_dic = {}

        for seq_id in in_seqs_dic:
            parts = seq_id.split(",")
            assert len(parts) >= 2, "--fasta sequence ID \"%s\" has invalid format. Please provide header IDs in format: >transcript_id,gene_id (additional commas are fine too)" % (seq_id)
            tid = parts[0]
            gid = parts[1]
            assert tid not in tr_ids_dic, "--fasta transcript ID \"%s\" already encountered! Please provide unique transcript IDs via --fasta file headers (valid format: >transcript_id,gene_id)" % (tid)
            tr_ids_dic[tid] = 1
            tr2gid_dic[tid] = gid  # transcript ID -> gene ID.
            tr2reg_dic[tid] = "Full"
            tr_seq = in_seqs_dic[seq_id]
            tr_seqs_dic[tid] = tr_seq  # Transcript ID -> transcript sequence (removing gene ID or other parts).
            tr_length = len(tr_seq)

            # BUG FIX: this branch previously indexed gid2iso_dic with the
            # undefined name gene_id (NameError on FASTA-only runs); the
            # gene ID parsed from the header (gid) is the correct key.
            if gid not in gid2iso_dic:
                gene_isocomp_infos = benchlib.GeneIsoComp(gid, "-", "-")
                gid2iso_dic[gid] = gene_isocomp_infos

            gid2iso_dic[gid].tr_ids.append(tid)
            gid2iso_dic[gid].tr_biotypes.append("-")
            gid2iso_dic[gid].tr_lengths.append(tr_length)
            gid2iso_dic[gid].tr_regions.append("Full")  # region ID can be: "5'UTR", "CDS", "3'UTR", "Full" (Full can be both ncRNA and mRNA).
            gid2iso_dic[gid].tr_hit_counts.append(0)  # Initialize hit counts.
            gid2iso_dic[gid].tr_hit_counts_kb.append(0.0)  # Initialize hit counts per kb.


        print("# transcript sequences read in from --fasta:", len(tr_seqs_dic))


    print("# transcript sequences for isoform comparison:", len(tr_ids_dic))
    print("# of genes with isoforms for comparison:      ", len(gid2iso_dic))


    """
    Compare regex hits between isoforms.

    """

    if tr_seqs_dic:

        # Spacer constraints only make sense with overlapping hit search.
        if args.min_spacer_len > 0:
            args.allow_overlaps = True

        result_df = benchlib.isocomp_search_regex_hits(tr_seqs_dic, regex, gid2iso_dic,
                                                       min_spacing=args.min_spacer_len,
                                                       step_size_one=args.allow_overlaps,
                                                       tr2reg_dic=tr2reg_dic,
                                                       tid2regl_dic=tid2regl_dic,
                                                       hits_bed_out=hits_bed_out,
                                                       digits_round=4,
                                                       regex_type=regex_type,
                                                       regex_spacer_min=args.regex_spacer_min,
                                                       regex_spacer_max=args.regex_spacer_max,
                                                       regex_min_gc=args.regex_min_gc,
                                                       regex_max_gu=args.regex_max_gu)

        total_entries = len(result_df)
        print("# isoform comparisons: %i" % (total_entries))

        result_df.to_csv(compared_isoforms_out, sep='\t', index=False)

    else:
        print("WARNING: No transcript sequences available for regex search! Please provide valid --fasta or --gtf + --genome files")

    """
    Output parameter settings.

    """

    # Output mode settings (context manager ensures file gets closed).
    print("Output parameter settings ... ")
    with open(settings_file, "w") as setout:
        for arg in vars(args):
            setout.write("%s\t%s\n" %(arg, str(getattr(args, arg))))

    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Regex hits in transcript sequences .bed:\n%s" %(hits_bed_out))
    print("Compared isoforms .tsv:\n%s" %(compared_isoforms_out))
    print("")


################################################################################

def main_streme(args):
    """
    Run MEME suite STREME to identify motifs in input sites.

    Expects the 'streme' subparser namespace: --in FASTA file (args.in_fa),
    optional negative set --neg-in (args.in_neg_fa), plus STREME run
    parameters. STREME results and a run settings file are written to
    args.out_folder.

    """

    print("Running STREME for you ... ")

    # Input checks: files must exist and be valid FASTA.
    assert os.path.exists(args.in_fa), "--in \"%s\" not found" % (args.in_fa)
    assert benchlib.fasta_check_format(args.in_fa), "--in \"%s\" not in FASTA format" % (args.in_fa)

    if args.in_neg_fa:
        assert os.path.exists(args.in_neg_fa), "--neg-in \"%s\" not found" % (args.in_neg_fa)
        assert benchlib.fasta_check_format(args.in_neg_fa), "--neg-in \"%s\" not in FASTA format" % (args.in_neg_fa)

    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    # Select background nucleotide frequencies file via --streme-ntf-mode
    # (2: transcripts incl. introns, 3: uniform, otherwise Ensembl mRNA default).
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.streme_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.streme_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    # A user-supplied background file overrides the ntf mode selection.
    if args.streme_bfile:
        fimo_freqs_file = args.streme_bfile

    assert os.path.exists(fimo_freqs_file), "set nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    Outputs.

    """

    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    settings_file = args.out_folder + "/settings.rbpbench_streme.out"
    # Files produced by the STREME run inside args.out_folder.
    streme_seqs_tsv = args.out_folder + "/sequences.tsv"
    streme_sites_tsv = args.out_folder + "/sites.tsv"
    streme_out_txt = args.out_folder + "/streme.txt"
    streme_out_xml = args.out_folder + "/streme.xml"
    streme_out_html = args.out_folder + "/streme.html"

    """
    Run STREME.

    """

    call_dic = {}  # Stores executed command line call(s) for the settings file.

    print("Run STREME ... ")
    benchlib.run_streme(args.in_fa, args.out_folder,
                        neg_fa=args.in_neg_fa,
                        streme_bfile=fimo_freqs_file,
                        streme_evalue=args.streme_evalue,
                        streme_thresh=args.streme_thresh,
                        streme_minw=args.streme_minw,
                        streme_maxw=args.streme_maxw,
                        streme_seed=args.streme_seed,
                        streme_order=args.streme_order,
                        params="--dna",
                        call_dic=call_dic,
                        print_output=True,
                        error_check=False)

    """
    Output parameter settings.

    """
    # Output mode settings (context manager ensures the file gets closed).
    print("Output parameter settings ... ")
    with open(settings_file, "w") as SETOUT:
        for arg in vars(args):
            SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
        for call in call_dic:
            SETOUT.write("%s\t%s\n" %(call, call_dic[call]))

    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    print("STREME matching sites .tsv:\n%s" %(streme_sites_tsv))
    print("STREME matching sequences .tsv:\n%s" %(streme_seqs_tsv))
    print("STREME results .txt:\n%s" %(streme_out_txt))
    print("STREME results .xml:\n%s" %(streme_out_xml))
    print("STREME results .html:\n%s" %(streme_out_html))
    print("")


################################################################################

def main_tomtom(args):
    """
    Run MEME suite TOMTOM to compare input motifs with database motifs.

    --in (args.motif_in) is either a motifs file in MEME motif format or a
    regex / sequence motif string that gets converted to query motif(s).
    Query motifs are compared against the selected motif database via
    TOMTOM. If an internal motif database with RBP function annotations is
    used, TOMTOM target hits are additionally tested for enriched RBP
    functions (Fisher exact tests, on motif or RBP level depending on
    --fe-mode).

    """

    print("Running TOMTOM for you ... ")

    if args.custom_db_meme_xml:
        assert os.path.exists(args.custom_db_meme_xml), "--custom-db \"%s\" not found" % (args.custom_db_meme_xml)

    """
    Library path.
    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    # Select background nucleotide frequencies file via --tomtom-ntf-mode
    # (2: transcripts incl. introns, 3: uniform, otherwise Ensembl mRNA default).
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.tomtom_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.tomtom_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    # A user-supplied background file overrides the ntf mode selection.
    if args.tomtom_bfile:
        fimo_freqs_file = args.tomtom_bfile

    assert os.path.exists(fimo_freqs_file), "set nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    Outputs.
    """

    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    settings_file = args.out_folder + "/settings.rbpbench_tomtom.out"
    tomtom_out_tsv = args.out_folder + "/tomtom.tsv"
    tomtom_out_xml = args.out_folder + "/tomtom.xml"
    tomtom_out_html = args.out_folder + "/tomtom.html"


    """
    Motif database.

    Only sequence motifs of interest for TOMTOM.
    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                            db_path=db_path)

    if args.custom_db_meme_xml:
        seq_motifs_db_file = args.custom_db_meme_xml
        motif_db_str = "custom"
        rbp2ids_file = False  # No RBP ID mappings / function annotations for custom db.

    args.motif_db_str = motif_db_str

    """
    Get ID mappings (if internal motif db used)

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")
    name2fids_dic:
    RBP name -> annotated function IDs list, e.g.
    "A1CF" -> ["RM", "RSD", "RE"]

    """

    name2ids_dic = False
    id2type_dic = False
    name2gid_dic = False
    name2fids_dic = False
    fid2desc_dic = False
    desc2fid_dic = False
    id2name_dic = {}

    if rbp2ids_file:
        name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file,
                                                                                                                       only_meme_xml=True)

        # Motif ID -> RBP name (== RBP ID) mapping.
        for rbp_id in name2ids_dic:
            for motif_id in name2ids_dic[rbp_id]:
                id2name_dic[motif_id] = rbp_id

        """
        Get function ID -> function descriptions mapping.

        """
        fid2desc_file = db_path + "/rbp_function_list.32728246.tsv"
        fid2desc_dic, desc2fid_dic = benchlib.get_fid2desc_mapping(fid2desc_file)


    """
    Check set motifs file.

    """

    seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file,
                                                       empty_check=True)
    assert seq_motif_blocks_dic, "No motifs found in MEME XML file \"%s\". Please provide motifs file in MEME motif format" % (seq_motifs_db_file)


    """
    Check --in query.

    """

    regex = False
    query_motif_file = args.out_folder + "/query_motifs.meme"
    db_motif_file = args.out_folder + "/db_motifs.meme"
    shutil.copy(seq_motifs_db_file, db_motif_file)

    if os.path.isfile(args.motif_in):
        print("--in is motifs file. Check format ... ")

        shutil.copy(args.motif_in, query_motif_file)

        # Bug fix: validate the copied query motifs file (the original code
        # re-read seq_motifs_db_file here, so malformed --in files slipped
        # through this check).
        query_motif_blocks_dic = benchlib.read_in_xml_motifs(query_motif_file,
                                                             empty_check=True)
        assert query_motif_blocks_dic, "No motifs found in --in MEME XML file \"%s\". Please provide motifs file in MEME motif format" % (args.motif_in)

    else:

        print("--in is regex/sequence motif string. Extracting sequences from string ... ")

        regex = args.motif_in
        # Bug fix: error message formatted args.regex, which is only set
        # further below, so a failing assert raised AttributeError instead.
        assert benchlib.is_valid_regex(regex), "given --in \"%s\" is not a valid regular expression. Please provide valid expression (e.g. --in AC[AC]GTA)" % (args.motif_in)
        # Only allow a subset of regexes, namely with square brackets: AC[ACG]AC.
        special_chars = r"[,\d.^$*+?{}()|\\]"

        regex = benchlib.remove_special_chars_from_str(regex,
                                                       reg_ex=special_chars)

        assert regex, "empty string after removing special chars \"%s\" from --regex. Please provide a valid regex with DNA letters" %(special_chars)

        print("Regex after removing special chars:", regex)

        query_motif_blocks_dic = {}

        if args.regex_mode == 1:

            seq_id = regex

            # Get all sequence parts from regex.
            seq_parts_list = benchlib.get_seq_parts_from_regex(regex)

            for seq in seq_parts_list:
                assert benchlib.seq_check_alphabet(seq, alphabet=["A", "C", "G", "T"]), "sequence \"%s\" derived from --in string has non-DNA letters in it. Please provide DNA sequences / regular expressions" %(seq)

            seq_motif_block = benchlib.seq_parts_to_motif_block(seq_parts_list)

            query_motif_blocks_dic[seq_id] = seq_motif_block

        elif args.regex_mode == 2:
            # Get all single sequences from regex.
            seqs_list = benchlib.get_seqs_from_regex(regex)

            # Convert sequences to sequence blocks.
            for idx, seq in enumerate(seqs_list):
                seq_id = "s%i_" %(idx+1) + seq

                assert benchlib.seq_check_alphabet(seq, alphabet=["A", "C", "G", "T"]), "sequence \"%s\" derived from --in string has non-DNA letters in it. Please provide DNA sequences / regular expressions" %(seq)

                seq_motif_block = benchlib.seq_to_motif_block(seq)

                query_motif_blocks_dic[seq_id] = seq_motif_block

        out_str, c_added_motifs = benchlib.blocks_to_xml_string(query_motif_blocks_dic, query_motif_blocks_dic)
        benchlib.output_string_to_file(out_str, query_motif_file)
        print("# of added query motifs from --in string:", c_added_motifs)

    assert os.path.exists(query_motif_file), "no query motifs file written to output folder. Please contact developers"

    args.regex = regex
    args.query_motif_file = query_motif_file
    args.db_motif_file = db_motif_file

    """
    Run TOMTOM.

    """

    call_dic = {}  # Stores executed command line call(s) for the settings file.

    print("Run TOMTOM using query and database motif files ... ")
    benchlib.run_tomtom(query_motif_file, db_motif_file, args.out_folder,
                        tomtom_bfile=fimo_freqs_file,
                        tomtom_thresh=args.tomtom_thresh,
                        tomtom_evalue=args.tomtom_evalue,
                        tomtom_m=args.tomtom_m,
                        tomtom_min_overlap=args.tomtom_min_overlap,
                        params="-norc",
                        call_dic=call_dic,
                        print_output=True,
                        error_check=False)

    """
    Check for significantly enriched functions in targets, 
    for each query.

    Motif ID -> RBP name (== RBP ID):
    id2name_dic
    RBP name -> Motif IDs:
    name2ids_dic
    name2fids_dic
    fid2desc_dic
    desc2fid_dic
    name2gid_dic

    tomtom_out_tsv format:

    Query_ID	Target_ID	Optimal_offset	p-value	E-value	q-value	Overlap	Query_consensus	Target_consensus	Orientation
    RNCMPT00001	TBRG4_1	2	0.000171464	0.103564	0.103564	7	ATAATTG	ACGTAATTTT	+
                                        
    # Tomtom (Motif Comparison Tool): Version 5.5.5 compiled on Jan 16 2024 at 00:57:42									
    # The format of this file is described at https://meme-suite.org/meme/doc/tomtom-output-format.html.									
    # tomtom -norc -png -oc tomtom_out q2.meme catRAPID_omics_v2.1_6plus_motifs.weak_rounded.meme									

    """

    # --fe-mode 2 tests enrichment per motif ID, otherwise per RBP.
    motif_level = False
    if args.fe_mode == 2:
        motif_level = True

    enriched_rbp_func_stats = args.out_folder + "/enriched_rbp_functions.tsv"

    if rbp2ids_file:

        print("Test for significantly enriched RBP functions in target IDs ... ")

        RESOUT = open(enriched_rbp_func_stats, "w")
        RESOUT.write("query_id\tfunction_id\tenrich_score\tp_value\tc_tar_f\tc_tar_not_f\tc_db_f\tc_db_not_f\tfunction_description\ttarget_rbps_with_func\n")

        c_db_hits = 0
        c_sig_f = 0
        c_db_rbps = len(name2ids_dic)  # total number of RBPs in database.

        # Motif ID -> function IDs mapping.
        mid2fids_dic = {}
        c_db_motifs = 0  # Total number of motif IDs in database.
        for rbp_id in name2ids_dic:
            for motif_id in name2ids_dic[rbp_id]:
                c_db_motifs += 1
                mid2fids_dic[motif_id] = name2fids_dic[rbp_id]

        # Function ID -> database counts (motif or RBP level).
        fid2dbc_dic = benchlib.get_fid_db_counts(name2ids_dic, name2fids_dic,
                                                 motif_level=motif_level)

        # Parse TOMTOM results table: query ID -> list of target motif IDs.
        query2targets_dic = {}
        targets_dic = {}

        with open(tomtom_out_tsv, "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # Skip comment lines and the header row.
                if line.startswith("#") or line.startswith("Query_ID"):
                    continue
                cols = line.split("\t")
                query_id = cols[0]
                target_id = cols[1]
                targets_dic[target_id] = 1
                c_db_hits += 1
                if query_id in query2targets_dic:
                    query2targets_dic[query_id].append(target_id)
                else:
                    query2targets_dic[query_id] = [target_id]

        print("# of TOMTOM database hits:          %i" %(c_db_hits))
        print("# of query IDs with hits:           %i" %(len(query2targets_dic)))
        print("# of target (motif) IDs with hits:  %i" %(len(targets_dic)))

        # One-sided test: function over-represented in targets vs database.
        fisher_alt_hypo = "greater"

        if motif_level:

            # Check function enrichment on single motif level.
            print("Check function enrichment on motif level ... ")

            # If there are any database hits.
            if query2targets_dic:
                for qid in query2targets_dic:
                    fid2c_dic = {}
                    fid2rbps_dic = {}
                    for tid in query2targets_dic[qid]:
                        rbp_id = id2name_dic[tid]
                        fids_list = mid2fids_dic[tid]
                        for fid in fids_list:
                            if fid in fid2c_dic:
                                if rbp_id not in fid2rbps_dic[fid]:
                                    fid2rbps_dic[fid].append(rbp_id)
                                fid2c_dic[fid] += 1
                            else:
                                fid2rbps_dic[fid] = [rbp_id]
                                fid2c_dic[fid] = 1
                    tid_c = len(query2targets_dic[qid])
                    # One Fisher test for each function ID in targets.
                    for fid in fid2c_dic:
                        fid_c = fid2c_dic[fid]
                        assert fid_c <= tid_c, "function ID count (%i) exceeds target count (%i)" %(fid_c, tid_c)
                        rbps_list = fid2rbps_dic[fid]
                        rbps_list.sort()
                        rbps_str = ",".join(rbps_list)
                        # 2x2 contingency table: targets with/without function
                        # vs database motifs with/without function.
                        a_fisher = fid_c
                        b_fisher = tid_c - fid_c
                        c_fisher = fid2dbc_dic[fid]
                        d_fisher = c_db_motifs - c_fisher

                        con_table = [[a_fisher, b_fisher], [c_fisher, d_fisher]]

                        odds_ratio, p_value = fisher_exact(con_table, alternative=fisher_alt_hypo)

                        p_value = benchlib.round_to_n_significant_digits_v2(p_value, 4,
                                                                            min_val=1e-304)

                        # Enrichment score: -log10(p-value).
                        min_log10_pval = benchlib.log_tf_pval(p_value)

                        min_log10_pval = benchlib.round_to_n_significant_digits_v2(min_log10_pval, 4,
                                                                                   min_val=0)

                        if p_value <= args.fe_pval_thr:
                            desc = fid2desc_dic[fid]
                            c_sig_f += 1
                            RESOUT.write("%s\t%s\t%s\t%s\t%i\t%i\t%i\t%i\t%s\t%s\n" %(qid, fid, str(min_log10_pval), str(p_value), a_fisher, b_fisher, c_fisher, d_fisher, desc, rbps_str))

        else:

            # Check function enrichment on RBP level.
            print("Check function enrichment on RBP level ... ")

            # Collapse target motif IDs to unique RBP IDs per query.
            query2rbps_dic = {}
            for qid in query2targets_dic:
                query2rbps_dic[qid] = []
                for tid in query2targets_dic[qid]:
                    rbp_id = id2name_dic[tid]
                    if rbp_id not in query2rbps_dic[qid]:
                        query2rbps_dic[qid].append(rbp_id)

            if query2rbps_dic:
                for qid in query2rbps_dic:
                    fid2c_dic = {}
                    fid2rbps_dic = {}
                    for rbp_id in query2rbps_dic[qid]:
                        fids_list = name2fids_dic[rbp_id]
                        for fid in fids_list:
                            if fid in fid2c_dic:
                                if rbp_id not in fid2rbps_dic[fid]:
                                    fid2rbps_dic[fid].append(rbp_id)
                                fid2c_dic[fid] += 1
                            else:
                                fid2rbps_dic[fid] = [rbp_id]
                                fid2c_dic[fid] = 1
                    rbp_c = len(query2rbps_dic[qid])

                    # One Fisher test for each function ID in targets.
                    for fid in fid2c_dic:
                        fid_c = fid2c_dic[fid]  # number of target RBPs with function ID fid.
                        assert fid_c <= rbp_c, "function ID count (%i) exceeds target RBP count (%i)" %(fid_c, rbp_c)
                        rbps_list = fid2rbps_dic[fid]
                        rbps_list.sort()
                        rbps_str = ",".join(rbps_list)
                        # 2x2 contingency table: target RBPs with/without
                        # function vs database RBPs with/without function.
                        a_fisher = fid_c
                        b_fisher = rbp_c - fid_c
                        c_fisher = fid2dbc_dic[fid]
                        d_fisher = c_db_rbps - c_fisher

                        con_table = [[a_fisher, b_fisher], [c_fisher, d_fisher]]

                        odds_ratio, p_value = fisher_exact(con_table, alternative=fisher_alt_hypo)

                        p_value = benchlib.round_to_n_significant_digits_v2(p_value, 4,
                                                                            min_val=1e-304)

                        # Enrichment score: -log10(p-value).
                        min_log10_pval = benchlib.log_tf_pval(p_value)

                        min_log10_pval = benchlib.round_to_n_significant_digits_v2(min_log10_pval, 4,
                                                                                   min_val=0)

                        if p_value <= args.fe_pval_thr:
                            desc = fid2desc_dic[fid]
                            c_sig_f += 1
                            RESOUT.write("%s\t%s\t%s\t%s\t%i\t%i\t%i\t%i\t%s\t%s\n" %(qid, fid, str(min_log10_pval), str(p_value), a_fisher, b_fisher, c_fisher, d_fisher, desc, rbps_str))

        RESOUT.close()

        # Sort table by enrich_score.
        df = read_csv(enriched_rbp_func_stats, sep='\t')
        df_sorted = df.sort_values(by='enrich_score', ascending=False)
        df_sorted.to_csv(enriched_rbp_func_stats, sep='\t', index=False)

        print("# significantly enriched functions: %i" %(c_sig_f))


    """
    Output parameter settings.

    """
    # Output mode settings (context manager ensures the file gets closed).
    print("Output parameter settings ... ")
    with open(settings_file, "w") as SETOUT:
        for arg in vars(args):
            SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))
        for call in call_dic:
            SETOUT.write("%s\t%s\n" %(call, call_dic[call]))

    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    print("Query motifs .meme:\n%s" %(query_motif_file))
    print("Database motifs .meme:\n%s" %(db_motif_file))
    print("TOMTOM results .tsv:\n%s" %(tomtom_out_tsv))
    print("TOMTOM results .xml:\n%s" %(tomtom_out_xml))
    print("TOMTOM results .html:\n%s" %(tomtom_out_html))
    if rbp2ids_file:
        print("Enriched RBP functions .tsv:\n%s" %(enriched_rbp_func_stats))
    print("")


################################################################################

def main_goa(args):
    """
    Run GO term enrichment analysis on provided gene list.

    Target genes come from --in (args.in_gene_list); background genes are
    the --gtf gene IDs, optionally restricted via --goa-bg-gene-list.
    GOA results are written as TSV and HTML report into args.out_folder.

    """

    print("Running for you in GOA mode ... ")

    assert os.path.exists(args.in_gene_list), "--in file \"%s\" not found" % (args.in_gene_list)
    assert os.path.exists(args.in_gtf), "--gtf file \"%s\" not found" % (args.in_gtf)

    print("Read in target genes from --in ... ")

    target_genes_dic = benchlib.read_ids_into_dic(args.in_gene_list,
                                                  check_dic=False)

    assert target_genes_dic, "no gene IDs read in from --in file \"%s\"" % (args.in_gene_list)
    print("# of gene IDs read in: %i" %(len(target_genes_dic)))

    # Check OBO file / mode option combinations.
    if args.goa_obo_mode == 3:
        assert args.goa_obo_file, "set --goa-obo-mode 3 requires --goa-obo-file"
        assert os.path.exists(args.goa_obo_file), "--goa-obo-file file \"%s\" not found" % (args.goa_obo_file)
    if args.goa_obo_file:
        assert args.goa_obo_mode == 3, "--goa-obo-file requires --goa-obo-mode 3"
    if args.goa_max_child is not None:
        assert args.goa_max_child >= 0, "set --goa-max-child expected to be >= 0"
    if args.goa_min_depth is not None:
        assert args.goa_min_depth >= 0, "set --goa-min-depth expected to be >= 0"

    """
    Library path.

    """
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    Output folders + files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    goa_results_tsv = args.out_folder + "/goa_results.tsv"
    settings_file = args.out_folder + "/settings.rbpbench_goa.out"


    """
    Read in gene IDs from --gtf file.
    
    """

    print("Read in gene features from --gtf ... ")

    gid2gio_dic = benchlib.gtf_read_in_gene_infos(args.in_gtf,
                                                  empty_check=False)

    assert gid2gio_dic, "no gene infos read in from --gtf. Please provide a valid/compatible GTF file (e.g. from Ensembl or ENCODE)"

    c_gene_infos = len(gid2gio_dic)
    print("# gene features read in from --gtf:", c_gene_infos)

    # Get an initial background genes list (i.e., all gene IDs in GTF file).
    background_genes_dic = {}
    for gid in gid2gio_dic:
        background_genes_dic[gid] = gid2gio_dic[gid].gene_name

    if args.goa_bg_gene_list:

        # Bug fix: messages below previously misspelled the option as
        # --goa-gb-gene-list.
        print("Read in background genes from --goa-bg-gene-list ... ")

        assert os.path.exists(args.goa_bg_gene_list), "given --goa-bg-gene-list file \"%s\" not found" % (args.goa_bg_gene_list)

        bg_gene_ids_dic = benchlib.read_ids_into_dic(args.goa_bg_gene_list,
                                                     check_dic=False)

        print("# of gene IDs read in: %i" %(len(bg_gene_ids_dic)))
        print("Filter background genes by --gtf genes ... ")

        # Keep only user-provided background genes also present in the GTF.
        new_background_genes_dic = {}
        for gid in bg_gene_ids_dic:
            if gid in gid2gio_dic:
                new_background_genes_dic[gid] = gid2gio_dic[gid].gene_name
        
        assert new_background_genes_dic, "given --goa-bg-gene-list gene IDs not found in --gtf. Please provide compatible --gtf and --goa-bg-gene-list files"

        print("# of background genes before filtering: %i" %(len(background_genes_dic)))
        print("# of background genes after filtering:  %i" %(len(new_background_genes_dic)))

        background_genes_dic = new_background_genes_dic

        print("Filter target genes by new background gene list ... ")
        new_target_genes_dic = {}
        for gid in target_genes_dic:
            if gid in background_genes_dic:
                new_target_genes_dic[gid] = target_genes_dic[gid]
        
        print("# of target genes before filtering: %i" %(len(target_genes_dic)))
        print("# of target genes after filtering:  %i" %(len(new_target_genes_dic)))

        # Fail early with a clear message (mirrors the else branch check),
        # instead of a confusing downstream GOA error on empty target genes.
        assert new_target_genes_dic, "no target genes remaining after filtering by --goa-bg-gene-list background genes. Please provide compatible --in and --goa-bg-gene-list files"
        target_genes_dic = new_target_genes_dic

    else:

        print("Filter read in target genes by background gene list from --gtf ... ")
        new_target_genes_dic = {}
        for gid in target_genes_dic:
            if gid in background_genes_dic:
                new_target_genes_dic[gid] = target_genes_dic[gid]

        print("# of target genes before filtering: %i" %(len(target_genes_dic)))
        print("# of target genes after filtering:  %i" %(len(new_target_genes_dic)))

        assert new_target_genes_dic, "no target genes remaining after filtering by background gene list from --gtf. Please provide compatible --gtf and --in files"
        target_genes_dic = new_target_genes_dic


    """
    GO enrichment analysis.

    """
    goa_results_df = False
    goa_stats_dic = {}
    propagate_counts = True

    # Write empty file (so the output path exists even if GOA fails).
    open(goa_results_tsv, "w").close()

    print("")
    print("Run GOA ... ")

    # Collect run statistics / settings for the HTML report.
    goa_stats_dic["c_target_genes_pre_filter"] = len(target_genes_dic)
    goa_stats_dic["c_background_genes_pre_filter"] = len(background_genes_dic)
    goa_stats_dic["pval_thr"] = args.goa_pval
    goa_stats_dic["goa_obo_mode"] = args.goa_obo_mode
    goa_stats_dic["propagate_counts"] = propagate_counts
    goa_stats_dic["excluded_terms"] = "-"
    goa_stats_dic["goa_filter_purified"] = args.goa_filter_purified
    goa_stats_dic["goa_max_child"] = args.goa_max_child
    goa_stats_dic["goa_min_depth"] = args.goa_min_depth

    # Bundled gene ID -> GO ID mapping and GO OBO files (overridable below).
    local_gid2go_file = benchlib_path + "/content/ensembl_gene_id2go_ids.biomart.GRCh38.112.tsv.gz"
    local_obo_file = benchlib_path + "/content/go-basic.obo.gz"

    assert os.path.exists(local_gid2go_file), "local gene ID to GO ID file \"%s\" not found" %(local_gid2go_file)
    assert os.path.exists(local_obo_file), "local GO OBO file \"%s\" not found" %(local_obo_file)

    gid2go_file = local_gid2go_file
    if args.goa_gene2go_file:
        gid2go_file = args.goa_gene2go_file
        assert os.path.exists(gid2go_file), "provided --goa-gene2go-file \"%s\" not found" %(gid2go_file)
    goa_obo_file = local_obo_file
    if args.goa_obo_file and args.goa_obo_mode == 3:
        goa_obo_file = args.goa_obo_file
        assert os.path.exists(goa_obo_file), "provided --goa-obo-file \"%s\" not found" %(goa_obo_file)

    # Run GOA.
    goa_results_df = benchlib.run_go_analysis(target_genes_dic, background_genes_dic, 
                                              gid2go_file, args.out_folder,
                                              pval_thr=args.goa_pval,
                                              excluded_terms = [],  # do not exclude any GO terms.
                                              goa_obo_mode=args.goa_obo_mode,
                                              propagate_counts=propagate_counts,
                                              stats_dic=goa_stats_dic,
                                              store_gene_names=True,
                                              goa_obo_file=goa_obo_file)

    print("# of enriched (i.e., with significantly higher concentration) GO terms: %i" %(goa_stats_dic["c_sig_go_terms_e"]))
    print("# of purified (i.e., with significantly lower concentration) GO terms:  %i" %(goa_stats_dic["c_sig_go_terms_p"]))

    goa_results_df.to_csv(goa_results_tsv, sep="\t", index=False)
    print("")


    """
    GOA table HTML.


    """
    
    html_goa_results_out = args.out_folder + "/" + "goa_results.rbpbench_goa.html"

    print("Generate GOA results table HTML ... ")
    
    benchlib.goa_generate_html_report(args, goa_results_df, 
                                      goa_stats_dic, benchlib_path,
                                      html_report_out=html_goa_results_out)

    """
    Output parameter settings.

    """

    # Output mode settings (context manager ensures the file gets closed).
    print("Output parameter settings ... ")
    with open(settings_file, "w") as SETOUT:
        for arg in vars(args):
            SETOUT.write("%s\t%s\n" %(arg, str(getattr(args, arg))))

    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Run parameter settings:\n%s" %(settings_file))
    print("GO enrichment analysis results (full table) .tsv:\n%s" %(goa_results_tsv))
    print("GO enrichment analysis results (filtered) .html:\n%s" %(html_goa_results_out))
    print("")



################################################################################

def main_optex(args):
    """
    Investigate optimal extension.

    OPTEX mode: for a single RBP ID, repeat the motif search on the --in
    BED sites over all (upstream, downstream) extension combinations, and
    for each combination test (one-sided Mann-Whitney U, a.k.a. Wilcoxon
    rank-sum) whether regions containing motif hits have higher site
    scores than regions without hits. Finally report the extension with
    the best p-value, plus the longest extension whose p-value still
    passes the --ext-pval threshold.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments of the optex subcommand. Note that
        some attributes are set on args as a side effect (meme_version,
        motif_db_str, c_regions).

    """

    print("Running for you in OPTEX mode ... ")

    assert os.path.exists(args.in_sites), "--in file \"%s\" not found" % (args.in_sites)
    # Check --in BED format.
    benchlib.bed_check_format(args.in_sites, param_str="--in")

    # Reformat user RBP ID (strip special characters; - and _ are kept).
    rbp_id = benchlib.remove_special_chars_from_str(args.rbp_id)
    assert rbp_id, "empty string after removing special chars from given --rbp-id %s. Please provide alphanumeric string for RBP ID (- or _ are okay as well)" %(args.rbp_id)

    # Is MEME >= v5 installed?
    if not args.meme_disable_check:
        assert benchlib.is_tool("meme"), "meme not in PATH"
        check, meme_version = benchlib.check_tool_version("meme -version", "5.0")
        assert check, "RBPBench requires meme version >= 5.0 (installed version: %s)" %(meme_version)

    # Check if MEME version is >= 5.5.4 (need to add fimo --no-pgc option to produce same results!).
    fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text"
    if not args.meme_disable_check:
        check, meme_version = benchlib.check_tool_version("meme -version", "5.5.4")
        args.meme_version = meme_version
        if check:
            fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"
    # User can also enforce --no-pgc regardless of the version check above.
    if args.meme_no_pgc:
        fimo_params = "--norc --verbosity 1 --skip-matched-sequence --text --no-pgc"

    """
    Library path.
    """
    # Packaged database content lives next to the installed benchlib module.
    benchlib_path = os.path.dirname(benchlib.__file__)
    db_path = benchlib_path + "/content"

    """
    FIMO nt frequencies.

    """
    # Select background nucleotide frequencies file based on --fimo-ntf-mode
    # (1: mature transcripts (default), 2: with introns, 3: uniform).
    fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt.txt"
    if args.fimo_ntf_mode == 2:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.ensembl_mpt_with_introns.txt"
    elif args.fimo_ntf_mode == 3:
        fimo_freqs_file = db_path + "/fimo_nt_freqs.uniform.txt"

    # A user-supplied frequencies file overrides the mode selection above.
    if args.fimo_user_ntf_file:
        fimo_freqs_file = args.fimo_user_ntf_file
    assert os.path.exists(fimo_freqs_file), "set FIMO nucleotide frequencies file \"%s\" not found" % (fimo_freqs_file)

    """
    Motif database.
    """
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = specify_motif_db(args.motif_db, 
                                                                                          db_path=db_path)
    args.motif_db_str = motif_db_str

    """
    Temp outputs.

    """
    # Random folder suffix so parallel optex runs do not collide.
    tmp_id = base64.urlsafe_b64encode(os.urandom(6)).decode()
    tmp_out_folder = "optex_tmp_out_" + tmp_id

    if not os.path.exists(tmp_out_folder):
        os.makedirs(tmp_out_folder)

    filtered_sites_bed = tmp_out_folder + "/in_sites.filtered.bed"
    filtered_sites_fa = tmp_out_folder + "/in_sites.filtered.fa"
    seq_motifs_xml = tmp_out_folder + "/seq_motifs.xml"
    str_motifs_cm = tmp_out_folder + "/str_motifs.cm"
    fimo_res_tsv = tmp_out_folder + "/fimo_results.tsv"
    cmsearch_res_txt = tmp_out_folder + "/cmsearch_results.txt"

    out_tmp_bed = tmp_out_folder + "/rbp_motif_hit_regions.tmp.bed"
    cmstat_tmp_out = tmp_out_folder + "/cmstat_out.tmp.txt"

    # Remove stale result files from a previous run (if any).
    if os.path.exists(fimo_res_tsv):
        os.remove(fimo_res_tsv)
    if os.path.exists(cmsearch_res_txt):
        os.remove(cmsearch_res_txt)

    """
    Get ID mappings

    name2ids_dic:
    RBP name -> motif IDs mapping
    id2type_dic:
    motif ID -> motif type (cm, meme_xml)
    id2org_dic (ignore for now):
    motif ID -> organism ID (so far only "human")

    """
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)

    # Motif ID -> RBP name (== RBP ID) mapping.
    id2name_dic = {}
    for rbpid in name2ids_dic:
        for motif_id in name2ids_dic[rbpid]:
            id2name_dic[motif_id] = rbpid


    """
    Define search motifs.

    """

    seq_motif_blocks_dic = {}
    str_motif_blocks_dic = {}
    loaded_motif_ids_dic = {}

    if rbp_id in name2ids_dic:

        print("RBP ID %s in database ... " %(rbp_id))

        # Database RBP IDs cannot be combined with user motif files.
        assert not args.user_meme_xml, "provided RBP ID in motif database, but --user-meme-xml provided. Please provide RBP ID not in motif database to search with --user-meme-xml motif(s)"
        assert not args.user_cm, "provided RBP ID in motif database, but --user-cm provided. Please provide RBP ID not in motif database to search with --user-cm motif(s)"

        """
        Get MEME XML database motif blocks dictionary.

        """
        seq_motif_blocks_dic = benchlib.read_in_xml_motifs(seq_motifs_db_file)
        for motif_id in seq_motif_blocks_dic:
            assert motif_id in id2name_dic, "MEME XML motif ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

        """
        Get covariance model database motif blocks dictionary.

        """
        str_motif_blocks_dic = benchlib.read_in_cm_blocks(str_motifs_db_file)
        for motif_id in str_motif_blocks_dic:
            assert motif_id in id2name_dic, "Covariance model accession ID \"%s\" not found in prior mapping. Please contact developers!" %(motif_id)

        # Only the motifs belonging to the requested RBP are searched.
        for motif_id in name2ids_dic[rbp_id]:
            loaded_motif_ids_dic[motif_id] = 1

    else:

        print("RBP ID %s NOT in database ... " %(rbp_id))

        assert args.user_meme_xml or args.user_cm, "--rbp-id not in database. Please provide user motif(s) (via --user-meme-xml OR --user-cm), set different motif database or supply RBP ID present in set database"

        if args.user_meme_xml:
            assert not args.user_cm, "either supply --user-meme-xml or --user-cm"
            assert os.path.exists(args.user_meme_xml), "--user-meme-xml file \"%s\" not found" % (args.user_meme_xml)
            seq_motif_blocks_dic = benchlib.read_in_xml_motifs(args.user_meme_xml, empty_check=False)
            assert seq_motif_blocks_dic, "no motifs read in from provided --user-meme-xml. Make sure to supply sequence motifs in MEME XML format!"
            # NOTE(review): name2ids_dic[rbp_id] is reassigned on every
            # iteration, so it ends up holding only the last motif ID.
            # loaded_motif_ids_dic keeps all of them and is what the
            # search below iterates — TODO confirm intended.
            for acc_id in seq_motif_blocks_dic:
                name2ids_dic[rbp_id] = [acc_id]
                id2type_dic[acc_id] = "meme_xml"
                id2name_dic[acc_id] = rbp_id
                loaded_motif_ids_dic[acc_id] = 1

        if args.user_cm:
            assert not args.user_meme_xml, "either supply --user-meme-xml or --user-cm"
            assert os.path.exists(args.user_cm), "--user-cm file \"%s\" not found" % (args.user_cm)
            # Check for valid format.
            acc_ids_dic = benchlib.check_cm_file(args.user_cm, cmstat_tmp_out, empty_check=True)
            # Read in covariance model blocks.
            str_motif_blocks_dic = benchlib.read_in_cm_blocks(args.user_cm)
            for acc_id in acc_ids_dic:
                assert acc_id in str_motif_blocks_dic, "accession ID %s not in blocks dictionary. Please contact developers!" %(acc_id)
            # NOTE(review): same reassignment pattern as above — only the
            # last accession ID remains in name2ids_dic[rbp_id].
            for acc_id in str_motif_blocks_dic:
                name2ids_dic[rbp_id] = [acc_id]
                id2type_dic[acc_id] = "cm"
                id2name_dic[acc_id] = rbp_id
                loaded_motif_ids_dic[acc_id] = 1


    """
    Make RBP object.

    """
    # Per-type index of each motif ID inside the RBP object's hit lists.
    motif_id2idx_dic = {}
    # seq_rbps_dic / str_rbps_dic act as flags: non-empty means sequence /
    # structure motifs were loaded (checked before running FIMO / cmsearch).
    seq_rbps_dic = {}
    str_rbps_dic = {}
    search_rbps_dic = {}

    rbp = benchlib.RBP(rbp_id, "optex")

    for motif_id in loaded_motif_ids_dic:

        if id2type_dic[motif_id] == "meme_xml":
            rbp.seq_motif_ids.append(motif_id)
            motif_id2idx_dic[motif_id] = len(rbp.seq_motif_ids) - 1
            rbp.seq_motif_hits.append(0)
            seq_rbps_dic[rbp_id] = 1
        else:
            rbp.str_motif_ids.append(motif_id)
            motif_id2idx_dic[motif_id] = len(rbp.str_motif_ids) - 1
            rbp.str_motif_hits.append(0)
            str_rbps_dic[rbp_id] = 1

        # Re-assigned each iteration; harmless, there is only one RBP here.
        search_rbps_dic[rbp_id] = rbp


    print("RBP ID:        ", rbp_id)
    print("# of motif IDs:", len(loaded_motif_ids_dic))
    print("Motif IDs:     ", loaded_motif_ids_dic)

    """
    Get chromosome IDs from --genome.
    """
    print("Get --genome FASTA headers ... ")
    chr_ids_dic = benchlib.get_fasta_headers(args.in_genome)


    """
    Run motif search for each up- downstream extension combination.

    """
    from itertools import product
    # Default grid of extension values (nt) to try on both sides.
    ext_list = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60]

    # If user extension list given (deduplicated, original order kept).
    if args.ext_list:
        ext_dic = {}
        for ext in args.ext_list:
            ext_dic[ext] = 1
        ext_list = []
        for ext in ext_dic:
            ext_list.append(ext)

    # All (upstream, downstream) combinations of the extension values.
    combined_list = list(product(ext_list, ext_list))
    ext2pval_dic = {}
    ext2stats_dic = {}
    # Track the longest (by up+down sum) extension passing --ext-pval.
    sum_ext = 0
    longest_ext = "-"
    
    for ext in combined_list:

        print("")
        print("Extension (upstream, downstream):", ext)
        ext_up = ext[0]
        ext_down = ext[1]
        # Key string "up-down" used in the result dictionaries.
        ext_str = "%i-%i" %(ext_up, ext_down)

        # Filter / extend --in BED.
        reg2sc_dic = {}
        reg_stats_dic = benchlib.bed_filter_extend_bed(args.in_sites, filtered_sites_bed,
                                            ext_up=ext_up,
                                            ext_down=ext_down,
                                            remove_dupl=True,
                                            reg2sc_dic=reg2sc_dic,
                                            score_col=args.bed_score_col,
                                            score_thr=args.bed_sc_thr,
                                            score_rev_filter=args.bed_sc_thr_rev_filter,
                                            chr_ids_dic=chr_ids_dic)
        
        assert reg_stats_dic["c_out"], "no --in BED sites remain after chromosome ID (or optionally score) filtering"

        # Check if all scores same (e.g. 0); identical scores make the
        # rank-sum test below meaningless, so abort.
        reg_scores_dic = {}
        for reg_id in reg2sc_dic:
            reg_scores_dic[reg2sc_dic[reg_id]] = 1
        assert len(reg_scores_dic) != 1, "all site scores identical. Please use meaningful scores (--in BED column set by --bed-score-col)"

        """
        Calculate effective size of genomic regions.
        
        """
        print("Calculate effective genomic region size ... ")
        # Effective size = merged (unique) genomic coverage of the regions.
        eff_reg_size = benchlib.get_uniq_gen_size(filtered_sites_bed)

        # print("# --in regions pre-filtering:  ", reg_stats_dic["c_in"])
        # print("# --in regions post-filtering: ", reg_stats_dic["c_out"])
        # print("# regions with invalid chr_id: ", reg_stats_dic["c_chr_filter"])
        # print("# duplicated regions removed:  ", reg_stats_dic["c_dupl_filter"])
        # print("Called region length sum:      ", reg_stats_dic["reg_len_sum"])
        # print("Effective region length sum:   ", eff_reg_size)

        """
        Get genomic region sequences from --genome.

        Output FASTA header format:
        >chr8:9772198-9772297(+)

        No need to convert sequences to uppercase, as FIMO works on both 
        lower- and uppercase (as long as DNA / RNA is set correct).

        """
        # print("Extract sequences from --genome ... ")
        benchlib.bed_extract_sequences_from_fasta(filtered_sites_bed, 
                                                  args.in_genome, filtered_sites_fa,
                                                  print_warnings=True)

        """
        Get FASTA sequences and sequence lengths.
        """

        out_seqs_dic = benchlib.read_fasta_into_dic(filtered_sites_fa,
                                        dna=True,
                                        all_uc=True,
                                        id_check=True,
                                        empty_check=False,
                                        skip_n_seqs=False)

        assert out_seqs_dic, "no sequences extracted from FASTA file for --in BED sites. Make sure to use compatible FASTA/BED files!"

        # Effective number of regions used for motif search.
        c_regions = len(out_seqs_dic)
        args.c_regions = c_regions

        # Called region size (sum of all extracted sequence lengths,
        # overlaps counted multiple times, unlike eff_reg_size).
        called_reg_size = 0
        for seq_id in out_seqs_dic:
            called_reg_size += len(out_seqs_dic[seq_id])


        """
        ====================================
        RUN SEQUENCE MOTIF SEARCH WITH FIMO.
        ====================================
        
        """
        fimo_hits_list = []
        call_dic = {}

        if seq_rbps_dic:

            """
            Print motifs to file.

            """

            # print("Output motifs to XML ... ")
            out_str, c_added_motifs = benchlib.blocks_to_xml_string(seq_motif_blocks_dic, loaded_motif_ids_dic)

            benchlib.output_string_to_file(out_str, seq_motifs_xml)


            """
            Run FIMO on sequences + motifs.

            """

            # print("Run FIMO ... ")
            benchlib.run_fast_fimo(filtered_sites_fa, seq_motifs_xml, fimo_res_tsv,
                        pval_thr=args.fimo_pval,
                        nt_freqs_file=fimo_freqs_file,
                        call_dic=call_dic,
                        params=fimo_params,
                        error_check=False)

            """
            Read in FIMO hits.

            """

            assert os.path.exists(fimo_res_tsv), "FIMO output file fimo.tsv %s does not exist! There must have been an error while running FIMO (invalid xml file provided?)" %(fimo_res_tsv)

            # print("Read in FIMO results ... ")
            fimo_hits_list = benchlib.read_in_fimo_results(fimo_res_tsv,
                                                           only_best_hits=args.greatest_hits)

            # c_fimo_hits = len(fimo_hits_list)
            # print("# of FIMO motif hits:", c_fimo_hits)

        """
        =========================================
        RUN STRUCTURE MOTIF SEARCH WITH CMSEARCH.
        =========================================

        """
        cmsearch_hits_list = []

        if str_rbps_dic:
            
            # print("Output covariance models to .cm ... ")
            benchlib.output_cm_blocks_to_file(str_motif_blocks_dic, loaded_motif_ids_dic, str_motifs_cm)

            cmsh_mode = ""
            if args.cmsearch_mode == 1:
                cmsh_mode = "--default"
            elif args.cmsearch_mode == 2:
                cmsh_mode = "--max"
            else:
                assert False, "invalid --cmsearch-mode %i set" %(args.cmsearch_mode)
            # Same bit score (--cmsearch-bs) used as inclusion + report threshold.
            cmsh_params = "-g --tformat fasta --toponly --incT %s -T %s %s" %(args.cmsearch_bs, args.cmsearch_bs, cmsh_mode)

            benchlib.run_cmsearch(filtered_sites_fa, str_motifs_cm, cmsearch_res_txt,
                            error_check=False,
                            call_dic=call_dic,
                            params=cmsh_params)
            # Read in hits.
            # print("Read in cmsearch results ... ")
            cmsearch_hits_list, c_cms_hits = benchlib.read_in_cmsearch_results(cmsearch_res_txt,
                                                                               only_best_hits=args.greatest_hits)

            # print("# of cmsearch motif hits:", c_cms_hits)

        """
        Store regions with motif hits (and hit counts).
        This tells us, how many input regions have motif hits (+ how many hits).

        Also store the unique motif hit regions (and hit counts).

        regions_with_motifs_dic:
            region -> motif_c_region

        unique_motifs_dic:
            motif_region -> c_motif_region

        """

        regions_with_motifs_dic = {}
        unique_motifs_dic = {}

        # Store regions with sequence motifs.
        for fh in fimo_hits_list:

            if fh.seq_name in regions_with_motifs_dic:
                regions_with_motifs_dic[fh.seq_name] += 1
            else:
               regions_with_motifs_dic[fh.seq_name] = 1 

            fh_str = repr(fh) # genomic motif region string.

            if fh_str in unique_motifs_dic:
                unique_motifs_dic[fh_str] += 1
            else:
                unique_motifs_dic[fh_str] = 1

        # Store regions with structure motifs.
        for cmsh in cmsearch_hits_list:

            if cmsh.seq_name in regions_with_motifs_dic:
                regions_with_motifs_dic[cmsh.seq_name] += 1
            else:
                regions_with_motifs_dic[cmsh.seq_name] = 1 

            cmsh_str = repr(cmsh) # genomic motif region string.

            if cmsh_str in unique_motifs_dic:
                unique_motifs_dic[cmsh_str] += 1
            else:
                unique_motifs_dic[cmsh_str] = 1

        """
        Store infos in RBP object.

        """
        # number of --in regions with RBP motif hits.
        c_hit_reg = len(regions_with_motifs_dic)
        # number of motif hits on --in regions in total.
        c_motif_hits = 0
        for reg_id in regions_with_motifs_dic:
            c_motif_hits += regions_with_motifs_dic[reg_id]
        
        rbp.c_hit_reg = c_hit_reg
        rbp.c_motif_hits = c_motif_hits
        # % hit regions over all regions (i.e. how many input regions contain >= 1 RBP motif).
        rbp.perc_hit_reg = (rbp.c_hit_reg / c_regions) * 100

        """
        Get unique motif hits.

        """
        # NOTE(review): the per-motif hit counters below accumulate across
        # extension-combination iterations (rbp is created once, outside
        # the loop) — TODO confirm intended.
        rbp.c_uniq_motif_hits = len(unique_motifs_dic)
        # Store individual motif unique hits.
        for motif_str_repr in unique_motifs_dic:
            motif_id = benchlib.get_motif_id_from_str_repr(motif_str_repr)
            idx = motif_id2idx_dic[motif_id]
            if id2type_dic[motif_id] == "meme_xml":
                rbp.seq_motif_hits[idx] += 1
            else:
                rbp.str_motif_hits[idx] += 1

        """
        Number of motif nucleotides over called + effective region size.

        """

        # print("Calculate effective motif region sizes ... ")

        # Output unique motif hit regions (sequence or structure) to BED.
        eff_motif_reg_size = 0
        if unique_motifs_dic:
            benchlib.batch_output_motif_hits_to_bed(unique_motifs_dic, out_tmp_bed,
                                                    one_based_start=True)
            # Calculate effective motif region size.
            eff_motif_reg_size = benchlib.get_uniq_gen_size(out_tmp_bed)

        # Number of unique motif nucleotides.
        rbp.c_uniq_motif_nts = eff_motif_reg_size
        # % unique motif nts over effective region length.
        rbp.perc_uniq_motif_nts_eff_reg = (eff_motif_reg_size / eff_reg_size) * 100
        # % unique motif nts over called region length.
        rbp.perc_uniq_motif_nts_cal_reg = (eff_motif_reg_size / called_reg_size) * 100
        # Number of unique motif hits per effective 1000 nt.
        rbp.uniq_motif_hits_eff_1000nt  = rbp.c_uniq_motif_hits / (eff_reg_size / 1000)
        # Number of unique motif hits per called 1000 nt.
        rbp.uniq_motif_hits_cal_1000nt  = rbp.c_uniq_motif_hits / (called_reg_size / 1000)

        print("# --in regions for motif search:", c_regions)
        print("Called genomic region size:     ", called_reg_size)
        print("Effective genomic region size:  ", eff_reg_size)

        """
        Motif enrichment test:
        Are motifs enriched in higher scoring sites?

        """

        # print("Calculate Wilcoxon rank-sum test statistics ... ")

        # Check if all scores same (e.g. 0).
        # NOTE(review): identical scores already trigger the assert further
        # up in this loop iteration, so this warning is effectively
        # unreachable.
        reg_scores_dic = {}
        for reg_id in reg2sc_dic:
            reg_scores_dic[reg2sc_dic[reg_id]] = 1
        if len(reg_scores_dic) == 1:
            print("WARNING: all site scores identical. Reported p-values meaningless! (i.e., equal 1.0)")

        hit_reg_scores = []
        non_hit_reg_scores = []

        # Split region scores into motif-hit vs no-motif-hit groups.
        for reg_id in reg2sc_dic:
            reg_sc = reg2sc_dic[reg_id] # float value.
            if reg_id in regions_with_motifs_dic:
                hit_reg_scores.append(reg_sc)
            else:
                non_hit_reg_scores.append(reg_sc)

        wc_pval = 1.0
        # In case no regions without motif hits (test needs both groups
        # non-empty): add the hit-group median as a dummy score to both.
        if not non_hit_reg_scores:
            print("WARNING: all input regions contain motifs. Adding dummy scores (median) ... ")
            dummy_val = statistics.median(hit_reg_scores)
            hit_reg_scores.append(dummy_val)
            non_hit_reg_scores.append(dummy_val)

        print("# regions with motifs:         ", len(hit_reg_scores))
        print("# regions without motifs:      ", len(non_hit_reg_scores))

        hit_reg_median_sc = "-"
        non_hit_reg_median_sc = "-"
        if hit_reg_scores:
            hit_reg_median_sc = statistics.median(hit_reg_scores)
        if non_hit_reg_scores:
            non_hit_reg_median_sc = statistics.median(non_hit_reg_scores)


        print("Median score motif regions:    ", hit_reg_median_sc)
        print("Median score non-motif regions:", non_hit_reg_median_sc)

        # One-sided test: are motif-hit region scores greater than
        # non-hit region scores? Only run if there were any motif hits.
        if unique_motifs_dic:
            wc_stat, wc_pval = mannwhitneyu(hit_reg_scores, non_hit_reg_scores, alternative="greater")
            wc_pval = benchlib.round_to_n_significant_digits_v2(wc_pval, 4)
            rbp.wc_pval = wc_pval

        print("Compact hit stats (RBP ID, # unique hits, Wilcoxon p-value):")
        print("%s\t%i\t%s" %(rbp_id, rbp.c_uniq_motif_hits, str(wc_pval)))

        # Record p-value + counts for this extension combination, and keep
        # track of the longest (up+down sum) combination passing --ext-pval.
        ext2pval_dic[ext_str] = wc_pval
        ext2stats_dic[ext_str] = [len(hit_reg_scores), len(non_hit_reg_scores), rbp.c_uniq_motif_hits]
        if wc_pval <= args.ext_pval:
            new_sum = ext_up + ext_down
            if new_sum > sum_ext:
                sum_ext = new_sum
                longest_ext = ext_str

    """
    Take out the trash.

    """
    print("Delete tmp folder ... ")
    if os.path.exists(tmp_out_folder):
        shutil.rmtree(tmp_out_folder)

    """
    Report.

    """
    # Extension combination with the lowest p-value (strictly < 1.0).
    # NOTE(review): if every combination yields a p-value of exactly 1.0,
    # best_ext stays "-" and the ext2stats_dic lookups below raise
    # KeyError — TODO confirm whether this case can occur / is intended.
    best_pval = 1.0
    best_ext = "-"
    for ext in ext2pval_dic:
        if ext2pval_dic[ext] < best_pval:
            best_pval = ext2pval_dic[ext]
            best_ext = ext

    print("")
    print("REPORT")
    print("======")
    print("")
    print("Longest extension p-value:   ", args.ext_pval)
    print("Longest extension:           ", longest_ext)
    print("Best p-value extension:      ", best_ext)
    print("Best p-value:                ", best_pval)
    print("# hit regions (best ext):    ", ext2stats_dic[best_ext][0])
    print("# non-hit regions (best ext):", ext2stats_dic[best_ext][1])
    print("# unique hits (best ext):    ", ext2stats_dic[best_ext][2])
    print("")


################################################################################

def main_info(args):
    """
    INFO mode: print database overview tables.

    Lists the RBP function ID -> description mapping, followed by one row
    per RBP ID with its motif count and associated function IDs, for the
    selected motif database (built-in via --motif-db, or --custom-db).
    """

    print("Running for you in INFO mode ... ")

    # Packaged database content lives next to the installed benchlib module.
    db_path = os.path.dirname(benchlib.__file__) + "/content"

    # Select built-in motif database files; a user-supplied database
    # folder (--custom-db) overrides that selection.
    seq_motifs_db_file, str_motifs_db_file, rbp2ids_file, motif_db_str = \
        specify_motif_db(args.motif_db, db_path=db_path)
    if args.custom_db:
        seq_motifs_db_file, str_motifs_db_file, rbp2ids_file = specify_custom_motif_db(args.custom_db)
        motif_db_str = "custom"

    # RBP name -> motif IDs, motif ID -> type, plus gene ID / function ID /
    # motif PubMed-ID / experiment mappings from the database table.
    name2ids_dic, id2type_dic, name2gid_dic, name2fids_dic, id2pids_dic, id2exp_dic = benchlib.get_rbp_id_mappings(rbp2ids_file)

    # Total motif count over all RBPs.
    c_motif_ids = sum(len(motif_ids) for motif_ids in name2ids_dic.values())

    # Function ID -> function description mapping table.
    fid2desc_file = db_path + "/rbp_function_list.32728246.tsv"
    fid2desc_dic, desc2fid_dic = benchlib.get_fid2desc_mapping(fid2desc_file)

    print("")
    print("Motif database: %s" %(motif_db_str))
    print("# RBP IDs:      %i" %(len(name2ids_dic)))
    print("# motif IDs:    %i" %(c_motif_ids))
    print("")

    # Table 1: function ID -> description.
    print("###########################################")
    print("  RBP FUNCTION ID -> FUNCTION DESCRIPTION")
    print("###########################################")
    print("")
    print("{:<10} {:>40}".format("Function ID", "Function description"))
    for fid in sorted(fid2desc_dic):
        print("{:<10} {:>40}".format(fid, fid2desc_dic[fid]))
    print("")
    print("")

    # Table 2: RBP ID -> motif count -> function IDs.
    print("###########################################")
    print("  RBP ID -> # MOTIF IDS -> RBP FUNCTIONS")
    print("###########################################")
    print("")

    print("{:<10} {:>10} {:>30}".format("RBP ID", "# Motifs", "Function ID"))
    for rbp_name in sorted(name2ids_dic):
        fids_str = ",".join(name2fids_dic[rbp_name]) if name2fids_dic[rbp_name] else "-"
        print("{:<10} {:>10} {:>30}".format(rbp_name, len(name2ids_dic[rbp_name]), fids_str))
    print("")


################################################################################

def main_dist(args):
    """
    DIST mode: plot the nucleotide distribution around crosslink (CL) positions.

    Extends the --in BED sites by --ext nt up- and downstream of the
    chosen center position, extracts the genomic sequences, builds a
    position probability matrix, and plots the per-position nucleotide
    frequencies centered on the zero (crosslink) position.
    """

    print("Running for you in DIST mode ... ")

    assert os.path.exists(args.in_sites), "--in file \"%s\" not found" % (args.in_sites)
    assert 0 <= args.ext_up_down <= 100, "please use reasonable --ext (>= 0 AND <= 100)"

    # Sanity-check --in BED format.
    benchlib.bed_check_format(args.in_sites, param_str="--in")

    # Create output folder and define output files; stale plots from a
    # previous run are removed.
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    sites_bed = args.out_folder + "/in_sites.filtered.bed"
    sites_fa = args.out_folder + "/in_sites.filtered.fa"
    plot_file = args.out_folder + "/nt_dist_zero_pos.png"
    if os.path.exists(plot_file):
        os.remove(plot_file)
    if args.plot_pdf:
        # PDF output requested (the stale PNG above was removed anyway).
        plot_file = args.out_folder + "/nt_dist_zero_pos.pdf"
        if os.path.exists(plot_file):
            os.remove(plot_file)

    # Chromosome IDs present in the --genome FASTA (regions on unknown
    # chromosomes get filtered out).
    print("Get --genome FASTA headers ... ")
    genome_chr_ids_dic = benchlib.get_fasta_headers(args.in_genome)

    keep_duplicates = False
    if args.no_uniq_reg_check:
        print("Do not filter out --in duplicated regions ... ")
        keep_duplicates = True

    # Center + extend --in BED sites, filtering invalid chromosome IDs
    # (and duplicates unless --no-uniq-reg-check).
    print("Preprocess --in sites ... ")
    reg_stats_dic = benchlib.bed_extend_bed(args.in_sites, sites_bed,
                                          ext_lr=args.ext_up_down,
                                          cp_mode=args.cp_mode,
                                          remove_dupl=not keep_duplicates,
                                          chr_ids_dic=genome_chr_ids_dic)

    print("# --in regions pre-filtering:  ", reg_stats_dic["c_in"])
    print("# --in regions post-filtering: ", reg_stats_dic["c_out"])
    print("# regions with invalid chr_id: ", reg_stats_dic["c_chr_filter"])
    print("# duplicated regions removed:  ", reg_stats_dic["c_dupl_filter"])
    assert reg_stats_dic["c_out"], "no --in BED sites remain after chromosome ID filtering"

    # Effective size = merged (unique) genomic coverage of the regions.
    print("Calculate effective genomic region size ... ")
    eff_reg_size = benchlib.get_uniq_gen_size(sites_bed)

    print("Called region length sum:      ", reg_stats_dic["reg_len_sum"])
    print("Effective region length sum:   ", eff_reg_size)

    # Extract region sequences from --genome (FASTA header format:
    # >chr8:9772198-9772297(+)).
    print("Extract sequences from --genome ... ")
    benchlib.bed_extract_sequences_from_fasta(sites_bed,
                                              args.in_genome, sites_fa,
                                              print_warnings=True)

    # With --no-uniq-reg-check, duplicated regions survive, so skip the
    # unique-header check and make headers unique instead.
    unique_header_check = not args.no_uniq_reg_check
    out_seqs_dic = benchlib.read_fasta_into_dic(sites_fa,
                                       dna=True,
                                       all_uc=True,
                                       id_check=unique_header_check,
                                       make_uniq_headers=not unique_header_check,
                                       empty_check=False,
                                       skip_n_seqs=False)

    assert out_seqs_dic, "no sequences extracted from FASTA file for --in BED sites. Make sure to use compatible FASTA/BED files!"

    # Effective number of regions used for plotting (stored on args as a
    # side effect, mirroring the other run modes).
    args.c_regions = len(out_seqs_dic)

    # Every extended region is expected to have length 2*ext + 1
    # (center position plus ext nt on each side).
    exp_reg_len = args.ext_up_down*2 + 1

    print("Create position probability matrix ... ")
    ppm = benchlib.make_pos_freq_matrix(out_seqs_dic,
                         exp_len=exp_reg_len,
                         report=True,
                         to_ppm=True)

    print("Plot distribution ... ")
    benchlib.plot_nt_distribution_zero_pos(ppm, args.ext_up_down,
                                           plot_out=plot_file)

    # Report output files (intermediate BED/FASTA are kept on purpose).
    print("")
    print("Filtered input regions .bed:\n%s" %(sites_bed))
    print("Filtered input regions .fa:\n%s" %(sites_fa))
    print("Distribution plot file:\n%s" %(plot_file))
    print("")


################################################################################

def _compare_collect_in_files(args):
    """
    Collect RBP + motif stats files from --in locations.

    Each --in location is either an rbpbench search/batch output folder
    (expected to contain rbp_hit_stats.tsv + motif_hit_stats.tsv) or a
    single stats file. Duplicate files are rejected.

    Returns list of stats file paths.

    """
    in_files = []
    for data in args.data_in:
        if os.path.isdir(data):
            rbp_stats_file = data + "/rbp_hit_stats.tsv"
            motif_stats_file = data + "/motif_hit_stats.tsv"
            assert os.path.exists(rbp_stats_file), "RBP stats file %s not found. Please provide valid rbpbench search/batch output folder via --in" %(rbp_stats_file)
            assert os.path.exists(motif_stats_file), "motif stats file %s not found. Please provide valid rbpbench search/batch output folder via --in" %(motif_stats_file)
            in_files.append(rbp_stats_file)
            in_files.append(motif_stats_file)
        elif os.path.isfile(data):
            in_files.append(data)
        else:
            assert False, "--in argument \"%s\" neither folder nor a file. Please provide rbpbench search/batch output folder or RBP/motif stats files" %(data)

    assert in_files, "no --in files read in from given --in locations. Please provide valid rbpbench search/batch output folder or RBP/motif stats files"

    # Reject files given more than once.
    file_check_dic = {}
    for in_file in in_files:
        assert in_file not in file_check_dic, "--in file %s given > 1. Please provide each file only once" %(in_file)
        file_check_dic[in_file] = 1
    return in_files


def _compare_read_stats(in_files):
    """
    Read in RBP/motif stats data from in_files.

    Returns:
    rbp_stats_dic:
    internal_id -> RBPStats object
    motif_stats_dic:
    internal_id -> MotifStats objects (iterable)

    Stats must come in matching RBP/motif pairs (same internal IDs).

    """
    rbp_stats_dic = {}
    motif_stats_dic = {}

    for in_file in in_files:
        # file_type instead of original "type" to avoid shadowing the builtin.
        file_type = benchlib.check_report_in_file(in_file)
        if file_type == "rbp_stats":
            benchlib.read_in_rbp_stats(in_file, rbp_stats_dic=rbp_stats_dic)
        elif file_type == "motif_stats":
            benchlib.read_in_motif_stats(in_file, motif_stats_dic=motif_stats_dic,
                                         store_uniq_only=True)
        else:
            assert False, "invalid --in file encountered (%s). Please provide valid rbpbench search/batch output folder or RBP/motif stats files" %(in_file)

    assert rbp_stats_dic, "No RBP stats read in from --in files. Please provide valid (non-empty) rbpbench search/batch output folder or RBP/motif stats files"
    assert motif_stats_dic, "No motif stats read in from --in files. Please provide valid (non-empty) rbpbench search/batch output folder or RBP/motif stats files"
    assert len(rbp_stats_dic) == len(motif_stats_dic), "# RBP stats != # motif stats. Please provide RBP/motif stats files in pairs"
    for internal_id in rbp_stats_dic:
        assert internal_id in motif_stats_dic, "RBP stats internal_id %s not found in motif stats files. Please provide matching RBP/motif stats files" %(internal_id)

    return rbp_stats_dic, motif_stats_dic


def _compare_group_combinations(rbp_stats_dic):
    """
    Group search runs into comparison groups.

    3 types of comparisons:
    1) between different methods (same rbp_id,data_id,motif_db)
        Table (some peak region dataset stats) between methods
        Bar plot % of regions with motifs
        Venn diagram motif
    2) between different RBPs on same dataset (so same data_id,
    and c_regions,mean_reg_len,median_reg_len,min_reg_len,max_reg_len,called_reg_size)
        basically reporting output of rbpbench search (could also be made as html output of rbpbench search!)
    3) between different datasets (same rbp_id,method_id,motif_db)

    Only do 1) 3) here, 2) can be done in search mode!

    Returns:
    compare_methods_dic:
    "data_id,motif_db,rbp_id" -> method_id -> internal_id
    compare_datasets_dic:
    "method_id,motif_db,rbp_id" -> data_id -> internal_id

    """
    compare_methods_dic = {}
    compare_datasets_dic = {}

    for internal_id, stats in rbp_stats_dic.items():
        data_id = stats.data_id
        method_id = stats.method_id
        rbp_id = stats.rbp_id
        motif_db = stats.motif_db
        # Method comparison ID (i.e. what is fixed).
        comp_id = "%s,%s,%s" %(data_id, motif_db, rbp_id)
        method_dic = compare_methods_dic.setdefault(comp_id, {})
        assert method_id not in method_dic, "data supplied via --in contains a combination twice (data_id:%s,rbp_id:%s,motif_db:%s,method_id:%s). Please provide unique combinations for method comparison" %(data_id, rbp_id, motif_db, method_id)
        method_dic[method_id] = internal_id
        # Dataset comparison ID.
        comp_id = "%s,%s,%s" %(method_id, motif_db, rbp_id)
        data_dic = compare_datasets_dic.setdefault(comp_id, {})
        assert data_id not in data_dic, "data supplied via --in contains a combination twice (data_id:%s,rbp_id:%s,motif_db:%s,method_id:%s). Please provide unique combinations for dataset comparison" %(data_id, rbp_id, motif_db, method_id)
        data_dic[data_id] = internal_id

    return compare_methods_dic, compare_datasets_dic


def _compare_write_stats_rows(outfh, comp_dic, rbp_stats_dic,
                              keys_are_methods=True):
    """
    Write comparison stats TSV rows for all comparison groups in comp_dic
    with >= 2 entries to open file handle outfh.

    keys_are_methods:
    True if group keys are method IDs (method comparison, data_id taken
    from RBP stats), False if keys are data IDs (dataset comparison,
    method_id taken from RBP stats).

    """
    for comp_id, id_dic in sorted(comp_dic.items()):
        if len(id_dic) < 2:
            continue
        for key_id in id_dic:
            stats = rbp_stats_dic[id_dic[key_id]]
            if keys_are_methods:
                method_id, data_id = key_id, stats.data_id
            else:
                method_id, data_id = stats.method_id, key_id
            row = "\t".join([comp_id, method_id, data_id,
                             stats.motif_db, stats.rbp_id,
                             str(stats.c_regions),
                             str(stats.c_uniq_motif_hits),
                             str(stats.perc_reg_with_hits),
                             str(stats.perc_uniq_motif_nts_eff_reg),
                             str(stats.uniq_motif_hits_cal_1000nt)])
            outfh.write(row + "\n")


def _compare_write_motif_hits(outmh, outbed, comp_dic, motif_stats_dic):
    """
    Write motif hit intersection rows (used for Venn diagrams) for all
    comparison groups in comp_dic with >= 2 entries.

    outmh:
    Open TSV file handle (combined_id, motif_hit_id, IDs with hit).
    outbed:
    Open BED file handle (one region per motif hit).

    """
    for comp_id, id_dic in sorted(comp_dic.items()):
        if len(id_dic) < 2:
            continue
        # Motif hit ID (chr_id:gen_s-gen_e(strand)motif_id) to method/data
        # IDs sporting this hit.
        hid2ids_dic = {}
        for key_id, int_id in sorted(id_dic.items()):
            for motif_stats in motif_stats_dic[int_id]:  # MotifStats objects.
                hid2ids_dic.setdefault(motif_stats.hit_id, []).append(key_id)

        for hid, key_ids in hid2ids_dic.items():
            ids_str = ",".join(key_ids)
            outmh.write("%s\t%s\t%s\n" %(comp_id, hid, ids_str))
            # Also store motif hit in BED format.
            id_elem = benchlib.get_hit_id_elements(hid)
            outbed.write("%s\t%s\t%s\t%s;%s;%s\t0\t%s\n" %(id_elem[0], id_elem[1], id_elem[2], id_elem[4], comp_id, ids_str, id_elem[3]))


def main_compare(args):
    """
    Compare and report different rbpbench motif search results.

    Reads RBP + motif hit stats from --in locations (rbpbench search/batch
    output folders and/or stats files), groups runs into method comparisons
    (same data_id, motif_db, rbp_id) and dataset comparisons (same
    method_id, motif_db, rbp_id), and writes an HTML comparison report
    plus comparison stats TSV and motif hits TSV/BED files to the
    output folder.

    """

    print("Running for you in COMPARE mode ... ")

    """
    Check / read in input --in data.

    """
    in_files = _compare_collect_in_files(args)
    rbp_stats_dic, motif_stats_dic = _compare_read_stats(in_files)

    print("# RBP stats:  ", len(rbp_stats_dic))
    print("# motif stats:", len(motif_stats_dic))

    """
    Output files.

    """
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)

    """
    Get data_id, method_id, rbp_id infos and group into comparisons.

    """
    compare_methods_dic, compare_datasets_dic = _compare_group_combinations(rbp_stats_dic)

    # Binomial coefficient.
    from math import comb

    # How many method comparisons?
    c_method_comps = sum(comb(len(method_dic), 2)
                         for method_dic in compare_methods_dic.values())
    print("# of method combinations:", c_method_comps)

    # How many dataset comparisons?
    c_data_comps = sum(comb(len(data_dic), 2)
                       for data_dic in compare_datasets_dic.values())
    print("# of data combinations:", c_data_comps)

    assert c_method_comps or c_data_comps, "--in data does not contain any dataset or method combinations. Please provide combinations for method or dataset comparison"

    """
    Create comparison statistics and HTML report.

    """

    html_report_out = args.out_folder + "/" + "report.rbpbench_compare.html"
    if args.plot_abs_paths:
        html_report_out = os.path.abspath(args.out_folder) + "/" + "report.rbpbench_compare.html"

    plots_subfolder = "html_report_plots"
    benchlib_path = os.path.dirname(benchlib.__file__)

    # Generate report.
    benchlib.compare_generate_html_report(args,
                                          compare_methods_dic,
                                          compare_datasets_dic,
                                          rbp_stats_dic, motif_stats_dic,
                                          benchlib_path,
                                          html_report_out=html_report_out,
                                          plots_subfolder=plots_subfolder)

    """
    Output comparison stats.

    """

    comp_stats_tsv_out = args.out_folder + "/" + "comparison_stats.rbpbench_compare.tsv"
    motif_hits_tsv_out = args.out_folder + "/" + "motif_hits.rbpbench_compare.tsv"
    motif_hits_bed_out = args.out_folder + "/motif_hits.rbpbench_compare.bed"

    # with-block so handle is closed even if a stats object misses a field.
    with open(comp_stats_tsv_out, "w") as outcs:
        outcs_header = "combined_id\tmethod_id\tdata_id\tmotif_db\trbp_id\tc_regions\tc_uniq_motif_hits\t"
        outcs_header += "perc_reg_with_hits\tperc_uniq_motif_nts_eff_reg\tuniq_motif_hits_cal_1000nt\n"
        outcs.write(outcs_header)
        # Method ID comparisons.
        _compare_write_stats_rows(outcs, compare_methods_dic, rbp_stats_dic,
                                  keys_are_methods=True)
        # Data ID comparisons.
        _compare_write_stats_rows(outcs, compare_datasets_dic, rbp_stats_dic,
                                  keys_are_methods=False)

    """
    Output motif intersection stats (used for Venn diagrams).

    """

    with open(motif_hits_tsv_out, "w") as outmh, open(motif_hits_bed_out, "w") as outbed:
        outmh.write("combined_id\tmotif_hit_id\tmethod_data_ids_with_hit\n")
        # Method ID comparisons.
        _compare_write_motif_hits(outmh, outbed, compare_methods_dic, motif_stats_dic)
        # Data ID comparisons.
        _compare_write_motif_hits(outmh, outbed, compare_datasets_dic, motif_stats_dic)

    """
    Inform about outputs.

    """
    print("")
    print("OUTPUT FILES")
    print("============")
    print("")
    print("Comparison stats .tsv:\n%s" %(comp_stats_tsv_out))
    print("Motif hit stats .tsv:\n%s" %(motif_hits_tsv_out))
    print("Motif hits .bed:\n%s" %(motif_hits_bed_out))
    print("Motif search comparison report .html:\n%s" %(html_report_out))
    print("")


################################################################################

if __name__ == '__main__':
    # Set up command line parsing.
    parser = setup_argument_parser()
    # Without any arguments given, print help and exit.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit()
    # Parse command line arguments.
    args = parser.parse_args()
    args.version = __version__

    # Show some banner.
    print(benchlib.print_some_banner())

    # Map each program mode to its main function.
    mode2main = {
        'search': main_search,
        'batch': main_batch,
        'searchseq': main_searchseq,
        'searchregex': main_searchregex,
        'searchlong': main_searchlong,
        'searchrna': main_searchrna,
        'searchlongrna': main_searchlongrna,
        'enmo': main_enmo,
        'nemo': main_nemo,
        'con': main_con,
        'sponge': main_sponge,
        'isocomp': main_isocomp,
        'streme': main_streme,
        'tomtom': main_tomtom,
        'goa': main_goa,
        'optex': main_optex,
        'info': main_info,
        'dist': main_dist,
        'compare': main_compare,
    }

    # Run selected mode (no-op for unknown mode, as in the if/elif chain
    # this replaces).
    mode_main = mode2main.get(args.which)
    if mode_main is not None:
        mode_main(args)