#!/usr/bin/env perl

my $PROGID = 'rnashapes';

sub getPath {
	my ($url) = @_;
	my @parts = split(m|/|, $url);
	pop @parts;
	unshift @parts, "./" if (@parts == 0);
	return join('/', @parts).'/';
}

use lib getPath($0)."../lib/";

use strict;
use warnings;
use Data::Dumper;
use Getopt::Long;
use foldGrammars::Utils;
use foldGrammars::Settings;
use foldGrammars::RNAcast;
use foldGrammars::IO;
use POSIX 'isatty';

our @ALLMODES = ($Settings::MODE_MFE, $Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_PROBS, $Settings::MODE_SAMPLE, $Settings::MODE_CAST, $Settings::MODE_EVAL, $Settings::MODE_ABSTRACT, $Settings::MODE_OUTSIDE, $Settings::MODE_MEA, $Settings::MODE_PROBING);
@References::ORDER = ('lor:ber:sie:taf:fla:sta:hof:2011','gru:lor:ber:neu:hof:2008','mat:dis:chi:schroe:zuk:tur:2004','tur:mat:2009','jan:schud:ste:gie:2011','jan:gie:2010','voss:gie:reh:2006','ree:gie:2005','ste:voss:reh:ree:gie:2006','mcc:1990','gie:voss:reh:2004','wee:2010','dei:li:mat:wee:2009','sau:gie:2015','jan:gie:2014');

our $GRAMMAR_NODANGLE = 'nodangle';
our $GRAMMAR_OVERDANGLE = 'overdangle';
our $GRAMMAR_MICROSTATE = 'microstate';
our $GRAMMAR_MACROSTATE = 'macrostate';

my %PARAM;
$PARAM{mode} = {modes => \@ALLMODES, key => 'mode', type => 's', default => $Settings::MODE_SHAPES, info => "Select the computation mode. Available modes are \"".join('", "', @ALLMODES)."\". Omit the ticks on input.\nDefault is \"@(DEFAULT)\"."};
$PARAM{windowsize} = {modes => [$Settings::MODE_MFE, $Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_PROBS, $Settings::MODE_SAMPLE, $Settings::MODE_MEA, $Settings::MODE_PROBING], key => 'windowSize', type => 'i', gapc => 'w', default => undef, info => "Activates window mode and computes substrings of size <int> for the input. After computation for the first <int> bases is done, the window is pushed <y> bases to the right and the next computation is startet. <y> is set by --@(windowincrement).\n<int> must be a non-zero positive integer, smaller than the input length."};
$PARAM{windowincrement} = {modes => [$Settings::MODE_MFE, $Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_PROBS, $Settings::MODE_SAMPLE, $Settings::MODE_MEA, $Settings::MODE_PROBING], key => 'windowIncrement', gapc => 'i', type => 'i', default => 1, info => "If --@(windowsize) is given, this parameter sets the offset for the next window to <int> bases.\n<int> must be a non-zero positive integer, smaller than --@(windowsize).\nDefault is @(DEFAULT)."};
$PARAM{temperature} = {modes => [$Settings::MODE_MFE, $Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_PROBS, $Settings::MODE_SAMPLE, $Settings::MODE_CAST, $Settings::MODE_EVAL, $Settings::MODE_OUTSIDE, $Settings::MODE_MEA, $Settings::MODE_PROBING], key => 'temperature', gapc => 'T', type => 'f', default => 37, info => "Rescale energy parameters to a temperature of temp C.\n<float> must be a floating point number.\nDefault is @(DEFAULT) C."};
$PARAM{param} = {modes => [$Settings::MODE_MFE, $Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_PROBS, $Settings::MODE_SAMPLE, $Settings::MODE_CAST, $Settings::MODE_EVAL, $Settings::MODE_OUTSIDE, $Settings::MODE_MEA, $Settings::MODE_PROBING], key => 'param', gapc => 'P', type => 's', default => undef, infoType => "paramfile", info => "Read energy parameters from paramfile, instead of using the default parameter set. See the RNAlib (Vienna RNA package) documentation for details on the file format.\nDefault are parameters released by the Turner group in 2004 (see [".References::getNumber('mat:dis:chi:schroe:zuk:tur:2004')."] and [".References::getNumber('tur:mat:2009')."])."};
$PARAM{allowlp} = {modes => \@ALLMODES, key => 'allowLP', gapc => 'u', type => 'i', default => 0, info => "Lonely base pairs have no stabilizing effect, because they cannot stack on another pair, but they heavily increase the size of the folding space. Thus, we normally forbid them. Should you want to allow them set <int> to 1.\n<int> must be 0 (=don't allow lonely base pairs) or 1 (= allow them).\nDefault is @(DEFAULT), i.e. no lonely base pairs."};
$PARAM{absolutedeviation} = {modes => [$Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_CAST], key => 'absoluteDeviation', gapc => 'e', type => 'f', default => undef, info => "This sets the energy range as an absolute value of the minimum free energy. For example, when --@(absolutedeviation) 10.0 is specified, and the minimum free energy is -10.0 kcal/mol, the energy range is set to 0.0 to -10.0 kcal/mol.\n<float> must be a positive floating point number.\nConnot be combined with --@(relativedeviation)."};
$PARAM{relativedeviation} = {modes => [$Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_CAST], key => 'relativeDeviation', gapc => 'c', type => 'f', default => 10.0, info => "This sets the energy range as percentage value of the minimum free energy. For example, when --@(relativedeviation) 5.0 is specified, and the minimum free energy is -10.0 kcal/mol, the energy range is set to -9.5 to -10.0 kcal/mol.\n<float> must be a positive floating point number.\nBy default, --@(relativedeviation) is set to @(DEFAULT) %.\nCannot be combined with --@(absolutedeviation)."};
$PARAM{shapelevel} = {modes => [$Settings::MODE_MFE, $Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_PROBS, $Settings::MODE_SAMPLE, $Settings::MODE_CAST, $Settings::MODE_EVAL, $Settings::MODE_ABSTRACT, $Settings::MODE_MEA, $Settings::MODE_PROBING], key => 'shapeLevel', gapc => 'q', type => 'i', default => 5, info => "Set shape abstraction level. Currently, we provide five different levels (see [".References::getNumber('jan:gie:2010')."] for their definitions), where 5 is the most abstract and 1 the most concrete one.\n<int> must be a number between 5 and 1.\nDefault is @(DEFAULT) (the most abstract one)."};
$PARAM{lowprobfilter} = {modes => [$Settings::MODE_PROBS], key => 'lowProbFilter', gapc => 'F', type => 'f', default => 0.000001, info => "This option sets a barrier for filtering out results with very low probabilities during calculation. The default value here is @(DEFAULT), which gives a significant speedup compared to a disabled filter. (See [".References::getNumber('voss:gie:reh:2006')."] for details.) Note that this filter can have a slight influence on the overall results. To disable this filter, use option --@(lowprobfilter) 0. \n<float> must be a positive floating point number smaller than 1."};
$PARAM{lowprobfilteroutput} = {modes => [$Settings::MODE_PROBS, $Settings::MODE_SAMPLE], key => 'outputLowProbFilter', gapc => undef, type => 'f', default => 0.000001, info => "This option sets a filter for omitting low probability results during output. It is just for reporting convenience. Unlike probability cutoff filter, this option does not have any influence on runtime or probabilities beyond this value. To disable this filter, use option --@(lowprobfilteroutput) 0. \n<float> must be a positive floating point number smaller than 1."};
$PARAM{help} = {modes => \@ALLMODES, key => 'help', default => undef, info => "show this brief help on version and usage"};
$PARAM{binarypath} = {modes => \@ALLMODES, key => 'binPath', type => 's', default => undef, info => $Settings::PROGINFOS{$PROGID}->{name}." expects that according Bellman's GAP compiled binaries are located in the same directory as the Perl wrapper is. Should you moved them into another directory, you must set --@(binarypath) to this new location!"};
$PARAM{binaryprefix} = {modes => \@ALLMODES, key => 'binPrefix', type => 's', default => $Settings::PROGINFOS{$PROGID}->{name}.'_', info => $Settings::PROGINFOS{$PROGID}->{name}." expects a special naming schema for the according Bellman's GAP compiled binaries. The binary name is composed of three to four components:\n  1) the program prefix (on default \"@(DEFAULT)\"),\n  2) the mode,\n  3) the used grammar,\n  4) optionally, the word \"window\" if you activate window computation.\nThus, for non-window mode \"$Settings::MODE_SUBOPT\", with grammar \"$GRAMMAR_OVERDANGLE\" and \"mis\" representation, the name would be \"@(DEFAULT)".$Settings::MODE_SUBOPT."_".$GRAMMAR_OVERDANGLE."\".\nWith --@(binaryprefix) you can change the prefix into some arbitary one."};
$PARAM{probdecimals} = {modes => [$Settings::MODE_PROBS, $Settings::MODE_SAMPLE, $Settings::MODE_MEA, $Settings::MODE_PROBING], key => 'probDecimals', type => 'i', default => 7, info => "Sets the number of digits used for printing shape probabilities.\n<int> must be a positive integer number.\nDefault is @(DEFAULT).\nAlso sets the number of displayed decimals for reactivity values if mode '".$Settings::MODE_PROBING."' is used."};
$PARAM{numsamples} = {modes => [$Settings::MODE_SAMPLE], key => 'numSamples', type => 'i', gapc => 'r', default => 1000, info => "Sets the number of samples that are drawn to estimate shape probabilities.\nIn our experience, 1000 iterations are sufficient to achieve reasonable results for shapes with high probability. Thus, default is @(DEFAULT)."};
$PARAM{showsamples} = {modes => [$Settings::MODE_SAMPLE], key => 'showSamples', type => 'i', gapc => undef, default => 0, info => "You can inspect the samples drawn by stochastic backtrace if you turn --@(showsamples) on by setting it to 1.\nDefault is @(DEFAULT) = off."};
$PARAM{grammar} = {modes => \@ALLMODES, key => 'grammar', default => $GRAMMAR_MACROSTATE, type => 's', info => "How to treat \"dangling end\" energies for bases adjacent to helices in free ends and multi-loops.\n \n\"$GRAMMAR_NODANGLE\" (-d 0 in Vienna package) ignores dangling energies altogether.\n \n\"$GRAMMAR_OVERDANGLE\" (-d 2 in Vienna package) always dangles bases onto helices, even if they are part of neighboring helices themselves. Seems to be wrong, but could perform surprisingly well.\n \n\"$GRAMMAR_MICROSTATE\" (-d 1 in Vienna package) correct optimisation of all dangling possibilities, unfortunately this results in an semantically ambiguous search space regarding Vienna-Dot-Bracket notations.\n \n\"$GRAMMAR_MACROSTATE\" (no correspondens in Vienna package) same as $GRAMMAR_MICROSTATE, while staying unambiguous. Unfortunately, mfe computation violates Bellman's principle of optimality.\nDefault is \"@(DEFAULT)\". See [".References::getNumber('jan:schud:ste:gie:2011')."] for further details."};
$PARAM{bppmthreshold} = {modes => [$Settings::MODE_OUTSIDE], gapc => 'F', key => 'bppmThreshold', default => 0.00001, type => 'f', info => "Set the threshold for base pair probabilities included in the postscript output.\nDefault is @(DEFAULT)."};
$PARAM{dotplotfilename} = {modes => [$Settings::MODE_OUTSIDE], gapc => 'o', key => 'dotplot', default => 'dotPlot.ps', type => 's', info => "Sets the filename for the probability dot plot, produced in \"$Settings::MODE_OUTSIDE\" mode.\nDefault is \"@(DEFAULT)\"."};
$PARAM{dotplotpng} = {modes => [$Settings::MODE_OUTSIDE], key => 'png', default => 0, type => 'i', info => "Activate this option to also produce a png file of the \"dot plot\". This is deactivated by default and requires an installation of the program \"GhostScript\"."};
$PARAM{structureprobabilities} = {modes => [$Settings::MODE_MFE, $Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_PROBS, $Settings::MODE_SAMPLE, $Settings::MODE_CAST, $Settings::MODE_MEA, $Settings::MODE_PROBING], key => 'structureProbs', default => 0, type => 'i', info => "If activated, in addition to free energy also the probability of structures will be computed. To speed up computation, this calculation is switched off by default."};
$PARAM{verbose} = {modes => \@ALLMODES, key => 'verbose', default => 0, type => 'i', info => "Prints the actual command for Bellman's GAP binary."};
$PARAM{varnaoutput} = {modes => [$Settings::MODE_MFE, $Settings::MODE_SUBOPT, $Settings::MODE_SHAPES, $Settings::MODE_PROBS, $Settings::MODE_SAMPLE, $Settings::MODE_CAST, $Settings::MODE_EVAL, $Settings::MODE_MEA, $Settings::MODE_PROBING], key => 'varna', default => undef, type => 's', info => "Provide a file name to which a HTML formatted version of the output should be saved in."};
$PARAM{slope} = {modes => [$Settings::MODE_PROBING], key => 'slope', default => 1.8, type => 'f', gapc => 'A', info => "The program RNAstructure [".References::getNumber('dei:li:mat:wee:2009')."] adds a bonus to the free energy of a base-pair stack according to the formula: bonus = slope * log(reactivity + 1) + intercept. If you set --@(normalization) to '".$Settings::NORMALIZATION_RNASTRUCTURE."', reactivities are normalized according to this spirit, but added for all base-pairs and substracted for all unpaired bases. Default is @(DEFAULT)."};
$PARAM{intercept} = {modes => [$Settings::MODE_PROBING], key => 'intercept', default => -0.6, type => 'f', gapc => 'B', info => "See parameter --@(slope): sets the value for 'intercept'. Only effective if --@(normalization) is set to '".$Settings::NORMALIZATION_RNASTRUCTURE."'. Default is @(DEFAULT)."};
$PARAM{normalization} = {modes => [$Settings::MODE_PROBING], key => 'normalization', default => 'centroid', type => 's', gapc => 'N', info => "The reactivities read from a file (see --@(reactivityfilename)) can be normalized in four ways. Non zero values are always set to be 0.0:\n'".$Settings::NORMALIZATION_CENTROID."': a 2-means clustering of all raw reactivities gives a centroid value for 'paired' and another for 'unpaired' values. The difference to the respective centroid, given by the structure, is added to the score.\n'".$Settings::NORMALIZATION_ASPROBABILITIES."': All reactivities are converted into probabilities ranging from 0.0 to 1.0.\n'".$Settings::NORMALIZATION_LOGPLAIN."': reactivities are transformed via the formula: log(reactivity + 1.0).\n'".$Settings::NORMALIZATION_RNASTRUCTURE."': reactivities are normalized as in the program RNAstructure, i.e. basically by the formula: slope * log(reactivity + 1.0) + intercept.\nDefault is @(DEFAULT)."};
$PARAM{modifier} = {modes => [$Settings::MODE_PROBING], key => 'modifier', default => 'unknown', type => 's', gapc => 'M', info => "The modifier is the chemical reagent in a structure probing experiment that attacs the nucleotide which in the end gives its reactivity. Different modifier affect different bases. Reactivities given by the file --@(reactivityfilename) will be set to zero if the modifier cannot affect a base. Available modifier are:\n'".$Settings::MODIFIER_DMS."' affects [A,C]\n  '".$Settings::MODIFIER_CMCT."' affects [G,U]\n'".$Settings::MODIFIER_SHAPE."' affects [A,C,G,U]\n'".$Settings::MODIFIER_DIFFSHAPE."' affects [A,C,G,U]\n'".$Settings::MODIFIER_UNKNOWN."' affects [A,C,G,U]\nDefault is @(DEFAULT)."};
$PARAM{reactivityfilename} = {modes => [$Settings::MODE_PROBING], key => 'reactivityfilename', default => undef, type => 's', gapc => 'S', info => "The path to a file containing the raw reactivity values. The file must contain two tabular separated columns. First column is the index of the nucleotide, starting with 1. The second column is a float value giving the reactivity for that nucleotide. There must be as many rows as nucleotides in the input RNA sequence!"};

my $settings = {};
foreach my $param (keys %PARAM) {
	$settings->{$param} = $PARAM{$param}->{default};
}
my %help = ();
foreach my $param (keys %PARAM) {
	my $optionSec = $PARAM{$param}->{key};
	$optionSec .= "=".$PARAM{$param}->{type} if (exists $PARAM{$param}->{type});
	$help{$optionSec} = \$settings->{$param};
}
&GetOptions( 	
	%help
);


checkParameters($settings, \%PARAM);

usage() if (defined $settings->{'help'}); #user asks for help --> print usage and die
our $inputIndex = 0;
if (@ARGV == 0) {
	#input not given via command line parameter
	if (isatty(*STDIN)) {
		#we are somehow in an interactive mode
		if ($settings->{mode} eq $Settings::MODE_EVAL) {
			#expecting two inputs, first sequence, second structure
			print "You are in \"$Settings::MODE_EVAL\" mode. Please give me your RNA sequence:\n";
			my $inputSequence = <STDIN>; chomp $inputSequence;
			print "Second, I need your structure that should be evaluated:\n";
			my $inputStructure = <STDIN>; chomp $inputStructure;
			processInput({sequence => $inputSequence, header => "unnamed sequence", structure => $inputStructure}, $settings);
		} elsif ($settings->{mode} eq $Settings::MODE_CAST) {
			print "You are in \"$Settings::MODE_CAST\" mode. Please give me the name of a FASTA file, containing a family of RNA sequences:\n";
			my $inputFile = <STDIN>; chomp $inputFile;
			processInput($inputFile, $settings);
		} elsif ($settings->{mode} eq $Settings::MODE_ABSTRACT) {
			print "You are in \"".$settings->{mode}."\" mode. Please give me your Vienna-Dot-Bracket string, that shell be converted into a shape string:\n";
			my $inputStructure = <STDIN>; chomp $inputStructure;
			processInput($inputStructure, $settings);
		} else {
			#expecting a sequence or a filename
			print "You are in \"".$settings->{mode}."\" mode. Please give me either your RNA sequence, or a (multiple) FASTA file, containing your sequences:\n";
			my $input = <STDIN>; chomp $input;
			if (-e $input) {
				#since there is a file, having the name of the user input it is very likely that we really should read from a file
				processInput($input, $settings);
			} else {
				#otherwise, we assume it is a single, plain RNA sequence
				processInput({sequence => $input, header => "unnamed sequence"}, $settings);
			}
		}
	} else {
		#input must be delivered via pipe
		die "Your are in \"$Settings::MODE_EVAL\" mode. But I can't distinguish RNA sequence from structure if you deliver input via PIPE. Please choose another way to input your information.\n" if ($settings->{mode} eq $Settings::MODE_EVAL);
		processInput(\*STDIN, $settings);
	}
} elsif (@ARGV == 1) {
	#rna sequence, secondary structure or filename, given as command line parameter
	if (-e $ARGV[0]) {
		#since there is a file, having the name of the user input it is very likely that we really should read from a file
		processInput($ARGV[0], $settings);
	} else {
		#otherwise, we assume it is a single, plain RNA sequence or a 2D structure
		processInput({sequence => $ARGV[0], header => "unnamed sequence"}, $settings);
	}
} elsif (@ARGV == 2 && $settings->{mode} eq $Settings::MODE_EVAL) {
	#only for MODE_EVAL, rna sequence and secondary structure are given as two command line parameters
	my $inputStructure = $ARGV[1];
	if (-e $inputStructure) { #especially for BiBiServ: structure must be delivered as a filename, thus read the file to get the structure.
		my $command = Settings::getBinary('cat')." $inputStructure";
		$inputStructure = Utils::execute($command); 
		chomp $inputStructure;
	}
	processInput({sequence => $ARGV[0], header => "unnamed sequence", structure => $inputStructure}, $settings);
} else {
	print STDERR "You gave me too many inputs. Please ask for help, via \"".$Settings::PROGINFOS{$PROGID}->{name}." --".$PARAM{help}->{key}."\".\n";
	exit(1);
}

sub processInput {
	my ($input, $refHash_settings) = @_;
	
	if (ref($input) =~ m/HASH/) {
		#input is a sequence
		if ($settings->{mode} eq $Settings::MODE_CAST) {
			die "Your are in \"$Settings::MODE_CAST\" mode, but you provided just one sequence. Search for common shapes is useless, just use mode \"$Settings::MODE_SHAPES\" or provide at least two sequences!\n";
		} else {
			if (($settings->{mode} eq $Settings::MODE_EVAL) && (-e $input->{sequence})) {
				foreach my $refHash_seqBlock (@{Utils::applyFunctionToFastaFile($input->{sequence}, sub {my ($refHash_sequence) = @_; return @_;})}) {
					$input->{sequence} = $refHash_seqBlock->{result}->{sequence};
					$input->{header} = $refHash_seqBlock->{result}->{header};
					last;
				}
			}
			doComputation($input, $refHash_settings);
		}
	} elsif (ref($input) =~ m/GLOB/) {
		#input is STDIN
		if ($refHash_settings->{'mode'} eq $Settings::MODE_CAST) {
			RNAcast::cast_doComputation(\*STDIN, $refHash_settings, \&buildCommand, \%PARAM, $Settings::MODE_SHAPES, $Settings::PROGINFOS{$PROGID}->{name});
		} else {
			Utils::applyFunctionToFastaFile(\*STDIN, \&doComputation, $refHash_settings);
		}
	} else {
		#input is a filename
		die "The file '$input' does not exist!\n" if (not -e $input);
		if ($refHash_settings->{'mode'} eq $Settings::MODE_CAST) {
			RNAcast::cast_doComputation($input, $refHash_settings, \&buildCommand, \%PARAM, $Settings::MODE_SHAPES, $Settings::PROGINFOS{$PROGID}->{name});
		} else {
			if ($refHash_settings->{'mode'} eq $Settings::MODE_ABSTRACT) {
				open (IN, $input) || die "cannot read from input file '$input': $!\n";
					while (my $line = <IN>) {
						chomp $line;
						doComputation({sequence => $line, header => 'pure structure'}, $refHash_settings);
					}
				close (IN);
			} else {
				Utils::applyFunctionToFastaFile($input, \&doComputation, $refHash_settings);
			}
		}
	}
	
	IO::writeVarna($refHash_settings);
}

sub doComputation {
	my ($refHash_sequence, $settings) = @_;

	$inputIndex++;
	if (($settings->{mode} eq $Settings::MODE_EVAL) && (not exists $refHash_sequence->{structure})) {
		#since we are in EVAL mode, I guess that half the sequence is RNA, the other half should be a structure
		$refHash_sequence->{structure} = substr($refHash_sequence->{sequence}, length($refHash_sequence->{sequence})/2);
		$refHash_sequence->{sequence} = substr($refHash_sequence->{sequence}, 0, length($refHash_sequence->{sequence})/2);
	} elsif ($settings->{mode} eq $Settings::MODE_ABSTRACT) {
		#we have to convert a Vienna string into a shape class string. We use the eval binaries with a generic RNA input.
		$refHash_sequence->{structure} = $refHash_sequence->{sequence};
		$refHash_sequence->{sequence} = "a" x length($refHash_sequence->{structure});
	}

	if ($refHash_sequence->{sequence} !~ m/^\s*((A|C|G|U|T)+)\s*$/i) {
		print STDERR "sequence '".$refHash_sequence->{header}."' has been skipped, due to non RNA letter. Only A,C,G,U,T,a,c,g,u,t are allowed.";
	}
	my $seq = $refHash_sequence->{sequence};
	$seq =~ s/t/u/gi;
	$seq = $seq.'+'.$seq if ($settings->{mode} eq $Settings::MODE_OUTSIDE);
	
	my %pfAll = ();
	if ($settings->{'structureprobabilities'}) {
		my %pfallSettings = %{$settings};
		$pfallSettings{mode} = $Settings::MODE_PFALL;
		Utils::checkBinaryPresents(\%pfallSettings, "wrong command line parameter:\n  ", [$Settings::MODE_CAST], []);
		my $pfallCommand = buildCommand(\%pfallSettings, $refHash_sequence);
		my $inputFile = Utils::writeInputToTempfile($seq);
		print "Actual call was: $pfallCommand -f $inputFile\n" if ($settings->{verbose});
		my $res = Utils::execute("$pfallCommand -f $inputFile 2>&1");
		Utils::execute(Settings::getBinary('rm')." -f $inputFile") if (!$settings->{verbose});
		%pfAll = %{IO::parse($res, $refHash_sequence, $Settings::PROGINFOS{$PROGID}->{name}, \%pfallSettings, $inputIndex)};
	}
	
	$settings->{tmpReactivityFile} = Utils::writeInputToTempfile(IO::writeReactivityFileContent(IO::readReactivityFile($settings->{reactivityfilename}, $seq), $seq)) if ($settings->{mode} eq $Settings::MODE_PROBING);
	
	my $command = buildCommand($settings, $refHash_sequence);
	my $structureInputFile = undef;
	my $structureArg = "";
	if (($settings->{'mode'} eq $Settings::MODE_EVAL) || ($settings->{'mode'} eq $Settings::MODE_ABSTRACT)) {
		die "You are in \"".$settings->{'mode'}."\", but your input of RNA sequence and structure is of unequal length!\n" if (length($refHash_sequence->{sequence}) != length($refHash_sequence->{structure}));
		$structureInputFile = Utils::writeInputToTempfile($refHash_sequence->{structure});
		$structureArg = " -f $structureInputFile";
	}

	my $inputFile = Utils::writeInputToTempfile($seq);
	print "Actual call was: $command -f $inputFile $structureArg\n" if ($settings->{verbose});
	my $result = Utils::execute("$command -f $inputFile $structureArg 2>&1");
	Utils::execute(Settings::getBinary('rm')." -f $inputFile") if (!$settings->{verbose});
	Utils::execute(Settings::getBinary('rm')." -f $structureInputFile") if ((defined $structureInputFile) && (!$settings->{verbose}));
	Utils::execute(Settings::getBinary('rm')." -f ".$settings->{tmpReactivityFile}) if ((exists $settings->{tmpReactivityFile}) && (!$settings->{verbose}) && ($settings->{mode} eq $Settings::MODE_PROBING));
	IO::parse($result, $refHash_sequence, $Settings::PROGINFOS{$PROGID}->{name}, $settings, $inputIndex, \%pfAll);
	
	return undef;
}

sub buildCommand {
	my ($settings, $refHash_sequence, $task) = @_;

	my $cmd = "";
	$cmd .= $settings->{'binarypath'};
	$cmd .= "/" if (substr($cmd, -1, 1) ne "/");
	$cmd .= $settings->{'binaryprefix'};
	if ($settings->{'mode'} eq $Settings::MODE_ABSTRACT) {
		$cmd .= $Settings::MODE_EVAL;
	} else {
		$cmd .= $settings->{'mode'};
	}
	$cmd .= '_'.$settings->{'grammar'};
	if ((not defined $task) && (defined $settings->{'windowsize'})) {
		$cmd .= "_window";
		my $windowSize = $settings->{'windowsize'};
		$windowSize = length($refHash_sequence->{sequence}) if ($settings->{'windowsize'} > length($refHash_sequence->{sequence}));
		$cmd .= " -".$PARAM{windowsize}->{gapc}." ".$windowSize;
		$cmd .= " -".$PARAM{windowincrement}->{gapc}." ".$settings->{'windowincrement'};
	}
	$cmd .= " -".$PARAM{temperature}->{gapc}." ".$settings->{'temperature'} if ($settings->{'temperature'} != $PARAM{temperature}->{default});
	$cmd .= " -".$PARAM{param}->{gapc}." ".$settings->{'param'} if (defined $settings->{'param'});
	$cmd .= " -".$PARAM{allowlp}->{gapc}." ".$settings->{'allowlp'} if ($settings->{'allowlp'} != $PARAM{allowlp}->{default});
	$cmd .= " -".$PARAM{relativedeviation}->{gapc}." ".$settings->{'relativedeviation'} if ($settings->{'relativedeviation'} != $PARAM{relativedeviation}->{default});
	$cmd .= " -".$PARAM{absolutedeviation}->{gapc}." ".$settings->{'absolutedeviation'} if (defined $settings->{'absolutedeviation'});
	$cmd .= " -".$PARAM{shapelevel}->{gapc}." ".$settings->{'shapelevel'} if ($settings->{'shapelevel'} != $PARAM{shapelevel}->{default});
	$cmd .= " -".$PARAM{lowprobfilter}->{gapc}." ".$settings->{'lowprobfilter'} if ($settings->{'lowprobfilter'} != $PARAM{lowprobfilter}->{default});
	$cmd .= " -".$PARAM{numsamples}->{gapc}." ".$settings->{'numsamples'} if ($settings->{'mode'} eq $Settings::MODE_SAMPLE);
	$cmd .= " -".$PARAM{bppmthreshold}->{gapc}." ".$settings->{'bppmthreshold'} if (($settings->{'mode'} eq $Settings::MODE_OUTSIDE) && ($settings->{'bppmthreshold'} != $PARAM{bppmthreshold}->{default}));
	if (($settings->{'mode'} eq $Settings::MODE_OUTSIDE) && ($settings->{'dotplotfilename'} ne $PARAM{bppmthreshold}->{default})) {
		$cmd .= " -".$PARAM{dotplotfilename}->{gapc}." ".IO::getDotplotFilename($settings, $inputIndex);
	}
	if ($settings->{'mode'} eq $Settings::MODE_PROBING) {
		$cmd .= " -".$PARAM{slope}->{gapc}." ".$settings->{'slope'} if ($settings->{'slope'} != $PARAM{slope}->{default});
		$cmd .= " -".$PARAM{intercept}->{gapc}." ".$settings->{'intercept'} if ($settings->{'intercept'} != $PARAM{intercept}->{default});
		$cmd .= " -".$PARAM{normalization}->{gapc}." ".$settings->{'normalization'} if ($settings->{'normalization'} ne $PARAM{normalization}->{default});
		$cmd .= " -".$PARAM{modifier}->{gapc}." ".$settings->{'modifier'} if ($settings->{'modifier'} ne $PARAM{modifier}->{default});
		$cmd .= " -".$PARAM{reactivityfilename}->{gapc}." ".$settings->{'tmpReactivityFile'} if ((exists $settings->{'tmpReactivityFile'}) && (defined $settings->{'tmpReactivityFile'}));
	}

	return $cmd;
}

sub checkParameters {
	my ($settings, $refHash_params) = @_;
	
	my $diePrefix = "wrong command line parameter:\n  ";
	
	Utils::automatedParameterChecks(\%PARAM, $settings, \@ALLMODES, $diePrefix);
	die $diePrefix."Sorry, we don't provide a outside version for grammar \"macrostate\" yet.\n" if ($settings->{'grammar'} eq 'macrostate' && (($settings->{'mode'} eq $Settings::MODE_OUTSIDE) || ($settings->{'mode'} eq $Settings::MODE_MEA)));
	Utils::checkBinaryPresents($settings, $diePrefix, [$Settings::MODE_CAST], [], $refHash_params);

	die $diePrefix."--".$PARAM{'windowsize'}->{key}." must be a positive integer!\n" if ((defined $settings->{'windowsize'}) && ($settings->{'windowsize'} < 1));
	die $diePrefix."--".$PARAM{'windowsize'}->{key}." is smaller than --".$PARAM{'windowincrement'}->{key}." !\n" if ((defined $settings->{'windowsize'}) && ($settings->{'windowsize'} < $settings->{'windowincrement'}));
	die $diePrefix."the parameter file you specified could not be found.\n" if ((defined $settings->{'param'}) && (not -e $settings->{'param'}));
	$settings->{'grammar'} = lc($settings->{'grammar'});
	die $diePrefix."there is no grammar \"".$settings->{'grammar'}."\". Please select one of \"$GRAMMAR_NODANGLE\", \"$GRAMMAR_OVERDANGLE\", \"$GRAMMAR_MICROSTATE\" or \"$GRAMMAR_MACROSTATE\".\n" if ($settings->{'grammar'} !~ m/^nodangle|overdangle|microstate|macrostate$/i);
	die $diePrefix."--".$PARAM{'numsamples'}->{key}." must be a positive integer, otherwise shape frequencies cannot be estimated.\n" if ($settings->{'numsamples'} < 1);
	die $diePrefix."--".$PARAM{'allowlp'}->{key}." can either be 0 or 1, to forbid or disallow lonely base pairs.\n" if ($settings->{'allowlp'} !~ m/^0|1$/);
	die $diePrefix."--".$PARAM{'structureprobabilities'}->{key}." can either be 0 or 1, to omit or compute individual structure probabilities.\n" if ($settings->{'structureprobabilities'} !~ m/^0|1$/);
	die $diePrefix."--".$PARAM{'showsamples'}->{key}." can either be 0 or 1, to hide or omit sampled structures by stochastic backtrace.\n" if ($settings->{'showsamples'} !~ m/^0|1$/);
	die $diePrefix."--".$PARAM{'shapelevel'}->{key}." must be a number between 5 and 1.\n" if (($settings->{'shapelevel'} < 1) || ($settings->{'shapelevel'} > 5));
	die $diePrefix."--".$PARAM{'lowprobfilter'}->{key}." must be a non-negative floating point number below 1.\n" if (($settings->{'lowprobfilter'} >= 1) || ($settings->{'lowprobfilter'} < 0));
	die $diePrefix."--".$PARAM{'lowprobfilteroutput'}->{key}." must be a non-negative floating point number below 1.\n" if (($settings->{'lowprobfilteroutput'} >= 1) || ($settings->{'lowprobfilteroutput'} < 0));	
	die $diePrefix."--".$PARAM{'absolutedeviation'}->{key}." and --".$PARAM{'relativedeviation'}->{key}." cannot be set at the same time!\n" if ((defined $settings->{'absolutedeviation'}) && ($settings->{'relativedeviation'} != $PARAM{'relativedeviation'}->{default}));
	die $diePrefix."--".$PARAM{'probdecimals'}->{key}." must be a non-negative integer number!\n" if ($settings->{'probdecimals'} < 0);
	die $diePrefix."--".$PARAM{'bppmthreshold'}->{key}." must be a non-negative integer number!\n" if ($settings->{'bppmthreshold'} < 0);
	die $diePrefix."--".$PARAM{'bppmthreshold'}->{key}." should be less then 1, because no pair (i,j) will occure in _every_ structure of the search space.\n" if ($settings->{'bppmthreshold'} >= 1);
	
	my @modifier = ($Settings::MODIFIER_DMS, $Settings::MODIFIER_CMCT, $Settings::MODIFIER_SHAPE, $Settings::MODIFIER_DIFFSHAPE, $Settings::MODIFIER_UNKNOWN);
	die $diePrefix."--".$PARAM{'modifier'}->{key}." your input '".$settings->{'modifier'}."' is not in the list of available modifiers. Please choose one of the following values '".join("','", @modifier)."'. The input is case sensitive.\n" if (not Utils::contains(\@modifier, $settings->{'modifier'}));
	my @normalizations = ($Settings::NORMALIZATION_CENTROID,$Settings::NORMALIZATION_RNASTRUCTURE,$Settings::NORMALIZATION_LOGPLAIN,$Settings::NORMALIZATION_ASPROBABILITIES);
	die $diePrefix."--".$PARAM{'normalization'}->{key}." your input '".$settings->{'normalization'}."' is not in the list of available normalizations. Please choose one of the following values '".join("','", @normalizations)."'. The input is case sensitive.\n" if (not Utils::contains(\@normalizations, $settings->{'normalization'}));
	die $diePrefix."--".$PARAM{'reactivityfilename'}->{key}." must point to a file in order to execute the '".$Settings::MODE_PROBING."' mode." if (($settings->{mode} eq $Settings::MODE_PROBING) && (not defined $settings->{'reactivityfilename'}));
	die $diePrefix."--".$PARAM{'reactivityfilename'}->{key}." points to a file that does not exist: '".$settings->{'reactivityfilename'}."'" if (($settings->{mode} eq $Settings::MODE_PROBING) && (defined $settings->{'reactivityfilename'}) && (not -e $settings->{'reactivityfilename'}));
}

sub usage {
	my ($settings) = @_;

my $HELP = <<EOF;
# $Settings::PROGINFOS{$PROGID}->{name}: RNA secondary structure predictions
#            version $Settings::PROGINFOS{$PROGID}->{version} ($Settings::PROGINFOS{$PROGID}->{date})
#            Stefan Janssen (bibi-help\@techfak.uni-bielefeld.de)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

USAGE: 
perl $Settings::PROGINFOS{$PROGID}->{name} [-mode] [-options] <fasta file name or RNA sequence>

 $Settings::PROGINFOS{$PROGID}->{name} comes with the following different modes of predictions:
EOF
;
	$HELP .= Utils::printIdent("  ".$Settings::MODE_MFE."      : ", Utils::usage_convertInfoText("Computes the single energetically most stable secondary structure for the given RNA sequence. Co-optimal results will be suppressed, i.e. should different prediction have the same best energy value, just an arbitrary one out of them will be reported.\nThis resembles the function of the program \"RNAfold\" of the Vienna group (see [".References::getNumber('lor:ber:sie:taf:fla:sta:hof:2011')."] and [".References::getNumber('gru:lor:ber:neu:hof:2008')."]). If you only use \"$Settings::MODE_MFE\" mode, consider switching to RNAfold, because their implementation is much faster, due to sophisticated low level C optimisations.", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_SUBOPT."   : ", Utils::usage_convertInfoText("Often, the biological relevant structure is hidden among suboptimal predictions. In \"$Settings::MODE_SUBOPT\" mode, you can also inspect all suboptimal solutions up to a given threshold (see parameters --@(absolutedeviation) and --@(relativedeviation)). \nDuplicates might appear when using grammar \"$GRAMMAR_MICROSTATE\", due to its semantic ambiguity according Vienna-Dot-Bracket strings. See [".References::getNumber('jan:schud:ste:gie:2011')."] for details.", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_SHAPES."   : ", Utils::usage_convertInfoText("Output of \"$Settings::MODE_SUBOPT\" mode is crowded by many very similar answers, which make it hard to focus to the \"important\" changes. The abstract shape concept [".References::getNumber('jan:gie:2010')."] groups similar answers together and reports only the best answer within such a group. Due to abstraction, suboptimal analyses can be done more thorough, by ignoring boring differences.\n(see parameter --@(shapelevel))", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_PROBS."    : ", Utils::usage_convertInfoText("Structure probabilities are strictly correlated to their energy values. Grouped together into shape classes, their probabilities add up. Often a shape class with many members of worse energy becomes more probable than the shape containing the mfe structure but not much more members. See [".References::getNumber('voss:gie:reh:2006')."] for details on shape probabilities.", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_SAMPLE."   : ", Utils::usage_convertInfoText("Probabilistic sampling based on partition function. This mode combines stochastic sampling with a-posteriori shape abstraction. A sample from the structure space holds M structures together with their shapes, on which classification is performed. The probability of a shape can then be approximated by its frequency in the sample.", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_CAST."     : ", Utils::usage_convertInfoText("This mode is the RNAcast approache, see [".References::getNumber('ree:gie:2005')."].\nFor a family of RNA sequences, this method independently enumerates the near-optimal abstract shape space, and predicts as the consensus an abstract shape common to all sequences. For each sequence, it delivers the thermodynamically best structure which has this common shape.\nInput is a multiple fasta file, which should contain at least two sequences.\nOutput is sorted by \"score\" of common shapes, i.e. summed free energy of all sequences. R is the rank (= list position) of the shape in individual sequence analysis.", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_EVAL."     : ", Utils::usage_convertInfoText("Evaluates the free energy of an RNA molecule in fixed secondary structure, similar to RNAeval from the Vienna group. Multiple answers stem from semantic ambiguity of the underlying grammar.\nIt might happen, that your given structure is not a structure for the sequence. Maybe your settings are too restrictive, e.g. not allowing lonely base-pairs (--@(allowlp)).\nIf you input a (multiple) FASTA file, ".$Settings::PROGINFOS{$PROGID}->{name}." assumes that exactly first half of the contents of each entry is RNA sequence, second half is the according structure. Whitespaces are ignored.", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_ABSTRACT." : ", Utils::usage_convertInfoText("Converts a Vienna-Dot-Bracket representation of a secondary structure into a shape string.", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_OUTSIDE."  : ", Utils::usage_convertInfoText("Applies the \"outside\"-algorithm to compute probabilities for all base pairs (i,j), based on the partition function [".References::getNumber('mcc:1990')."]. Output is a PostScript file, visualizing these probabilities as a \"dot plot\".\nThe \"dot plot\" shows a matrix of squares with area proportional to the base pair probabilities in the upper right half. For each pair (i,j) with probability above --@(bppmthreshold) there is a line of the form\n    i  j  sqrt(p)  ubox\nin the PostScript file, so that they can be easily extracted.", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_MEA."      : ", Utils::usage_convertInfoText("Finds the secondary structure with the maximal sum of base-pair probabilities (MEA=maximal expected accuracy). The equivalent Vienna Package name is the 'centroid secondary structure', defined as 'The centroid structure is the structure with the minimum total base-pair distance to all structures in the thermodynamic ensemble.'.", \%PARAM))."\n";
	$HELP .= Utils::printIdent("  ".$Settings::MODE_PROBING."  : ", Utils::usage_convertInfoText("Structural probing is a wet-lab method to obtain hints about the likelihood of a nucleotide in a structure to be unpaired, a so called 'reactivity' [".References::getNumber('wee:2010')."]. We use the reactivities to enrich the thermodynamic model. To circumvent the challenge of properly weighting free energies and reactivities, as in e.g. RNAstructure [".References::getNumber('dei:li:mat:wee:2009')."], we compute a pareto front of both optimization criteria [".References::getNumber('sau:gie:2015')."]'. This returns a set of equally good candidates, which represent interesting spots of the structural ensemble. Finally, the user has to pick his/her favorite.", \%PARAM))."\n";
	
	my @paramGroups = ();
	push @paramGroups, {name => 'GENERAL OPTIONS', elements => ['mode', 'absolutedeviation', 'relativedeviation', 'shapelevel', 'lowprobfilter', 'lowprobfilteroutput', 'numsamples', 'showsamples', 'windowsize', 'windowincrement','structureprobabilities']};
	push @paramGroups, {name => 'FOLDING OPTIONS', elements => ['grammar','temperature','param','allowlp']};
	push @paramGroups, {name => 'OUTSIDE OPTIONS', elements => ['bppmthreshold','dotplotfilename','dotplotpng']};
	push @paramGroups, {name => 'SYSTEM OPTIONS', elements => ['binarypath','binaryprefix','probdecimals','help','verbose','varnaoutput']};
	push @paramGroups, {name => 'STRUCTURE PROBING OPTIONS', elements => ['slope','intercept','normalization','modifier','reactivityfilename']};
	foreach my $refHash_group (@paramGroups) {
		$HELP .= $refHash_group->{name}.":\n";
		for my $par (@{$refHash_group->{elements}}) {
			$HELP .= Utils::printParamUsage($PARAM{$par}, \%PARAM, \@ALLMODES)."\n";
		}
	}
	
	$HELP .= "REFERENCES:\n";
	foreach my $refID (@References::ORDER) { #'lor:ber:sie:taf:fla:sta:hof:2011','gru:lor:ber:neu:hof:2008','mat:dis:chi:schroe:zuk:tur:2004','tur:mat:2009','jan:schud:ste:gie:2011','jan:gie:2010','voss:gie:reh:2006','ree:gie:2005','ste:voss:reh:ree:gie:2006','mcc:1990','gie:voss:reh:2004') {
		next if ($refID eq 'jan:gie:2014');
		$HELP .= References::printReference($refID);
	}
	$HELP .= "CITATION:\n    If you use this program in your work you might want to cite:\n\n";
	foreach my $refID ('jan:gie:2014') {
		$HELP .= References::printReference($refID);
	}

	print $HELP;
	exit(0);
}

