#!/usr/bin/env perl
use 5.012;
use warnings;
use Getopt::Long;
use File::Spec;
use File::Basename;
use Data::Dumper;
use Pod::Usage;
use FASTX::Reader;
use File::Copy;
# ---------------------------------------------------------------------------
# Main program: parse the command line, validate the input directory and
# export its content into the 'MicrobiomeAnalyst' and/or 'Rhea' subdirectories
# of the output directory. Every directory creation and file copy is checked,
# so an incomplete export is reported instead of passing silently.
# ---------------------------------------------------------------------------
my $VERSION = '1.4.0';
my $opt_input_dir;
my $opt_output_dir;
my ($opt_version, $opt_help);

# Files that must be present in the input directory for any export
my @required_files = (
  'taxonomy.txt',
  'metadata.tsv',
  'feature-table.tsv',
  'rep-seqs-tax.fasta',
  'rep-seqs.tree');

# Plain representative sequences: only copied by the Rhea export
my $rhea_seqs_file = 'rep-seqs.fasta';
my $opt_skip_ma;
my $opt_skip_rhea;

GetOptions(
 'i|input-directory=s'  => \$opt_input_dir,
 'o|output-directory=s' => \$opt_output_dir,
 'skip-rhea'            => \$opt_skip_rhea,
 'skip-ma'              => \$opt_skip_ma,
 'version' => \$opt_version,
 'help'    => \$opt_help,
) || die " Parameters error. Use --help to display the manual.\n";


$opt_version && version();
$opt_help    && pod2usage({-exitval => 0, -verbose => 2});

my $export_ma   = not defined $opt_skip_ma;
my $export_rhea = not defined $opt_skip_rhea;

# Check required input
die " Missing required parameter: -i INPUT_DIR\n" unless ($opt_input_dir);
die " Input directory not found ($opt_input_dir)\n" unless (-d "$opt_input_dir");
for my $fname (@required_files) {
  my $file = File::Spec->catfile($opt_input_dir, $fname);
  die "Required file $fname not found in $opt_input_dir\n" if (! -e "$file");
}
# Fail early (before writing anything) if the Rhea-only input is missing
if ($export_rhea and ! -e File::Spec->catfile($opt_input_dir, $rhea_seqs_file)) {
  die "Required file $rhea_seqs_file not found in $opt_input_dir (needed by the Rhea export, use --skip-rhea to disable it)\n";
}

# Make output directories
$opt_output_dir = $opt_input_dir if (not defined $opt_output_dir);
my $opt_MA_dir = File::Spec->catdir($opt_output_dir, 'MicrobiomeAnalyst');
my $opt_Rh_dir = File::Spec->catdir($opt_output_dir, 'Rhea');

my @wanted_dirs = ($opt_output_dir);
push(@wanted_dirs, $opt_MA_dir) if ($export_ma);
push(@wanted_dirs, $opt_Rh_dir) if ($export_rhea);
for my $dir (@wanted_dirs) {
  next if (-d "$dir");
  mkdir("$dir") || die " Unable to create output directory <$dir>: $!\n";
}

# Copy a file from the input directory to a destination directory,
# aborting with an explicit message if the copy fails
my $copy_or_die = sub {
  my ($source_name, $dest_dir, $dest_name) = @_;
  my $source = File::Spec->catfile($opt_input_dir, $source_name);
  my $dest   = File::Spec->catfile($dest_dir, $dest_name);
  copy($source, $dest) || die " Unable to copy <$source> to <$dest>: $!\n";
};

# MicrobiomeAnalyst: metadata, sequences + taxonomy, feature table, tree
if ($export_ma) {
  exportMetadataMA(
    File::Spec->catfile($opt_input_dir, 'metadata.tsv'),
    File::Spec->catfile($opt_MA_dir, 'metadata.csv')
  );
  exportSeqTaxMA(
    File::Spec->catfile($opt_input_dir, 'rep-seqs-tax.fasta'),
    File::Spec->catfile($opt_MA_dir, 'seqs.fa'),
    File::Spec->catfile($opt_MA_dir, 'taxonomy.csv')
  );
  exportTableMA(
    File::Spec->catfile($opt_input_dir, 'feature-table.tsv'),
    File::Spec->catfile($opt_MA_dir, 'table.csv')
  );
  $copy_or_die->('rep-seqs.tree', $opt_MA_dir, 'rep-seqs.tree');
}

# Rhea: mapping file, sequences, feature table with taxonomy, tree
if ($export_rhea) {
  $copy_or_die->('metadata.tsv',  $opt_Rh_dir, 'mapping_file.tab');
  $copy_or_die->($rhea_seqs_file, $opt_Rh_dir, 'OTUs-Seqs.fasta');
  exportTableRhea(
    File::Spec->catfile($opt_input_dir, 'rep-seqs-tax.fasta'),
    File::Spec->catfile($opt_input_dir, 'feature-table.tsv'),
    File::Spec->catfile($opt_Rh_dir, 'OTUs-Table.tab')
  );
  $copy_or_die->('rep-seqs.tree', $opt_Rh_dir, 'OTUs-Tree.tre');
}

# Print "<program name> <version>" on standard output, then terminate
# the program.
sub version {
  my $program = basename($0);
  say "$program $VERSION";
  exit;
}

# exportTableMA($in, $out)
#
# Convert the tab-separated feature table $in into the comma-separated table
# written to $out:
#   - the first cell of the first line is replaced by '#NAME';
#   - empty cells are preserved, including trailing ones, so every row keeps
#     the same number of columns;
#   - cells containing a comma or a double quote are enclosed in double
#     quotes (embedded quotes are doubled), as done for the metadata.
# Dies if $in cannot be read or $out cannot be written.
sub exportTableMA {
  my ($in, $out) = @_;
  my $c = 0;
  open (my $I, '<', "$in") || die " Unable to load table <$in>.\n";
  open (my $O, '>', "$out")|| die " Unable to write exported file to <$out>.\n";
  while (my $line = readline($I)) {
    $c++;
    chomp($line);
    # Limit -1: without it split() silently discards trailing empty cells
    my @fields = split /\t/, $line, -1;
    $fields[0] = '#NAME' if ($c == 1);
    for my $field (@fields) {
      next if ($field !~ /[",]/);
      $field =~ s/"/""/g;
      $field = qq("$field");
    }
    print {$O} join(',', @fields), "\n";
  }
  close($I);
  # Buffered write errors (e.g. disk full) only surface when closing
  close($O) || die " Unable to write exported file to <$out>.\n";
}


# exportTableRhea($fasta_tax, $table, $output)
#
# Write to $output a copy of the tab-separated feature table $table with an
# extra last column named 'taxonomy'.
#   $fasta_tax  FASTA file whose headers are '>NAME TAXONOMY', where TAXONOMY
#               is a semicolon-separated list of ranks
#               (e.g. Bacteria;Firmicutes;Clostridia;NA;NA)
#   $table      feature table; lines starting with '#' are headers and get
#               '#OTUId' as first cell
#   $output     destination file
# Ranks that are exactly 'NA' are blanked (names merely containing 'NA' are
# left untouched), and a trailing ';' is appended to the taxonomy. Features
# not found in $fasta_tax get an empty taxonomy.
# Dies if an input cannot be read or the output cannot be written.
sub exportTableRhea {
  my ($fasta_tax, $table, $output) = @_;
  open (my $FA, '<', "$fasta_tax") || die " Unable to load fasta taxonomy <$fasta_tax>.\n";
  open (my $T,  '<', "$table") || die " Unable to load feature table <$table>.\n";
  open (my $O,  '>', "$output")|| die " Unable to write exported file to <$output>.\n";
  my %otuToTax = ();
  while (my $line = readline($FA)) {
    next if ($line !~ /^>/);
    # Remove the line terminator (LF or CRLF) and any trailing blank
    $line =~ s/\s+\z//;
    # Limit 2: the taxonomy is everything after the first blank, so that
    # taxon names containing spaces are not truncated
    my ($name, $taxonomy) = split /\s+/, substr($line, 1), 2;
    next if (not defined $name or $name eq '');
    $taxonomy //= '';
    # Bacteria;Firmicutes;Clostridia;Clostridiales;Ruminococcaceae;;
    my @clades = map { $_ eq 'NA' ? '' : $_ } split(/;/, $taxonomy, -1);
    $otuToTax{$name} = join(';', @clades) . ';';
  }
  close($FA);

  while (my $line = readline($T)) {
    # Strip LF/CRLF: a leftover "\r" would end up inside the last count cell
    $line =~ s/[\r\n]+\z//;
    # Limit -1 keeps trailing empty cells, so 'taxonomy' stays the last column
    my @fields = split /\t/, $line, -1;
    if ($line =~/^#/) {
      $fields[0] = '#OTUId';
      push(@fields, 'taxonomy');
    } else {
      my $feature = $fields[0] // '';
      push(@fields, $otuToTax{$feature} // '');
    }
    say {$O} join("\t", @fields);
  }
  close($T);
  # Buffered write errors (e.g. disk full) only surface when closing
  close($O) || die " Unable to write exported file to <$output>.\n";
}
# exportMetadataMA($in, $out)
#
# Convert the tab-separated metadata file $in into the comma-separated file
# written to $out:
#   - the first cell of the first line is replaced by '#NAME';
#   - empty cells are preserved, including trailing ones, so every row keeps
#     the same number of columns;
#   - cells containing a comma or a double quote are enclosed in double
#     quotes, with embedded double quotes doubled.
# Dies if $in cannot be read or $out cannot be written.
sub exportMetadataMA{
  my ($in, $out) = @_;
  my $c = 0;
  open (my $I, '<', "$in") || die " Unable to load metadata <$in>.\n";
  open (my $O, '>', "$out")|| die " Unable to write exported file to <$out>.\n";
  while (my $line = readline($I)) {
    $c++;
    chomp($line);
    # Limit -1: without it split() silently discards trailing empty cells
    my @fields = split /\t/, $line, -1;
    if ($c==1) {
      $fields[0] = '#NAME';
    }
    my @line_fields = ();
    for my $f (@fields) {
      if ($f !~ /[",]/) {
        push (@line_fields, $f);
      } else {
        # Copy before escaping, to leave @fields untouched
        (my $escaped = $f) =~ s/"/""/g;
        push (@line_fields, qq("$escaped") );
      }
    }
    say {$O} join(',', @line_fields);
  }
  close($I);
  # Buffered write errors (e.g. disk full) only surface when closing
  close($O) || die " Unable to write exported file to <$out>.\n";
}

# exportSeqTaxMA($in, $otus, $tax)
#
# Read the FASTA file $in, whose sequence comments hold a semicolon-separated
# taxonomy, and write:
#   $otus  a FASTA file with the sequences, names only (no comments)
#   $tax   a CSV taxonomy table with the header '#TAXONOMY,Domain,...,Species'
#          and one row per sequence
# Each taxonomy cell is prefixed with the lowercase initial of its rank and
# '__' (d__, p__, ...); ranks that are missing or equal to 'NA' are left
# empty after the prefix. Each row has exactly one cell per rank: levels
# beyond the last rank are discarded, so rows never exceed the header.
# Dies if a sequence has no comment or an output file cannot be written.
sub exportSeqTaxMA {
  my ($in, $otus, $tax) = @_;
  my $null = '';
  my $READER = FASTX::Reader->new({filename => "$in"});
  my @ranks = ('Domain', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species');

  open (my $OTUS, '>', "$otus")|| die " Unable to write exported file to <$otus>.\n";
  open (my $TAX,  '>', "$tax")|| die " Unable to write exported file to <$tax>.\n";
  say {$TAX} join(',', '#TAXONOMY', @ranks);

  while (my $seq = $READER->getRead()) {
    if (not defined $seq->{comment}) {
      die "Sequence $seq->{name} does not have taxonomy associated.\n";
    }
    print {$OTUS} '>', $seq->{name}, "\n", $seq->{seq}, "\n";
    my @clades = split /;/, $seq->{comment};
    my @cells  = ();
    # Iterate over the ranks (not the clades) so each row matches the header
    for my $i (0 .. $#ranks) {
      my $clade = $clades[$i] // $null;
      $clade = $null if ($clade eq 'NA');
      push(@cells, lc( substr($ranks[$i], 0, 1) ) . '__' . $clade);
    }
    say {$TAX} join(',', $seq->{name}, @cells);
  }
  # Buffered write errors (e.g. disk full) only surface when closing
  close($OTUS) || die " Unable to write exported file to <$otus>.\n";
  close($TAX)  || die " Unable to write exported file to <$tax>.\n";
}
__END__

=head1 NAME

B<dadaist2-exporter> - tool to export dadaist2 output into formats compatible
with I<MicrobiomeAnalyst> and I<Rhea>. I<MicrobiomeAnalyst> can be used as an
B<R> module or via the user-friendly website L<https://www.microbiomeanalyst.ca/>;
I<Rhea> is available at L<https://lagkouvardos.github.io/Rhea/>.

=head1 AUTHOR

Andrea Telatin <andrea.telatin@quadram.ac.uk>

=head1 SYNOPSIS

dadaist2-exporter [options] -i INPUT_DIR

=head1 PARAMETERS

=over 4

=item I<-i>, I<--input-directory> DIRECTORY

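Directory containing the dadaist2 output files. The following files are
required: C<taxonomy.txt>, C<metadata.tsv>, C<feature-table.tsv>,
C<rep-seqs-tax.fasta> and C<rep-seqs.tree>. The Rhea export additionally
copies C<rep-seqs.fasta>.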

=item I<-o>, I<--output-directory> DIRECTORY

Output directory (default: the input directory). The exported files are
written to the C<MicrobiomeAnalyst> and C<Rhea> subdirectories inside it.

=item I<--skip-rhea>

Do not create the B<Rhea> subdirectory and its files.


=item I<--skip-ma>

Do not create the B<MicrobiomeAnalyst> subdirectory and its files.


=item I<--version>

Print version and exit.

=back

=head1 OUTPUT

The C<MicrobiomeAnalyst> subdirectory of the output directory will contain:

=over 4

=item I<metadata.csv>

Metadata file to be used in the field of the same name.

=item I<table.csv>

Feature table to be used in the 'OTU/ASV table' field.

=item I<taxonomy.csv>

Taxonomy table to be used in the 'Taxonomy table' field.

=item I<seqs.fa>

Not used in MicrobiomeAnalyst, but kept for reference.

=back

=head1 SOURCE CODE AND DOCUMENTATION

The program is freely available at L<https://quadram-institute-bioscience.github.io/dadaist2>
released under the MIT licence. The website contains further DOCUMENTATION.
