1#ifndef BTLLIB_SEQ_READER_HPP 
    2#define BTLLIB_SEQ_READER_HPP 
    4#include "btllib/cstring.hpp" 
    5#include "btllib/data_stream.hpp" 
    6#include "btllib/order_queue.hpp" 
    7#include "btllib/seq.hpp" 
    8#include "btllib/seq_reader_fasta_module.hpp" 
    9#include "btllib/seq_reader_fastq_module.hpp" 
   10#include "btllib/seq_reader_gfa2_module.hpp" 
   11#include "btllib/seq_reader_multiline_fasta_module.hpp" 
   12#include "btllib/seq_reader_multiline_fastq_module.hpp" 
   13#include "btllib/seq_reader_sam_module.hpp" 
   14#include "btllib/status.hpp" 
   18#include <condition_variable> 
   72            unsigned threads = 3);
 
   // NOTE(review): presumably shuts the reader down; the definition is not in
   // this header excerpt, so confirm the exact effect there. The function is
   // declared noexcept, so no exception may propagate out of it.
   82  void close() noexcept;
 
   99  friend std::ostream& operator<<(std::ostream& os, 
const Format f)
 
  101    return os << static_cast<int32_t>(f);
 
  104  Format get_format()
 const { 
return format; }
 
  108    size_t num = std::numeric_limits<size_t>::max();
 
  114    operator bool()
 const { 
return !seq.empty(); }
 
  123  static const size_t MAX_SIMULTANEOUS_SEQREADERS = 256;
 
  130    void operator++() { record = reader.read(); }
 
  131    bool operator!=(
const RecordIterator& i)
 
  133      return bool(record) || bool(i.record);
 
  135    Record operator*() { 
return std::move(record); }
 
  139      auto val = operator*();
 
  147    RecordIterator(
SeqReader& reader, 
bool end)
 
  160  RecordIterator 
begin() { 
return RecordIterator(*
this, 
false); }
 
  161  RecordIterator end() { 
return RecordIterator(*
this, 
true); }
 
  163  size_t get_buffer_size()
 const { 
return buffer_size; }
 
  164  size_t get_block_size()
 const { 
return block_size; }
 
  // Compile-time size constants.
  // NOTE(review): the SHORT_MODE_* / LONG_MODE_* pairs presumably supply the
  // buffer_size / block_size values chosen when the SHORT_MODE or LONG_MODE
  // flag is passed -- confirm against the constructor, which is not visible here.
  166  static const size_t SHORT_MODE_BUFFER_SIZE = 32;
 
  167  static const size_t SHORT_MODE_BLOCK_SIZE = 32;
 
  169  static const size_t LONG_MODE_BUFFER_SIZE = 4;
 
  170  static const size_t LONG_MODE_BLOCK_SIZE = 1;
 
  // Used as the initial element count of Buffer::data (see `data(FORMAT_BUFFER_SIZE)`).
  172  static const size_t FORMAT_BUFFER_SIZE = 16384;
 
  179      : data(FORMAT_BUFFER_SIZE)
 
  182    std::vector<char> data;
 
  185    bool eof_newline_inserted = 
false;
 
  /// Source path for this reader (presumably a copy of the constructor's
  /// `source_path` argument).
  ///
  /// Held by value on purpose: the constructor receives the path as
  /// `const std::string&`, and a reference member would dangle as soon as the
  /// caller's argument (for example a temporary built from a string literal)
  /// is destroyed, while the reader keeps running afterwards.
  const std::string source_path;
 
  197  const unsigned flags;
 
  198  const unsigned threads;
 
  199  Format format = Format::UNDETERMINED; 
 
  200  std::atomic<bool> closed{ 
false };
 
  202  std::unique_ptr<std::thread> reader_thread;
 
  203  std::vector<std::unique_ptr<std::thread>> processor_threads;
 
  204  std::mutex format_mutex;
 
  205  std::condition_variable format_cv;
 
  206  std::atomic<bool> reader_end{ 
false };
 
  207  RecordCString* reader_record = 
nullptr;
 
  208  const std::atomic<size_t> buffer_size;
 
  209  const std::atomic<size_t> block_size;
 
  210  OrderQueueSPMC<RecordCString> cstring_queue;
 
  211  OrderQueueMPMC<Record> output_queue;
 
  212  std::atomic<size_t> dummy_block_num{ 0 };
 
  // Per-thread (thread_local) static array with MAX_SIMULTANEOUS_SEQREADERS
  // slots; each slot is a unique_ptr to a Block of the output queue's type.
  // NOTE(review): presumably one slot per live SeqReader, selected from the
  // reader's id -- confirm in the implementation.
  216  thread_local static std::unique_ptr<
decltype(output_queue)::Block>
 
  218    ready_records_array[MAX_SIMULTANEOUS_SEQREADERS];
 
  // Per-thread array of `long` values, same slot count as above.
  // NOTE(review): presumably records which reader id owns each slot -- confirm.
  221  thread_local static long ready_records_owners[MAX_SIMULTANEOUS_SEQREADERS];
 
  // Per-thread array of size_t values, same slot count as above.
  // NOTE(review): presumably the read position inside each slot's block -- confirm.
  224  thread_local static size_t ready_records_current[MAX_SIMULTANEOUS_SEQREADERS];
 
  // Atomic counter shared by all instances (static, not thread_local).
  // NOTE(review): presumably the most recently assigned reader id -- confirm.
  227  static std::atomic<long> last_id;
 
  230  void determine_format();
 
  232  void start_processors();
 
  235  bool readline_buffer_append(CString& s);
 
  236  static void readline_file(CString& s, FILE* f);
 
  237  void readline_file_append(CString& s, FILE* f);
 
  238  static bool file_at_end(FILE* f);
 
  240  int ungetc_buffer(
int c);
 
  242  void update_cstring_records(OrderQueueSPMC<RecordCString>::Block& records,
 
  246  template<
typename Module>
 
  247  void read_from_buffer(Module& module,
 
  248                        OrderQueueSPMC<RecordCString>::Block& records,
 
  251  template<
typename Module>
 
  252  void read_transition(Module& module,
 
  253                       OrderQueueSPMC<RecordCString>::Block& records,
 
  256  template<
typename Module>
 
  257  void read_from_file(Module& module,
 
  258                      OrderQueueSPMC<RecordCString>::Block& records,
 
  262  friend class SeqReaderFastaModule;
 
  263  SeqReaderFastaModule fasta_module;
 
  265  friend class SeqReaderMultilineFastaModule;
 
  266  SeqReaderMultilineFastaModule multiline_fasta_module;
 
  268  friend class SeqReaderFastqModule;
 
  269  SeqReaderFastqModule fastq_module;
 
  271  friend class SeqReaderMultilineFastqModule;
 
  272  SeqReaderMultilineFastqModule multiline_fastq_module;
 
  274  friend class SeqReaderSamModule;
 
  275  SeqReaderSamModule sam_module;
 
  277  friend class SeqReaderGfa2Module;
 
  278  SeqReaderGfa2Module gfa2_module;
 
  280  int module_in_use = 0;
 
  285template<
typename Module>
 
  287SeqReader::read_from_buffer(Module& module,
 
  288                            OrderQueueSPMC<RecordCString>::Block& records,
 
  291  while (!reader_end) {
 
  292    reader_record = &(records.data[records.count]);
 
  293    if (!module.read_buffer(*
this, *reader_record) ||
 
  294        reader_record->seq.empty()) {
 
  297    update_cstring_records(records, counter);
 
  301template<
typename Module>
 
  303SeqReader::read_transition(Module& module,
 
  304                           OrderQueueSPMC<RecordCString>::Block& records,
 
  308    reader_record = &(records.data[records.count]);
 
  309    module.read_transition(*
this, *reader_record);
 
  310    if (!reader_record->seq.empty()) {
 
  311      update_cstring_records(records, counter);
 
  313  } 
else if (!reader_record->seq.empty()) {
 
  314    update_cstring_records(records, counter);
 
  318template<
typename Module>
 
  320SeqReader::read_from_file(Module& module,
 
  321                          OrderQueueSPMC<RecordCString>::Block& records,
 
  324  while (!reader_end) {
 
  325    reader_record = &(records.data[records.count]);
 
  326    if (!module.read_file(*
this, *reader_record) ||
 
  327        reader_record->seq.empty()) {
 
  330    update_cstring_records(records, counter);
 
Definition: seq_reader.hpp:43
SeqReader(const std::string &source_path, unsigned flags, unsigned threads=3)
OrderQueueMPMC< Record >::Block read_block()
RecordIterator begin()
Definition: seq_reader.hpp:160
Definition: bloom_filter.hpp:16
Definition: seq_reader.hpp:50
static const unsigned TRIM_MASKED
Definition: seq_reader.hpp:55
static const unsigned FOLD_CASE
Definition: seq_reader.hpp:52
static const unsigned SHORT_MODE
Definition: seq_reader.hpp:57
static const unsigned LONG_MODE
Definition: seq_reader.hpp:59
Definition: seq_reader.hpp:107