//
//  parser.cpp
//  sequence
//
//  Created by Sensei on 9/24/15.
//  Copyright © 2015 Franco "Sensei" Milicchio. All rights reserved.
//



#include "dbparser.hpp"

#include <atomic>
#include <iostream>
#include <fstream>
#include <unordered_map>
#include <tbb/pipeline.h>
#include <tbb/parallel_do.h>

#include "../libseq/timer.h"
#include "lmf_graph.hpp"

// Constructor: stores the given path in filename_ and initializes db_ with
// the handle returned by SeqDB::open for that same path.
dbparser::dbparser(const std::string &f)
    : filename_(f),
      db_(SeqDB::open(f.c_str()))
{
}

// Parse
//
// Reads every sequence from the database and inserts into g each substring of
// length g.kmersize() that consists only of the characters A, T, C and G.
// Reading is done by a serial pipeline stage; k-mer extraction and insertion
// run in a parallel stage.
//
// g: graph receiving the k-mers. g.insert() is called from the parallel stage,
//    so it is assumed to be safe for concurrent calls -- TODO confirm.
//
// Progress is printed to std::cout every 5% of db_->size() reads (only when
// 5% amounts to at least one read), followed by the number of unique k-mers.
void dbparser::parse(lmf_graph &g)
{
    seq::timer t;

    Sequence s;

    // Print every subint percentage
    const std::size_t subint = 5;

    // Total number of reads in the archive
    const std::size_t total = db_->size();

    // Print only if it makes sense (at least one read per subint%)
    const bool startprint = (total * subint / 100) > 0;

    // Next percentage to report
    std::size_t perc = subint;

    // Number of reads fetched so far
    std::size_t done = 0;

    // K-mer length, fetched once since it is used for every window
    const std::size_t ksize = g.kmersize();

    t.start("Kmerizing the read archive");
    // First argument: maximum number of items in flight in the pipeline
    tbb::parallel_pipeline(30,
        // Serial stage: fetch the next read and report progress.
        // Being serial, it is the only code touching s, perc and done.
        tbb::make_filter<void, std::string>(tbb::filter::serial,
            [&](tbb::flow_control& fc)
            {
                if (!db_->read(s))
                {
                    // No more reads: the value returned below is discarded
                    fc.stop();
                }
                else
                {
                    ++done;

                    // Report once the fraction of fetched reads reaches the
                    // next threshold; never report beyond 100%
                    if (startprint && (perc <= 100) && (done * 100 >= perc * total))
                    {
                        std::cout << "    Read " << perc << "%" << std::endl;
                        perc += subint;
                    }
                }
                return s.seq;
            })
        &
        // Parallel stage: slide a window of ksize characters over the read
        tbb::make_filter<std::string, void>(tbb::filter::parallel,
            [&](std::string read)
            {
                // The condition is written as an addition so that reads
                // shorter than ksize yield no k-mers instead of making an
                // unsigned subtraction wrap around
                for (std::size_t i = 0; i + ksize <= read.size(); ++i)
                {
                    std::string k(read.substr(i, ksize));

                    // Only use k-mers made exclusively of valid nucleotides
                    if (k.find_first_not_of("ATCG") == std::string::npos)
                    {
                        g.insert(std::move(k));
                    }
                }
            })
        );
    t.stop();

    // Print some statistics
    std::cout << ">>> Unique kmers:    " << g.size()     << std::endl;
}
