/*!
 *  \file assembly.cpp
 *
 *  \copyright Copyright (c) 2013 Franco "Sensei" Milicchio. All rights reserved.
 *
 *  \license BSD Licensed.
 */

#include <iostream>
#include <fstream>
#include <memory>
#include <unordered_map>

#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>

#include "libseq/timer.h"
#include "libseq/parser.h"
#include "libseqdb/seqdb.h"

#include "assembler/dbparser.hpp"
#include "assembler/lmf_graph.hpp"


//! \brief Shorthand namespace for boost::program_options
namespace po = boost::program_options;

//! \brief Current program name, printed in the usage line and in error messages
const char* prg_name = "assembly";

//! \brief Names of the command line options registered in main()
const char* opt_help  = "help";   //!< Show the help message and exit
const char* opt_file  = "file";   //!< Input file name (SeqDB), required
const char* opt_clean = "clean";  //!< Request a cleanup of the graph after parsing
const char* opt_kmers = "kmers";  //!< Output CSV file for kmers and frequencies
const char* opt_graph = "graph";  //!< Output graphviz file for the de Bruijn graph
const char* opt_size  = "size";   //!< User-defined size used to preallocate the hash table

//! \brief Parser testing
int main(int argc, char* argv[])
{
    seq::timer  timer;
    
    // Command line arguments
    std::string input, csv, dot;
    std::size_t size;
    
    timer.start("Main program");
    
    // All known options
    po::options_description desc("Allowed options");
    desc.add_options()
        (opt_help, "Show this help message")
        (opt_file, po::value<std::string>(&input)->required(), "Set the input file name (SeqDB) **REQUIRED**")
        (opt_clean, "Clean the DNA sequences, NOT INCLUDED NOW")
        (opt_kmers, po::value<std::string>(&csv), "Dump kmers and frequencies into a CSV file")
        (opt_graph, po::value<std::string>(&dot), "Dump the de Bruijn graph into a graphviz file")
        (opt_size, po::value<std::size_t>(&size), "Use preallocated hash table of a user-defined size")
    ;
    
    // Map of variables
    po::variables_map vm;

    // Command line parsing
    try
    {
        // Parse the command line
        po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);

        // Show command options
        if (vm.count(opt_help))
        {
            std::cout << "Usage: " << prg_name << " [options]\n";
            std::cout << desc;
            return 0;
        }

        po::notify(vm);
    }
    catch(std::exception &e)
    {
        // Upon error on command line
        std::cout << prg_name << ": An error has occurred in parsing the command line options. Try --help." << std::endl << std::endl;
        std::cout << "The problem is: " << e.what() << std::endl << std::endl << std::endl;
        
        return 1;
    }

    // ===============
    // ==== START ====
    // ===============

    // Print some statistics
    std::cout << ">>> DNA read SeqDB archive:  " << input << std::endl;
    std::cout << ">>> Selected kmer length:    " << kmertype().size() << std::endl;
    std::cout << ">>> Number of bits per kmer: " << sizeof(kmertype) * 8 << " (" << sizeof(kmertype) << " bytes)" << std::endl;
    std::cout << ">>> Kmer property bit size:  " << sizeof(kmerprop) * 8 << " (" << sizeof(kmerprop) << " bytes)" << std::endl;
    
    bool usereverse = true;
    
    lmf_graph g(usereverse);
    
    // Preallocate
    if (vm.count(opt_size))
    {
        g.resize(size);
        std::cout << ">>> Preallocated with size:  " << g.buckets() << std::endl;
    }
    
    // Check input
    if (!boost::filesystem::exists(input))
    {
        std::cout << "ERROR" << std::endl;
        std::cout << "=====" << std::endl;
        std::cout << "File does not exists: " << input << std::endl;

        return 1;
    }
    
    // Open the file
    dbparser p(input);
    
    // Parse it
    p.parse(g);
    
    // Clean useless kmers
    if (vm.count(opt_clean)) g.cleanup();
    
    // Dump CSV
    if (vm.count(opt_kmers))
    {
        g.dump_csv(csv);
    }
    // Build the graph
    g.build();

    // Dump CSV
    if (vm.count(opt_graph))
    {
        g.dump_graph(dot);
    }

    std::cout << std::endl;
    
    // Stop the timer for the whole program
    timer.stop();
        
    return 0;
}


