//
//  partitioner_hash.hpp
//
//  Copyright 2018 Franco Milicchio and Marco Oliva. All rights reserved.
//

#ifndef partitioner_hash_hpp
#define partitioner_hash_hpp

#include "partitioner.hpp"
#include "logger.hpp"
#include "fastq_mmap.hpp"
#include "file_mmap.hpp"
#include "file_std.hpp"
#include "accelerator.hpp"

namespace libseq
{
    /// Hashing partitioner class.
    ///
    /// Walks every read of a FASTQ file, extracts its k-mers and assigns each
    /// one to the bucket `hash(kmer) % np`.
    ///
    /// NOTE(review): as written, only the k-mers of bucket 0 are stored, the
    /// output name ("out_file_1_of_20.bin") is hard-coded, and `partitions_`
    /// is not filled by any code visible in this class -- confirm whether this
    /// is still work in progress.
    class partitioner_hash : public partitioner<partitioner_hash>
    {
    
    public:
    
        /// Actual name of the class
        static constexpr char name_[] = "partitioner_hash";
        
        /// List of output partitions
        std::vector<path> partitions_;
        
        /// Statistics (`n_reads` and `n_kmers` are incremented by run_)
        partitioner_stats stats_;
        
        /// Core method, partition the input file according to the hash value of each kmer.
        ///
        /// For every read, each window of `k` characters is converted with
        /// `accelerator.to_forward()` and hashed with `accelerator.hash()`.
        /// K-mers whose hash modulo `np` equals 0 are appended to the binary
        /// output file and, as plain text (one per line), to a sibling ".txt" file.
        ///
        /// @param accelerator  object providing `to_forward()` and `hash()`
        /// @param input_file   FASTQ file to partition
        /// @param k            k-mer length
        /// @param np           number of partitions (modulus applied to the hash)
        ///
        /// @note Returns immediately, without creating any file or touching the
        ///       statistics, when `k` or `np` is zero: `np == 0` would make the
        ///       modulo below undefined, and `k == 0` yields only empty k-mers.
        template <typename AcceleratorImplementation, typename AcceleratorStorageType>
        void run_(accelerator<AcceleratorImplementation, AcceleratorStorageType>& accelerator, path input_file, std::size_t k,
                std::size_t np)
        {
            // Reject degenerate parameters before any side effect takes place
            if (k == 0 || np == 0)
            {
                return;
            }
            
            // non human readable file
            path base_dir(input_file.file_path());
            path out_file_path = base_dir.append("out_file_1_of_20.bin");
            file_mmap bin_file(out_file_path);
            
            // human readable file
            // NOTE(review): the result of open() is not checked; on failure the
            // writes below are silently dropped by the stream.
            std::ofstream human_file;
            human_file.open(out_file_path.absolute_path() + ".txt");
            
            // input file
            fastq_mmap fastq(input_file.absolute_path());
            
            // iterate over the input file
            auto it = fastq.begin();
            while (it != fastq.end())
            {
                // get the read from the iterator (taken by value before advancing)
                auto read = std::get<1>(it.properties());
                ++it;
                stats_.n_reads++;
                
                // a read shorter than k contains no k-mer at all
                const auto read_length = read.length();
                const std::size_t nkmers = read_length < k ? 0 : read_length - k + 1;
                
                // kmerize and partition
                for (std::size_t i = 0; i < nkmers; i++)
                {
                    stats_.n_kmers++;
                    
                    auto svk  = std::string_view(read.data() + i, k);
                    auto kmer = accelerator.to_forward(svk);
                    
                    auto hash = accelerator.hash(kmer) % np;
                    
                    if (hash == 0)
                    {
                        // non human readable
                        bin_file.append(kmer);
                        
                        // human readable; '\n' instead of std::endl avoids
                        // flushing the stream once per k-mer
                        human_file << svk << '\n';
                    }
                }
            }
            
            bin_file.close();
            human_file.close();
        }
    };
}

#endif //partitioner_hash_hpp








#if 0
/// Legacy multi-threaded partitioner. In this file the whole definition sits
/// inside an `#if 0` region, so it is currently not compiled.
///
/// Every k-mer of the input is hashed and written to the partition
/// `hash % number_of_partitions`.
///
/// @tparam PartitionType  partition object; constructed below from
///                        (name, estimated size) and written through `write()`
/// @tparam Kmer           k-mer container; `size()`, `get()` and the nested
///                        `storage_type` are used below, together with
///                        `seq::container_traits<Kmer>`
template <typename PartitionType, typename Kmer>
struct ScrumblePartitioner {

private:
    
    /// Worker routine run by every partitioning thread.
    ///
    /// Pulls entries from `ifq` until `get_next_entry` returns false. For each
    /// entry at least `kmer.size()` characters long it builds the first k-mer,
    /// then slides one character at a time up to the end of the entry; every
    /// k-mer is passed through `rev_for_min`, hashed, and written to
    /// `parts[hash % parts.size()]`. Progress is printed every 10000 entries.
    ///
    /// NOTE(review): `ifq` and `parts` are shared by reference between all
    /// workers (see operator()) and no synchronization is visible here --
    /// confirm that `get_next_entry` and `write` are thread-safe.
    ///
    /// @param file       memory-mapped input, indexed by byte offset
    /// @param ifq        index over the input, source of (offset, size) entries
    /// @param parts      output partitions
    /// @param thread_id  only used to label the progress messages
    static void _process_file(mio::mmap_source& file, seq::indexed_fq<reader_type>& ifq,
    std::vector<PartitionType*>& parts,
    int thread_id) {
        
        std::size_t read_processed = 0;
        std::size_t index_size = ifq.index.size();
        seq::index_entry ie;
        std::size_t hash = 0;
        
        Kmer kmer;
        Kmer min;
        std::size_t n_of_parts = parts.size();
        
        unsigned int max_size = kmer.size();
        while (ifq.get_next_entry(ie)) {
            std::size_t pos = ie.offset;
            char c;
            // Entries shorter than one k-mer are skipped (but still counted below)
            if (ie.size >= max_size) {
                kmer.get() = 0;  min.get()  = 0;
                
                // First k-mer: load max_size characters one by one.
                // NOTE(review): `int i` is compared against `unsigned int max_size`.
                for (int i = 0; i<max_size; i++) {
                    c = file[pos++];
                    seq::container_traits<Kmer>::fromChar(kmer, c, i);
                }
                // presumably `min` becomes the canonical form of `kmer` -- verify
                // against container_traits
                seq::container_traits<Kmer>::rev_for_min(min, kmer);
                hash = seq::container_traits<Kmer>::hash(min);
                parts[hash % n_of_parts]->write(min.get());
                
                // Remaining k-mers: shift in one character at a time until the
                // end of the entry
                while (pos < (ie.offset + ie.size)) {
                    c = file[pos++];
                    seq::container_traits<Kmer>::next_kmer(kmer, kmer, c);
                    seq::container_traits<Kmer>::rev_for_min(min, kmer);
                    hash = seq::container_traits<Kmer>::hash(min);
                    parts[hash % n_of_parts]->write(min.get());
                }
            }
            
            // Progress report, as a percentage of the index size
            read_processed++;
            if (read_processed % 10000 == 0) {
                std::cout << "[Partitioning] ";
                std::cout << " thread " << thread_id << ": ";
                double perc = read_processed;
                perc = (perc / index_size) * 100;
                std::cout << perc << "%" << std::endl;
            };
        }
        
    }

public:
    
    /// Partition the file named by `opts.file_path`.
    ///
    /// Steps: map the input, create a "partitions" directory next to it,
    /// estimate the number of k-mers and derive the number of partitions,
    /// allocate the partitions, then run `opts.threads_partitioning` workers
    /// over the input and wait for them.
    ///
    /// @param opts        options read here: `file_path`, `mem_size`,
    ///                    `threads_counting`, `threads_partitioning`
    /// @param stats       receives the k-mer and partition size estimates, the
    ///                    elapsed partitioning time and the number of reads
    /// @param partitions  receives one heap-allocated PartitionType per
    ///                    partition; they are not freed in this function
    void operator()(command_line_options& opts, statistics& stats,
    std::vector<PartitionType*>& partitions) {
        
        std::string file_path = opts.file_path;
        
        seq::indexed_fq<reader_type> ifq(file_path.c_str());
        std::error_code error;
        mio::mmap_source file = mio::make_mmap_source(file_path, error);
        // NOTE(review): a mapping error is only printed; execution continues
        if (error) { std::cout << error.message() << std::endl; }
        
        //Time statistics
        auto start = std::chrono::high_resolution_clock::now();
        
        // NOTE(review): when the path has no separator, `found` is npos and
        // substr(0, npos) returns the whole path, so "/partitions" is appended
        // to the file name itself.
        std::size_t found;
        found = file_path.find_last_of("/\\");
        
        //TODO: replace with std::filesystem
        std::string dir_path = file_path.substr(0,found);
        dir_path += "/partitions";
        int err = 0;
#ifdef _WIN32
        err = _mkdir(dir_path.c_str());
#else // POSIX
        err = mkdir(dir_path.c_str(), 0700);
#endif
        // NOTE(review): any non-zero result is reported, which presumably
        // includes the case of an already existing directory -- confirm.
        if (err != 0) { std::cout << "Error while creating dir: " << dir_path << std::endl; }
        
        
        //Partitions paths
        // NOTE(review): filled below but not read anywhere in this function
        std::vector<std::string> paths;
        
        //Input file
        std::cout << "[Partitioning] ";
        std::cout << "Reading: " << file_path << std::endl;
        
        
        // Estimate: half of the file size, minus a hard-coded 31 (presumably
        // the k-mer length -- confirm), plus one.
        // NOTE(review): the arithmetic is done in std::size_t, so it wraps
        // around when file_size / 2 is smaller than 30.
        std::size_t file_size = file.size();
        stats.estimated_number_of_kmer = (file_size / 2) - 31 + 1;
        std::cout << "Estimated number of kmers: " << stats.estimated_number_of_kmer << std::endl;
        
        
        
        //Number of partitions
        // Estimated k-mer bytes divided by the memory budget per counting
        // thread, then doubled, with a minimum of 2 and a maximum of 500.
        // NOTE(review): the divisor is zero when `opts.mem_size` or
        // `opts.threads_counting` is zero (their types are not visible here).
        std::size_t number_of_partitions = (stats.estimated_number_of_kmer * sizeof(typename Kmer::storage_type))
        / ((opts.mem_size * 1000000) / opts.threads_counting);
        if (number_of_partitions == 0) number_of_partitions = 1;
        number_of_partitions = number_of_partitions * 2;
        if (number_of_partitions > 500) {std::cout << number_of_partitions<< std::endl; number_of_partitions = 500;}
        std::cout << "[Partitioning] ";
        std::cout << "Number of partitions: " << number_of_partitions << std::endl;
        
        stats.estimated_partition_size =  (stats.estimated_number_of_kmer / number_of_partitions) * sizeof(Kmer);
        // Add a safety margin.
        // NOTE(review): the original comment said 20%, but the factor below is
        // 0.1 (10%) -- confirm which one is intended.
        stats.estimated_partition_size += (stats.estimated_partition_size * 0.1);
        
        //Initialize partitions
        // NOTE(review): `_get_partition_name` is not declared in the visible
        // part of this struct; `int p` is compared against a std::size_t.
        for (int p = 0; p < number_of_partitions; p++) {
            std::string partition_name = _get_partition_name(dir_path,p);
            partitions.push_back(new PartitionType(partition_name, stats.estimated_partition_size));
            paths.push_back(partition_name);
        }
        
        // Start one worker per partitioning thread; all of them share the
        // mapped file, the index and the partitions by reference
        std::vector<std::thread> workers(opts.threads_partitioning);
        for (int i = 0; i < opts.threads_partitioning; i++) {
            workers[i] = std::thread(_process_file, std::ref(file), std::ref(ifq), std::ref(partitions), i);
        }
        
        // Wait for every worker
        for (int i = 0; i < opts.threads_partitioning; i++) {
            workers[i].join();
            std::cout << "\tWorker Thread[" << i << "]: done"<< std::endl;
        }
        
        auto finish = std::chrono::high_resolution_clock::now();
        stats.partitioning = finish - start;
        stats.number_of_reads = ifq.index.size();
        
    }
    
    
};
#endif
