
#include <cstring>
#include <iostream>
#include <stack>
#include <vector>

#include "Common.h"
#include "mpi.h"

int main (int argc, char *argv[]) {

  int message_size = parse_args(argc, argv);
  MPI_Init(&argc, &argv);


  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  enum MessageTag { wantWork, work, result, noMoreWork };
  int jobStart = 0;
  int jobEnd = vertices;

  if (rank == 0) { // MASTER
    Vector<btw_num_t> betweenness(vertices);
    betweenness.fill(0);

    Vector<btw_num_t> edgeBetweenness(edges);
    edgeBetweenness.fill(0);

    int slaves = size - 1;
    MPI_Request *dataSentRequests = new MPI_Request[slaves];
    MPI_Request *dataReceiveRequests = new MPI_Request[slaves];
    // initialize the data recv requests by MPI_REQUEST_NULL,
    // in order to be able to call MPI_Waitall in every round.
    std::fill(dataReceiveRequests,
              dataReceiveRequests + slaves,
              MPI_REQUEST_NULL);

    MPI_Request *endRequests = new MPI_Request[slaves];
    std::fill(endRequests,
              endRequests + slaves,
              MPI_REQUEST_NULL);

    MPI_Status *dataReceiveStatuses = new MPI_Status[slaves];

    int *accepted_sent_indices = new int[slaves];
    int *accepted_received_indices = new int[slaves];

    std::stack<int> workerStack;
    // NOTE: hidden because of nicer visualization
    for (size_t slave = 1; slave < size; slave++) {
      workerStack.push(slave);
    }
    // <<<

    btw_num_t **bufferOfSubBetweennesses = new btw_num_t*[slaves];
    // NOTE: hidden because of nicer visualization
    for (size_t i = 0; i < slaves; i++) {
      bufferOfSubBetweennesses[i] = new btw_num_t[vertices + edges];
    }
    // <<<

    int *work = new int[message_size];
    std::fill(work, work + message_size, 7); // prepare send data; NOTE: only first two elements are effectively used, the rest is for enlarging the message size

    while (!workerStack.empty()) {
      // ----------------------------------------------------------------------
      // Sending work to SLAVES

      while (!workerStack.empty()) {
        if (jobStart < jobEnd) {
          int slave = workerStack.top();
          int idx = slave - 1;

          // NOTE: hidden because of nicer visualization
          work[0] = jobStart;
          if (jobStart + chunkSize < jobEnd) {
            work[1] = jobStart + chunkSize;
          } else {
            work[1] = jobEnd - 1;
          }
          // <<<

          MPI_Isend(work, message_size, MPI_INT,
                    slave, MessageTag::work,
                    MPI_COMM_WORLD,
                    &dataSentRequests[idx]);

          jobStart += chunkSize;

          workerStack.pop(); // take the slave out of the stack

        } else {
          break; // no more work
        }
      }

      while (!workerStack.empty()) { // it can be possibly non-empty as the previous while may end by break
        // send the ending message to unused slaves and take them out of the stack
        int slave = workerStack.top();
        int idx = slave - 1;

        int noMoreFlag = 0;
        MPI_Isend(&noMoreFlag, 1, MPI_INT,
                  slave, MessageTag::noMoreWork,
                  MPI_COMM_WORLD,
                  &endRequests[idx]);

        // NOTE: it is necessary to set unused requests to null
        dataSentRequests[idx] = MPI_REQUEST_NULL;
        workerStack.pop();
      }

      int n_accepted_sent_requests; // NOTE: in the original version there was '= 0'.
      MPI_Waitsome(slaves, dataSentRequests,
                  &n_accepted_sent_requests, accepted_sent_indices,
                  MPI_STATUSES_IGNORE);

      // ----------------------------------------------------------------------
      // Receiving data from SLAVES

      if (n_accepted_sent_requests != MPI_UNDEFINED) {
        for (size_t i = 0; i < n_accepted_sent_requests; i++) {
          int idx = accepted_sent_indices[i];
          int slave = idx + 1; // +1 to get rank from index. Request were set at dest-1 position.

          MPI_Irecv(bufferOfSubBetweennesses[idx], vertices + edges, MPI_TYPE,
                    slave, MessageTag::work,
                    MPI_COMM_WORLD,
                    &dataReceiveRequests[idx]);
        }
      }

      int n_accepted_recvd_requests;
      MPI_Waitsome(slaves, dataReceiveRequests,
                    &n_accepted_recvd_requests, accepted_received_indices,
                    dataReceiveStatuses);

      if (n_accepted_recvd_requests != MPI_UNDEFINED) {
        for (size_t i = 0; i < n_accepted_recvd_requests; i++) {
          int slave = dataReceiveStatuses[i].MPI_SOURCE;
          int idx = slave - 1;

          // NOTE: hidden because of nicer visualization
          for (size_t j = 0; j < vertices; j++) {
            betweenness.data[j] = betweenness.data[j] + bufferOfSubBetweennesses[idx][j];
          }

          for (size_t j = 0; j < edges; j++) {
            edgeBetweenness.data[j] = edgeBetweenness.data[j] + bufferOfSubBetweennesses[idx][j + vertices];
          }
          // <<<

          workerStack.push(slave);
        }
      }
    }

    delete[] dataSentRequests;
    delete[] dataReceiveRequests;
    delete[] dataReceiveStatuses;
    delete[] accepted_sent_indices;
    delete[] accepted_received_indices;

    MPI_Waitall(slaves, endRequests, MPI_STATUSES_IGNORE);
    delete[] endRequests;

    for (int i = 0; i < slaves; i++) {
      delete[] bufferOfSubBetweennesses[i];
    }
    delete[] bufferOfSubBetweennesses;

  } else { // SLAVE
    Vector<btw_num_t> result(vertices + edges);

    while(true) {
      MPI_Status status;
      MPI_Message probed_msg;
      MPI_Mprobe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &probed_msg, &status);

      if (status.MPI_TAG == MessageTag::work) {
        int *work = new int[message_size];
        MPI_Mrecv(work, message_size, MPI_INT, &probed_msg, MPI_STATUS_IGNORE);


        BetweennessResult subBetweenness = Calculate(work[0], work[1], rank);

        std::memcpy(result.data, subBetweenness.VertexBetweenness.data,
                    sizeof(btw_num_t) * vertices);
        std::memcpy(&result.data[vertices], subBetweenness.EdgeBetweenness.data,
                    sizeof(btw_num_t) * edges);

        MPI_Send(result.data, vertices + edges, MPI_TYPE, 0, MessageTag::work, MPI_COMM_WORLD);

        delete[] work;
      } else if (status.MPI_TAG == MessageTag::noMoreWork) {
        int data = 0;
        MPI_Mrecv(&data, 1, MPI_INT, &probed_msg, MPI_STATUS_IGNORE);
        break;
      } else {
        std::cerr << " Error occurred on rank " << rank << "\n";
      }
    }
  }

  MPI_Finalize();
  return 0;
}
