#include "depthFirst.h"
#include "filter.h"
#include <stack>
#include <unordered_map>
#include <numeric>
#include <algorithm>
#include <iostream>
#include <string>

/* =========================================== */
// Returns the number of levels encoded in an idSK-style string. Every level
// is written as a number followed by ',', so the level count is simply the
// number of commas in the string.
size_t idSK_size(std::string& str) { // e.g. str = "0,0,1,2,1,"
	const auto commaCount = std::count(str.begin(), str.end(), ',');
	return static_cast<size_t>(commaCount); // e.g. return 5
}


/* =========================================== */
// Builds the key used to look up, in dupLookupTabRNA, the RNA of the level
// above idNow.
//   - When idNow holds exactly one level, the fixed key "initial_level" is
//     returned.
//   - Otherwise the result is the leading part of idNow.idStr up to and
//     including the (idNow.id.size() - 1)-th comma, i.e. idNow.idStr without
//     its last level. If idNow.idStr holds fewer commas than that, the whole
//     string is returned.
std::string dupLookupTabRNA_key(IDSTACK& idNow) {
	const size_t depth = idNow.id.size();
	if (depth == 1)
		return "initial_level";

	const auto& full = idNow.idStr;
	const size_t commasWanted = depth - 1;

	// Walk forward until the wanted number of commas has been consumed;
	// 'end' is then one past the last character that belongs to the key.
	size_t commasSeen = 0;
	size_t end = 0;
	while (end < full.size()) {
		if (full[end++] == ',')
			++commasSeen;
		if (commasSeen == commasWanted)
			break;
	}

	return std::string(full.begin(), full.begin() + end);
}


/* =========================================== */
// Check whether two paths are identical.
bool isPathEquiv(std::vector<FILTERED_UNIT>& path1, std::vector<FILTERED_UNIT>& path2) {
	if (path1.size() == path2.size()) {
		sizeVec id(path2.size());
		std::iota(id.begin(), id.end(), 0);

		bool found;
		for (size_t i = 0; i < path1.size(); ++i) {
			found = false;
			for (auto it = id.begin(); it != id.end(); ++it) {
				if (path1[i].dup.compare(path2[*it].dup) == 0) {
					id.erase(it); // use *it, for the sake of speed
					found = true;
					break;
				}
			}
			if (!found)
				return false;
		}
		return true;
	}
	else {
		return false;
	}
}


/* =========================================== */
// Builds the RNA that is left after applying one filtered unit to rna0.
//   1. Each index in filterUnit.sche is treated as a cut placed right after
//      that position; local copies of rna0.idGroup0 / rna0.idGroup1 are split
//      accordingly.
//   2. Groups located before filterUnit.dupID0 and after filterUnit.dupID1
//      that hold more than one letter are extracted from rna0.seqComb and
//      sorted.
//   3. The sorted pieces are concatenated into a fresh RNA whose seqIdx0,
//      idGroup0 and idGroup1 describe one group per piece.
// rna0 itself is only read.
RNA renewRNA(FILTERED_UNIT& filterUnit, RNA& rna0) {
	// groupFirst[p] / groupLast[p]: first / last index of the group holding p.
	sizeVec groupFirst = rna0.idGroup0;
	sizeVec groupLast = rna0.idGroup1;

	// Split the groups at every scheduled cut.
	for (size_t n = 0; n < filterUnit.sche.size(); ++n) {
		const size_t cut = filterUnit.sche[n];

		// Everything after the cut (up to the old group end) now starts at cut + 1.
		for (size_t p = cut + 1; p <= groupLast[cut]; ++p)
			groupFirst[p] = cut + 1;

		// Everything from the old group start up to the cut now ends at cut.
		for (size_t p = groupFirst[cut]; p <= cut; ++p)
			groupLast[p] = cut;

		// No special cases are needed: when the cut sits on the first letter
		// of its group the second loop only touches groupLast[cut], and when
		// the group ends at cut + 1 the first loop only touches
		// groupFirst[cut + 1].
	}

	// Collect the multi-letter groups outside the duplicated part, then sort.
	std::vector<std::string> pieces{};
	const auto collectGroups = [&](size_t pos, const size_t stop) {
		while (pos < stop) {
			const size_t first = groupFirst[pos];
			const size_t last = groupLast[pos];
			if (last != first) // single letters are dropped
				pieces.emplace_back(rna0.seqComb.substr(first, last - first + 1));
			pos = last + 1; // jump to the start of the next group
		}
	};
	collectGroups(0, filterUnit.dupID0);
	collectGroups(filterUnit.dupID1 + 1, groupFirst.size());
	std::sort(pieces.begin(), pieces.end());

	// Reconstruct the RNA format from the sorted pieces.
	RNA rebuilt{};
	sizeVec pieceLast{}; // index of the last letter of each piece in seqComb
	for (const std::string& piece : pieces) {
		rebuilt.seqIdx0.emplace_back(rebuilt.seqComb.size());
		rebuilt.seqComb.append(piece);
		pieceLast.emplace_back(rebuilt.seqComb.size() - 1);
	}

	rebuilt.idGroup0.resize(rebuilt.seqComb.size());
	std::iota(rebuilt.idGroup0.begin(), rebuilt.idGroup0.end(), 0);
	rebuilt.idGroup1 = rebuilt.idGroup0;
	for (size_t g = 0; g < rebuilt.seqIdx0.size(); ++g) {
		const size_t first = rebuilt.seqIdx0[g];
		const size_t last = pieceLast[g];
		for (size_t p = first; p <= last; ++p) {
			rebuilt.idGroup0[p] = first;
			rebuilt.idGroup1[p] = last;
		}
	}

	return rebuilt;
}


/* =========================================== */
// Depth-first search over repeated applications of filter(), starting at rna0.
//
// Each FILTERED_UNIT returned by filter() is one possible step. Applying a
// step with renewRNA() yields a new RNA that is filtered again, until filter()
// returns nothing; the sequence of steps from the root to that point is one
// pathway. Every step adds (dupID1 - dupID0) to the pathway's "saved" count.
// Only pathways with the largest saved count are kept, and pathways that
// isPathEquiv() reports as equal are stored once.
//
// Parameters:
//   rna0         - starting RNA; also used for the final PAIdx computation.
//   startTime    - forwarded to the first filter() call.
//   CashingSize, CashingVacuumSize, CashingVacuumSeqLen
//                - forwarded unchanged to the CASHING constructor
//                  (presumably cache sizing parameters; see CASHING).
// Returns:
//   One ONE_PATH per kept pathway, holding the distinct dup strings, how many
//   times each occurs, and PAIdx. Empty if the first filter() call finds
//   nothing.
std::vector<ONE_PATH> depthFirstSearch(RNA& rna0, 
	std::chrono::time_point<std::chrono::steady_clock>& startTime,
	size_t CashingSize, size_t CashingVacuumSize, size_t CashingVacuumSeqLen) {

	IDSTACK idNow{};
	sizeVec idSKMax{};
	std::vector<FILTERED_UNIT> filtered = filter(rna0, startTime, idSKMax, idNow.id);
	std::vector<ONE_PATH> pathVec{};
	// nothing to search: return the empty result
	if (filtered.empty())
		return pathVec;


	// time-related variables
	// NOTE(review): lastTimeDisplay, currentTime and timeSinceLastChrono are
	// not used anywhere below; only startTimeFilter is.
	std::chrono::time_point<std::chrono::steady_clock>
		lastTimeDisplay = startTime, currentTime, startTimeFilter;
	std::chrono::duration<double> timeSinceLastChrono;


	// Stacks and lookup tables for the depth-first search.
	// resSK, idSK and nSavedSK are pushed and popped together, so their tops
	// always describe the same pending step:
	//   resSK{}    the FILTERED_UNIT of the step.
	//   idSK{}     the step's position in the search tree as a string of
	//              comma-terminated indices, one per level, e.g. "0,1,0,".
	//   nSavedSK{} the accumulated saved count of the pathway up to and
	//              including this step, used later to find the maximum.
	// dupLookupTabPATH{} maps such a level string to its FILTERED_UNIT,
	//     e.g. {"0,1,0,":FILTERED_UNIT1, "1,2,":FILTERED_UNIT5, ...}
	std::stack <FILTERED_UNIT> resSK{};
	std::stack <std::string> idSK{};
	std::stack <size_t> nSavedSK{};
	std::unordered_map <std::string, FILTERED_UNIT> dupLookupTabPATH;

	std::unordered_map <std::string, RNA> dupLookupTabRNA; // find mother-RNA at each level
	idSKMax = { filtered.size() - 1 };


	// initialise the stacks with the first-level steps; pushing in reverse
	// order makes index 0 the first one popped
	std::string strTemp{};
	dupLookupTabRNA["initial_level"] = rna0;
	for (int i = static_cast<int>(idSKMax[0]); i >= 0; --i) {
		resSK.push(filtered[i]);
		strTemp = std::to_string(i) + ",";
		idSK.push(strTemp);
		nSavedSK.push(filtered[i].dupID1 - filtered[i].dupID0);
		dupLookupTabPATH[strTemp] = filtered[i];
	}


	// lookup of previously computed filter() results, keyed by RNA
	// (see CASHING for the meaning of the three constructor arguments)
	CASHING Cashing(CashingSize, CashingVacuumSize, CashingVacuumSeqLen);
	// go through the stack
	RNA rnaThis{};
	size_t  nSavedMax = 0;

	std::vector<FILTERED_UNIT> path{};
	std::vector<std::vector<FILTERED_UNIT>> pathVec0{};
	size_t idSKsize, nSaved;
	std::string idSKsubstr, RNA_key, idNowStr;
	bool diffPath;
	while (!idSK.empty()) {// until stack is empty, then depth-first search finishes
		// take the top step: apply it to the RNA of its parent level
		idNow.renew(idSK.top());
		RNA_key = dupLookupTabRNA_key(idNow);
		rnaThis = renewRNA(resSK.top(), dupLookupTabRNA[RNA_key]);
		resSK.pop();
		idSK.pop();
		nSaved = nSavedSK.top();
		nSavedSK.pop();
		idSKMax.resize(idNow.id.size());
		startTimeFilter = std::chrono::steady_clock::now();

		// try the lookup first; run filter() only when nothing is stored.
		// Cashing.notExist is read right after find(); presumably find() sets
		// it on a miss -- confirm against CASHING.
		filtered = Cashing.find(rnaThis);
		if (Cashing.notExist) {
			filtered = filter(rnaThis, startTimeFilter, idSKMax, idNow.id);
			Cashing.add(rnaThis, filtered);
		}


		if (filtered.empty()) {
			// no further step is possible: this pathway is complete.
			// Rebuild it from the units stored for each prefix of idNow.
			// NOTE(review): i is an int while idNow.size is also compared
			// with size_t values below -- possible signed/unsigned comparison.
			path = {};
			for (int i = 0; i < idNow.size; ++i)
				path.emplace_back(dupLookupTabPATH[idNow.from_0toi(i)]);

			// erase this step from dupLookupTabPATH, as idNow will never be used again
			dupLookupTabPATH.erase(idNow.idStr); //erase idNow from dupLookupTab
			if (!idSK.empty()) {
				idSKsize = idSK_size(idSK.top());
				if (idNow.size > idSKsize) { // next step is on a shallower level: erase the levels being left
					// rebuild the prefix shared with the next step
					// (its first idSKsize - 1 levels)
					idSKsubstr = "";
					for (size_t i = 0; i < idSKsize - 1; ++i)
						idSKsubstr.append(std::to_string(idNow.id[i]) + ",");
					// extend the prefix one level at a time and erase each
					// finished ancestor from both tables
					for (size_t i = idSKsize - 1; i < idNow.size - 1; ++i) {
						idSKsubstr.append(std::to_string(idNow.id[i]) + ",");
						dupLookupTabPATH.erase(idSKsubstr);
						dupLookupTabRNA.erase(idSKsubstr);
					}
				}
			}


			// keep only the pathways with the largest saved count
			// (the larger the saved count, the shorter the pathway is
			// considered to be)
			if (nSaved > nSavedMax) {
				// strictly better than everything so far: discard all
				// previously saved pathways
				nSavedMax = nSaved;
				pathVec0 = { path };
			}
			else if (nSaved == nSavedMax) {
				// ties with the current best: record it only if it is not
				// equivalent to a pathway that is already saved
				diffPath = true;
				for (std::vector<FILTERED_UNIT>& temp : pathVec0) {
					if (isPathEquiv(path, temp)) {
						diffPath = false;
						break;
					}
				}
				// if it is different, record; otherwise, skip
				if (diffPath)
					pathVec0.emplace_back(path);
			}


		}
		else {
			// this pathway continues: push every entry of "filtered" as a
			// step on the next level
			idSKMax.emplace_back(filtered.size() - 1);
			idNowStr = idNow.idStr;
			// remember the RNA of this level so that the child steps can
			// find it through dupLookupTabRNA_key()
			if (dupLookupTabRNA.find(idNowStr) == dupLookupTabRNA.end())
				dupLookupTabRNA[idNowStr] = rnaThis;

			// reverse order, so that child 0 is popped first
			for (int i = static_cast<int>(idSKMax.back()); i >= 0; --i) {
				resSK.push(filtered[i]);
				nSavedSK.push(nSaved + filtered[i].dupID1 - filtered[i].dupID0);
				strTemp = idNowStr + std::to_string(i) + ",";
				// idNow is overwritten here; it is renewed from the stack top
				// at the start of the next iteration
				idNow.renew(strTemp);
				idSK.push(idNow.idStr);
				dupLookupTabPATH[idNow.idStr] = filtered[i];
			}
		}
	}


	// compact each kept pathway: list every distinct dup string once
	// (dups) together with its number of occurrences (dupN)
	// NOTE(review): x and NpathVec0 are not used anywhere below.
	bool found;
	size_t k, x = 0, NpathVec0 = pathVec0.size();
	ONE_PATH pathTemp{};
	size_t PAsaved;
	for (std::vector<FILTERED_UNIT>& pathi : pathVec0) {
		// start with the first unit; dups and dupN are reset for each pathway
		pathTemp.dups = { pathi[0].dup };
		pathTemp.dupN = { 1 };
		PAsaved = pathi[0].dup.size() - 1;


		for (size_t i = 1; i < pathi.size(); ++i) {

			// every unit contributes (dup length - 1) to PAsaved
			PAsaved += pathi[i].dup.size() - 1;
			// look for this dup among the ones already listed
			found = false;
			for (size_t j = 0; j < pathTemp.dups.size(); ++j) {
				if (pathTemp.dups[j].compare(pathi[i].dup) == 0) {
					k = j;
					found = true;
					break;
				}
			}
			if (found) {
				// k is only read when found is true, i.e. after it was set
				pathTemp.dupN[k]++;
			}
			else {
				pathTemp.dups.emplace_back(pathi[i].dup);
				pathTemp.dupN.emplace_back(1);
			}

		}
		// PAIdx = total letters in rna0 - number of sequences in rna0 - PAsaved
		pathTemp.PAIdx = rna0.seqComb.size() - rna0.seqIdx0.size() - PAsaved;
		pathVec.emplace_back(pathTemp);
	}

	return pathVec;
}