#include "filter.h"
#include <numeric>
#include <random>
#include <algorithm>


/* =========================================== */
// Builds a cutting scheme enumerator from rna0.
//
// Copies seqComb, idGroup0 and idGroup1 from rna0, then collects into cutMix
// the candidate cut positions of every sequence listed in rna0.seqIdx0 and
// shuffles them into a random order.
//
// For a sequence starting at index s, the candidates are s .. idGroup1[s]-1.
// Sequences for which idGroup1[s] - s <= 1 contribute no candidates.
SCHEME::SCHEME(RNA& rna0) {
	seqComb = rna0.seqComb;
	idGroup0 = rna0.idGroup0;
	idGroup1 = rna0.idGroup1;

	for (const size_t seqStartID : rna0.seqIdx0) {
		const size_t len = idGroup1[seqStartID] - seqStartID;
		if (len > 1) {
			// Append seqStartID, seqStartID+1, ... (len values) directly to
			// cutMix instead of going through a temporary vector.
			const size_t oldSize = cutMix.size();
			cutMix.resize(oldSize + len);
			std::iota(cutMix.begin() + oldSize, cutMix.end(), seqStartID);
		}
	}

	// Non-deterministic order: the engine is seeded from std::random_device.
	// For reproducible runs, seed the engine with a fixed value instead.
	std::random_device rd;
	std::mt19937 g(rd());
	std::shuffle(cutMix.begin(), cutMix.end(), g);

	// Always assign cutMixEnd: nextValid() reads it, and this constructor
	// previously left it untouched when cutMix ended up empty.
	cutMixEnd = cutMix.empty() ? 0 : cutMix.size() - 1;
}



/* =========================================== */
// Cuts the group containing iNowCut right after index iNowCut.
//
// The group [head .. tail] becomes [head .. iNowCut] and [iNowCut+1 .. tail]:
// idGroup0 of the right piece is set to iNowCut+1 and idGroup1 of the left
// piece is set to iNowCut. The resulting substrings of seqComb are stored in
// newSeq / newSeq2 depending on the case.
//
// Returns:
//   1 - iNowCut is the first index of its group; newSeq = right piece.
//   2 - iNowCut is the next-to-last index of its group; newSeq = left piece.
//   3 - any other position; newSeq = left piece, newSeq2 = right piece.
int SCHEME::cutSeq(size_t iNowCut) {
	// Snapshot the group bounds before they are overwritten below.
	const size_t head = idGroup0[iNowCut];
	const size_t tail = idGroup1[iNowCut];
	const size_t rightHead = iNowCut + 1;

	// Every index of the right piece now starts at rightHead ...
	for (size_t k = rightHead; k <= tail; ++k)
		idGroup0[k] = rightHead;
	// ... and every index of the left piece now ends at iNowCut.
	for (size_t k = head; k <= iNowCut; ++k)
		idGroup1[k] = iNowCut;

	if (iNowCut == head) { // FIRST letter is cut off: only the right piece is reported
		newSeq = seqComb.substr(rightHead, idGroup1[rightHead] - iNowCut);
		return 1;
	}

	// In the two remaining cases the left piece is reported in newSeq.
	newSeq = seqComb.substr(head, iNowCut - head + 1);
	if (iNowCut == tail - 1) // LAST letter is cut off
		return 2;

	// Cut strictly between FIRST and LAST: report the right piece as well.
	newSeq2 = seqComb.substr(rightHead, idGroup1[rightHead] - iNowCut);
	return 3;
}
/* =========================================== */
// Starts the enumeration with the first candidate in cutMix.
//
// Pushes index 0 onto scheID and cutMix[0] onto sche, then applies that cut.
//
// Returns the cutSeq() case code (1, 2 or 3), or 0 when cutMix is empty and
// there is nothing to cut. 0 is the same value nextSche_skip() and
// nextSche_further() return when no scheme is available. In the empty case
// no state is modified.
int SCHEME::initSche() {
	// Guard: indexing cutMix[0] on an empty container is out of bounds.
	if (cutMix.empty())
		return 0;

	const size_t firstCut = cutMix[0];
	scheID.emplace_back(0);
	sche.emplace_back(firstCut);
	return cutSeq(firstCut);
}



/* =========================================== */
// Undoes a cut made right after index iCut by merging the group ending at
// iCut with the group starting at iCut+1.
//
// After the call, every index of the merged range reports the left group's
// first index in idGroup0 and the right group's last index in idGroup1.
void SCHEME::restore(size_t iCut) {
	const size_t leftHead = idGroup0[iCut];      // first index of the left group
	const size_t rightTail = idGroup1[iCut + 1]; // last index of the right group

	// Right group: point its start back to the left group's start.
	for (size_t k = iCut + 1; k <= rightTail; ++k)
		idGroup0[k] = leftHead;
	// Left group: extend its end to the right group's end.
	for (size_t k = leftHead; k <= iCut; ++k)
		idGroup1[k] = rightTail;
}
/* =========================================== */
// Finds the next usable entry of cutMix after position iNowCut.
//
// An entry is usable when the group currently containing that cut position
// satisfies idGroup1 - idGroup0 >= 2. Entries are examined in cutMix order,
// starting at iNowCut+1 and stopping at cutMixEnd.
//
// Returns the cutMix index of the first usable entry, or 0 when there is
// none (0 can never be a valid result because the search starts past
// iNowCut).
size_t SCHEME::nextValid(size_t iNowCut) {
	for (size_t cand = iNowCut; cand != cutMixEnd; ) {
		++cand;
		const size_t pos = cutMix[cand];
		if (idGroup1[pos] - idGroup0[pos] >= 2)
			return cand;
	}
	return 0;
}
/* =========================================== */
// Advances to the next scheme WITHOUT extending the current one.
//
// The most recent cut is undone and replaced by the next usable candidate
// after it. If there is none, earlier cuts are undone one by one until a
// replacement is found.
//
// Returns the cutSeq() case code (1, 2 or 3) of the newly applied cut, or 0
// when the scheme stack runs empty, i.e. no further scheme exists.
int SCHEME::nextSche_skip() {
	size_t replacement;
	do {
		if (scheID.empty())
			return 0;

		// Drop the latest cut and undo its effect on the group tables.
		const size_t dropped = scheID.back();
		scheID.pop_back();
		restore(sche.back());
		sche.pop_back();

		replacement = nextValid(dropped);
	} while (replacement == 0);

	const size_t cutPos = cutMix[replacement];
	scheID.emplace_back(replacement);
	sche.emplace_back(cutPos);
	return cutSeq(cutPos);
}
/* =========================================== */
// Advances to the next scheme, preferring to EXTEND the current one.
//
// First tries to add one more cut: the next usable candidate after the most
// recent cut. If there is none, cuts are undone from the top of the stack
// until a replacement candidate is found.
//
// Returns the cutSeq() case code (1, 2 or 3) of the newly applied cut, or 0
// when no further scheme exists. This includes the case where the scheme
// stack is already empty on entry.
int SCHEME::nextSche_further() {
	// Guard: calling back() on an empty container is undefined behaviour.
	// nextSche_skip() reports an empty stack as 0, so do the same here.
	if (scheID.empty())
		return 0;

	size_t iNextCut = nextValid(scheID.back());
	while (iNextCut == 0) {
		if (scheID.empty())
			return 0;

		// Backtrack: drop the latest cut and undo its effect.
		const size_t iNowCut = scheID.back();
		scheID.pop_back();
		restore(sche.back());
		sche.pop_back();

		iNextCut = nextValid(iNowCut);
	}

	scheID.emplace_back(iNextCut);
	sche.emplace_back(cutMix[iNextCut]);
	return cutSeq(cutMix[iNextCut]);
}



/* =========================================== */
bool find_x_in_V(size_t x, sizeVec& V) {
	for (size_t i = 0; i < V.size(); ++i) {
		if (V[i] == x) {
			return true;
		}
	}
	return false;
}
/* =========================================== */
void SCHEME::saveScheUniq(size_t i0, size_t i1) {

	// if no identical scheme is found
	size_t len = sche.size();
	sizeVec id{};
	for (size_t i = 0; i < scheSAVE.size(); ++i) {
		if (len < scheSAVE[i].size()) {

			for (size_t& j : sche) {
				if (!find_x_in_V(j, scheSAVE[i])) {
					id.emplace_back(i);
					break;
				}
			}

		}
		else {
			id.emplace_back(i);
		}
	}

	if (id.size() < scheSAVE.size()) { // filter longer-repeated schemes
		len = id.size();
		for (size_t i = 0; i < len; ++i) {
			scheSAVE[i] = scheSAVE[id[i]];
			dupID0SAVE[i] = dupID0SAVE[id[i]];
			dupID1SAVE[i] = dupID1SAVE[id[i]];
		}
		scheSAVE.resize(len);
		dupID0SAVE.resize(len);
		dupID1SAVE.resize(len);
	}

	scheSAVE.emplace_back(sche);
	dupID0SAVE.emplace_back(i0);
	dupID1SAVE.emplace_back(i1);
}



/* =========================================== */
// Looks for a group, other than the one spanning [id0 .. id1], whose text in
// seqComb equals seqToCompare.
//
// Groups are visited one at a time: first those starting before id0, then
// those starting after id1, each time jumping from a group's start to the
// index right after its end (idGroup1[i] + 1).
//
// On the first match, the current scheme is recorded through
// saveScheUniq(id0, id1) and true is returned; otherwise false is returned
// and nothing is recorded.
bool SCHEME::findDup_partial(std::string& seqToCompare, size_t id0, size_t id1) {
	// True when the group containing index i spells exactly seqToCompare.
	// The comparison is done in place on seqComb, so no temporary substring
	// is allocated per group.
	const auto groupMatches = [&](size_t i) {
		const size_t len = idGroup1[i] - idGroup0[i] + 1;
		return seqToCompare.size() == len
			&& seqComb.compare(idGroup0[i], len, seqToCompare) == 0;
	};

	// Walks the groups starting at `from` while the group start is below
	// `to`, and reports whether any of them matches.
	const auto anyMatchIn = [&](size_t from, size_t to) {
		for (size_t i = from; i < to; i = idGroup1[i] + 1) {
			if (groupMatches(i))
				return true;
		}
		return false;
	};

	// The second range is only scanned when the first one has no match.
	if (anyMatchIn(0, id0) || anyMatchIn(id1 + 1, idGroup0.size())) {
		saveScheUniq(id0, id1);
		return true;
	}
	return false;
}
/* =========================================== */
// Checks whether the piece(s) produced by the most recent cut duplicate
// another group, recording the current scheme when they do.
//
// cutCase is the code returned by cutSeq() for that cut:
//   1 - only newSeq (the piece right of the cut) is checked;
//   2 - only newSeq (the piece left of the cut) is checked;
//   anything else - the two pieces are first compared with each other, then
//       the left piece (newSeq) and finally the right piece (newSeq2) are
//       checked against the other groups.
//
// Returns true as soon as one duplicate is found, false otherwise.
bool SCHEME::findDup(int cutCase) {
	const size_t cut = sche.back();

	switch (cutCase) {
	case 1:
		return findDup_partial(newSeq, cut + 1, idGroup1[cut + 1]);
	case 2:
		return findDup_partial(newSeq, idGroup0[cut], cut);
	default:
		break;
	}

	// Both pieces are new: they may already duplicate each other.
	if (newSeq == newSeq2) {
		saveScheUniq(idGroup0[cut], cut);
		return true;
	}

	if (findDup_partial(newSeq, idGroup0[cut], cut))
		return true;
	return findDup_partial(newSeq2, cut + 1, idGroup1[cut + 1]);
}

