#include #include #include #include #include #define MAXN 500 /* max length of genome */ #define MAXTYPES 5000 /* max number of distinct genotypes in the popn */ #define K 0 /* K in NK landscape */ #define ndfe 500 // number of fitness measurements in each dfe if DFE is sampled void transition(int seq[MAXN], int pos, int newseq[MAXN]); void transversion(int seq[MAXN], int pos, int newseq[MAXN]); void mutations(int seq[MAXN], int pos, int newseq[MAXN], int new_base); // performs the 1st, 2nd, or 3rd possible point mutation for the sequence at given position // depending on the new_base variable = 0, 1, or 2 // 0 and 1 give the two possible transversions, and 2 gives the one possible transition double fitness(int seq[MAXN], int neighs[K + 1][MAXN], double** wis); // Calculates fitness of given sequence int hamming_distance(int seq_a[MAXN], int seq_b[MAXN]); int is_tr(int x, int y); // Returns 1 if the mutation from x to y is a transition, and 2 if transversion // Returns zero if x and y are the same (no mutation) int N = 100; /* sequence length */ int ntypesave = 50; /* number of genotypes to start */ int Noftypesave = 100; // number of individuals for each genotype to start int carryingcap = 5000; void main() //(int argc, char **argv) { srand(time(NULL)); clock_t begin = clock(); int ngens = 21001; int nreps = 500; double mu = .0001; // sequence point mutation rate per GENOME per generation, not per base double ftsave = 0.9; // initial fts, freq of ts int dfeflag = 1; // this variable is 0 if we want a full DFE, and 1 if we want an observed sampled DFE int dfetimes[100] = { 0, 1000, 10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000,100000 }; // times at which to write out a dfe int nextdfeindex = 0; // index of the next dfetime we will do double DFE[ndfe], tv_DFE[ndfe], ts_DFE[ndfe]; if (dfeflag == 0 && 3 * N > ndfe) { fprintf(stdout, "Too many DFE elements. Please set dfeflag to 1 instead"); exit(1); } int i, j, k, n, igen, itype, jtype, irep, nlookups, lookind, new_base, imut, imutes; int iham, trs_count, trv_count, count=0; long seed = -1; double sdfts = .3; double fts, w0; int pos, ntotal, ntypes; int neighs[K + 1][MAXN]; // epistatic neighbours in NK model int gen_jump = 15000; // generation at with the bias changes double bias_jump = 0.5; // bias to which we jump at gen_jump double mu_jump = mu * 200; // mutation rate to which we jump at gen_jump double fraction = 0.05; // fraction of population to change bias and/or mutation rate int** seqs = (int**)malloc(MAXTYPES * sizeof(int*)); //sequences of genotypes that make up the popn for (i = 0; i < MAXTYPES; i++) seqs[i] = (int*)malloc(MAXN * sizeof(int)); int dfeseq[MAXN]; // temporary sequence of potential indl contributing to dfe int Noftype[MAXTYPES]; // number of individuals of each distinct genotype double mus[MAXTYPES]; // mutation rate for each genotype double ws[MAXTYPES], ftseq[MAXTYPES]; // fitness and freq of ts for each genotype double wbar, ftbar, wtmp, mubar; double nweights[MAXTYPES]; char filename[100], dfefilename[100], tr_dfefilename[100]; FILE* fpout, * fpdfe, * fpdfe_tr; float poidev(float, long*), ran1(long*); // numerical recipes in C (gaussian, poisson, uniform Random Deviate Genetators) int min_ham_dist, temp_dist, initial_ntypes, type; // To store the initial population: int** initial_seqs = (int**)malloc(MAXTYPES * sizeof(int*)); for (i = 0; i < MAXTYPES; i++) initial_seqs[i] = (int*)malloc(MAXN * sizeof(int)); int initial_Noftype[MAXTYPES]; double initial_ws[MAXTYPES], initial_ftseq[MAXTYPES]; nlookups = 4; // number of possible nucleotides in each position for (i = 1; i <= K; i++) nlookups = nlookups * 4; // nlookups is 4^(K+1), size of the array for the fitness contributions // Dynamic memory allocation for declaring potentially large arrays using pointers double** wis = (double**)malloc(nlookups * sizeof(double*)); // fitness contriution of the ith neighbour for (i = 0; i < nlookups; i++) wis[i] = (double*)malloc(MAXN * sizeof(double)); fprintf(stdout, "Running Ts:Tv, N = %d, K =%d, ntypes = %d, ngens = %d, ts = %f, mu = %f\n", N, K, ntypesave, ngens, ftsave, mu); // Algorithm starts here // A) Initialization for (irep = 0; irep < nreps; irep++) { // replicate the whole process nreps times fprintf(stdout, "\nReplicate: %d\n", irep); ntypes = ntypesave; nextdfeindex = 0; //reset min_ham_dist = 0; // make the fitness landscape first // 1- Generate wi's for (i = 0; i < nlookups; i++) for (n = 0; n < N; n++) { wis[i][n] = (rand() / (double)RAND_MAX); while (wis[i][n] >= 1) wis[i][n] = (rand() / (double)RAND_MAX); } // 2- Choose random neighbours of each position ( You can be your own neighbour here! ) if (K > 0) for (k = 0; k < K; k++) for (n = 0; n < N; n++) { neighs[k][n] = (int)(N * (rand() / (double)RAND_MAX)); while (neighs[k][n] >= 100) neighs[k][n] = (int)(N * (rand() / (double)RAND_MAX)); } // then make the initial population of random sequences for (itype = 0; itype < ntypes; itype++) { for (n = 0; n < N; n++) { seqs[itype][n] = (int)(4 * (rand() / (double)RAND_MAX)); while (seqs[itype][n] == 4) seqs[itype][n] = (int)(4 * (rand() / (double)RAND_MAX)); } /* This is a faster way ( than using ran1 ) to generate random numbers : 4 * (integer rand / greatest possible number) = 4 * uniform rand = rand in (0,4) adding (int) before makes it an integer between 0 and 3 */ // now calcuating the fitness for this itype sequence ws[itype] = fitness(seqs[itype], neighs, wis); Noftype[itype] = Noftypesave; ftseq[itype] = ftsave; mus[itype] = mu; } // end loop, ntypes genotypes have been created, with Noftypesave individual each // here we give them all the same starting freq of ts // test opening the file before doing any more computation sprintf_s(filename, 100, "data/N%d_K%d_bias%f_mu%f_%d.txt", N, K, (float)ftsave, (float)mu, irep); int pout = fopen_s(&fpout, filename, "w"); if (pout != 0) { fprintf(stdout, "Unable to open output file\n"); exit(1); } sprintf_s(dfefilename, 100, "data/dfe_N%d_K%d_bias%f_mu%f_%d.txt", N, K, (float)ftsave, (float)mu, irep); int pdfe = fopen_s(&fpdfe, dfefilename, "w"); if (pdfe != 0) { fprintf(stdout, "Unable to open dfe file\n"); exit(1); } fprintf(fpdfe, "igen fben fdel s+ s- w min_ham_dist trs_count trv_count \n"); sprintf_s(tr_dfefilename, 100, "data/ts_tv_dfe_N%d_K%d_bias%f_mu%f_%d.txt", N, K, (float)ftsave, (float)mu, irep); int pdfe_tr = fopen_s(&fpdfe_tr, tr_dfefilename, "w"); if (pdfe_tr != 0) { fprintf(stdout, "Unable to open TR dfe file\n"); exit(1); } fprintf(fpdfe_tr, "igen ts_fben ts_fdel ts_s+ ts_s- tv_fben tv_fdel tv_s+ tv_s- w \n"); // B) Simulation: (Time is starting!) for (igen = 0; igen < ngens; igen++) { // loop over ngens generations // first compute total number of individuals and mean fitness double wwsum = 0; ntotal = 0; for (itype = 0; itype < ntypes; itype++) { wwsum += ws[itype] * Noftype[itype]; // fitness sum is weighted by the number of individuals in each type ntotal += Noftype[itype]; } wbar = wwsum / ntotal; if (igen == dfetimes[nextdfeindex]) { // make a dfe of the most common type and print it out for this generation fprintf(stdout, "Generation: %d\n", igen); nextdfeindex++; // find the most common genotype int tmpN = Noftype[0]; int bigtype = 0; for (itype = 1; itype < ntypes; itype++) if (Noftype[itype] > tmpN) { tmpN = Noftype[itype]; bigtype = itype; } // At generation 0, all genotypes have the same number of individuals // So, an inviable sequence might be chosen to write the DFE for // Instead, choose any genotype that has a nonzero fitness if (igen == 0) for (itype = 0; itype < ntypes; itype++) if (ws[itype] > 0) { bigtype = itype; break; } int dfe_length; if (dfeflag == 0) { dfe_length = 3 * N; for (pos = 0; pos < N; pos++) { // loop on all positions of the sequence for (new_base = 0; new_base < 3; new_base++) { // 3 possible mutations for each position mutations(seqs[bigtype], pos, dfeseq, new_base); int j = pos * 3 + new_base; DFE[j] = fitness(dfeseq, neighs, wis) / ws[bigtype]; // this is the relative fitness (1+s) } } } else { dfe_length = ndfe; for (j = 0; j < ndfe; j++) { pos = (int)(N * (rand() / (double)RAND_MAX)); // choose a random position in the sequence to introduce mutation while (pos >= N) pos = (int)(N * (rand() / (double)RAND_MAX)); if ((rand() / (double)RAND_MAX) < ftseq[bigtype]) transition(seqs[bigtype], pos, dfeseq); else transversion(seqs[bigtype], pos, dfeseq); DFE[j] = fitness(dfeseq, neighs, wis) / ws[bigtype]; // this is the relative fitness (1+s) } // end loop on j entries of the dfe } // Now, find beneficial and deleterious fractions in the DFE and write them to the file double fben = 0, fdel = 0, s_pos = 0, s_neg = 0; for (j = 0; j < dfe_length; j++) { if (DFE[j] > 1) { fben += 1; s_pos += DFE[j] - 1; } else { fdel += 1; s_neg += DFE[j] - 1; } } if (fben == 0) s_pos = 0; else s_pos /= fben; if (fdel == 0) s_neg = 0; else s_neg /= fdel; fben /= dfe_length; fdel /= dfe_length; // Find minimum hamming distance between the most common type and the initial population // except at generations 0 and 1 iham = 0; if (igen > 1) { min_ham_dist = hamming_distance(seqs[bigtype], initial_seqs[0]); for (itype = 1; itype < initial_ntypes; itype++) { temp_dist = hamming_distance(seqs[bigtype], initial_seqs[itype]); if (temp_dist < min_ham_dist) { min_ham_dist = temp_dist; iham = itype; } } } // Find the number of transitions and transversions among the minimum mutations found above trs_count = 0; trv_count = 0; if (igen > 1) { for (int i = 0; i < N; i++) { if (is_tr(seqs[bigtype][i], initial_seqs[iham][i]) == 1) trs_count += 1; else if (is_tr(seqs[bigtype][i], initial_seqs[iham][i]) == 2) trv_count += 1; } } fprintf(fpdfe, "%d %f %f %f %f %f %f %f %f\n", igen, (float)fben, (float)fdel, (float)s_pos, (float)s_neg, (float)ws[bigtype], (float)min_ham_dist, (float)trs_count, (float)trv_count); // Find the separated transitions and transversions DFEs for the most common type int ind; int tv_dfe_length, ts_dfe_length; if (dfeflag == 0) { tv_dfe_length = 2 * N; ts_dfe_length = N; for (pos = 0; pos < N; pos++) { // loop on all positions of the sequence for (new_base = 0; new_base < 2; new_base++) { // 2 possible transversions for each position mutations(seqs[bigtype], pos, dfeseq, new_base); double wtmp = fitness(dfeseq, neighs, wis) / ws[bigtype]; // this is the relative fitness (1+s) ind = pos * 2 + new_base; tv_DFE[ind] = wtmp; } mutations(seqs[bigtype], pos, dfeseq, 2); // 1 possible transition double wtmp = fitness(dfeseq, neighs, wis) / ws[bigtype]; // this is the relative fitness (1+s) ts_DFE[pos] = wtmp; } } else { tv_dfe_length = ndfe; ts_dfe_length = ndfe; for (j = 0; j < ndfe; j++) { // transversions first int pos = (int)(N * (rand() / (double)RAND_MAX)); // choose a random position in the sequence to introduce mutation if (rand() / (double)RAND_MAX < 0.5) // choose one tv posibility or the other randomly mutations(seqs[bigtype], pos, dfeseq, 0); else mutations(seqs[bigtype], pos, dfeseq, 1); double wtmp = fitness(dfeseq, neighs, wis) / ws[bigtype]; // this is the relative fitness (1+s) tv_DFE[j] = wtmp; } // end loop on j entries of the dfe for (j = 0; j < ndfe; j++) { // Then transitions int pos = (int)(N * rand() / (double)RAND_MAX); // choose a random position in the sequence to introduce mutation mutations(seqs[bigtype], pos, dfeseq, 2); double wtmp = fitness(dfeseq, neighs, wis) / ws[bigtype]; // this is the relative fitness (1+s) ts_DFE[j] = wtmp; } // end loop on j entries of the dfe } // Now find Mean positive and negative selection coefficients, // and the fractions of beneficial and deleterious mutations double tv_fben = 0, tv_fdel = 0, ts_fben = 0, ts_fdel = 0; double tv_s_pos = 0, tv_s_neg = 0, ts_s_pos = 0, ts_s_neg = 0; for (j = 0; j < tv_dfe_length; j++) { // tv if (tv_DFE[j] > 1) { tv_fben += 1; tv_s_pos += tv_DFE[j] - 1; } else { tv_fdel += 1; tv_s_neg += tv_DFE[j] - 1; } } for (j = 0; j < ts_dfe_length; j++) { // ts if (ts_DFE[j] > 1) { ts_fben += 1; ts_s_pos += ts_DFE[j] - 1; } else { ts_fdel += 1; ts_s_neg += ts_DFE[j] - 1; } } if (tv_fben == 0) tv_s_pos = 0; else tv_s_pos /= tv_fben; if (tv_fdel == 0) tv_s_neg = 0; else tv_s_neg /= tv_fdel; if (ts_fben == 0) ts_s_pos = 0; else ts_s_pos /= ts_fben; if (ts_fdel == 0) ts_s_neg = 0; else ts_s_neg /= ts_fdel; tv_fben /= tv_dfe_length; tv_fdel /= tv_dfe_length; ts_fben /= ts_dfe_length; ts_fdel /= ts_dfe_length; fprintf(fpdfe_tr, "%d %f %f %f %f %f %f %f %f %f\n", igen, (float)ts_fben, (float)ts_fdel, (float)ts_s_pos, (float)ts_s_neg, (float)tv_fben, (float)tv_fdel, (float)tv_s_pos, (float)tv_s_neg, (float)ws[bigtype]); } // finished creating and writing out the DFEs nweights[0] = (float)Noftype[0] / ntotal; for (itype = 1; itype < ntypes; itype++) nweights[itype] = nweights[itype - 1] + (float)Noftype[itype] / ntotal; nweights[ntypes - 1] = 1; // fix any rounding errors // Pick random individuals and change their bias and/or mutation rate at one generation only (gen_jump) // The individuals are chosen from one of the types with probability proportional to the // number of individuals in each type (using the weights found previously) double nmutes; int old_ntypes; if (igen == gen_jump) { old_ntypes = ntypes; // stors the number of genotypes before adding the mutations nmutes = ntotal * fraction; if (ran1(&seed) < (nmutes - floor(nmutes))) nmutes = ceil(nmutes); else nmutes = floor(nmutes); for (imut = 0; imut < nmutes; imut++) { type = 0; float r = ran1(&seed); while (r > nweights[type]) type++; while (Noftype[type] == 0) { // if the type chosen has no more individuals, choose another one r = ran1(&seed); type = 0; while (r > nweights[type]) type++; } Noftype[type]--; //subtract mutant from its type ntypes++; // add a new type if (ntypes > MAXTYPES) { fprintf(stdout, "Too many types!\n"); fclose(fpout); exit(2); }; Noftype[ntypes - 1] = 1; // add mutant to the new type fts = bias_jump; mus[ntypes - 1] = mu_jump; ws[ntypes - 1] = ws[type]; ftseq[ntypes - 1] = fts; for (i = 0; i < N; i++) seqs[ntypes - 1][i] = seqs[type][i]; } // Now, some of the new genotypes might be repeated, so we need to join them int t; // works as a boolean test for whether two types are the same or not for (itype = old_ntypes; itype < ntypes - 1; itype++) { for (jtype = itype + 1; jtype < ntypes; jtype++) { t = 1; n = 0; if (Noftype[jtype] != 0) { if (ftseq[itype] != ftseq[jtype] || mus[itype] != mus[jtype]) t = 0; while (t == 1 && n < N) { if (seqs[itype][n] != seqs[jtype][n]) t = 0; n++; } if (t == 1) { Noftype[itype] = Noftype[itype] + Noftype[jtype]; Noftype[jtype] = 0; } } } } } // remove any types that have zero individuals for (itype = 0; itype < ntypes; itype++) { if (Noftype[itype] == 0) { jtype = ntypes - 1; while ((Noftype[jtype] == 0) && jtype > itype) { jtype--; ntypes--; } if (jtype == itype) ntypes--; else { Noftype[itype] = Noftype[jtype]; ntypes--; ws[itype] = ws[jtype]; ftseq[itype] = ftseq[jtype]; mus[itype] = mus[jtype]; for (i = 0; i < N; i++) seqs[itype][i] = seqs[jtype][i]; } } } // loop on itype, removing zero entries // each individual in the next generation is chosen according to // the freq of each type in the current generation, weighted by fitness and Ricker factor double ricker = exp(1.0 - (double)ntotal / (double)carryingcap); ntotal = 0; for (itype = 0; itype < ntypes; itype++) { float mean = (float)(Noftype[itype] * ricker * ws[itype] / wbar); Noftype[itype] = poidev((float)(Noftype[itype] * ricker * ws[itype] / wbar), &seed); ntotal += Noftype[itype]; } // After making the new generation, we allow mutation and recompute fitness // figure out how many sequence mutations for each type according to its own mutation rate // Then, apply them for (itype = 0; itype < ntypes; itype++) { nmutes = mus[itype] * Noftype[itype]; // This will be a floating point number (n+f) // we ceil or floor randomly in the next two lines with prob=f if (ran1(&seed) < (nmutes - floor(nmutes))) nmutes = ceil(nmutes); else nmutes = floor(nmutes); for (imutes = 0; imutes < nmutes; imutes++) { Noftype[itype]--; //subtract mutant from its type ntypes++; // add a new type if (ntypes > MAXTYPES) { fprintf(stdout, "Too many types!\n"); fclose(fpout); exit(2); }; Noftype[ntypes - 1] = 1; // add mutant to the new type // a point mutation occurs in the sequence at a random position pos = (int)(N * (rand() / (double)RAND_MAX)); while (pos >= N) pos = (int)(N * (rand() / (double)RAND_MAX)); if ((rand() / (double)RAND_MAX) < ftseq[itype]) transition(seqs[itype], pos, seqs[ntypes - 1]); else transversion(seqs[itype], pos, seqs[ntypes - 1]); ws[ntypes - 1] = fitness(seqs[ntypes - 1], neighs, wis); ftseq[ntypes - 1] = ftseq[itype]; mus[ntypes - 1] = mus[itype]; } // finish loop on imutes } // finish loop on types // remove any types that have zero individuals for (itype = 0; itype < ntypes; itype++) { if (Noftype[itype] == 0) { jtype = ntypes - 1; while ((Noftype[jtype] == 0) && jtype > itype) { jtype--; ntypes--; } if (jtype == itype) ntypes--; else { Noftype[itype] = Noftype[jtype]; ntypes--; ws[itype] = ws[jtype]; ftseq[itype] = ftseq[jtype]; mus[itype] = mus[jtype]; for (i = 0; i < N; i++) seqs[itype][i] = seqs[jtype][i]; } } } // loop on itype, removing zero entries // compute some summary statistics for the output file, written each generation ntotal = 0; ftbar = 0; wbar = 0; mubar = 0; for (itype = 0; itype < ntypes; itype++) { ntotal += Noftype[itype]; ftbar += ftseq[itype] * Noftype[itype]; wbar += ws[itype] * Noftype[itype]; mubar += mus[itype] * Noftype[itype]; } // find the most common genotype int tmpN = Noftype[0]; int bigtype = 0; for (itype = 1; itype < ntypes; itype++) if (Noftype[itype] > tmpN) { tmpN = Noftype[itype]; bigtype = itype; } fprintf(fpout, "%d %d %f %f %f %f %f\n", ntotal, ntypes, (float)ftbar / ntotal, (float)ftseq[bigtype], (float)wbar / ntotal, (float)mubar / ntotal, (float)mus[bigtype]); if (ntotal == 0) { fprintf(stdout, "Population went extinct at generation %d\n", igen); igen = ngens + 1; } // Store initial population at generation 1 if (igen == 1) { for (i = 0; i < ntypes; i++) { for (n = 0; n < N; n++) initial_seqs[i][n] = seqs[i][n]; initial_ws[i] = ws[i]; initial_ftseq[i] = ftseq[i]; initial_Noftype[i] = Noftype[i]; } initial_ntypes = ntypes; } // Count this replicate if the mutator has fixed after 5000 generations of its emergence if (igen==gen_jump+5000 && mus[bigtype]!=mu) count += 1; } // loop on igen, a generation has been completed fclose(fpout); fclose(fpdfe); fclose(fpdfe_tr); } // loop on irep fprintf(stdout, "nreps= %d, K =%d, bias = %f, F= %f, new bias= %f, count= %d\n", nreps, K, ftsave, mu_jump/mu, bias_jump , count); printf("Hello, World!"); clock_t end = clock(); double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; fprintf(stdout, "Time spent: %f\n", time_spent); } void transition(int seq[MAXN], int pos, int newseq[MAXN]) // (original sequence, position to mutate, returns new sequence) { for (int i = 0; i < N; i++) newseq[i] = seq[i]; switch (seq[pos]) { case 0: newseq[pos] = 1; break; case 1: newseq[pos] = 0; break; case 2: newseq[pos] = 3; break; case 3: newseq[pos] = 2; break; } } void transversion(int seq[MAXN], int pos, int newseq[MAXN]) { for (int i = 0; i < N; i++) newseq[i] = seq[i]; switch (seq[pos]) { case 0: if ((rand() / (double)RAND_MAX) < 0.5) newseq[pos] = 2; else newseq[pos] = 3; break; case 1: if ((rand() / (double)RAND_MAX) < 0.5) newseq[pos] = 2; else newseq[pos] = 3; break; case 2: if ((rand() / (double)RAND_MAX) < 0.5) newseq[pos] = 0; else newseq[pos] = 1; break; case 3: if ((rand() / (double)RAND_MAX) < 0.5) newseq[pos] = 0; else newseq[pos] = 1; break; } } void mutations(int seq[MAXN], int pos, int newseq[MAXN], int new_base) // performs the 1st, 2nd, or 3rd possible point mutation for the sequence at given position // depending on the new_base variable = 0, 1, or 2 // 0 and 1 give the two possible transversions, and 2 gives the one possible transition { for (int i = 0; i < N; i++) newseq[i] = seq[i]; switch (seq[pos]) { case 0: if (new_base == 0) newseq[pos] = 2; else if (new_base == 1) newseq[pos] = 3; else newseq[pos] = 1; break; case 1: if (new_base == 0) newseq[pos] = 2; else if (new_base == 1) newseq[pos] = 3; else newseq[pos] = 0; break; case 2: if (new_base == 0) newseq[pos] = 0; else if (new_base == 1) newseq[pos] = 1; else newseq[pos] = 3; break; case 3: if (new_base == 0) newseq[pos] = 0; else if (new_base == 1) newseq[pos] = 1; else newseq[pos] = 2; break; } } double fitness(int seq[MAXN], int neighs[K + 1][MAXN], double** wis) // Calculates fitness of given sequence { double wtmp = 0; int lookind, n, k; for (n = 0; n < N; n++) { lookind = seq[n]; if (K > 0) for (k = 0; k < K; k++) lookind += seq[neighs[k][n]] * pow(4.0, (double)(k + 1)); wtmp += wis[lookind][n]; } return wtmp; } int hamming_distance(int seq_a[MAXN], int seq_b[MAXN]) { int dist = 0; for (int i = 0; i < N; i++) { if (seq_a[i] != seq_b[i]) dist += 1; } return dist; } int is_tr(int x, int y) { if (x == y) return 0; if ((x == 0 && y == 1) || (x == 1 && y == 0) || (x == 2 && y == 3) || (x == 3 && y == 2)) return 1; else return 2; }