diff -r 4d24bebd6734 src/centrimo.c --- a/src/centrimo.c Mon Apr 08 15:45:07 2019 -0700 +++ b/src/centrimo.c Wed Jul 31 14:48:07 2019 -0400 @@ -152,6 +152,7 @@ MOTIF_T* motif; ARRAYLST_T *windows; RBTREE_T* seq_ids; + RBTREE_T* neg_seq_ids; // how many tests have been done? int n_tests; // how many bins could this motif land in @@ -209,6 +210,7 @@ stats = (MOTIF_STATS_T*)s; arraylst_destroy(destroy_window, stats->windows); if (stats->seq_ids) rbtree_destroy(stats->seq_ids); + if (stats->neg_seq_ids) rbtree_destroy(stats->neg_seq_ids); memset(stats, 0, sizeof(MOTIF_STATS_T)); free(stats); } @@ -926,12 +928,28 @@ } /************************************************************************* + * Output JSON data for input sequences. + *************************************************************************/ +static void json_output_sequences(JSONWR_T* json, const char* name, + SEQ_T **sequences, int seqN) { + jsonwr_property(json, name); + jsonwr_start_array_value(json); + + int i; + for (i = 0; i < seqN; i++) { + jsonwr_str_value(json, get_seq_name(sequences[i])); + } + + jsonwr_end_array_value(json); +} + +/************************************************************************* * Setup the JSON writer and output a lot of pre-calculation data *************************************************************************/ static void start_json(CENTRIMO_OPTIONS_T* options, int argc, char** argv, - ARRAY_T* bg_freqs, SEQ_T** sequences, int seqN, int seq_skipped, int neg_seqN, - int neg_seq_skipped, ARRAYLST_T* dbs, int motifN, int seqlen, - HTMLWR_T** html_out, JSONWR_T** json_out) { + ARRAY_T* bg_freqs, SEQ_T** sequences, SEQ_T** neg_sequences, int seqN, + int seq_skipped, int neg_seqN, int neg_seq_skipped, ARRAYLST_T* dbs, + int motifN, int seqlen, HTMLWR_T** html_out, JSONWR_T** json_out) { int i; MOTIF_DB_T* db; HTMLWR_T *html; @@ -977,6 +995,7 @@ jsonwr_bool_prop(json, "local", options->local); jsonwr_dbl_prop(json, "ethresh", options->evalue_thresh); jsonwr_bool_prop(json, "noseq", options->noseq); + jsonwr_bool_prop(json, "neg_sequences", options->neg_sequences); jsonwr_bool_prop(json, "mcc", options->mcc); jsonwr_end_object_value(json); // output description @@ -1021,15 +1040,15 @@ jsonwr_end_object_value(json); } jsonwr_end_array_value(json); + if (!options->noseq) { - // output the sequences ID - jsonwr_property(json, "sequences"); - jsonwr_start_array_value(json); - for (i = 0; i < seqN; i++) { - jsonwr_str_value(json, get_seq_name(sequences[i])); - } - jsonwr_end_array_value(json); + json_output_sequences(json, "sequences", sequences, seqN); + + if (options->neg_sequences) { + json_output_sequences(json, "neg_sequences", neg_sequences, neg_seqN); + } } + // start the motif array jsonwr_property(json, "motifs"); jsonwr_start_array_value(json); @@ -1039,6 +1058,22 @@ } /************************************************************************* + * Output JSON data for a set of sequence indices, per motif. + *************************************************************************/ +static void json_output_seq_indices(JSONWR_T* json, RBTREE_T* seq_ids, + const char* name, bool store_sequences) { + jsonwr_property(json, name); + jsonwr_start_array_value(json); + if (store_sequences) { + RBNODE_T *seq; + for (seq = rbtree_first(seq_ids); seq; seq = rbtree_next(seq)) { + jsonwr_lng_value(json, (long)(*((int*)rbtree_key(seq)))); + } + } + jsonwr_end_array_value(json); +} + +/************************************************************************* * Output JSON data for a motif. *************************************************************************/ static void output_motif_json( @@ -1051,7 +1086,6 @@ MOTIF_T *motif; MATRIX_T *freqs; int i, j, mlen, asize, end, index; - RBNODE_T *seq; motif = stats->motif; freqs = get_motif_freqs(motif); asize = alph_size_core(get_motif_alph(motif)); @@ -1096,14 +1130,13 @@ } jsonwr_end_array_value(json); } - jsonwr_property(json, "seqs"); - jsonwr_start_array_value(json); - if (store_sequences) { - for (seq = rbtree_first(stats->seq_ids); seq; seq = rbtree_next(seq)) { - jsonwr_lng_value(json, (long)(*((int*)rbtree_key(seq)))); - } + + json_output_seq_indices(json, stats->seq_ids, "seqs", store_sequences); + + if (negative_sequences) { + json_output_seq_indices(json, stats->neg_seq_ids, "neg_seqs", store_sequences); } - jsonwr_end_array_value(json); + jsonwr_property(json, "peaks"); // There are several possible peaks for LocoMo output jsonwr_start_array_value(json); for (index = 0; index < arraylst_size(stats->windows); index++) { @@ -1830,20 +1863,28 @@ motif_stats->db = db; motif_stats->motif = motif; motif_stats->windows = best_windows; + if (!options->noseq) { win_stats = arraylst_get(0, best_windows); + motif_stats->seq_ids = all_sequences_in_window(win_stats->center, win_stats->spread, best_score_thresh, pve_scores); + + if (options->neg_sequences) { + motif_stats->neg_seq_ids = all_sequences_in_window(win_stats->center, + win_stats->spread, best_score_thresh, neg_scores); + } } + motif_stats->n_tests = n_tests; motif_stats->n_bins = n_bins; motif_stats->score_threshold = options->optimize_score ? best_score_thresh : score_thresh; motif_stats->sites = best_total_sites; motif_stats->neg_sites = best_neg_total_sites; - return motif_stats; - + return motif_stats; } // calculate_best_windows + /************************************************************************* * Allocates memory for a best sites buffer *************************************************************************/ @@ -2064,8 +2105,9 @@ // open output files sites_file = start_centrimo_sites(&options); //text_out = start_centrimo_text(&options); - start_json(&options, argc, argv, bg_freqs, sequences, seqN, seq_skipped, neg_seqN, - neg_seq_skipped, dbs, motifN, seqlen, &html, &json); + start_json(&options, argc, argv, bg_freqs, sequences, neg_sequences, + seqN, seq_skipped, neg_seqN, neg_seq_skipped, dbs, motifN, + seqlen, &html, &json); // initialize local variables create_buffers(&options, &buffers, seqlen, seqN, neg_seqN);