/* * CTPrune.h * * Created on: 31 May 2023 * Author: ljchang@outlook.com */ #ifndef CTPRUNE_H_ #define CTPRUNE_H_ #include <cstring> #include <algorithm> using ui = unsigned int; // vertex type using ept = unsigned int; // edge pointer type; unsigned int can be used to process upto two billion undirected edges namespace CTPrune { // orient graph void orient_graph(ui n, ept m, ui *peel_sequence, ept *pstart, ept *pend, ui *edges, ui *rid) { for(ui i = 0;i < n;i ++) rid[peel_sequence[i]] = i; for(ui i = 0;i < n;i ++) { ept &end = pend[i] = pstart[i]; for(ept j = pstart[i];j < pstart[i+1];j ++) if(rid[edges[j]] > rid[i]) edges[end ++] = edges[j]; } } // oriented triangle counting void oriented_triangle_counting(ui n, ept m, ept *pstart, ept *pend, ui *edges, ui *tri_cnt, ui *adj) { memset(adj, 0, sizeof(ui)*n); // long long cnt = 0; memset(tri_cnt, 0, sizeof(ui)*m); for(ui u = 0;u < n;u ++) { for(ept j = pstart[u];j < pend[u];j ++) adj[edges[j]] = j+1; for(ept j = pstart[u];j < pend[u];j ++) { ui v = edges[j]; for(ept k = pstart[v];k < pend[v];k ++) if(adj[edges[k]]) { ++ tri_cnt[j]; ++ tri_cnt[k]; ++ tri_cnt[adj[edges[k]]-1]; // ++ cnt; } } for(ept j = pstart[u];j < pend[u];j ++) adj[edges[j]] = 0; } } bool remove_and_shrink_oriented_tri(ui &n, ept &m, ui triangle_threshold, ui *out_mapping, ui *peel_sequence, ept *pstart, ept *pend, ui *edges, ui *tri_cnt, ui *rid, ui *degree) { for(ui i = 0;i < n;i ++) degree[i] = pstart[i+1] - pstart[i]; ept removed_edges = 0; for(ui i = 0;i < n;i ++) for(ui j = pstart[i];j < pend[i];j ++) if(tri_cnt[j] < triangle_threshold) { -- degree[i]; -- degree[edges[j]]; ++ removed_edges; } if(removed_edges <= m/4) return false; ui cnt = 0; for(ui i = 0;i < n;i ++) if(degree[i] > 0) { out_mapping[cnt] = out_mapping[i]; rid[i] = cnt++; } ui t_cnt = 0; for(ui i = 0;i < n;i ++) if(degree[peel_sequence[i]] > 0) peel_sequence[t_cnt++] = rid[peel_sequence[i]]; ui pos = 0; cnt = 0; for(ui i = 0;i < n;i ++) if(degree[i] > 0) { ui start = pstart[i]; pstart[cnt] = pos; for(ui j = start;j < pend[i];j ++) if(tri_cnt[j] >= triangle_threshold) edges[pos ++] = rid[edges[j]]; pend[cnt] = pos; pos = degree[i] + pstart[cnt]; ++ cnt; } pstart[cnt] = m = pos; n = cnt; return true; } // reorganize the adjacency lists // and sort each adjacency list to be in increasing order void reorganize_oriented_graph(ui n, ui *tri_cnt, ept *pstart, ept *pend, ept *pend2, ui *edges, ept *edges_pointer, ui *buf) { for(ui i = 0;i < n;i ++) pend2[i] = pend[i]; for(ui i = 0;i < n;i ++) { for(ept j = pstart[i];j < pend[i];j ++) { ept &k = pend2[edges[j]]; edges[k] = i; tri_cnt[k] = tri_cnt[j]; ++ k; } } for(ui i = 0;i < n;i ++) { pend2[i] = pend[i]; pend[i] = pstart[i]; } for(ui i = 0;i < n;i ++) { for(ept j = pend2[i];j < pstart[i+1];j ++) { ept &k = pend[edges[j]]; edges[k] = i; tri_cnt[k] = tri_cnt[j]; edges_pointer[k] = j; edges_pointer[j] = k; ++ k; } } ept *pointer = pend2; for(ui i = 0;i < n;i ++) { if(pend[i] == pstart[i]||pend[i] == pstart[i+1]) continue; ept j = pstart[i], k = pend[i], pos = 0; while(j < pend[i]||k < pstart[i+1]) { if(k >= pstart[i+1]||(j < pend[i]&&edges[j] < edges[k])) { buf[pos] = edges[j]; pointer[pos ++] = edges_pointer[j ++]; } else { buf[pos] = edges[k]; pointer[pos ++] = edges_pointer[k ++]; } } for(ept j = 0;j < pos;j ++) { ept idx = pstart[i]+j, k = pointer[j]; edges[idx] = buf[j]; tri_cnt[idx] = tri_cnt[k]; edges_pointer[idx] = k; edges_pointer[k] = idx; } } } bool find(ui w, ept b, ept e, char *deleted, ept &idx, ui *edges) { if(b >= e) return false; while(b+1 < e) { idx = b + (e-b)/2; if(edges[idx] > w) e = idx; else b = idx; } idx = b; if(edges[idx] == w&&!deleted[idx]) return true; return false; } void compact_neighbors(ui u, ui *tri_cnt, ui *edges_pointer, char *deleted, ept *pstart, ept *pend, ui *edges) { ui end = pstart[u]; for(ui i = pstart[u];i < pend[u];i ++) if(!deleted[i]) { edges[end] = edges[i]; tri_cnt[end] = tri_cnt[i]; edges_pointer[end] = edges_pointer[i]; edges_pointer[edges_pointer[end]] = end; deleted[end] = 0; ++ end; } pend[u] = end; } void truss_peeling(ui degree_threshold, ui *Qv, ui &Qv_n, ui triangle_threshold, ui *Qe, ept Qe_n, ui *tri_cnt, ept *edges_pointer, char *deleted, ui *degree, ept *pstart, ept *pend, ui *edges) { for(ept j = 0;j < Qe_n;j += 2) { ui u = Qe[j], v = Qe[j+1], idx; find(v, pstart[u], pend[u], deleted, idx, edges); ui tri_n = tri_cnt[idx]; deleted[idx] = deleted[edges_pointer[idx]] = 1; if(Qv != nullptr) { if(degree[u] == degree_threshold) Qv[Qv_n++] = u; if(degree[v] == degree_threshold) Qv[Qv_n++] = v; } -- degree[u]; -- degree[v]; if(pend[u]-pstart[u] > degree[u]*2) compact_neighbors(u, tri_cnt, edges_pointer, deleted, pstart, pend, edges); if(pend[v]-pstart[v] > degree[v]*2) compact_neighbors(v, tri_cnt, edges_pointer, deleted, pstart, pend, edges); if(pend[u]-pstart[u] < pend[v]-pstart[v]) std::swap(u,v); if(pend[u]-pstart[u] > (pend[v]-pstart[v])*2) { // binary search for(ept k = pstart[v];k < pend[v];k ++) if(!deleted[k]) { if(tri_n&&find(edges[k], pstart[u], pend[u], deleted, idx, edges)) { -- tri_n; -- tri_cnt[edges_pointer[idx]]; if( (tri_cnt[idx]--) == triangle_threshold) { Qe[Qe_n++] = u; Qe[Qe_n++] = edges[idx]; } -- tri_cnt[edges_pointer[k]]; if( (tri_cnt[k]--) == triangle_threshold) { Qe[Qe_n++] = v; Qe[Qe_n++] = edges[k]; } } } } else { // sorted_merge ept ii = pstart[u], jj = pstart[v]; while(ii < pend[u]&&jj < pend[v]) { if(edges[ii] == edges[jj]) { if(!deleted[ii]&&!deleted[jj]) { -- tri_n; -- tri_cnt[edges_pointer[ii]]; if( (tri_cnt[ii]--) == triangle_threshold) { Qe[Qe_n++] = u; Qe[Qe_n++] = edges[ii]; } -- tri_cnt[edges_pointer[jj]]; if( (tri_cnt[jj]--) == triangle_threshold) { Qe[Qe_n++] = v; Qe[Qe_n++] = edges[jj]; } } ++ ii; ++ jj; } else if(edges[ii] < edges[jj]) ++ ii; else ++ jj; } } } } void core_truss_copeeling(ui n, ui degree_threshold, ui triangle_threshold, ui *Qv, ui *Qe, ui *tri_cnt, ept *edges_pointer, char *deleted, char *exists, ui *degree, ept *pstart, ept *pend, ui *edges) { ept Qe_n = 0; for(ui i = 0;i < n;i ++) for(ept j = pstart[i];j < pend[i];j ++) if(tri_cnt[j] < triangle_threshold&&edges[j] > i) { Qe[Qe_n++] = i; Qe[Qe_n++] = edges[j]; } ui Qv_n = 0; for(ui i = 0;i < n;i ++) if(degree[i] < degree_threshold) Qv[Qv_n++] = i; while(Qe_n || Qv_n) { while(Qe_n == 0&&Qv_n != 0) { ui u = Qv[-- Qv_n]; // delete u from the graph due to have a degree < degree_threshold if(degree[u] == 0) continue; if(pend[u]-pstart[u] != degree[u]) compact_neighbors(u, tri_cnt, edges_pointer, deleted, pstart, pend, edges); assert(pend[u] - pstart[u] == degree[u]); for(ept i = pstart[u];i < pend[u];i ++) deleted[i] = deleted[edges_pointer[i]] = exists[edges[i]] = 1; degree[u] = 0; for(ept i = pstart[u];i < pend[u];i ++) { ui v = edges[i]; if( (degree[v]--) == degree_threshold) Qv[Qv_n++] = v; if(pend[v]-pstart[v] > degree[v]*2) compact_neighbors(v, tri_cnt, edges_pointer, deleted, pstart, pend, edges); for(ept j = pstart[v];j < pend[v];j ++) if(!deleted[j]&&edges[j] > v&&exists[edges[j]]) { -- tri_cnt[edges_pointer[j]]; if( (tri_cnt[j]--) == triangle_threshold) { Qe[Qe_n++] = v, Qe[Qe_n++] = edges[j]; } } } for(ept i = pstart[u];i < pend[u];i ++) exists[edges[i]] = 0; } truss_peeling(degree_threshold, Qv, Qv_n, triangle_threshold, Qe, Qe_n, tri_cnt, edges_pointer, deleted, degree, pstart, pend, edges); Qe_n = 0; } } // reduce the graph to its maximal subgraph with and minimum edge triangle count at least triangle_threshold void truss_pruning(ui &n, ept &m, ui triangle_threshold, ui *peel_sequence, ui *out_mapping, ui *rid, ept *pstart, ui *edges, ui *degree, bool output) { if(triangle_threshold == 0) { printf("!!! Triangle_threshold is 0\n"); return ; } ept *pend = new ui[n+1]; orient_graph(n, m, peel_sequence, pstart, pend, edges, rid); ui *tri_cnt = new ui[m]; oriented_triangle_counting(n, m, pstart, pend, edges, tri_cnt, rid); while(n&&remove_and_shrink_oriented_tri(n, m, triangle_threshold, out_mapping, peel_sequence, pstart, pend, edges, tri_cnt, rid, degree)) { oriented_triangle_counting(n, m, pstart, pend, edges, tri_cnt, rid); } ept *pend_buf = new ept[n+1]; ept *edges_pointer = new ept[m]; reorganize_oriented_graph(n, tri_cnt, pstart, pend, pend_buf, edges, edges_pointer, rid); delete[] pend_buf; pend_buf = nullptr; for(ui i = 0;i < n;i ++) { pend[i] = pstart[i+1]; degree[i] = pstart[i+1] - pstart[i]; } ui *Qe = new ui[m]; char *deleted = new char[m]; memset(deleted, 0, sizeof(char)*m); ept Qe_n = 0; for(ui i = 0;i < n;i ++) for(ui j = pstart[i];j < pend[i];j ++) if(tri_cnt[j] < triangle_threshold&&edges[j] > i) { Qe[Qe_n++] = i; Qe[Qe_n++] = edges[j]; } truss_peeling(0, nullptr, Qe_n, triangle_threshold, Qe, Qe_n, tri_cnt, edges_pointer, deleted, degree, pstart, pend, edges); ui cnt = 0; for(ui i = 0;i < n;i ++) if(degree[i] > 0) { out_mapping[cnt] = out_mapping[i]; rid[i] = cnt++; } ui t_cnt = 0; for(ui i = 0;i < n;i ++) if(degree[peel_sequence[i]] > 0) peel_sequence[t_cnt++] = rid[peel_sequence[i]]; ui pos = 0; cnt = 0; for(ui i = 0;i < n;i ++) if(degree[i] > 0) { ui start = pstart[i]; pstart[cnt] = pos; for(ui j = start;j < pend[i];j ++) if(!deleted[j]) edges[pos++] = rid[edges[j]]; ++ cnt; } pstart[cnt] = m = pos; n = cnt; delete[] Qe; delete[] deleted; delete[] edges_pointer; delete[] pend; delete[] tri_cnt; if(output) printf("*** After truss_pruning: n = %u, m = %lu (undirected)\n", n, m/2); } // reduce the graph to its maximal subgraph with minimum degree at least degree_threshold and minimum edge triangle count at least triangle_threshold void core_truss_copruning(ui &n, ept &m, ui degree_threshold, ui triangle_threshold, ui *peel_sequence, ui *out_mapping, ui *rid, ept *pstart, ui *edges, ui *degree, bool output) { if(triangle_threshold == 0) { printf("!!! Triangle_threshold is 0\n"); return ; } if(degree_threshold <= triangle_threshold+1) { printf("!!! Degree_threshold <= triangle_threshold + 1, please invoke truss_pruning\n"); return ; } ept *pend = new ui[n+1]; orient_graph(n, m, peel_sequence, pstart, pend, edges, rid); ui *tri_cnt = new ui[m]; oriented_triangle_counting(n, m, pstart, pend, edges, tri_cnt, rid); while(n&&remove_and_shrink_oriented_tri(n, m, triangle_threshold, out_mapping, peel_sequence, pstart, pend, edges, tri_cnt, rid, degree)) { oriented_triangle_counting(n, m, pstart, pend, edges, tri_cnt, rid); } ept *pend_buf = new ept[n+1]; ept *edges_pointer = new ept[m]; reorganize_oriented_graph(n, tri_cnt, pstart, pend, pend_buf, edges, edges_pointer, rid); delete[] pend_buf; pend_buf = nullptr; for(ui i = 0;i < n;i ++) { pend[i] = pstart[i+1]; degree[i] = pstart[i+1] - pstart[i]; } ui *Qe = new ui[m]; ui *Qv = new ui[n]; char *deleted = new char[m]; memset(deleted, 0, sizeof(char)*m); char *exists = new char[n]; memset(exists, 0, sizeof(char)*n); core_truss_copeeling(n, degree_threshold, triangle_threshold, Qv, Qe, tri_cnt, edges_pointer, deleted, exists, degree, pstart, pend, edges); ui cnt = 0; for(ui i = 0;i < n;i ++) if(degree[i] > 0) { out_mapping[cnt] = out_mapping[i]; rid[i] = cnt++; } ui t_cnt = 0; for(ui i = 0;i < n;i ++) if(degree[peel_sequence[i]] > 0) peel_sequence[t_cnt++] = rid[peel_sequence[i]]; ui pos = 0; cnt = 0; for(ui i = 0;i < n;i ++) if(degree[i] > 0) { ui start = pstart[i]; pstart[cnt] = pos; for(ui j = start;j < pend[i];j ++) if(!deleted[j]) edges[pos++] = rid[edges[j]]; ++ cnt; } pstart[cnt] = m = pos; n = cnt; delete[] Qv; delete[] Qe; delete[] deleted; delete[] edges_pointer; delete[] pend; delete[] tri_cnt; if(output) printf("*** After core_truss_copruning: n = %u, m = %lu (undirected)\n", n, m/2); } void check_core_pruning(ui n, ui m, ui degree_threshold, ept* pstart, ui *edges) { for(ui i = 0;i < n;i ++) { if(pstart[i+1]-pstart[i] < degree_threshold) printf("!!! WA degree in check_core_pruning\n"); for(ept j = pstart[i];j < pstart[i+1];j ++) if(edges[j] >= n) printf("!!! WA edge in check_core_pruning\n"); } } void check_truss_pruning(ui n, ui m, ui triangle_threshold, ept* pstart, ui *edges, char *exists) { memset(exists, 0, sizeof(char)*n); for(ui i = 0;i < n;i ++) { for(ept j = pstart[i];j < pstart[i+1];j ++) exists[edges[j]] = 1; for(ept j = pstart[i];j < pstart[i+1];j ++) { ui v = edges[j], cn = 0; for(ept k = pstart[v];k < pstart[v+1];k ++) if(exists[edges[k]]) ++ cn; if(cn < triangle_threshold) printf("!!! WA triangle in check_truss_pruning\n"); } for(ept j = pstart[i];j < pstart[i+1];j ++) exists[edges[j]] = 0; } } } #endif /* CTPRUNE_H_ */