-
Notifications
You must be signed in to change notification settings - Fork 0
/
cilk_version.c
160 lines (127 loc) · 4.9 KB
/
cilk_version.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#include <cilk/cilk.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "coo2csc.h"
#include "mergesort.h"
#include "mmio.h"
#include "utils.h"
// Matrix data
int M, N, nz;
int *lower_coo_rows, *lower_coo_cols, *lower_coo_values;
// Store CSC data for the whole adjacency matrix
int *indices;
int *pointers;
int *values;
// Variables store the lower triangle of the masked matrix product in COO format
int *lower_res_rows, *lower_res_cols, *lower_res_values;
// Variables to store the whole resulting matrix in CSC format
int *res_indices, *res_pointers, *res_values;
// Number of non-zero elemnts in the resulting matrix
int res_nnz = 0;
pthread_t mutex1;
pthread_t mutex2;
int main(int argc, char *argv[]) {
double t = clock();
char *filepath;
MM_typecode *matcode;
int ret_code;
int num_threads;
parse_cli_args_parallel(argc, argv, &filepath, &num_threads);
// Read mtx file
ret_code = mm_read_mtx_crd(filepath, &M, &N, &nz, &lower_coo_rows,
&lower_coo_cols, &lower_coo_values, &matcode);
if (ret_code) {
fprintf(stderr, "Error while reading mm file.");
exit(1);
}
// Convert to zero-based indexing for compatibility with C arrays
for (int i = 0; i < nz; i++) {
lower_coo_rows[i]--;
lower_coo_cols[i]--;
}
int *full_rows = (int *)malloc(2 * nz * sizeof(int));
int *full_cols = (int *)malloc(2 * nz * sizeof(int));
int *full_values = (int *)malloc(2 * nz * sizeof(int));
for (int i = 0; i < nz; i++) {
full_rows[i] = lower_coo_rows[i];
full_rows[nz + i] = lower_coo_cols[i];
full_cols[i] = lower_coo_cols[i];
full_cols[nz + i] = lower_coo_rows[i];
}
indices = (int *)malloc(2 * nz * sizeof(int));
pointers = (int *)malloc((N + 1) * sizeof(int));
// No need to allocate memory for vlaues array as it is not processed by the
// coo2csc() routine
coo2csc(indices, pointers, values, full_rows, full_cols, full_values, 2 * nz,
N, 0, 1);
for (int i = 0; i < N; i++) {
mergeSort(indices, pointers[i], pointers[i + 1] - 1);
}
// Allocate memory to store the results for the matrix product
lower_res_rows = (int *)malloc(nz * sizeof(int));
lower_res_cols = (int *)malloc(nz * sizeof(int));
lower_res_values = (int *)malloc(nz * sizeof(int));
int result;
pthread_mutex_init(&mutex1, NULL);
cilk_for(int i = 0; i < nz; ++i) {
result = get_common_subarray_items_count(
indices, pointers[lower_coo_rows[i]], pointers[lower_coo_rows[i] + 1],
pointers[lower_coo_cols[i]], pointers[lower_coo_cols[i] + 1]);
if (result != 0) {
pthread_mutex_lock(&mutex1);
lower_res_rows[res_nnz] = lower_coo_rows[i];
lower_res_cols[res_nnz] = lower_coo_cols[i];
lower_res_values[res_nnz] = result;
res_nnz++;
pthread_mutex_unlock(&mutex1);
}
}
// Reallocate memory as needed
lower_res_rows = (int *)realloc(lower_res_rows, res_nnz * sizeof(int));
lower_res_cols = (int *)realloc(lower_res_cols, res_nnz * sizeof(int));
lower_res_values = (int *)realloc(lower_res_values, res_nnz * sizeof(int));
// Arrays to store the COO format of the product result (for the whole matrix)
int *full_res_rows = (int *)malloc(2 * res_nnz * sizeof(int));
int *full_res_cols = (int *)malloc(2 * res_nnz * sizeof(int));
int *full_res_values = (int *)malloc(2 * res_nnz * sizeof(int));
for (int i = 0; i < res_nnz; i++) {
full_res_rows[i] = lower_res_rows[i];
full_res_rows[res_nnz + i] = lower_res_cols[i];
full_res_cols[i] = lower_res_cols[i];
full_res_cols[res_nnz + i] = lower_res_rows[i];
full_res_values[i] = lower_res_values[i];
full_res_values[res_nnz + i] = lower_res_values[i];
}
// Variables to store the result in CSC format
res_indices = (int *)malloc(2 * res_nnz * sizeof(int));
res_pointers = (int *)malloc((N + 1) * sizeof(int));
res_values = (int *)malloc(2 * res_nnz * sizeof(int));
coo2csc(res_indices, res_pointers, res_values, full_res_rows, full_res_cols,
full_res_values, 2 * res_nnz, N, 0, 0);
// Sort the indices for every column to get the correct CSC format
for (int i = 0; i < N; i++) {
mergeSort_mirror(res_indices, res_values, res_pointers[i],
res_pointers[i + 1] - 1);
}
int *triangles_vector = (int *)calloc(N, sizeof(int));
pthread_mutex_init(&mutex2, NULL);
cilk_for(int i = 0; i < N; i++) {
for (int j = res_pointers[i]; j < res_pointers[i + 1]; j++) {
triangles_vector[i] += res_values[j];
}
pthread_mutex_lock(&mutex2);
triangles_vector[i] /= 2;
pthread_mutex_unlock(&mutex2);
}
pthread_mutex_destroy(&mutex1);
pthread_mutex_destroy(&mutex2);
int triangle_count = 0;
for (int i = 0; i < N; i++) {
triangle_count += triangles_vector[i];
}
double total_time = (double)(clock() - t) / CLOCKS_PER_SEC;
printf("Triangle count : %d\n", triangle_count / 3);
fprintf(stdout, "Total execution time: %lf\n", total_time);
}