This repository has been archived by the owner on Oct 6, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
worker.c
164 lines (127 loc) · 4.01 KB
/
worker.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
/* Copyright (c) 2021, Evgeny Baskov. All rights reserved */
#define _POSIX_C_SOURCE 200809L
#include "util.h"
#include "worker.h"
#include <assert.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Capacity of the threads[] array; init_workers() never starts more workers than this. */
#define MAX_THREADS 16
/* Size in bytes of the arena that init_workers() allocates for job records. */
#define STORAGE_SIZE 65536
/*
 * One queued unit of work.  Records are carved out of the storage arena by
 * submit_work() and linked into the first/last queue.
 */
struct job {
/* Next record in the singly linked queue; NULL for the tail. */
struct job *next;
/* Callback to run; it is called with a pointer to `data`. */
void (*func)(void *);
/* Copy of the argument bytes passed to submit_work() (flexible array member). */
char data[];
} __attribute__((aligned(16)));
/* Number of worker threads; computed in init_workers(), used by fini_workers() to join them. */
int nproc;
/* Handles of the worker threads created by init_workers(). */
static pthread_t threads[MAX_THREADS];
/* Condition variable workers wait on (with in_mtx held) while the queue is empty. */
static pthread_cond_t in_cond;
/* Mutex held around every read or update of first/last in this file. */
static pthread_mutex_t in_mtx;
/*
 * Taken shared by submit_work() while it allocates and enqueues a record, and
 * exclusively while it drains the queue and rewinds the arena.
 */
static pthread_rwlock_t rw;
/* Head and tail of the pending-job queue; both NULL when the queue is empty. */
struct job *first, *last;
/* Shutdown request flag: stored by fini_workers(), loaded atomically by workers. */
static _Bool should_exit;
/* Count of jobs currently executing; bumped before a job runs and dropped after it returns. */
static size_t active;
/* Base address of the job arena (also the pointer handed to free()). */
static uint8_t *storage_start;
/* Bump pointer: address of the next unallocated byte in the arena. */
static uint8_t *storage_cur;
/* One past the last byte of the arena. */
static uint8_t *storage_end;
/*
 * Worker thread entry point.
 *
 * Repeatedly pops the record at the head of the queue (first/last, guarded
 * by in_mtx) and runs its callback outside the lock.  `active` is incremented
 * while the mutex is still held and decremented after the callback returns,
 * so drain_work() can see that a job is in flight even when the queue itself
 * is already empty.
 *
 * The thread returns NULL as soon as it observes should_exit; records that
 * are still queued at that point are not executed.
 *
 * arg is unused.
 */
static void *worker(void *arg) {
    (void)arg;

    for (;;) {
        pthread_mutex_lock(&in_mtx);

        /*
         * The shutdown flag is part of the wait predicate and is tested
         * under in_mtx *before* blocking.  Testing it only outside the lock
         * or only after waking up loses a shutdown request that arrives
         * between the test and pthread_cond_wait(): its broadcast finds no
         * waiter and the thread would then sleep forever.  (The guarantee is
         * complete when the flag is raised with in_mtx held.)
         * The loop also absorbs spurious wake-ups.
         */
        while (!first && !__atomic_load_n(&should_exit, __ATOMIC_RELAXED)) {
            pthread_cond_wait(&in_cond, &in_mtx);
        }

        if (__atomic_load_n(&should_exit, __ATOMIC_RELAXED)) {
            pthread_mutex_unlock(&in_mtx);
            return NULL;
        }

        /* Mark the job as in flight before it leaves the queue. */
        __atomic_add_fetch(&active, 1, __ATOMIC_RELEASE);
        struct job *newjob = first;
        if (first == last) last = NULL;
        first = first->next;
        pthread_mutex_unlock(&in_mtx);

        newjob->func(newjob->data);
        __atomic_sub_fetch(&active, 1, __ATOMIC_RELEASE);
    }
}
/*
 * Return only once the queue is empty and no job is executing.
 *
 * The caller helps out: while records are queued it pops them and runs them
 * on its own thread.  When the queue is empty but `active` is still non-zero
 * it keeps re-checking, releasing and re-taking in_mtx on every pass.
 * During the first pass all waiting workers are woken with one broadcast.
 */
void drain_work(void) {
    bool wake_pending = 1;

    pthread_mutex_lock(&in_mtx);
    for (;;) {
        /* Done when nothing is queued and nothing is in flight. */
        if (!first && !__atomic_load_n(&active, __ATOMIC_ACQUIRE)) break;

        struct job *taken = first;
        if (taken) {
            /* Count the job as in flight before unlinking it. */
            __atomic_add_fetch(&active, 1, __ATOMIC_RELEASE);
            if (taken == last) last = NULL;
            first = taken->next;
        }
        pthread_mutex_unlock(&in_mtx);

        if (wake_pending) {
            wake_pending = 0;
            pthread_cond_broadcast(&in_cond);
        }

        if (taken) {
            taken->func(taken->data);
            __atomic_sub_fetch(&active, 1, __ATOMIC_RELEASE);
        }

        pthread_mutex_lock(&in_mtx);
    }
    pthread_mutex_unlock(&in_mtx);
}
void submit_work(void (*func)(void *), const void *data, size_t data_size) {
// Align args on CACHE_LINE to prefent false sharing
size_t inc = (sizeof(struct job) + data_size + CACHE_LINE - 1) & ~(CACHE_LINE - 1);
pthread_rwlock_rdlock(&rw);
// Lock-less allocation
struct job *new;
while (1) {
new = (void *)__atomic_fetch_add(&storage_cur, inc, __ATOMIC_ACQ_REL);
if ((uint8_t *)new + inc <= storage_end) break;
pthread_rwlock_unlock(&rw);
pthread_rwlock_wrlock(&rw);
drain_work();
__atomic_store_n(&storage_cur, storage_start, __ATOMIC_RELEASE);
pthread_rwlock_unlock(&rw);
pthread_rwlock_rdlock(&rw);
}
new->func = func;
new->next = NULL;
memcpy(new->data, data, data_size);
pthread_mutex_lock(&in_mtx);
bool ins_new = last;
if (ins_new) {
last->next = new;
last = new;
} else {
last = first = new;
}
pthread_mutex_unlock(&in_mtx);
pthread_rwlock_unlock(&rw);
if (ins_new) pthread_cond_signal(&in_cond);
}
void init_workers(void) {
// This is used for faster
// memory allocation for job
// arguments
storage_start = storage_cur = aligned_alloc(CACHE_LINE, STORAGE_SIZE);
storage_end = storage_start + STORAGE_SIZE;
pthread_cond_init(&in_cond, NULL);
pthread_mutex_init(&in_mtx, NULL);
pthread_rwlock_init(&rw, NULL);
nproc = MIN(sysconf(_SC_NPROCESSORS_ONLN), MAX_THREADS);
for (int i = 0; i < nproc; i++)
pthread_create(threads + i, NULL, worker, NULL);
}
void fini_workers(_Bool force) {
if (!force) drain_work();
__atomic_store_n(&should_exit, 1, __ATOMIC_RELAXED);
pthread_cond_broadcast(&in_cond);
for (int i = 0; i < nproc; i++)
pthread_join(threads[i], NULL);
pthread_cond_destroy(&in_cond);
pthread_mutex_destroy(&in_mtx);
free(storage_start);
}