19
|
1 #include <errno.h> |
|
2 #include <poll.h> |
|
3 #include <signal.h> |
|
4 #include <string.h> |
|
5 #include <time.h> |
|
6 |
|
7 #include <utlist.h> |
|
8 |
|
9 #include "apic.h" |
|
10 #include "log.h" |
|
11 #include "sciworkerd.h" |
|
12 #include "task.h" |
|
13 #include "types.h" |
|
14 #include "util.h" |
|
15 |
|
/* Prefix prepended (by string literal concatenation) to log messages of this file. */
#define TAG "sigworkerd: "
|
17 |
|
18 struct taskentry { |
|
19 struct task *task; |
|
20 struct job job; |
|
21 struct taskentry *next; |
|
22 }; |
|
23 |
|
24 static struct taskentry *taskpending; |
|
25 static struct taskentry *tasks; |
|
26 static struct taskentry *taskfinished; |
|
27 static struct worker worker; |
|
28 static int run = 1; |
|
29 |
|
30 struct sciworkerd sciworkerd = { |
|
31 .fetchinterval = 300, |
|
32 .maxjobs = 4, |
|
33 .timeout = 600 |
|
34 }; |
|
35 |
|
36 static inline void |
|
37 taskentry_free(struct taskentry *entry) |
|
38 { |
|
39 if (task_status(entry->task) == TASKSTATUS_RUNNING) { |
|
40 if (task_kill(entry->task) == 0) |
|
41 task_wait(entry->task); |
|
42 } |
|
43 |
|
44 task_free(entry->task); |
|
45 free(entry); |
|
46 } |
|
47 |
|
48 static void |
|
49 stop(int sign) |
|
50 { |
|
51 log_info(TAG "exiting on signal %d\n", sign); |
|
52 run = 0; |
|
53 } |
|
54 |
|
55 static inline int |
|
56 pending(int id) |
|
57 { |
|
58 const struct taskentry *iter; |
|
59 |
|
60 LL_FOREACH(taskpending, iter) |
|
61 if (iter->job.id == id) |
|
62 return 1; |
|
63 |
|
64 return 0; |
|
65 } |
|
66 |
|
67 static inline void |
|
68 queue(const struct job *job) |
|
69 { |
|
70 struct taskentry *tk; |
|
71 |
|
72 log_info(TAG "queued job build (%d) for tag %s\n", job->id, job->tag); |
|
73 |
|
74 tk = util_calloc(1, sizeof (*tk)); |
|
75 tk->task = task_new(job->tag); |
|
76 memcpy(&tk->job, job, sizeof (*job)); |
|
77 LL_APPEND(taskpending, tk); |
|
78 } |
|
79 |
|
80 static void |
20
|
81 merge(const struct job *jobs, size_t jobsz) |
19
|
82 { |
|
83 size_t total = 0; |
|
84 |
20
|
85 for (ssize_t i = 0; i < jobsz; ++i) { |
|
86 if (!pending(jobs[i].id)) { |
|
87 queue(&jobs[i]); |
|
88 total++; |
19
|
89 } |
20
|
90 } |
19
|
91 |
20
|
92 log_info(TAG "added %zu new pending tasks", total); |
19
|
93 } |
|
94 |
|
95 /* |
|
96 * Fetch jobs periodically, depending on the user setting. |
|
97 */ |
|
98 static void |
|
99 fetch_jobs(void) |
|
100 { |
|
101 static time_t startup; |
|
102 time_t now; |
20
|
103 struct apic req; |
|
104 struct job todo[SCI_JOB_LIST_MAX]; |
|
105 ssize_t todosz; |
19
|
106 |
|
107 if (!startup) |
|
108 startup = time(NULL); |
|
109 |
|
110 if (difftime((now = time(NULL)), startup) >= sciworkerd.fetchinterval) { |
|
111 startup = now; |
|
112 |
20
|
113 if ((todosz = apic_job_todo(&req, todo, UTIL_SIZE(todo), worker.id)) < 0) |
19
|
114 log_warn(TAG "unable to fetch jobs: %s", req.error); |
20
|
115 else |
|
116 merge(todo, todosz); |
|
117 |
|
118 apic_finish(&req); |
19
|
119 } |
|
120 } |
|
121 |
|
122 /* |
|
123 * Fetch information about myself. |
|
124 */ |
|
125 static void |
|
126 fetch_worker(void) |
|
127 { |
20
|
128 struct apic req; |
19
|
129 |
20
|
130 util_strlcpy(&worker.name, sciworkerd.name); |
|
131 |
|
132 if (apic_worker_find(&req, &worker) < 0) |
|
133 log_die(TAG, "unable to fetch worker info: %s", req.error); |
19
|
134 |
|
135 log_info("worker id: %d", worker.id); |
|
136 log_info("worker name: %s", worker.name); |
|
137 log_info("worker description: %s", worker.desc); |
|
138 |
20
|
139 apic_finish(&req); |
19
|
140 } |
|
141 |
|
142 /* |
|
143 * Fetch information about a project. |
|
144 */ |
|
145 static int |
|
146 fetch_project(struct project *project, int id) |
|
147 { |
20
|
148 struct apic req; |
19
|
149 |
20
|
150 if (apic_project_find_id(&req, project, id) < 0) |
|
151 return -1; |
|
152 #if 0 |
19
|
153 if (apic_get(&req, "%s/api/v1/projects/%d", id) < 0) |
|
154 return log_warn(TAG "unable to fetch project info: %s", req.error), -1; |
|
155 if (!req.doc) |
|
156 return log_warn(TAG "empty project response"), -1; |
|
157 if (project_from(project, 1, req.doc) < 0) |
|
158 return log_warn(TAG "unable to parse project: %s", strerror(errno)), -1; |
20
|
159 #endif |
19
|
160 |
|
161 return 0; |
|
162 } |
|
163 |
|
164 static inline size_t |
|
165 count(const struct taskentry *head) |
|
166 { |
|
167 const struct taskentry *iter; |
|
168 size_t tot = 0; |
|
169 |
|
170 LL_FOREACH(head, iter) |
|
171 tot++; |
|
172 |
|
173 return tot; |
|
174 } |
|
175 |
|
176 /* |
|
177 * Start a task. We fetch its script code and then create the task with that |
|
178 * script. |
|
179 */ |
|
180 static int |
|
181 start(struct taskentry *entry) |
|
182 { |
20
|
183 struct apic; |
19
|
184 struct project project; |
|
185 pid_t pid; |
|
186 |
20
|
187 if (apic_project_find_id(&project, entry->job.project_id) < 0) |
19
|
188 return log_warn(TAG "unable to fetch project, dropping task"), -1; |
|
189 if (task_setup(entry->task, project.script) < 0) |
|
190 return log_warn(TAG "unable to setup script code: %s, dropping task", strerror(errno)), -1; |
|
191 if ((pid = task_start(entry->task)) < 0) |
|
192 return log_warn(TAG "unable to spawn task process: %s", strerror(errno)), -1; |
|
193 |
|
194 log_info(TAG "task %lld spawned", (long long int)pid); |
|
195 |
|
196 return 0; |
|
197 } |
|
198 |
|
199 static inline void |
|
200 delete(struct taskentry *entry) |
|
201 { |
|
202 LL_DELETE(taskpending, entry); |
|
203 task_free(entry->task); |
|
204 free(entry); |
|
205 } |
|
206 |
|
207 static void |
|
208 start_all(void) |
|
209 { |
|
210 size_t running = count(tasks); |
|
211 struct taskentry *entry; |
|
212 |
|
213 while (running-- > 0 && (entry = taskpending)) { |
|
214 if (start(entry) < 0) |
|
215 delete(entry); |
|
216 else { |
|
217 LL_DELETE(taskpending, entry); |
|
218 LL_APPEND(tasks, entry); |
|
219 } |
|
220 } |
|
221 } |
|
222 |
|
223 static void |
|
224 process_all(void) |
|
225 { |
|
226 struct taskentry *iter, *next; |
|
227 struct taskcode code; |
|
228 struct pollfd *fds; |
|
229 size_t fdsz, i = 0; |
|
230 int ret; |
|
231 |
|
232 /* First, read every pipes. */ |
|
233 if (!(fdsz = count(tasks))) |
|
234 return; |
|
235 |
|
236 fds = util_calloc(fdsz, sizeof (*fds)); |
|
237 |
|
238 for (iter = tasks; iter; iter = iter->next) |
|
239 task_prepare(iter->task, &fds[i++]); |
|
240 |
|
241 if (poll(fds, fdsz, 5000) < 0) |
|
242 log_warn("poll: %s", strerror(errno)); |
|
243 |
|
244 for (iter = tasks, i = 0; i < fdsz; ++i) { |
|
245 next = iter->next; |
|
246 |
|
247 /* |
|
248 * 0: EOF [wait] |
|
249 * -1: error [kill + wait] |
|
250 * >0: keep going [nothing] |
|
251 */ |
|
252 if ((ret = task_sync(iter->task, &fds[i])) < 0) { |
|
253 log_warn(TAG "pipe error: %s, killing task", strerror(errno)); |
|
254 |
|
255 if (task_kill(iter->task) < 0) |
|
256 log_warn(TAG "task kill error: %s", strerror(errno)); |
|
257 } |
|
258 |
|
259 /* Now wait for the task to complete. */ |
|
260 if (ret <= 0) { |
|
261 if (task_wait(iter->task) < 0) |
|
262 log_warn(TAG "task wait error: %s", strerror(errno)); |
|
263 else { |
|
264 code = task_code(iter->task); |
|
265 |
|
266 switch (task_status(iter->task)) { |
|
267 case TASKSTATUS_EXITED: |
|
268 log_info(TAG "task %lld exited with code %d", |
|
269 (long long int)task_pid(iter->task), code.exitcode); |
|
270 break; |
|
271 case TASKSTATUS_KILLED: |
|
272 log_info(TAG "task %lld killed with signal %d", |
|
273 (long long int)task_pid(iter->task), code.sigcode); |
|
274 break; |
|
275 default: |
|
276 break; |
|
277 } |
|
278 } |
|
279 |
|
280 /* Remove that task and push to the outgoing queue. */ |
|
281 next = iter->next; |
|
282 LL_DELETE(tasks, iter); |
|
283 LL_APPEND(taskfinished, iter); |
|
284 } |
|
285 } |
|
286 |
|
287 free(fds); |
|
288 } |
|
289 |
|
290 /* |
|
291 * Kill all tasks that have been running for too long. |
|
292 */ |
|
293 static void |
|
294 ghost_all(void) |
|
295 { |
|
296 struct taskentry *iter, *tmp; |
|
297 time_t now; |
|
298 |
|
299 LL_FOREACH_SAFE(tasks, iter, tmp) { |
|
300 if (difftime(time(NULL), task_uptime(iter->task)) < sciworkerd.timeout) |
|
301 continue; |
|
302 |
|
303 /* Do not attempt to wait if kill failed to avoid lock. */ |
|
304 log_info(TAG "task timeout, killing"); |
|
305 |
|
306 if (task_kill(iter->task) == 0) |
|
307 task_wait(iter->task); |
|
308 |
|
309 LL_DELETE(tasks, iter); |
|
310 LL_APPEND(taskfinished, iter); |
|
311 } |
|
312 } |
|
313 |
|
314 static int |
|
315 publish(struct taskentry *iter) |
|
316 { |
|
317 // TODO: add sigcode. |
|
318 struct taskcode code = task_code(iter->task); |
|
319 struct jobresult res = { |
|
320 .job_id = iter->job.id, |
|
321 .exitcode = code.exitcode, |
|
322 .log = task_console(iter->task), |
|
323 .worker_id = worker.id |
|
324 }; |
|
325 struct apicreq req; |
|
326 json_t *doc; |
|
327 int ret; |
|
328 |
|
329 doc = jobresult_to(&res, 1); |
|
330 ret = apic_post(&req, doc, "%s/api/v1/jobs", sciworkerd.url); |
|
331 json_decref(doc); |
|
332 |
|
333 if (ret) |
|
334 log_warn(TAG "unable to publish task: %s", req.error); |
|
335 else |
|
336 log_info(TAG "task successfully published"); |
|
337 |
|
338 return ret; |
|
339 } |
|
340 |
|
341 static void |
|
342 publish_all(void) |
|
343 { |
|
344 struct taskentry *iter, *tmp; |
|
345 |
|
346 LL_FOREACH_SAFE(taskfinished, iter, tmp) { |
|
347 if (publish(iter) == 0) { |
|
348 LL_DELETE(taskfinished, iter); |
|
349 taskentry_free(iter); |
|
350 } |
|
351 } |
|
352 } |
|
353 |
|
354 void |
|
355 sciworkerd_init(void) |
|
356 { |
|
357 struct sigaction sa = {0}; |
|
358 |
|
359 log_open("sigworkerd"); |
|
360 |
|
361 sigemptyset(&sa.sa_mask); |
|
362 sa.sa_handler = stop; |
|
363 |
|
364 if (sigaction(SIGINT, &sa, NULL) < 0 || sigaction(SIGTERM, &sa, NULL) < 0) |
|
365 log_die(TAG "sigaction: %s", strerror(errno)); |
|
366 |
|
367 fetch_worker(); |
|
368 } |
|
369 |
|
370 void |
|
371 sciworkerd_run(void) |
|
372 { |
|
373 while (run) { |
|
374 fetch_jobs(); |
|
375 process_all(); |
|
376 ghost_all(); |
|
377 publish_all(); |
|
378 } |
|
379 } |
|
380 |
|
381 void |
|
382 sciworkerd_finish(void) |
|
383 { |
|
384 struct taskentry *iter, *tmp; |
|
385 |
|
386 LL_FOREACH_SAFE(taskpending, iter, tmp) |
|
387 taskentry_free(iter); |
|
388 LL_FOREACH_SAFE(tasks, iter, tmp) |
|
389 taskentry_free(iter); |
|
390 LL_FOREACH_SAFE(taskfinished, iter, tmp) |
|
391 taskentry_free(iter); |
|
392 } |