fail jobs if workers die
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Johannes Heuel
2022-10-20 10:03:15 +02:00
parent 949801fc4d
commit 71098b44e6
4 changed files with 44 additions and 12 deletions

View File

@@ -2,8 +2,17 @@
import sys
import requests
from os import environ
resp = requests.post("http://localhost:8080/status", json=[{"id": int(sys.argv[1])}])
def print_and_exit(s):
    """Print ``s`` to stdout and terminate the script with exit code 0.

    The result is reported only through the printed text; the process exit
    code is always 0, whatever ``s`` is.
    """
    print(s)
    # Use sys.exit rather than the bare exit(): the latter is injected by the
    # `site` module for interactive sessions and is not defined when Python
    # runs with -S or from a frozen/embedded interpreter.
    sys.exit(0)
resp = requests.post(
"http://localhost:8080/status",
json=[{"id": int(sys.argv[1])}],
headers={"cookie": environ["ZOIDBERG_SECRET"]},
)
translation = {
"Submitted": "running",
@@ -11,4 +20,12 @@ translation = {
"Failed": "failed",
}
print(translation[resp.json()[0]["status"]])
j = resp.json()
if len(j) == 0:
print_and_exit("failed")
if "Running" in j[0]["status"]:
print_and_exit("running")
print_and_exit(translation[resp.json()[0]["status"]])

View File

@@ -2,6 +2,7 @@
import sys
import requests
from os import environ
jobscript = sys.argv[1]
@@ -10,6 +11,7 @@ resp = requests.post(
json=[
{"cmd": jobscript},
],
headers={"cookie": environ["ZOIDBERG_SECRET"]},
)
assert resp.ok, "http request failed"

View File

@@ -40,8 +40,8 @@ impl State {
#[get("/")]
async fn index(data: web::Data<State>) -> impl Responder {
let workers = data.workers.lock().unwrap();
let new_jobs = data.new_jobs.lock().unwrap();
let page = webpage::render(&*new_jobs, &*workers);
let jobs = data.jobs.lock().unwrap();
let page = webpage::render(&*jobs, &*workers);
HttpResponse::Ok().body(page)
}
@@ -133,7 +133,7 @@ async fn heartbeat(
data: web::Data<State>,
_: Authorization,
) -> Result<String> {
log::info!("Heartbeat from worker {}", heartbeat.id);
log::debug!("Heartbeat from worker {}", heartbeat.id);
let mut workers = data.workers.lock().unwrap();
for w in workers.iter_mut() {
if w.id == heartbeat.id {
@@ -190,11 +190,23 @@ async fn main() -> std::io::Result<()> {
tokio::spawn(async move {
loop {
tokio::time::sleep(Duration::from_secs(10)).await;
let mut workers = s.workers.lock().unwrap();
workers.retain(|w| match w.last_heartbeat {
None => true,
Some(t) => Utc::now().timestamp() - t < 60,
})
{
let mut workers = s.workers.lock().unwrap();
workers.retain(|w| match w.last_heartbeat {
None => true,
Some(t) => Utc::now().timestamp() - t < 60,
})
}
let workers = s.workers.lock().unwrap();
let mut jobs = s.jobs.lock().unwrap();
for job in jobs.iter_mut() {
if let Status::Running(w) = &job.status {
let exists = workers.iter().filter(|x| &x.id == w).count() > 0;
if !exists {
job.status = Status::Failed;
}
}
}
}
});
@@ -211,7 +223,7 @@ async fn main() -> std::io::Result<()> {
.service(heartbeat)
.service(submit)
})
.bind(("127.0.0.1", 8080))?
.bind(("0.0.0.0", 8080))?
.run()
.await
}

View File

@@ -52,7 +52,8 @@ pub fn render(jobs: &[Job], workers: &[Worker]) -> String {
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Zoidberg</title>
<link rel="icon" href="data:,">
<link rel="icon" href="data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%2016%2016'%3E%3Ctext%20x='0'%20y='14'%3E🦀%3C/text%3E%3C/svg%3E" type="image/svg+xml" />
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
{}
</head>