fail jobs if workers die
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -2,8 +2,17 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import requests
|
import requests
|
||||||
|
from os import environ
|
||||||
|
|
||||||
resp = requests.post("http://localhost:8080/status", json=[{"id": int(sys.argv[1])}])
|
def print_and_exit(s):
|
||||||
|
print(s)
|
||||||
|
exit(0)
|
||||||
|
|
||||||
|
resp = requests.post(
|
||||||
|
"http://localhost:8080/status",
|
||||||
|
json=[{"id": int(sys.argv[1])}],
|
||||||
|
headers={"cookie": environ["ZOIDBERG_SECRET"]},
|
||||||
|
)
|
||||||
|
|
||||||
translation = {
|
translation = {
|
||||||
"Submitted": "running",
|
"Submitted": "running",
|
||||||
@@ -11,4 +20,12 @@ translation = {
|
|||||||
"Failed": "failed",
|
"Failed": "failed",
|
||||||
}
|
}
|
||||||
|
|
||||||
print(translation[resp.json()[0]["status"]])
|
j = resp.json()
|
||||||
|
|
||||||
|
if len(j) == 0:
|
||||||
|
print_and_exit("failed")
|
||||||
|
|
||||||
|
if "Running" in j[0]["status"]:
|
||||||
|
print_and_exit("running")
|
||||||
|
|
||||||
|
print_and_exit(translation[resp.json()[0]["status"]])
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import requests
|
import requests
|
||||||
|
from os import environ
|
||||||
|
|
||||||
jobscript = sys.argv[1]
|
jobscript = sys.argv[1]
|
||||||
|
|
||||||
@@ -10,6 +11,7 @@ resp = requests.post(
|
|||||||
json=[
|
json=[
|
||||||
{"cmd": jobscript},
|
{"cmd": jobscript},
|
||||||
],
|
],
|
||||||
|
headers={"cookie": environ["ZOIDBERG_SECRET"]},
|
||||||
)
|
)
|
||||||
assert resp.ok, "http request failed"
|
assert resp.ok, "http request failed"
|
||||||
|
|
||||||
|
|||||||
@@ -40,8 +40,8 @@ impl State {
|
|||||||
#[get("/")]
|
#[get("/")]
|
||||||
async fn index(data: web::Data<State>) -> impl Responder {
|
async fn index(data: web::Data<State>) -> impl Responder {
|
||||||
let workers = data.workers.lock().unwrap();
|
let workers = data.workers.lock().unwrap();
|
||||||
let new_jobs = data.new_jobs.lock().unwrap();
|
let jobs = data.jobs.lock().unwrap();
|
||||||
let page = webpage::render(&*new_jobs, &*workers);
|
let page = webpage::render(&*jobs, &*workers);
|
||||||
HttpResponse::Ok().body(page)
|
HttpResponse::Ok().body(page)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ async fn heartbeat(
|
|||||||
data: web::Data<State>,
|
data: web::Data<State>,
|
||||||
_: Authorization,
|
_: Authorization,
|
||||||
) -> Result<String> {
|
) -> Result<String> {
|
||||||
log::info!("Heartbeat from worker {}", heartbeat.id);
|
log::debug!("Heartbeat from worker {}", heartbeat.id);
|
||||||
let mut workers = data.workers.lock().unwrap();
|
let mut workers = data.workers.lock().unwrap();
|
||||||
for w in workers.iter_mut() {
|
for w in workers.iter_mut() {
|
||||||
if w.id == heartbeat.id {
|
if w.id == heartbeat.id {
|
||||||
@@ -190,11 +190,23 @@ async fn main() -> std::io::Result<()> {
|
|||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
loop {
|
loop {
|
||||||
tokio::time::sleep(Duration::from_secs(10)).await;
|
tokio::time::sleep(Duration::from_secs(10)).await;
|
||||||
let mut workers = s.workers.lock().unwrap();
|
{
|
||||||
workers.retain(|w| match w.last_heartbeat {
|
let mut workers = s.workers.lock().unwrap();
|
||||||
None => true,
|
workers.retain(|w| match w.last_heartbeat {
|
||||||
Some(t) => Utc::now().timestamp() - t < 60,
|
None => true,
|
||||||
})
|
Some(t) => Utc::now().timestamp() - t < 60,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
let workers = s.workers.lock().unwrap();
|
||||||
|
let mut jobs = s.jobs.lock().unwrap();
|
||||||
|
for job in jobs.iter_mut() {
|
||||||
|
if let Status::Running(w) = &job.status {
|
||||||
|
let exists = workers.iter().filter(|x| &x.id == w).count() > 0;
|
||||||
|
if !exists {
|
||||||
|
job.status = Status::Failed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -211,7 +223,7 @@ async fn main() -> std::io::Result<()> {
|
|||||||
.service(heartbeat)
|
.service(heartbeat)
|
||||||
.service(submit)
|
.service(submit)
|
||||||
})
|
})
|
||||||
.bind(("127.0.0.1", 8080))?
|
.bind(("0.0.0.0", 8080))?
|
||||||
.run()
|
.run()
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -52,7 +52,8 @@ pub fn render(jobs: &[Job], workers: &[Worker]) -> String {
|
|||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
<title>Zoidberg</title>
|
<title>Zoidberg</title>
|
||||||
<link rel="icon" href="data:,">
|
<link rel="icon" href="data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%2016%2016'%3E%3Ctext%20x='0'%20y='14'%3E🦀%3C/text%3E%3C/svg%3E" type="image/svg+xml" />
|
||||||
|
|
||||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
|
||||||
{}
|
{}
|
||||||
</head>
|
</head>
|
||||||
|
|||||||
Reference in New Issue
Block a user