fail jobs if workers die
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -2,8 +2,17 @@
|
||||
|
||||
import sys
|
||||
import requests
|
||||
from os import environ
|
||||
|
||||
resp = requests.post("http://localhost:8080/status", json=[{"id": int(sys.argv[1])}])
|
||||
def print_and_exit(s):
|
||||
print(s)
|
||||
exit(0)
|
||||
|
||||
resp = requests.post(
|
||||
"http://localhost:8080/status",
|
||||
json=[{"id": int(sys.argv[1])}],
|
||||
headers={"cookie": environ["ZOIDBERG_SECRET"]},
|
||||
)
|
||||
|
||||
translation = {
|
||||
"Submitted": "running",
|
||||
@@ -11,4 +20,12 @@ translation = {
|
||||
"Failed": "failed",
|
||||
}
|
||||
|
||||
print(translation[resp.json()[0]["status"]])
|
||||
j = resp.json()
|
||||
|
||||
if len(j) == 0:
|
||||
print_and_exit("failed")
|
||||
|
||||
if "Running" in j[0]["status"]:
|
||||
print_and_exit("running")
|
||||
|
||||
print_and_exit(translation[resp.json()[0]["status"]])
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import sys
|
||||
import requests
|
||||
from os import environ
|
||||
|
||||
jobscript = sys.argv[1]
|
||||
|
||||
@@ -10,6 +11,7 @@ resp = requests.post(
|
||||
json=[
|
||||
{"cmd": jobscript},
|
||||
],
|
||||
headers={"cookie": environ["ZOIDBERG_SECRET"]},
|
||||
)
|
||||
assert resp.ok, "http request failed"
|
||||
|
||||
|
||||
@@ -40,8 +40,8 @@ impl State {
|
||||
#[get("/")]
|
||||
async fn index(data: web::Data<State>) -> impl Responder {
|
||||
let workers = data.workers.lock().unwrap();
|
||||
let new_jobs = data.new_jobs.lock().unwrap();
|
||||
let page = webpage::render(&*new_jobs, &*workers);
|
||||
let jobs = data.jobs.lock().unwrap();
|
||||
let page = webpage::render(&*jobs, &*workers);
|
||||
HttpResponse::Ok().body(page)
|
||||
}
|
||||
|
||||
@@ -133,7 +133,7 @@ async fn heartbeat(
|
||||
data: web::Data<State>,
|
||||
_: Authorization,
|
||||
) -> Result<String> {
|
||||
log::info!("Heartbeat from worker {}", heartbeat.id);
|
||||
log::debug!("Heartbeat from worker {}", heartbeat.id);
|
||||
let mut workers = data.workers.lock().unwrap();
|
||||
for w in workers.iter_mut() {
|
||||
if w.id == heartbeat.id {
|
||||
@@ -190,11 +190,23 @@ async fn main() -> std::io::Result<()> {
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::time::sleep(Duration::from_secs(10)).await;
|
||||
let mut workers = s.workers.lock().unwrap();
|
||||
workers.retain(|w| match w.last_heartbeat {
|
||||
None => true,
|
||||
Some(t) => Utc::now().timestamp() - t < 60,
|
||||
})
|
||||
{
|
||||
let mut workers = s.workers.lock().unwrap();
|
||||
workers.retain(|w| match w.last_heartbeat {
|
||||
None => true,
|
||||
Some(t) => Utc::now().timestamp() - t < 60,
|
||||
})
|
||||
}
|
||||
let workers = s.workers.lock().unwrap();
|
||||
let mut jobs = s.jobs.lock().unwrap();
|
||||
for job in jobs.iter_mut() {
|
||||
if let Status::Running(w) = &job.status {
|
||||
let exists = workers.iter().filter(|x| &x.id == w).count() > 0;
|
||||
if !exists {
|
||||
job.status = Status::Failed;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -211,7 +223,7 @@ async fn main() -> std::io::Result<()> {
|
||||
.service(heartbeat)
|
||||
.service(submit)
|
||||
})
|
||||
.bind(("127.0.0.1", 8080))?
|
||||
.bind(("0.0.0.0", 8080))?
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -52,7 +52,8 @@ pub fn render(jobs: &[Job], workers: &[Worker]) -> String {
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>Zoidberg</title>
|
||||
<link rel="icon" href="data:,">
|
||||
<link rel="icon" href="data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%2016%2016'%3E%3Ctext%20x='0'%20y='14'%3E🦀%3C/text%3E%3C/svg%3E" type="image/svg+xml" />
|
||||
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
|
||||
{}
|
||||
</head>
|
||||
|
||||
Reference in New Issue
Block a user