Compare commits

...

2 Commits

Author SHA1 Message Date
Johannes Heuel
6fd1cda2a2 change worker id to be a uuid
Some checks failed
continuous-integration/drone/push Build is failing
2022-09-27 14:00:09 +02:00
Johannes Heuel
60df9aca1a add worker heartbeat 2022-09-21 16:28:10 +02:00
7 changed files with 242 additions and 81 deletions

41
Cargo.lock generated
View File

@@ -166,7 +166,7 @@ dependencies = [
"serde_urlencoded",
"smallvec",
"socket2",
"time",
"time 0.3.14",
"url",
]
@@ -351,8 +351,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1"
dependencies = [
"iana-time-zone",
"js-sys",
"num-integer",
"num-traits",
"time 0.1.44",
"wasm-bindgen",
"winapi",
]
@@ -393,7 +396,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94d4706de1b0fa5b132270cddffa8585166037822e260a944fe161acd137ca05"
dependencies = [
"percent-encoding",
"time",
"time 0.3.14",
"version_check",
]
@@ -648,7 +651,7 @@ checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6"
dependencies = [
"cfg-if",
"libc",
"wasi",
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
@@ -929,7 +932,7 @@ checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf"
dependencies = [
"libc",
"log",
"wasi",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys",
]
@@ -1505,6 +1508,17 @@ dependencies = [
"syn",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "time"
version = "0.3.14"
@@ -1686,6 +1700,15 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "uuid"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd6469f4314d5f1ffec476e05f17cc9a78bc7a27a6a857842170bdf8d6f98d2f"
dependencies = [
"getrandom",
]
[[package]]
name = "vcpkg"
version = "0.2.15"
@@ -1708,6 +1731,12 @@ dependencies = [
"try-lock",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
@@ -1879,6 +1908,7 @@ version = "0.1.0"
dependencies = [
"clap",
"env_logger",
"futures",
"log",
"reqwest",
"reqwest-middleware",
@@ -1901,11 +1931,14 @@ name = "zoidberg_server"
version = "0.1.0"
dependencies = [
"actix-web",
"chrono",
"clap",
"env_logger",
"futures",
"log",
"serde_json",
"tokio",
"uuid",
"zoidberg_lib",
]

View File

@@ -20,3 +20,4 @@ tokio = { version = "1", features = ["full"] }
clap = "3.2.22"
env_logger = "0.9"
log = "0.4"
futures = "0.3.24"

View File

@@ -1,13 +1,17 @@
use clap::{App, Arg};
use env_logger::Env;
use futures::future::{AbortHandle, Abortable};
use log;
use reqwest::{header, Client, ClientBuilder};
use std::error::Error;
use std::process::Command;
use std::process::Stdio;
use std::sync::Arc;
use std::time::Duration;
use std::{thread, time};
use tokio::{process::Command, time};
use zoidberg_lib::types::{FetchResponse, Job, RegisterResponse, Status, Update};
use zoidberg_lib::types::{
FetchRequest, FetchResponse, Heartbeat, Job, RegisterResponse, Status, Update,
};
const VERSION: &str = env!("CARGO_PKG_VERSION");
@@ -28,10 +32,11 @@ fn build_client(secret: &str) -> Client {
.expect("Could not create client")
}
#[derive(Debug)]
#[derive(Debug, Clone)]
struct Worker {
id: i32,
id: String,
secret: String,
server: String,
}
impl Worker {
@@ -45,23 +50,24 @@ impl Worker {
let r: RegisterResponse = serde_json::from_str(&body)?;
log::info!("registered worker with id: {}", &r.id);
Ok(Worker {
id: r.id,
id: r.id.to_string(),
secret: secret.to_string(),
server: server.to_string(),
})
}
async fn update(self: &Self, jobs: &[Job]) -> Result<(), Box<dyn Error>> {
async fn update(&self, jobs: &[Job]) -> Result<(), Box<dyn Error>> {
let updates: Vec<Update> = jobs
.iter()
.map(|job| Update {
worker: self.id,
worker: self.id.clone(),
job: job.id,
status: job.status.clone(),
})
.collect();
let body = build_client(&self.secret)
.post("http://localhost:8080/update")
.post(format!("{}/update", self.server))
.json(&updates)
.send()
.await?
@@ -72,9 +78,12 @@ impl Worker {
Ok(())
}
async fn fetch(self: &Self) -> Result<FetchResponse, Box<dyn Error>> {
async fn fetch(&self) -> Result<FetchResponse, Box<dyn Error>> {
let res = build_client(&self.secret)
.get("http://localhost:8080/fetch")
.post(format!("{}/fetch", self.server))
.json(&FetchRequest {
worker_id: self.id.clone(),
})
.send()
.await?;
let body = res.text().await?;
@@ -82,35 +91,35 @@ impl Worker {
Ok(resp)
}
async fn run(self: &Self, job: &Job) -> Result<(), Box<dyn Error>> {
let output = Command::new("bash").arg("-c").arg(&job.cmd).output()?;
log::info!(
"command: {}\nstdout: {}\nstderr: {}",
&job.cmd,
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
);
match output.status.success() {
true => Ok(()),
false => Err(Box::from("Job failed")),
}
async fn heartbeat(&self) {
let _ = build_client(&self.secret)
.post(format!("{}/heartbeat", self.server))
.json(&Heartbeat {
id: self.id.clone(),
})
.send()
.await;
}
}
async fn process(self: &Self, jobs: &[Job]) {
for job in jobs {
let status = match self.run(&job).await {
Ok(()) => Status::Completed,
Err(..) => Status::Failed,
};
let n = &[Job {
status,
..job.clone()
}];
if let Err(error) = self.update(n).await {
log::info!("Could not update job: {}", error);
}
}
/// Executes the job's command line through `bash -c` and waits for it to exit.
///
/// The captured stdout and stderr are logged at info level together with the
/// command itself.
///
/// # Errors
///
/// Returns the underlying I/O error when the process cannot be started or
/// awaited, and a `"Job failed"` error when the exit status is not success.
async fn run(job: &Job) -> Result<(), Box<dyn Error>> {
    let mut shell = Command::new("bash");
    shell
        .arg("-c")
        .arg(&job.cmd)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());
    let finished = shell.output().await?;

    let captured_stdout = String::from_utf8_lossy(&finished.stdout);
    let captured_stderr = String::from_utf8_lossy(&finished.stderr);
    log::info!(
        "command: {}\nstdout: {}\nstderr: {}",
        &job.cmd,
        captured_stdout,
        captured_stderr
    );

    if !finished.status.success() {
        return Err(Box::from("Job failed"));
    }
    Ok(())
}
@@ -130,27 +139,72 @@ async fn main() -> Result<(), Box<dyn Error>> {
.get_matches();
let server = matches.value_of("server").unwrap();
let secret = std::env::var("ZOIDBERG_SECRET").unwrap_or_else(|_| {
println!("Please set the $ZOIDBERG_SECRET environment variable");
eprintln!("Please set the $ZOIDBERG_SECRET environment variable");
std::process::exit(1);
});
let client = Worker::new(server, &secret)
.await
.expect("Could not create client");
let client = Arc::new(
Worker::new(server, &secret)
.await
.expect("Could not create client"),
);
let pause = time::Duration::from_secs(1);
let long_pause = time::Duration::from_secs(20);
let long_pause = time::Duration::from_secs(40);
let heartbeat_pause = time::Duration::from_secs(30);
let (heartbeat_handle, abort_registration) = AbortHandle::new_pair();
let c = Arc::clone(&client);
tokio::spawn(Abortable::new(
async move {
loop {
time::sleep(heartbeat_pause).await;
c.heartbeat().await;
}
},
abort_registration,
));
let mut fail_counter = 0;
loop {
if let Ok(fetch) = client.fetch().await {
let jobs = if let Ok(fetch) = client.fetch().await {
fail_counter = 0;
match fetch {
FetchResponse::Nop => thread::sleep(pause),
FetchResponse::StopWorking => break,
FetchResponse::Jobs(jobs) => client.process(&jobs).await,
FetchResponse::Nop => {
time::sleep(pause).await;
continue;
}
FetchResponse::Terminate(m) => {
println!("Terminate worker: {}", m);
break;
}
FetchResponse::Jobs(jobs) => jobs,
}
} else {
thread::sleep(long_pause);
fail_counter += 1;
if fail_counter == 3 {
log::error!("failed to fetch three times, assume that server crashed and exit");
std::process::exit(1);
}
log::error!("failed to fetch new jobs");
time::sleep(long_pause).await;
continue;
};
for job in jobs {
let status = match run(&job).await {
Ok(()) => Status::Completed,
Err(..) => Status::Failed,
};
let update = &[Job {
status,
..job.clone()
}];
if let Err(error) = client.update(update).await {
log::info!("Could not update job: {}", error);
}
}
}
heartbeat_handle.abort();
Ok(())
}

View File

@@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize)]
pub struct Update {
pub worker: i32,
pub worker: String,
pub job: i32,
pub status: Status,
}
@@ -12,7 +12,7 @@ pub struct Update {
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum Status {
Submitted,
Running,
Running(String),
Completed,
Failed,
}
@@ -27,7 +27,7 @@ impl fmt::Display for Status {
fn fmt(self: &Self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Status::Submitted => write!(f, "submitted"),
Status::Running => write!(f, "running"),
Status::Running(w) => write!(f, "running on worker {}", w),
Status::Completed => write!(f, "completed"),
Status::Failed => write!(f, "failed"),
}
@@ -55,13 +55,18 @@ pub struct Node {
#[derive(Serialize, Deserialize)]
pub struct RegisterResponse {
pub id: i32,
pub id: String,
}
/// Request body a worker posts to the server's `/fetch` endpoint.
#[derive(Serialize, Deserialize)]
pub struct FetchRequest {
/// Id of the worker that is asking for jobs.
pub worker_id: String,
}
#[derive(Serialize, Deserialize)]
pub enum FetchResponse {
Jobs(Vec<Job>),
StopWorking,
Terminate(String),
Nop,
}
@@ -73,5 +78,13 @@ pub struct Submit {
#[derive(Serialize, Deserialize)]
pub struct Worker {
#[serde(default)]
pub id: i32,
pub id: String,
#[serde(default)]
pub last_heartbeat: Option<i64>,
}
/// Payload of a heartbeat request, identifying the worker that sent it.
#[derive(Serialize, Deserialize)]
pub struct Heartbeat {
/// Id of the sending worker. Because of `#[serde(default)]`, a payload
/// without an `id` field deserializes to an empty string instead of failing.
#[serde(default)]
pub id: String,
}

View File

@@ -16,3 +16,6 @@ clap = "3.2"
env_logger = "0.9"
log = "0.4"
futures = "0.3.24"
tokio = { version = "1", features = ["full"] }
chrono = "0.4.22"
uuid = { version = "1.1.2", features = ["v4"] }

View File

@@ -3,19 +3,24 @@ use actix_web::{
dev, get, middleware::Logger, post, web, App, Error, FromRequest, HttpRequest, HttpResponse,
HttpServer, Responder, Result,
};
use chrono::Utc;
use clap;
use env_logger::Env;
use futures::future::{err, ok, Ready};
use log;
use std::sync::Mutex;
use zoidberg_lib::types::{FetchResponse, Job, RegisterResponse, StatusRequest, Update, Worker};
use std::time::Duration;
use uuid::Uuid;
use zoidberg_lib::types::{
FetchRequest, FetchResponse, Heartbeat, Job, RegisterResponse, Status, StatusRequest, Update,
Worker,
};
mod webpage;
const VERSION: &str = env!("CARGO_PKG_VERSION");
struct State {
counter_workers: Mutex<i32>,
counter_jobs: Mutex<i32>,
workers: Mutex<Vec<Worker>>,
new_jobs: Mutex<Vec<Job>>,
@@ -25,7 +30,6 @@ struct State {
impl State {
fn new() -> Self {
Self {
counter_workers: Mutex::new(0),
counter_jobs: Mutex::new(0),
workers: Mutex::new(Vec::new()),
new_jobs: Mutex::new(Vec::new()),
@@ -44,7 +48,6 @@ impl FromRequest for Authorization {
if let Some(head) = req.headers().get("cookie") {
if let Ok(cookie) = head.to_str() {
if let Some(secret) = req.app_data::<String>() {
println!("{} == {}", secret, cookie);
if secret == cookie {
return ok(Authorization {});
} else {
@@ -67,24 +70,40 @@ async fn index(data: web::Data<State>) -> impl Responder {
#[get("/register")]
async fn register(data: web::Data<State>, _: Authorization) -> Result<impl Responder> {
let mut counter_workers = data.counter_workers.lock().unwrap();
*counter_workers += 1;
let mut workers = data.workers.lock().unwrap();
let uuid = Uuid::new_v4().to_string();
workers.push(Worker {
id: *counter_workers,
id: uuid.clone(),
last_heartbeat: None,
});
log::info!("Registered worker node with id: {}", *counter_workers);
Ok(web::Json(RegisterResponse {
id: *counter_workers,
}))
log::info!("Registered worker node with id: {}", uuid);
Ok(web::Json(RegisterResponse { id: uuid }))
}
#[get("/fetch")]
async fn fetch(data: web::Data<State>, _: Authorization) -> Result<impl Responder> {
#[post("/fetch")]
async fn fetch(
data: web::Data<State>,
f: web::Json<FetchRequest>,
_: Authorization,
) -> Result<impl Responder> {
let requesting_worker = f.into_inner().worker_id;
{
let workers = data.workers.lock().unwrap();
if workers.iter().filter(|w| w.id == requesting_worker).count() != 1 {
return Ok(web::Json(FetchResponse::Terminate(
"Worker not found".into(),
)));
}
}
let mut new_jobs = data.new_jobs.lock().unwrap();
if let Some(j) = new_jobs.pop() {
let mut jobs = data.jobs.lock().unwrap();
for cj in jobs.iter_mut() {
if cj.id == j.id {
cj.status = Status::Running(requesting_worker.clone())
}
}
return Ok(web::Json(FetchResponse::Jobs(vec![j])));
}
Ok(web::Json(FetchResponse::Nop))
@@ -131,6 +150,22 @@ async fn update(
Ok(format!("Worker updated {} job(s)", n))
}
/// Records a liveness signal from a worker.
///
/// Every registered worker whose id matches the request gets its
/// `last_heartbeat` set to the current UTC Unix timestamp. A heartbeat for an
/// id that is not registered is still answered with the same text response,
/// but a warning is logged so the mismatch can be noticed.
#[post("/heartbeat")]
async fn heartbeat(
    heartbeat: web::Json<Heartbeat>,
    data: web::Data<State>,
    _: Authorization,
) -> Result<String> {
    log::info!("Heartbeat from worker {}", heartbeat.id);
    let now = Utc::now().timestamp();
    let mut known = false;
    {
        // Keep the lock scope minimal: release it before logging/formatting.
        let mut workers = data.workers.lock().unwrap();
        for w in workers.iter_mut().filter(|w| w.id == heartbeat.id) {
            w.last_heartbeat = Some(now);
            known = true;
        }
    }
    if !known {
        log::warn!("Heartbeat from unknown worker {}", heartbeat.id);
    }
    Ok(format!("Heartbeat from worker {}", heartbeat.id))
}
#[post("/submit")]
async fn submit(
data: web::Data<State>,
@@ -163,7 +198,7 @@ async fn main() -> std::io::Result<()> {
env_logger::Builder::from_env(Env::default().default_filter_or("zoidberg_server=info")).init();
let secret = std::env::var("ZOIDBERG_SECRET").unwrap_or_else(|_| {
println!("Please set the $ZOIDBERG_SECRET environment variable");
eprintln!("Please set the $ZOIDBERG_SECRET environment variable");
std::process::exit(1);
});
@@ -174,6 +209,18 @@ async fn main() -> std::io::Result<()> {
let state = web::Data::new(State::new());
let s = state.clone();
tokio::spawn(async move {
loop {
tokio::time::sleep(Duration::from_secs(10)).await;
let mut workers = s.workers.lock().unwrap();
workers.retain(|w| match w.last_heartbeat {
None => true,
Some(t) => Utc::now().timestamp() - t < 60,
})
}
});
HttpServer::new(move || {
App::new()
.wrap(Logger::default())
@@ -184,6 +231,7 @@ async fn main() -> std::io::Result<()> {
.service(fetch)
.service(status)
.service(update)
.service(heartbeat)
.service(submit)
})
.bind(("127.0.0.1", 8080))?
@@ -256,8 +304,8 @@ mod tests {
FetchResponse::Nop => {
panic!("did not expect FetchResponse::Nop")
}
FetchResponse::StopWorking => {
panic!("did not expect FetchResponse::NotWorking")
FetchResponse::Terminate(w) => {
panic!("did not expect FetchResponse::Terminate from worker {}", w)
}
FetchResponse::Jobs(new_jobs) => {
assert_eq!(new_jobs[0].id, 0);
@@ -280,7 +328,7 @@ mod tests {
jobs: Mutex::new(vec![Job {
id: 1,
cmd: cmd.clone(),
status: Status::Running,
status: Status::Running(0),
}]),
}))
.service(status),
@@ -309,7 +357,7 @@ mod tests {
.set_json(vec![Update {
worker: 0,
job: 0,
status: Status::Running,
status: Status::Running(0),
}])
.uri("/update")
.to_request();
@@ -331,7 +379,7 @@ mod tests {
.set_json(vec![Job {
id: 0,
cmd: String::from("hi"),
status: Status::Running,
status: Status::Running(0),
}])
.uri("/submit")
.to_request();

View File

@@ -1,3 +1,4 @@
use chrono::Utc;
use zoidberg_lib::types::{Job, Worker};
// TODO: write nicer frontend
@@ -17,10 +18,17 @@ pub fn render(jobs: &[Job], workers: &[Worker]) -> String {
+ "</tbody></table>";
let workers_html: String = String::from("<table class=\"table is-hoverable\">")
+ "<thead><tr><th><td>ID</td></th></tr></thead><tbody>"
+ "<thead><tr><th><td>ID</td><td>last heartbeat</td></th></tr></thead><tbody>"
+ &workers
.iter()
.map(|w| format!("<tr><th></th><td>{}</td></tr>", w.id))
.map(|w| {
let ts = if let Some(ts) = w.last_heartbeat {
format!("{}", Utc::now().timestamp() - ts)
} else {
String::from("")
};
format!("<tr><th></th><td>{}</td><td>{}</td></tr>", w.id, ts)
})
.collect::<Vec<String>>()
.join("\n")
+ "</tbody></table>";
@@ -43,7 +51,8 @@ pub fn render(jobs: &[Job], workers: &[Worker]) -> String {
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Hello Bulma!</title>
<title>Zoidberg</title>
<link rel="icon" href="data:,">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
{}
</head>