diff options
author | Nathan Whitaker <17734409+nathanwhit@users.noreply.github.com> | 2024-03-29 23:23:48 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-29 23:23:48 -0700 |
commit | 99720d0713ecc17245e9b08652aadcb160d92cb8 (patch) | |
tree | 88682deabbc84b43c50ff058a6419f199e816cf2 /tests/integration/jupyter_tests.rs | |
parent | 524e451bfbd36d3f5a00a16a7115b31b56805f10 (diff) |
chore: Make jupyter integration tests less flaky and avoid hang (#23134)
There's a TOCTOU issue that can happen when selecting unused ports for
the server to use (we get assigned an unused port by the OS, and between
then and when the server actually binds to the port another test steals
it). Improve this by checking if the server existed soon after setup,
and if so we retry starting it. Client connection can also fail
spuriously (in local testing) so added a retry mechanism.
This also fixes a hang, where if the server exited (almost always due to
the issue described above) before we connected to it, attempting to
connect our client ZMQ sockets to it would just hang. To resolve this, I
added a timeout so we can't wait forever.
Diffstat (limited to 'tests/integration/jupyter_tests.rs')
-rw-r--r-- | tests/integration/jupyter_tests.rs | 68 |
1 files changed, 53 insertions, 15 deletions
diff --git a/tests/integration/jupyter_tests.rs b/tests/integration/jupyter_tests.rs index 02695c595..af8101ea7 100644 --- a/tests/integration/jupyter_tests.rs +++ b/tests/integration/jupyter_tests.rs @@ -191,7 +191,24 @@ async fn connect_socket<S: zeromq::Socket>( ) -> S { let addr = spec.endpoint(port); let mut socket = S::new(); - socket.connect(&addr).await.unwrap(); + let mut connected = false; + for _ in 0..5 { + match timeout(Duration::from_secs(5), socket.connect(&addr)).await { + Ok(Ok(_)) => { + connected = true; + break; + } + Ok(Err(e)) => { + eprintln!("Failed to connect to {addr}: {e}"); + } + Err(e) => { + eprintln!("Timed out connecting to {addr}: {e}"); + } + } + } + if !connected { + panic!("Failed to connect to {addr}"); + } socket } @@ -369,27 +386,48 @@ impl Drop for JupyterServerProcess { } } -fn setup_server() -> (TestContext, ConnectionSpec, JupyterServerProcess) { +async fn setup_server() -> (TestContext, ConnectionSpec, JupyterServerProcess) { let context = TestContextBuilder::new().use_temp_cwd().build(); - let conn = ConnectionSpec::default(); + let mut conn = ConnectionSpec::default(); let conn_file = context.temp_dir().path().join("connection.json"); conn_file.write_json(&conn); - let process = context - .new_command() - .piped_output() - .args_vec(vec![ - "jupyter", - "--kernel", - "--conn", - conn_file.to_string().as_str(), - ]) - .spawn() - .unwrap(); + + let start_process = |conn_file: &test_util::PathRef| { + context + .new_command() + .args_vec(vec![ + "jupyter", + "--kernel", + "--conn", + conn_file.to_string().as_str(), + ]) + .spawn() + .unwrap() + }; + + // try to start the server, retrying up to 5 times + // (this can happen due to TOCTOU errors with selecting unused TCP ports) + let mut process = start_process(&conn_file); + tokio::time::sleep(Duration::from_millis(1000)).await; + + for _ in 0..5 { + if process.try_wait().unwrap().is_none() { + break; + } else { + conn = ConnectionSpec::default(); + conn_file.write_json(&conn); + process = start_process(&conn_file); + tokio::time::sleep(Duration::from_millis(1000)).await; + } + } + if process.try_wait().unwrap().is_some() { + panic!("Failed to start Jupyter server"); + } (context, conn, JupyterServerProcess(Some(process))) } async fn setup() -> (TestContext, JupyterClient, JupyterServerProcess) { - let (context, conn, process) = setup_server(); + let (context, conn, process) = setup_server().await; let client = JupyterClient::new(&conn).await; client.io_subscribe("").await.unwrap(); |