examples/long-running-process: Add termination

Add a LongRunningProcess.terminate() method, and expose it in the
example.

Getting this race-free is a bit tricky: The simple way is to tell
systemd that `SIGTERM` is a `SuccessExitStatus`, but this cannot be
given to systemd-run with systemd versions up to at least 241. So
remember that the unit got terminated, and reset it after it stops and
goes into the "failed" state.

Closes #14373
This commit is contained in:
Martin Pitt 2020-07-15 21:12:49 +02:00 committed by Marius Vollmer
parent cf2eed5de7
commit a8ab1ed480
3 changed files with 56 additions and 9 deletions

View File

@ -31,14 +31,17 @@ function update(process) {
break;
case ProcessState.STOPPED:
run_button.removeAttribute("disabled");
run_button.innerHTML = "Start";
break;
case ProcessState.RUNNING:
run_button.setAttribute("disabled", "");
run_button.removeAttribute("disabled");
run_button.innerHTML = "Terminate";
// StateChangeTimestamp property is in µs since epoch, but journalctl expects seconds
showJournal(process.serviceName, "--since=@" + Math.floor(process.startTimestamp / 1000000));
break;
case ProcessState.FAILED:
run_button.setAttribute("disabled", "");
run_button.innerHTML = "Start";
// Show the whole journal of this boot
showJournal(process.serviceName, "--boot");
break;
@ -69,10 +72,13 @@ cockpit.transport.wait(() => {
* This runs as root, thus will be shared with all privileged Cockpit sessions.
*/
run_button.addEventListener("click", () => {
process.run(["/bin/sh", "-ec", command.value])
.catch(ex => {
state.innerHTML = "Error: " + ex.toString();
run_button.setAttribute("disabled", "");
});
if (process.state === ProcessState.RUNNING)
process.terminate();
else
process.run(["/bin/sh", "-ec", command.value])
.catch(ex => {
state.innerHTML = "Error: " + ex.toString();
run_button.setAttribute("disabled", "");
});
});
});

View File

@ -49,12 +49,12 @@ export class LongRunningProcess {
* argument is `this` LongRunningProcess instance.
*/
constructor(serviceName, updateCallback) {
// don't require superuser; this is only for reading the current state
this.systemdClient = cockpit.dbus("org.freedesktop.systemd1");
this.systemdClient = cockpit.dbus("org.freedesktop.systemd1", { superuser: "require" });
this.serviceName = serviceName;
this.updateCallback = updateCallback;
this._setState(ProcessState.INIT);
this.startTimestamp = null; // µs since epoch
this.terminated = false;
// Watch for start event of the service
this.systemdClient.subscribe({ interface: I_SD_MGR, member: "JobNew" }, (path, iface, signal, args) => {
@ -79,6 +79,18 @@ export class LongRunningProcess {
{ superuser: "require", err: "message", ...options });
}
/* Stop long-running process while it is RUNNING, or reset a FAILED one */
terminate() {
if (this.state !== ProcessState.RUNNING && this.state !== ProcessState.FAILED)
throw new Error(`cannot terminate LongRunningProcess in state ${ this.sate }`);
/* This sends a SIGTERM to the unit, causing it to go into "failed" state. This would not
* happen with `systemd-run -p SuccessExitStatus=0`, but that does not yet work on older
* OSes with systemd 241 So let checkState() know that a failure is due to termination. */
this.terminated = true;
return this.systemdClient.call(O_SD_OBJ, I_SD_MGR, "StopUnit", [this.serviceName, "replace"], { type: "ss" });
}
/*
* below are internal private methods
*/
@ -88,6 +100,7 @@ export class LongRunningProcess {
if (state === this.state)
return;
this.state = state;
this.terminated = false;
if (this.updateCallback)
this.updateCallback(this);
}
@ -100,7 +113,12 @@ export class LongRunningProcess {
break;
case 'failed':
this.startTimestamp = null; // TODO: can we derive this from InvocationID?
this._setState(ProcessState.FAILED);
if (this.terminated) {
/* terminating causes failure; reset that and do not announce it as failed */
this.systemdClient.call(O_SD_OBJ, I_SD_MGR, "ResetFailedUnit", [this.serviceName], { type: "s" })
} else {
this._setState(ProcessState.FAILED);
}
break;
case 'inactive':
this._setState(ProcessState.STOPPED);

View File

@ -93,6 +93,7 @@ class TestLongRunning(MachineCase):
# run a command that the test can control synchronously
ack_file = self.vm_tmpdir + "/ack_a";
b.set_val("#command", "date; echo STEP_A; until [ -e %s ]; do sleep 1; done; echo STEP_B; sleep 1; echo DONE" % ack_file)
b.wait_text("button#run", "Start")
b.click("button#run")
b.wait_text("#state", "cockpit-longrunning.service running")
@ -104,6 +105,7 @@ class TestLongRunning(MachineCase):
b.logout()
b.login_and_go("/long-running-process")
b.wait_text("#state", "cockpit-longrunning.service running")
b.wait_text("button#run", "Terminate")
b.wait_in_text("#output", "\nSTEP_A\n")
self.assertEqual(m.execute("systemctl is-active cockpit-longrunning.service || true").strip(), "activating")
@ -120,6 +122,7 @@ class TestLongRunning(MachineCase):
b.login_and_go("/long-running-process")
b.wait_text("#state", "cockpit-longrunning.service stopped")
b.wait_text("#output", "")
b.wait_text("button#run", "Start")
# failing process
m.execute("rm -f " + ack_file)
@ -134,6 +137,7 @@ class TestLongRunning(MachineCase):
self.assertNotIn("\nNOTME", out)
# does not contain previous logs
self.assertNotIn("STEP_B", out)
b.wait_text("button#run", "Start")
# failing state gets picked up on page reconnect
b.logout()
@ -144,6 +148,25 @@ class TestLongRunning(MachineCase):
self.assertIn("\nBREAK_A\n", out)
self.assertNotIn("\nNOTME", out)
b.wait_present("button#run:disabled")
b.wait_text("button#run", "Start")
# reset
m.execute("systemctl reset-failed cockpit-longrunning.service")
b.wait_text("#state", "cockpit-longrunning.service stopped")
# cancel long-running command
b.set_val("#command", "for i in $(seq 100); do echo LONG$i; sleep 1; done")
b.wait_text("button#run", "Start")
b.click("button#run")
b.wait_text("#state", "cockpit-longrunning.service running")
b.wait_text("button#run", "Terminate")
b.wait_in_text("#output", "\nLONG2\n")
b.click("button#run")
# terminates cleanly
b.wait_text("#state", "cockpit-longrunning.service stopped")
self.assertEqual(m.execute("systemctl is-active cockpit-longrunning.service || true").strip(), "inactive")
b.wait_in_text("#output", "\nLONG2\n")
self.assertNotIn("\nLONG30\n", b.text("#output"))
if __name__ == '__main__':