Services provided:
lookup
publish
Called by parent program:
vc-go.setl (Section A.18 [vc-go.setl])
Calls child programs:
vc-camera.setl (Section A.4 [vc-camera.setl])
vc-do.setl (Section A.11 [vc-do.setl])
vc-event.setl (Section A.12 [vc-event.setl])
vc-evjump.setl (Section A.13 [vc-evjump.setl])
vc-evzoom.setl (Section A.14 [vc-evzoom.setl])
vc-giver.setl (Section A.17 [vc-giver.setl])
vc-httpd.setl (Section A.19 [vc-httpd.setl])
vc-image.setl (Section A.20 [vc-image.setl])
vc-jumper.setl (Section A.25 [vc-jumper.setl])
vc-mouse.setl (Section A.28 [vc-mouse.setl])
vc-mover.setl (Section A.29 [vc-mover.setl])
vc-push.setl (Section A.34 [vc-push.setl])
vc-snap.setl (Section A.41 [vc-snap.setl])
vc-zoomer.setl (Section A.43 [vc-zoomer.setl])
Textually #includes:
vc-admin.setl (Section A.1 [vc-admin.setl])
vc-allowed.setl (Section A.2 [vc-allowed.setl])
vc-getname.setl (Section A.16 [vc-getname.setl])
Source code: *
const yhwh = `vc-toplev.setl';
-- This is the primordial program for the Box known as WEBeye.
-- It starts all the servers in the Box, catches their log output,
-- and, if necessary, shuts them down.
--
-- This version tries to bring server management up to a high
-- standard. It takes advantage of the consistent use of
-- obtain_service and provide_service, and of the idiom by which
-- parents start pump and pipe co-processes, to figure out,
-- based on the SETL source texts, which servers (together with
-- their substituent process trees) transitively depend on which
-- services, and thereby what order to start the servers in.
--
-- Note that a server can provide multiple services, a client can
-- obtain multiple services, and a child program can be instantiated
-- multiply. Thank goodness for SETL maps.
--
-- The file named in my_lock serves as a lock to make sure we have
-- only one instance of the Box running at one time on the local host.
--
-- The file named in vc_link is for the use of a Web server, and
-- points to (1) a static document saying we are in the process of
-- coming up, (2) a pseudo-document created dynamically after we have
-- fully come up and know our port number, (3) a static document saying
-- we are shutting down, or (4) a static document saying we are down.
--
-- So the external party, probably a CGI script, should simply try
-- to read the link file, and either use what it gets or report that
-- the Box has never been started. See for example `vc-master.cgi',
-- from which `vc.cgi' is instantiated.
const stub = `vc-toplev'; -- our ``base name'' for logging purposes
const my_lock = `vc-lock'; -- lock file (mutex)
const vc_link = `vc-link.html'; -- link to one of these 4:
const starting_name = `vc-starting.html';
const running_name = `vc-running.html';
const stopping_name = `vc-stopping.html';
const down_name = `vc-down.html';
const master_name = `vc-up.html'; -- template for running_name file
const pid_dir = `vc-pid'; -- directory for recording server process ids
var pub_fd := om; -- miscellaneous file descriptors
var lookup_fd := om; -- ...
var health_fd := om; -- ...
var waiter_fd := om; -- miscellaneous pseudo-fds
var sigterm_fd := om; -- ...
var wait_time;
var service_db := {}; -- service name[host,port,pid]
var fd_map := {}; -- fdserver name
var src_names;
var server_map, client_map;
var started := [ ]; -- which servers have been started
commence; -- acquire mutex or exit abnormally right away
spew (stub+` <starting>');
-- Point the link at the ``just in the process of coming up'' document:
redirect_link (starting_name);
setpgrp(); -- be a process group leader (see terminate)
-- An external record of our pid for the likes of vc-quit and vc-check:
putfile (pid_dir+`/'+stub, str pid);
sigterm_fd := open (`SIGTERM', `signal'); -- catch TERM signals
--- This global dependency analysis phase is slow enough that it
--- should probably be moved to ``configuration'' time as something
--- to be re-done if any source file changes:
-- Raw names of the program sources:
src_names := {src_name in split (filter (
``grep -l 'const yhwh' vc-*.setl'', ``''))
| src_name notin {`', yhwh}};
server_map := scan (`provide_service'); -- service nameserver name
client_map := scan (`obtain_service'); -- service nameclient name
-- The use of `exec' in front of our invocations of the SETL driver
-- is idiomatic--the shell (/bin/sh) is implicitly used to launch all
-- commands started by open, filter, and system, and sometimes
-- (depending on the shell implementation, but almost always if the
-- command has tricky things like I/O redirections in it, and always
-- if it consists of multiple process specifications) hangs around.
-- This interferes with our desire to send signals such as SIGTERM to
-- our SETL subprocesses, because the shell will not propagate these
-- without being told to. The easier solution is simply to have the
-- shell move out of the way as soon as it has parsed the command and
-- set up the I/O redirections, and is ready to launch the SETL
-- subprocess (see proc start in this very program for an example):
parent_child_map := {[parent, child] : src_name in src_names,
line in split (filter (prep_cmd (src_name)+`` | ''+
``egrep '(const|open|filter|system).*exec setl .*\\.setl''', ``''),
`\n') | #line > 0
doing
src_name(`\\.setl$') := `';
parent := src_name;
line(1..`setl ') := `';
line(`\\.setl'..) := `';
child := line;
};
-- Start core services and identify them with environment variables:
pub_fd := core_service (`publish', `VC_PUBLISH'); -- publication
lookup_fd := core_service (`lookup', `VC_LOOKUP'); -- information
-- Start a warning timer to report services that fail to come up:
waiter_fd := open (`60000', `real-ms');
wait_time := 0; -- minutes
-- Melt down server_map and client_map by removing entries from
-- them as services come up. Also record which servers have been
-- started, in order, so we can later shut them down in reverse order:
msg (`starting servers...');
while #server_map > 0 loop
-- Which servers in server_map can now be started? The
-- prerequisite is that among the constituent programs of a server
-- (the server name's transitive closure under parent_child_map),
-- there are none still in client_map, meaning no clients dependent
-- on services that are not yet up.
servers := {server in range server_map | server notin started and
forall pgm in transitive_closure (parent_child_map, server)
| pgm notin range client_map};
start (servers);
-- Do like the main loop until a new service publishes itself:
old_service_db := service_db;
while service_db = old_service_db loop
main_loop_step;
end loop;
service_names := domain (service_db - old_service_db);
assert #service_names = 1; -- presume 1 at a time from main_loop_step
service_name := arb service_names;
msg (`service "'+service_name+`" is up');
-- Revise the maps, preparatory to re-evaluating the dependencies:
server_map(service_name) := om;
client_map{service_name} := {};
end loop;
close (waiter_fd); -- finished with the egg timer
waiter_fd := om;
close (pub_fd); -- unless you'd like to allow further publication
pub_fd := om;
health_fd := core_service (`health', `VC_HEALTH'); -- sanity check
-- Instantiate the pseudo-document to be presented while we are running:
master := getfile master_name;
gsub (master, `LOOKUP', getenv `VC_LOOKUP'); -- lookup service locus
putfile (running_name, master); -- master as after instantiation
if getfile running_namemaster then
msg (`fatal - problem creating file "'+running_name+`"');
terminate (1);
end if;
msg (`created "'+running_name+`"');
-- Point the link at the ``now running'' pseudo-document. It's not a
-- ``real'' document, because all it actually does is give the location
-- of our lookup service for a CGI script to pick up.
redirect_link (running_name);
spew (stub+` <ready>');
loop -- until terminate is called
main_loop_step;
end loop;
-- Try to make link file point appropriately for our life-cycle phase
proc redirect_link (target);
unlink (vc_link);
clear_error;
link (target, vc_link);
if last_error = no_error then
msg (str vc_link+` now refers to '+str target);
else
msg (`problem pointing '+str vc_link+` at '+str target+` - '+
last_error);
end if;
end proc;
-- ``Pre-process'' some SETL source
proc prep_cmd (src_name);
-- First check for an early TERMinate request
[ready] := select ([{sigterm_fd}], 0);
if sigterm_fd in ready then
msg (yhwh + ` (' + str pid + `) caught SIGTERM');
terminate (0);
end if;
return ``setl -c ''+src_name+`` | '' +
``awk '/^%SOURCE/,/^%CODE/' | '' +
``sed -e 's/--.*$//''';
end proc;
-- Obtain a service nameprogram name (sans `.setl' suffix) map
proc scan (what);
return {[service_name, src_name] : src_name in src_names,
line in split (filter (prep_cmd (src_name) +
`` | grep ''+what+`` | grep -v proc'', ``''), `\n') | #line > 0
doing
service_name := unstr line(``'''..``''');
src_name(`.setl$') := `';
};
end proc;
-- Start a core service and make its location visible to child
-- processes through an environment variable
proc core_service (serv_name, envt_var);
var serv_fd, serv_host, serv_port, serv_pid, serv_loc; -- locals
serv_fd := open (`0', `server-socket'); -- listen on arbitrary port
serv_host := `localhost';
serv_port := port serv_fd;
serv_pid := pid;
-- Include also a service_db entry for this core service:
service_db(serv_name) := [serv_host, serv_port, serv_pid];
serv_loc := serv_host + `:' + str serv_port;
-- Make the core service visible:
setenv (envt_var, serv_loc);
-- This record can be used by parties external to the Box:
putfile (`vc-tcp/'+serv_name, serv_loc);
return serv_fd;
end proc;
proc start (servers);
for server in servers loop
-- Set up the command so that the shell will redirect the server's
-- stderr into its stdout stream. Then when we use `pipe-in'
-- mode to start the server, we'll be able to pick up all the
-- debugging and diagnostic output that it and its children spew
-- on stderr, even though any such child may have stdout
-- redirected for communication with its parent:
cmd := `exec setl '+server+`.setl 2>&1';
fd := open (cmd, `pipe-in');
if fdom then
fd_map(fd) := server;
spew (server+` <started>');
putfile (pid_dir+`/'+server, str pid(fd)); -- record process id
started(1..0) := [server]; -- insert server at front of list
else
msg (`fatal - cannot open pump "'+cmd+`"');
terminate (1);
stop 1; -- in case terminate mistakenly returns
end if;
end loop;
end proc;
proc main_loop_step;
[ready] := select ([{pub_fd, lookup_fd, health_fd,
sigterm_fd, waiter_fd} + domain fd_map]);
if pub_fdom and pub_fd in ready then
fd := accept (pub_fd);
if fdom then
if allowed (fd) then
reada (fd, service_name, service_info);
service_db(service_name) := service_info;
else
msg (getname fd+` is trying to provide a service???');
end if;
close (fd);
end if;
end if;
if lookup_fd in ready then
fd := accept (lookup_fd);
if fdom then
if allowed (fd) then
-- Local clients are expected to make at most a few rapid
-- lookup requests and then immediately close the connection.
loop doing
reada (fd, service_name);
while not eof do
printa (fd, service_db(service_name) ? [ ]);
end loop;
else
msg (`refusing lookup service to '+getname fd);
end if;
close (fd);
end if;
end if;
if health_fd in ready then
fd := accept (health_fd);
if fdom then
if allowed (fd) then
-- Placeholder for any global checks we want this program to do
printa (fd, `ok'); -- faith in self-health
else
msg (`refusing health-check service to '+getname fd);
end if;
close (fd);
end if;
end if;
if waiter_fdom and waiter_fd in ready then
reada (waiter_fd);
wait_time +:= 1;
msg (`services '+str domain server_map+` not started after '+
str wait_time+` minute(s) - still waiting');
end if;
if sigterm_fd in ready then
msg (yhwh + `(' + str pid + `) caught SIGTERM');
terminate (0);
end if;
for fd in ready | (server := fd_map(fd))om loop
if (s := getline fd)om then
spew (server+` : '+s);
else
msg (server+` exited! - shutting down Box...');
terminate (1);
-- The following code is not executed (terminate does not
-- return), but this is what should happen if there comes to be
-- some valid reason for individual servers to terminate:
close (fd);
spew (server+` <done>');
fd_map(fd) := om;
end if;
end loop;
end proc main_loop_step;
proc terminate (rc);
spew (stub+` <stopping>');
-- Point at the ``just in the process of shutting down'' document:
redirect_link (stopping_name);
-- Get rid of the dynamically created pseudo-document. If other
-- processes happen to be reading it, it won't actually disappear
-- until they all close it:
system (`rm -f '+running_name);
msg (`removed "'+running_name+`"');
if health_fdom then
close (health_fd);
health_fd := om;
end if;
if lookup_fdom then
close (lookup_fd);
lookup_fd := om;
end if;
if pub_fdom then
close (pub_fd);
pub_fd := om;
end if;
inv_fd_map := {[server, fd] : server = fd_map(fd)};
-- Try the polite signal first, to give servers a chance to clean up:
for server in started loop
fd := inv_fd_map(server);
msg (`sending TERM signal to '+server+` (pid '+str pid (fd)+`)');
kill (pid (fd));
end loop;
-- Wait for all the servers to go down. Assume progress is being
-- made as long as no more than 1.618 seconds of silence goes by:
while #fd_map > 0 loop
[ready] := select ([domain fd_map], 1618);
if #ready = 0 then
-- Timeout. Resort to the impolite signal for remaining servers:
msg (str range fd_map+` did not exit - killing...');
for server = fd_map(fd) loop
kill (pid(fd), `KILL');
close (fd);
spew (server+` <killed>');
fd_map(fd) := om;
end loop;
else
-- Response from server. It might be telling us something we
-- should log before it goes down, or EOF to say it has exited:
for fd in ready loop
server := fd_map(fd);
if (s := getline fd)om then
spew (server+` : '+s);
else
close (fd);
spew (server+` <done>');
fd_map(fd) := om;
end if;
end loop;
end if;
end loop;
-- Lest some servers abandoned their children, make sure all the
-- processes in the Box receive a TERM signal and then a KILL.
-- This is predicated on the assumption that all the processes in
-- the Box are in our process group. To avoid killing ourself,
-- we do the signalling from a special child that puts itself in
-- its own process group:
if fork() = 0 then
-- Special child process
box_pgrp := getpgrp();
setpgrp(); -- escape the Box's process group
kill (-box_pgrp); -- send TERM to all processes in the Box's group
select (om, 618); -- wait 0.618 sec (should be plenty)
kill (-box_pgrp, `KILL'); -- kill them if nothing else did it
-- Point at the ``now down'' document:
redirect_link (down_name);
spew (stub+` <done>');
finis (rc);
end if;
-- It is not an error for the parent to reach here, nor is it an
-- error for it not to. It just depends on whether the special
-- child above gets to us first or not--a race where we don't
-- care who wins...
--- I'm not really quite comfortable with that. It might be better
--- to spawn the child which starts its own process group right near
--- the beginning, and have it start the tree. Then the parent should
--- wait for that child to exit, and finally do the group-signalling
--- and THEN the lock release.
stop rc;
end proc terminate;
proc transitive_closure (f, x);
-- adapted from SDDS 1986, page 334
to_process := seen_already := {x};
return {y : doing
y from to_process;
to_process +:= f{y} - seen_already;
seen_already +:= f{y};
until #to_process = 0};
end proc;
#include ``vc-getname.setl''
#include ``vc-allowed.setl''
#include ``vc-admin.setl''