Reputation: 3855
I am targeting to implement a tiny Python written script to handle Jupyter easier to me.
Therefore I wrote this script:
import signal
import socket
import subprocess
import sys
sp = None
port = 8888
def get_own_ip():
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
s.connect(('1.1.1.1', 1))
IP = s.getsockname()[0]
except:
IP = '127.0.0.1'
finally:
s.close()
return IP
def signal_handler(sig, frame):
# terminates Jupyter by sending two SIGINTs to it
if sp is not None:
# send termination to jupyter
sp.send_signal(signal.SIGINT)
sp.send_signal(signal.SIGINT)
sys.exit(0)
if __name__ == "__main__":
own_ip = get_own_ip()
sp = subprocess.Popen(["jupyter-notebook"
, "--ip='%s'" % own_ip
, "--port=%i" % port
, "--no-browser"],
stdout=subprocess.PIPE,
stdin=subprocess.PIPE,
bufsize=1)
print(sp)
signal.signal(signal.SIGINT, signal_handler)
with sp.stdout:
print('read')
for line in sp.stdout.readline():
print('line: %s' % line)
print('wait')
sp.wait() # wait for the subprocess to exit
First I retrieve my IP address in order to use it as an argument to Jupyter. Then I run Jupyter and then I would like to filter some output from Jupyter (stdout
) while Jupyter is running. But it seems, that sp.stdout.readline()
blocks.
The code above produces the following output to the terminal:
/usr/bin/python3.6 /home/alex/.scripts/get_own_ip.py
<subprocess.Popen object at 0x7fa956374240>
read
[I 22:43:31.611 NotebookApp] Serving notebooks from local directory: /home/alex/.scripts
[I 22:43:31.611 NotebookApp] The Jupyter Notebook is running at:
[I 22:43:31.611 NotebookApp] http://192.168.18.32:8888/?token=c4b7784d784206fc357b8f484b8d659fed6a2b1733b46ae6
[I 22:43:31.611 NotebookApp] or http://127.0.0.1:8888/?token=c4b7784d784206fc357b8f484b8d659fed6a2b1733b46ae6
[I 22:43:31.611 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
[C 22:43:31.614 NotebookApp]
To access the notebook, open this file in a browser:
file:///home/alex/.local/share/jupyter/runtime/nbserver-18280-open.html
Or copy and paste one of these URLs:
http://192.168.18.32:8888/?token=c4b7784d784206fc357b8f484b8d659fed6a2b1733b46ae6
or http://127.0.0.1:8888/?token=c4b7784d784206fc357b8f484b8d659fed6a2b1733b46ae6
You can see that output occurs, but will not be recognised by sp.stdout.readline()
.
How to correctly stream from sp.stdout
?
Following the hint of @Douglas Myers-Turnbull I changed my main function to:
if __name__ == "__main__":
own_ip = get_own_ip()
# store ip as byte stream
own_ip_bstr = own_ip.encode()
sp = subprocess.Popen(["jupyter-notebook"
, "--ip='%s'" % own_ip
, "--port=%i" % port
, "--no-browser"],
stderr=subprocess.PIPE,
stdin=subprocess.PIPE,
bufsize=1)
# set up handler to terminate jupyter
signal.signal(signal.SIGINT, signal_handler)
with open('jupyter.log', mode='wb') as flog:
for line in sp.stderr:
flog.write(line)
if own_ip_bstr in line.strip():
with open('jupyter.url', mode='w') as furl:
furl.write(line.decode().split('NotebookApp] ')[1])
break
for line in sp.stderr:
flog.write(line)
Upvotes: 1
Views: 458
Reputation: 373
I think those messages are being written to stderr rather than stdout.
So you need to use sp.stderr
instead.
That's common with the python logging
framework.
You can test that this is the case by running this in your shell (if you're on Linux):
jupyter notebook > stdout.log 2> stderr.log
You probably won't encounter this issue with output from just jupyter notebook, but I've previously run into an error where the output buffer filled up before my calling code could process it. You need to make sure your code processes lines from stdout (and/or stderr) at least as quickly as jupyter notebook writes lines. If that's not the case, you can process the lines by stuffing them into queues. Something like this:
def _reader(cls, pipe_type, pipe, queue):
"""Read in lines of text (utf-8) and add them into the queue."""
try:
with pipe:
for line in iter(pipe.readline, b""):
queue.put((pipe_type, line))
finally:
queue.put(None)
#
def stream_cmd(log_callback):
"""Stream lines of stdout and stderr into a queue, then call log_callback on them.
By putting the lines into a queue and processing with log_callback on another thread, it's ok if log_callback takes a bit longer than the output.
"""
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd, bufsize=1)
try:
q = Queue()
Thread(target=_reader, args=[1, p.stdout, q]).start()
Thread(target=_reader, args=[2, p.stderr, q]).start()
for _ in range(2):
for source, line in iter(q.get, None):
log_callback(source, line)
exit_code = p.wait(timeout=timeout_secs)
finally:
p.kill()
if exit_code != 0:
raise subprocess.CalledProcessError(
exit_code, " ".join(cmd), "<<unknown>>", "<<unknown>>"
)
I've successfully used similar code before, but there might be errors in that code.
Upvotes: 3