Skip to content

Commit 40fbb97

Browse files
authored
Read-only mode (#138)
Add a new read-only mode
1 parent 112734d commit 40fbb97

30 files changed

+570
-430
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
0.5.1 (2023-02-??)
2+
------------------
3+
4+
* Feature: A new `--readonly-mode` option is available for the webapp. This allows users to run an instance of Notebooker which only displays the results of externally-run or scheduler-run reports. See [the docs](https://notebooker.readthedocs.io/en/latest/webapp/webapp.html#read-only-mode) for more details.
5+
* Bugfix: Scheduler-executed reports will now correctly record stdout.
6+
17
0.5.0 (2023-01-19)
28
------------------
39

298 KB
Loading

docs/webapp/webapp.rst

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,4 +153,23 @@ The webapp itself is configured via the command line notebooker-cli:
153153

154154
.. click:: notebooker._entrypoints:base_notebooker
155155
:prog: notebooker-cli
156-
:nested: full
156+
:nested: full
157+
158+
159+
Read-only mode
160+
--------------
161+
There exists a read-only mode (add :code:`--readonly-mode` to command line arguments) in the
162+
Notebooker webapp which will disable the ability to run new,
163+
rerun, or delete existing reports. This mode is useful in situations where you would like Notebooker
164+
reports to be executed by a trusted process (e.g. the internal scheduler, or an external job scheduling engine)
165+
but you don't want users to be able to directly execute notebooks. This mode is well suited to production
166+
environments or where the reports can reveal sensitive data if misconfigured.
167+
168+
.. image:: /images/read_only_result_page.png
169+
:width: 600
170+
:alt: A Notebooker report in a read-only instance of the Notebooker webapp.
171+
172+
.. note::
173+
Please note that read-only mode does not change the functionality of the scheduler; users will still be able to
174+
modify schedules, and scheduled reports will still execute as intended. To disable the scheduler you can add :code:`--disable-scheduler`
175+
to the command line arguments of the webapp; likewise git pulls can be prevented by using :code:`--disable-git`.

notebooker/_entrypoints.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def filesystem_default_value(dirname):
5959
)
6060
@click.option(
6161
"--notebooker-disable-git",
62+
"--disable-git",
6263
default=False,
6364
is_flag=True,
6465
help="If selected, notebooker will not try to pull the latest version of python templates from git.",
@@ -138,6 +139,13 @@ def base_notebooker(
138139
help="The name of the mongo collection within the scheduler-mongo-database which is used for "
139140
"the scheduling back-end. Defaults to the same as the serializer's mongo collection + '_scheduler'.",
140141
)
142+
@click.option(
143+
"--readonly-mode",
144+
default=False,
145+
is_flag=True,
146+
help="This mode disables the ability to execute notebooks via REST or the webapp front-end. "
147+
"Useful if you only want to display results which were e.g. executed by an external application.",
148+
)
141149
@pass_config
142150
def start_webapp(
143151
config: BaseConfig,
@@ -148,6 +156,7 @@ def start_webapp(
148156
disable_scheduler,
149157
scheduler_mongo_database,
150158
scheduler_mongo_collection,
159+
readonly_mode,
151160
):
152161
web_config = WebappConfig.copy_existing(config)
153162
web_config.PORT = port
@@ -157,6 +166,7 @@ def start_webapp(
157166
web_config.DISABLE_SCHEDULER = disable_scheduler
158167
web_config.SCHEDULER_MONGO_DATABASE = scheduler_mongo_database
159168
web_config.SCHEDULER_MONGO_COLLECTION = scheduler_mongo_collection
169+
web_config.READONLY_MODE = readonly_mode
160170
return main(web_config)
161171

162172

notebooker/execute_notebook.py

Lines changed: 132 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
from __future__ import unicode_literals
2+
3+
import threading
4+
import time
5+
16
import copy
27
import datetime
38
import json
@@ -18,7 +23,7 @@
1823
NotebookResultError,
1924
python_template_dir,
2025
)
21-
from notebooker.serialization.serialization import get_serializer_from_cls
26+
from notebooker.serialization.serialization import get_serializer_from_cls, initialize_serializer_from_config
2227
from notebooker.settings import BaseConfig
2328
from notebooker.utils.conversion import _output_ipynb_name, generate_ipynb_from_py, ipython_to_html, ipython_to_pdf
2429
from notebooker.utils.filesystem import initialise_base_dirs
@@ -104,11 +109,7 @@ def _run_checks(
104109

105110
logger.info("Executing notebook at {} using parameters {} --> {}".format(ipynb_raw_path, overrides, output_ipynb))
106111
pm.execute_notebook(
107-
ipynb_raw_path,
108-
ipynb_executed_path,
109-
parameters=overrides,
110-
log_output=True,
111-
prepare_only=prepare_only,
112+
ipynb_raw_path, ipynb_executed_path, parameters=overrides, log_output=True, prepare_only=prepare_only
112113
)
113114
with open(ipynb_executed_path, "r") as f:
114115
raw_executed_ipynb = f.read()
@@ -167,11 +168,7 @@ def run_report(
167168
job_id = job_id or str(uuid.uuid4())
168169
stop_execution = os.getenv("NOTEBOOKER_APP_STOPPING")
169170
if stop_execution:
170-
logger.info(
171-
"Aborting attempt to run %s, jobid=%s as app is shutting down.",
172-
report_name,
173-
job_id,
174-
)
171+
logger.info("Aborting attempt to run %s, jobid=%s as app is shutting down.", report_name, job_id)
175172
result_serializer.update_check_status(job_id, JobStatus.CANCELLED, error_info=CANCEL_MESSAGE)
176173
return
177174
try:
@@ -182,10 +179,7 @@ def run_report(
182179
attempts_remaining,
183180
)
184181
result_serializer.update_check_status(
185-
job_id,
186-
report_name=report_name,
187-
job_start_time=job_submit_time,
188-
status=JobStatus.PENDING,
182+
job_id, report_name=report_name, job_start_time=job_submit_time, status=JobStatus.PENDING
189183
)
190184
result = _run_checks(
191185
job_id,
@@ -439,3 +433,126 @@ def docker_compose_entrypoint():
439433
logger.info("Received a request to run a report with the following parameters:")
440434
logger.info(args_to_execute)
441435
return subprocess.Popen(args_to_execute).wait()
436+
437+
438+
def _monitor_stderr(process, job_id, serializer_cls, serializer_args):
    """
    Stream a report subprocess's stderr into the result serializer until the process exits.

    Every line read from ``process.stderr`` is logged and appended to the stored stdout for ``job_id``.
    Once a read yields nothing and the process has exited, the stored stdout is replaced with the full
    list of captured lines.

    :param process: The running subprocess; ``process.stderr`` must be a readable binary pipe.
    :param job_id: `str` The job whose stored stdout is being updated.
    :param serializer_cls: The serializer class identifier, passed to `get_serializer_from_cls`.
    :param serializer_args: `dict` Keyword arguments forwarded to `get_serializer_from_cls`.
    :return: `str` All captured stderr lines joined together.
    """
    stderr = []
    # Unsure whether flask app contexts are thread-safe; just reinitialise the serializer here.
    result_serializer = get_serializer_from_cls(serializer_cls, **serializer_args)
    while True:
        # Decode leniently: a single non-UTF-8 byte from the child must not raise and kill this
        # thread, as that would stop stderr being drained and recorded.
        line = process.stderr.readline().decode("utf-8", errors="replace")
        if line != "":
            stderr.append(line)
            logger.info(line)  # So that we have it in the log, not just in memory.
            result_serializer.update_stdout(job_id, new_lines=[line])
        elif process.poll() is not None:
            # Nothing left to read and the process has exited: write the complete output once.
            result_serializer.update_stdout(job_id, stderr, replace=True)
            break
    return "".join(stderr)
452+
453+
454+
def run_report_in_subprocess(
    base_config,
    report_name,
    report_title,
    mailto,
    overrides,
    *,
    hide_code=False,
    generate_pdf_output=False,
    prepare_only=False,
    scheduler_job_id=None,
    run_synchronously=False,
    mailfrom=None,
    n_retries=3,
    is_slideshow=False,
) -> str:
    """
    Execute the Notebooker report in a subprocess.
    Uses a subprocess to execute the report asynchronously, which is identical to the non-webapp entrypoint.

    A stub result is saved with status SUBMITTED before the subprocess is launched, and a daemon thread
    streams the subprocess's stderr into the serializer as the job's stdout.

    :param base_config: `BaseConfig` A set of configuration options which specify serialisation parameters.
    :param report_name: `str` The report which we are executing
    :param report_title: `str` The user-specified title of the report
    :param mailto: `Optional[str]` Who the results will be emailed to
    :param overrides: `Optional[Dict[str, Any]]` The parameters to be passed into the report
    :param hide_code: `bool` Passes `--hide-code` rather than `--show-code` to the subprocess. Defaults to False.
    :param generate_pdf_output: `bool` Whether we're generating a PDF. Defaults to False.
    :param prepare_only: `bool` Whether to do everything except execute the notebook. Useful for testing.
    :param scheduler_job_id: `Optional[str]` if the job was triggered from the scheduler, this is the scheduler's job id
    :param run_synchronously: `bool` If True, block until the subprocess has exited. Otherwise wait one second
                              and raise if the subprocess has already exited with a non-zero code.
    :param mailfrom: `str` if passed, then this string will be used in the from field
    :param n_retries: The number of retries to attempt.
    :param is_slideshow: Whether the notebook is a reveal.js slideshow or not.
    :return: The unique job_id.
    :raises RuntimeError: In the non-synchronous case, if the subprocess has failed within the first second.
    """
    job_id = str(uuid.uuid4())
    job_start_time = datetime.datetime.now()
    result_serializer = initialize_serializer_from_config(base_config)
    result_serializer.save_check_stub(
        job_id,
        report_name,
        report_title=report_title,
        job_start_time=job_start_time,
        status=JobStatus.SUBMITTED,
        overrides=overrides,
        mailto=mailto,
        generate_pdf_output=generate_pdf_output,
        hide_code=hide_code,
        scheduler_job_id=scheduler_job_id,
        is_slideshow=is_slideshow,
    )

    command = (
        [
            os.path.join(sys.exec_prefix, "bin", "notebooker-cli"),
            "--output-base-dir",
            base_config.OUTPUT_DIR,
            "--template-base-dir",
            base_config.TEMPLATE_DIR,
            "--py-template-base-dir",
            base_config.PY_TEMPLATE_BASE_DIR,
            "--py-template-subdir",
            base_config.PY_TEMPLATE_SUBDIR,
            "--default-mailfrom",
            base_config.DEFAULT_MAILFROM,
        ]
        + (["--notebooker-disable-git"] if base_config.NOTEBOOKER_DISABLE_GIT else [])
        + ["--serializer-cls", result_serializer.__class__.__name__]
        + result_serializer.serializer_args_to_cmdline_args()
        + [
            "execute-notebook",
            "--job-id",
            job_id,
            "--report-name",
            report_name,
            "--report-title",
            report_title,
            "--mailto",
            mailto,
            "--overrides-as-json",
            json.dumps(overrides),
            "--pdf-output" if generate_pdf_output else "--no-pdf-output",
            "--hide-code" if hide_code else "--show-code",
            "--n-retries",
            str(n_retries),
        ]
        + (["--prepare-notebook-only"] if prepare_only else [])
        + (["--is-slideshow"] if is_slideshow else [])
        + ([f"--scheduler-job-id={scheduler_job_id}"] if scheduler_job_id is not None else [])
        + ([f"--mailfrom={mailfrom}"] if mailfrom is not None else [])
    )
    # Nothing ever reads the child's stdout, so discard it instead of piping it: an undrained pipe
    # blocks the child once the OS pipe buffer fills, which would deadlock `p.wait()` below.
    p = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)

    stderr_thread = threading.Thread(
        target=_monitor_stderr, args=(p, job_id, base_config.SERIALIZER_CLS, base_config.SERIALIZER_CONFIG)
    )
    # Daemon thread: it must not keep the interpreter alive if the caller exits first.
    stderr_thread.daemon = True
    stderr_thread.start()
    if run_synchronously:
        p.wait()
    else:
        # Give the subprocess a moment so that immediate failures are surfaced to the caller.
        time.sleep(1)
        p.poll()
        if p.returncode:
            raise RuntimeError(f"The report execution failed with exit code {p.returncode}")

    return job_id

notebooker/settings.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ class BaseConfig:
3939
def copy_existing(cls, existing: "BaseConfig"):
4040
return cls(**asdict(existing))
4141

42+
@classmethod
def from_superset_kwargs(cls, kwargs: dict):
    """Build an instance from ``kwargs``, ignoring any keys which are not dataclass fields of ``cls``."""
    known_fields = cls.__dataclass_fields__
    accepted = {}
    for name, value in kwargs.items():
        if name in known_fields:
            accepted[name] = value
    return cls(**accepted)
46+
4247

4348
@dataclass
4449
class WebappConfig(BaseConfig):
@@ -53,3 +58,4 @@ class WebappConfig(BaseConfig):
5358
SCHEDULER_MONGO_DATABASE: str = ""
5459
SCHEDULER_MONGO_COLLECTION: str = ""
5560
DISABLE_SCHEDULER: bool = False
61+
READONLY_MODE: bool = False

notebooker/utils/filesystem.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,18 @@ def mkdir_p(path):
4141

4242

4343
def get_cache_dir():
44-
return current_app.config["CACHE_DIR"]
44+
with current_app.app_context():
45+
return current_app.config["CACHE_DIR"]
4546

4647

4748
def get_output_dir():
48-
return current_app.config["OUTPUT_DIR"]
49+
with current_app.app_context():
50+
return current_app.config["OUTPUT_DIR"]
4951

5052

5153
def get_template_dir():
52-
return current_app.config["TEMPLATE_DIR"]
54+
with current_app.app_context():
55+
return current_app.config["TEMPLATE_DIR"]
5356

5457

5558
def _cleanup_dirs(webapp_config):

notebooker/utils/results.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -110,24 +110,31 @@ def get_all_result_keys(
110110
return all_keys
111111

112112

113-
def get_all_available_results_json(serializer: MongoResultSerializer, limit: int, report_name: str = None) -> List[constants.NotebookResultBase]:
113+
def get_all_available_results_json(
114+
serializer: MongoResultSerializer, limit: int, report_name: str = None, readonly_mode: bool = False
115+
) -> List[constants.NotebookResultBase]:
114116
json_output = []
115117
mongo_filter = {"report_name": report_name} if report_name is not None else {}
116118
for result in serializer.get_all_results(mongo_filter=mongo_filter, limit=limit, load_payload=False):
117119
output = result.saveable_output()
118-
output["result_url"] = url_for(
119-
"serve_results_bp.task_results", job_id=output["job_id"], report_name=output["report_name"]
120-
)
121-
output["ipynb_url"] = url_for(
122-
"serve_results_bp.download_ipynb_result", job_id=output["job_id"], report_name=output["report_name"]
123-
)
124-
output["pdf_url"] = url_for(
125-
"serve_results_bp.download_pdf_result", job_id=output["job_id"], report_name=output["report_name"]
126-
)
127-
output["rerun_url"] = url_for(
128-
"run_report_bp.rerun_report", job_id=output["job_id"], report_name=output["report_name"]
129-
)
130-
120+
job_id = output["job_id"]
121+
report_name = output["report_name"]
122+
urls = {"ipynb_url": "", "pdf_url": "", "result_url": "", "rerun_url": "", "clone_url": "", "delete_url": ""}
123+
if job_id:
124+
new_urls = {
125+
"result_url": url_for("serve_results_bp.task_results", report_name=report_name, job_id=job_id),
126+
"ipynb_url": url_for("serve_results_bp.download_ipynb_result", report_name=report_name, job_id=job_id),
127+
"pdf_url": url_for("serve_results_bp.download_pdf_result", report_name=report_name, job_id=job_id),
128+
}
129+
urls.update(new_urls)
130+
if not readonly_mode:
131+
urls.update(
132+
{
133+
"rerun_url": url_for("run_report_bp.rerun_report", report_name=report_name, job_id=job_id),
134+
"delete_url": url_for("run_report_bp.delete_report", report_name=report_name, job_id=job_id),
135+
}
136+
)
137+
output.update(urls)
131138
json_output.append(output)
132139
return json_output
133140

notebooker/web/app.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@
2121
from notebooker.web.routes.core import core_bp
2222
from notebooker.web.routes.index import index_bp
2323
from notebooker.web.routes.pending_results import pending_results_bp
24-
from notebooker.web.routes.run_report import run_report_bp
24+
from notebooker.web.routes.report_execution import run_report_bp
2525
from notebooker.web.routes.scheduling import scheduling_bp
2626
from notebooker.web.routes.serve_results import serve_results_bp
27+
from notebooker.web.routes.templates import templates_bp
2728

2829
logger = logging.getLogger(__name__)
2930
all_report_refresher: Optional[threading.Thread] = None
@@ -71,8 +72,10 @@ def create_app(webapp_config=None):
7172

7273
flask_app.url_map.converters["date"] = DateConverter
7374
flask_app.register_blueprint(index_bp)
74-
flask_app.register_blueprint(run_report_bp)
75+
if webapp_config and not webapp_config.READONLY_MODE:
76+
flask_app.register_blueprint(run_report_bp)
7577
flask_app.register_blueprint(core_bp)
78+
flask_app.register_blueprint(templates_bp)
7679
flask_app.register_blueprint(serve_results_bp)
7780
flask_app.register_blueprint(pending_results_bp)
7881
if webapp_config and not webapp_config.DISABLE_SCHEDULER:

0 commit comments

Comments
 (0)