LLMのPythonスクリプト出力を比較的安全に実行したい、そういう時って割とありますよね。使うのは自分だけなので怖いのは偶然危険なコードが出てきた時だけ、みたいなケースです。
でも世間に出回ってる情報はDockerを使ったもの (llm-sandbox) とか、WebAssemblyを使ったもの (Pyodide) とか、CPython以外の実装を使ったもの (PyPyのサンドボックス機能) とか、Webサービス向けの重量級のものばかり。軽かったであろう pysandbox はデザインに欠陥があり、制限も多く開発中止となっていました。
そんな中でさてどうしよう、サンドボックスのPython環境でもホストのPython環境と同じものが使えれば管理が楽なのに、と思いながら調べていました。
その結果、Pythonライブラリの中では決定打が見つからなかったものの、Linux環境の中では systemd-nspawn を読み込み専用マウントで使う方法を思いつき(但しルート権限が必要)、その systemd-nspawn に似ながらもルート権限が不要なbubblewrap (bwrap) を見つけ、その bwrap を使ってサンドボックスで Python スクリプトを実行するコードを書いてみました。
# License: Public Domain
import os
import subprocess
# Host home directory; its ~/.local subtrees are bind-mounted read-only
# into the sandbox by sandbox_pyexec().
home_dir = os.path.expanduser("~")

# Directory that bwrap binds as the sandbox's writable root ("/").
# exist_ok=True keeps repeated runs quiet, but still fails loudly when the
# path exists and is not a directory.
os.makedirs("/tmp/mysandbox_root", exist_ok=True)
def sandbox_pyexec(code):
    """Run *code* with ``python -`` inside a bubblewrap (bwrap) sandbox.

    Args:
        code: Python source text; it is fed to the interpreter on stdin.

    Returns:
        A ``(stdout, stderr)`` tuple holding the captured output streams,
        each with leading and trailing whitespace stripped.
    """
    bwrap_cmd = [
        "bwrap",
        # /tmp/mysandbox_root becomes the sandbox's writable root directory.
        "--bind", "/tmp/mysandbox_root", "/",
        # Share the system files read-only.
        "--ro-bind", "/bin", "/bin",
        "--ro-bind", "/lib", "/lib",
        "--ro-bind", "/lib64", "/lib64",
        "--ro-bind", "/usr", "/usr",
        "--ro-bind", "/etc", "/etc",  # not ideal
        # Share the pip-installed user libraries read-only.
        "--ro-bind", home_dir + "/.local/lib", home_dir + "/.local/lib",
        "--ro-bind", home_dir + "/.local/bin", home_dir + "/.local/bin",
        "--ro-bind", home_dir + "/.local/share", home_dir + "/.local/share",
        # Share as little as possible with the host. Unlike systemd-nspawn the
        # process list is still visible, because the host /proc is bound.
        "--bind", "/proc", "/proc",
        "--unshare-all",
        "--",
        # Run the code that arrives on standard input.
        "bash", "-c", "python -",
    ]
    proc = subprocess.Popen(
        bwrap_cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,  # exchange str instead of bytes on the pipes
    )
    result, err = proc.communicate(code)
    return (result.strip(), err.strip())
# Demo: run three snippets in the sandbox and print what each one produced.
# The expected output of every snippet is listed below the loop.
for snippet in (
    """print("これはテストです")""",
    """import os
os.system("ls /")""",
    """wrong python syntax here""",
):
    r, err = sandbox_pyexec(snippet)
    print("stdout:", r)
    print("stderr:", err)

# Snippet 1:
# stdout: これはテストです
# stderr:
#
# Snippet 2:
# stdout: bin
# etc
# home
# lib
# lib64
# proc
# usr
# stderr:
#
# Snippet 3:
# stdout:
# stderr: File "<stdin>", line 1
# wrong python syntax here
# ^^^^^^
# SyntaxError: invalid syntax
Jupyter互換版
Jupyter互換版も作りました(2026年1月11日追記)。
こっちの方がLLMとの相性は良いはずです(少なくともgpt-oss-120bはこっちの方が良いです)。
# License: Public Domain
import os
import pty
import termios
import select
import sys
import signal
import psutil
# Host home directory; its ~/.local subtrees are bind-mounted read-only
# into the sandbox by sandbox_pyenv().
home_dir = os.path.expanduser("~")

# Directory that bwrap binds as the sandbox's writable root ("/").
# exist_ok=True keeps repeated runs quiet, but still fails loudly when the
# path exists and is not a directory.
os.makedirs("/tmp/mysandbox_root", exist_ok=True)
def disable_echo(fd):
    """Switch off terminal echo on the tty referred to by *fd*.

    Args:
        fd: File descriptor of a terminal device.
    """
    attrs = termios.tcgetattr(fd)
    attrs[3] &= ~termios.ECHO  # index 3 holds the local-mode flags (lflag)
    termios.tcsetattr(fd, termios.TCSANOW, attrs)
# Bootstrap source executed inside the sandboxed interpreter to imitate
# IPython cells.
# Requires IPython: python -m pip install ipython --break-system-packages
#
# It installs IPython's VerboseTB as sys.excepthook and defines
# _exec_with_expr(code, globals, locals), which runs a cell and returns the
# value of a trailing expression statement (None when there is none). Every
# cell is registered in linecache under a fresh /tmp/test-<n>.py name so that
# tracebacks can display its source.
#
# The text is typed into the interactive interpreter line by line, so the
# function body must stay free of blank lines (a blank line would end the
# definition early).
helper_code = """from IPython.core.ultratb import VerboseTB
import sys
sys.excepthook = VerboseTB(theme_name="linux", tb_offset=2)
import linecache
file_count = 1
import ast
def _exec_with_expr(code, globals=None, locals=None):
    global file_count
    tree = ast.parse(code, mode="exec")
    if len(tree.body) == 0:
        print("")
        return None
    filename = "/tmp/test-%s.py"%file_count
    file_count += 1
    linecache.cache[filename] = (
        len(code),
        None,
        code.splitlines(True),
        filename,
    )
    last = tree.body[-1]
    if isinstance(last, ast.Expr):
        expr = ast.Expression(last.value)
        tree.body = tree.body[:-1]
        exec(compile(tree, filename, "exec"), globals, locals)
        return eval(compile(expr, filename, "eval"), globals, locals)
    else:
        exec(compile(tree, filename, "exec"), globals, locals)
        return None
"""
def sandbox_pyenv():
    """Start an interactive Python inside a bwrap sandbox attached to a pty.

    The function waits for the interpreter's first ``>>> `` prompt and then
    sends ``helper_code`` to it before returning.

    Returns:
        A ``(pid, fd)`` tuple: the PID of the forked child (which is replaced
        by ``bwrap``) and the master side of its pseudo-terminal.
    """
    pid, fd = pty.fork()
    if pid == 0:  # child
        try:
            # Without echo, the code we type is not mirrored into the output.
            disable_echo(0)
            os.execvp(
                "bwrap",
                [
                    "bwrap",
                    # /tmp/mysandbox_root becomes the sandbox's writable root.
                    "--bind", "/tmp/mysandbox_root", "/",
                    # Share the system files read-only.
                    "--ro-bind", "/bin", "/bin",
                    "--ro-bind", "/lib", "/lib",
                    "--ro-bind", "/lib64", "/lib64",
                    "--ro-bind", "/usr", "/usr",
                    "--ro-bind", "/etc", "/etc",  # not ideal
                    # Share the pip-installed user libraries read-only.
                    "--ro-bind", home_dir + "/.local/lib", home_dir + "/.local/lib",
                    "--ro-bind", home_dir + "/.local/bin", home_dir + "/.local/bin",
                    "--ro-bind", home_dir + "/.local/share", home_dir + "/.local/share",
                    # Share as little as possible with the host. Unlike
                    # systemd-nspawn the process list is still visible,
                    # because the host /proc is bound.
                    "--bind", "/proc", "/proc",
                    "--unshare-all",
                    "--",
                    # Interactive mode: executes the code arriving on stdin.
                    "python", "-i",
                ],
            )
        except Exception as exc:
            # Report through the pty so the parent can see why startup failed.
            os.write(2, ("failed to start bwrap: %s\n" % exc).encode("utf-8"))
        finally:
            # Only reached when the exec did not happen. Leave via os._exit()
            # so the forked child never continues running the caller's code.
            os._exit(127)

    # parent: swallow the start-up banner up to the first primary prompt
    output = b""
    while True:
        select.select([fd], [], [])
        output += os.read(fd, 1024)
        if output.endswith(b">>> "):
            break
    _sandbox_pyexec((pid, fd), helper_code)
    return (pid, fd)
TIMEOUT = 3.0  # seconds without any output before the sandboxed code is interrupted


def _sandbox_pyexec(senv, code):
    """Type *code* into the sandboxed interpreter and collect its output.

    Args:
        senv: ``(pid, fd)`` pair as returned by ``sandbox_pyenv()``.
        code: Source text; it is written to the pty one line at a time.

    Returns:
        The decoded output read from the pty, without the trailing prompt.

    Raises:
        TimeoutError: Nothing became readable for ``TIMEOUT`` seconds. Before
            raising, SIGINT is sent to every descendant process whose name
            contains "python" and the output is drained until the next
            ``>>> `` prompt, so the interpreter can be used again.
    """
    fd = senv[1]
    for line in code.split("\n"):
        os.write(fd, line.encode("utf-8") + b"\n")

    # NOTE(review): reading stops as soon as the buffered output ends with a
    # prompt, so program output that itself ends in ">>> " or "... " cannot be
    # told apart from a real prompt.
    output = b""
    while True:
        ready, _, _ = select.select([fd], [], [], TIMEOUT)
        if not ready:
            for child in psutil.Process(senv[0]).children(recursive=True):
                if "python" in child.name():
                    child.send_signal(signal.SIGINT)
            # Discard everything up to the prompt printed after the interrupt.
            drained = b""
            while not drained.endswith(b">>> "):
                select.select([fd], [], [])
                drained += os.read(fd, 1024)
            raise TimeoutError("execution timed out")
        output += os.read(fd, 1024)
        if output.endswith((b">>> ", b"... ")):
            output = output[:-4]  # strip the prompt
            break
    return output.decode(errors="ignore")
def sandbox_pyexec(senv, code):
    """Execute *code* as one cell in the sandboxed interpreter.

    The cell is passed to the ``_exec_with_expr`` helper (defined by
    ``helper_code``) as a triple-single-quoted string literal.

    Args:
        senv: ``(pid, fd)`` pair as returned by ``sandbox_pyenv()``.
        code: Python source of the cell.

    Returns:
        The text the interpreter printed while running the cell.
    """
    # Escape backslashes first, then single quotes, so the cell survives
    # being embedded in a '''...''' literal.
    escaped = code.replace('\\', '\\\\').replace("'", "\\'")
    return _sandbox_pyexec(senv, "_exec_with_expr('''" + escaped + "''', globals(), locals())\n")
def sandbox_reset(senv):
    """Clear the sandboxed interpreter's globals and reinstall the helper code.

    Args:
        senv: ``(pid, fd)`` pair as returned by ``sandbox_pyenv()``.
    """
    reset_code = """globals().clear()
import gc
gc.collect()
globals().clear()
"""
    _sandbox_pyexec(senv, reset_code + helper_code)
# Demo of the Jupyter-like sandbox. The trailing comment on each line shows
# the output the author expected.
senv = sandbox_pyenv()
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "a = 100")) # [exec][/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "print(a)")) # [exec]100\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "a")) # [exec]100\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "b")) # [exec]---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nFile /tmp/test-4.py:1\n----> 1 b\n\nNameError: name 'b' is not defined\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, """print("aaa")\nprint("bbb")""")) # [exec]aaa\nbbb\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, """print("aaa" + \\\n "bbb")""")) # [exec]aaabbb\n[/exec]
sandbox_reset(senv)
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "a")) # [exec]---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nFile /tmp/test-1.py:1\n----> 1 a\n\nNameError: name 'a' is not defined\n[/exec]
try:
    print("[exec]%s[/exec]"%sandbox_pyexec(senv, """while True:\n continue""")) # TimeoutError
except TimeoutError:
    print ("TimeoutError: execution timed out")
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "print(100)")) # [exec]100\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "if True:\n pass\nprint(100)")) # [exec]100\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "if True:\n pass\nprint(\"\"\"ok\"\"\")")) # [exec]ok\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "if True:\n pass\nprint('''ok''')")) # [exec]ok\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "if True:\n pass\n'ok'")) # [exec]'ok'\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "# comment test")) # [exec]\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "def a(x): return x*x")) # [exec][/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "\n\na('a')")) # [exec]---------------------------------------------------------------------------\nTypeError Traceback (most recent call last)\nFile /tmp/test-9.py:3\n----> 3 a('a')\nFile /tmp/test-8.py:1, in a(x='a')\n----> 1 def a(x): return x*x\n x = 'a'\nTypeError: can't multiply sequence by non-int of type 'str'\n[/exec]
Jupyter互換 + Pyrefly統合版
関数定義だけを追加するセルは実行してもエラーが出ず問題に気付けないので、その場合は静的解析 (lint) の Pyrefly を掛けるようにしてみました(2026年1月12日追記)。
2026年1月13日修正: エラー表示をJupyterに近づけました(カラーコードはまだ非互換です)。
# License: Public Domain
# pip install pyrefly
# export PATH=$HOME/.local/bin:$PATH
import os
import pty
import termios
import select
import sys
import signal
import psutil
import subprocess
import re
# Host home directory; its ~/.local subtrees are bind-mounted read-only
# into the sandbox by sandbox_pyenv().
home_dir = os.path.expanduser("~")

# Directory that bwrap binds as the sandbox's writable root ("/").
# exist_ok=True keeps repeated runs quiet, but still fails loudly when the
# path exists and is not a directory.
os.makedirs("/tmp/mysandbox_root", exist_ok=True)

# Named pipe through which the accumulated cell history is handed to
# `pyrefly check`; an already existing entry from an earlier run is reused.
try:
    os.mkfifo("/tmp/mysandbox_pipe.py")
except FileExistsError:
    pass
def disable_echo(fd):
    """Switch off terminal echo on the tty referred to by *fd*.

    Args:
        fd: File descriptor of a terminal device.
    """
    attrs = termios.tcgetattr(fd)
    attrs[3] &= ~termios.ECHO  # index 3 holds the local-mode flags (lflag)
    termios.tcsetattr(fd, termios.TCSANOW, attrs)
# Source of every cell kept in the history, concatenated; sandbox_pyexec()
# writes this text to the pipe that `pyrefly check` reads.
whole_code = ""
# Maps a line number of whole_code to a "<file> in <cell line: N>" label.
# Index 0 is a placeholder so the list can be indexed with the line numbers
# taken from pyrefly's output (presumably 1-based — confirm against pyrefly).
whole_code_line = [""]
# Counter used to build the default cell file name "test-<n>.py".
file_count = 1
# Bootstrap source executed inside the sandboxed interpreter to imitate
# IPython cells.
# Requires IPython: python -m pip install ipython --break-system-packages
#
# It installs IPython's VerboseTB as sys.excepthook and defines
# _exec_with_expr(code, filename, globals, locals), which runs a cell and
# returns the value of a trailing expression statement (None when there is
# none). The cell is registered in linecache under *filename* so that
# tracebacks can display its source.
#
# The text is typed into the interactive interpreter line by line, so the
# function body must stay free of blank lines (a blank line would end the
# definition early).
helper_code = """from IPython.core.ultratb import VerboseTB
import sys
sys.excepthook = VerboseTB(theme_name="linux", tb_offset=2)
import linecache
import ast
def _exec_with_expr(code, filename, globals=None, locals=None):
    tree = ast.parse(code, mode="exec")
    if len(tree.body) == 0:
        print("")
        return None
    linecache.cache[filename] = (
        len(code),
        None,
        code.splitlines(True),
        filename,
    )
    last = tree.body[-1]
    if isinstance(last, ast.Expr):
        expr = ast.Expression(last.value)
        tree.body = tree.body[:-1]
        exec(compile(tree, filename, "exec"), globals, locals)
        return eval(compile(expr, filename, "eval"), globals, locals)
    else:
        exec(compile(tree, filename, "exec"), globals, locals)
        return None
"""
def sandbox_pyenv():
    """Start an interactive Python inside a bwrap sandbox attached to a pty.

    The function waits for the interpreter's first ``>>> `` prompt and then
    sends ``helper_code`` to it before returning.

    Returns:
        A ``(pid, fd)`` tuple: the PID of the forked child (which is replaced
        by ``bwrap``) and the master side of its pseudo-terminal.
    """
    pid, fd = pty.fork()
    if pid == 0:  # child
        try:
            # Without echo, the code we type is not mirrored into the output.
            disable_echo(0)
            os.execvp(
                "bwrap",
                [
                    "bwrap",
                    # /tmp/mysandbox_root becomes the sandbox's writable root.
                    "--bind", "/tmp/mysandbox_root", "/",
                    # Share the system files read-only.
                    "--ro-bind", "/bin", "/bin",
                    "--ro-bind", "/lib", "/lib",
                    "--ro-bind", "/lib64", "/lib64",
                    "--ro-bind", "/usr", "/usr",
                    "--ro-bind", "/etc", "/etc",  # not ideal
                    # Share the pip-installed user libraries read-only.
                    "--ro-bind", home_dir + "/.local/lib", home_dir + "/.local/lib",
                    "--ro-bind", home_dir + "/.local/bin", home_dir + "/.local/bin",
                    "--ro-bind", home_dir + "/.local/share", home_dir + "/.local/share",
                    # Share as little as possible with the host. Unlike
                    # systemd-nspawn the process list is still visible,
                    # because the host /proc is bound.
                    "--bind", "/proc", "/proc",
                    "--unshare-all",
                    "--",
                    # Interactive mode: executes the code arriving on stdin.
                    "python", "-i",
                ],
            )
        except Exception as exc:
            # Report through the pty so the parent can see why startup failed.
            os.write(2, ("failed to start bwrap: %s\n" % exc).encode("utf-8"))
        finally:
            # Only reached when the exec did not happen. Leave via os._exit()
            # so the forked child never continues running the caller's code.
            os._exit(127)

    # parent: swallow the start-up banner up to the first primary prompt
    output = b""
    while True:
        select.select([fd], [], [])
        output += os.read(fd, 1024)
        if output.endswith(b">>> "):
            break
    _sandbox_pyexec((pid, fd), helper_code)
    return (pid, fd)
TIMEOUT = 3.0  # seconds without any output before the sandboxed code is interrupted


def _sandbox_pyexec(senv, code):
    """Type *code* into the sandboxed interpreter and collect its output.

    Args:
        senv: ``(pid, fd)`` pair as returned by ``sandbox_pyenv()``.
        code: Source text; it is written to the pty one line at a time.

    Returns:
        The decoded output read from the pty, without the trailing prompt.

    Raises:
        TimeoutError: Nothing became readable for ``TIMEOUT`` seconds. Before
            raising, SIGINT is sent to every descendant process whose name
            contains "python" and the output is drained until the next
            ``>>> `` prompt, so the interpreter can be used again.
    """
    fd = senv[1]
    for line in code.split("\n"):
        os.write(fd, line.encode("utf-8") + b"\n")

    # NOTE(review): reading stops as soon as the buffered output ends with a
    # prompt, so program output that itself ends in ">>> " or "... " cannot be
    # told apart from a real prompt.
    output = b""
    while True:
        ready, _, _ = select.select([fd], [], [], TIMEOUT)
        if not ready:
            for child in psutil.Process(senv[0]).children(recursive=True):
                if "python" in child.name():
                    child.send_signal(signal.SIGINT)
            # Discard everything up to the prompt printed after the interrupt.
            drained = b""
            while not drained.endswith(b">>> "):
                select.select([fd], [], [])
                drained += os.read(fd, 1024)
            raise TimeoutError("execution timed out")
        output += os.read(fd, 1024)
        if output.endswith((b">>> ", b"... ")):
            output = output[:-4]  # strip the prompt
            break
    return output.decode(errors="ignore")
def _result_to_jupyter(r):
    """Rewrite coloured ``File /tmp/<name>.py:<line>`` traceback headers into
    the ``/tmp/<name>.py in <cell line:<line>>()`` form."""
    # XXX: the colour codes are still not identical to IPython's.
    def to_cell_header(match):
        return "\x1b[32m/tmp/" + match.group(1) + ".py\x1b[39m in \x1b[36m<cell line:" + match.group(2) +">\033[34m()\x1b[39m"
    return re.sub("\x1b\\[96mFile \x1b\\[39m\x1b\\[32m/tmp/([^:]+)\\.py:(\\d+)\x1b\\[39m", to_cell_header, r)


def _lint_history():
    """Run ``pyrefly check`` over the accumulated cell history and return its
    stdout with pipe locations mapped back to cell locations."""
    proc = subprocess.Popen(["pyrefly", "check", "/tmp/mysandbox_pipe.py"],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            text=True)
    # Feed the history through the named pipe that pyrefly was pointed at.
    # NOTE(review): opening a FIFO for writing waits for a reader, so this
    # presumably blocks if pyrefly exits without opening the file — confirm.
    with open("/tmp/mysandbox_pipe.py", "a", encoding="utf-8") as f:
        f.write(whole_code)
    stdout, _ = proc.communicate()

    def to_cell_location(match):
        return whole_code_line[int(match.group(1))] + ":" + match.group(2)
    return re.sub(r'/tmp/mysandbox_pipe\.py:(\d+):(\d+)', to_cell_location, stdout)


def sandbox_pyexec(senv, code, filename = None):
    """Execute *code* as one cell and return Jupyter-style output.

    Args:
        senv: ``(pid, fd)`` pair as returned by ``sandbox_pyenv()``.
        code: Python source of the cell.
        filename: Name shown for the cell in tracebacks (placed under
            ``/tmp/``). Defaults to ``test-<n>.py`` with a running counter.

    Returns:
        The cell's output with traceback headers rewritten. When the cell
        printed nothing (for example a bare function definition), the
        output of ``pyrefly check`` over the whole history instead.
    """
    global whole_code
    global whole_code_line
    global file_count
    if not filename:
        filename = "test-%s.py"%file_count
        file_count += 1
    filename = "/tmp/" + filename
    # Escape backslashes first, then single quotes, so the cell survives
    # being embedded in a '''...''' literal.
    escaped = code.replace('\\', '\\\\').replace("'", "\\'")
    e_code = "_exec_with_expr('''" + escaped + "''' , '''" + filename + "''', globals(), locals())\n"
    r = _sandbox_pyexec(senv, e_code)
    # Cells that raised are not recorded in the history.
    if r.startswith("\x1b[91m---------------------------------------------------------------------------"):
        return _result_to_jupyter(r)
    # Cells that later fail the lint are still recorded, because their
    # definitions really are live in the interpreter.
    whole_code = whole_code + code + "\n"
    # Cell line numbers are 1-based, matching the numbers in tracebacks.
    # TODO: callers would be better served by a binary-tree lookup.
    whole_code_line.extend(
        "%s in <cell line: %s>"%(filename, lineno)
        for lineno, _ in enumerate(code.split("\n"), start=1)
    )
    # A cell without any output (functions only, etc.) is linted instead.
    if r == "":
        return _lint_history()
    return _result_to_jupyter(r)
def sandbox_reset(senv):
    """Clear the sandboxed interpreter's globals, reinstall the helper code
    and forget the recorded cell history.

    ``file_count`` is left untouched, so default cell names keep counting up
    across resets.

    Args:
        senv: ``(pid, fd)`` pair as returned by ``sandbox_pyenv()``.
    """
    global whole_code
    global whole_code_line
    reset_code = """globals().clear()
import gc
gc.collect()
globals().clear()
"""
    _sandbox_pyexec(senv, reset_code + helper_code)
    whole_code = ""
    whole_code_line = [""]
# Demo of the Jupyter-like sandbox with Pyrefly linting. The trailing comment
# (or the string literal that follows a call) shows the output the author
# expected.
senv = sandbox_pyenv()
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "a = 100")) # [exec][/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "print(a)")) # [exec]100\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "a")) # [exec]100\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "b")) # [exec]---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\n/tmp/test-4.py in <cell line:1>()\n----> 1 b\n\nNameError: name 'b' is not defined\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, """print("aaa")\nprint("bbb")""")) # [exec]aaa\nbbb\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, """print("aaa" + \\\n "bbb")""")) # [exec]aaabbb\n[/exec]
sandbox_reset(senv)
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "a")) # [exec]---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\n/tmp/test-7.py in <cell line:1>()\n----> 1 a\n\nNameError: name 'a' is not defined\n[/exec]
try:
    print("[exec]%s[/exec]"%sandbox_pyexec(senv, """while True:\n continue""")) # TimeoutError
except TimeoutError:
    print ("TimeoutError: execution timed out")
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "print(100)")) # [exec]100\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "if True:\n pass\nprint(100)")) # [exec]100\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "if True:\n pass\nprint(\"\"\"ok\"\"\")")) # [exec]ok\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "if True:\n pass\nprint('''ok''')")) # [exec]ok\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "if True:\n pass\n'ok'")) # [exec]'ok'\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "# comment test")) # [exec]\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "def a(x): return x*x")) # [exec][/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "x=100\ny=200\na('a')")) # [exec]---------------------------------------------------------------------------\nTypeError Traceback (most recent call last)\n/tmp/test-16.py in <cell line:3>()\n----> 3 a('a')\n/tmp/test-15.py in <cell line:1>(), in a(x='a')\n----> 1 def a(x): return x*x\n x = 'a'\nTypeError: can't multiply sequence by non-int of type 'str'\n[/exec]
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "import sympy as sp\ndef a(x):\n sp.undefined()\n return sp.undefined2()"))
"""
[exec]ERROR No attribute `undefined` in module `sympy` [missing-attribute]
--> test-17.py in <cell line: 2>:5
|
18 | sp.undefined()
| ^^^^^^^^^^^^
|
ERROR No attribute `undefined2` in module `sympy` [missing-attribute]
--> test-17.py in <cell line: 3>:12
|
19 | return sp.undefined2()
| ^^^^^^^^^^^^^
|
[/exec]
"""
print("[exec]%s[/exec]"%sandbox_pyexec(senv, "def a():\n print('ok')\n raise Exception('ng')\na()", "a.py"))
"""
[exec]ok
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
/tmp/a.py in <cell line:4>()
2 print('ok')
3 raise Exception('ng')
----> 4 a()
/tmp/a.py in <cell line:3>(), in a()
1 def a():
2 print('ok')
----> 3 raise Exception('ng')
Exception: ng
[/exec]
"""