Reputation: 4948
Suppose I have the following BUILD
file
py_library(
name = "foo",
src = ["foo.py"],
data = ["//bar:data.json"],
)
How should I refer to the data.json
in foo.py
file? I wanted to have something like below, what should I use for some_path
?
with open(os.path.join(some_path, "bar/data.json"), 'r') as fp:
data = json.load(fp)
I couldn't find much general documentation about *.runfiles
online -- any pointer will be appreciated!
Upvotes: 7
Views: 8701
Reputation: 348
For the benefit of others who find there way here, the canonical solution is to use the provided "runfiles" library provided by the rules_python
package for bazel.
In my environment, it looks roughly like this:
from rules_python.python.runfiles import runfiles
resource = "workspace/bar/data.json"
r = runfiles.Create()
path = r.Rlocation(resource)
More documentation can be found here:
Upvotes: 1
Reputation: 61
Here is a function that should return the path to the runfiles root for any py_binary in all the cases that I'm aware of:
import os
import re
def find_runfiles():
"""Find the runfiles tree (useful when _not_ run from a zip file)"""
# Follow symlinks, looking for my module space
stub_filename = os.path.abspath(sys.argv[0])
while True:
# Found it?
module_space = stub_filename + '.runfiles'
if os.path.isdir(module_space):
break
runfiles_pattern = r"(.*\.runfiles)"
matchobj = re.match(runfiles_pattern, os.path.abspath(sys.argv[0]))
if matchobj:
module_space = matchobj.group(1)
break
raise RuntimeError('Cannot find .runfiles directory for %s' %
sys.argv[0])
return module_space
For the example in your question you could use it like so:
with open(os.path.join(find_runfiles(), "name_of_workspace/bar/data.json"), 'r') as fp:
data = json.load(fp)
Note that this function won't help if you build zipped executables of your python apps (using subpar, probably); for those you will need some more code. This next snippet includes get_resource_filename()
and get_resource_directory()
, which will work for both regular py_binary and .par binaries:
import atexit
import os
import re
import shutil
import sys
import tempfile
import zipfile
def get_resource_filename(path):
zip_path = get_zip_path(sys.modules.get("__main__").__file__)
if zip_path:
tmpdir = tempfile.mkdtemp()
atexit.register(lambda: shutil.rmtree(tmpdir, ignore_errors=True))
zf = BetterZipFile(zip_path)
zf.extract(member=path, path=tmpdir)
return os.path.join(tmpdir, path)
elif os.path.exists(path):
return path
else:
path_in_runfiles = os.path.join(find_runfiles(), path)
if os.path.exists(path_in_runfiles):
return path_in_runfiles
else:
raise ResourceNotFoundError
def get_resource_directory(path):
"""Find or extract an entire subtree and return its location."""
zip_path = get_zip_path(sys.modules.get("__main__").__file__)
if zip_path:
tmpdir = tempfile.mkdtemp()
atexit.register(lambda: shutil.rmtree(tmpdir, ignore_errors=True))
zf = BetterZipFile(zip_path)
members = []
for fn in zf.namelist():
if fn.startswith(path):
members += [fn]
zf.extractall(members=members, path=tmpdir)
return os.path.join(tmpdir, path)
elif os.path.exists(path):
return path
else:
path_in_runfiles = os.path.join(find_runfiles(), path)
if os.path.exists(path_in_runfiles):
return path_in_runfiles
else:
raise ResourceNotFoundError
def get_zip_path(path):
"""If path is inside a zip file, return the zip file's path."""
if path == os.path.sep:
return None
elif zipfile.is_zipfile(path):
return path
return get_zip_path(os.path.dirname(path))
class ResourceNotFoundError(RuntimeError):
pass
def find_runfiles():
"""Find the runfiles tree (useful when _not_ run from a zip file)"""
# Follow symlinks, looking for my module space
stub_filename = os.path.abspath(sys.argv[0])
while True:
# Found it?
module_space = stub_filename + '.runfiles'
if os.path.isdir(module_space):
break
runfiles_pattern = r"(.*\.runfiles)"
matchobj = re.match(runfiles_pattern, os.path.abspath(sys.argv[0]))
if matchobj:
module_space = matchobj.group(1)
break
raise RuntimeError('Cannot find .runfiles directory for %s' %
sys.argv[0])
return module_space
class BetterZipFile(zipfile.ZipFile):
"""Shim around ZipFile that preserves permissions on extract."""
def extract(self, member, path=None, pwd=None):
if not isinstance(member, zipfile.ZipInfo):
member = self.getinfo(member)
if path is None:
path = os.getcwd()
ret_val = self._extract_member(member, path, pwd)
attr = member.external_attr >> 16
os.chmod(ret_val, attr)
return ret_val
Using this second code snippet, your example would look like:
with open(get_resource_filename("name_of_workspace/bar/data.json"), 'r') as fp:
data = json.load(fp)
Upvotes: 6
Reputation: 2999
Short answer: os.path.dirname(__file__)
Here is the full example:
$ ls
bar/ BUILD foo.py WORKSPACE
$ cat BUILD
py_binary(
name = "foo",
srcs = ["foo.py"],
data = ["//bar:data.json"],
)
$ cat foo.py
import json
import os
ws = os.path.dirname(__file__)
with open(os.path.join(ws, "bar/data.json"), 'r') as fp:
print(json.load(fp))
$ cat bar/BUILD
exports_files(["data.json"])
$ bazel run :foo
Edit: it doesn't work well when your package is in a subdirectory. You may need to go back using os.path.dirname
.
Upvotes: 6