diff --git a/BUILD b/BUILD index e120f11..b88e70c 100644 --- a/BUILD +++ b/BUILD @@ -25,7 +25,11 @@ filegroup( # Implementation: # If you are looking into the implementation, start with the overview in ImplementationReadme.md. -exports_files(["refresh.template.py", "check_python_version.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use. +exports_files([ + "refresh.template.py", + "check_python_version.template.py", + "refresh_wrapper.sh.template", # Wrapper script to avoid rules_python placeholder issues +]) # For implicit use by the refresh_compile_commands macro, not direct use. cc_binary( name = "print_args", diff --git a/README.md b/README.md index dcf07f0..4a8c0d3 100644 --- a/README.md +++ b/README.md @@ -227,6 +227,34 @@ Everything should also work for generated files, though you may have to run a bu ## Rough Edges +### Hermetic Python Toolchains + +**Problem:** If your project uses hermetic Python toolchains (via `rules_python`), you may encounter errors like: + +``` +python3: can't open file '%interpreter_args%': [Errno 2] No such file or directory +``` + +**Cause:** By default, this tool uses `py_binary`, which relies on `rules_python`'s template placeholder expansion mechanism. When projects specify custom hermetic Python interpreters (e.g., `@python3_12_host//:python`), these placeholders (`%interpreter_args%`, `%stage2_bootstrap%`) may fail to expand correctly. + +**Solution:** Use the `use_hermetic_python_workaround` parameter to enable an alternative implementation using `sh_binary`: + +```Starlark +refresh_compile_commands( + name = "refresh_compile_commands", + targets = {"//...": ""}, + use_hermetic_python_workaround = True, # Enable workaround for hermetic Python +) +``` + +This workaround bypasses `rules_python`'s templating entirely by using a simple shell wrapper that directly invokes the `python3` found on your `PATH` (set the `PYTHON` environment variable to use a different interpreter), so this tool itself does not run under the hermetic interpreter. The traditional `py_binary` approach remains the default for backward compatibility. 
+ +**Who's affected:** Projects using hermetic Python toolchains, particularly large C++ projects with complex Python build requirements (e.g., Envoy, TensorFlow) or builds in containerized environments. + +**Related:** See issues #165, #245, #168 for more context. + +--- + Otherwise, we've self-filed issues for the rough edges we know about and are tracking. We'd love to hear from you there about what you're seeing, good and bad. Please add things if you find more rough edges, and let us know if you need help or more features. On the other hand, if you've set things up and they're working well, we'd still love to hear from you. Please file a "non-issue" in the issues tab describing your success! We'd love to hear what you're working on, what platforms you're using, and what you're finding most useful. And maybe also toss a star our way so we know it was helpful to you. diff --git a/refresh.template.py b/refresh.template.py index 194f365..691bb9a 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -1422,3 +1422,7 @@ def main(): indent=2, # Yay, human readability! check_circular=False # For speed. ) + + +if __name__ == "__main__": + main() diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 0210d42..824ce16 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -64,6 +64,7 @@ def refresh_compile_commands( targets = None, exclude_headers = None, exclude_external_sources = False, + use_hermetic_python_workaround = False, **kwargs): # For the other common attributes. Tags, compatible_with, etc. https://docs.bazel.build/versions/main/be/common-definitions.html#common-attributes. # Convert the various, acceptable target shorthands into the dictionary format # In Python, `type(x) == y` is an antipattern, but [Starlark doesn't support inheritance](https://bazel.build/rules/language), so `isinstance` doesn't exist, and this is the correct way to switch on type. 
@@ -83,23 +84,44 @@ def refresh_compile_commands(
         target if target.startswith("/") or target.startswith("@") else "{}//{}:{}".format(native.repository_name(), native.package_name(), target.removeprefix(":")): flags for target, flags in targets.items()
     }

-    # Create a wrapper script that prints a helpful error message if the python version is too old, generated from check_python_version.template.py
-    version_checker_script_name = name + ".check_python_version.py"
-    _check_python_version(name = version_checker_script_name, to_run = name)
-
     # Generate the core, runnable python script from refresh.template.py
     script_name = name + ".py"
     _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, **kwargs)

-    # Combine them so the wrapper calls the main script
-    native.py_binary(
-        name = name,
-        main = version_checker_script_name,
-        srcs = [version_checker_script_name, script_name],
-        data = ["@hedron_compile_commands//:print_args"],
-        imports = [''], # Allows binary to import templated script, even if this macro is being called inside a sub package. See https://github.com/hedronvision/bazel-compile-commands-extractor/issues/137
-        **kwargs
-    )
+    if use_hermetic_python_workaround:
+        # Use sh_binary wrapper to avoid hermetic Python toolchain issues
+        # This bypasses rules_python's template placeholder expansion which can fail
+        # with hermetic Python interpreters (e.g., @python3_12_host//:python)
+        wrapper_script_name = name + "_wrapper.sh"
+        _expand_wrapper_template(
+            name = wrapper_script_name,
+            main_script = script_name,
+            **kwargs
+        )
+        native.sh_binary(
+            name = name,
+            srcs = [wrapper_script_name],
+            data = [
+                script_name,
+                "@hedron_compile_commands//:print_args",
+            ],
+            **kwargs
+        )
+    else:
+        # Standard py_binary approach (default, backward compatible)
+        # Create a wrapper script that prints a helpful error message if the python version is too old
+        version_checker_script_name = name + ".check_python_version.py"
+        _check_python_version(name = version_checker_script_name, to_run = name)
+
+        # Combine them so the wrapper calls the main script
+        native.py_binary(
+            name = name,
+            main = version_checker_script_name,
+            srcs = [version_checker_script_name, script_name],
+            data = ["@hedron_compile_commands//:print_args"],
+            imports = [''], # Allows binary to import templated script, even if this macro is being called inside a sub package. See https://github.com/hedronvision/bazel-compile-commands-extractor/issues/137
+            **kwargs
+        )

 def _expand_template_impl(ctx):
     """Inject targets of interest--and other settings--into refresh.template.py, and set it up to be run."""
@@ -154,3 +176,31 @@ _check_python_version = rule(
     },
     implementation = _check_python_version_impl,
 )
+
+def _expand_wrapper_template_impl(ctx):
+    """Expand the bash wrapper template, injecting the runfiles-relative path of the Python script it should execute."""
+    # Runfiles are laid out as <repo name>/<package>/<file>. The main script is generated by the same macro call as this target, so it is expected to share this target's repo and package.
+    repo_name = ctx.label.workspace_name or ctx.workspace_name  # label.workspace_name is empty for targets in the main repo.
+    main_script_runfiles_path = "/".join([part for part in [repo_name, ctx.label.package, ctx.attr.main_script] if part])
+
+    script = ctx.actions.declare_file(ctx.attr.name)
+    ctx.actions.expand_template(
+        output = script,
+        is_executable = True,
+        template = ctx.file._wrapper_template,
+        substitutions = {
+            "{main_script}": main_script_runfiles_path,
+        },
+    )
+    return DefaultInfo(files = depset([script]))
+
+_expand_wrapper_template = rule(
+    attrs = {
+        "main_script": attr.string(mandatory = True),
+        "_wrapper_template": attr.label(
+            allow_single_file = True,
+            default = "refresh_wrapper.sh.template"
+        ),
+    },
+    implementation = _expand_wrapper_template_impl,
+)
diff --git a/refresh_wrapper.sh.template b/refresh_wrapper.sh.template
new file mode 100644
index 0000000..5a5a8e6
--- /dev/null
+++ b/refresh_wrapper.sh.template
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+# Wrapper script to run Hedron's Compile Commands extractor
+# This script bypasses rules_python's templating system to avoid
+# placeholder expansion issues with hermetic Python toolchains.
+
+set -euo pipefail
+
+# Find the directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Get the base name of this wrapper script (e.g., "compdb_wrapper.sh" -> "compdb")
+WRAPPER_NAME="$(basename "${BASH_SOURCE[0]}")"
+BASE_NAME="${WRAPPER_NAME%_wrapper.sh}"
+
+# Runfiles directory (where Bazel puts data dependencies)
+# The wrapper script is named "NAME_wrapper.sh" but the runfiles tree is "NAME.runfiles"
+RUNFILES_DIR="${SCRIPT_DIR}/${BASE_NAME}.runfiles"
+
+# If not found, try the standard Bazel pattern (script_name.runfiles)
+if [[ ! -d "${RUNFILES_DIR}" ]]; then
+    RUNFILES_DIR="${SCRIPT_DIR}/${WRAPPER_NAME}.runfiles"
+fi
+
+# Python executable to use (from system PATH)
+PYTHON="${PYTHON:-python3}"
+
+# Verify Python version is 3.6+
+$PYTHON - <<'EOF'
+import sys
+if sys.version_info < (3, 6):
+    sys.exit("\n\033[31mFATAL ERROR:\033[0m Python 3.6 or later is required. Please update!")
+EOF
+
+# Set up environment for the Python script
+export RUNFILES_DIR
+export PATH="${RUNFILES_DIR}/hedron_compile_commands:${PATH}"
+
+# Locate the main Python script. The path below is filled in at build time with the script's
+# runfiles-relative location (<repo>/<package>/<file>), so it works in any workspace and package.
+PYTHON_SCRIPT="${RUNFILES_DIR}/{main_script}"
+if [[ ! -f "${PYTHON_SCRIPT}" ]]; then
+    echo "ERROR: Could not find Python script at ${PYTHON_SCRIPT}" >&2
+    exit 1
+fi
+
+# Execute the main Python script with all arguments passed through
+exec $PYTHON "$PYTHON_SCRIPT" "$@"