| # Copyright 2020 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Recipe for training ML inliner model for Clang.""" |
| |
| from recipe_engine.recipe_api import Property |
| |
| import re |
| |
# Recipe modules this recipe depends on. Each entry is reached in the code
# below through `api.<last path component>` (for example "fuchsia/jiri" is
# used as `api.jiri` and "recipe_engine/step" as `api.step`).
DEPS = [
    "fuchsia/archive",
    "fuchsia/build",
    "fuchsia/buildbucket_util",
    "fuchsia/checkout",
    "fuchsia/git",
    "fuchsia/jiri",
    "fuchsia/upload",
    "recipe_engine/cipd",
    "recipe_engine/context",
    "recipe_engine/file",
    "recipe_engine/path",
    "recipe_engine/properties",
    "recipe_engine/python",
    "recipe_engine/raw_io",
    "recipe_engine/step",
]
| |
# Input properties of the recipe. The keys match the parameter names of
# RunSteps. `manifest` and `remote` default to None; `fint_params_paths` is
# declared without a default.
PROPERTIES = {
    "manifest": Property(kind=str, help="Jiri manifest to use", default=None),
    "remote": Property(kind=str, help="Manifest project remote", default=None),
    "fint_params_paths": Property(
        kind=dict, help="Mapping from target arch to fint parameter file path"
    ),
}
| |
# Maps a platform name, as listed in the `platforms` field of the Clang
# package data, to the target triple passed as `--target_triple` when the
# trained model is AOT-compiled for that platform.
PLATFORM_TO_TARGET = {
    "linux-amd64": "x86_64-unknown-linux-gnu",
    "linux-arm64": "aarch64-unknown-linux-gnu",
    "mac-amd64": "x86_64-apple-darwin",
    "windows-amd64": "x86_64-pc-windows-msvc",
}
| |
| |
def RunSteps(
    api,
    manifest,
    remote,
    fint_params_paths,
):
    """Train the ML inliner model for Clang and upload the results.

    The recipe runs these phases in order:
      1. Check out Fuchsia and read the pinned Clang package version.
      2. Check out LLVM at that version, then build and install Clang.
      3. Check out ml-compiler-opt.
      4. Build Fuchsia once per entry in `fint_params_paths` with the just
         built Clang and extract a training corpus from each build.
      5. Train the warmstart model and then the final model.
      6. Upload the model, then AOT-compile and upload it once per platform
         listed for the Clang package.

    Args:
      api: The recipe API object.
      manifest: Jiri manifest to use.
      remote: Manifest project remote.
      fint_params_paths: Mapping from target arch to fint parameter file
        path. One Fuchsia build is run per entry, in iteration order.
    """
    checkout = api.checkout.fuchsia_with_options(manifest=manifest, remote=remote)

    with api.context(cwd=checkout.root_dir):
        project_data = api.jiri.project(["fuchsia"]).json.output
        assert len(project_data) == 1
        # NOTE(review): this value is read but never used. The original code
        # stored it in `revision` and then overwrote that variable with the
        # result of the LLVM checkout, so uploads are tagged with the LLVM
        # checkout revision. Confirm which revision is meant to tag packages.
        fuchsia_revision = project_data[0]["revision"]

        package_data = api.jiri.package(
            ["fuchsia/third_party/clang/${platform}"],
            test_data=[
                {
                    "name": "fuchsia/third_party/clang/${platform}",
                    "manifest": str(checkout.root_dir.join("integration", "prebuilts")),
                    "path": str(
                        checkout.root_dir.join(
                            "prebuilt", "third_party", "clang", "linux-x64"
                        )
                    ),
                    "platforms": [
                        "linux-amd64",
                        "linux-arm64",
                        "mac-amd64",
                        "windows-amd64",
                    ],
                    "version": "git_revision:f52666985d7011b539f26f54e09a5c89b62dad56",
                }
            ],
        ).json.output
        assert len(package_data) == 1
        # The version has the form "<kind>:<value>"; keep only the value.
        clang_version = package_data[0]["version"].split(":", 1)[1]

    start_dir = api.path["start_dir"]
    # Every Python tool below runs inside the same vpython environment.
    vpython_spec = api.resource("tensorflow.vpython")

    llvm_revision, install_dir = _build_clang(
        api, checkout, clang_version, vpython_spec
    )

    # Use the just built Clang to build Fuchsia.
    api.build.clang_toolchain_dir = install_dir

    ml_compiler_opt_dir = start_dir.join("ml-compiler-opt")
    with api.step.nest("ml-compiler-opt"):
        with api.step.nest("git"), api.context(infra_steps=True):
            api.git.checkout(
                "https://github.com/google/ml-compiler-opt",
                path=ml_compiler_opt_dir,
                ref="b235539ce2e5c6e5fddd1ad77638f433f5ac8689",
            )

    compiler_opt_dir = ml_compiler_opt_dir.join("compiler_opt")
    corpus_dir = start_dir.join("corpus")
    _build_corpus(
        api, checkout, fint_params_paths, compiler_opt_dir, corpus_dir, vpython_spec
    )

    model_output_dir = _train_model(
        api,
        ml_compiler_opt_dir,
        compiler_opt_dir,
        corpus_dir,
        install_dir,
        vpython_spec,
    )

    api.archive.upload(model_output_dir, output_property="isolated")

    api.upload.cipd_package(
        "fuchsia/model/inlining",
        model_output_dir,
        [api.upload.DirectoryPath(model_output_dir)],
        {"git_revision": llvm_revision},
    )

    _compile_and_upload_platform_models(
        api, package_data[0]["platforms"], model_output_dir, llvm_revision
    )


def _ensure_build_packages(api, cipd_dir):
    """Install the CIPD packages used by the Clang build into `cipd_dir`."""
    pkgs = api.cipd.EnsureFile()
    pkgs.add_package("fuchsia/sdk/core/linux-amd64", "latest", "sdk")
    pkgs.add_package(
        "fuchsia/third_party/sysroot/linux",
        "tp7-Zyo4pv2SVEoK_eaU6yuKmyxJWcR54vtJKTWpTIYC",
        "linux",
    )
    pkgs.add_package(
        "fuchsia/third_party/libtensorflow/${platform}",
        "version:1.15.0",
        "libtensorflow",
    )
    pkgs.add_package(
        "fuchsia/third_party/cmake/${platform}",
        "git_revision:fab301bb9d6d7d1c92db077fcd4789c0eb03203f",
    )
    pkgs.add_package(
        "fuchsia/third_party/ninja/${platform}",
        "git_revision:0ccc7886fd4694ae1372d29b4954e2dd3be118be",
    )
    api.cipd.ensure(cipd_dir, pkgs)


def _build_clang(api, checkout, clang_version, vpython_spec):
    """Check out LLVM at `clang_version`, then build and install Clang.

    Args:
      api: The recipe API object.
      checkout: The Fuchsia checkout; its `root_dir` supplies the CMake
        toolchain file and the runtimes generator script.
      clang_version: Git ref of llvm-project to check out.
      vpython_spec: Path passed as `venv` to the Python steps.

    Returns:
      A `(llvm_revision, install_dir)` tuple, where `llvm_revision` is the
      value returned by `api.git.checkout` and `install_dir` is the DESTDIR
      the distribution was installed into.
    """
    start_dir = api.path["start_dir"]

    with api.step.nest("llvm"):
        llvm_dir = start_dir.join("llvm")
        llvm_project_dir = llvm_dir.join("llvm-project")

        with api.step.nest("git"), api.context(infra_steps=True):
            llvm_revision = api.git.checkout(
                "https://llvm.googlesource.com/llvm-project",
                path=llvm_project_dir,
                ref=clang_version,
            )

        cipd_dir = start_dir.join("cipd")
        _ensure_build_packages(api, cipd_dir)
        # Subdirectories match the ones given to add_package above.
        sdk_dir = cipd_dir.join("sdk")
        sysroot_dir = cipd_dir.join("linux")
        libtensorflow_dir = cipd_dir.join("libtensorflow")
        ninja = cipd_dir.join("ninja")

        build_dir = llvm_dir.join("build")
        api.step(
            "configure",
            [
                cipd_dir.join("bin", "cmake"),
                "-G",
                "Ninja",
                "-DCMAKE_MAKE_PROGRAM=%s" % ninja,
                "-DCMAKE_INSTALL_PREFIX=",
                "-DLLVM_ENABLE_LTO=OFF",
                "-DCMAKE_TOOLCHAIN_FILE=%s"
                % checkout.root_dir.join("scripts", "clang", "ToolChain.cmake"),
                "-DLINUX_x86_64-unknown-linux-gnu_SYSROOT=%s" % sysroot_dir,
                "-DLINUX_aarch64-unknown-linux-gnu_SYSROOT=%s" % sysroot_dir,
                "-DFUCHSIA_SDK=%s" % sdk_dir,
                "-DTENSORFLOW_C_LIB_PATH=%s" % libtensorflow_dir,
                "-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON",
                "-DCMAKE_FIND_ROOT_PATH=%s" % libtensorflow_dir,
                "-C",
                llvm_project_dir.join(
                    "clang",
                    "cmake",
                    "caches",
                    "Fuchsia-stage2.cmake",
                ),
                "-S",
                llvm_project_dir.join("llvm"),
                "-B",
                build_dir,
            ],
        )
        api.step("build", [ninja, "-C", build_dir, "distribution"])

        install_dir = llvm_dir.join("install")
        # The install prefix is empty, so DESTDIR alone decides where the
        # distribution lands.
        with api.context(env={"DESTDIR": install_dir}):
            api.step("install", [ninja, "-C", build_dir, "install-distribution"])

        # Ship the TensorFlow libraries next to the toolchain's own libraries.
        for f in api.file.listdir(
            "tensorflow libraries",
            libtensorflow_dir.join("lib"),
            test_data=["libtensorflow.so"],
        ):
            api.file.copy("copy %s" % f, f, install_dir.join("lib"))

        api.python(
            "generate runtimes.json",
            checkout.root_dir.join("scripts", "clang", "generate_runtimes.py"),
            args=[
                "--clang-prefix=%s" % install_dir,
                "--sdk-dir=%s" % sdk_dir,
                "--build-id-dir=%s" % install_dir.join("lib", ".build-id"),
            ],
            venv=vpython_spec,
            # The script's stdout is captured into lib/runtime.json.
            stdout=api.raw_io.output(
                leak_to=install_dir.join("lib", "runtime.json"), add_output_log=True
            ),
        )

    return llvm_revision, install_dir


def _build_corpus(
    api, checkout, fint_params_paths, compiler_opt_dir, corpus_dir, vpython_spec
):
    """Build Fuchsia per target arch and assemble the training corpus.

    Args:
      api: The recipe API object.
      checkout: The Fuchsia checkout to build.
      fint_params_paths: Mapping from target arch to fint parameter file path.
      compiler_opt_dir: The `compiler_opt` directory of ml-compiler-opt.
      corpus_dir: Directory that receives one subdirectory per target arch.
      vpython_spec: Path passed as `venv` to the Python steps.
    """
    with api.step.nest("corpus"):
        for target_arch, fint_params_path in fint_params_paths.items():
            with api.step.nest(target_arch):
                # Build Fuchsia for each target architecture...
                build_result = api.build.with_options(
                    checkout=checkout,
                    fint_params_path=fint_params_path,
                    build_dir=checkout.root_dir.join("out", target_arch),
                )

                # ...and extract IR from the generated object files.
                api.python(
                    "extract ir",
                    compiler_opt_dir.join("tools", "extract_ir.py"),
                    args=[
                        "--cmd_filter=^-Oz$",
                        "--input=%s" % build_result.compdb_path,
                        "--input_type=json",
                        "--llvm_objcopy_path=%s" % build_result.tool("llvm-objcopy"),
                        "--output_dir=%s" % corpus_dir.join(target_arch),
                    ],
                    venv=vpython_spec,
                )

        # Combine IR from all builds into a single training corpus.
        api.python(
            "combine training corpus",
            compiler_opt_dir.join("tools", "combine_training_corpus.py"),
            args=[
                "--root_dir=%s" % corpus_dir,
            ],
            venv=vpython_spec,
        )


def _train_model(
    api, ml_compiler_opt_dir, compiler_opt_dir, corpus_dir, install_dir, vpython_spec
):
    """Train the warmstart model and then the final inlining model.

    Args:
      api: The recipe API object.
      ml_compiler_opt_dir: ml-compiler-opt checkout, prepended to PYTHONPATH.
      compiler_opt_dir: The `compiler_opt` directory of ml-compiler-opt.
      corpus_dir: Directory holding the combined training corpus.
      install_dir: Install directory of the just built Clang.
      vpython_spec: Path passed as `venv` to the Python steps.

    Returns:
      The directory passed as `--root_dir` to the final training step.
    """
    start_dir = api.path["start_dir"]
    default_trace_dir = start_dir.join("default_trace")
    warmstart_output_dir = start_dir.join("warmstart")
    model_output_dir = start_dir.join("model")

    clang_path = install_dir.join("bin", "clang")
    llvm_size_path = install_dir.join("bin", "llvm-size")
    gin_configs_dir = compiler_opt_dir.join("rl", "inlining", "gin_configs")

    with api.step.nest("train"), api.context(
        env_prefixes={"PYTHONPATH": [ml_compiler_opt_dir]}
    ):
        # Collect traces from the default heuristic, to kick off the training
        # process.
        api.python(
            "generate default trace",
            compiler_opt_dir.join("tools", "generate_default_trace.py"),
            args=[
                "--data_path=%s" % corpus_dir,
                "--output_path=%s" % default_trace_dir,
                "--compile_task=inlining",
                "--clang_path=%s" % clang_path,
                "--llvm_size_path=%s" % llvm_size_path,
                "--sampling_rate=0.2",
            ],
            venv=vpython_spec,
        )

        # Train a behavioral cloning model based on the above trace, that
        # mimics default inlining behavior. This is the 'warmstart' model.
        api.python(
            "train bc",
            compiler_opt_dir.join("rl", "train_bc.py"),
            args=[
                "--root_dir=%s" % warmstart_output_dir,
                "--data_path=%s" % default_trace_dir,
                "--gin_files=%s"
                % gin_configs_dir.join("behavioral_cloning_nn_agent.gin"),
            ],
            venv=vpython_spec,
        )

        # Starting from the 'warmstart' model, train the optimized model.
        api.python(
            "train locally",
            compiler_opt_dir.join("rl", "train_locally.py"),
            args=[
                "--root_dir=%s" % model_output_dir,
                "--data_path=%s" % corpus_dir,
                "--clang_path=%s" % clang_path,
                "--llvm_size_path=%s" % llvm_size_path,
                "--num_modules=100",
                "--gin_files=%s" % gin_configs_dir.join("ppo_nn_agent.gin"),
                '--gin_bindings=train_eval.warmstart_policy_dir="%s"'
                % warmstart_output_dir.join("saved_policy"),
            ],
            venv=vpython_spec,
        )

    return model_output_dir


def _compile_and_upload_platform_models(api, platforms, model_output_dir, revision):
    """AOT-compile the trained model for each platform and upload it.

    Args:
      api: The recipe API object.
      platforms: Platform names; each must be a key of PLATFORM_TO_TARGET.
      model_output_dir: Directory containing the trained model.
      revision: Value for the `git_revision` entry of each uploaded package.
    """
    start_dir = api.path["start_dir"]

    with api.step.nest("generate"):
        venv_dir = start_dir.join("tensorflow-venv")
        # The helper script prints a path on stdout; the simulated output is
        # the tensorflow package directory inside the venv.
        tensorflow_aot_path = api.python(
            "get tensorflow",
            api.resource("get_tensorflow.py"),
            args=["-vpython-root", venv_dir],
            stdout=api.raw_io.output(name="tensorflow-path", add_output_log=True),
            step_test_data=lambda: api.raw_io.test_api.stream_output(
                "%s" % venv_dir.join("lib", "python3.8", "site-packages", "tensorflow")
            ),
        ).stdout.strip()
        # saved_model_cli is reached four directory levels above the printed
        # path, under bin/.
        tensorflow_aot_compiler = (
            tensorflow_aot_path + "/../../../../bin/saved_model_cli"
        )

        for platform in platforms:
            inliner_model_dir = start_dir.join("inliner_model", platform)
            api.step(
                "aot_compile %s" % platform,
                [
                    tensorflow_aot_compiler,
                    "aot_compile_cpu",
                    "--multithreading=false",
                    "--dir=%s" % model_output_dir,
                    "--tag_set=serve",
                    "--signature_def_key=action",
                    "--output_prefix=%s" % inliner_model_dir.join("InlinerSizeModel"),
                    "--cpp_class=llvm::InlinerSizeModel",
                    "--target_triple=%s" % PLATFORM_TO_TARGET[platform],
                ],
            )

            api.upload.cipd_package(
                "fuchsia/model/inlining/%s" % platform,
                inliner_model_dir,
                [api.upload.DirectoryPath(inliner_model_dir)],
                {"git_revision": revision},
            )
| |
| |
def GenTests(api):
    """Yield the simulation test cases for this recipe.

    There is a single "basic" case, which supplies a manifest, a remote and
    one fint parameter file each for arm64 and x64.
    """
    basic_case = api.buildbucket_util.test("basic")
    recipe_properties = api.properties(
        manifest="fuchsia",
        remote="https://fuchsia.googlesource.com/fuchsia",
        fint_params_paths={
            "arm64": "specs/clang-ml-training-arm64.fint.textproto",
            "x64": "specs/clang-ml-training-x64.fint.textproto",
        },
    )
    yield basic_case + recipe_properties