init: allow entering of network namespaces

Add the ability to enter a network namespace when launching a service.
Typical usage looks similar to the following:

on fs
  exec ip netns add namespace_name

service vendor_something /vendor/...
  capabilities <lower than root>
  user not_root
  enter_namespace net /mnt/.../namespace_name

Note that changes to the `ip` tool are needed to create the namespace
in the correct directory.

Bug: 73334854
Test: not yet
Change-Id: Ifa91c873d36d69db399bb9c04ff2362518a0b07d
diff --git a/init/README.md b/init/README.md
index c08b07a..550ef05 100644
--- a/init/README.md
+++ b/init/README.md
@@ -195,6 +195,10 @@
 > This service will not automatically start with its class.
   It must be explicitly started by name or by interface name.
 
+`enter_namespace <type> <path>`
+> Enters the namespace of type _type_ located at _path_. Only network namespaces are supported,
+  with _type_ set to "net". Note that only one namespace of a given _type_ may be entered.
+
 `file <path> <type>`
 > Open a file path and pass its fd to the launched process. _type_ must be
   "r", "w" or "rw".  For native executables see libcutils
diff --git a/init/service.cpp b/init/service.cpp
index 03c2cee..0e08d9b 100644
--- a/init/service.cpp
+++ b/init/service.cpp
@@ -34,6 +34,7 @@
 #include <android-base/parseint.h>
 #include <android-base/stringprintf.h>
 #include <android-base/strings.h>
+#include <android-base/unique_fd.h>
 #include <hidl-util/FQName.h>
 #include <processgroup/processgroup.h>
 #include <selinux/selinux.h>
@@ -59,13 +60,13 @@
 using android::base::ParseInt;
 using android::base::StartsWith;
 using android::base::StringPrintf;
+using android::base::unique_fd;
 using android::base::WriteStringToFile;
 
 namespace android {
 namespace init {
 
-static Result<std::string> ComputeContextFromExecutable(std::string& service_name,
-                                                        const std::string& service_path) {
+static Result<std::string> ComputeContextFromExecutable(const std::string& service_path) {
     std::string computed_context;
 
     char* raw_con = nullptr;
@@ -101,36 +102,49 @@
     return computed_context;
 }
 
-static void SetUpPidNamespace(const std::string& service_name) {
+Result<Success> Service::SetUpMountNamespace() const {
     constexpr unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID;
 
-    // It's OK to LOG(FATAL) in this function since it's running in the first
-    // child process.
-
     // Recursively remount / as slave like zygote does so unmounting and mounting /proc
     // doesn't interfere with the parent namespace's /proc mount. This will also
     // prevent any other mounts/unmounts initiated by the service from interfering
     // with the parent namespace but will still allow mount events from the parent
     // namespace to propagate to the child.
     if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
-        PLOG(FATAL) << "couldn't remount(/) recursively as slave for " << service_name;
-    }
-    // umount() then mount() /proc.
-    // Note that it is not sufficient to mount with MS_REMOUNT.
-    if (umount("/proc") == -1) {
-        PLOG(FATAL) << "couldn't umount(/proc) for " << service_name;
-    }
-    if (mount("", "/proc", "proc", kSafeFlags, "") == -1) {
-        PLOG(FATAL) << "couldn't mount(/proc) for " << service_name;
+        return ErrnoError() << "Could not remount(/) recursively as slave";
     }
 
-    if (prctl(PR_SET_NAME, service_name.c_str()) == -1) {
-        PLOG(FATAL) << "couldn't set name for " << service_name;
+    // umount() then mount() /proc (new PID namespace) and/or /sys (entered network namespace).
+    // Note that it is not sufficient to mount with MS_REMOUNT.
+    if (namespace_flags_ & CLONE_NEWPID) {
+        if (umount("/proc") == -1) {
+            return ErrnoError() << "Could not umount(/proc)";
+        }
+        if (mount("", "/proc", "proc", kSafeFlags, "") == -1) {
+            return ErrnoError() << "Could not mount(/proc)";
+        }
+    }
+    bool remount_sys = std::any_of(namespaces_to_enter_.begin(), namespaces_to_enter_.end(),
+                                   [](const auto& entry) { return entry.first == CLONE_NEWNET; });
+    if (remount_sys) {
+        if (umount2("/sys", MNT_DETACH) == -1) {
+            return ErrnoError() << "Could not umount(/sys)";
+        }
+        if (mount("", "/sys", "sysfs", kSafeFlags, "") == -1) {
+            return ErrnoError() << "Could not mount(/sys)";
+        }
+    }
+    return Success();
+}
+
+Result<Success> Service::SetUpPidNamespace() const {
+    if (prctl(PR_SET_NAME, name_.c_str()) == -1) {
+        return ErrnoError() << "Could not set name";
     }
 
     pid_t child_pid = fork();
     if (child_pid == -1) {
-        PLOG(FATAL) << "couldn't fork init inside the PID namespace for " << service_name;
+        return ErrnoError() << "Could not fork init inside the PID namespace";
     }
 
     if (child_pid > 0) {
@@ -153,6 +167,20 @@
         }
         _exit(WEXITSTATUS(init_exitstatus));
     }
+    return Success();
+}
+
+Result<Success> Service::EnterNamespaces() const {
+    for (const auto& [nstype, path] : namespaces_to_enter_) {
+        auto fd = unique_fd{open(path.c_str(), O_RDONLY | O_CLOEXEC)};
+        if (fd == -1) {  // "!fd" would test for descriptor 0, not for open() failure.
+            return ErrnoError() << "Could not open namespace at " << path;
+        }
+        if (setns(fd, nstype) == -1) {
+            return ErrnoError() << "Could not setns() namespace at " << path;
+        }
+    }
+    return Success();
 }
 
 static bool ExpandArgsAndExecv(const std::vector<std::string>& args, bool sigstop) {
@@ -422,6 +450,20 @@
     return Success();
 }
 
+Result<Success> Service::ParseEnterNamespace(const std::vector<std::string>& args) {
+    if (args[1] != "net") {
+        return Error() << "Init only supports entering network namespaces";
+    }
+    if (!namespaces_to_enter_.empty()) {
+        return Error() << "Only one network namespace may be entered";
+    }
+    // Entering a network namespace requires remounting /sys, otherwise the adapters of the old
+    // namespace would still be visible there. That remount in turn needs a mount namespace.
+    namespace_flags_ |= CLONE_NEWNS;
+    namespaces_to_enter_.emplace_back(CLONE_NEWNET, args[2]);
+    return Success();
+}
+
 Result<Success> Service::ParseGroup(const std::vector<std::string>& args) {
     auto gid = DecodeUid(args[1]);
     if (!gid) {
@@ -691,6 +733,8 @@
         {"console",     {0,     1,    &Service::ParseConsole}},
         {"critical",    {0,     0,    &Service::ParseCritical}},
         {"disabled",    {0,     0,    &Service::ParseDisabled}},
+        {"enter_namespace",
+                        {2,     2,    &Service::ParseEnterNamespace}},
         {"file",        {2,     2,    &Service::ParseFile}},
         {"group",       {1,     NR_SVC_SUPP_GIDS + 1, &Service::ParseGroup}},
         {"interface",   {2,     2,    &Service::ParseInterface}},
@@ -793,7 +837,7 @@
     if (!seclabel_.empty()) {
         scon = seclabel_;
     } else {
-        auto result = ComputeContextFromExecutable(name_, args_[0]);
+        auto result = ComputeContextFromExecutable(args_[0]);
         if (!result) {
             return result.error();
         }
@@ -812,10 +856,24 @@
     if (pid == 0) {
         umask(077);
 
+        if (auto result = EnterNamespaces(); !result) {
+            LOG(FATAL) << "Service '" << name_ << "' could not enter namespaces: " << result.error();
+        }
+
+        if (namespace_flags_ & CLONE_NEWNS) {
+            if (auto result = SetUpMountNamespace(); !result) {
+                LOG(FATAL) << "Service '" << name_
+                           << "' could not set up mount namespace: " << result.error();
+            }
+        }
+
         if (namespace_flags_ & CLONE_NEWPID) {
             // This will fork again to run an init process inside the PID
             // namespace.
-            SetUpPidNamespace(name_);
+            if (auto result = SetUpPidNamespace(); !result) {
+                LOG(FATAL) << "Service '" << name_
+                           << "' could not set up PID namespace: " << result.error();
+            }
         }
 
         for (const auto& [key, value] : environment_vars_) {
diff --git a/init/service.h b/init/service.h
index 9cb35b8..cbfd52f 100644
--- a/init/service.h
+++ b/init/service.h
@@ -125,6 +125,9 @@
     using OptionParser = Result<Success> (Service::*)(const std::vector<std::string>& args);
     class OptionParserMap;
 
+    Result<Success> SetUpMountNamespace() const;
+    Result<Success> SetUpPidNamespace() const;
+    Result<Success> EnterNamespaces() const;
     void NotifyStateChange(const std::string& new_state) const;
     void StopOrReset(int how);
     void ZapStdio() const;
@@ -137,6 +140,7 @@
     Result<Success> ParseConsole(const std::vector<std::string>& args);
     Result<Success> ParseCritical(const std::vector<std::string>& args);
     Result<Success> ParseDisabled(const std::vector<std::string>& args);
+    Result<Success> ParseEnterNamespace(const std::vector<std::string>& args);
     Result<Success> ParseGroup(const std::vector<std::string>& args);
     Result<Success> ParsePriority(const std::vector<std::string>& args);
     Result<Success> ParseInterface(const std::vector<std::string>& args);
@@ -181,6 +185,8 @@
     std::vector<gid_t> supp_gids_;
     CapSet capabilities_;
     unsigned namespace_flags_;
+    // Pair of namespace type, path to namespace.
+    std::vector<std::pair<int, std::string>> namespaces_to_enter_;
 
     std::string seclabel_;