// Copyright 2020 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #if !defined(_WIN32) #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include "libreprl.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Well-known file descriptor numbers for reprl <-> child communication, child process side #define REPRL_CHILD_CTRL_IN 100 #define REPRL_CHILD_CTRL_OUT 101 #define REPRL_CHILD_DATA_IN 102 #define REPRL_CHILD_DATA_OUT 103 /// Maximum timeout in microseconds. Mostly just limited by the fact that the timeout in milliseconds has to fit into a 32-bit integer. #define REPRL_MAX_TIMEOUT_IN_MICROSECONDS ((uint64_t)(INT_MAX) * 1000) static size_t min(size_t x, size_t y) { return x < y ? x : y; } #ifdef __linux__ // This function creates the UID/GID mapping that we need inside of the user // namespace. This is needed such that the files we create have a proper owner // attached to them. static void write_id_maps(uid_t uid, gid_t gid) { char setgroups_path[] = "/proc/self/setgroups"; char uid_map_path[] = "/proc/self/uid_map"; char gid_map_path[] = "/proc/self/gid_map"; int setgroups_fd = open(setgroups_path, O_WRONLY); int uid_map_fd = open(uid_map_path, O_WRONLY); int gid_map_fd = open(gid_map_path, O_WRONLY); if (setgroups_fd == -1 || uid_map_fd == -1 || gid_map_fd == -1) { fprintf(stderr, "Error opening setgroups/uid_map/gid_map file: %s\n", strerror(errno)); _exit(-1); } // More context on this: https://lwn.net/Articles/626665/ dprintf(setgroups_fd, "deny"); dprintf(uid_map_fd, "%d %d 1", uid, uid); dprintf(gid_map_fd, "%d %d 1", gid, gid); close(setgroups_fd); close(uid_map_fd); close(gid_map_fd); } // Creates a tmpfs at `mount_point` in a new user namespace. static void create_tmpfs(const char* mount_point) { // Get the UID and GID before we call unshare. uid_t uid = getuid(); gid_t gid = getgid(); // We create a new user (CLONE_NEWUSER) and mount (CLONE_NEWNS) // namespace here such that we can mount our own tmpfs onto // mount_point that is only visible to this process. if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == -1) { fprintf(stderr, "unshare failed to create a new mount namespace in the child: %s\n", strerror(errno)); _exit(-1); }; // Now write the UID / GID mappings write_id_maps(uid, gid); // Mount a new tmpfs onto `mount_point` this allows us to add files // here that get automatically cleaned up once the process exits. if (mount("tmpfs", mount_point, "tmpfs", 0, NULL) == -1) { fprintf(stderr, "mount failed to create a tmpfs in namespace in the child: %s\n", strerror(errno)); _exit(-1); } } #endif static uint64_t current_usecs() { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec * 1000000 + ts.tv_nsec / 1000; } static char** copy_string_array(const char** orig) { size_t num_entries = 0; for (const char** current = orig; *current; current++) { num_entries += 1; } char** copy = calloc(num_entries + 1, sizeof(char*)); for (size_t i = 0; i < num_entries; i++) { copy[i] = strdup(orig[i]); } return copy; } static void free_string_array(char** arr) { if (!arr) return; for (char** current = arr; *current; current++) { free(*current); } free(arr); } // A unidirectional communication channel for larger amounts of data, up to a maximum size (REPRL_MAX_DATA_SIZE). // Implemented as a (RAM-backed) file for which the file descriptor is shared with the child process and which is mapped into our address space. struct data_channel { // File descriptor of the underlying file. Directly shared with the child process. int fd; // Memory mapping of the file, always of size REPRL_MAX_DATA_SIZE. char* mapping; }; struct reprl_context { // Whether reprl_initialize has been successfully performed on this context. int initialized; // Read file descriptor of the control pipe. Only valid if a child process is running (i.e. pid is nonzero). int ctrl_in; // Write file descriptor of the control pipe. Only valid if a child process is running (i.e. pid is nonzero). int ctrl_out; // Data channel REPRL -> Child struct data_channel* data_in; // Data channel Child -> REPRL struct data_channel* data_out; // Optional data channel for the child's stdout and stderr. struct data_channel* child_stdout; struct data_channel* child_stderr; // PID of the child process. Will be zero if no child process is currently running. pid_t pid; // Arguments and environment for the child process. char** argv; char** envp; // A malloc'd string containing a description of the last error that occurred. char* last_error; }; static int reprl_error(struct reprl_context* ctx, const char *format, ...) { va_list args; va_start(args, format); free(ctx->last_error); vasprintf(&ctx->last_error, format, args); return -1; } static struct data_channel* reprl_create_data_channel(struct reprl_context* ctx) { #ifdef __linux__ int fd = memfd_create("REPRL_DATA_CHANNEL", MFD_CLOEXEC); #else char path[] = "/tmp/reprl_data_channel_XXXXXXXX"; int fd = mkostemp(path, O_CLOEXEC); unlink(path); #endif if (fd == -1 || ftruncate(fd, REPRL_MAX_DATA_SIZE) != 0) { reprl_error(ctx, "Failed to create data channel file: %s", strerror(errno)); return NULL; } char* mapping = mmap(0, REPRL_MAX_DATA_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (mapping == MAP_FAILED) { reprl_error(ctx, "Failed to mmap data channel file: %s", strerror(errno)); return NULL; } struct data_channel* channel = malloc(sizeof(struct data_channel)); channel->fd = fd; channel->mapping = mapping; return channel; } static void reprl_destroy_data_channel(struct data_channel* channel) { if (!channel) return; close(channel->fd); munmap(channel->mapping, REPRL_MAX_DATA_SIZE); free(channel); } static void reprl_child_terminated(struct reprl_context* ctx) { if (!ctx->pid) return; ctx->pid = 0; close(ctx->ctrl_in); close(ctx->ctrl_out); } static void reprl_terminate_child(struct reprl_context* ctx) { if (!ctx->pid) return; int status; kill(ctx->pid, SIGKILL); waitpid(ctx->pid, &status, 0); reprl_child_terminated(ctx); } static int reprl_spawn_child(struct reprl_context* ctx) { // This is also a good time to ensure the data channel backing files don't grow too large. ftruncate(ctx->data_in->fd, REPRL_MAX_DATA_SIZE); ftruncate(ctx->data_out->fd, REPRL_MAX_DATA_SIZE); if (ctx->child_stdout) ftruncate(ctx->child_stdout->fd, REPRL_MAX_DATA_SIZE); if (ctx->child_stderr) ftruncate(ctx->child_stderr->fd, REPRL_MAX_DATA_SIZE); int crpipe[2] = { 0, 0 }; // control pipe child -> reprl int cwpipe[2] = { 0, 0 }; // control pipe reprl -> child if (pipe(crpipe) != 0) { return reprl_error(ctx, "Could not create pipe for REPRL communication: %s", strerror(errno)); } if (pipe(cwpipe) != 0) { close(crpipe[0]); close(crpipe[1]); return reprl_error(ctx, "Could not create pipe for REPRL communication: %s", strerror(errno)); } ctx->ctrl_in = crpipe[0]; ctx->ctrl_out = cwpipe[1]; fcntl(ctx->ctrl_in, F_SETFD, FD_CLOEXEC); fcntl(ctx->ctrl_out, F_SETFD, FD_CLOEXEC); #ifdef __linux__ // This is where we will mount our own tmpfs, this is intended to be used // for targets like Chrome, where we have to pass the user data directory. // Even if the target does not clean up after themselves, the tmpfs in the // user namespace will be removed once the process exits. Also, every child // process, i.e. fuzzing instance, can then have it's own tmpfs. // This only works on Linux right now, which is where we fuzz Chrome, this // won't work on any other OS. const char mount_point[] = "/tmp/fuzzilli_tmp"; // Create the mountpoint for our tmpfs here. This is just an empty dir. // We also do not really care if this directory exists, we just need it as // a mountpoint. if (mkdir(mount_point, 0)) { if (errno != EEXIST) { fprintf(stderr, "mkdir failed to create %s to create a mountpoint: %s\n", mount_point, strerror(errno)); } } #endif #ifdef __linux__ // Use vfork() on Linux as that considerably improves the fuzzer performance. See also https://github.com/googleprojectzero/fuzzilli/issues/174 // Due to vfork, the code executed in the child process *must not* modify any memory apart from its stack, as it will share the page table of its parent. pid_t pid = vfork(); #else pid_t pid = fork(); #endif if (pid == 0) { if (dup2(cwpipe[0], REPRL_CHILD_CTRL_IN) < 0 || dup2(crpipe[1], REPRL_CHILD_CTRL_OUT) < 0 || dup2(ctx->data_out->fd, REPRL_CHILD_DATA_IN) < 0 || dup2(ctx->data_in->fd, REPRL_CHILD_DATA_OUT) < 0) { fprintf(stderr, "dup2 failed in the child: %s\n", strerror(errno)); _exit(-1); } #ifdef __linux__ // Set RLIMIT_CORE to 0, such that we don't produce core dumps. The // added benefit of doing this here, in the child process, is that we // can still get core dumps when Fuzzilli crashes. struct rlimit core_limit; core_limit.rlim_cur = 0; core_limit.rlim_max = 0; if (setrlimit(RLIMIT_CORE, &core_limit) < 0) { fprintf(stderr, "setrlimit failed in the child: %s\n", strerror(errno)); _exit(-1); }; #endif // Unblock any blocked signals. It seems that libdispatch sometimes blocks delivery of certain signals. sigset_t newset; sigemptyset(&newset); if (sigprocmask(SIG_SETMASK, &newset, NULL) != 0) { fprintf(stderr, "sigprocmask failed in the child: %s\n", strerror(errno)); _exit(-1); } close(cwpipe[0]); close(crpipe[1]); int devnull = open("/dev/null", O_RDWR); dup2(devnull, 0); if (ctx->child_stdout) dup2(ctx->child_stdout->fd, 1); else dup2(devnull, 1); if (ctx->child_stderr) dup2(ctx->child_stderr->fd, 2); else dup2(devnull, 2); close(devnull); #ifdef __linux__ // Create the tmpfs at the specific mount point here in the child process // such that we have a tmpfs for this process only that will be cleaned up at process exit. // This will also write into the necessary files in /proc, so we need to do this here after we've fork()'ed. // This will only work on Linux, see the comment above where call mkdir. create_tmpfs(mount_point); #endif // close all other FDs. We try to use FD_CLOEXEC everywhere, but let's be extra sure we don't leak any fds to the child. int tablesize = getdtablesize(); for (int i = 3; i < tablesize; i++) { if (i == REPRL_CHILD_CTRL_IN || i == REPRL_CHILD_CTRL_OUT || i == REPRL_CHILD_DATA_IN || i == REPRL_CHILD_DATA_OUT) { continue; } close(i); } execve(ctx->argv[0], ctx->argv, ctx->envp); fprintf(stderr, "Failed to execute child process %s: %s\n", ctx->argv[0], strerror(errno)); fflush(stderr); _exit(-1); } close(crpipe[1]); close(cwpipe[0]); if (pid < 0) { close(ctx->ctrl_in); close(ctx->ctrl_out); return reprl_error(ctx, "Failed to fork: %s", strerror(errno)); } ctx->pid = pid; char helo[5] = { 0 }; if (read(ctx->ctrl_in, helo, 4) != 4) { reprl_terminate_child(ctx); return reprl_error(ctx, "Did not receive HELO message from child: %s", strerror(errno)); } if (strncmp(helo, "HELO", 4) != 0) { reprl_terminate_child(ctx); return reprl_error(ctx, "Received invalid HELO message from child: %s", helo); } if (write(ctx->ctrl_out, helo, 4) != 4) { reprl_terminate_child(ctx); return reprl_error(ctx, "Failed to send HELO reply message to child: %s", strerror(errno)); } #ifdef __linux__ struct rlimit core_limit = {}; if (prlimit(pid, RLIMIT_CORE, NULL, &core_limit) < 0) { reprl_terminate_child(ctx); return reprl_error(ctx, "prlimit failed: %s\n", strerror(errno)); } if (core_limit.rlim_cur != 0 || core_limit.rlim_max != 0) { reprl_terminate_child(ctx); return reprl_error(ctx, "Detected non-zero RLIMIT_CORE. Check that the child does not set RLIMIT_CORE manually.\n"); } #endif return 0; } struct reprl_context* reprl_create_context() { // "Reserve" the well-known REPRL fds so no other fd collides with them. // This would cause various kinds of issues in reprl_spawn_child. // It would be enough to do this once per process in the case of multiple // REPRL instances, but it's probably not worth the implementation effort. int devnull = open("/dev/null", O_RDWR); dup2(devnull, REPRL_CHILD_CTRL_IN); dup2(devnull, REPRL_CHILD_CTRL_OUT); dup2(devnull, REPRL_CHILD_DATA_IN); dup2(devnull, REPRL_CHILD_DATA_OUT); close(devnull); return calloc(1, sizeof(struct reprl_context)); } int reprl_initialize_context(struct reprl_context* ctx, const char** argv, const char** envp, int capture_stdout, int capture_stderr) { if (ctx->initialized) { return reprl_error(ctx, "Context is already initialized"); } // We need to ignore SIGPIPE since we could end up writing to a pipe after our child process has exited. signal(SIGPIPE, SIG_IGN); ctx->argv = copy_string_array(argv); ctx->envp = copy_string_array(envp); ctx->data_in = reprl_create_data_channel(ctx); ctx->data_out = reprl_create_data_channel(ctx); if (capture_stdout) { ctx->child_stdout = reprl_create_data_channel(ctx); } if (capture_stderr) { ctx->child_stderr = reprl_create_data_channel(ctx); } if (!ctx->data_in || !ctx->data_out || (capture_stdout && !ctx->child_stdout) || (capture_stderr && !ctx->child_stderr)) { // Proper error message will have been set by reprl_create_data_channel return -1; } ctx->initialized = 1; return 0; } void reprl_destroy_context(struct reprl_context* ctx) { reprl_terminate_child(ctx); free_string_array(ctx->argv); free_string_array(ctx->envp); reprl_destroy_data_channel(ctx->data_in); reprl_destroy_data_channel(ctx->data_out); reprl_destroy_data_channel(ctx->child_stdout); reprl_destroy_data_channel(ctx->child_stderr); free(ctx->last_error); free(ctx); } int reprl_execute(struct reprl_context* ctx, const char* script, uint64_t script_size, uint64_t timeout, uint64_t* execution_time, int fresh_instance) { if (!ctx->initialized) { return reprl_error(ctx, "REPRL context is not initialized"); } if (script_size > REPRL_MAX_DATA_SIZE) { return reprl_error(ctx, "Script too large"); } if (timeout > REPRL_MAX_TIMEOUT_IN_MICROSECONDS) { return reprl_error(ctx, "Timeout too large"); } int timeout_ms = (int)(timeout / 1000); // Terminate any existing instance if requested. if (fresh_instance && ctx->pid) { reprl_terminate_child(ctx); } // Reset file position so the child can simply read(2) and write(2) to these fds. lseek(ctx->data_out->fd, 0, SEEK_SET); lseek(ctx->data_in->fd, 0, SEEK_SET); if (ctx->child_stdout) { lseek(ctx->child_stdout->fd, 0, SEEK_SET); } if (ctx->child_stderr) { lseek(ctx->child_stderr->fd, 0, SEEK_SET); } // Spawn a new instance if necessary. if (!ctx->pid) { int r = reprl_spawn_child(ctx); if (r != 0) return r; } // Copy the script to the data channel. memcpy(ctx->data_out->mapping, script, script_size); // Tell child to execute the script. if (write(ctx->ctrl_out, "exec", 4) != 4 || write(ctx->ctrl_out, &script_size, 8) != 8) { // These can fail if the child unexpectedly terminated between executions. // Check for that here to be able to provide a better error message. int status; if (waitpid(ctx->pid, &status, WNOHANG) == ctx->pid) { reprl_child_terminated(ctx); if (WIFEXITED(status)) { return reprl_error(ctx, "Child unexpectedly exited with status %i between executions", WEXITSTATUS(status)); } else { return reprl_error(ctx, "Child unexpectedly terminated with signal %i between executions", WTERMSIG(status)); } } return reprl_error(ctx, "Failed to send command to child process: %s", strerror(errno)); } // Wait for child to finish execution (or crash). uint64_t start_time = current_usecs(); struct pollfd fds = {.fd = ctx->ctrl_in, .events = POLLIN, .revents = 0}; int res = poll(&fds, 1, timeout_ms); *execution_time = current_usecs() - start_time; if (res == 0) { // Execution timed out. Kill child and return a timeout status. reprl_terminate_child(ctx); return 1 << 16; } else if (res != 1) { // An error occurred. // We expect all signal handlers to be installed with SA_RESTART, so receiving EINTR here is unexpected and thus also an error. return reprl_error(ctx, "Failed to poll: %s", strerror(errno)); } // Poll succeeded, so there must be something to read now (either the status or EOF). int status; ssize_t rv = read(ctx->ctrl_in, &status, 4); if (rv < 0) { return reprl_error(ctx, "Failed to read from control pipe: %s", strerror(errno)); } else if (rv != 4) { // Most likely, the child process crashed and closed the write end of the control pipe. // Unfortunately, there probably is nothing that guarantees that waitpid() will immediately succeed now, // and we also don't want to block here. So just retry waitpid() a few times... int success = 0; do { success = waitpid(ctx->pid, &status, WNOHANG) == ctx->pid; if (!success) usleep(10); } while (!success && current_usecs() - start_time < timeout); if (!success) { // Wait failed, so something weird must have happened. Maybe somehow the control pipe was closed without the child exiting? // Probably the best we can do is kill the child and return an error. reprl_terminate_child(ctx); return reprl_error(ctx, "Child in weird state after execution"); } // Cleanup any state related to this child process. reprl_child_terminated(ctx); if (WIFEXITED(status)) { status = WEXITSTATUS(status) << 8; } else if (WIFSIGNALED(status)) { status = WTERMSIG(status); } else { // This shouldn't happen, since we don't specify WUNTRACED for waitpid... return reprl_error(ctx, "Waitpid returned unexpected child state %i", status); } } // The status must be a positive number, see the status encoding format below. // We also don't allow the child process to indicate a timeout. If we wanted, // we could treat it as an error if the upper bits are set. status &= 0xffff; return status; } static const char* fetch_data_channel_content(struct data_channel* channel) { if (!channel) return ""; size_t pos = lseek(channel->fd, 0, SEEK_CUR); pos = min(pos, REPRL_MAX_DATA_SIZE - 1); channel->mapping[pos] = 0; return channel->mapping; } const char* reprl_fetch_fuzzout(struct reprl_context* ctx) { return fetch_data_channel_content(ctx->data_in); } const char* reprl_fetch_stdout(struct reprl_context* ctx) { return fetch_data_channel_content(ctx->child_stdout); } const char* reprl_fetch_stderr(struct reprl_context* ctx) { return fetch_data_channel_content(ctx->child_stderr); } const char* reprl_get_last_error(struct reprl_context* ctx) { return ctx->last_error; } #endif