nodejs/deps/v8/test/fuzzilli/libreprl.c
Michaël Zasso 5edec0e39a
deps: update V8 to 13.0.245.25
PR-URL: https://github.com/nodejs/node/pull/55014
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
2025-01-31 12:45:51 +01:00

602 lines
21 KiB
C

// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#if !defined(_WIN32)
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include "libreprl.h"
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
// Well-known file descriptor numbers for reprl <-> child communication, as seen from the child process.
// reprl_spawn_child dup2()s the control pipe ends and the data channel files onto these numbers in the child.
#define REPRL_CHILD_CTRL_IN 100
#define REPRL_CHILD_CTRL_OUT 101
#define REPRL_CHILD_DATA_IN 102
#define REPRL_CHILD_DATA_OUT 103
// Maximum timeout in microseconds. Limited by the fact that the timeout, once converted to milliseconds, has to fit into an int.
#define REPRL_MAX_TIMEOUT_IN_MICROSECONDS ((uint64_t)(INT_MAX) * 1000)
// Returns the smaller of the two sizes.
static size_t min(size_t x, size_t y) {
    if (y <= x) {
        return y;
    }
    return x;
}
#ifdef __linux__
// Creates the UID/GID mapping that we need inside of the user namespace by
// mapping `uid` and `gid` to themselves. This is needed such that the files we
// create have a proper owner attached to them.
//
// Must be called after entering the new user namespace. Any failure to open or
// write one of the /proc/self files is reported on stderr and terminates the
// process with _exit(-1).
static void write_id_maps(uid_t uid, gid_t gid) {
    int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
    int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
    int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);

    if (setgroups_fd == -1 || uid_map_fd == -1 || gid_map_fd == -1) {
        fprintf(stderr, "Error opening setgroups/uid_map/gid_map file: %s\n", strerror(errno));
        _exit(-1);
    }

    // "deny" has to be written to setgroups before gid_map may be written.
    // More context on this: https://lwn.net/Articles/626665/
    //
    // uid_t and gid_t are not `int`, so they are widened explicitly and
    // printed as unsigned values; a negative number would be rejected.
    if (dprintf(setgroups_fd, "deny") < 0 ||
        dprintf(uid_map_fd, "%lu %lu 1", (unsigned long)uid, (unsigned long)uid) < 0 ||
        dprintf(gid_map_fd, "%lu %lu 1", (unsigned long)gid, (unsigned long)gid) < 0) {
        fprintf(stderr, "Error writing setgroups/uid_map/gid_map file: %s\n", strerror(errno));
        _exit(-1);
    }

    close(setgroups_fd);
    close(uid_map_fd);
    close(gid_map_fd);
}
// Enters a new user and mount namespace and mounts a tmpfs onto `mount_point`
// inside of it. Every failure is reported on stderr and terminates the process
// with _exit(-1).
static void create_tmpfs(const char* mount_point) {
    // The IDs have to be looked up before unshare is called.
    const uid_t outer_uid = getuid();
    const gid_t outer_gid = getgid();

    // A new user namespace (CLONE_NEWUSER) together with a new mount namespace
    // (CLONE_NEWNS) lets us mount our own tmpfs onto mount_point that is only
    // visible to this process.
    const int namespace_flags = CLONE_NEWUSER | CLONE_NEWNS;
    if (unshare(namespace_flags) == -1) {
        fprintf(stderr, "unshare failed to create a new mount namespace in the child: %s\n", strerror(errno));
        _exit(-1);
    }

    // Set up the UID / GID mappings inside of the new user namespace.
    write_id_maps(outer_uid, outer_gid);

    // Files placed in this tmpfs get cleaned up automatically once the
    // process exits.
    int mount_result = mount("tmpfs", mount_point, "tmpfs", 0, NULL);
    if (mount_result == -1) {
        fprintf(stderr, "mount failed to create a tmpfs in namespace in the child: %s\n", strerror(errno));
        _exit(-1);
    }
}
#endif
// Returns the current value of the monotonic clock in microseconds.
// Only differences between two returned values are meaningful.
static uint64_t current_usecs(void)
{
    struct timespec ts = { 0 };
    clock_gettime(CLOCK_MONOTONIC, &ts);
    // Widen before multiplying: time_t may be a 32-bit type, in which case
    // `tv_sec * 1000000` would overflow after roughly 35 minutes worth of seconds.
    return (uint64_t)ts.tv_sec * 1000000u + (uint64_t)ts.tv_nsec / 1000u;
}
// Creates a deep copy of the NULL-terminated string array `orig`.
//
// Returns a newly allocated, NULL-terminated array that has to be released with
// free_string_array, or NULL if `orig` is NULL or an allocation failed. On
// failure nothing is leaked.
static char** copy_string_array(const char** orig)
{
    if (!orig) return NULL;

    size_t num_entries = 0;
    while (orig[num_entries]) {
        num_entries += 1;
    }

    // One extra, zeroed slot serves as the NULL terminator.
    char** copy = calloc(num_entries + 1, sizeof(char*));
    if (!copy) return NULL;

    for (size_t i = 0; i < num_entries; i++) {
        size_t size = strlen(orig[i]) + 1;
        copy[i] = malloc(size);
        if (!copy[i]) {
            // A NULL entry in the middle would silently truncate the array,
            // so give up and release everything copied so far.
            while (i > 0) {
                free(copy[--i]);
            }
            free(copy);
            return NULL;
        }
        memcpy(copy[i], orig[i], size);
    }
    return copy;
}
// Releases a NULL-terminated array of heap-allocated strings, including the
// array itself. Passing NULL is allowed and does nothing.
static void free_string_array(char** arr)
{
    if (arr == NULL) {
        return;
    }

    for (size_t idx = 0; arr[idx] != NULL; idx++) {
        free(arr[idx]);
    }
    free(arr);
}
// A unidirectional communication channel for larger amounts of data, up to a maximum size (REPRL_MAX_DATA_SIZE).
// Implemented as a file (a memfd on Linux, an unlinked temporary file elsewhere) for which the file descriptor
// is shared with the child process and which is also mapped into our address space.
struct data_channel {
// File descriptor of the underlying file. Directly shared with the child process.
int fd;
// Memory mapping of the file, always of size REPRL_MAX_DATA_SIZE.
char* mapping;
};
// State of one REPRL instance: the child process (if any), the channels used to talk to it,
// and the configuration needed to (re)spawn it.
struct reprl_context {
// Whether reprl_initialize has been successfully performed on this context.
int initialized;
// Read file descriptor of the control pipe. Only valid if a child process is running (i.e. pid is nonzero).
int ctrl_in;
// Write file descriptor of the control pipe. Only valid if a child process is running (i.e. pid is nonzero).
int ctrl_out;
// Data channel Child -> REPRL. The child sees it as REPRL_CHILD_DATA_OUT; its content is returned by reprl_fetch_fuzzout.
struct data_channel* data_in;
// Data channel REPRL -> Child. The child sees it as REPRL_CHILD_DATA_IN; reprl_execute copies the script into it.
struct data_channel* data_out;
// Optional data channels for the child's stdout and stderr. NULL if the respective stream is not captured.
struct data_channel* child_stdout;
struct data_channel* child_stderr;
// PID of the child process. Will be zero if no child process is currently running.
pid_t pid;
// Arguments and environment for the child process (NULL-terminated, owned copies).
char** argv;
char** envp;
// A malloc'd string containing a description of the last error that occurred.
char* last_error;
};
// Records a printf-style formatted error message as the last error of `ctx`,
// replacing (and freeing) any previous one.
//
// Always returns -1 so that callers can write `return reprl_error(...)`.
// If the message cannot be allocated, the last error is reset to NULL.
static int reprl_error(struct reprl_context* ctx, const char *format, ...)
{
    char* message = NULL;

    va_list args;
    va_start(args, format);
    // The content of the output pointer is undefined when vasprintf fails, so
    // make sure we never store (and later free) a garbage pointer.
    if (vasprintf(&message, format, args) < 0) {
        message = NULL;
    }
    va_end(args);

    // Only release the old message after formatting, in case it was passed as
    // one of the format arguments.
    free(ctx->last_error);
    ctx->last_error = message;
    return -1;
}
// Creates a new data channel: a file of size REPRL_MAX_DATA_SIZE (a memfd on
// Linux, an unlinked temporary file elsewhere) that is mapped into our address
// space. The file descriptor is opened close-on-exec.
//
// Returns the new channel, which has to be released with
// reprl_destroy_data_channel, or NULL on failure, in which case the error is
// recorded in `ctx` and no resources are leaked.
static struct data_channel* reprl_create_data_channel(struct reprl_context* ctx)
{
#ifdef __linux__
    int fd = memfd_create("REPRL_DATA_CHANNEL", MFD_CLOEXEC);
#else
    char path[] = "/tmp/reprl_data_channel_XXXXXXXX";
    int fd = mkostemp(path, O_CLOEXEC);
    // Only unlink if a file was actually created. Otherwise unlink would
    // overwrite the errno value that is reported below.
    if (fd != -1) unlink(path);
#endif
    if (fd == -1) {
        reprl_error(ctx, "Failed to create data channel file: %s", strerror(errno));
        return NULL;
    }

    if (ftruncate(fd, REPRL_MAX_DATA_SIZE) != 0) {
        // Record the error before close() gets a chance to modify errno.
        reprl_error(ctx, "Failed to create data channel file: %s", strerror(errno));
        close(fd);
        return NULL;
    }

    char* mapping = mmap(0, REPRL_MAX_DATA_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (mapping == MAP_FAILED) {
        reprl_error(ctx, "Failed to mmap data channel file: %s", strerror(errno));
        close(fd);
        return NULL;
    }

    struct data_channel* channel = malloc(sizeof(struct data_channel));
    if (!channel) {
        reprl_error(ctx, "Failed to allocate data channel: %s", strerror(errno));
        munmap(mapping, REPRL_MAX_DATA_SIZE);
        close(fd);
        return NULL;
    }
    channel->fd = fd;
    channel->mapping = mapping;
    return channel;
}
// Closes the backing file of `channel`, removes its memory mapping and frees
// the channel itself. Passing NULL is allowed and does nothing.
static void reprl_destroy_data_channel(struct data_channel* channel)
{
    if (channel != NULL) {
        close(channel->fd);
        munmap(channel->mapping, REPRL_MAX_DATA_SIZE);
        free(channel);
    }
}
// Cleans up the per-child state of `ctx` once its child process is gone:
// marks the context as having no child and closes both control pipe ends.
// Does nothing if no child process is registered.
static void reprl_child_terminated(struct reprl_context* ctx)
{
    if (ctx->pid != 0) {
        ctx->pid = 0;
        close(ctx->ctrl_in);
        close(ctx->ctrl_out);
    }
}
// Kills the child process of `ctx` with SIGKILL, waits for it, and then cleans
// up the per-child state. Does nothing if no child process is running.
static void reprl_terminate_child(struct reprl_context* ctx)
{
    pid_t child = ctx->pid;
    if (child == 0) {
        return;
    }

    kill(child, SIGKILL);

    int wait_status;
    waitpid(child, &wait_status, 0);

    reprl_child_terminated(ctx);
}
// Spawns a new child process for `ctx` and performs the initial handshake with it.
//
// The child is started with execve(ctx->argv[0], ctx->argv, ctx->envp). Before that, the control pipes and
// data channels are moved to the well-known REPRL_CHILD_* file descriptors, stdin is redirected to /dev/null,
// stdout/stderr are redirected to the capture channels (or /dev/null), and all other file descriptors are closed.
//
// Returns 0 on success, in which case ctx->pid, ctx->ctrl_in and ctx->ctrl_out are valid.
// Returns -1 on failure, with the error recorded via reprl_error.
static int reprl_spawn_child(struct reprl_context* ctx)
{
// This is also a good time to ensure the data channel backing files don't grow too large.
ftruncate(ctx->data_in->fd, REPRL_MAX_DATA_SIZE);
ftruncate(ctx->data_out->fd, REPRL_MAX_DATA_SIZE);
if (ctx->child_stdout) ftruncate(ctx->child_stdout->fd, REPRL_MAX_DATA_SIZE);
if (ctx->child_stderr) ftruncate(ctx->child_stderr->fd, REPRL_MAX_DATA_SIZE);
int crpipe[2] = { 0, 0 }; // control pipe child -> reprl
int cwpipe[2] = { 0, 0 }; // control pipe reprl -> child
if (pipe(crpipe) != 0) {
return reprl_error(ctx, "Could not create pipe for REPRL communication: %s", strerror(errno));
}
if (pipe(cwpipe) != 0) {
close(crpipe[0]);
close(crpipe[1]);
return reprl_error(ctx, "Could not create pipe for REPRL communication: %s", strerror(errno));
}
// We keep the read end of the child -> reprl pipe and the write end of the reprl -> child pipe.
// Both are marked close-on-exec so that they do not leak into the child.
ctx->ctrl_in = crpipe[0];
ctx->ctrl_out = cwpipe[1];
fcntl(ctx->ctrl_in, F_SETFD, FD_CLOEXEC);
fcntl(ctx->ctrl_out, F_SETFD, FD_CLOEXEC);
#ifdef __linux__
// This is where we will mount our own tmpfs, this is intended to be used
// for targets like Chrome, where we have to pass the user data directory.
// Even if the target does not clean up after themselves, the tmpfs in the
// user namespace will be removed once the process exits. Also, every child
// process, i.e. fuzzing instance, can then have its own tmpfs.
// This only works on Linux right now, which is where we fuzz Chrome, this
// won't work on any other OS.
const char mount_point[] = "/tmp/fuzzilli_tmp";
// Create the mountpoint for our tmpfs here. This is just an empty dir.
// We also do not really care if this directory exists, we just need it as
// a mountpoint.
if (mkdir(mount_point, 0)) {
if (errno != EEXIST) {
fprintf(stderr, "mkdir failed to create %s to create a mountpoint: %s\n", mount_point, strerror(errno));
}
}
#endif
#ifdef __linux__
// Use vfork() on Linux as that considerably improves the fuzzer performance. See also https://github.com/googleprojectzero/fuzzilli/issues/174
// Due to vfork, the code executed in the child process *must not* modify any memory apart from its stack, as it will share the page table of its parent.
pid_t pid = vfork();
#else
pid_t pid = fork();
#endif
if (pid == 0) {
// Child process: move the pipe ends and data channels to the well-known file descriptor numbers.
// Note that the channel names are from the parent's point of view: our data_out is the child's DATA_IN and vice versa.
if (dup2(cwpipe[0], REPRL_CHILD_CTRL_IN) < 0 ||
dup2(crpipe[1], REPRL_CHILD_CTRL_OUT) < 0 ||
dup2(ctx->data_out->fd, REPRL_CHILD_DATA_IN) < 0 ||
dup2(ctx->data_in->fd, REPRL_CHILD_DATA_OUT) < 0) {
fprintf(stderr, "dup2 failed in the child: %s\n", strerror(errno));
_exit(-1);
}
#ifdef __linux__
// Set RLIMIT_CORE to 0, such that we don't produce core dumps. The
// added benefit of doing this here, in the child process, is that we
// can still get core dumps when Fuzzilli crashes.
struct rlimit core_limit;
core_limit.rlim_cur = 0;
core_limit.rlim_max = 0;
if (setrlimit(RLIMIT_CORE, &core_limit) < 0) {
fprintf(stderr, "setrlimit failed in the child: %s\n", strerror(errno));
_exit(-1);
};
#endif
// Unblock any blocked signals. It seems that libdispatch sometimes blocks delivery of certain signals.
sigset_t newset;
sigemptyset(&newset);
if (sigprocmask(SIG_SETMASK, &newset, NULL) != 0) {
fprintf(stderr, "sigprocmask failed in the child: %s\n", strerror(errno));
_exit(-1);
}
// The original pipe ends are no longer needed, they have been duplicated above.
close(cwpipe[0]);
close(crpipe[1]);
// Redirect stdin to /dev/null, and stdout/stderr either to their capture channel or to /dev/null.
// NOTE(review): the results of open() and of these dup2() calls are not checked.
int devnull = open("/dev/null", O_RDWR);
dup2(devnull, 0);
if (ctx->child_stdout) dup2(ctx->child_stdout->fd, 1);
else dup2(devnull, 1);
if (ctx->child_stderr) dup2(ctx->child_stderr->fd, 2);
else dup2(devnull, 2);
close(devnull);
#ifdef __linux__
// Create the tmpfs at the specific mount point here in the child process
// such that we have a tmpfs for this process only that will be cleaned up at process exit.
// This will also write into the necessary files in /proc, so we need to do this here after we've fork()'ed.
// This will only work on Linux, see the comment above where we call mkdir.
create_tmpfs(mount_point);
#endif
// close all other FDs. We try to use FD_CLOEXEC everywhere, but let's be extra sure we don't leak any fds to the child.
int tablesize = getdtablesize();
for (int i = 3; i < tablesize; i++) {
if (i == REPRL_CHILD_CTRL_IN || i == REPRL_CHILD_CTRL_OUT || i == REPRL_CHILD_DATA_IN || i == REPRL_CHILD_DATA_OUT) {
continue;
}
close(i);
}
execve(ctx->argv[0], ctx->argv, ctx->envp);
// execve only returns on failure.
fprintf(stderr, "Failed to execute child process %s: %s\n", ctx->argv[0], strerror(errno));
fflush(stderr);
_exit(-1);
}
// Parent process (or fork failure): the child's ends of the pipes are not needed here.
close(crpipe[1]);
close(cwpipe[0]);
if (pid < 0) {
close(ctx->ctrl_in);
close(ctx->ctrl_out);
return reprl_error(ctx, "Failed to fork: %s", strerror(errno));
}
ctx->pid = pid;
// Handshake: the child is expected to send "HELO" over the control pipe, which we then echo back.
char helo[5] = { 0 };
if (read(ctx->ctrl_in, helo, 4) != 4) {
reprl_terminate_child(ctx);
return reprl_error(ctx, "Did not receive HELO message from child: %s", strerror(errno));
}
if (strncmp(helo, "HELO", 4) != 0) {
reprl_terminate_child(ctx);
return reprl_error(ctx, "Received invalid HELO message from child: %s", helo);
}
if (write(ctx->ctrl_out, helo, 4) != 4) {
reprl_terminate_child(ctx);
return reprl_error(ctx, "Failed to send HELO reply message to child: %s", strerror(errno));
}
#ifdef __linux__
// Verify that the child still has RLIMIT_CORE set to zero after it has started up.
struct rlimit core_limit = {};
if (prlimit(pid, RLIMIT_CORE, NULL, &core_limit) < 0) {
reprl_terminate_child(ctx);
return reprl_error(ctx, "prlimit failed: %s\n", strerror(errno));
}
if (core_limit.rlim_cur != 0 || core_limit.rlim_max != 0) {
reprl_terminate_child(ctx);
return reprl_error(ctx, "Detected non-zero RLIMIT_CORE. Check that the child does not set RLIMIT_CORE manually.\n");
}
#endif
return 0;
}
// Allocates a new, zero-initialized REPRL context.
//
// Returns the context, which has to be initialized with
// reprl_initialize_context and released with reprl_destroy_context, or NULL if
// the allocation failed.
struct reprl_context* reprl_create_context()
{
    // "Reserve" the well-known REPRL fds so no other fd collides with them.
    // This would cause various kinds of issues in reprl_spawn_child.
    // It would be enough to do this once per process in the case of multiple
    // REPRL instances, but it's probably not worth the implementation effort.
    static const int reserved_fds[] = {
        REPRL_CHILD_CTRL_IN,
        REPRL_CHILD_CTRL_OUT,
        REPRL_CHILD_DATA_IN,
        REPRL_CHILD_DATA_OUT,
    };

    int devnull = open("/dev/null", O_RDWR);
    // If /dev/null cannot be opened there is nothing to duplicate; the
    // reservation is best-effort, so just continue without it.
    if (devnull >= 0) {
        int devnull_is_reserved = 0;
        for (size_t i = 0; i < sizeof(reserved_fds) / sizeof(reserved_fds[0]); i++) {
            if (devnull == reserved_fds[i]) {
                // open() handed us one of the well-known numbers itself. Keep
                // it open, closing it below would undo the reservation.
                devnull_is_reserved = 1;
            } else {
                dup2(devnull, reserved_fds[i]);
            }
        }
        if (!devnull_is_reserved) {
            close(devnull);
        }
    }

    return calloc(1, sizeof(struct reprl_context));
}
// Initializes `ctx` so that it can be used with reprl_execute.
//
// argv:           NULL-terminated argument array for the child; argv[0] is the path of the executable.
// envp:           NULL-terminated environment array for the child.
// capture_stdout: if nonzero, the child's stdout is captured and available through reprl_fetch_stdout.
// capture_stderr: if nonzero, the child's stderr is captured and available through reprl_fetch_stderr.
//
// Both arrays are copied. As a side effect, SIGPIPE is set to be ignored for the whole process.
//
// Returns 0 on success and -1 on failure, in which case the error is available through
// reprl_get_last_error and the context is left uninitialized without any partially
// created state, so that initialization can be attempted again.
int reprl_initialize_context(struct reprl_context* ctx, const char** argv, const char** envp, int capture_stdout, int capture_stderr)
{
    if (ctx->initialized) {
        return reprl_error(ctx, "Context is already initialized");
    }
    if (!argv || !argv[0] || !envp) {
        return reprl_error(ctx, "Invalid arguments: argv must contain the path of the executable and envp must not be NULL");
    }

    // We need to ignore SIGPIPE since we could end up writing to a pipe after our child process has exited.
    signal(SIGPIPE, SIG_IGN);

    ctx->argv = copy_string_array(argv);
    ctx->envp = copy_string_array(envp);
    if (!ctx->argv || !ctx->envp) {
        reprl_error(ctx, "Failed to allocate memory for the child's arguments and environment");
        goto fail;
    }

    // On failure, reprl_create_data_channel records a proper error message.
    ctx->data_in = reprl_create_data_channel(ctx);
    if (!ctx->data_in) goto fail;

    ctx->data_out = reprl_create_data_channel(ctx);
    if (!ctx->data_out) goto fail;

    if (capture_stdout) {
        ctx->child_stdout = reprl_create_data_channel(ctx);
        if (!ctx->child_stdout) goto fail;
    }
    if (capture_stderr) {
        ctx->child_stderr = reprl_create_data_channel(ctx);
        if (!ctx->child_stderr) goto fail;
    }

    ctx->initialized = 1;
    return 0;

fail:
    // Release everything created so far and reset the pointers, so that
    // neither a retry nor reprl_destroy_context sees stale state.
    free_string_array(ctx->argv);
    free_string_array(ctx->envp);
    ctx->argv = NULL;
    ctx->envp = NULL;
    reprl_destroy_data_channel(ctx->data_in);
    reprl_destroy_data_channel(ctx->data_out);
    reprl_destroy_data_channel(ctx->child_stdout);
    reprl_destroy_data_channel(ctx->child_stderr);
    ctx->data_in = NULL;
    ctx->data_out = NULL;
    ctx->child_stdout = NULL;
    ctx->child_stderr = NULL;
    return -1;
}
// Destroys `ctx`: terminates the child process if one is running and releases
// all resources owned by the context, including the context itself.
// Passing NULL is allowed and does nothing.
void reprl_destroy_context(struct reprl_context* ctx)
{
    // reprl_create_context returns NULL if the allocation failed, so accept
    // NULL here just like the other destroy/free helpers do.
    if (!ctx) return;

    reprl_terminate_child(ctx);

    free_string_array(ctx->argv);
    free_string_array(ctx->envp);

    reprl_destroy_data_channel(ctx->data_in);
    reprl_destroy_data_channel(ctx->data_out);
    reprl_destroy_data_channel(ctx->child_stdout);
    reprl_destroy_data_channel(ctx->child_stderr);

    free(ctx->last_error);
    free(ctx);
}
// Executes `script` in the child process of `ctx`, spawning a new child first if none is running.
//
// script, script_size: the script and its size in bytes; at most REPRL_MAX_DATA_SIZE bytes.
// timeout:             maximum time to wait for the execution, in microseconds; at most REPRL_MAX_TIMEOUT_IN_MICROSECONDS.
//                      It is converted to whole milliseconds for poll().
// execution_time:      out parameter, set to the measured duration of the poll() wait in microseconds. Must not be NULL.
//                      It is only written once the command has been sent to the child successfully.
// fresh_instance:      if nonzero, a running child is terminated first so that the script runs in a new process.
//
// Returns -1 on error (see reprl_get_last_error) and `1 << 16` if the execution timed out, in which case the
// child has been killed. Otherwise returns a value in the lower 16 bits: the 4-byte status sent by the child over
// the control pipe, or, if the child terminated instead, `exit status << 8` for a regular exit and the signal
// number if it was killed by a signal.
int reprl_execute(struct reprl_context* ctx, const char* script, uint64_t script_size, uint64_t timeout, uint64_t* execution_time, int fresh_instance)
{
if (!ctx->initialized) {
return reprl_error(ctx, "REPRL context is not initialized");
}
if (script_size > REPRL_MAX_DATA_SIZE) {
return reprl_error(ctx, "Script too large");
}
if (timeout > REPRL_MAX_TIMEOUT_IN_MICROSECONDS) {
return reprl_error(ctx, "Timeout too large");
}
// poll() takes its timeout in milliseconds. The check above guarantees that the value fits into an int.
int timeout_ms = (int)(timeout / 1000);
// Terminate any existing instance if requested.
if (fresh_instance && ctx->pid) {
reprl_terminate_child(ctx);
}
// Reset file position so the child can simply read(2) and write(2) to these fds.
lseek(ctx->data_out->fd, 0, SEEK_SET);
lseek(ctx->data_in->fd, 0, SEEK_SET);
if (ctx->child_stdout) {
lseek(ctx->child_stdout->fd, 0, SEEK_SET);
}
if (ctx->child_stderr) {
lseek(ctx->child_stderr->fd, 0, SEEK_SET);
}
// Spawn a new instance if necessary.
if (!ctx->pid) {
int r = reprl_spawn_child(ctx);
if (r != 0) return r;
}
// Copy the script to the data channel.
memcpy(ctx->data_out->mapping, script, script_size);
// Tell child to execute the script: the command "exec" followed by the script size as an 8-byte integer.
if (write(ctx->ctrl_out, "exec", 4) != 4 ||
write(ctx->ctrl_out, &script_size, 8) != 8) {
// These can fail if the child unexpectedly terminated between executions.
// Check for that here to be able to provide a better error message.
int status;
if (waitpid(ctx->pid, &status, WNOHANG) == ctx->pid) {
reprl_child_terminated(ctx);
if (WIFEXITED(status)) {
return reprl_error(ctx, "Child unexpectedly exited with status %i between executions", WEXITSTATUS(status));
} else {
return reprl_error(ctx, "Child unexpectedly terminated with signal %i between executions", WTERMSIG(status));
}
}
return reprl_error(ctx, "Failed to send command to child process: %s", strerror(errno));
}
// Wait for child to finish execution (or crash).
uint64_t start_time = current_usecs();
struct pollfd fds = {.fd = ctx->ctrl_in, .events = POLLIN, .revents = 0};
int res = poll(&fds, 1, timeout_ms);
*execution_time = current_usecs() - start_time;
if (res == 0) {
// Execution timed out. Kill child and return a timeout status.
reprl_terminate_child(ctx);
return 1 << 16;
} else if (res != 1) {
// An error occurred.
// We expect all signal handlers to be installed with SA_RESTART, so receiving EINTR here is unexpected and thus also an error.
return reprl_error(ctx, "Failed to poll: %s", strerror(errno));
}
// Poll succeeded, so there must be something to read now (either the status or EOF).
int status;
ssize_t rv = read(ctx->ctrl_in, &status, 4);
if (rv < 0) {
return reprl_error(ctx, "Failed to read from control pipe: %s", strerror(errno));
} else if (rv != 4) {
// Most likely, the child process crashed and closed the write end of the control pipe.
// Unfortunately, there probably is nothing that guarantees that waitpid() will immediately succeed now,
// and we also don't want to block here. So just retry waitpid() a few times...
// The retries stop once `timeout` microseconds have passed since the execution started.
int success = 0;
do {
success = waitpid(ctx->pid, &status, WNOHANG) == ctx->pid;
if (!success) usleep(10);
} while (!success && current_usecs() - start_time < timeout);
if (!success) {
// Wait failed, so something weird must have happened. Maybe somehow the control pipe was closed without the child exiting?
// Probably the best we can do is kill the child and return an error.
reprl_terminate_child(ctx);
return reprl_error(ctx, "Child in weird state after execution");
}
// Cleanup any state related to this child process.
reprl_child_terminated(ctx);
// Translate the wait status into the returned encoding: exit code in bits 8-15, signal number in the low bits.
if (WIFEXITED(status)) {
status = WEXITSTATUS(status) << 8;
} else if (WIFSIGNALED(status)) {
status = WTERMSIG(status);
} else {
// This shouldn't happen, since we don't specify WUNTRACED for waitpid...
return reprl_error(ctx, "Waitpid returned unexpected child state %i", status);
}
}
// The status must be a positive number, so only the lower 16 bits are kept.
// This also means that the child process cannot indicate a timeout (bit 16) itself. If we wanted,
// we could treat it as an error if the upper bits are set.
status &= 0xffff;
return status;
}
// Returns the content of `channel` as a NUL-terminated string.
//
// The current file offset of the channel's file descriptor is taken as the
// length of the content, capped so that the terminator fits into the mapping.
// The terminator is written directly into the mapping, and the returned pointer
// points into the mapping, so it is only valid as long as the channel exists.
//
// Returns an empty string if `channel` is NULL or the offset cannot be determined.
static const char* fetch_data_channel_content(struct data_channel* channel)
{
    if (!channel) return "";

    off_t offset = lseek(channel->fd, 0, SEEK_CUR);
    // lseek returns -1 on failure. Converting that to size_t would yield a huge
    // position and thus expose stale data from the mapping, so treat it as
    // "nothing was written" instead.
    size_t pos = offset < 0 ? 0 : (size_t)offset;
    pos = min(pos, REPRL_MAX_DATA_SIZE - 1);

    channel->mapping[pos] = 0;
    return channel->mapping;
}
// Returns the data that the child wrote to its REPRL_CHILD_DATA_OUT channel
// as a NUL-terminated string.
const char* reprl_fetch_fuzzout(struct reprl_context* ctx)
{
    struct data_channel* fuzzout_channel = ctx->data_in;
    return fetch_data_channel_content(fuzzout_channel);
}
// Returns the captured stdout of the child as a NUL-terminated string, or an
// empty string if stdout is not being captured.
const char* reprl_fetch_stdout(struct reprl_context* ctx)
{
    struct data_channel* stdout_channel = ctx->child_stdout;
    return fetch_data_channel_content(stdout_channel);
}
// Returns the captured stderr of the child as a NUL-terminated string, or an
// empty string if stderr is not being captured.
const char* reprl_fetch_stderr(struct reprl_context* ctx)
{
    struct data_channel* stderr_channel = ctx->child_stderr;
    return fetch_data_channel_content(stderr_channel);
}
// Returns the description of the last error recorded for `ctx`. The string is
// owned by the context; it is NULL if no error has been recorded.
const char* reprl_get_last_error(struct reprl_context* ctx)
{
    const char* description = ctx->last_error;
    return description;
}
#endif