src: use Blob{Des|S}erializer for SEA blobs

PR-URL: https://github.com/nodejs/node/pull/47962
Reviewed-By: Darshan Sen <raisinten@gmail.com>
This commit is contained in:
Joyee Cheung 2023-05-23 19:37:29 +02:00 committed by GitHub
parent 300f68e9d0
commit e2caafa5bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 205 additions and 92 deletions

View File

@ -13,8 +13,8 @@
#include "debug_utils-inl.h"
// This is related to the blob that is used in snapshots and has nothing to do
// with `node_blob.h`.
// This is related to the blob that is used in snapshots and single executable
// applications and has nothing to do with `node_blob.h`.
namespace node {
@ -130,22 +130,22 @@ std::vector<T> BlobDeserializer<Impl>::ReadVector() {
template <typename Impl>
std::string BlobDeserializer<Impl>::ReadString() {
std::string_view view = ReadStringView(StringLogMode::kAddressAndContent);
return std::string(view);
}
template <typename Impl>
std::string_view BlobDeserializer<Impl>::ReadStringView(StringLogMode mode) {
size_t length = ReadArithmetic<size_t>();
Debug("ReadStringView(), length=%zu: ", length);
if (is_debug) {
Debug("ReadString(), length=%d: ", length);
std::string_view result(sink.data() + read_total, length);
Debug("%p, read %zu bytes\n", result.data(), result.size());
if (mode == StringLogMode::kAddressAndContent) {
Debug("%s", result);
}
CHECK_GT(length, 0); // There should be no empty strings.
MallocedBuffer<char> buf(length + 1);
memcpy(buf.data, sink.data() + read_total, length + 1);
std::string result(buf.data, length); // This creates a copy of buf.data.
if (is_debug) {
Debug("\"%s\", read %zu bytes\n", result.c_str(), length + 1);
}
read_total += length + 1;
read_total += length;
return result;
}
@ -262,26 +262,28 @@ size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
// [ 4/8 bytes ] length
// [ |length| bytes ] contents
template <typename Impl>
size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
CHECK_GT(data.size(), 0); // No empty strings should be written.
size_t BlobSerializer<Impl>::WriteStringView(std::string_view data,
StringLogMode mode) {
Debug("WriteStringView(), length=%zu: %p\n", data.size(), data.data());
size_t written_total = WriteArithmetic<size_t>(data.size());
if (is_debug) {
std::string str = ToStr(data);
Debug("WriteString(), length=%zu: \"%s\"\n", data.size(), data.c_str());
}
// Write the null-terminated string.
size_t length = data.size() + 1;
sink.insert(sink.end(), data.c_str(), data.c_str() + length);
size_t length = data.size();
sink.insert(sink.end(), data.data(), data.data() + length);
written_total += length;
if (is_debug) {
Debug("WriteString() wrote %zu bytes\n", written_total);
Debug("WriteStringView() wrote %zu bytes\n", written_total);
if (mode == StringLogMode::kAddressAndContent) {
Debug("%s", data);
}
return written_total;
}
// Writes |data| by forwarding to WriteStringView(), so the output is the
// length followed by the contents. The string is logged with
// StringLogMode::kAddressAndContent. Returns whatever WriteStringView()
// returns, i.e. the number of bytes it reports as written.
template <typename Impl>
size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
return WriteStringView(data, StringLogMode::kAddressAndContent);
}
// Helper for writing an array of numeric types.
template <typename Impl>
template <typename T>

View File

@ -6,8 +6,8 @@
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
// This is related to the blob that is used in snapshots and has nothing to do
// with `node_blob.h`.
// This is related to the blob that is used in snapshots and single executable
// applications and has nothing to do with `node_blob.h`.
namespace node {
@ -27,6 +27,11 @@ class BlobSerializerDeserializer {
bool is_debug = false;
};
// Selects how much of a string the serializer/deserializer prints in its
// debug output when reading or writing string views.
enum class StringLogMode {
kAddressOnly, // Can be used when the string contains binary content.
// In addition to the address, the string contents are printed with "%s".
kAddressAndContent,
};
// Child classes are expected to implement T Read<T>() where
// !std::is_arithmetic_v<T> && !std::is_same_v<T, std::string>
template <typename Impl>
@ -52,7 +57,9 @@ class BlobDeserializer : public BlobSerializerDeserializer {
template <typename T>
std::vector<T> ReadVector();
// ReadString() creates a copy of the data. ReadStringView() doesn't.
std::string ReadString();
std::string_view ReadStringView(StringLogMode mode);
// Helper for reading an array of numeric types.
template <typename T>
@ -77,11 +84,7 @@ template <typename Impl>
class BlobSerializer : public BlobSerializerDeserializer {
public:
explicit BlobSerializer(bool is_debug_v)
: BlobSerializerDeserializer(is_debug_v) {
// Currently the snapshot blob built with an empty script is around 4MB.
// So use that as the default sink size.
sink.reserve(4 * 1024 * 1024);
}
: BlobSerializerDeserializer(is_debug_v) {}
~BlobSerializer() {}
Impl* impl() { return static_cast<Impl*>(this); }
@ -102,6 +105,7 @@ class BlobSerializer : public BlobSerializerDeserializer {
// The layout of a written string:
// [ 4/8 bytes ] length
// [ |length| bytes ] contents
size_t WriteStringView(std::string_view data, StringLogMode mode);
size_t WriteString(const std::string& data);
// Helper for writing an array of numeric types.

View File

@ -48,6 +48,7 @@ void NODE_EXTERN_PRIVATE FWrite(FILE* file, const std::string& str);
V(INSPECTOR_PROFILER) \
V(CODE_CACHE) \
V(NGTCP2_DEBUG) \
V(SEA) \
V(WASI) \
V(MKSNAPSHOT)

View File

@ -87,14 +87,16 @@ ExitCode NodeMainInstance::Run() {
void NodeMainInstance::Run(ExitCode* exit_code, Environment* env) {
if (*exit_code == ExitCode::kNoFailure) {
bool is_sea = false;
bool runs_sea_code = false;
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
if (sea::IsSingleExecutable()) {
is_sea = true;
LoadEnvironment(env, sea::FindSingleExecutableCode());
runs_sea_code = true;
sea::SeaResource sea = sea::FindSingleExecutableResource();
std::string_view code = sea.code;
LoadEnvironment(env, code);
}
#endif
if (!is_sea) {
if (!runs_sea_code) {
LoadEnvironment(env, StartExecutionCallback{});
}

View File

@ -1,5 +1,6 @@
#include "node_sea.h"
#include "blob_serializer_deserializer-inl.h"
#include "debug_utils-inl.h"
#include "env-inl.h"
#include "json_parser.h"
@ -34,16 +35,6 @@ namespace node {
namespace sea {
namespace {
// A special number that will appear at the beginning of the single executable
// preparation blobs ready to be injected into the binary. We use this to check
// that the data given to us are intended for building single executable
// applications.
const uint32_t kMagic = 0x143da20;
enum class SeaFlags : uint32_t {
kDefault = 0,
kDisableExperimentalSeaWarning = 1 << 0,
};
SeaFlags operator|(SeaFlags x, SeaFlags y) {
return static_cast<SeaFlags>(static_cast<uint32_t>(x) |
@ -59,47 +50,100 @@ SeaFlags operator|=(/* NOLINT (runtime/references) */ SeaFlags& x, SeaFlags y) {
return x = x | y;
}
struct SeaResource {
SeaFlags flags = SeaFlags::kDefault;
std::string_view code;
static constexpr size_t kHeaderSize = sizeof(kMagic) + sizeof(SeaFlags);
// Serializer for single executable application blobs. Debug logging is
// enabled when the SEA debug category is enabled.
class SeaSerializer : public BlobSerializer<SeaSerializer> {
public:
SeaSerializer()
: BlobSerializer<SeaSerializer>(
per_process::enabled_debug_list.enabled(DebugCategory::SEA)) {}
// Declared for types that are neither std::string nor arithmetic; the
// definitions are supplied as explicit specializations.
template <typename T,
std::enable_if_t<!std::is_same<T, std::string>::value>* = nullptr,
std::enable_if_t<!std::is_arithmetic<T>::value>* = nullptr>
size_t Write(const T& data);
};
SeaResource FindSingleExecutableResource() {
// Serializes |sea| into the sink. The resulting layout is:
//   [ 4 bytes ]         magic number (kMagic)
//   [ 4 bytes ]         flags
//   [ 4/8 bytes ]       length of the code
//   [ |length| bytes ]  code
// Returns the total number of bytes written.
template <>
size_t SeaSerializer::Write(const SeaResource& sea) {
  // WriteStringView() emits a size_t length prefix before the contents, so
  // include it in the reservation. Without it the final insert exceeds the
  // reserved capacity and the whole sink is reallocated and copied.
  sink.reserve(SeaResource::kHeaderSize + sizeof(size_t) + sea.code.size());

  Debug("Write SEA magic %x\n", kMagic);
  size_t written_total = WriteArithmetic<uint32_t>(kMagic);

  uint32_t flags = static_cast<uint32_t>(sea.flags);
  Debug("Write SEA flags %x\n", flags);
  written_total += WriteArithmetic<uint32_t>(flags);
  // The magic number and the flags together make up the fixed-size header.
  DCHECK_EQ(written_total, SeaResource::kHeaderSize);

  Debug("Write SEA resource code %p, size=%zu\n",
        sea.code.data(),
        sea.code.size());
  written_total += WriteStringView(sea.code, StringLogMode::kAddressAndContent);
  return written_total;
}
// Deserializer for single executable application blobs. It reads from the
// string view |v| passed to the constructor, and debug logging is enabled
// when the SEA debug category is enabled.
class SeaDeserializer : public BlobDeserializer<SeaDeserializer> {
public:
explicit SeaDeserializer(std::string_view v)
: BlobDeserializer<SeaDeserializer>(
per_process::enabled_debug_list.enabled(DebugCategory::SEA), v) {}
// Declared for types that are neither std::string nor arithmetic; the
// definitions are supplied as explicit specializations.
template <typename T,
std::enable_if_t<!std::is_same<T, std::string>::value>* = nullptr,
std::enable_if_t<!std::is_arithmetic<T>::value>* = nullptr>
T Read();
};
// Reads a SeaResource back from the blob: first the magic number, then the
// flags, then the code as a length-prefixed string view. Aborts via CHECK if
// the magic number does not match or if the header size is not what
// SeaResource::kHeaderSize says it should be.
template <>
SeaResource SeaDeserializer::Read() {
  const uint32_t magic = ReadArithmetic<uint32_t>();
  Debug("Read SEA magic %x\n", magic);
  CHECK_EQ(magic, kMagic);

  const uint32_t raw_flags = ReadArithmetic<uint32_t>();
  Debug("Read SEA flags %x\n", raw_flags);
  // Everything consumed so far must be exactly the fixed-size header.
  CHECK_EQ(read_total, SeaResource::kHeaderSize);

  const std::string_view code =
      ReadStringView(StringLogMode::kAddressAndContent);
  Debug("Read SEA resource code %p, size=%zu\n", code.data(), code.size());

  return SeaResource{static_cast<SeaFlags>(raw_flags), code};
}
std::string_view FindSingleExecutableBlob() {
CHECK(IsSingleExecutable());
static const SeaResource sea_resource = []() -> SeaResource {
static const std::string_view result = []() -> std::string_view {
size_t size;
#ifdef __APPLE__
postject_options options;
postject_options_init(&options);
options.macho_segment_name = "NODE_SEA";
const char* code = static_cast<const char*>(
const char* blob = static_cast<const char*>(
postject_find_resource("NODE_SEA_BLOB", &size, &options));
#else
const char* code = static_cast<const char*>(
const char* blob = static_cast<const char*>(
postject_find_resource("NODE_SEA_BLOB", &size, nullptr));
#endif
uint32_t first_word = reinterpret_cast<const uint32_t*>(code)[0];
CHECK_EQ(first_word, kMagic);
SeaFlags flags{
reinterpret_cast<const SeaFlags*>(code + sizeof(first_word))[0]};
// TODO(joyeecheung): do more checks here e.g. matching the versions.
return {
flags,
{
code + SeaResource::kHeaderSize,
size - SeaResource::kHeaderSize,
},
};
return {blob, size};
}();
return sea_resource;
per_process::Debug(DebugCategory::SEA,
"Found SEA blob %p, size=%zu\n",
result.data(),
result.size());
return result;
}
} // namespace
} // anonymous namespace
std::string_view FindSingleExecutableCode() {
SeaResource sea_resource = FindSingleExecutableResource();
return sea_resource.code;
// Returns the SeaResource deserialized from the blob found by
// FindSingleExecutableBlob(). The deserialization happens in the initializer
// of a function-local static, so it runs once; every call returns a copy of
// that stored value.
SeaResource FindSingleExecutableResource() {
static const SeaResource sea_resource = []() -> SeaResource {
std::string_view blob = FindSingleExecutableBlob();
per_process::Debug(DebugCategory::SEA,
"Found SEA resource %p, size=%zu\n",
blob.data(),
blob.size());
// NOTE(review): the returned resource holds a std::string_view; presumably
// it points into |blob| rather than owning the code - confirm against
// ReadStringView().
SeaDeserializer deserializer(blob);
return deserializer.Read<SeaResource>();
}();
return sea_resource;
}
bool IsSingleExecutable() {
@ -194,38 +238,33 @@ std::optional<SeaConfig> ParseSingleExecutableConfig(
return result;
}
bool GenerateSingleExecutableBlob(const SeaConfig& config) {
ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) {
std::string main_script;
// TODO(joyeecheung): unify the file utils.
int r = ReadFileSync(&main_script, config.main_path.c_str());
if (r != 0) {
const char* err = uv_strerror(r);
FPrintF(stderr, "Cannot read main script %s:%s\n", config.main_path, err);
return false;
return ExitCode::kGenericUserError;
}
std::vector<char> sink;
// TODO(joyeecheung): reuse the SnapshotSerializerDeserializer for this.
sink.reserve(SeaResource::kHeaderSize + main_script.size());
const char* pos = reinterpret_cast<const char*>(&kMagic);
sink.insert(sink.end(), pos, pos + sizeof(kMagic));
pos = reinterpret_cast<const char*>(&(config.flags));
sink.insert(sink.end(), pos, pos + sizeof(SeaFlags));
sink.insert(
sink.end(), main_script.data(), main_script.data() + main_script.size());
SeaResource sea{config.flags, main_script};
uv_buf_t buf = uv_buf_init(sink.data(), sink.size());
SeaSerializer serializer;
serializer.Write(sea);
uv_buf_t buf = uv_buf_init(serializer.sink.data(), serializer.sink.size());
r = WriteFileSync(config.output_path.c_str(), buf);
if (r != 0) {
const char* err = uv_strerror(r);
FPrintF(stderr, "Cannot write output to %s:%s\n", config.output_path, err);
return false;
return ExitCode::kGenericUserError;
}
FPrintF(stderr,
"Wrote single executable preparation blob to %s\n",
config.output_path);
return true;
return ExitCode::kNoFailure;
}
} // anonymous namespace
@ -233,12 +272,12 @@ bool GenerateSingleExecutableBlob(const SeaConfig& config) {
ExitCode BuildSingleExecutableBlob(const std::string& config_path) {
std::optional<SeaConfig> config_opt =
ParseSingleExecutableConfig(config_path);
if (!config_opt.has_value() ||
!GenerateSingleExecutableBlob(config_opt.value())) {
return ExitCode::kGenericUserError;
if (config_opt.has_value()) {
ExitCode code = GenerateSingleExecutableBlob(config_opt.value());
return code;
}
return ExitCode::kNoFailure;
return ExitCode::kGenericUserError;
}
void Initialize(Local<Object> target,

View File

@ -11,9 +11,26 @@
namespace node {
namespace sea {
// A special number that will appear at the beginning of the single executable
// preparation blobs ready to be injected into the binary. We use this to check
// that the data given to us are intended for building single executable
// applications.
const uint32_t kMagic = 0x143da20;
// Bit flags describing a single executable application. The underlying type
// is uint32_t and each enumerator other than kDefault occupies one bit.
enum class SeaFlags : uint32_t {
kDefault = 0,
kDisableExperimentalSeaWarning = 1 << 0,
};
// The data carried by a single executable application blob.
struct SeaResource {
SeaFlags flags = SeaFlags::kDefault;
// Non-owning view of the code; the viewed storage must outlive this struct.
std::string_view code;
// Size in bytes of the fixed header: the magic number followed by the flags.
static constexpr size_t kHeaderSize = sizeof(kMagic) + sizeof(SeaFlags);
};
bool IsSingleExecutable();
std::string_view FindSingleExecutableCode();
SeaResource FindSingleExecutableResource();
std::tuple<int, char**> FixupArgsForSEA(int argc, char** argv);
node::ExitCode BuildSingleExecutableBlob(const std::string& config_path);
} // namespace sea

View File

@ -159,7 +159,11 @@ class SnapshotSerializer : public BlobSerializer<SnapshotSerializer> {
SnapshotSerializer()
: BlobSerializer<SnapshotSerializer>(
per_process::enabled_debug_list.enabled(
DebugCategory::MKSNAPSHOT)) {}
DebugCategory::MKSNAPSHOT)) {
// Currently the snapshot blob built with an empty script is around 4MB.
// So use that as the default sink size.
sink.reserve(4 * 1024 * 1024);
}
template <typename T,
std::enable_if_t<!std::is_same<T, std::string>::value>* = nullptr,
@ -554,7 +558,7 @@ size_t SnapshotSerializer::Write(const SnapshotMetadata& data) {
// We need the Node.js version, platform and arch to match because
// Node.js may perform synchronizations that are platform-specific and they
// can be changed in semver-patches.
Debug("Write snapshot type %" PRIu8 "\n", static_cast<uint8_t>(data.type));
Debug("Write snapshot type %d\n", static_cast<uint8_t>(data.type));
written_total += WriteArithmetic<uint8_t>(static_cast<uint8_t>(data.type));
Debug("Write Node.js version %s\n", data.node_version.c_str());
written_total += WriteString(data.node_version);

View File

@ -0,0 +1,44 @@
'use strict';

require('../common');

const {
  injectAndCodeSign,
  skipIfSingleExecutableIsNotSupported,
} = require('../common/sea');

skipIfSingleExecutableIsNotSupported();

// Builds a single executable application whose main script is empty and
// checks that the preparation blob is produced and the final binary runs.

const tmpdir = require('../common/tmpdir');
const fs = require('fs');
const childProcess = require('child_process');
const path = require('path');
const assert = require('assert');

const seaBinaryName = process.platform === 'win32' ? 'sea.exe' : 'sea';
const configFile = path.join(tmpdir.path, 'sea-config.json');
const seaPrepBlob = path.join(tmpdir.path, 'sea-prep.blob');
const outputFile = path.join(tmpdir.path, seaBinaryName);

tmpdir.refresh();

// The main script is intentionally empty.
const emptyScript = path.join(tmpdir.path, 'empty.js');
fs.writeFileSync(emptyScript, '', 'utf-8');

const configContents = `
{
"main": "empty.js",
"output": "sea-prep.blob"
}
`;
fs.writeFileSync(configFile, configContents);

// Generate the preparation blob; paths in the config are relative to cwd.
const blobArgs = ['--experimental-sea-config', 'sea-config.json'];
childProcess.execFileSync(process.execPath, blobArgs, {
  cwd: tmpdir.path
});

assert(fs.existsSync(seaPrepBlob));

// Inject the blob into a copy of the current binary, then run the result.
fs.copyFileSync(process.execPath, outputFile);
injectAndCodeSign(outputFile, seaPrepBlob);

childProcess.execFileSync(outputFile);