buffer: add buffer.isUtf8 for utf8 validation
PR-URL: https://github.com/nodejs/node/pull/45947 Reviewed-By: Robert Nagy <ronagy@icloud.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Luigi Pinca <luigipinca@gmail.com> Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Anna Henningsen <anna@addaleax.net>
This commit is contained in:
parent
07fdbbd015
commit
d5a08c7e11
@ -5130,6 +5130,17 @@ For code running using Node.js APIs, converting between base64-encoded strings
|
||||
and binary data should be performed using `Buffer.from(str, 'base64')` and
|
||||
`buf.toString('base64')`.**
|
||||
|
||||
### `buffer.isUtf8(input)`
|
||||
|
||||
<!-- YAML
|
||||
added: REPLACEME
|
||||
-->
|
||||
|
||||
* `input` {Buffer | ArrayBuffer | TypedArray} The input to validate.
|
||||
* Returns: {boolean} Returns `true` if and only if the input is valid UTF-8.
|
||||
|
||||
This function returns `true` if `input` contains only valid UTF-8-encoded data, including the case in which `input` is empty. It throws if `input` is a detached array buffer.
|
||||
|
||||
### `buffer.INSPECT_MAX_BYTES`
|
||||
|
||||
<!-- YAML
|
||||
|
@ -57,6 +57,7 @@ const {
|
||||
compareOffset,
|
||||
createFromString,
|
||||
fill: bindingFill,
|
||||
isUtf8: bindingIsUtf8,
|
||||
indexOfBuffer,
|
||||
indexOfNumber,
|
||||
indexOfString,
|
||||
@ -84,7 +85,8 @@ const {
|
||||
const {
|
||||
isAnyArrayBuffer,
|
||||
isArrayBufferView,
|
||||
isUint8Array
|
||||
isUint8Array,
|
||||
isTypedArray,
|
||||
} = require('internal/util/types');
|
||||
const {
|
||||
inspect: utilInspect
|
||||
@ -1314,10 +1316,19 @@ function atob(input) {
|
||||
return Buffer.from(input, 'base64').toString('latin1');
|
||||
}
|
||||
|
||||
/**
 * Checks whether `input` holds only valid UTF-8-encoded data.
 *
 * The validation itself is delegated to the native `isUtf8` binding, which
 * reports an `ERR_INVALID_STATE` error when the underlying buffer has been
 * detached.
 * @param {Buffer | ArrayBuffer | TypedArray} input The bytes to validate.
 * @returns {boolean} The result reported by the native validator.
 * @throws {ERR_INVALID_ARG_TYPE} If `input` is neither a TypedArray
 *   (which includes Buffer) nor an ArrayBuffer.
 */
function isUtf8(input) {
  if (isTypedArray(input) || isAnyArrayBuffer(input)) {
    return bindingIsUtf8(input);
  }

  // Name every accepted type so the error message agrees with the guard
  // above and with the documented signature.
  throw new ERR_INVALID_ARG_TYPE(
    'input',
    ['ArrayBuffer', 'Buffer', 'TypedArray'],
    input,
  );
}
|
||||
|
||||
module.exports = {
|
||||
Buffer,
|
||||
SlowBuffer,
|
||||
transcode,
|
||||
isUtf8,
|
||||
|
||||
// Legacy
|
||||
kMaxLength,
|
||||
|
@ -1223,6 +1223,20 @@ static void EncodeInto(const FunctionCallbackInfo<Value>& args) {
|
||||
results[1] = written;
|
||||
}
|
||||
|
||||
static void IsUtf8(const FunctionCallbackInfo<Value>& args) {
|
||||
Environment* env = Environment::GetCurrent(args);
|
||||
CHECK_EQ(args.Length(), 1);
|
||||
CHECK(args[0]->IsTypedArray() || args[0]->IsArrayBuffer() ||
|
||||
args[0]->IsSharedArrayBuffer());
|
||||
ArrayBufferViewContents<char> abv(args[0]);
|
||||
|
||||
if (abv.WasDetached()) {
|
||||
return node::THROW_ERR_INVALID_STATE(
|
||||
env, "Cannot validate on a detached buffer");
|
||||
}
|
||||
|
||||
args.GetReturnValue().Set(simdutf::validate_utf8(abv.data(), abv.length()));
|
||||
}
|
||||
|
||||
void SetBufferPrototype(const FunctionCallbackInfo<Value>& args) {
|
||||
Environment* env = Environment::GetCurrent(args);
|
||||
@ -1358,6 +1372,8 @@ void Initialize(Local<Object> target,
|
||||
SetMethod(context, target, "encodeInto", EncodeInto);
|
||||
SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String);
|
||||
|
||||
SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8);
|
||||
|
||||
target
|
||||
->Set(context,
|
||||
FIXED_ONE_BYTE_STRING(isolate, "kMaxLength"),
|
||||
@ -1413,6 +1429,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
|
||||
registry->Register(EncodeInto);
|
||||
registry->Register(EncodeUtf8String);
|
||||
|
||||
registry->Register(IsUtf8);
|
||||
|
||||
registry->Register(StringSlice<ASCII>);
|
||||
registry->Register(StringSlice<BASE64>);
|
||||
registry->Register(StringSlice<BASE64URL>);
|
||||
|
@ -68,6 +68,7 @@ void OOMErrorHandler(const char* location, const v8::OOMDetails& details);
|
||||
V(ERR_INVALID_ARG_TYPE, TypeError) \
|
||||
V(ERR_INVALID_OBJECT_DEFINE_PROPERTY, TypeError) \
|
||||
V(ERR_INVALID_MODULE, Error) \
|
||||
V(ERR_INVALID_STATE, Error) \
|
||||
V(ERR_INVALID_THIS, TypeError) \
|
||||
V(ERR_INVALID_TRANSFER_OBJECT, TypeError) \
|
||||
V(ERR_MEMORY_ALLOCATION_FAILED, Error) \
|
||||
|
@ -555,6 +555,7 @@ void ArrayBufferViewContents<T, S>::ReadValue(v8::Local<v8::Value> buf) {
|
||||
auto ab = buf.As<v8::ArrayBuffer>();
|
||||
length_ = ab->ByteLength();
|
||||
data_ = static_cast<T*>(ab->Data());
|
||||
was_detached_ = ab->WasDetached();
|
||||
} else {
|
||||
CHECK(buf->IsSharedArrayBuffer());
|
||||
auto sab = buf.As<v8::SharedArrayBuffer>();
|
||||
|
@ -511,6 +511,7 @@ class ArrayBufferViewContents {
|
||||
inline void Read(v8::Local<v8::ArrayBufferView> abv);
|
||||
inline void ReadValue(v8::Local<v8::Value> buf);
|
||||
|
||||
inline bool WasDetached() const { return was_detached_; }
|
||||
inline const T* data() const { return data_; }
|
||||
inline size_t length() const { return length_; }
|
||||
|
||||
@ -525,6 +526,7 @@ class ArrayBufferViewContents {
|
||||
T stack_storage_[kStackStorageSize];
|
||||
T* data_ = nullptr;
|
||||
size_t length_ = 0;
|
||||
bool was_detached_ = false;
|
||||
};
|
||||
|
||||
class Utf8Value : public MaybeStackBuffer<char> {
|
||||
|
86
test/parallel/test-buffer-isutf8.js
Normal file
86
test/parallel/test-buffer-isutf8.js
Normal file
@ -0,0 +1,86 @@
|
||||
'use strict';

// Tests for buffer.isUtf8(): valid input, malformed byte sequences,
// rejected argument types and detached ArrayBuffers.

require('../common');
const assert = require('assert');
const { isUtf8, Buffer } = require('buffer');
const { TextEncoder } = require('util');

const encoder = new TextEncoder();

// Well-formed input, including the empty buffer.
assert.strictEqual(isUtf8(encoder.encode('hello')), true);
assert.strictEqual(isUtf8(encoder.encode('ğ')), true);
assert.strictEqual(isUtf8(Buffer.from([])), true);

{
  // ArrayBuffer and SharedArrayBuffer are accepted directly, not only
  // through a TypedArray/Buffer view.
  const helloBytes = encoder.encode('hello');
  for (const Ctor of [ArrayBuffer, SharedArrayBuffer]) {
    const valid = new Ctor(helloBytes.byteLength);
    new Uint8Array(valid).set(helloBytes);
    assert.strictEqual(isUtf8(valid), true);

    const invalid = new Ctor(1);
    new Uint8Array(invalid)[0] = 0xFF;
    assert.strictEqual(isUtf8(invalid), false);
  }
}

// Taken from test/fixtures/wpt/encoding/textdecoder-fatal.any.js
[
  [0xFF], // 'invalid code'
  [0xC0], // 'ends early'
  [0xE0], // 'ends early 2'
  [0xC0, 0x00], // 'invalid trail'
  [0xC0, 0xC0], // 'invalid trail 2'
  [0xE0, 0x00], // 'invalid trail 3'
  [0xE0, 0xC0], // 'invalid trail 4'
  [0xE0, 0x80, 0x00], // 'invalid trail 5'
  [0xE0, 0x80, 0xC0], // 'invalid trail 6'
  [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], // '> 0x10FFFF'
  [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], // 'obsolete lead byte'

  // Overlong encodings
  [0xC0, 0x80], // 'overlong U+0000 - 2 bytes'
  [0xE0, 0x80, 0x80], // 'overlong U+0000 - 3 bytes'
  [0xF0, 0x80, 0x80, 0x80], // 'overlong U+0000 - 4 bytes'
  [0xF8, 0x80, 0x80, 0x80, 0x80], // 'overlong U+0000 - 5 bytes'
  [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], // 'overlong U+0000 - 6 bytes'

  [0xC1, 0xBF], // 'overlong U+007F - 2 bytes'
  [0xE0, 0x81, 0xBF], // 'overlong U+007F - 3 bytes'
  [0xF0, 0x80, 0x81, 0xBF], // 'overlong U+007F - 4 bytes'
  [0xF8, 0x80, 0x80, 0x81, 0xBF], // 'overlong U+007F - 5 bytes'
  [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], // 'overlong U+007F - 6 bytes'

  [0xE0, 0x9F, 0xBF], // 'overlong U+07FF - 3 bytes'
  [0xF0, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 4 bytes'
  [0xF8, 0x80, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 5 bytes'
  [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 6 bytes'

  [0xF0, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 4 bytes'
  [0xF8, 0x80, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 5 bytes'
  [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 6 bytes'

  [0xF8, 0x84, 0x8F, 0xBF, 0xBF], // 'overlong U+10FFFF - 5 bytes'
  [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], // 'overlong U+10FFFF - 6 bytes'

  // UTF-16 surrogates encoded as code points in UTF-8
  [0xED, 0xA0, 0x80], // 'lead surrogate'
  [0xED, 0xB0, 0x80], // 'trail surrogate'
  [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], // 'surrogate pair'
].forEach((input) => {
  assert.strictEqual(isUtf8(Buffer.from(input)), false);
});

// Anything that is not a TypedArray or an (Shared)ArrayBuffer is rejected.
[
  null,
  undefined,
  'hello',
  true,
  false,
].forEach((input) => {
  assert.throws(
    () => { isUtf8(input); },
    {
      code: 'ERR_INVALID_ARG_TYPE',
    },
  );
});

{
  // Test with detached array buffers
  const arrayBuffer = new ArrayBuffer(1024);
  // Transferring the buffer detaches the original reference.
  structuredClone(arrayBuffer, { transfer: [arrayBuffer] });
  assert.throws(
    () => { isUtf8(arrayBuffer); },
    {
      code: 'ERR_INVALID_STATE'
    }
  );
}
|
Loading…
x
Reference in New Issue
Block a user