diff --git a/test/yarp/errors_test.rb b/test/yarp/errors_test.rb
index 54cbda14d1..41cb6b7f47 100644
--- a/test/yarp/errors_test.rb
+++ b/test/yarp/errors_test.rb
@@ -1255,6 +1255,21 @@ module YARP
       assert_error_messages "0x1_1_", error_messages
     end
 
+    # Every %-literal type must reject an alphanumeric delimiter.
+    def test_alnum_delimiters
+      error_messages = ["Invalid `%` token"]
+
+      assert_error_messages "%qXfooX", error_messages
+      assert_error_messages "%QXfooX", error_messages
+      assert_error_messages "%wXfooX", error_messages
+      assert_error_messages "%WXfooX", error_messages
+      assert_error_messages "%iXfooX", error_messages
+      assert_error_messages "%IXfooX", error_messages
+      assert_error_messages "%xXfooX", error_messages
+      assert_error_messages "%rXfooX", error_messages
+      assert_error_messages "%sXfooX", error_messages
+    end
+
     private
 
     def assert_errors(expected, source, errors, compare_ripper: RUBY_ENGINE == "ruby")
diff --git a/yarp/yarp.c b/yarp/yarp.c
index b343566ee0..7c4b347678 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -7077,9 +7077,10 @@ parser_lex(yp_parser_t *parser) {
 
                 // % %= %i %I %q %Q %w %W
                 case '%': {
-                    // If there is no subsequent character then we have an invalid token. We're
-                    // going to say it's the percent operator because we don't want to move into the
-                    // string lex mode unnecessarily.
+                    // If there is no subsequent character then we have an
+                    // invalid token. We're going to say it's the percent
+                    // operator because we don't want to move into the string
+                    // lex mode unnecessarily.
                     if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
                         yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_PERCENT);
                         LEX(YP_TOKEN_PERCENT);
@@ -7110,6 +7111,14 @@ parser_lex(yp_parser_t *parser) {
                             }
                         }
 
+                        // Delimiters for %-literals cannot be alphanumeric. We
+                        // validate that here.
+                        uint8_t delimiter = peek_offset(parser, 1);
+                        if (delimiter >= 0x80 || parser->encoding.alnum_char(&delimiter, 1)) {
+                            yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_PERCENT);
+                            goto lex_next_token;
+                        }
+
                         switch (peek(parser)) {
                             case 'i': {
                                 parser->current.end++;