John Thacker 60a9e1124b Find: Matching multiple occurrences in Packet Bytes
Match multiple occurrence in Find Packet Bytes, both forwards
and backwards.

Also fix an issue highlighting wide strings properly reintroduced by
commit c0885fe390f1fba32986806383dd38a437c7681f

For backwards searching in string and binary searches, use
the memrchr and backwards mempbrk implementations. For regex,
use PCRE2_ANCHORED to transform the user's regex expression into
one that is anchored at the start byte, and progressively search
backwards.

Fix #11269
2023-10-27 05:56:29 +00:00

206 lines
4.8 KiB
C

/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "config.h"
#include "regex.h"
#include <wsutil/str_util.h>
#include <pcre2.h>
struct _ws_regex {
pcre2_code *code;
char *pattern;
};
#define ERROR_MAXLEN_IN_CODE_UNITS 128
static char *
get_error_msg(int errorcode)
{
char *buffer;
/*
* We have to provide a buffer and we don't know how long the
* error message is or even the maximum size. From pcre2api(3):
* "None of the messages are very long; a
* buffer size of 120 code units is ample."
*/
/* Code unit = one byte */
buffer = g_malloc(ERROR_MAXLEN_IN_CODE_UNITS);
/* Message is returned with a trailing zero. */
pcre2_get_error_message(errorcode, buffer, ERROR_MAXLEN_IN_CODE_UNITS);
/* One more at the end for good luck. */
buffer[ERROR_MAXLEN_IN_CODE_UNITS-1] = '\0';
return buffer;
}
static pcre2_code *
compile_pcre2(const char *patt, ssize_t size, char **errmsg, unsigned flags)
{
pcre2_code *code;
int errorcode;
PCRE2_SIZE length;
PCRE2_SIZE erroroffset;
uint32_t options = 0;
if (size < 0)
length = PCRE2_ZERO_TERMINATED;
else
length = (PCRE2_SIZE)size;
if (flags & WS_REGEX_NEVER_UTF)
options |= PCRE2_NEVER_UTF;
if (flags & WS_REGEX_CASELESS)
options |= PCRE2_CASELESS;
if (flags & WS_REGEX_ANCHORED)
options |= PCRE2_ANCHORED;
/* By default UTF-8 is off. */
code = pcre2_compile_8((PCRE2_SPTR)patt,
length,
options,
&errorcode,
&erroroffset,
NULL);
if (code == NULL) {
*errmsg = get_error_msg(errorcode);
return NULL;
}
return code;
}
ws_regex_t *
ws_regex_compile_ex(const char *patt, ssize_t size, char **errmsg, unsigned flags)
{
ws_return_val_if(!patt, NULL);
pcre2_code *code = compile_pcre2(patt, size, errmsg, flags);
if (code == NULL)
return NULL;
ws_regex_t *re = g_new(ws_regex_t, 1);
re->code = code;
re->pattern = ws_escape_string_len(NULL, patt, size, false);
return re;
}
ws_regex_t *
ws_regex_compile(const char *patt, char **errmsg)
{
return ws_regex_compile_ex(patt, -1, errmsg, 0);
}
static bool
match_pcre2(pcre2_code *code, const char *subject, ssize_t subj_length,
size_t subj_offset, pcre2_match_data *match_data)
{
PCRE2_SIZE length;
int rc;
if (subj_length < 0)
length = PCRE2_ZERO_TERMINATED;
else
length = (PCRE2_SIZE)subj_length;
rc = pcre2_match(code,
subject,
length,
(PCRE2_SIZE)subj_offset,
0, /* default options */
match_data,
NULL);
if (rc < 0) {
/* No match */
if (rc != PCRE2_ERROR_NOMATCH) {
/* Error. Should not happen with UTF-8 disabled. Some huge
* subject strings could hit some internal limit. */
char *msg = get_error_msg(rc);
ws_debug("Unexpected pcre2_match() error: %s.", msg);
g_free(msg);
}
return false;
}
/* Matched */
return true;
}
bool
ws_regex_matches(const ws_regex_t *re, const char *subj)
{
return ws_regex_matches_length(re, subj, -1);
}
bool
ws_regex_matches_length(const ws_regex_t *re,
const char *subj, ssize_t subj_length)
{
bool matched;
pcre2_match_data *match_data;
ws_return_val_if(!re, false);
ws_return_val_if(!subj, false);
/* We don't use the matched substring but pcre2_match requires
* at least one pair of offsets. */
match_data = pcre2_match_data_create(1, NULL);
matched = match_pcre2(re->code, subj, subj_length, 0, match_data);
pcre2_match_data_free(match_data);
return matched;
}
bool
ws_regex_matches_pos(const ws_regex_t *re,
const char *subj, ssize_t subj_length,
size_t subj_offset, size_t pos_vect[2])
{
bool matched;
pcre2_match_data *match_data;
ws_return_val_if(!re, false);
ws_return_val_if(!subj, false);
match_data = pcre2_match_data_create(1, NULL);
matched = match_pcre2(re->code, subj, subj_length, subj_offset, match_data);
if (matched && pos_vect) {
PCRE2_SIZE *ovect = pcre2_get_ovector_pointer(match_data);
pos_vect[0] = ovect[0];
pos_vect[1] = ovect[1];
}
pcre2_match_data_free(match_data);
return matched;
}
void
ws_regex_free(ws_regex_t *re)
{
pcre2_code_free(re->code);
g_free(re->pattern);
g_free(re);
}
const char *
ws_regex_pattern(const ws_regex_t *re)
{
return re->pattern;
}