Using g_regex_match_full allows us to avoid the need to NUL-terminate the input file, which is nice because that NUL is being written beyond the defined length of the mmap'd region -- it could cause a crash or undefined behavior if the input file is an exact integer multiple of the system's page size and the NUL is written to the following page. There's also no need to insist upon a newline at the end of the file. All of the parsing code is already written to not depend on the fields being delimited in any specific way, so this test appears superfluous. Clean up a _potential_ infinite loop if we ever reached the path where the "data" field was not found. Failing to call g_match_info_next and continuing from the top of the loop would prevent it from ever advancing from that point. I'm not convinced that it is truly possible to reach that path with G_REGEX_MATCH_NOTEMPTY specified, but it is easy enough to make it safe. Finally, add very basic unit tests for the regex functionality of text2pcap, including a case which lacks a newline at the end of the file. The test is based upon the minimal example in the text2pcap utility's "--help" text. Fixes #20245.
106 lines
3.9 KiB
C
106 lines
3.9 KiB
C
/* text_import_regex.c
|
|
* Regex based text importer
|
|
* March 2021, Paul Weiß <paulniklasweiss@gmail.com>
|
|
*
|
|
* Wireshark - Network traffic analyzer
|
|
* By Gerald Combs <gerald@wireshark.org>
|
|
* Copyright 1998 Gerald Combs
|
|
*
|
|
* Based on text_import.c by Jaap Keuter <jaap.keuter@xs4all.nl>
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later
|
|
*/
|
|
|
|
#include "config.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <glib.h>
|
|
|
|
#include "text_import.h"
|
|
#include "text_import_regex.h"
|
|
|
|
/* Shorthand alias for unsigned int.
 * NOTE(review): no use of this alias is visible in this file - confirm
 * whether it is still needed. */
typedef unsigned int uint;
|
|
|
|
/*--- Options --------------------------------------------------------------------*/
|
|
|
|
int text_import_regex(const text_import_info_t* info) {
|
|
int status = 1;
|
|
int parsed_packets = 0;
|
|
ws_debug("starting import...");
|
|
|
|
// IO
|
|
GMappedFile* file = g_mapped_file_ref(info->regex.import_text_GMappedFile);
|
|
GError* gerror = NULL;
|
|
size_t f_size = g_mapped_file_get_length(file);
|
|
unsigned char* f_content = g_mapped_file_get_contents(file);
|
|
|
|
// Regex result dissecting
|
|
bool re_time, re_dir, re_seqno;
|
|
GMatchInfo* match;
|
|
int field_start;
|
|
int field_end;
|
|
{ /* analyze regex */
|
|
re_time = g_regex_get_string_number(info->regex.format, "time") >= 0;
|
|
re_dir = g_regex_get_string_number(info->regex.format, "dir") >= 0;
|
|
re_seqno = g_regex_get_string_number(info->regex.format, "seqno") >= 0;
|
|
if (g_regex_get_string_number(info->regex.format, "data") < 0) {
|
|
/* This should never happen, as the dialog checks for this */
|
|
fprintf(stderr, "Error could not find data in pattern\n");
|
|
g_mapped_file_unref(file);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
ws_debug("regex has %s%s%s", re_dir ? "dir, " : "",
|
|
re_time ? "time, " : "",
|
|
re_seqno ? "seqno, " : "");
|
|
g_regex_match_full(info->regex.format, f_content, f_size, 0, G_REGEX_MATCH_NOTEMPTY, &match, &gerror);
|
|
while (g_match_info_matches(match)) {
|
|
/* parse the data */
|
|
if (g_match_info_fetch_named_pos(match, "data", &field_start, &field_end)) {
|
|
parse_data(f_content + field_start, f_content + field_end, info->regex.encoding);
|
|
|
|
/* parse the auxiliary information if present */
|
|
if (re_time &&
|
|
g_match_info_fetch_named_pos(match, "time", &field_start, &field_end)) {
|
|
parse_time(f_content + field_start, f_content + field_end, info->timestamp_format);
|
|
} else {
|
|
/* No time present, so add a fixed delta. */
|
|
parse_time(NULL, NULL, NULL);
|
|
}
|
|
|
|
if (re_dir &&
|
|
g_match_info_fetch_named_pos(match, "dir", &field_start, &field_end))
|
|
parse_dir(f_content + field_start, f_content + field_end, info->regex.in_indication, info->regex.out_indication);
|
|
|
|
if (re_seqno &&
|
|
g_match_info_fetch_named_pos(match, "seqno", &field_start, &field_end))
|
|
parse_seqno(f_content + field_start, f_content + field_end);
|
|
|
|
if (ws_log_get_level() == LOG_LEVEL_NOISY) {
|
|
g_match_info_fetch_pos(match, 0, &field_start, &field_end);
|
|
ws_noisy("Packet %d at %x to %x: %.*s\n", parsed_packets + 1,
|
|
field_start, field_end,
|
|
field_end - field_start, f_content + field_start);
|
|
}
|
|
flush_packet();
|
|
++parsed_packets;
|
|
} else {
|
|
fprintf(stderr, "Warning: could not fetch data on would be packet %d, discarding\n", parsed_packets + 1);
|
|
}
|
|
/* prepare next packet */
|
|
g_match_info_next(match, &gerror);
|
|
if (gerror && gerror->code) {
|
|
status = -1;
|
|
g_error_free(gerror);
|
|
break;
|
|
}
|
|
}
|
|
ws_debug("processed %d packets", parsed_packets);
|
|
g_match_info_unref(match);
|
|
g_mapped_file_unref(file);
|
|
return status * parsed_packets;
|
|
}
|