Add all supported charsets to Show Packet Bytes/Follow Stream

Use the Qt text codec support to add charset conversions for all character
encodings supported by Qt to Show Packet Bytes and Follow Stream. (Save As
will convert to UTF-8.) Note that this list is dynamic: its exact contents
depend on the version of Qt and on whether libicu support is enabled. This
does make the list of codecs pretty long, so hopefully it shows up well in all
the different Qt styles.

This does not yet handle multibyte characters that span more than one packet
in Follow Stream, though the existing code did not handle that case for UTF-8
or UTF-16 either. This is probably most useful for HTTP captures.

Bug: 16137
Change-Id: I6d5cd761a5d9d914b7a787fe8eb02b07b19642e6
Ping-Bug: 16630
Reviewed-on: https://code.wireshark.org/review/37707
Petri-Dish: Stig Bjørlykke <stig@bjorlykke.org>
Tested-by: Petri Dish Buildbot
Reviewed-by: Stig Bjørlykke <stig@bjorlykke.org>
This commit is contained in:
John Thacker 2020-07-05 07:35:57 -04:00 committed by Stig Bjørlykke
parent 23cb6959c1
commit 9fc054a65b
8 changed files with 120 additions and 69 deletions

View File

@ -50,13 +50,12 @@ typedef enum {
/* Show Type */
typedef enum {
SHOW_ASCII,
SHOW_CARRAY,
SHOW_EBCDIC,
SHOW_HEXDUMP,
SHOW_CARRAY,
SHOW_RAW,
SHOW_YAML,
SHOW_UTF8,
SHOW_UTF16
SHOW_CODEC, // Ordered to match UTF-8 combobox index
SHOW_YAML
} show_type_t;

View File

@ -47,6 +47,7 @@
#include <QPrintDialog>
#include <QPrinter>
#include <QScrollBar>
#include <QTextCodec>
// To do:
// - Show text while tapping.
@ -118,10 +119,10 @@ FollowStreamDialog::FollowStreamDialog(QWidget &parent, CaptureFile &cf, follow_
cbcs->addItem(tr("C Arrays"), SHOW_CARRAY);
cbcs->addItem(tr("EBCDIC"), SHOW_EBCDIC);
cbcs->addItem(tr("Hex Dump"), SHOW_HEXDUMP);
cbcs->addItem(tr("UTF-8"), SHOW_UTF8);
cbcs->addItem(tr("UTF-16"), SHOW_UTF16);
cbcs->addItem(tr("YAML"), SHOW_YAML);
cbcs->addItem(tr("Raw"), SHOW_RAW);
// UTF-8 is guaranteed to exist as a QTextCodec
cbcs->addItem(tr("UTF-8"), SHOW_CODEC);
cbcs->addItem(tr("YAML"), SHOW_YAML);
cbcs->blockSignals(false);
b_filter_out_ = ui->buttonBox->addButton(tr("Filter Out This Stream"), QDialogButtonBox::ActionRole);
@ -155,6 +156,18 @@ FollowStreamDialog::~FollowStreamDialog()
resetStream(); // Frees payload
}
void FollowStreamDialog::addCodecs(const QMap<QString, QTextCodec *> &codecMap)
{
// Make the combobox respect max visible items?
//ui->cbCharset->setStyleSheet("QComboBox { combobox-popup: 0;}");
ui->cbCharset->insertSeparator(ui->cbCharset->count());
for (const auto &codec : qAsConst(codecMap)) {
// This is already in the menu and handled separately
if (codec->name() != "US-ASCII" && codec->name() != "UTF-8")
ui->cbCharset->addItem(tr(codec->name()), SHOW_CODEC);
}
}
void FollowStreamDialog::printStream()
{
#ifndef QT_NO_PRINTER
@ -279,7 +292,7 @@ void FollowStreamDialog::saveAs()
return;
}
// Unconditionally save data as UTF-8 (even if data is decoded as UTF-16).
// Unconditionally save data as UTF-8 (even if data is decoded otherwise).
QByteArray bytes = ui->teStreamContent->toPlainText().toUtf8();
if (show_type_ == SHOW_RAW) {
// The "Raw" format is currently displayed as hex data and needs to be
@ -658,22 +671,17 @@ FollowStreamDialog::showBuffer(char *buffer, size_t nchars, gboolean is_from_ser
break;
}
case SHOW_UTF8:
case SHOW_CODEC:
{
// The QString docs say that invalid characters will be replaced with
// replacement characters or removed. It would be nice if we could
// explicitly choose one or the other.
QString utf8 = QString::fromUtf8(buffer, (int)nchars);
addText(utf8, is_from_server, packet_num);
break;
}
case SHOW_UTF16:
{
// QString::fromUtf16 calls QUtf16::convertToUnicode, casting buffer
// back to a const char * and doubling nchars.
QString utf16 = QString::fromUtf16((const unsigned short *)buffer, (int)nchars / 2);
addText(utf16, is_from_server, packet_num);
// This assumes that multibyte characters don't span packets in the
// stream. To handle that case properly (which might occur with fixed
// block sizes, e.g. transferring over TFTP), we would need to create
// two stateful QTextDecoders, one for each direction, presumably in
// on_cbCharset_currentIndexChanged().
QTextCodec *codec = QTextCodec::codecForName(ui->cbCharset->currentText().toUtf8());
QByteArray ba = QByteArray(buffer, (int)nchars);
QString decoded = codec->toUnicode(ba);
addText(decoded, is_from_server, packet_num);
break;
}

View File

@ -42,6 +42,7 @@ public:
explicit FollowStreamDialog(QWidget &parent, CaptureFile &cf, follow_type_t type = FOLLOW_TCP);
~FollowStreamDialog();
void addCodecs(const QMap<QString, QTextCodec *> &codecMap);
bool follow(QString previous_filter = QString(), bool use_stream_index = false, guint stream_num = 0, guint sub_stream_num = 0);
public slots:

View File

@ -83,6 +83,7 @@ DIAG_ON(frame-larger-than=)
#include <QMetaObject>
#include <QMimeData>
#include <QTabWidget>
#include <QTextCodec>
#include <QToolButton>
#include <QTreeWidget>
#include <QUrl>
@ -313,6 +314,7 @@ MainWindow::MainWindow(QWidget *parent) :
capture_input_init(&cap_session_, CaptureFile::globalCapFile());
#endif
findTextCodecs();
// setupUi calls QMetaObject::connectSlotsByName(this). connectSlotsByName
// iterates over *all* of our children, looking for matching "on_" slots.
// The fewer children we have at this point the better.
@ -1948,6 +1950,49 @@ void MainWindow::captureStop() {
}
}
void MainWindow::findTextCodecs() {
const QList<int> mibs = QTextCodec::availableMibs();
QRegularExpression ibmRegExp("^IBM([0-9]+).*$");
QRegularExpression iso8859RegExp("^ISO-8859-([0-9]+).*$");
QRegularExpression windowsRegExp("^WINDOWS-([0-9]+).*$");
QRegularExpressionMatch match;
for (int mib : mibs) {
QTextCodec *codec = QTextCodec::codecForMib(mib);
QString key = codec->name().toUpper();
char rank;
if (key.localeAwareCompare("IBM") < 0) {
rank = 1;
} else if ((match = ibmRegExp.match(key)).hasMatch()) {
rank = match.capturedRef(1).size(); // Up to 5
} else if (key.localeAwareCompare("ISO-8859-") < 0) {
rank = 6;
} else if ((match = iso8859RegExp.match(key)).hasMatch()) {
rank = 6 + match.capturedRef(1).size(); // Up to 6 + 2
} else if (key.localeAwareCompare("WINDOWS-") < 0) {
rank = 9;
} else if ((match = windowsRegExp.match(key)).hasMatch()) {
rank = 9 + match.capturedRef(1).size(); // Up to 9 + 4
} else {
rank = 14;
}
// This doesn't perfectly well order the IBM codecs because it's
// annoying to properly place IBM00858 and IBM00924 in the middle of
// code page numbers not zero padded to 5 digits.
// We could manipulate the key further to have more commonly used
// charsets earlier. IANA MIB ordering would be unxpected:
// https://www.iana.org/assignments/character-sets/character-sets.xml
// For data about use in HTTP (other protocols can be quite different):
// https://w3techs.com/technologies/overview/character_encoding
key.prepend('0' + rank);
// We use a map here because, due to backwards compatibility,
// the same QTextCodec may be returned for multiple MIBs, which
// happens for GBK/GB2312, EUC-KR/windows-949/UHC, and others.
text_codec_map_.insert(key, codec);
}
}
void MainWindow::initMainToolbarIcons()
{
// Normally 16 px. Reflects current GTK+ behavior and other Windows apps.

View File

@ -183,6 +183,7 @@ private:
DisplayFilterCombo *df_combo_box_;
CaptureFile capture_file_;
QFont mono_font_;
QMap<QString, QTextCodec *> text_codec_map_;
#if defined(HAVE_LIBNL) && defined(HAVE_NL80211)
WirelessFrame *wireless_frame_;
#endif
@ -257,6 +258,8 @@ private:
bool testCaptureFileClose(QString before_what, FileCloseContext context = Default);
void captureStop();
void findTextCodecs();
void initMainToolbarIcons();
void initShowHideMainWidgets();
void initTimeDisplayFormatMenu();

View File

@ -1812,6 +1812,7 @@ void MainWindow::on_actionFileExportPacketBytes_triggered()
void MainWindow::on_actionAnalyzeShowPacketBytes_triggered()
{
ShowPacketBytesDialog *spbd = new ShowPacketBytesDialog(*this, capture_file_);
spbd->addCodecs(text_codec_map_);
spbd->show();
}
@ -2831,7 +2832,7 @@ void MainWindow::openFollowStreamDialog(follow_type_t type, guint stream_num, gu
FollowStreamDialog *fsd = new FollowStreamDialog(*this, capture_file_, type);
connect(fsd, SIGNAL(updateFilter(QString, bool)), this, SLOT(filterPackets(QString, bool)));
connect(fsd, SIGNAL(goToPacket(int)), packet_list_, SLOT(goToPacket(int)));
fsd->addCodecs(text_codec_map_);
fsd->show();
if (use_stream_index) {
// If a specific conversation was requested, then ignore any previous

View File

@ -24,6 +24,7 @@
#include <QMenu>
#include <QPrintDialog>
#include <QPrinter>
#include <QTextCodec>
#include <QTextStream>
// To do:
@ -70,10 +71,9 @@ ShowPacketBytesDialog::ShowPacketBytesDialog(QWidget &parent, CaptureFile &cf) :
ui->cbShowAs->addItem(tr("Hex Dump"), ShowAsHexDump);
ui->cbShowAs->addItem(tr("HTML"), ShowAsHTML);
ui->cbShowAs->addItem(tr("Image"), ShowAsImage);
ui->cbShowAs->addItem(tr("ISO 8859-1"), ShowAsISO8859_1);
ui->cbShowAs->addItem(tr("Raw"), ShowAsRAW);
ui->cbShowAs->addItem(tr("UTF-8"), ShowAsUTF8);
ui->cbShowAs->addItem(tr("UTF-16"), ShowAsUTF16);
// UTF-8 is guaranteed to exist as a QTextCodec
ui->cbShowAs->addItem(tr("UTF-8"), ShowAsCodec);
ui->cbShowAs->addItem(tr("YAML"), ShowAsYAML);
ui->cbShowAs->setCurrentIndex(show_as_);
ui->cbShowAs->blockSignals(false);
@ -101,6 +101,20 @@ ShowPacketBytesDialog::~ShowPacketBytesDialog()
delete ui;
}
void ShowPacketBytesDialog::addCodecs(const QMap<QString, QTextCodec *> &codecMap)
{
ui->cbShowAs->blockSignals(true);
// Make the combobox respect max visible items?
//ui->cbShowAs->setStyleSheet("QComboBox { combobox-popup: 0;}");
ui->cbShowAs->insertSeparator(ui->cbShowAs->count());
for (const auto &codec : qAsConst(codecMap)) {
// This is already placed in the menu and handled separately
if (codec->name() != "US-ASCII" && codec->name() != "UTF-8")
ui->cbShowAs->addItem(tr(codec->name()), ShowAsCodec);
}
ui->cbShowAs->blockSignals(false);
}
void ShowPacketBytesDialog::showSelected(int start, int end)
{
if (end == -1) {
@ -265,7 +279,6 @@ void ShowPacketBytesDialog::copyBytes()
case ShowAsCArray:
case ShowAsEBCDIC:
case ShowAsHexDump:
case ShowAsISO8859_1:
case ShowAsRAW:
case ShowAsYAML:
wsApp->clipboard()->setText(ui->tePacketBytes->toPlainText());
@ -279,8 +292,7 @@ void ShowPacketBytesDialog::copyBytes()
wsApp->clipboard()->setImage(image_);
break;
case ShowAsUTF8:
case ShowAsUTF16:
case ShowAsCodec:
wsApp->clipboard()->setText(ui->tePacketBytes->toPlainText().toUtf8());
break;
}
@ -298,13 +310,12 @@ void ShowPacketBytesDialog::saveAs()
case ShowAsASCII:
case ShowAsASCIIandControl:
case ShowAsCArray:
// We always save as UTF-8, so set text mode as we would for UTF-8
case ShowAsCodec:
case ShowAsHexDump:
case ShowAsISO8859_1:
case ShowAsYAML:
case ShowAsHTML:
case ShowAsUTF8:
open_mode |= QFile::Text;
// case ShowAsUTF16: ???
default:
break;
}
@ -326,7 +337,6 @@ void ShowPacketBytesDialog::saveAs()
case ShowAsCArray:
case ShowAsEBCDIC:
case ShowAsHexDump:
case ShowAsISO8859_1:
case ShowAsYAML:
{
QTextStream out(&file);
@ -341,8 +351,7 @@ void ShowPacketBytesDialog::saveAs()
break;
}
case ShowAsUTF8:
case ShowAsUTF16:
case ShowAsCodec:
{
QTextStream out(&file);
out << ui->tePacketBytes->toPlainText().toUtf8();
@ -621,6 +630,20 @@ void ShowPacketBytesDialog::updatePacketBytes(void)
break;
}
case ShowAsCodec:
{
// The QTextCodec docs say that there's a flag to cause invalid
// characters to be replaced with null. It's unclear what happens
// in the default case; it might depend on the codec, though it
// seems that in practice replacement characters are used.
QTextCodec *codec = QTextCodec::codecForName(ui->cbShowAs->currentText().toUtf8());
QByteArray ba(field_bytes_);
QString decoded = codec->toUnicode(ba);
ui->tePacketBytes->setLineWrapMode(QTextEdit::WidgetWidth);
ui->tePacketBytes->setPlainText(decoded);
break;
}
case ShowAsEBCDIC:
{
QByteArray ba(field_bytes_);
@ -703,35 +726,6 @@ void ShowPacketBytesDialog::updatePacketBytes(void)
break;
}
case ShowAsISO8859_1:
{
QString latin1 = QString::fromLatin1(field_bytes_.constData(), (int)field_bytes_.length());
ui->tePacketBytes->setLineWrapMode(QTextEdit::WidgetWidth);
ui->tePacketBytes->setPlainText(latin1);
break;
}
case ShowAsUTF8:
{
// The QString docs say that invalid characters will be replaced with
// replacement characters or removed. It would be nice if we could
// explicitly choose one or the other.
QString utf8 = QString::fromUtf8(field_bytes_.constData(), (int)field_bytes_.length());
ui->tePacketBytes->setLineWrapMode(QTextEdit::WidgetWidth);
ui->tePacketBytes->setPlainText(utf8);
break;
}
case ShowAsUTF16:
{
// QString::fromUtf16 calls QUtf16::convertToUnicode, casting buffer
// back to a const char * and doubling nchars.
QString utf16 = QString::fromUtf16((const unsigned short *)field_bytes_.constData(), (int)field_bytes_.length() / 2);
ui->tePacketBytes->setLineWrapMode(QTextEdit::WidgetWidth);
ui->tePacketBytes->setPlainText(utf16);
break;
}
case ShowAsYAML:
{
const int base64_raw_len = 57; // Encodes to 76 bytes, common in RFCs

View File

@ -38,6 +38,8 @@ public:
explicit ShowPacketBytesDialog(QWidget &parent, CaptureFile &cf);
~ShowPacketBytesDialog();
void addCodecs(const QMap<QString, QTextCodec *> &codecMap);
public slots:
void captureFileClosing();
@ -78,11 +80,9 @@ private:
ShowAsHexDump,
ShowAsHTML,
ShowAsImage,
ShowAsISO8859_1,
ShowAsRAW,
ShowAsUTF8,
ShowAsUTF16,
ShowAsYAML
ShowAsCodec, // Ordered to match the UTF-8 combobox index
ShowAsYAML,
};
void setStartAndEnd(int start, int end);