diff --git a/epan/follow.h b/epan/follow.h index c0df89c0ce..42c0cc9ff4 100644 --- a/epan/follow.h +++ b/epan/follow.h @@ -50,13 +50,12 @@ typedef enum { /* Show Type */ typedef enum { SHOW_ASCII, + SHOW_CARRAY, SHOW_EBCDIC, SHOW_HEXDUMP, - SHOW_CARRAY, SHOW_RAW, - SHOW_YAML, - SHOW_UTF8, - SHOW_UTF16 + SHOW_CODEC, // Ordered to match UTF-8 combobox index + SHOW_YAML } show_type_t; diff --git a/ui/qt/follow_stream_dialog.cpp b/ui/qt/follow_stream_dialog.cpp index d0db58b72e..62a6e03de7 100644 --- a/ui/qt/follow_stream_dialog.cpp +++ b/ui/qt/follow_stream_dialog.cpp @@ -47,6 +47,7 @@ #include #include #include +#include // To do: // - Show text while tapping. @@ -118,10 +119,10 @@ FollowStreamDialog::FollowStreamDialog(QWidget &parent, CaptureFile &cf, follow_ cbcs->addItem(tr("C Arrays"), SHOW_CARRAY); cbcs->addItem(tr("EBCDIC"), SHOW_EBCDIC); cbcs->addItem(tr("Hex Dump"), SHOW_HEXDUMP); - cbcs->addItem(tr("UTF-8"), SHOW_UTF8); - cbcs->addItem(tr("UTF-16"), SHOW_UTF16); - cbcs->addItem(tr("YAML"), SHOW_YAML); cbcs->addItem(tr("Raw"), SHOW_RAW); + // UTF-8 is guaranteed to exist as a QTextCodec + cbcs->addItem(tr("UTF-8"), SHOW_CODEC); + cbcs->addItem(tr("YAML"), SHOW_YAML); cbcs->blockSignals(false); b_filter_out_ = ui->buttonBox->addButton(tr("Filter Out This Stream"), QDialogButtonBox::ActionRole);
@@ -155,6 +156,25 @@ FollowStreamDialog::~FollowStreamDialog()
     resetStream(); // Frees payload
 }
 
+// Append the codecs in codecMap to the charset combobox, after a separator,
+// in the map's key order. US-ASCII and UTF-8 are skipped because they are
+// already in the menu and handled separately.
+void FollowStreamDialog::addCodecs(const QMap<QString, QTextCodec *> &codecMap)
+{
+    // Make the combobox respect max visible items?
+    //ui->cbCharset->setStyleSheet("QComboBox { combobox-popup: 0;}");
+    ui->cbCharset->insertSeparator(ui->cbCharset->count());
+    for (const QTextCodec *codec : codecMap) {
+        const QByteArray name = codec->name();
+        // This is already in the menu and handled separately
+        if (name == "US-ASCII" || name == "UTF-8")
+            continue;
+        // Don't tr() the name: showBuffer() hands the item text straight
+        // back to QTextCodec::codecForName(), which needs the real name.
+        ui->cbCharset->addItem(QString::fromUtf8(name), SHOW_CODEC);
+    }
+}
+
 void FollowStreamDialog::printStream()
 {
 #ifndef QT_NO_PRINTER
@@ -279,7 +299,7 @@ void FollowStreamDialog::saveAs() return; } - // Unconditionally save data as UTF-8 (even if data is decoded as UTF-16). + // Unconditionally save data as UTF-8 (even if data is decoded otherwise). QByteArray bytes = ui->teStreamContent->toPlainText().toUtf8(); if (show_type_ == SHOW_RAW) { // The "Raw" format is currently displayed as hex data and needs to be
@@ -658,22 +678,20 @@ FollowStreamDialog::showBuffer(char *buffer, size_t nchars, gboolean is_from_ser
         break;
     }
 
-    case SHOW_UTF8:
+    case SHOW_CODEC:
     {
-        // The QString docs say that invalid characters will be replaced with
-        // replacement characters or removed. It would be nice if we could
-        // explicitly choose one or the other.
-        QString utf8 = QString::fromUtf8(buffer, (int)nchars);
-        addText(utf8, is_from_server, packet_num);
-        break;
-    }
-
-    case SHOW_UTF16:
-    {
-        // QString::fromUtf16 calls QUtf16::convertToUnicode, casting buffer
-        // back to a const char * and doubling nchars.
-        QString utf16 = QString::fromUtf16((const unsigned short *)buffer, (int)nchars / 2);
-        addText(utf16, is_from_server, packet_num);
+        // This assumes that multibyte characters don't span packets in the
+        // stream. To handle that case properly (which might occur with fixed
+        // block sizes, e.g. transferring over TFTP), we would need to create
+        // two stateful QTextDecoders, one for each direction, presumably in
+        // on_cbCharset_currentIndexChanged().
+        // codecForName() returns null for a name Qt does not recognize (for
+        // example a translated combobox label), so fall back to UTF-8
+        // decoding instead of dereferencing a null codec.
+        const QTextCodec *codec = QTextCodec::codecForName(ui->cbCharset->currentText().toUtf8());
+        const QString decoded = codec ? codec->toUnicode(buffer, (int)nchars)
+                                      : QString::fromUtf8(buffer, (int)nchars);
+        addText(decoded, is_from_server, packet_num);
         break;
     }
 
diff --git a/ui/qt/follow_stream_dialog.h b/ui/qt/follow_stream_dialog.h index a2c3928fae..d6cbfeebf1 100644 --- a/ui/qt/follow_stream_dialog.h +++ b/ui/qt/follow_stream_dialog.h @@ -42,6 +42,7 @@ public: explicit FollowStreamDialog(QWidget &parent, CaptureFile &cf, follow_type_t type = FOLLOW_TCP); ~FollowStreamDialog(); + void addCodecs(const QMap<QString, QTextCodec *> &codecMap); bool follow(QString previous_filter = QString(), bool use_stream_index = false, guint stream_num = 0, guint sub_stream_num = 0); public slots: diff --git a/ui/qt/main_window.cpp b/ui/qt/main_window.cpp index c7e046c417..9ca52461da 100644 --- a/ui/qt/main_window.cpp +++ b/ui/qt/main_window.cpp @@ -83,6 +83,7 @@ DIAG_ON(frame-larger-than=) #include #include #include +#include #include #include #include @@ -313,6 +314,7 @@ MainWindow::MainWindow(QWidget *parent) : capture_input_init(&cap_session_, CaptureFile::globalCapFile()); #endif + findTextCodecs(); // setpUi calls QMetaObject::connectSlotsByName(this). connectSlotsByName // iterates over *all* of our children, looking for matching "on_" slots. // The fewer children we have at this point the better.
@@ -1948,6 +1950,61 @@ void MainWindow::captureStop() {
     }
 }
 
+// Fill text_codec_map_ with every QTextCodec that Qt reports as available.
+// Each key is the upper-cased codec name prefixed with a one-character rank,
+// so iterating the map yields the IBM, ISO-8859 and Windows code pages with
+// shorter code page numbers ahead of longer ones.
+void MainWindow::findTextCodecs() {
+    const QList<int> mibs = QTextCodec::availableMibs();
+    const QRegularExpression ibmRegExp("^IBM([0-9]+).*$");
+    const QRegularExpression iso8859RegExp("^ISO-8859-([0-9]+).*$");
+    const QRegularExpression windowsRegExp("^WINDOWS-([0-9]+).*$");
+    QRegularExpressionMatch match;
+    for (int mib : mibs) {
+        QTextCodec *codec = QTextCodec::codecForMib(mib);
+        if (!codec) {
+            // Defensive: skip a MIB for which Qt returns no codec.
+            continue;
+        }
+        QString key = codec->name().toUpper();
+        int rank;
+
+        // capturedLength(1) is the number of digits in the code page number.
+        // NOTE(review): these thresholds use locale-aware comparison while the map orders keys with operator< - confirm the two agree.
+        if (key.localeAwareCompare("IBM") < 0) {
+            rank = 1;
+        } else if ((match = ibmRegExp.match(key)).hasMatch()) {
+            rank = match.capturedLength(1); // Up to 5
+        } else if (key.localeAwareCompare("ISO-8859-") < 0) {
+            rank = 6;
+        } else if ((match = iso8859RegExp.match(key)).hasMatch()) {
+            rank = 6 + match.capturedLength(1); // Up to 6 + 2
+        } else if (key.localeAwareCompare("WINDOWS-") < 0) {
+            rank = 9;
+        } else if ((match = windowsRegExp.match(key)).hasMatch()) {
+            rank = 9 + match.capturedLength(1); // Up to 9 + 4
+        } else {
+            rank = 14;
+        }
+        // This doesn't order the IBM codecs perfectly because it's
+        // annoying to properly place IBM00858 and IBM00924 in the middle of
+        // code page numbers not zero padded to 5 digits.
+        // We could manipulate the key further to have more commonly used
+        // charsets earlier. IANA MIB ordering would be unexpected:
+        // https://www.iana.org/assignments/character-sets/character-sets.xml
+        // For data about use in HTTP (other protocols can be quite different):
+        // https://w3techs.com/technologies/overview/character_encoding
+
+        // Ranks above 9 map to the characters that follow '9' (':' onward),
+        // which still compare in rank order.
+        key.prepend(QChar('0' + rank));
+        // We use a map here because, due to backwards compatibility,
+        // the same QTextCodec may be returned for multiple MIBs, which
+        // happens for GBK/GB2312, EUC-KR/windows-949/UHC, and others.
+        text_codec_map_.insert(key, codec);
+    }
+}
+
 void MainWindow::initMainToolbarIcons()
 {
     // Normally 16 px. Reflects current GTK+ behavior and other Windows apps.
diff --git a/ui/qt/main_window.h b/ui/qt/main_window.h index 478e3d0846..e1125c816b 100644 --- a/ui/qt/main_window.h +++ b/ui/qt/main_window.h @@ -183,6 +183,7 @@ private: DisplayFilterCombo *df_combo_box_; CaptureFile capture_file_; QFont mono_font_; + QMap text_codec_map_; #if defined(HAVE_LIBNL) && defined(HAVE_NL80211) WirelessFrame *wireless_frame_; #endif @@ -257,6 +258,8 @@ private: bool testCaptureFileClose(QString before_what, FileCloseContext context = Default); void captureStop(); + void findTextCodecs(); + void initMainToolbarIcons(); void initShowHideMainWidgets(); void initTimeDisplayFormatMenu(); diff --git a/ui/qt/main_window_slots.cpp b/ui/qt/main_window_slots.cpp index 61a95bba57..20d8db22e0 100644 --- a/ui/qt/main_window_slots.cpp +++ b/ui/qt/main_window_slots.cpp @@ -1812,6 +1812,7 @@ void MainWindow::on_actionFileExportPacketBytes_triggered() void MainWindow::on_actionAnalyzeShowPacketBytes_triggered() { ShowPacketBytesDialog *spbd = new ShowPacketBytesDialog(*this, capture_file_); + spbd->addCodecs(text_codec_map_); spbd->show(); } @@ -2831,7 +2832,7 @@ void MainWindow::openFollowStreamDialog(follow_type_t type, guint stream_num, gu FollowStreamDialog *fsd = new FollowStreamDialog(*this, capture_file_, type); connect(fsd, SIGNAL(updateFilter(QString, bool)), this, SLOT(filterPackets(QString, bool))); connect(fsd, SIGNAL(goToPacket(int)), packet_list_, SLOT(goToPacket(int))); - + fsd->addCodecs(text_codec_map_); fsd->show(); if (use_stream_index) { // If a specific conversation was requested, then ignore any previous diff --git a/ui/qt/show_packet_bytes_dialog.cpp b/ui/qt/show_packet_bytes_dialog.cpp index c19a5ddec3..6b6d6da09b 100644 --- a/ui/qt/show_packet_bytes_dialog.cpp +++ b/ui/qt/show_packet_bytes_dialog.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include // To do: @@ -70,10 +71,9 @@ ShowPacketBytesDialog::ShowPacketBytesDialog(QWidget &parent, CaptureFile &cf) : ui->cbShowAs->addItem(tr("Hex Dump"), ShowAsHexDump); 
ui->cbShowAs->addItem(tr("HTML"), ShowAsHTML); ui->cbShowAs->addItem(tr("Image"), ShowAsImage); - ui->cbShowAs->addItem(tr("ISO 8859-1"), ShowAsISO8859_1); ui->cbShowAs->addItem(tr("Raw"), ShowAsRAW); - ui->cbShowAs->addItem(tr("UTF-8"), ShowAsUTF8); - ui->cbShowAs->addItem(tr("UTF-16"), ShowAsUTF16); + // UTF-8 is guaranteed to exist as a QTextCodec + ui->cbShowAs->addItem(tr("UTF-8"), ShowAsCodec); ui->cbShowAs->addItem(tr("YAML"), ShowAsYAML); ui->cbShowAs->setCurrentIndex(show_as_); ui->cbShowAs->blockSignals(false);
@@ -101,6 +101,28 @@ ShowPacketBytesDialog::~ShowPacketBytesDialog()
     delete ui;
 }
 
+// Append the codecs in codecMap to the "Show as" combobox, after a separator,
+// in the map's key order. US-ASCII and UTF-8 are skipped because they are
+// already in the menu. Combobox signals are blocked while the items are added.
+void ShowPacketBytesDialog::addCodecs(const QMap<QString, QTextCodec *> &codecMap)
+{
+    ui->cbShowAs->blockSignals(true);
+    // Make the combobox respect max visible items?
+    //ui->cbShowAs->setStyleSheet("QComboBox { combobox-popup: 0;}");
+    ui->cbShowAs->insertSeparator(ui->cbShowAs->count());
+    for (const QTextCodec *codec : codecMap) {
+        const QByteArray name = codec->name();
+        // This is already placed in the menu and handled separately
+        if (name == "US-ASCII" || name == "UTF-8")
+            continue;
+        // Don't tr() the name: updatePacketBytes() hands the item text
+        // straight back to QTextCodec::codecForName(), which needs the
+        // real name.
+        ui->cbShowAs->addItem(QString::fromUtf8(name), ShowAsCodec);
+    }
+    ui->cbShowAs->blockSignals(false);
+}
+
 void ShowPacketBytesDialog::showSelected(int start, int end)
 {
     if (end == -1) {
@@ -265,7 +287,6 @@ void ShowPacketBytesDialog::copyBytes() case ShowAsCArray: case ShowAsEBCDIC: case ShowAsHexDump: - case ShowAsISO8859_1: case ShowAsRAW: case ShowAsYAML: wsApp->clipboard()->setText(ui->tePacketBytes->toPlainText()); @@ -279,8 +300,7 @@ void ShowPacketBytesDialog::copyBytes() wsApp->clipboard()->setImage(image_); break; - case ShowAsUTF8: - case ShowAsUTF16: + case ShowAsCodec: wsApp->clipboard()->setText(ui->tePacketBytes->toPlainText().toUtf8()); break; } @@ -298,13 +318,12 @@ void ShowPacketBytesDialog::saveAs() case ShowAsASCII: case ShowAsASCIIandControl: case ShowAsCArray: + // We always save as UTF-8, so set text mode as we would for UTF-8 + case ShowAsCodec: case ShowAsHexDump: - case ShowAsISO8859_1: case ShowAsYAML:
case ShowAsHTML: - case ShowAsUTF8: open_mode |= QFile::Text; - // case ShowAsUTF16: ??? default: break; } @@ -326,7 +345,6 @@ void ShowPacketBytesDialog::saveAs() case ShowAsCArray: case ShowAsEBCDIC: case ShowAsHexDump: - case ShowAsISO8859_1: case ShowAsYAML: { QTextStream out(&file); @@ -341,8 +359,7 @@ void ShowPacketBytesDialog::saveAs() break; } - case ShowAsUTF8: - case ShowAsUTF16: + case ShowAsCodec: { QTextStream out(&file); out << ui->tePacketBytes->toPlainText().toUtf8();
@@ -621,6 +638,25 @@ void ShowPacketBytesDialog::updatePacketBytes(void)
         break;
     }
 
+    case ShowAsCodec:
+    {
+        // The QTextCodecs docs say that there's a flag to cause invalid
+        // characters to be replaced with null. It's unclear what happens
+        // in the default case; it might depend on the codec though it
+        // seems that in practice replacement characters are used.
+        // codecForName() returns null for a name Qt does not recognize (for
+        // example a translated combobox label), so fall back to UTF-8
+        // decoding instead of dereferencing a null codec.
+        const QTextCodec *codec = QTextCodec::codecForName(ui->cbShowAs->currentText().toUtf8());
+        const char *bytes = field_bytes_.constData();
+        const int length = (int)field_bytes_.length();
+        const QString decoded = codec ? codec->toUnicode(bytes, length)
+                                      : QString::fromUtf8(bytes, length);
+        ui->tePacketBytes->setLineWrapMode(QTextEdit::WidgetWidth);
+        ui->tePacketBytes->setPlainText(decoded);
+        break;
+    }
+
     case ShowAsEBCDIC:
     {
         QByteArray ba(field_bytes_);
@@ -703,35 +739,6 @@ void ShowPacketBytesDialog::updatePacketBytes(void) break; } - case ShowAsISO8859_1: - { - QString latin1 = QString::fromLatin1(field_bytes_.constData(), (int)field_bytes_.length()); - ui->tePacketBytes->setLineWrapMode(QTextEdit::WidgetWidth); - ui->tePacketBytes->setPlainText(latin1); - break; - } - - case ShowAsUTF8: - { - // The QString docs say that invalid characters will be replaced with - // replacement characters or removed. It would be nice if we could - // explicitly choose one or the other.
- QString utf8 = QString::fromUtf8(field_bytes_.constData(), (int)field_bytes_.length()); - ui->tePacketBytes->setLineWrapMode(QTextEdit::WidgetWidth); - ui->tePacketBytes->setPlainText(utf8); - break; - } - - case ShowAsUTF16: - { - // QString::fromUtf16 calls QUtf16::convertToUnicode, casting buffer - // back to a const char * and doubling nchars. - QString utf16 = QString::fromUtf16((const unsigned short *)field_bytes_.constData(), (int)field_bytes_.length() / 2); - ui->tePacketBytes->setLineWrapMode(QTextEdit::WidgetWidth); - ui->tePacketBytes->setPlainText(utf16); - break; - } - case ShowAsYAML: { const int base64_raw_len = 57; // Encodes to 76 bytes, common in RFCs diff --git a/ui/qt/show_packet_bytes_dialog.h b/ui/qt/show_packet_bytes_dialog.h index b73cd1e79e..554baa600e 100644 --- a/ui/qt/show_packet_bytes_dialog.h +++ b/ui/qt/show_packet_bytes_dialog.h @@ -38,6 +38,8 @@ public: explicit ShowPacketBytesDialog(QWidget &parent, CaptureFile &cf); ~ShowPacketBytesDialog(); + void addCodecs(const QMap &codecMap); + public slots: void captureFileClosing(); @@ -78,11 +80,9 @@ private: ShowAsHexDump, ShowAsHTML, ShowAsImage, - ShowAsISO8859_1, ShowAsRAW, - ShowAsUTF8, - ShowAsUTF16, - ShowAsYAML + ShowAsCodec, // Ordered to match the UTF-8 combobox index + ShowAsYAML, }; void setStartAndEnd(int start, int end);