wireshark/tools/make-usb.py
John Thacker 75b742d299 tools: lowercase usb hex values for the value strings
Some of the USB sources contain uppercase for the hex digits,
but most are lowercase. Lowercase all the hex strings so that
they sort in proper numerical order. Prevents:

 ** (wireshark:3326737) 20:46:56.474521 [Epan WARNING] epan/value_string.c:471 -- _try_val_to_str_ext_init(): Extended value string 'usb_products_vals' forced to fall back to linear search:
  entry 20705, value 645005387 [0x2672004b] < previous entry, value 645005389 [0x2672004d]
2024-10-14 21:01:32 -04:00

167 lines
5.5 KiB
Python
Executable File

#!/usr/bin/env python3
#
# make-usb - Creates a file containing vendor and product ids.
# It uses the databases from
# - The USB ID Repository: https://usb-ids.gowdy.us (http://www.linux-usb.org), mirrored at Sourceforge
# - libgphoto2 from gPhoto: https://github.com/gphoto/libgphoto2 (http://gphoto.org), available at GitHub
# to create our file epan/dissectors/usb.c
import re
import sys
import urllib.request, urllib.error, urllib.parse
# Parser states used while scanning the downloaded files line by line.
MODE_IDLE = 0            # not inside a section we collect from
MODE_VENDOR_PRODUCT = 1  # inside a vendor/product listing

# Lower bounds checked against the parsed tables before anything is written,
# so that a truncated or reformatted download does not produce a tiny usb.c.
MIN_VENDORS = 3400 # 3409 as of 2020-11-15
MIN_PRODUCTS = 20000 # 20361 as of 2020-11-15

mode = MODE_IDLE

# Headers sent with every download request made by this script.
req_headers = { 'User-Agent': 'Wireshark make-usb' }

# First source: the USB ID Repository's usb.ids file.
req = urllib.request.Request('https://sourceforge.net/p/linux-usb/repo/HEAD/tree/trunk/htdocs/usb.ids?format=raw', headers=req_headers)
# Read the body inside a "with" block so the HTTP response is closed as soon
# as it has been consumed, even if reading or decoding raises.
with urllib.request.urlopen(req) as response:
    # 'replace' substitutes undecodable bytes instead of raising.
    lines = response.read().decode('UTF-8', 'replace').splitlines()

# ID (hex string) -> name.  Vendor keys are 4 hex digits; product keys are
# the 4-digit vendor ID followed by the 4-digit product ID.
vendors = dict()
products = dict()

# Opening lines of the two generated C tables; entries are appended later.
vendors_str="static const value_string usb_vendors_vals[] = {\n"
products_str="static const value_string usb_products_vals[] = {\n"
# Escape backslashes, quotes, control characters and non-ASCII characters.
#
# `escapes` maps every byte value (0-255) to the text that represents that
# byte inside a C string literal:
#   - backslash and double quote are prefixed with a backslash,
#   - printable ASCII (0x20-0x7e) and TAB are kept as they are,
#   - everything else, including DEL (0x7f), becomes a 3-digit octal escape.
escapes = {}
for i in range(256):
    if i in b'\\"':
        escapes[i] = '\\%c' % i
    elif 0x20 <= i < 0x7f or i == 0x09:
        # The upper bound is exclusive of 0x7f: DEL is a control character
        # and must not be copied into the generated source unescaped.
        escapes[i] = chr(i)
    else:
        # Always three octal digits, so a following digit character can
        # never be absorbed into the escape sequence.
        escapes[i] = '\\%03o' % i
# Scan usb.ids and fill `vendors` / `products`.
#
# Only the text between the "# Vendors, devices and interfaces" marker and
# the "# List of known device classes" marker is considered.  Inside it, a
# line starting with four lowercase hex digits names a vendor, and a line
# starting with a TAB plus four lowercase hex digits names a product of the
# most recently seen vendor.
vendor_entry = re.compile("^[0-9a-f]{4}")
product_entry = re.compile("^\t[0-9a-f]{4}")

for raw in lines:
    # Collapse runs of '?' into a single one (presumably to keep
    # trigraph-like sequences out of the generated C strings -- confirm),
    # then escape the UTF-8 bytes so the text is safe in a C string literal.
    raw = re.sub(r"\?+", "?", raw.rstrip())
    entry = ''.join(escapes[octet] for octet in raw.encode('utf8'))

    # Section markers switch the parser state and carry no data themselves.
    if entry == "# Vendors, devices and interfaces. Please keep sorted.":
        mode = MODE_VENDOR_PRODUCT
        continue
    if entry == "# List of known device classes, subclasses and protocols":
        mode = MODE_IDLE
        continue

    if mode != MODE_VENDOR_PRODUCT:
        continue

    if vendor_entry.match(entry):
        last_vendor = entry[:4]
        vendors[last_vendor] = entry[4:].strip()
    elif product_entry.match(entry):
        entry = entry.strip()
        # Product key = vendor ID + product ID, eight hex digits in total.
        products[last_vendor + entry[:4]] = entry[4:].strip()
# Second source: the device table in libgphoto2's camlibs/ptp2/library.c.
req = urllib.request.Request('https://raw.githubusercontent.com/gphoto/libgphoto2/master/camlibs/ptp2/library.c', headers=req_headers)
# Read the body inside a "with" block so the HTTP response is closed as soon
# as it has been consumed, even if reading or decoding raises.
with urllib.request.urlopen(req) as response:
    lines = response.read().decode('UTF-8', 'replace').splitlines()

# Only lines between the one mentioning `models[]` and the next line that
# starts with "};" are examined.
mode = MODE_IDLE
for line in lines:
    if mode == MODE_IDLE and re.match(r".*\bmodels\[\]", line):
        mode = MODE_VENDOR_PRODUCT
        continue

    if mode == MODE_VENDOR_PRODUCT and re.match(r"};", line):
        mode = MODE_IDLE

    if mode == MODE_IDLE:
        continue

    # Entries look like: {"Manufacturer:Model", 0xVVVV, 0xPPPP, ...},
    m = re.match(r"\s*{\"(.*):(.*)\",\s*0x([0-9a-fA-F]{4}),\s*0x([0-9a-fA-F]{4}),.*},", line)
    if m is not None:
        manuf = m.group(1).strip()
        # Drop any parenthesised remark from the model name.
        model = re.sub(r"\(.*\)", "", m.group(2)).strip()
        # Key = vendor ID + product ID; an entry already present under the
        # same key is overwritten.
        product = m.group(3) + m.group(4)
        products[product] = ' '.join((manuf, model))
# Third source: libgphoto2's camlibs/ptp2/music-players.h.
req = urllib.request.Request('https://raw.githubusercontent.com/gphoto/libgphoto2/master/camlibs/ptp2/music-players.h', headers=req_headers)
# Read the body inside a "with" block so the HTTP response is closed as soon
# as it has been consumed, even if reading or decoding raises.
with urllib.request.urlopen(req) as response:
    lines = response.read().decode('UTF-8', 'replace').splitlines()

for line in lines:
    # Entries look like: { "Manufacturer", 0xVVVV, "Model", 0xPPPP, ...
    m = re.match(r"\s*{\s*\"(.*)\",\s*0x([0-9a-fA-F]{4}),\s*\"(.*)\",\s*0x([0-9a-fA-F]{4}),", line)
    if m is not None:
        manuf = m.group(1).strip()
        model = m.group(3).strip()
        # Key = vendor ID + product ID; an entry already present under the
        # same key is overwritten.
        product = m.group(2) + m.group(4)
        products[product] = ' '.join((manuf, model))
# Abort with exit status 1 when either table is smaller than its expected
# minimum.  Vendors are checked first, then products.
for shortfall_msg, table, minimum in (
    ("Not enough vendors: %d\n", vendors, MIN_VENDORS),
    ("Not enough products: %d\n", products, MIN_PRODUCTS),
):
    found = len(table)
    if found < minimum:
        sys.stderr.write(shortfall_msg % found)
        sys.exit(1)
# Render both tables as C value_string arrays.
#
# Keys are lowercased first: some sources use uppercase hex digits, and with
# mixed case the string sort below would not match numerical order.
vendors = {vid.lower(): name for vid, name in vendors.items()}
vendors_str += ''.join(
    " { 0x%s, \"%s\" },\n" % (vid, vendors[vid]) for vid in sorted(vendors)
)
# Terminator entry plus the extended value_string wrapper for the table.
vendors_str += """ { 0, NULL }\n};
value_string_ext ext_usb_vendors_vals = VALUE_STRING_EXT_INIT(usb_vendors_vals);
"""

products = {pid.lower(): name for pid, name in products.items()}
products_str += ''.join(
    " { 0x%s, \"%s\" },\n" % (pid, products[pid]) for pid in sorted(products)
)
products_str += """ { 0, NULL }\n};
value_string_ext ext_usb_products_vals = VALUE_STRING_EXT_INIT(usb_products_vals);
"""
# Fixed prologue of the generated epan/dissectors/usb.c: the file banner with
# copyright/licence notices, followed by the #include lines.  It is written
# to the output file verbatim, ahead of the two tables.
# NOTE(review): "libghoto2" in the banner looks like a typo for "libgphoto2";
# it is left as is here because this text is emitted into the generated file.
header="""/* usb.c
* USB vendor id and product ids
* This file was generated by running python ./tools/make-usb.py
* Don't change it directly.
*
* Copyright 2012, Michal Labedzki for Tieto Corporation
*
* Other values imported from libghoto2/camlibs/ptp2/library.c, music-players.h
*
* Copyright (C) 2001-2005 Mariusz Woloszyn <emsi@ipartners.pl>
* Copyright (C) 2003-2013 Marcus Meissner <marcus@jet.franken.de>
* Copyright (C) 2005 Hubert Figuiere <hfiguiere@teaser.fr>
* Copyright (C) 2009 Axel Waggershauser <awagger@web.de>
* Copyright (C) 2005-2007 Richard A. Low <richard@wentnet.com>
* Copyright (C) 2005-2012 Linus Walleij <triad@df.lth.se>
* Copyright (C) 2007 Ted Bullock
* Copyright (C) 2012 Sony Mobile Communications AB
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
/*
* XXX We should probably parse a USB ID file at program start instead
* of generating this file.
*/
#include "config.h"
#include <epan/packet.h>
"""
# Write the generated C source.  The path is relative, so the script has to
# be run from the top of the source tree.
#
# The "with" block closes the file even if a write fails.  The encoding is
# given explicitly so the result does not depend on the locale; the inputs
# were decoded as UTF-8, so UTF-8 is used for the output as well.
with open('epan/dissectors/usb.c', 'w', encoding='utf-8') as f:
    f.write(header)
    f.write("\n")
    f.write(vendors_str)
    f.write("\n\n")
    f.write(products_str)
    f.write("\n")

print("Success!")