From 801587c821290e707884f425a2da22fd88d32cf9 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Fri, 10 Nov 2023 11:22:46 +1100 Subject: [PATCH] MDEV-32686: minimise crash information Provide bug url in addition to how to report the bug. Remove obsolete information like key_buffers and used connections as they haven't meaningfully added value to a bug report for quite a while. Remove information that comes from long fixed interfaces in glibc/kernel. Encourage the use of a full backtrace from the core with debug symbols. Let's be realistic about the error messages, it's the users we are addressing, not developers, so wording around getting the information communicated is the key aspect. All the user readable text and instructions are in one place, as non-understandable is the end of the reading process for the user. Remove the duplicate printing of the query. Use my_progname rather than "mysqld" to reflect the program name. So the signal handler output is now in the form: 1. User instructions 2. Server Information 3. Stacktrace 4. connection/query/optimizer_switch 5. Core information and resource limits 6. Kernel information --- sql/signal_handler.cc | 122 ++++++------------------------------------ 1 file changed, 17 insertions(+), 105 deletions(-) diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc index 002a4c244d1..1200a5a1608 100644 --- a/sql/signal_handler.cc +++ b/sql/signal_handler.cc @@ -144,7 +144,6 @@ extern "C" sig_handler handle_fatal_signal(int sig) We will try and print the query at the end of the signal handler, in case we're wrong. 
*/ - bool print_invalid_query_pointer= false; #endif if (segfaulted) @@ -173,57 +172,19 @@ extern "C" sig_handler handle_fatal_signal(int sig) goto end; } - my_safe_printf_stderr("[ERROR] mysqld got " SIGNAL_FMT " ;\n",sig); + my_safe_printf_stderr("[ERROR] %s got " SIGNAL_FMT " ;\n", my_progname, sig); my_safe_printf_stderr("%s", "Sorry, we probably made a mistake, and this is a bug.\n\n" "Your assistance in bug reporting will enable us to fix this for the next release.\n" - "To report this bug, see https://mariadb.com/kb/en/reporting-bugs\n\n"); - - my_safe_printf_stderr("%s", - "We will try our best to scrape up some info that will hopefully help\n" - "diagnose the problem, but since we have already crashed, \n" - "something is definitely wrong and this may fail.\n\n"); + "To report this bug, see https://mariadb.com/kb/en/reporting-bugs about how to report\n" + "a bug on https://jira.mariadb.org/.\n\n" + "Please include the information from the server start above, to the end of the\n" + "information below.\n\n"); set_server_version(server_version, sizeof(server_version)); - my_safe_printf_stderr("Server version: %s source revision: %s\n", - server_version, SOURCE_REVISION); - - if (dflt_key_cache) - my_safe_printf_stderr("key_buffer_size=%zu\n", - dflt_key_cache->key_cache_mem_size); - - my_safe_printf_stderr("read_buffer_size=%lu\n", - global_system_variables.read_buff_size); - - my_safe_printf_stderr("max_used_connections=%lu\n", - max_used_connections); - - if (thread_scheduler) - my_safe_printf_stderr("max_threads=%lu\n", - thread_scheduler->max_threads + - extra_max_connections); - - my_safe_printf_stderr("thread_count=%u\n", THD_count::value()); - - if (dflt_key_cache && thread_scheduler) - { - size_t used_mem= - (dflt_key_cache->key_cache_mem_size + - (global_system_variables.read_buff_size + - (size_t) global_system_variables.sortbuff_size) * - (thread_scheduler->max_threads + extra_max_connections) + - (max_connections + extra_max_connections) * 
sizeof(THD)) / 1024; - - my_safe_printf_stderr("It is possible that mysqld could use up to \n" - "key_buffer_size + " - "(read_buffer_size + sort_buffer_size)*max_threads = " - "%zu K bytes of memory\n", used_mem); - - my_safe_printf_stderr("%s", - "Hope that's ok; if not, decrease some variables in " - "the equation.\n\n"); - } + my_safe_printf_stderr("Server version: %s source revision: %s\n\n", + server_version, SOURCE_REVISION); #ifdef WITH_WSREP Wsrep_server_state::handle_fatal_signal(); @@ -234,12 +195,14 @@ extern "C" sig_handler handle_fatal_signal(int sig) if (opt_stack_trace) { - my_safe_printf_stderr("Thread pointer: %p\n", thd); my_safe_printf_stderr("%s", - "Attempting backtrace. You can use the following " - "information to find out\n" - "where mysqld died. If you see no messages after this, something went\n" - "terribly wrong...\n"); + "The information page at " + "https://mariadb.com/kb/en/how-to-produce-a-full-stack-trace-for-mariadbd/\n" + "contains instructions to obtain a better version of the backtrace below.\n" + "Following these instructions will help MariaDB developers provide a fix quicker.\n\n" + "Attempting backtrace. Include this in the bug report.\n" + "(note: Retrieving this information may fail)\n\n"); + my_safe_printf_stderr("Thread pointer: %p\n", thd); my_print_stacktrace(thd ? (uchar*) thd->thread_stack : NULL, (ulong)my_thread_stack_size, 0); } @@ -287,20 +250,12 @@ extern "C" sig_handler handle_fatal_signal(int sig) kreason= "KILL_WAIT_TIMEOUT"; break; } - my_safe_printf_stderr("%s", "\n" - "Trying to get some variables.\n" - "Some pointers may be invalid and cause the dump to abort.\n"); - - my_safe_printf_stderr("Query (%p): ", thd->query()); - if (my_safe_print_str(thd->query(), MY_MIN(65536U, thd->query_length()))) - { - // Query was found invalid. We will try to print it at the end. 
- print_invalid_query_pointer= true; - } my_safe_printf_stderr("\nConnection ID (thread ID): %lu\n", (ulong) thd->thread_id); - my_safe_printf_stderr("Status: %s\n\n", kreason); + my_safe_printf_stderr("Status: %s\n", kreason); + my_safe_printf_stderr("Query (%p): ", thd->query()); + my_safe_print_str(thd->query(), MY_MIN(65536U, thd->query_length())); my_safe_printf_stderr("%s", "Optimizer switch: "); ulonglong optsw= thd->variables.optimizer_switch; for (uint i= 0; optimizer_switch_names[i+1]; i++, optsw >>= 1) @@ -312,52 +267,9 @@ extern "C" sig_handler handle_fatal_signal(int sig) } my_safe_printf_stderr("%s", "\n\n"); } - my_safe_printf_stderr("%s", - "The manual page at " - "https://mariadb.com/kb/en/how-to-produce-a-full-stack-trace-for-mariadbd/ contains\n" - "information that should help you find out what is causing the crash.\n"); #endif /* HAVE_STACKTRACE */ -#ifdef HAVE_INITGROUPS - if (calling_initgroups) - { - my_safe_printf_stderr("%s", "\n" - "This crash occurred while the server was calling initgroups(). 
This is\n" - "often due to the use of a mysqld that is statically linked against \n" - "glibc and configured to use LDAP in /etc/nsswitch.conf.\n" - "You will need to either upgrade to a version of glibc that does not\n" - "have this problem (2.3.4 or later when used with nscd),\n" - "disable LDAP in your nsswitch.conf, or use a " - "mysqld that is not statically linked.\n"); - } -#endif - - if (locked_in_memory) - { - my_safe_printf_stderr("%s", "\n" - "The \"--memlock\" argument, which was enabled, " - "uses system calls that are\n" - "unreliable and unstable on some operating systems and " - "operating-system versions (notably, some versions of Linux).\n" - "This crash could be due to use of those buggy OS calls.\n" - "You should consider whether you really need the " - "\"--memlock\" parameter and/or consult the OS distributer about " - "\"mlockall\" bugs.\n"); - } - -#ifdef HAVE_STACKTRACE - if (print_invalid_query_pointer) - { - my_safe_printf_stderr( - "\nWe think the query pointer is invalid, but we will try " - "to print it anyway. \n" - "Query: "); - my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length())); - my_safe_printf_stderr("\n\n"); - } -#endif - output_core_info(); #ifdef HAVE_WRITE_CORE if (test_flags & TEST_CORE_ON_SIGNAL)