mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	Check that it is not needed and remove, fixing up some fallout for places where it was only serving to get something else. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Link: https://lkml.kernel.org/n/tip-9h6dg6lsqe2usyqjh5rrues4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
		
			
				
	
	
		
			269 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			269 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0
 | 
						|
#include <string.h>
 | 
						|
#include "debug.h"
 | 
						|
 | 
						|
#include "demangle-rust.h"
 | 
						|
 | 
						|
/*
 * Mangled Rust symbols look like this:
 *
 *     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
 *
 * The original symbol is:
 *
 *     <std::sys::fd::FileDesc as core::ops::Drop>::drop
 *
 * The last component of the path is a 64-bit hash in lowercase hex, prefixed
 * with "h". Rust does not have a global namespace between crates, an illusion
 * which Rust maintains by using the hash to distinguish things that would
 * otherwise have the same symbol.
 *
 * Any path component not starting with an XID_Start character is prefixed with
 * "_".
 *
 * The following escape sequences are used:
 *
 *     ","  =>  $C$
 *     "@"  =>  $SP$
 *     "*"  =>  $BP$
 *     "&"  =>  $RF$
 *     "<"  =>  $LT$
 *     ">"  =>  $GT$
 *     "("  =>  $LP$
 *     ")"  =>  $RP$
 *     " "  =>  $u20$
 *     "'"  =>  $u27$
 *     "["  =>  $u5b$
 *     "]"  =>  $u5d$
 *     "~"  =>  $u7e$
 *
 * A double ".." means "::" and a single "." means "-".
 *
 * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
 */
 | 
						|
 | 
						|
/* Path separator plus the "h" marker that introduces the trailing hash. */
static const char *const hash_prefix = "::h";
/* Number of characters in hash_prefix, not counting the terminating NUL. */
static const size_t hash_prefix_len = 3;
/* Number of hex digits that follow the prefix. */
static const size_t hash_len = 16;

/* File-local helpers, defined further down. */
static bool is_prefixed_hash(const char *str);
static bool looks_like_rust(const char *str, size_t len);
static bool unescape(const char **in, char **out, const char *seq, char value);
 | 
						|
 | 
						|
/*
 | 
						|
 * INPUT:
 | 
						|
 *     sym: symbol that has been through BFD-demangling
 | 
						|
 *
 | 
						|
 * This function looks for the following indicators:
 | 
						|
 *
 | 
						|
 *  1. The hash must consist of "h" followed by 16 lowercase hex digits.
 | 
						|
 *
 | 
						|
 *  2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
 | 
						|
 *     hex digits. This is true of 99.9998% of hashes so once in your life you
 | 
						|
 *     may see a false negative. The point is to notice path components that
 | 
						|
 *     could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
 | 
						|
 *     this case a false positive (non-Rust symbol has an important path
 | 
						|
 *     component removed because it looks like a Rust hash) is worse than a
 | 
						|
 *     false negative (the rare Rust symbol is not demangled) so this sets the
 | 
						|
 *     balance in favor of false negatives.
 | 
						|
 *
 | 
						|
 *  3. There must be no characters other than a-zA-Z0-9 and _.:$
 | 
						|
 *
 | 
						|
 *  4. There must be no unrecognized $-sign sequences.
 | 
						|
 *
 | 
						|
 *  5. There must be no sequence of three or more dots in a row ("...").
 | 
						|
 */
 | 
						|
bool
 | 
						|
rust_is_mangled(const char *sym)
 | 
						|
{
 | 
						|
	size_t len, len_without_hash;
 | 
						|
 | 
						|
	if (!sym)
 | 
						|
		return false;
 | 
						|
 | 
						|
	len = strlen(sym);
 | 
						|
	if (len <= hash_prefix_len + hash_len)
 | 
						|
		/* Not long enough to contain "::h" + hash + something else */
 | 
						|
		return false;
 | 
						|
 | 
						|
	len_without_hash = len - (hash_prefix_len + hash_len);
 | 
						|
	if (!is_prefixed_hash(sym + len_without_hash))
 | 
						|
		return false;
 | 
						|
 | 
						|
	return looks_like_rust(sym, len_without_hash);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
 | 
						|
 * digits must comprise between 5 and 15 (inclusive) distinct digits.
 | 
						|
 */
 | 
						|
static bool is_prefixed_hash(const char *str)
 | 
						|
{
 | 
						|
	const char *end;
 | 
						|
	bool seen[16];
 | 
						|
	size_t i;
 | 
						|
	int count;
 | 
						|
 | 
						|
	if (strncmp(str, hash_prefix, hash_prefix_len))
 | 
						|
		return false;
 | 
						|
	str += hash_prefix_len;
 | 
						|
 | 
						|
	memset(seen, false, sizeof(seen));
 | 
						|
	for (end = str + hash_len; str < end; str++)
 | 
						|
		if (*str >= '0' && *str <= '9')
 | 
						|
			seen[*str - '0'] = true;
 | 
						|
		else if (*str >= 'a' && *str <= 'f')
 | 
						|
			seen[*str - 'a' + 10] = true;
 | 
						|
		else
 | 
						|
			return false;
 | 
						|
 | 
						|
	/* Count how many distinct digits seen */
 | 
						|
	count = 0;
 | 
						|
	for (i = 0; i < 16; i++)
 | 
						|
		if (seen[i])
 | 
						|
			count++;
 | 
						|
 | 
						|
	return count >= 5 && count <= 15;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Scan the first len characters of str and report whether they could belong
 * to a mangled Rust path: only a-zA-Z0-9 and _.:$ may appear, every '$' must
 * start one of the known escape sequences, and no run of three or more dots
 * is allowed.
 */
static bool looks_like_rust(const char *str, size_t len)
{
	static const char *const escapes[] = {
		"$C$",
		"$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$",
		"$u20$", "$u27$", "$u5b$", "$u5d$", "$u7e$",
	};
	const size_t n_escapes = sizeof(escapes) / sizeof(escapes[0]);
	const char *cur = str;
	const char *const limit = str + len;

	while (cur < limit) {
		const char c = *cur;

		if (c == '$') {
			size_t matched = 0;
			size_t k;

			/* The sequences are mutually exclusive, so order is irrelevant. */
			for (k = 0; k < n_escapes; k++) {
				const size_t seq_len = strlen(escapes[k]);

				if (!strncmp(cur, escapes[k], seq_len)) {
					matched = seq_len;
					break;
				}
			}

			if (matched == 0)
				return false;

			cur += matched;
			continue;
		}

		if (c == '.') {
			/* Do not allow three or more consecutive dots */
			if (!strncmp(cur, "...", 3))
				return false;
		} else if (!((c >= 'a' && c <= 'z') ||
			     (c >= 'A' && c <= 'Z') ||
			     (c >= '0' && c <= '9') ||
			     c == '_' || c == ':')) {
			return false;
		}

		cur++;
	}

	return true;
}
 | 
						|
 | 
						|
/*
 | 
						|
 * INPUT:
 | 
						|
 *     sym: symbol for which rust_is_mangled(sym) returns true
 | 
						|
 *
 | 
						|
 * The input is demangled in-place because the mangled name is always longer
 | 
						|
 * than the demangled one.
 | 
						|
 */
 | 
						|
void
 | 
						|
rust_demangle_sym(char *sym)
 | 
						|
{
 | 
						|
	const char *in;
 | 
						|
	char *out;
 | 
						|
	const char *end;
 | 
						|
 | 
						|
	if (!sym)
 | 
						|
		return;
 | 
						|
 | 
						|
	in = sym;
 | 
						|
	out = sym;
 | 
						|
	end = sym + strlen(sym) - (hash_prefix_len + hash_len);
 | 
						|
 | 
						|
	while (in < end)
 | 
						|
		switch (*in) {
 | 
						|
		case '$':
 | 
						|
			if (!(unescape(&in, &out, "$C$", ',')
 | 
						|
					|| unescape(&in, &out, "$SP$", '@')
 | 
						|
					|| unescape(&in, &out, "$BP$", '*')
 | 
						|
					|| unescape(&in, &out, "$RF$", '&')
 | 
						|
					|| unescape(&in, &out, "$LT$", '<')
 | 
						|
					|| unescape(&in, &out, "$GT$", '>')
 | 
						|
					|| unescape(&in, &out, "$LP$", '(')
 | 
						|
					|| unescape(&in, &out, "$RP$", ')')
 | 
						|
					|| unescape(&in, &out, "$u20$", ' ')
 | 
						|
					|| unescape(&in, &out, "$u27$", '\'')
 | 
						|
					|| unescape(&in, &out, "$u5b$", '[')
 | 
						|
					|| unescape(&in, &out, "$u5d$", ']')
 | 
						|
					|| unescape(&in, &out, "$u7e$", '~'))) {
 | 
						|
				pr_err("demangle-rust: unexpected escape sequence");
 | 
						|
				goto done;
 | 
						|
			}
 | 
						|
			break;
 | 
						|
		case '_':
 | 
						|
			/*
 | 
						|
			 * If this is the start of a path component and the next
 | 
						|
			 * character is an escape sequence, ignore the
 | 
						|
			 * underscore. The mangler inserts an underscore to make
 | 
						|
			 * sure the path component begins with a XID_Start
 | 
						|
			 * character.
 | 
						|
			 */
 | 
						|
			if ((in == sym || in[-1] == ':') && in[1] == '$')
 | 
						|
				in++;
 | 
						|
			else
 | 
						|
				*out++ = *in++;
 | 
						|
			break;
 | 
						|
		case '.':
 | 
						|
			if (in[1] == '.') {
 | 
						|
				/* ".." becomes "::" */
 | 
						|
				*out++ = ':';
 | 
						|
				*out++ = ':';
 | 
						|
				in += 2;
 | 
						|
			} else {
 | 
						|
				/* "." becomes "-" */
 | 
						|
				*out++ = '-';
 | 
						|
				in++;
 | 
						|
			}
 | 
						|
			break;
 | 
						|
		case 'a' ... 'z':
 | 
						|
		case 'A' ... 'Z':
 | 
						|
		case '0' ... '9':
 | 
						|
		case ':':
 | 
						|
			*out++ = *in++;
 | 
						|
			break;
 | 
						|
		default:
 | 
						|
			pr_err("demangle-rust: unexpected character '%c' in symbol\n",
 | 
						|
				*in);
 | 
						|
			goto done;
 | 
						|
		}
 | 
						|
 | 
						|
done:
 | 
						|
	*out = '\0';
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Try to consume the escape sequence seq at the current input position.
 *
 * On a match, value is stored at *out, *in is advanced past the sequence,
 * *out is advanced by one character, and true is returned. Otherwise both
 * cursors are left unchanged and false is returned.
 */
static bool unescape(const char **in, char **out, const char *seq, char value)
{
	const size_t seq_len = strlen(seq);
	const bool matched = strncmp(*in, seq, seq_len) == 0;

	if (matched) {
		*(*out)++ = value;
		*in += seq_len;
	}

	return matched;
}
 |