Bug 1900788 - Rewrite the Windows symbol scraper to improve coverage r=gerard-majax
- Sample 2k crashes, 500 for each release channel; this is somewhat slow as we're not using an API token (but we can add one later)
- Use dump_syms' logic to fetch the files from the symbol servers, rather than using the Python logic
- Fail hard if one of the steps fails, including unexpected dump_syms crashes
- Print out a summary of all the actions that have been taken

Differential Revision: https://phabricator.services.mozilla.com/D212692
parent d3fea1aa85
commit 2fb6fe576b
7 changed files with 69 additions and 705 deletions
@@ -10,11 +10,11 @@ WORKDIR /builds/worker
# AUFS slowness.
VOLUME /builds/worker/checkouts

COPY requirements.txt /builds/worker/requirements.txt
RUN apt-get update && \
    apt-get install --no-install-recommends -y gcc python3-dev python3-pip python3-setuptools libffi-dev && \
    apt-get install --no-install-recommends -y 7zip jq python3-pip wget && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/*
RUN pip3 install --break-system-packages --no-cache-dir --require-hashes -r /builds/worker/requirements.txt
RUN pip3 install --progress-bar off --break-system-packages crashstats-tools==2.0.0

# %include tools/crashreporter/system-symbols/win
COPY topsrcdir/tools/crashreporter/system-symbols/win /builds/worker
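The image now ships jq, wget, 7zip, and crashstats-tools, which provides the supersearch CLI used by the new run.sh below. The commit message notes the sampling runs without an API token for now; crashstats-tools documents picking one up from the environment, so a later follow-up could be as small as this sketch (the token value is a placeholder, and this is not part of the commit):

# Hypothetical follow-up: authenticate crash-stats requests to raise rate limits.
# crashstats-tools reads the token from the CRASHSTATS_API_TOKEN environment variable.
export CRASHSTATS_API_TOKEN="<token>"
supersearch --num=500 --release_channel=release --platform=Windows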
@@ -1,17 +0,0 @@
aiofile==3.8.7 --hash=sha256:4c38991b1227e221296fa05bbc95bffba9c203fef1ce09ad3076cfe7b61842c7
aiohttp==3.8.5 --hash=sha256:5ed1c46fb119f1b59304b5ec89f834f07124cd23ae5b74288e364477641060ff
aiohttp-retry==2.8.3 --hash=sha256:3aeeead8f6afe48272db93ced9440cf4eda8b6fd7ee2abb25357b7eb28525b45
aiosignal==1.2.0 --hash=sha256:26e62109036cd181df6e6ad646f91f0dcfd05fe16d0cb924138ff2ab75d64e3a
asyncio==3.4.3 --hash=sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d
asynctest==0.13.0 --hash=sha256:5da6118a7e6d6b54d83a8f7197769d046922a44d2a99c21382f0a6e4fadae676
async-timeout==4.0.2 --hash=sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c
attrs==23.1.0 --hash=sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04
caio==0.9.12 --hash=sha256:7e569b83e9b41d12e094190d0e1a546610829a65609f429a1845e3250d4c5804
cffi==1.15.1 --hash=sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c
chardet==5.2.0 --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970
charset-normalizer==3.2.0 --hash=sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6
frozenlist==1.4.0 --hash=sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b
idna==3.4 --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2
multidict==6.0.4 --hash=sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710
pycparser==2.21 --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9
yarl==1.9.2 --hash=sha256:159d81f22d7a43e6eabc36d7194cb53f2f15f498dbbfa8edc8a3239350f59fe7
@@ -1,17 +0,0 @@
d2d1.pdb
d3d10level9.pdb
d3d10warp.pdb
d3d11.pdb
d3d9.pdb
d3dcompiler_47.pdb
d3dim700.pdb
kernel32.pdb
kernelbase.pdb
ntdll.pdb
user32.pdb
wkernel32.pdb
wkernelbase.pdb
wntdll.pdb
ws2_32.pdb
wuser32.pdb
zipwriter.pdb
@@ -1,11 +1,71 @@
#!/bin/sh

set -v -e -x
set -e

base="$(realpath "$(dirname "$0")")"
pwd

export DUMP_SYMS_PATH="${MOZ_FETCHES_DIR}/dump_syms/dump_syms"
mkdir -p "/builds/worker/artifacts"

mkdir -p artifacts && \
ulimit -n 16384 && \
python3 "${base}/symsrv-fetch.py" artifacts/target.crashreporter-symbols.zip
DUMP_SYMS="${MOZ_FETCHES_DIR}/dump_syms/dump_syms"
SYMBOL_STORE=$(mktemp -d -p "/builds/worker/")
SYMBOL_CACHE=$(mktemp -d -p "/builds/worker/")
SYMBOL_SERVER_URLS="SRV*${SYMBOL_CACHE}*https://msdl.microsoft.com/download/symbols;SRV*${SYMBOL_CACHE}*https://software.intel.com/sites/downloads/symbols;SRV*${SYMBOL_CACHE}*https://download.amd.com/dir/bin;SRV*${SYMBOL_CACHE}*https://driver-symbols.nvidia.com"

# List Windows crashes from the various release channels.
for release_channel in release beta nightly esr; do
    supersearch --num=500 --release_channel="${release_channel}" --platform=Windows | while read line; do
        printf "https://crash-stats.mozilla.org/api/ProcessedCrash/?crash_id=${line}\n" >> crashes.list
    done
done

# Fetch the raw JSON for each crash.
CRASHES_DIR=$(mktemp -d -p "/builds/worker/")
cd "${CRASHES_DIR}"
wget --no-verbose --waitretry=100 --retry-on-http-error=429 --compression=auto -i ../crashes.list
cd ..

# Find the missing module entries in the crash modules list and extract the
# code id and debug filename; we'll store them in a file to process later.
find "${CRASHES_DIR}" -name "index.html*" -exec jq ".json_dump.modules[] | select(.missing_symbols == true) | .code_id,.filename" {} \; | tr -d '"' | while read line; do
    if [ -z "${code_id}" ]; then
        code_id="${line}"
    else
        filename="${line}"
        printf "${filename},${code_id}\n" >> debuginfo.list

        code_id=""
        filename=""
    fi
done

# Dump every missing module we found; some will not be available.
sort -u debuginfo.list | while read line; do
    filename=$(echo "${line}" | cut -d',' -f1)
    code_id=$(echo "${line}" | cut -d',' -f2)
    printf "Attempting to dump file ${filename} with id ${code_id}\n"

    # dump_syms may fail, but we don't always want to stop the script if it
    # does, so we capture the return value and use the output to decide whether
    # we want to move on or not.
    "${DUMP_SYMS}" "${filename}" --code-id "${code_id}" --inlines --check-cfi \
        --store "${SYMBOL_STORE}" --symbol-server "${SYMBOL_SERVER_URLS}" \
        --verbose error 2>dump_syms.errors && rv=0 || rv=$?

    if [ ${rv} -ne 0 ]; then
        errors=$(cat dump_syms.errors)
        printf "error: ${errors}\n"

        if [ "${errors}" != "Impossible to get file ${filename} with id ${code_id}" ] && \
           [ "${errors}" != "No CFI data" ]; then
            exit 1
        fi
    fi
done

# Create the symbols archive only if we actually dumped something.
symbols=$(find "${SYMBOL_STORE}" -type f)

if [ -n "${symbols}" ]; then
    cd "${SYMBOL_STORE}" && \
        7zz a "/builds/worker/artifacts/target.crashreporter-symbols.zip"
fi
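Two details of the new run.sh are worth noting. The `&& rv=0 || rv=$?` idiom captures dump_syms' exit status without letting `set -e` abort the script, so the two expected failure modes ("Impossible to get file ...", "No CFI data") are tolerated while anything else still fails hard, as the commit message promises. Separately, the jq invocation prints `.code_id` and `.filename` on alternating lines, which the shell loop then re-pairs; an untested sketch of a more compact alternative (assuming the same ProcessedCrash JSON shape) would have jq emit the pairs directly:

# Hypothetical alternative to the alternating-line loop above:
# emit "filename,code_id" rows from jq in one pass.
find "${CRASHES_DIR}" -name "index.html*" -exec jq -r \
    '.json_dump.modules[] | select(.missing_symbols == true) | "\(.filename),\(.code_id)"' {} \; >> debuginfo.list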
@@ -1,75 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2016 Mozilla
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import csv
import json
import logging
import os
import sys

import requests
import urlparse

log = logging.getLogger()


def fetch_missing_symbols_from_crash(file_or_crash):
    if os.path.isfile(file_or_crash):
        log.info("Fetching missing symbols from JSON file: %s" % file_or_crash)
        j = {"json_dump": json.load(open(file_or_crash, "rb"))}
    else:
        if "report/index/" in file_or_crash:
            crash_id = urlparse.urlparse(file_or_crash).path.split("/")[-1]
        else:
            crash_id = file_or_crash
        url = (
            "https://crash-stats.mozilla.org/api/ProcessedCrash/"
            "?crash_id={crash_id}&datatype=processed".format(crash_id=crash_id)
        )
        log.info("Fetching missing symbols from crash: %s" % url)
        r = requests.get(url)
        if r.status_code != 200:
            log.error("Failed to fetch crash %s" % url)
            return set()
        j = r.json()
    return set(
        [
            (m["debug_file"], m["debug_id"], m["filename"], m["code_id"])
            for m in j["json_dump"]["modules"]
            if "missing_symbols" in m
        ]
    )


def main():
    logging.basicConfig()
    log.setLevel(logging.DEBUG)
    urllib3_logger = logging.getLogger("urllib3")
    urllib3_logger.setLevel(logging.ERROR)

    if len(sys.argv) < 2:
        log.error("Specify a crash URL or ID")
        sys.exit(1)
    symbols = fetch_missing_symbols_from_crash(sys.argv[1])
    log.info("Found %d missing symbols" % len(symbols))
    c = csv.writer(sys.stdout)
    c.writerow(["debug_file", "debug_id", "code_file", "code_id"])
    for row in symbols:
        c.writerow(row)


if __name__ == "__main__":
    main()
@@ -1,587 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2016 Mozilla
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# This script will fetch a thousand recent crashes from Socorro, and try to
# retrieve missing symbols from Microsoft's symbol server. It honors a list
# (ignorelist.txt) of symbols that are known to be from our applications,
# and it maintains its own list of symbols that the MS symbol server
# doesn't have (skiplist.txt).
#
# The script also depends on having write access to the directory it is
# installed in, to write the skiplist text file.

import argparse
import asyncio
import collections
import json
import logging
import os
import shutil
import zipfile
from collections import defaultdict
from tempfile import mkdtemp
from urllib.parse import quote, urljoin

from aiofile import AIOFile, LineReader
from aiohttp import ClientSession, ClientTimeout
from aiohttp.connector import TCPConnector
from aiohttp_retry import JitterRetry, RetryClient

# Just hardcoded here
MICROSOFT_SYMBOL_SERVER = "https://msdl.microsoft.com/download/symbols/"
USER_AGENT = "Microsoft-Symbol-Server/6.3.0.0"
MOZILLA_SYMBOL_SERVER = "https://symbols.mozilla.org/"
CRASHSTATS_API_URL = "https://crash-stats.mozilla.org/api/"
SUPERSEARCH_PARAM = "SuperSearch/?proto_signature=~.DLL&proto_signature=~.dll&platform=Windows&_results_number=1000"
PROCESSED_CRASHES_PARAM = "ProcessedCrash/?crash_id="
HEADERS = {"User-Agent": USER_AGENT}
SYM_SRV = "SRV*{0}*https://msdl.microsoft.com/download/symbols;SRV*{0}*https://software.intel.com/sites/downloads/symbols;SRV*{0}*https://download.amd.com/dir/bin;SRV*{0}*https://driver-symbols.nvidia.com"  # noqa
TIMEOUT = 7200
RETRIES = 5


MissingSymbol = collections.namedtuple(
    "MissingSymbol", ["debug_file", "debug_id", "filename", "code_id"]
)
log = logging.getLogger()


def get_type(data):
    # PDB v7
    if data.startswith(b"Microsoft C/C++ MSF 7.00"):
        return "pdb-v7"
    # PDB v2
    if data.startswith(b"Microsoft C/C++ program database 2.00"):
        return "pdb-v2"
    # DLL
    if data.startswith(b"MZ"):
        return "dll"
    # CAB
    if data.startswith(b"MSCF"):
        return "cab"

    return "unknown"


async def exp_backoff(retry_num):
    await asyncio.sleep(2**retry_num)


async def server_has_file(client, server, filename):
    """
    Send the symbol server a HEAD request to see if it has this symbol file.
    """
    url = urljoin(server, quote(filename))
    for i in range(RETRIES):
        try:
            async with client.head(url, headers=HEADERS, allow_redirects=True) as resp:
                if resp.status == 200 and (
                    (
                        "microsoft" in server
                        and resp.headers["Content-Type"] == "application/octet-stream"
                    )
                    or "mozilla" in server
                ):
                    log.debug(f"File exists: {url}")
                    return True
                else:
                    return False
        except Exception as e:
            # Sometimes we've SSL errors or disconnections... so in such a situation just retry
            log.warning(f"Error with {url}: retry")
            log.exception(e)
            await exp_backoff(i)

    log.debug(f"Too many retries (HEAD) for {url}: give up.")
    return False


async def fetch_file(client, server, filename):
    """
    Fetch the file from the server
    """
    url = urljoin(server, quote(filename))
    log.debug(f"Fetch url: {url}")
    for i in range(RETRIES):
        try:
            async with client.get(url, headers=HEADERS, allow_redirects=True) as resp:
                if resp.status == 200:
                    data = await resp.read()
                    typ = get_type(data)
                    if typ == "unknown":
                        # try again
                        await exp_backoff(i)
                    elif typ == "pdb-v2":
                        # too old: skip it
                        log.debug(f"PDB v2 (skipped because too old): {url}")
                        return None
                    else:
                        return data
                else:
                    log.error(f"Cannot get data (status {resp.status}) for {url}: ")
        except Exception as e:
            log.warning(f"Error with {url}")
            log.exception(e)
            await asyncio.sleep(0.5)

    log.debug(f"Too many retries (GET) for {url}: give up.")
    return None


def write_skiplist(skiplist):
    with open("skiplist.txt", "w") as sf:
        sf.writelines(
            f"{debug_id} {debug_file}\n" for debug_id, debug_file in skiplist.items()
        )


async def fetch_crash(session, url):
    async with session.get(url) as resp:
        if resp.status == 200:
            return json.loads(await resp.text())

        raise RuntimeError("Network request returned status = " + str(resp.status))


async def fetch_crashes(session, urls):
    tasks = []
    for url in urls:
        task = asyncio.create_task(fetch_crash(session, url))
        tasks.append(task)
    results = await asyncio.gather(*tasks, return_exceptions=True)
    return results


async def fetch_latest_crashes(client, url):
    async with client.get(url + SUPERSEARCH_PARAM) as resp:
        if resp.status != 200:
            resp.raise_for_status()
        data = await resp.text()
    reply = json.loads(data)
    crashes = []
    for crash in reply.get("hits"):
        if "uuid" in crash:
            crashes.append(crash.get("uuid"))
    return crashes


async def fetch_missing_symbols(url):
    log.info("Looking for missing symbols on %s" % url)
    connector = TCPConnector(limit=4, limit_per_host=0)
    missing_symbols = set()
    crash_count = 0

    client_session = ClientSession(
        headers=HEADERS, connector=connector, timeout=ClientTimeout(total=TIMEOUT)
    )
    while crash_count < 1000:
        async with RetryClient(
            client_session=client_session,
            retry_options=JitterRetry(attempts=30, statuses=[429]),
        ) as client:
            crash_uuids = await fetch_latest_crashes(client, url)
            urls = [url + PROCESSED_CRASHES_PARAM + uuid for uuid in crash_uuids]
            crashes = await fetch_crashes(client, urls)
            for crash in crashes:
                if type(crash) is not dict:
                    continue

                crash_count += 1
                modules = crash.get("json_dump").get("modules")
                for module in modules:
                    if module.get("missing_symbols"):
                        missing_symbols.add(
                            MissingSymbol(
                                module.get("debug_file"),
                                module.get("debug_id"),
                                module.get("filename"),
                                module.get("code_id"),
                            )
                        )

    return missing_symbols


async def get_list(filename):
    alist = set()
    try:
        async with AIOFile(filename, "r") as In:
            async for line in LineReader(In):
                line = line.rstrip()
                alist.add(line)
    except FileNotFoundError:
        pass

    log.debug(f"{filename} contains {len(alist)} items")

    return alist


async def get_skiplist():
    skiplist = {}
    path = "skiplist.txt"
    try:
        async with AIOFile(path, "r") as In:
            async for line in LineReader(In):
                line = line.strip()
                if line == "":
                    continue
                s = line.split(" ", maxsplit=1)
                if len(s) != 2:
                    continue
                debug_id, debug_file = s
                skiplist[debug_id] = debug_file.lower()
    except FileNotFoundError:
        pass

    log.debug(f"{path} contains {len(skiplist)} items")

    return skiplist


def get_missing_symbols(missing_symbols, skiplist, ignorelist):
    modules = defaultdict(set)
    stats = {"ignorelist": 0, "skiplist": 0}
    for symbol in missing_symbols:
        pdb = symbol.debug_file
        debug_id = symbol.debug_id
        code_file = symbol.filename
        code_id = symbol.code_id
        if pdb and debug_id and pdb.endswith(".pdb"):
            if pdb.lower() in ignorelist:
                stats["ignorelist"] += 1
                continue

            if skiplist.get(debug_id) != pdb.lower():
                modules[pdb].add((debug_id, code_file, code_id))
            else:
                stats["skiplist"] += 1
                # We've asked the symbol server previously about this,
                # so skip it.
                log.debug("%s/%s already in skiplist", pdb, debug_id)

    return modules, stats


async def collect_info(client, filename, debug_id, code_file, code_id):
    pdb_path = os.path.join(filename, debug_id, filename)
    sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", "") + ".sym")

    has_pdb = await server_has_file(client, MICROSOFT_SYMBOL_SERVER, pdb_path)
    has_code = is_there = False
    if has_pdb:
        if not await server_has_file(client, MOZILLA_SYMBOL_SERVER, sym_path):
            has_code = (
                code_file
                and code_id
                and await server_has_file(
                    client,
                    MICROSOFT_SYMBOL_SERVER,
                    f"{code_file}/{code_id}/{code_file}",
                )
            )
        else:
            # if the file is on moz sym server no need to do anything
            is_there = True
            has_pdb = False

    return (filename, debug_id, code_file, code_id, has_pdb, has_code, is_there)


async def check_x86_file(path):
    async with AIOFile(path, "rb") as In:
        head = b"MODULE windows x86 "
        chunk = await In.read(len(head))
        if chunk == head:
            return True
    return False


async def run_command(cmd):
    proc = await asyncio.create_subprocess_shell(
        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    _, err = await proc.communicate()
    err = err.decode().strip()

    return err


async def dump_module(
    output, symcache, filename, debug_id, code_file, code_id, has_code, dump_syms
):
    sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", ".sym"))
    output_path = os.path.join(output, sym_path)
    sym_srv = SYM_SRV.format(symcache)
    res = {"path": sym_path, "error": "ok"}

    if has_code:
        cmd = (
            f"{dump_syms} {code_file} --code-id {code_id} --check-cfi --inlines "
            f"--store {output} --symbol-server '{sym_srv}' --verbose error"
        )
    else:
        cmd = (
            f"{dump_syms} {filename} --debug-id {debug_id} --check-cfi --inlines "
            f"--store {output} --symbol-server '{sym_srv}' --verbose error"
        )

    err = await run_command(cmd)

    if err:
        log.error(f"Error with {cmd}")
        log.error(err)
        res["error"] = "dump error"
        return res

    if not os.path.exists(output_path):
        log.error(f"Could not find file {output_path} after running {cmd}")
        res["error"] = "dump error"
        return res

    if not has_code and not await check_x86_file(output_path):
        # PDB for 32 bits contains everything we need (symbols + stack unwind info)
        # But PDB for 64 bits don't contain stack unwind info
        # (they're in the binary (.dll/.exe) itself).
        # So here we're logging because we've got a PDB (64 bits) without its DLL/EXE.
        if code_file and code_id:
            log.debug(f"x86_64 binary {code_file}/{code_id} required")
        else:
            log.debug(f"x86_64 binary for {filename}/{debug_id} required")
        res["error"] = "no binary"
        return res

    log.info(f"Successfully dumped: {filename}/{debug_id}")
    return res


async def dump(output, symcache, modules, dump_syms):
    tasks = []
    for filename, debug_id, code_file, code_id, has_code in modules:
        tasks.append(
            dump_module(
                output,
                symcache,
                filename,
                debug_id,
                code_file,
                code_id,
                has_code,
                dump_syms,
            )
        )

    res = await asyncio.gather(*tasks)

    # Even if we haven't CFI the generated file is useful to get symbols
    # from addresses so keep error == 2.
    file_index = {x["path"] for x in res if x["error"] in ["ok", "no binary"]}
    stats = {
        "dump_error": sum(1 for x in res if x["error"] == "dump error"),
        "no_bin": sum(1 for x in res if x["error"] == "no binary"),
    }

    return file_index, stats


async def collect(modules):
    loop = asyncio.get_event_loop()
    tasks = []

    # In case of errors (Too many open files), just change limit_per_host
    connector = TCPConnector(limit=100, limit_per_host=4)

    async with ClientSession(
        loop=loop, timeout=ClientTimeout(total=TIMEOUT), connector=connector
    ) as client:
        for filename, ids in modules.items():
            for debug_id, code_file, code_id in ids:
                tasks.append(
                    collect_info(client, filename, debug_id, code_file, code_id)
                )

        res = await asyncio.gather(*tasks)

    to_dump = []
    stats = {"no_pdb": 0, "is_there": 0}
    for filename, debug_id, code_file, code_id, has_pdb, has_code, is_there in res:
        if not has_pdb:
            if is_there:
                stats["is_there"] += 1
            else:
                stats["no_pdb"] += 1
                log.info(f"No pdb for {filename}/{debug_id}")
            continue

        log.info(
            f"To dump: {filename}/{debug_id}, {code_file}/{code_id} and has_code = {has_code}"
        )
        to_dump.append((filename, debug_id, code_file, code_id, has_code))

    log.info(f"Collected {len(to_dump)} files to dump")

    return to_dump, stats


async def make_dirs(path):
    loop = asyncio.get_event_loop()

    def helper(path):
        os.makedirs(path, exist_ok=True)

    await loop.run_in_executor(None, helper, path)


async def fetch_and_write(output, client, filename, file_id):
    path = os.path.join(filename, file_id, filename)
    data = await fetch_file(client, MICROSOFT_SYMBOL_SERVER, path)

    if not data:
        return False

    output_dir = os.path.join(output, filename, file_id)
    await make_dirs(output_dir)

    output_path = os.path.join(output_dir, filename)
    async with AIOFile(output_path, "wb") as Out:
        await Out.write(data)

    return True


async def fetch_all_modules(output, modules):
    loop = asyncio.get_event_loop()
    tasks = []
    fetched_modules = []

    # In case of errors (Too many open files), just change limit_per_host
    connector = TCPConnector(limit=100, limit_per_host=0)

    async with ClientSession(
        loop=loop, timeout=ClientTimeout(total=TIMEOUT), connector=connector
    ) as client:
        for filename, debug_id, code_file, code_id, has_code in modules:
            tasks.append(fetch_and_write(output, client, filename, debug_id))
            if has_code:
                tasks.append(fetch_and_write(output, client, code_file, code_id))

        res = await asyncio.gather(*tasks)
        res = iter(res)
        for filename, debug_id, code_file, code_id, has_code in modules:
            fetched_pdb = next(res)
            if has_code:
                has_code = next(res)
            if fetched_pdb:
                fetched_modules.append(
                    (filename, debug_id, code_file, code_id, has_code)
                )

    return fetched_modules


def get_base_data(url):
    async def helper(url):
        return await asyncio.gather(
            fetch_missing_symbols(url),
            # Symbols that we know belong to us, so don't ask Microsoft for them.
            get_list("ignorelist.txt"),
            # Symbols that we know belong to Microsoft, so don't skiplist them.
            get_list("known-microsoft-symbols.txt"),
            # Symbols that we've asked for in the past unsuccessfully
            get_skiplist(),
        )

    return asyncio.run(helper(url))


def gen_zip(output, output_dir, file_index):
    if not file_index:
        return

    with zipfile.ZipFile(output, "w", zipfile.ZIP_DEFLATED) as z:
        for f in file_index:
            z.write(os.path.join(output_dir, f), f)
    log.info(f"Wrote zip as {output}")


def main():
    parser = argparse.ArgumentParser(
        description="Fetch missing symbols from Microsoft symbol server"
    )
    parser.add_argument(
        "--crashstats-api",
        type=str,
        help="crash-stats API URL",
        default=CRASHSTATS_API_URL,
    )
    parser.add_argument("zip", type=str, help="output zip file")
    parser.add_argument(
        "--dump-syms",
        type=str,
        help="dump_syms path",
        default=os.environ.get("DUMP_SYMS_PATH"),
    )

    args = parser.parse_args()

    assert args.dump_syms, "dump_syms path is empty"

    logging.basicConfig(level=logging.DEBUG)
    aiohttp_logger = logging.getLogger("aiohttp.client")
    aiohttp_logger.setLevel(logging.INFO)
    log.info("Started")

    missing_symbols, ignorelist, known_ms_symbols, skiplist = get_base_data(
        args.crashstats_api
    )

    modules, stats_skipped = get_missing_symbols(missing_symbols, skiplist, ignorelist)

    symbol_path = mkdtemp("symsrvfetch")
    temp_path = mkdtemp(prefix="symcache")

    modules, stats_collect = asyncio.run(collect(modules))
    modules = asyncio.run(fetch_all_modules(temp_path, modules))

    file_index, stats_dump = asyncio.run(
        dump(symbol_path, temp_path, modules, args.dump_syms)
    )

    gen_zip(args.zip, symbol_path, file_index)

    shutil.rmtree(symbol_path, True)
    shutil.rmtree(temp_path, True)

    write_skiplist(skiplist)

    if not file_index:
        log.info(f"No symbols downloaded: {len(missing_symbols)} considered")
    else:
        log.info(
            f"Total files: {len(missing_symbols)}, Stored {len(file_index)} symbol files"
        )

    log.info(
        f"{stats_collect['is_there']} already present, {stats_skipped['ignorelist']} in ignored list, "  # noqa
        f"{stats_skipped['skiplist']} skipped, {stats_collect['no_pdb']} not found, "
        f"{stats_dump['dump_error']} processed with errors, "
        f"{stats_dump['no_bin']} processed but with no binaries (x86_64)"
    )
    log.info("Finished, exiting")


if __name__ == "__main__":
    main()
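Since the rewritten run.sh only creates the archive when at least one module was dumped, a quick way to sanity-check a run is to list the artifact. A sketch, not part of the commit (7zz comes from the 7zip package installed in the Dockerfile above):

# Hypothetical smoke test after a run: confirm the archive exists and holds .sym files.
7zz l /builds/worker/artifacts/target.crashreporter-symbols.zip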