diff options
Diffstat (limited to 'check_checkers.py')
-rwxr-xr-x | check_checkers.py | 243 |
1 files changed, 243 insertions, 0 deletions
#!/usr/bin/env python3
"""Run several static type checkers over puzzle files and render an HTML table.

Each file in ``puzzles/`` is checked with mypy, pytype and pyright. The
diagnostics are cached in ``cache.json`` (keyed by puzzle path and invalidated
by modification time) and written as a comparison table to
``dist/checkers.html``.
"""
import html
import json
import os
import re
import shutil
from subprocess import PIPE, Popen, check_output
import tempfile
import time
from typing import Dict, List, NamedTuple, TextIO, Tuple, TypedDict

import mypy.api
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import PythonLexer


Error = Tuple[str, int, str]
"""The filename, line number and the error message."""


class Checker:
    """Common interface of the type checkers compared by this script."""

    # Project homepage; linked from the table header.
    url: str

    def run(self, path: str, typeshed_path: str) -> List[Error]:
        """
        Type checks the given path with the given options.
        """
        raise NotImplementedError()

    def version(self) -> str:
        """Returns the version of the checker."""
        raise NotImplementedError()


class Mypy(Checker):
    """Runs mypy in-process through ``mypy.api``."""

    url = 'https://github.com/python/mypy'

    # mypy cannot output JSON (https://github.com/python/mypy/issues/10816)
    # though there is a PR (https://github.com/python/mypy/pull/11396)
    # so we just use the regex from https://github.com/matangover/mypy-vscode/blob/48162f345c7f14b96f29976660100ae1dd49cc0a/src/mypy.ts
    _pattern = re.compile(
        r'^(?P<file>[^\n]+?):((?P<line>\d+):)?((?P<column>\d+):)? (?P<type>\w+): (?P<message>.*)$',
        re.MULTILINE,
    )

    @classmethod
    def run(cls, path: str, typeshed_path: str) -> List[Error]:
        """Type check ``path`` with mypy against the typeshed at ``typeshed_path``.

        Returns one ``(file, line, message)`` tuple per line of mypy's stdout
        that matches ``_pattern``. The line number is 0 when mypy reported
        none.
        """
        # A throwaway cache directory per invocation; the context manager
        # removes it even when mypy raises.
        with tempfile.TemporaryDirectory(prefix='mypy-cache-') as cachedir:
            stdout, _stderr, _retcode = mypy.api.run(
                [
                    # fmt: off
                    '--cache-dir', cachedir,
                    '--custom-typeshed-dir', typeshed_path,
                    # fmt: on
                    '--',
                    path,
                ]
            )
        return [
            # The line group is optional in the pattern, so it may be None.
            (m.group('file'), int(m.group('line') or 0), m.group('message'))
            for m in cls._pattern.finditer(stdout)
        ]

    @staticmethod
    def version() -> str:
        """Return the second whitespace-separated token of ``mypy --version``."""
        return mypy.api.run(['--version'])[0].split()[1].strip()


class Pytype(Checker):
    """Runs the ``pytype`` executable as a subprocess."""

    url = 'https://github.com/google/pytype'

    # pytype supports CSV output only for pytype-single which however doesn't support multiple modules
    # (https://github.com/google/pytype/issues/92)
    _pattern = re.compile(
        r'^File "(?P<file>[^"]+?)", line (?P<line>\d+), in (?P<module>[^ ]+): (?P<message>.*) \[(?P<id>[^]]+)\]$',
        re.MULTILINE,
    )

    @classmethod
    def run(cls, path: str, typeshed_path: str) -> List[Error]:
        """Type check ``path`` with pytype, passing the typeshed via ``TYPESHED_HOME``.

        Returns one ``(file, line, message)`` tuple per line of pytype's stdout
        that matches ``_pattern``.
        """
        # The child gets only TYPESHED_HOME and PATH, not the full environment.
        env = {'TYPESHED_HOME': typeshed_path, 'PATH': os.environ['PATH']}
        proc = Popen(
            ['pytype', '--', path],
            stdout=PIPE,
            stderr=PIPE,
            encoding='utf-8',
            env=env,
        )
        stdout, _stderr = proc.communicate()
        return [
            (m.group('file'), int(m.group('line')), m.group('message'))
            for m in cls._pattern.finditer(stdout)
        ]

    @staticmethod
    def version() -> str:
        """Return the stripped output of ``pytype --version``."""
        return check_output(['pytype', '--version'], encoding='utf-8').strip()


class Pyright(Checker):
    """Runs the ``pyright`` executable as a subprocess and parses its JSON output."""

    url = 'https://github.com/microsoft/pyright'

    @staticmethod
    def run(path: str, typeshed_path: str) -> List[Error]:
        """Type check ``path`` with pyright against the typeshed at ``typeshed_path``.

        Returns one ``(file, line, message)`` tuple per entry of
        ``generalDiagnostics`` in pyright's JSON output.
        """
        proc = Popen(
            [
                'pyright',
                # fmt: off
                '--typeshed-path', typeshed_path,
                '--outputjson',
                # fmt: on
                # pyright does not support --
                path,
            ],
            stdout=PIPE,
            stderr=PIPE,
            encoding='utf-8',
        )
        stdout, _stderr = proc.communicate()
        return [
            # +1 keeps the original offset applied to pyright's start line
            # (presumably zero-based -> one-based; confirm against pyright docs).
            (d['file'], d['range']['start']['line'] + 1, d['message'])
            for d in json.loads(stdout)['generalDiagnostics']
        ]

    @staticmethod
    def version() -> str:
        """Return the second whitespace-separated token of ``pyright --version``."""
        return (
            check_output(['pyright', '--version'], encoding='utf-8').split()[1].strip()
        )


# We don't check pyre because it has a very slow startup time (5s) since it parses the whole typeshed.
# (see https://github.com/facebook/pyre-check/issues/592)


class Puzzle(TypedDict):
    """One cache entry: the checker results for a puzzle file."""

    # Maps the checker class name to the errors it reported.
    checker_results: Dict[str, List[Error]]
    # Modification time of the puzzle file (whole seconds) when it was checked.
    last_modified: int


def run_checkers(checkers: List[Checker], puzzle: str, typeshed_path: str):
    """Run every checker on ``puzzle`` and print how long each one took.

    Returns a dict mapping each checker's class name to its list of errors.
    """
    results = {}
    for checker in checkers:
        start = time.perf_counter()
        results[checker.__class__.__name__] = checker.run(puzzle, typeshed_path)
        duration = time.perf_counter() - start
        print(checker, duration)
    return results


def run(
    checkers: List[Checker],
    puzzles: List[str],
    default_typeshed: str,
    out: TextIO,
    cache: Dict[str, Puzzle],
):
    """Write the HTML comparison table for ``puzzles`` to ``out``.

    ``cache`` is consulted and updated in place: a puzzle is re-checked only
    when its modification time differs from the cached one or when the cached
    entry lacks a result for one of ``checkers``.
    """
    python_lexer = PythonLexer()
    html_formatter = HtmlFormatter(noclasses=True, linenos='table')
    checker_names = [checker.__class__.__name__ for checker in checkers]

    out.write("<meta charset=utf-8><title>Comparison of static type checkers for Python</title>")
    out.write(
        '''<p>This page compares three static type checkers for Python.
    The <span class=unexpected>red</span> background indicates that the checker
    outputs an incorrect result (either a false positive or a false negative).
    <a href=/>Back to start page</a>.</p>'''
    )

    out.write('<style>.unexpected {background: #ffd2d0}</style>')

    out.write('<table border=1>')
    out.write('<tr><th>Input')
    for checker, name in zip(checkers, checker_names):
        out.write('<th>')
        out.write('<a href="{}">'.format(html.escape(checker.url)))
        out.write(name)
        out.write('</a>')
        out.write('<br>({})'.format(html.escape(checker.version())))
    out.write('</tr>')

    for puzzle in puzzles:
        print(puzzle)
        last_modified = int(os.stat(puzzle).st_mtime)
        cached = cache.get(puzzle)
        if (
            cached is not None
            and cached['last_modified'] == last_modified
            # A cache written with a different set of checkers is stale.
            and all(name in cached['checker_results'] for name in checker_names)
        ):
            checker_results = cached['checker_results']
        else:
            checker_results = run_checkers(checkers, puzzle, default_typeshed)
            cache[puzzle] = {
                'last_modified': last_modified,
                'checker_results': checker_results,
            }

        out.write('<tr>')
        out.write('<td>')
        # The page declares charset=utf-8, so read the sources the same way.
        with open(puzzle, encoding='utf-8') as f:
            code = f.read()
        out.write(highlight(code, python_lexer, html_formatter))
        # Markers in the puzzle source state the expected outcome:
        # '# error' -> an error must be reported, '# maybe error' -> either
        # outcome is acceptable, neither -> no error must be reported.
        error_ok = '# error' in code or '# maybe error' in code
        no_error_ok = '# error' not in code

        for name in checker_names:
            errors = checker_results[name]
            out.write(
                '<td class="{}">'.format(
                    'ok'
                    if (errors and error_ok) or (not errors and no_error_ok)
                    else 'unexpected'
                )
            )
            if errors:
                out.write('<ul>')
                for _filename, line, message in errors:
                    out.write('<li>')
                    out.write(f'{line}: ' + html.escape(message).replace('\n', '<br>'))
                    out.write('</li>')
                out.write('</ul>')
            else:
                out.write('<center>no errors found')
    out.write('</table>')


def main() -> None:
    """Check everything in ``puzzles/`` and write ``dist/checkers.html``."""
    # TODO: git clone typeshed if missing
    typeshed = os.path.abspath('typeshed')  # pytype requires an absolute path

    try:
        with open('cache.json', encoding='utf-8') as f:
            cache = json.load(f)
    except FileNotFoundError:
        cache = {}

    # Create the output directory on a fresh checkout instead of crashing.
    os.makedirs('dist', exist_ok=True)
    # Written as UTF-8 to match the <meta charset=utf-8> emitted by run().
    with open('dist/checkers.html', 'w', encoding='utf-8') as f:
        run(
            [Mypy(), Pytype(), Pyright()],
            ['puzzles/' + name for name in sorted(os.listdir('puzzles'))],
            typeshed,
            f,
            cache,
        )

    with open('cache.json', 'w', encoding='utf-8') as f:
        json.dump(cache, f)


if __name__ == '__main__':
    main()