diff options
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | Makefile | 9 | ||||
-rwxr-xr-x | check_checkers.py | 243 | ||||
-rwxr-xr-x | check_loaders.py | 228 | ||||
-rwxr-xr-x | deploy.sh | 6 | ||||
-rw-r--r-- | index.html | 18 | ||||
-rw-r--r-- | puzzles/easy.py | 1 | ||||
-rw-r--r-- | puzzles/easy_reassign.py | 2 | ||||
-rw-r--r-- | puzzles/infer_append.py | 2 | ||||
-rw-r--r-- | puzzles/infer_return_basic.py | 3 | ||||
-rw-r--r-- | puzzles/tricky_enum.py | 8 | ||||
-rw-r--r-- | puzzles/tricky_recursive.py | 3 | ||||
-rw-r--r-- | puzzles/tricky_try_except.py | 8 | ||||
-rw-r--r-- | puzzles/tricky_typevar_constrained.py | 9 | ||||
-rw-r--r-- | pyrightconfig.json | 3 | ||||
-rw-r--r-- | pyserde.py | 3 | ||||
-rw-r--r-- | requirements.txt | 8 |
17 files changed, 558 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dfdec93 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.pyc +typeshed/ +dist/ +cache.json diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..be30dac --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +all: + mkdir -p dist + cp index.html dist/ + ./check_checkers.py + ./check_loaders.py > dist/loaders.html + +clean: + rm -r dist + rm cache.json diff --git a/check_checkers.py b/check_checkers.py new file mode 100755 index 0000000..8ce03fa --- /dev/null +++ b/check_checkers.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +import html +import json +import os +import re +import shutil +from subprocess import PIPE, Popen, check_output +import tempfile +import time +from typing import Dict, List, NamedTuple, TextIO, Tuple, TypedDict + +import mypy.api +from pygments import highlight +from pygments.formatters import HtmlFormatter +from pygments.lexers import PythonLexer + + +Error = Tuple[str, int, str] +"""The filename, line number and the error message.""" + + +class Checker: + url: str + + def run(self, path: str, typeshed_path: str) -> List[Error]: + """ + Type checks the given path with the given options. + """ + raise NotImplementedError() + + def version(self) -> str: + """Returns the version of the checker.""" + raise NotImplementedError() + + +class Mypy(Checker): + url = 'https://github.com/python/mypy' + + # mypy cannot output JSON (https://github.com/python/mypy/issues/10816) + # though there is a PR (https://github.com/python/mypy/pull/11396) + # so we just use the regex from https://github.com/matangover/mypy-vscode/blob/48162f345c7f14b96f29976660100ae1dd49cc0a/src/mypy.ts + _pattern = re.compile( + r'^(?P<file>[^\n]+?):((?P<line>\d+):)?((?P<column>\d+):)? (?P<type>\w+): (?P<message>.*)$', + re.MULTILINE, + ) + + @classmethod + def run(cls, path: str, typeshed_path: str): + cachedir = tempfile.mkdtemp(prefix='mypy-cache-') + stdout, stderr, retcode = mypy.api.run( + [ + # fmt: off + '--cache-dir', cachedir, + '--custom-typeshed-dir', typeshed_path, + # fmt: on + '--', + path, + ] + ) + shutil.rmtree(cachedir) + return [ + (m.group('file'), m.group('line'), m.group('message')) + for m in cls._pattern.finditer(stdout) + ] + + @staticmethod + def version(): + return mypy.api.run(['--version'])[0].split()[1].strip() + + +class Pytype(Checker): + url = 'https://github.com/google/pytype' + + # pytype supports CSV output only for pytype-single which however doesn't support multiple modules + # (https://github.com/google/pytype/issues/92) + _pattern = re.compile( + r'^File "(?P<file>[^"]+?)", line (?P<line>\d+), in (?P<module>[^ ]+): (?P<message>.*) \[(?P<id>[^]]+)\]$', + re.MULTILINE, + ) + + @classmethod + def run(cls, path: str, typeshed_path: str): + env = {'TYPESHED_HOME': typeshed_path, 'PATH': os.environ['PATH']} + proc = Popen( + ['pytype', '--', path], + stdout=PIPE, + stderr=PIPE, + encoding='utf-8', + env=env, + ) + stdout, stderr = proc.communicate() + return [ + (m.group('file'), m.group('line'), m.group('message')) + for m in cls._pattern.finditer(stdout) + ] + + @staticmethod + def version(): + return check_output(['pytype', '--version'], encoding='utf-8').strip() + + +class Pyright(Checker): + url = 'https://github.com/microsoft/pyright' + + @staticmethod + def run(path: str, typeshed_path: str): + proc = Popen( + [ + 'pyright', + # fmt: off + '--typeshed-path', typeshed_path, + '--outputjson', + # fmt: on + # pyright does not support -- + path, + ], + stdout=PIPE, + stderr=PIPE, + encoding='utf-8', + ) + stdout, stderr = proc.communicate() + return [ + (d['file'], d['range']['start']['line'] + 1, d['message']) + for d in json.loads(stdout)['generalDiagnostics'] + ] + + @staticmethod + def version(): + return ( + check_output(['pyright', '--version'], encoding='utf-8').split()[1].strip() + ) + + +# We don't check pyre because it has a very slow startup time (5s) since it parses the whole typeshed. +# (see https://github.com/facebook/pyre-check/issues/592) + + +class Puzzle(TypedDict): + checker_results: Dict[str, List[Error]] + last_modified: int + + +def run_checkers(checkers: List[Checker], puzzle: str, typeshed_path: str): + results = {} + for checker in checkers: + start = time.time() + results[checker.__class__.__name__] = checker.run(puzzle, typeshed_path) + duration = time.time() - start + print(checker, time.time() - start) + return results + + +def run( + checkers: List[Checker], + puzzles: List[str], + default_typeshed: str, + out: TextIO, + cache: Dict[str, Puzzle], +): + python_lexer = PythonLexer() + html_formatter = HtmlFormatter(noclasses=True, linenos='table') + out.write("<meta charset=utf-8><title>Comparison of static type checkers for Python</title>") + out.write( + '''<p>This page compares three static type checkers for Python. + The <span class=unexpected>red</span> background indicates that the checker + outputs an incorrect result (either a false positive or a false negative). + <a href=/>Back to start page</a>.</p>''' + ) + + out.write('<style>.unexpected {background: #ffd2d0}</style>') + + out.write('<table border=1>') + out.write('<tr><th>Input') + for checker in checkers: + out.write('<th>') + out.write('<a href="{}">'.format(html.escape(checker.url))) + out.write(checker.__class__.__name__) + out.write('</a>') + out.write('<br>({})'.format(html.escape(checker.version()))) + out.write('</tr>') + + for puzzle in puzzles: + print(puzzle) + last_modified = int(os.stat(puzzle).st_mtime) + if puzzle in cache and last_modified == cache[puzzle]['last_modified']: + checker_results = cache[puzzle]['checker_results'] + else: + checker_results = run_checkers(checkers, puzzle, default_typeshed) + cache[puzzle] = { + 'last_modified': last_modified, + 'checker_results': checker_results, + } + + out.write('<tr>') + out.write('<td>') + with open(puzzle) as f: + code = f.read() + out.write(highlight(code, python_lexer, html_formatter)) + error_ok = '# error' in code or '# maybe error' in code + no_error_ok = '# error' not in code + + for checker in checkers: + errors = checker_results[checker.__class__.__name__] + out.write( + '<td class="{}">'.format( + 'ok' + if (errors and error_ok) or (not errors and no_error_ok) + else 'unexpected' + ) + ) + if errors: + out.write('<ul>') + for filename, line, message in errors: + out.write('<li>') + out.write(f'{line}: ' + html.escape(message).replace('\n', '<br>')) + out.write('</li>') + out.write('</ul>') + else: + out.write('<center>no errors found') + out.write('</table>') + + +if __name__ == '__main__': + # TODO: git clone typeshed if missing + typeshed = os.path.abspath('typeshed') # pytype requries an absolute path + + try: + with open('cache.json') as f: + cache = json.load(f) + except FileNotFoundError: + cache = {} + + with open('dist/checkers.html', 'w') as f: + run( + [Mypy(), Pytype(), Pyright()], + ['puzzles/' + f for f in sorted(os.listdir('puzzles'))], + typeshed, + f, + cache, + ) + + with open('cache.json', 'w') as f: + json.dump(cache, f) diff --git a/check_loaders.py b/check_loaders.py new file mode 100755 index 0000000..c1e841c --- /dev/null +++ b/check_loaders.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python3 +""" +# Comparison of type-safe loaders for Python + +When loading data with json.load or toml.load you get unstructured data. +There exist several libraries to help you you check that this unstructured data +matches your expected structure, without having to write a bunch of boilerplate +code. This Python script compares these libraries. + +* typedload (??) +* apischema (??) +* serdelicacy (??) +* perde (??) +* typical (??) +* cattrs (??) +* pyserde (??) + +For other comparisons see https://quackmark.push-f.com/. + +""" +from types import ModuleType +from typing import Optional, Literal +import typing, msgpack + +module: Optional[ModuleType] = None + +class NotApplicable(Exception): ... + +def perde(m, v, t): + try: + encoded = msgpack.dumps(v) + except TypeError: + raise NotApplicable + return m.msgpack.loads_as(t, encoded) + +LOADERS = { + 'typedload': lambda m, v, t: m.load(v, t, basiccast=False, failonextra=True), + 'apischema': lambda m, v, t: m.deserialize(t, v), + 'serdelicacy': lambda m, v, t: m.load(v, t), + 'perde': perde, + 'typic': lambda m, v, t: m.protocol(t).transmute(v), + 'cattrs': lambda m, v, t: m.structure(v, t), + 'pyserde': lambda m, v, t: m.from_dict(t, v), +} + +def load(v, t): + if module: + return LOADERS[module.__name__](module, v, t) + +Err = ... # denotes that a function raised an exception + +# booleans +load(True, bool) == True +load(False, bool) == False +load(None, bool) == Err +load(1, bool) == Err +load(0, bool) == Err +load('foo', bool) == Err +load('false', bool) == Err + +# strings, bytes and floats +load(b'test', str) == Err +load(b'test', bytes) == b'test' +load('inf', float) == Err +load('nan', float) == Err + +# byte arrays +load(b'test', bytearray) == bytearray(b'test') + +# literals + +YesOrNo = Literal['yes', 'no'] +load('yes', YesOrNo) == 'yes' +load('Yes', YesOrNo) == Err + +# optionals +load(None, Optional[int]) == None +load(3, Optional[int]) == 3 + +# enums +import enum + +class Answer(enum.Enum): + Yes = 1 + No = 2 + +class AnswerNum(enum.IntEnum): + Yes = 1 + No = 2 + +class Color(enum.IntFlag): + Red = enum.auto() + Green = enum.auto() + Blue = enum.auto() + +color = Color.Red | Color.Blue + +load('Yes', Answer) == Err +load(1, Answer) == Answer.Yes +load('Yes', AnswerNum) == Err +load(1, AnswerNum) == AnswerNum.Yes +load(int(color), Color) == color + +# newtype + +MyInt = typing.NewType('MyInt', int) + +load(3, MyInt) == MyInt(3) + +# collections +load([1,2,3], tuple[int, ...]) == (1,2,3) +load([1,2,3], set[int]) == {1,2,3} +load([1,2,3], frozenset[int]) == frozenset((1,2,3)) +load({1,2,3}, list[int]) == [1,2,3] +load((1,2,3), list[int]) == [1,2,3] +load([(1, 1)], dict[int, int]) == Err + +# named tuples + +class NT(typing.NamedTuple): + x: int = 0 + +load({'x': 1}, NT) == NT(1) +load({}, NT) == NT() +load({'y': 1}, NT) == Err + +# dataclasses +from dataclasses import dataclass, InitVar + +@dataclass +class DC: + x: int = 0 + i: InitVar[int] = 0 + + def __post_init__(self, i): + self.x += i + +load({'x': 1}, DC) == DC(1) +load({}, DC) == DC() +load({'y': 1}, DC) == Err +load({'i': 5}, DC) == DC(5) + +# TypedDict + +class TD(typing.TypedDict): + x: int + +load({'x': 1}, TD) == {'x': 1} +load({'x': 'x'}, TD) == Err +load({'y': 1}, TD) == Err + +# objects from strings +from pathlib import Path +from ipaddress import IPv4Address, IPv6Interface + +class Foo: + def __init__(self, x: int): + self.x = x + +load('test', Path) == Path('test') +load('127.0.0.1', IPv4Address) == IPv4Address('127.0.0.1') +load('::1.2.3.4/24', IPv6Interface) == IPv6Interface('::1.2.3.4/24') +load('foo', Foo) == Err + +from uuid import UUID +from decimal import Decimal +from datetime import datetime +my_id = '12345678123456781234567812345678' +now = datetime.now() + +load(my_id, UUID) == UUID(my_id) +load(1, Decimal) == Decimal(1) +load(str(now), datetime) == now + +# run method +import html, importlib, importlib.metadata, inspect, sys, re + +def run(): + global module + print('<meta charset=utf-8><pre style="font-family: \'Source Code Pro\', monospace; width: 90ch; margin: 0 auto;">') + lines = iter(inspect.getsourcelines(sys.modules[__name__])[0]) + while line := next(lines, None): + if line.startswith('# '): + print('<h2 style="margin:0; font-size: inherit">' + html.escape(line.strip()), '</h2>', end='') + elif line.startswith('* ') and '(' in line: + pkg_name = line.split()[1] + metadata = importlib.metadata.metadata(pkg_name) + link = '<a href="{}">{}</a>'.format(html.escape(metadata['Home-page']), pkg_name) + print(line.split('(')[0].replace(pkg_name, link) + f'</a>({metadata.get("Version")})') + elif line.startswith('load('): + tests = [line] + while (l := next(lines)).startswith('load('): + tests.append(l) + print('<table>') + print('<tr><td><td>#') + for loader in LOADERS: + print('<td style="padding: 0 0.5em; text-align: center">', loader) + print('</tr>') + for test in tests: + print('<tr>') + print('<td style="padding-right: 1em">', html.escape(test), '<td>#') + for loader in LOADERS: + module = importlib.import_module(loader) + load_call, expected = test.strip().split(' == ') + print('<td align=center>') + try: + result = eval(load_call) + if result == eval(expected): + print('✅') + else: + print(html.escape(repr(result))) + except NotApplicable: + print('N/A') + except Exception as e: + if expected == 'Err': + print('✅') + else: + print('<abbr title="{}">Error</abbr>'\ + .format(html.escape(str(e)))) + pass + print('</tr>') + print('</table>') + else: + print(re.sub("https://[a-z./-]+/", lambda url: f'<a href="{url.group()}">{url.group()}</a>', html.escape(line)), end='') + print('<pre>') + +if __name__ == '__main__': + run() diff --git a/deploy.sh b/deploy.sh new file mode 100755 index 0000000..8efd5ae --- /dev/null +++ b/deploy.sh @@ -0,0 +1,6 @@ +tar cf - -C dist . | ssh push-f.com 'sh -c " +set -x && +cd /var/www/quackmark.push-f.com && +rm -rf * && +tar xvf - +"' diff --git a/index.html b/index.html new file mode 100644 index 0000000..1a607c4 --- /dev/null +++ b/index.html @@ -0,0 +1,18 @@ +<!doctype html> +<html> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<body style="max-width: 60ch; margin: 0 auto; font-size: 1.2em; text-align: center;"> +<h1>Quackmark</h1> + +<p style="line-height: 1.5em;"> +If it walks like a duck and it quacks like a duck, then it's probably a duck.<br> +But if you want to be safe, you have to check. +</p> + +Compare <a href="checkers.html">static type checkers</a> +and <a href="loaders.html">type‑safe loaders</a>. + +<p>Learn more on <a href="https://typing.readthedocs.io/en/latest/">typing.readthedocs.io</a>. + +<footer style="font-size: smaller; border-top: 1px solid #ccc; padding-top: 1em;"> +Created by <a href="https://push-f.com/">push-f</a>. diff --git a/puzzles/easy.py b/puzzles/easy.py new file mode 100644 index 0000000..6298e46 --- /dev/null +++ b/puzzles/easy.py @@ -0,0 +1 @@ +x: int = 'Python' # error diff --git a/puzzles/easy_reassign.py b/puzzles/easy_reassign.py new file mode 100644 index 0000000..aeed688 --- /dev/null +++ b/puzzles/easy_reassign.py @@ -0,0 +1,2 @@ +x = 3 +x = 'test' # maybe error diff --git a/puzzles/infer_append.py b/puzzles/infer_append.py new file mode 100644 index 0000000..3511512 --- /dev/null +++ b/puzzles/infer_append.py @@ -0,0 +1,2 @@ +lst = ["duck"] +lst.append(2022) # maybe error diff --git a/puzzles/infer_return_basic.py b/puzzles/infer_return_basic.py new file mode 100644 index 0000000..c2cbe7a --- /dev/null +++ b/puzzles/infer_return_basic.py @@ -0,0 +1,3 @@ +def f(): return "Python" + +def g(): return f() + 3 # error diff --git a/puzzles/tricky_enum.py b/puzzles/tricky_enum.py new file mode 100644 index 0000000..a743168 --- /dev/null +++ b/puzzles/tricky_enum.py @@ -0,0 +1,8 @@ +import enum +from typing import Literal + +class Color(enum.Enum): + Red = enum.auto() + Blue = enum.auto() + +c: Literal[Color.Red] = Color.Blue # error diff --git a/puzzles/tricky_recursive.py b/puzzles/tricky_recursive.py new file mode 100644 index 0000000..5de796c --- /dev/null +++ b/puzzles/tricky_recursive.py @@ -0,0 +1,3 @@ +from typing import Union + +Foo = list[Union['Foo', int]] diff --git a/puzzles/tricky_try_except.py b/puzzles/tricky_try_except.py new file mode 100644 index 0000000..de829c3 --- /dev/null +++ b/puzzles/tricky_try_except.py @@ -0,0 +1,8 @@ +def foo(): + file = None + try: + file = open('test.json') + except Exception: + pass + + file.name # error diff --git a/puzzles/tricky_typevar_constrained.py b/puzzles/tricky_typevar_constrained.py new file mode 100644 index 0000000..50ce10a --- /dev/null +++ b/puzzles/tricky_typevar_constrained.py @@ -0,0 +1,9 @@ +from typing import TypeVar, Any, Union + +V = TypeVar("V", str, bytes) + +def check_v(x: Union[V, list[V]]) -> V: + raise NotImplementedError() + +def foo(a: list[Any]): + check_v(a) diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 0000000..401e7ff --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,3 @@ +{ + "include": ["check_checkers.py", "check_loaders.py"] +} diff --git a/pyserde.py b/pyserde.py new file mode 100644 index 0000000..c1cba31 --- /dev/null +++ b/pyserde.py @@ -0,0 +1,3 @@ +from serde import * + +# having the package name different from the PyPI name is confusing diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bb5abe7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +apischema==0.17.5 +cattrs==1.10.0 +msgpack==1.0.0 +perde==0.0.2 +pyserde==0.7.0 +serdelicacy==0.18.1 +typedload==2.15 +typical==2.8.0 |