diff --git a/pgidocgen/docobj.py b/pgidocgen/docobj.py index 6338432d..8464f34a 100644 --- a/pgidocgen/docobj.py +++ b/pgidocgen/docobj.py @@ -338,6 +338,7 @@ def from_object(cls, repo, parent_fullname, sig): print("FIXME: signal: %s " % inst.fullname) signature_desc = "(FIXME pgi-docgen: arguments are missing here)" + inst.full_signature = fsig inst.signature_desc = signature_desc inst.info = DocInfo.from_object(repo, "signals", inst, current_type=parent_fullname) @@ -653,6 +654,7 @@ def from_object(cls, repo, parent_fullname, field_info): name = field_info.name field = cls(parent_fullname, name) + field.py_type = field_info.py_type field.type_desc = py_type_to_class_ref(field_info.py_type) field.readable = field_info.readable field.writable = field_info.writeable @@ -793,6 +795,7 @@ def render(docs): signature = get_signature_string(obj) assert signature + instance.full_signature = func_sig instance.signature_desc = signature_desc instance.signature = signature instance.info.desc = desc @@ -887,12 +890,13 @@ def from_object(cls, repo, obj): class Constant(BaseDocObject): - def __init__(self, parent_fullname, name, value): + def __init__(self, parent_fullname, name, value, py_type): self.fullname = parent_fullname + "." + name self.name = name self.info = None self.value = value + self.py_type = py_type @classmethod def from_object(cls, repo, parent_fullname, name, obj): @@ -904,7 +908,7 @@ def from_object(cls, repo, parent_fullname, name, obj): else: value = repr(obj) - instance = Constant(parent_fullname, name, value) + instance = Constant(parent_fullname, name, value, type(obj)) instance.info = DocInfo.from_object(repo, "all", instance) return instance diff --git a/pgidocgen/main.py b/pgidocgen/main.py index 1542c77b..7d178901 100644 --- a/pgidocgen/main.py +++ b/pgidocgen/main.py @@ -9,7 +9,12 @@ import argparse -from . import create, build, stubs, create_debian, update +from . import create, build, stubs, update + +try: + from . import create_debian +except ImportError: + create_debian = None def main(argv): @@ -19,7 +24,8 @@ def main(argv): create.add_parser(subparser) build.add_parser(subparser) stubs.add_parser(subparser) - create_debian.add_parser(subparser) + if create_debian: + create_debian.add_parser(subparser) update.add_parser(subparser) args = parser.parse_args(argv[1:]) diff --git a/pgidocgen/namespace.py b/pgidocgen/namespace.py index f563d7c9..8fcee573 100644 --- a/pgidocgen/namespace.py +++ b/pgidocgen/namespace.py @@ -545,6 +545,20 @@ def is_available(mod, name): return types, type_structs, shadow_map, instance_params +def _is_empty_text_node(child) -> bool: + return ( + child.nodeType == child.TEXT_NODE and + not child.data.strip() + ) + + +def _is_source_position_node(child) -> bool: + return ( + child.nodeType == child.ELEMENT_NODE and + child.tagName == 'source-position' + ) + + def _parse_private(dom, namespace): private = set() @@ -563,8 +577,15 @@ def is_empty(node): is_gtype_struct = bool(record.getAttribute("glib:is-gtype-struct-for")) is_private = record.getAttribute("name").endswith("Private") if is_private and not is_gtype_struct and is_empty(record): - name = namespace + "." + record.getAttribute("name") - private.add(name) + + meaningful_children = [ + c for c in record.childNodes + if not _is_empty_text_node(c) and not _is_source_position_node(c) + ] + + if not meaningful_children: + name = namespace + "." + record.getAttribute("name") + private.add(name) return private diff --git a/pgidocgen/stuboverrides.py b/pgidocgen/stuboverrides.py new file mode 100644 index 00000000..0b8986e1 --- /dev/null +++ b/pgidocgen/stuboverrides.py @@ -0,0 +1,80 @@ + +PROPERTY_STUB = """# TODO: Constrain T if possible. Builtins + GTypes might be sufficient? +T = typing.TypeVar('T') + +PropertyGetterFn = typing.Callable[[typing.Any], T] +PropertySetterFn = typing.Callable[[typing.Any, T], None] + + +class Property(typing.Generic[T]): + + name: typing.Optional[str] + type: typing.Type[T] + default: typing.Optional[T] + nick: str + blurb: str + flags: ParamFlags + minimum: typing.Optional[T] + maximum: typing.Optional[T] + + def __init__( + self, + getter: typing.Optional[PropertyGetterFn[T]] = None, + setter: typing.Optional[PropertySetterFn[T]] = None, + type: typing.Optional[typing.Type[T]] = None, + default: typing.Optional[T] = None, + nick: str = '', + blurb: str = '', + flags: ParamFlags = ParamFlags.READWRITE, + minimum: typing.Optional[T] = None, + maximum: typing.Optional[T] = None, + ) -> None: + ... + + def __get__(self, instance: typing.Any, klass: typing.Type) -> T: + ... + + def __set__(self, instance: typing.Any, value: T) -> None: + ... + + def __call__(self, PropertyGetterFn) -> Property[T]: + ... + + def getter(self: Property[T], fget: PropertyGetterFn) -> Property[T]: + ... + + def setter(self: Property[T], fset: PropertySetterFn) -> Property[T]: + ... + + # TODO: There's three Tuple variant structures that could be + # returned here, and they're all unpleasantly complicated. + def get_pspec_args(self) -> typing.Sequence[typing.Any]: + ... +""" + +FLAGS_TYPEVAR = """FlagsT = typing.TypeVar('FlagsT') +""" + + +#: Map of namespace to additional manually-written stub classes that +#: should be added to the top of the generated stub. +NAMESPACE_OVERRIDES = { + 'GLib': [ + FLAGS_TYPEVAR, + ], + 'GObject': [ + PROPERTY_STUB, + ], +} + +#: Map of class full name to attributes to be added to the class. +OBJECT_OVERRIDES = { + 'GLib.Flags': [ + 'def __or__(self: FlagsT, other: typing.Union[int, FlagsT]) -> FlagsT: ...', + 'def __and__(self: FlagsT, other: typing.Union[int, FlagsT]) -> FlagsT: ...', + 'def __xor__(self: FlagsT, other: typing.Union[int, FlagsT]) -> FlagsT: ...', + ], + 'GObject.Object': [ + 'Property = Property', + ], +} diff --git a/pgidocgen/stubs.py b/pgidocgen/stubs.py index ca903182..ebbad43e 100644 --- a/pgidocgen/stubs.py +++ b/pgidocgen/stubs.py @@ -5,13 +5,87 @@ # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -import sys -import subprocess +import builtins +import io import os +import subprocess +import sys +import typing +from .docobj import Flags +from .namespace import get_namespace, set_cache_prefix_path from .repo import Repository +from .stuboverrides import NAMESPACE_OVERRIDES, OBJECT_OVERRIDES from .util import get_gir_files -from .namespace import get_namespace, set_cache_prefix_path + + +#: Mapping from namespace to a list of tuples of (class name, alias) +MODULE_ALIASES = { + 'GObject': [ + ('Object', 'GObject'), + ], +} + +#: Methods that are left out of the type stub *entirely*. In general, +#: these are methods that have Liskov violations and are not in common +#: use or are deprecated. +OMITTED_METHODS = { + ('Gio.Initable', 'newv'), + ('GObject.Object', 'newv'), +} + +#: Methods that have explicit `type: ignore` annotations added. These +#: are methods that violate Liskov by overriding superclass methods +#: with an incompatible signature. +LISKOV_VIOLATING_METHODS = { + ('Gio.Cancellable', 'connect'), + ('Gio.Cancellable', 'disconnect'), + ('Gio.MemoryOutputStream', 'get_data'), + ('Gio.MemoryOutputStream', 'steal_data'), + ('Gio.Socket', 'condition_wait'), + ('Gio.Socket', 'connect'), + ('Gio.Socket', 'receive_messages'), + ('Gio.Socket', 'send_messages'), + ('Gio.SocketClient', 'connect'), + ('Gio.SocketConnection', 'connect'), + ('GObject.TypeModule', 'use'), + ('Gtk.AccelGroup', 'connect'), + ('Gtk.AccelGroup', 'disconnect'), + ('Gtk.FileFilter', 'get_name'), + ('Gtk.RecentFilter', 'get_name'), + ('Gtk.Settings', 'install_property'), + ('Gtk.StyleContext', 'get_property'), + ('Gtk.StyleProperties', 'get_property'), + ('Gtk.StyleProperties', 'set_property'), + ('Gtk.ThemingEngine', 'get_property'), +} + + +# Initialising the current module to an invalid name +current_module: str = '-' +current_module_dependencies = set() + +# Set of type names that can be unwittingly shadowed and will cause +# trouble with the type checker. +shadowed_builtins = { + builtin for builtin in builtins.__dict__ + if isinstance(builtins.__dict__[builtin], type) +} + + +def strip_current_module(clsname: str) -> str: + # Strip GI module prefix from names in the current module + if clsname.startswith(current_module + "."): + return clsname[len(current_module + "."):] + return clsname + + +def add_dependent_module(module: str): + # FIXME: Find a better way to check this. This currently won't work for GI + # modules that aren't installed in the current venv. + import gi.repository + if hasattr(gi.repository, module): + current_module_dependencies.add(module) def add_parser(subparsers): @@ -29,6 +103,348 @@ def _main_many(target, namespaces): [sys.executable, sys.argv[0], "stubs", target, namespace]) +class StubClass: + + indent = " " * 4 + + def __init__(self, classname): + self.classname = classname + self.parents = [] + self.members = [] + self.functions = [] + self.ignore_type_errors = False + + def add_member(self, member): + self.members.append(member) + + def add_function(self, function, *, ignore_type_error=False): + if ignore_type_error: + function += " # type: ignore" + self.functions.append(function) + + @property + def class_line(self): + parents = ', '.join(strip_current_module(p) for p in self.parents) + line = 'class {}({}):'.format(self.classname, parents) + if self.ignore_type_errors: + line += ' # type: ignore' + return line + + def __str__(self): + + body_lines = sorted(self.members) + for function in self.functions: + body_lines.extend([''] + function.splitlines()) + if not body_lines: + body_lines = ['...'] + + return '\n'.join( + [self.class_line] + + [(self.indent + line).rstrip() for line in body_lines] + + [''] + ) + + +def topological_sort(class_nodes): + """ + Topologically sort a list of class nodes according to inheritance + + We use this as a workaround for typing stub order dependencies + (see python/mypy#6119). + + Code adapted from https://stackoverflow.com/a/43702281/2963 + """ + # Map class name to actual class node being sorted + name_node_map = {node.fullname: node for node in class_nodes} + # Map class name to parent classes + name_parents_map = {} + # Map class name to child classes + name_children_map = {name: [] for name in name_node_map} + + for name, node in name_node_map.items(): + in_module_bases = [b.name for b in node.bases if b.name in name_node_map] + name_parents_map[name] = in_module_bases + for base_name in in_module_bases: + name_children_map[base_name].append(name) + + # Establish bases + sorted_names = [n for n, preds in name_parents_map.items() if not preds] + + for name in sorted_names: + for child in name_children_map[name]: + name_parents_map[child].remove(name) + if not name_parents_map[child]: + # Mutating list that we're iterating over, so that this + # class gets removed from subsequent pending parents + # lists. + sorted_names.append(child) + + if len(sorted_names) < len(name_node_map): + raise RuntimeError("Couldn't establish a topological ordering") + + return [name_node_map[name] for name in sorted_names] + + +def get_typing_name(type_: typing.Any) -> str: + """Gives a name for a type that is suitable for a typing annotation. + + This is the Python annotation counterpart to funcsig.get_type_name(). + + int -> "int" + Gtk.Window -> "Gtk.Window" + [int] -> "Sequence[int]" + {int: Gtk.Button} -> "Mapping[int, Gtk.Button]" + """ + + if type_ is None: + return "" + elif type_ is type(None): # noqa: E721 + # As a weird corner-case, some non-introspectable base types + # actually give NoneType here. We treat them as very special. + return "typing.Any" + elif isinstance(type_, str): + return type_ + elif isinstance(type_, list): + assert len(type_) == 1 + return "typing.Sequence[%s]" % get_typing_name(type_[0]) + elif isinstance(type_, dict): + assert len(type_) == 1 + key, value = type_.popitem() + return "typing.Mapping[%s, %s]" % (get_typing_name(key), get_typing_name(value)) + elif type_.__module__ in ("__builtin__", "builtins"): + return "builtins.%s" % type_.__name__ + elif type_.__module__ == current_module: + # Strip GI module prefix from current-module types + return type_.__name__ + else: + add_dependent_module(type_.__module__) + return "%s.%s" % (type_.__module__, type_.__name__) + + +def arg_to_annotation(text): + """Convert a docstring argument to a Python annotation string + + This is the Python annotation counterpart to funcsig.arg_to_class_ref(). + """ + + if not text.startswith(("[", "{")) or not text.endswith(("}", "]")): + parts = text.split(" or ") + else: + parts = [text] + + out = [] + for p in parts: + if p.startswith("["): + out.append("typing.Sequence[%s]" % arg_to_annotation(p[1:-1])) + elif p.startswith("{"): + p = p[1:-1] + k, v = p.split(":", 1) + k = arg_to_annotation(k.strip()) + v = arg_to_annotation(v.strip()) + out.append("typing.Mapping[%s, %s]" % (k, v)) + elif p in shadowed_builtins: + out.append("builtins.%s" % p) + elif p: + class_str = strip_current_module(p) + if '.' in class_str: + add_dependent_module(class_str.split('.', 1)[0]) + out.append(class_str) + + if len(out) == 0: + return "typing.Any" + elif len(out) == 1: + return out[0] + elif len(out) == 2 and 'None' in out: + # This is not strictly necessary, but it's easier to read than the Union + out.pop(out.index('None')) + return f"typing.Optional[{out[0]}]" + else: + return f"typing.Union[{', '.join(out)}]" + + +def format_function_args( + function, *, accepts_kwargs: bool = False, include_arg_names=True) -> str: + """Format function arguments as a type annotation fragment""" + + arg_specs = [] + + if (function.is_method or function.is_vfunc) and not function.is_static: + arg_specs.append('self') + + for key, value in function.full_signature.args: + spec = "{key}: {type}" if include_arg_names else "{type}" + arg_specs.append(spec.format(key=key, type=arg_to_annotation(value))) + + if accepts_kwargs: + arg_specs.append('**kwargs') + + return ", ".join(arg_specs) + + +def format_function_returns(function) -> str: + """Format function return values as a type annotation fragment""" + + return_values = [] + for r in function.full_signature.res: + # We have either a (name, return type) pair, or just the return type. + type_ = r[1] if len(r) > 1 else r[0] + return_values.append(arg_to_annotation(type_)) + + # Additional handling for structuring return values + if len(return_values) == 0: + returns = 'None' + elif len(return_values) == 1: + returns = return_values[0] + else: + returns = f'typing.Tuple[{", ".join(return_values)}]' + + return returns + + +def format_function(function, *, accepts_kwargs=False) -> str: + # We require the full signature details for argument types, and fallback + # to the simplest possible function signature if it's not available. + if not getattr(function, 'full_signature', None): + print("Missing full signature for {}".format(function)) + return "def {}(*args, **kwargs): ...".format(function.name) + + return '{decorator}def {name}({args}) -> {returns}: ...'.format( + decorator="@staticmethod\n" if function.is_static else "", + name=function.name, + args=format_function_args(function, accepts_kwargs=accepts_kwargs), + returns=format_function_returns(function), + ) + + +def stub_class(cls) -> str: + stub = StubClass(cls.name) + + if hasattr(cls, 'bases'): + stub.parents = [b.name for b in cls.bases] + elif hasattr(cls, 'base'): + stub.parents = [cls.base] if cls.base else [] + # GObject.Flags and GObject.Enum types support Python integer methods + if isinstance(cls, Flags): + stub.parents.append('builtins.int') + + overrides = OBJECT_OVERRIDES.get(cls.fullname, []) + for member in overrides: + stub.add_member(member) + + # TODO: We don't handle: + # * child_properties: It's not clear how to annotate these + # * gtype_struct: I'm not sure what we'd use this for. + # * properties: It's not clear how to annotate these + # * signals: It's not clear how to annotate these + # * pyprops: These have no type information, and I'm not certain + # what they cover, etc. + + for f in getattr(cls, 'fields', []): + if not f.name.isidentifier(): + continue + + # Special case handling for weird annotations + if cls.fullname == 'GObject.Value' and f.name == 'data': + continue + + stub.add_member(format_field(f)) + + # The `values` attribute is available on enums and flags, and its + # type will always be the current class. + for v in getattr(cls, 'values', []): + if not v.name.isidentifier(): + continue + stub.add_member(f"{v.name} = ... # type: {cls.name}") + + for v in cls.methods + cls.vfuncs: + # Check our ignored methods list before doing anything else + if (cls.fullname, v.name) in OMITTED_METHODS: + continue + + # GObject-based constructors often violate Liskov substitution, + # leading to typing errors such as: + # Signature of "new" incompatible with supertype "Object" + # While we're waiting for a more general solution (see + # https://github.com/python/mypy/issues/1237) we'll just ignore + # the typing errors. + + # TODO: Extract constructor information from GIR and add it to + # docobj.Function to use here. + # FIXME: This list is overly broad and very fragile. + is_constructor = v.name in ( + 'new', + 'new_from_stock', + 'new_with_label', + 'new_with_mnemonic', + 'new_with_range', + ) + ignore_type_error = ( + is_constructor or + (cls.fullname, v.name) in LISKOV_VIOLATING_METHODS + ) + + # All GObject-inheriting classes should accept keyword + # arguments to set GObject properties in their default + # constructor. + inherits_gobject = any(p == 'GObject.Object' for p in stub.parents) + accepts_kwargs = is_constructor and inherits_gobject + + stub.add_function( + format_function(v, accepts_kwargs=accepts_kwargs), + ignore_type_error=ignore_type_error, + ) + + return str(stub) + + +def format_field(field) -> str: + return f"{field.name}: {get_typing_name(field.py_type)}" + + +def format_callback(function) -> str: + # We're formatting a callback signature here, not an actual function. + return "{name} = typing.Callable[[{args}], {returns}]".format( + name=function.name, + args=format_function_args(function, include_arg_names=False), + returns=format_function_returns(function), + ) + + +def format_imports(namespace, version): + ns = get_namespace(namespace, version) + for dep in ns.dependencies: + current_module_dependencies.add(dep[0]) + + import_lines = [ + "import builtins", + "import typing", + "", + *sorted(f"from gi.repository import {dep}" for dep in current_module_dependencies), + "" + ] + return "\n".join(import_lines) + + +def get_module_classlikes(module): + return ( + module.pyclasses + + topological_sort(module.classes) + + # From a GI point of view, structures are really just classes + # that can't inherit from anything. + module.structures + + # The semantics of a GI-mapped union type don't really map + # nicely to typing structures. It *is* a typing.Union[], but + # you can't add e.g., function signatures to one of those. + # + # In practical terms, treating these as classes seems best. + module.unions + + # `GFlag`s and `GEnum`s are slightly different to classes, but + # easily covered by the same code. + module.flags + + module.enums + ) + + def main(args): if not args.namespace: print("No namespace given") @@ -60,34 +476,64 @@ def get_to_write(dir_, namespace, version): build directory is found, skipping it and all its deps. """ - mods = [] + mods = set() if os.path.exists(os.path.join(dir_, namespace + ".pyi")): return mods - mods.append((namespace, version)) + mods.add((namespace, version)) ns = get_namespace(namespace, version) for dep in ns.dependencies: - mods.extend(get_to_write(dir_, *dep)) + mods |= get_to_write(dir_, *dep) return mods + # We track the module currently being stubbed for naming reasons. e.g., + # within GObject stubs, referring to "GObject.Object" is incorrect; we + # need the typing reference to be simply "Object". + global current_module + global current_module_dependencies + for namespace, version in get_to_write(args.target, namespace, version): mod = Repository(namespace, version).parse() module_path = os.path.join(args.target, namespace + ".pyi") - types = mod.classes + mod.flags + mod.enums + \ - mod.structures + mod.unions - with open(module_path, "w", encoding="utf-8") as h: - for cls in types: - h.write("""\ -class {}: ... -""".format(cls.name)) - - for func in mod.functions: - h.write("""\ -def {}(*args, **kwargs): ... -""".format(func.name)) - - for const in mod.constants: - h.write("""\ -{} = ... -""".format(const.name)) + + current_module = namespace + current_module_dependencies = set() + + h = io.StringIO() + + for override in NAMESPACE_OVERRIDES.get(namespace, []): + h.write(override) + h.write("\n\n") + + for cls in get_module_classlikes(mod): + h.write(stub_class(cls)) + h.write("\n\n") + + for fn in mod.callbacks: + h.write(format_callback(fn)) + h.write("\n") + + if mod.callbacks: + h.write("\n\n") + + for func in mod.functions: + h.write(format_function(func)) + # Extra \n because the signature lacks one. + h.write("\n\n\n") + + for const in mod.constants: + h.write(format_field(const)) + h.write("\n") + + aliases = MODULE_ALIASES.get(namespace, []) + if aliases: + h.write("\n\n") + for (real, alias) in aliases: + h.write('{} = {}\n'.format(alias, real)) + + with open(module_path, "w", encoding="utf-8") as f: + # Start by handling all required imports for type annotations + f.write(format_imports(namespace, version)) + f.write("\n\n") + f.write(h.getvalue())