From a575e1fdfb6f1c47f63293848dda92107a396a86 Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 08:34:42 +1000 Subject: [PATCH 01/12] Fix conformance test --- conformance/conformance_status.csv | 162 +++--- .../basilisk-cli/tests/conformance_tests.rs | 546 +++++++----------- 2 files changed, 302 insertions(+), 406 deletions(-) diff --git a/conformance/conformance_status.csv b/conformance/conformance_status.csv index 56baea8d..28787697 100644 --- a/conformance/conformance_status.csv +++ b/conformance/conformance_status.csv @@ -6,139 +6,139 @@ basilisk_rules,file,category,status,caught,missed,false_positives ,_protocols_modules2.py,,PASS,0,0,0 ,_qualifiers_final_annotation_1.py,,PASS,0,0,0 ,_qualifiers_final_annotation_2.py,,PASS,0,0,0 -BSK-E0048,aliases_explicit.py,aliases,PASS,21,0,0 -BSK-E0047|BSK-E0048|BSK-E0092,aliases_implicit.py,aliases,PASS,22,0,0 +BSK-E0002|BSK-E0048,aliases_explicit.py,aliases,FAIL,21,0,2 +BSK-E0002|BSK-E0047|BSK-E0048|BSK-E0092,aliases_implicit.py,aliases,FAIL,22,0,3 BSK-E0014|BSK-E0050,aliases_newtype.py,aliases,PASS,14,0,0 BSK-E0014|BSK-E0104,aliases_recursive.py,aliases,PASS,11,0,0 -BSK-E0057|BSK-E0149,aliases_type_statement.py,aliases,PASS,24,0,0 -BSK-E0151,aliases_typealiastype.py,aliases,PASS,22,0,0 +BSK-E0002|BSK-E0057|BSK-E0149,aliases_type_statement.py,aliases,FAIL,24,1,1 +BSK-E0005|BSK-E0151,aliases_typealiastype.py,aliases,FAIL,22,0,1 BSK-E0107,aliases_variance.py,aliases,PASS,4,0,0 ,annotations_coroutines.py,annotations,PASS,0,0,0 -BSK-E0047,annotations_forward_refs.py,annotations,PASS,19,0,0 +BSK-E0002|BSK-E0047,annotations_forward_refs.py,annotations,FAIL,19,0,1 BSK-E0120|BSK-E0131,annotations_generators.py,annotations,PASS,10,0,0 ,annotations_methods.py,annotations,PASS,0,0,0 -BSK-E0024|BSK-E0047|BSK-E0048,annotations_typeexpr.py,annotations,PASS,15,0,0 -BSK-E0014|BSK-E0015|BSK-E0122|BSK-E0140,callables_annotation.py,callables,PASS,16,0,0 
+BSK-E0002|BSK-E0024|BSK-E0047|BSK-E0048,annotations_typeexpr.py,annotations,FAIL,15,0,2 +BSK-E0002|BSK-E0014|BSK-E0015|BSK-E0122|BSK-E0140,callables_annotation.py,callables,FAIL,16,0,2 BSK-E0012|BSK-E0140|BSK-E0141,callables_kwargs.py,callables,PASS,13,0,0 -BSK-E0140,callables_protocol.py,callables,PASS,17,0,0 -BSK-E0014|BSK-E0136,callables_subtyping.py,callables,PASS,32,0,0 -BSK-E0014|BSK-E0036|BSK-E0044|BSK-E0121,classes_classvar.py,classes,PASS,17,0,0 -,classes_override.py,classes,PASS,0,0,0 +BSK-E0001|BSK-E0002|BSK-E0140,callables_protocol.py,callables,FAIL,17,0,6 +BSK-E0002|BSK-E0014|BSK-E0136,callables_subtyping.py,callables,FAIL,32,0,9 +BSK-E0001|BSK-E0002|BSK-E0011|BSK-E0014|BSK-E0036|BSK-E0044|BSK-E0121,classes_classvar.py,classes,FAIL,17,0,3 +BSK-E0002,classes_override.py,classes,FAIL,0,5,2 BSK-E0111|BSK-E0128,constructors_call_init.py,constructors,PASS,5,0,0 -BSK-E0041,constructors_call_metaclass.py,constructors,PASS,2,0,0 -BSK-E0074,constructors_call_new.py,constructors,PASS,2,0,0 -BSK-E0144,constructors_call_type.py,constructors,PASS,8,0,0 -BSK-E0153,constructors_callable.py,constructors,PASS,12,0,0 +BSK-E0002|BSK-E0004|BSK-E0011|BSK-E0041,constructors_call_metaclass.py,constructors,FAIL,2,0,4 +BSK-E0002|BSK-E0004|BSK-E0011|BSK-E0074,constructors_call_new.py,constructors,FAIL,2,0,11 +BSK-E0002|BSK-E0004|BSK-E0144,constructors_call_type.py,constructors,FAIL,8,0,8 +BSK-E0004|BSK-E0153,constructors_callable.py,constructors,FAIL,12,0,1 ,constructors_consistency.py,constructors,PASS,0,0,0 ,dataclasses_descriptors.py,dataclasses,PASS,0,0,0 BSK-E0054,dataclasses_final.py,dataclasses,PASS,5,0,0 BSK-E0052,dataclasses_frozen.py,dataclasses,PASS,2,0,0 -BSK-E0063,dataclasses_hash.py,dataclasses,PASS,4,0,0 +BSK-E0001|BSK-E0063,dataclasses_hash.py,dataclasses,FAIL,4,0,1 BSK-E0017,dataclasses_inheritance.py,dataclasses,PASS,2,0,0 BSK-E0069,dataclasses_kwonly.py,dataclasses,PASS,3,0,0 -BSK-E0059,dataclasses_match_args.py,dataclasses,PASS,1,0,0 
+BSK-E0005|BSK-E0059,dataclasses_match_args.py,dataclasses,FAIL,1,0,1 BSK-E0060,dataclasses_order.py,dataclasses,PASS,1,0,0 BSK-E0095,dataclasses_postinit.py,dataclasses,PASS,4,0,0 -BSK-E0108,dataclasses_slots.py,dataclasses,PASS,4,0,0 +BSK-E0002|BSK-E0005|BSK-E0108,dataclasses_slots.py,dataclasses,FAIL,4,1,5 BSK-E0142,dataclasses_transform_class.py,dataclasses,PASS,6,0,0 BSK-E0142,dataclasses_transform_converter.py,dataclasses,PASS,9,0,0 BSK-E0069,dataclasses_transform_field.py,dataclasses,PASS,2,0,0 BSK-E0014|BSK-E0052|BSK-E0060|BSK-E0069|BSK-E0111,dataclasses_transform_func.py,dataclasses,PASS,5,0,0 -BSK-E0138,dataclasses_transform_meta.py,dataclasses,PASS,6,0,0 -BSK-E0041|BSK-E0069|BSK-E0096,dataclasses_usage.py,dataclasses,PASS,8,0,0 -BSK-E0039|BSK-E0053,directives_assert_type.py,directives,PASS,7,0,0 +BSK-E0004|BSK-E0138,dataclasses_transform_meta.py,dataclasses,FAIL,6,0,1 +BSK-E0002|BSK-E0005|BSK-E0041|BSK-E0069|BSK-E0096,dataclasses_usage.py,dataclasses,FAIL,8,3,3 +BSK-E0002|BSK-E0039|BSK-E0053,directives_assert_type.py,directives,FAIL,7,0,2 BSK-E0031,directives_cast.py,directives,PASS,3,0,0 -BSK-E0115,directives_deprecated.py,directives,PASS,12,0,0 -BSK-E0012|BSK-E0013|BSK-E0041,directives_no_type_check.py,directives,PASS,1,0,0 -BSK-E0033,directives_reveal_type.py,directives,PASS,2,0,0 +BSK-E0010|BSK-E0115,directives_deprecated.py,directives,FAIL,12,0,1 +BSK-E0011|BSK-E0012|BSK-E0013|BSK-E0041,directives_no_type_check.py,directives,FAIL,1,0,1 +BSK-E0002|BSK-E0033,directives_reveal_type.py,directives,FAIL,2,0,1 ,directives_type_checking.py,directives,PASS,0,0,0 ,directives_type_ignore.py,directives,PASS,0,0,0 ,directives_type_ignore_file1.py,directives,PASS,0,0,0 BSK-E0014,directives_type_ignore_file2.py,directives,PASS,1,0,0 -BSK-E0150,directives_version_platform.py,directives,PASS,3,0,0 -BSK-E0040,enums_behaviors.py,enums,PASS,1,0,0 +BSK-E0002|BSK-E0150,directives_version_platform.py,directives,FAIL,3,0,1 +BSK-E0040,enums_behaviors.py,enums,FAIL,1,2,0 
,enums_definition.py,enums,PASS,0,0,0 -BSK-E0061,enums_expansion.py,enums,PASS,1,0,0 -,enums_member_names.py,enums,PASS,0,0,0 -BSK-E0066,enums_member_values.py,enums,PASS,2,0,0 -BSK-E0046|BSK-E0067,enums_members.py,enums,PASS,7,0,0 -,exceptions_context_managers.py,exceptions,PASS,0,0,0 -BSK-E0027|BSK-E0047|BSK-E0092|BSK-E0132|BSK-E0134,generics_base_class.py,generics,PASS,7,0,0 -BSK-E0026|BSK-E0027|BSK-E0043|BSK-E0148,generics_basic.py,generics,PASS,13,0,0 -BSK-E0030|BSK-E0091|BSK-E0092,generics_defaults.py,generics,PASS,5,0,0 -BSK-E0102|BSK-E0128|BSK-E0130,generics_defaults_referential.py,generics,PASS,7,0,0 -BSK-E0014|BSK-E0092,generics_defaults_specialization.py,generics,PASS,3,0,0 +BSK-E0002|BSK-E0061,enums_expansion.py,enums,FAIL,1,0,4 +BSK-E0002,enums_member_names.py,enums,FAIL,0,0,2 +BSK-E0002|BSK-E0010|BSK-E0066,enums_member_values.py,enums,FAIL,2,0,5 +BSK-E0002|BSK-E0010|BSK-E0046|BSK-E0067,enums_members.py,enums,FAIL,7,0,2 +BSK-E0001|BSK-E0011,exceptions_context_managers.py,exceptions,FAIL,0,0,6 +BSK-E0002|BSK-E0027|BSK-E0047|BSK-E0092|BSK-E0132|BSK-E0134,generics_base_class.py,generics,FAIL,7,0,3 +BSK-E0002|BSK-E0026|BSK-E0027|BSK-E0043|BSK-E0148,generics_basic.py,generics,FAIL,13,0,3 +BSK-E0002|BSK-E0030|BSK-E0091|BSK-E0092,generics_defaults.py,generics,FAIL,5,1,6 +BSK-E0002|BSK-E0102|BSK-E0128|BSK-E0130,generics_defaults_referential.py,generics,FAIL,7,0,1 +BSK-E0002|BSK-E0014|BSK-E0092,generics_defaults_specialization.py,generics,FAIL,3,0,1 BSK-E0026|BSK-E0047,generics_paramspec_basic.py,generics,PASS,7,0,0 BSK-E0122,generics_paramspec_components.py,generics,PASS,16,0,0 BSK-E0122,generics_paramspec_semantics.py,generics,PASS,9,0,0 BSK-E0092|BSK-E0122,generics_paramspec_specialization.py,generics,PASS,5,0,0 -BSK-E0117|BSK-E0130,generics_scoping.py,generics,PASS,10,0,0 +BSK-E0117|BSK-E0130,generics_scoping.py,generics,FAIL,10,4,0 ,generics_self_advanced.py,generics,PASS,0,0,0 BSK-E0075,generics_self_attributes.py,generics,PASS,2,0,0 
BSK-E0078,generics_self_basic.py,generics,PASS,3,0,0 BSK-E0077,generics_self_protocols.py,generics,PASS,2,0,0 -BSK-E0078|BSK-E0094,generics_self_usage.py,generics,PASS,11,0,0 +BSK-E0025|BSK-E0078|BSK-E0094,generics_self_usage.py,generics,FAIL,11,0,1 BSK-E0042,generics_syntax_compatibility.py,generics,PASS,2,0,0 -BSK-E0043|BSK-E0089|BSK-E0105,generics_syntax_declarations.py,generics,PASS,10,0,0 -BSK-E0055|BSK-E0130,generics_syntax_infer_variance.py,generics,PASS,18,0,0 -BSK-E0149,generics_syntax_scoping.py,generics,PASS,7,0,0 +BSK-E0002|BSK-E0043|BSK-E0089|BSK-E0105,generics_syntax_declarations.py,generics,FAIL,10,0,1 +BSK-E0002|BSK-E0055|BSK-E0130,generics_syntax_infer_variance.py,generics,FAIL,18,0,4 +BSK-E0002|BSK-E0005|BSK-E0149,generics_syntax_scoping.py,generics,FAIL,7,0,9 BSK-E0111|BSK-E0125,generics_type_erasure.py,generics,PASS,7,0,0 -BSK-E0085,generics_typevartuple_args.py,generics,PASS,8,0,0 -BSK-E0055|BSK-E0083|BSK-E0084|BSK-E0085|BSK-E0086,generics_typevartuple_basic.py,generics,PASS,13,0,0 -BSK-E0082,generics_typevartuple_callable.py,generics,PASS,1,0,0 -,generics_typevartuple_concat.py,generics,PASS,0,0,0 -,generics_typevartuple_overloads.py,generics,PASS,0,0,0 -BSK-E0086|BSK-E0130|BSK-E0139,generics_typevartuple_specialization.py,generics,PASS,6,0,0 -BSK-E0081,generics_typevartuple_unpack.py,generics,PASS,1,0,0 -BSK-E0026|BSK-E0055|BSK-E0080,generics_upper_bound.py,generics,PASS,3,0,0 +BSK-E0002|BSK-E0085,generics_typevartuple_args.py,generics,FAIL,8,0,1 +BSK-E0002|BSK-E0055|BSK-E0083|BSK-E0084|BSK-E0085|BSK-E0086,generics_typevartuple_basic.py,generics,FAIL,13,1,3 +BSK-E0002|BSK-E0082,generics_typevartuple_callable.py,generics,FAIL,1,0,1 +BSK-E0002,generics_typevartuple_concat.py,generics,FAIL,0,0,2 +BSK-E0002,generics_typevartuple_overloads.py,generics,FAIL,0,0,1 +BSK-E0002|BSK-E0086|BSK-E0130|BSK-E0139,generics_typevartuple_specialization.py,generics,FAIL,6,0,11 +BSK-E0002|BSK-E0081,generics_typevartuple_unpack.py,generics,FAIL,1,0,2 
+BSK-E0002|BSK-E0026|BSK-E0055|BSK-E0080,generics_upper_bound.py,generics,FAIL,3,1,1 BSK-E0055|BSK-E0107,generics_variance.py,generics,PASS,9,0,0 -BSK-E0130,generics_variance_inference.py,generics,PASS,23,0,0 +BSK-E0002|BSK-E0130,generics_variance_inference.py,generics,FAIL,23,0,3 BSK-E0071,historical_positional.py,historical,PASS,4,0,0 -BSK-E0127,literals_interactions.py,literals,PASS,4,0,0 -BSK-E0014|BSK-E0051|BSK-E0109|BSK-E0126|BSK-E0129,literals_literalstring.py,literals,PASS,9,0,0 -BSK-E0014|BSK-E0051|BSK-E0068|BSK-E0117|BSK-E0129|BSK-E0130,literals_parameterizations.py,literals,PASS,17,0,0 -BSK-E0014|BSK-E0129,literals_semantics.py,literals,PASS,4,0,0 +BSK-E0002|BSK-E0127,literals_interactions.py,literals,FAIL,4,0,2 +BSK-E0002|BSK-E0014|BSK-E0051|BSK-E0109|BSK-E0126|BSK-E0129,literals_literalstring.py,literals,FAIL,9,0,9 +BSK-E0002|BSK-E0014|BSK-E0051|BSK-E0068|BSK-E0117|BSK-E0129|BSK-E0130,literals_parameterizations.py,literals,FAIL,17,0,1 +BSK-E0002|BSK-E0014|BSK-E0129,literals_semantics.py,literals,FAIL,4,0,4 BSK-E0111|BSK-E0116|BSK-E0143,namedtuples_define_class.py,namedtuples,PASS,14,0,0 BSK-E0041|BSK-E0064,namedtuples_define_functional.py,namedtuples,PASS,9,0,0 BSK-E0073,namedtuples_type_compat.py,namedtuples,PASS,2,0,0 BSK-E0143,namedtuples_usage.py,namedtuples,PASS,8,0,0 -BSK-E0101|BSK-E0112,narrowing_typeguard.py,narrowing,PASS,4,0,0 -BSK-E0101|BSK-E0112|BSK-E0113,narrowing_typeis.py,narrowing,PASS,9,0,0 +BSK-E0002|BSK-E0011|BSK-E0101|BSK-E0112,narrowing_typeguard.py,narrowing,FAIL,4,0,2 +BSK-E0002|BSK-E0011|BSK-E0101|BSK-E0112|BSK-E0113,narrowing_typeis.py,narrowing,FAIL,9,0,2 BSK-E0072,overloads_basic.py,overloads,PASS,1,0,0 -,overloads_consistency.py,overloads,PASS,0,0,0 -BSK-E0020|BSK-E0034,overloads_definitions.py,overloads,PASS,0,0,0 +,overloads_consistency.py,overloads,FAIL,0,2,0 +BSK-E0020|BSK-E0034,overloads_definitions.py,overloads,FAIL,0,7,0 BSK-E0012|BSK-E0041|BSK-E0076,overloads_evaluation.py,overloads,PASS,4,0,0 
-BSK-E0099|BSK-E0146,protocols_class_objects.py,protocols,PASS,8,0,0 -BSK-E0036|BSK-E0097|BSK-E0121,protocols_definition.py,protocols,PASS,21,0,0 -BSK-E0099|BSK-E0118|BSK-E0123|BSK-E0124,protocols_explicit.py,protocols,PASS,6,0,0 -BSK-E0130|BSK-E0137,protocols_generic.py,protocols,PASS,9,0,0 -BSK-E0098|BSK-E0099|BSK-E0121,protocols_merging.py,protocols,PASS,6,0,0 -BSK-E0079,protocols_modules.py,protocols,PASS,3,0,0 +BSK-E0004|BSK-E0099|BSK-E0146,protocols_class_objects.py,protocols,FAIL,8,0,1 +BSK-E0001|BSK-E0011|BSK-E0036|BSK-E0097|BSK-E0121,protocols_definition.py,protocols,FAIL,21,0,8 +BSK-E0002|BSK-E0099|BSK-E0118|BSK-E0123|BSK-E0124,protocols_explicit.py,protocols,FAIL,6,0,3 +BSK-E0002|BSK-E0130|BSK-E0137,protocols_generic.py,protocols,FAIL,9,0,3 +BSK-E0002|BSK-E0098|BSK-E0099|BSK-E0121,protocols_merging.py,protocols,FAIL,6,0,1 +BSK-E0010|BSK-E0079,protocols_modules.py,protocols,FAIL,3,0,2 ,protocols_recursive.py,protocols,PASS,0,0,0 -BSK-E0114|BSK-E0119,protocols_runtime_checkable.py,protocols,PASS,6,0,0 +BSK-E0002|BSK-E0114|BSK-E0119,protocols_runtime_checkable.py,protocols,FAIL,6,0,3 ,protocols_self.py,protocols,PASS,0,0,0 -BSK-E0014|BSK-E0099,protocols_subtyping.py,protocols,PASS,7,0,0 +BSK-E0002|BSK-E0014|BSK-E0099,protocols_subtyping.py,protocols,FAIL,7,0,4 BSK-E0110|BSK-E0133,protocols_variance.py,protocols,PASS,5,0,0 BSK-E0045|BSK-E0058,qualifiers_annotated.py,qualifiers,PASS,20,0,0 -BSK-E0014|BSK-E0041|BSK-E0044|BSK-E0054|BSK-E0064,qualifiers_final_annotation.py,qualifiers,PASS,26,0,0 -BSK-E0034,qualifiers_final_decorator.py,qualifiers,PASS,3,0,0 -,specialtypes_any.py,specialtypes,PASS,0,0,0 -BSK-E0062|BSK-E0070,specialtypes_never.py,specialtypes,PASS,3,0,0 -BSK-E0012|BSK-E0014,specialtypes_none.py,specialtypes,PASS,3,0,0 -BSK-E0065,specialtypes_promotions.py,specialtypes,PASS,1,0,0 -BSK-E0015|BSK-E0092|BSK-E0145,specialtypes_type.py,specialtypes,PASS,9,0,0 -BSK-E0014|BSK-E0045|BSK-E0147,tuples_type_compat.py,tuples,PASS,16,0,0 
-BSK-E0014|BSK-E0049|BSK-E0090,tuples_type_form.py,tuples,PASS,11,0,0 -BSK-E0049,tuples_unpacked.py,tuples,PASS,4,0,0 +BSK-E0010|BSK-E0014|BSK-E0041|BSK-E0044|BSK-E0054|BSK-E0064,qualifiers_final_annotation.py,qualifiers,FAIL,26,0,2 +BSK-E0010|BSK-E0025|BSK-E0034,qualifiers_final_decorator.py,qualifiers,FAIL,3,3,1 +BSK-E0001|BSK-E0002,specialtypes_any.py,specialtypes,FAIL,0,0,2 +BSK-E0002|BSK-E0062|BSK-E0070,specialtypes_never.py,specialtypes,FAIL,3,0,2 +BSK-E0002|BSK-E0012|BSK-E0014,specialtypes_none.py,specialtypes,FAIL,3,0,1 +BSK-E0002|BSK-E0065,specialtypes_promotions.py,specialtypes,FAIL,1,0,1 +BSK-E0002|BSK-E0015|BSK-E0092|BSK-E0145,specialtypes_type.py,specialtypes,FAIL,9,0,6 +BSK-E0002|BSK-E0014|BSK-E0023|BSK-E0045|BSK-E0147,tuples_type_compat.py,tuples,FAIL,16,0,11 +BSK-E0011|BSK-E0014|BSK-E0049|BSK-E0090,tuples_type_form.py,tuples,FAIL,11,0,1 +BSK-E0002|BSK-E0049,tuples_unpacked.py,tuples,FAIL,4,0,3 BSK-E0037,typeddicts_alt_syntax.py,typeddicts,PASS,4,0,0 -BSK-E0029|BSK-E0032,typeddicts_class_syntax.py,typeddicts,PASS,3,0,0 -BSK-E0014|BSK-E0093|BSK-E0141|BSK-E0156,typeddicts_extra_items.py,typeddicts,PASS,23,0,0 +BSK-E0002|BSK-E0029|BSK-E0032,typeddicts_class_syntax.py,typeddicts,PASS,3,0,0 +BSK-E0002|BSK-E0014|BSK-E0093|BSK-E0141|BSK-E0156,typeddicts_extra_items.py,typeddicts,FAIL,23,5,2 ,typeddicts_final.py,typeddicts,PASS,0,0,0 BSK-E0038,typeddicts_inheritance.py,typeddicts,PASS,2,0,0 -BSK-E0093,typeddicts_operations.py,typeddicts,PASS,11,0,0 +BSK-E0002|BSK-E0093,typeddicts_operations.py,typeddicts,FAIL,11,0,1 BSK-E0056,typeddicts_readonly.py,typeddicts,PASS,6,0,0 -BSK-E0014,typeddicts_readonly_consistency.py,typeddicts,PASS,7,0,0 -BSK-E0038|BSK-E0056|BSK-E0093,typeddicts_readonly_inheritance.py,typeddicts,PASS,11,0,0 +BSK-E0002|BSK-E0014,typeddicts_readonly_consistency.py,typeddicts,FAIL,7,0,2 +BSK-E0003|BSK-E0038|BSK-E0056|BSK-E0093,typeddicts_readonly_inheritance.py,typeddicts,PASS,11,0,0 BSK-E0056,typeddicts_readonly_kwargs.py,typeddicts,PASS,1,0,0 
BSK-E0056|BSK-E0093,typeddicts_readonly_update.py,typeddicts,PASS,1,0,0 BSK-E0035,typeddicts_required.py,typeddicts,PASS,4,0,0 diff --git a/crates/basilisk-cli/tests/conformance_tests.rs b/crates/basilisk-cli/tests/conformance_tests.rs index e9439c98..c631db14 100644 --- a/crates/basilisk-cli/tests/conformance_tests.rs +++ b/crates/basilisk-cli/tests/conformance_tests.rs @@ -7,10 +7,14 @@ clippy::panic, clippy::as_conversions )] -//! PEP conformance test harness. +//! PEP conformance test harness — faithful port of the **official** scoring. //! -//! Runs every `.py` file from the `python/typing` conformance suite against -//! the Basilisk pipeline and prints a scored report. +//! Runs every `.py` file from the `python/typing` conformance suite against the +//! Basilisk pipeline and scores it with the **exact** algorithm the upstream +//! `python/typing` tool uses (`conformance/src/main.py`, +//! `get_expected_errors` + `diff_expected_errors`). There are **no +//! Basilisk-specific scoring rules** and **no excluded diagnostic codes** — a +//! file passes iff the official `errors_diff` is empty. //! //! ## Prerequisites //! @@ -21,25 +25,38 @@ //! make conformance FETCH=1 # force re-download + run //! ``` //! -//! ## Annotation format (from the python/typing spec) +//! ## Annotation format (verbatim from `python/typing`) //! -//! Each line in a conformance file may carry one of these trailing comments: +//! For every source line, the upstream tool first strips the comment +//! (`line.split('#')[0]`); if nothing but whitespace precedes the first `#`, +//! the whole line is **ignored** (this is how commented-out cases are skipped). +//! Otherwise it scans the *raw* line for these markers: //! -//! | Annotation | Meaning | -//! |-------------|-------------------------------------------------------| -//! | `# E` | A type error MUST be reported on this line | -//! | `# E?` | A type error MAY be reported (optional) | -//! 
| `# E[tag]` | Exactly one line sharing this tag must error | -//! | `# E[tag+]` | One or more lines sharing this tag may error | +//! | Marker | Regex (upstream) | Meaning | +//! |-------------|-----------------------------|------------------------------------------| +//! | `# E` | `# E\??(?=:\|$\| )` | An error MUST be reported on this line | +//! | `# E?` | `# E\??(?=:\|$\| )` | An error MAY be reported (optional) | +//! | `# E[tag]` | `# E\[([^\]]+)\]` | Exactly one line in the group must error | +//! | `# E[tag+]` | `# E\[([^\]]+)\]` | One or more lines in the group may error | //! -//! Anything after the annotation (e.g. `# E: some explanation`) is ignored. +//! The `(?=:|$| )` lookahead means the marker must be followed by `:`, end of +//! line, or a space — so `# Exception` and `# E0001` do **not** match. //! -//! ## Scoring +//! ## Scoring (official `diff_expected_errors`) //! -//! A file **passes** when every required `# E` line has at least one -//! diagnostic from Basilisk. Optional `# E?` lines and tag groups are -//! tracked but do not affect pass/fail. False positives (Basilisk reports -//! errors on unmarked lines) are counted separately for visibility. +//! A file's `errors_diff` collects three kinds of discrepancy: +//! +//! 1. **Missed required** — a `# E` line where Basilisk reported no error. +//! 2. **Missed tag group** — a `# E[tag]` group where no line errored (or, for +//! the non-`+` form, more than one line errored). +//! 3. **Unexpected error** — Basilisk reported an error on a line carrying +//! neither a `# E`/`# E?` marker nor a satisfied tag-group line. These are +//! the **false positives**, and — unlike the previous in-repo harness — they +//! **fail the file**, exactly as upstream does +//! (`conformance_automated = "Fail" if errors_diff.strip() else "Pass"`). +//! +//! Every `Severity::Error` diagnostic Basilisk emits is counted; **no code is +//! excluded**. This is the same number a user sees from `basilisk check`. //! //! 
## Skip behaviour //! @@ -47,7 +64,7 @@ //! and exits with success so that CI on a fresh checkout does not break. use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::{BTreeMap, BTreeSet, HashMap, HashSet}, fs, path::Path, }; @@ -57,319 +74,225 @@ use basilisk_parser::parse_file; use basilisk_resolver::resolve; // --------------------------------------------------------------------------- -// Annotation parsing +// Expected-error parsing — faithful port of `get_expected_errors` (main.py) // --------------------------------------------------------------------------- -#[derive(Debug, Clone, PartialEq, Eq)] -enum Annotation { - /// Error must be reported on this line. - Required, - /// Error may optionally be reported. - Optional, - /// Tagged group: exactly one line with this tag must error. - TaggedExact(String), - /// Tagged group: one or more lines with this tag may error. - TaggedMulti(String), +/// Expected-error annotations parsed from one conformance file. +struct Expected { + /// 1-based line → (required count, optional count). A line is present iff + /// it carries at least one `# E` or `# E?` marker. + lines: HashMap, + /// tag → (line numbers carrying the tag, `allow_multiple`). + groups: HashMap, bool)>, } -/// Parse a single source line and return the annotation, if any. -fn parse_annotation(line: &str) -> Option { - // Skip full-line comments — a `# E` inside a comment is not a real - // annotation because the line contains no executable code for the - // checker to flag. - if line.trim_start().starts_with('#') { - // Allow lines that are ONLY a `# E` marker (pure annotation lines - // are used in some conformance files), but skip lines where real - // code has been commented out with a trailing `# E`. 
- let trimmed = line.trim(); - // Pure annotation: `# E`, `# E: explanation`, `# E[tag]`, `# E?` - let after_hash = trimmed.strip_prefix('#')?.trim_start(); - if !after_hash.starts_with('E') { - return None; - } - } - - // Find the last `# E` marker on the line. - let marker = line.rfind("# E")?; - let rest = line[marker + 2..].trim(); // everything after "#" - - if rest.starts_with("E?") { - return Some(Annotation::Optional); - } +/// Apply the upstream `(?=:|$| )` lookahead: the char immediately after the +/// marker must be `:`, a space, or the end of the line. +fn lookahead_ok(after: &str) -> bool { + matches!(after.chars().next(), None | Some(':') | Some(' ')) +} - if rest.starts_with("E[") { - let inner = rest.strip_prefix("E[")?; - // Find closing ] — ignore anything after it (description text) - if let Some(close) = inner.find(']') { - let tag = &inner[..close]; - if tag.ends_with('+') { - return Some(Annotation::TaggedMulti( - tag.trim_end_matches('+').to_owned(), - )); +/// Count `# E` (required) and `# E?` (optional) markers on a line, matching the +/// upstream regex `# E\??(?=:|$| )` exactly. +fn count_markers(line: &str) -> (u32, u32) { + let (mut required, mut optional) = (0u32, 0u32); + let mut search_from = 0usize; + while let Some(rel) = line[search_from..].find("# E") { + let idx = search_from + rel; + let after = &line[idx + 3..]; // chars after "# E" + if let Some(rest) = after.strip_prefix('?') { + // `\??` greedily consumed the `?`; lookahead applies to what follows. + if lookahead_ok(rest) { + optional += 1; } - return Some(Annotation::TaggedExact(tag.to_owned())); + } else if lookahead_ok(after) { + required += 1; } - // No closing ] at all — malformed, treat as required - return Some(Annotation::Required); + // Advance past this "# E" occurrence (upstream finditer is non-overlapping). + search_from = idx + 3; } + (required, optional) +} - // `# E` standing alone, or followed by `:`/whitespace + description text - // (e.g. 
`# E`, `# E: explanation`, `# E (see ...)`). The char immediately - // after `E` must be a boundary — end-of-marker, `:`, or whitespace — so we - // accept the upstream `# E (…)` form while still rejecting words such as - // `# Exception` or `# Edge case`. NOTE: inspect the *untrimmed* remainder; - // trimming first would erase the space boundary and silently drop `# E (…)`. - if let Some(after) = rest.strip_prefix('E') { - if after.is_empty() || after.starts_with(':') || after.starts_with(char::is_whitespace) { - return Some(Annotation::Required); +/// Parse `# E[tag]` / `# E[tag+]` groups on a line, matching the upstream regex +/// `# E\[([^\]]+)\]` exactly. +fn parse_groups(line: &str) -> Vec<(String, bool)> { + let mut out = Vec::new(); + let mut search_from = 0usize; + while let Some(rel) = line[search_from..].find("# E[") { + let open = search_from + rel + "# E[".len(); + let Some(close_rel) = line[open..].find(']') else { + break; + }; + let inner = &line[open..open + close_rel]; + if !inner.is_empty() { + let (tag, allow_multiple) = inner + .strip_suffix('+') + .map_or((inner, false), |stripped| (stripped, true)); + out.push((tag.to_owned(), allow_multiple)); } + search_from = open + close_rel + 1; } - - None + out } -// --------------------------------------------------------------------------- -// Line-number helper (byte offset → 1-based line) -// --------------------------------------------------------------------------- +/// Faithful port of upstream `get_expected_errors`. 
+fn get_expected_errors(source: &str) -> Expected { + let mut lines: HashMap = HashMap::new(); + let mut groups: HashMap, bool)> = HashMap::new(); -fn byte_offset_to_line(source: &str, offset: u32) -> usize { - let clamped = (offset as usize).min(source.len()); - source[..clamped].chars().filter(|&c| c == '\n').count() + 1 -} - -// --------------------------------------------------------------------------- -// Per-file result -// --------------------------------------------------------------------------- + for (idx, line) in source.lines().enumerate() { + let lineno = idx + 1; + // `line.split('#')[0]` — skip lines with no code before the first '#' + // (this is how upstream ignores commented-out test cases). + let before_hash = line.split('#').next().unwrap_or(""); + if before_hash.trim().is_empty() { + continue; + } -#[derive(Debug, Default)] -struct FileResult { - /// `# E` lines that Basilisk caught. - caught: usize, - /// `# E` lines that Basilisk missed. - missed: usize, - /// Lines Basilisk flagged that had no annotation (false positives). - false_positives: usize, - /// `# E?` optional lines where Basilisk did fire. - #[expect(dead_code, reason = "tracked for future reporting")] - optional_caught: usize, - /// `# E[tag]` groups satisfied. - tagged_exact_satisfied: usize, - /// `# E[tag]` groups missed. - tagged_exact_missed: usize, - /// Distinct Basilisk rule codes fired on this file (conformance-relevant only). 
- rules_fired: Vec, -} + let (required, optional) = count_markers(line); + if required > 0 || optional > 0 { + let _ = lines.insert(lineno, (required, optional)); + } -impl FileResult { - fn passes(&self) -> bool { - self.missed == 0 + for (tag, allow_multiple) in parse_groups(line) { + let entry = groups.entry(tag).or_insert_with(|| (Vec::new(), allow_multiple)); + entry.0.push(lineno); + } } + + Expected { lines, groups } } // --------------------------------------------------------------------------- -// Annotation collection +// Diagnostic collection — every Severity::Error, NO exclusions // --------------------------------------------------------------------------- -struct Annotations { - required: HashSet, - optional: HashSet, - tagged_exact: HashMap>, - tagged_multi: HashMap>, +/// Line numbers (1-based) where Basilisk reported an `Error`, with the codes +/// that fired there. This is exactly what `basilisk check` prints — no code is +/// filtered out. +struct Diagnostics { + by_line: HashMap>, + rules_seen: BTreeSet, } -/// Scan source lines and collect all conformance annotations by 1-based line -/// number. 
-fn collect_annotations(source: &str) -> Annotations { - let mut required: HashSet = HashSet::new(); - let mut optional: HashSet = HashSet::new(); - let mut tagged_exact: HashMap> = HashMap::new(); - let mut tagged_multi: HashMap> = HashMap::new(); +fn byte_offset_to_line(source: &str, offset: u32) -> usize { + let clamped = (offset as usize).min(source.len()); + source[..clamped].chars().filter(|&c| c == '\n').count() + 1 +} - for (idx, line) in source.lines().enumerate() { - let lineno = idx + 1; - match parse_annotation(line) { - Some(Annotation::Required) => { - let _ = required.insert(lineno); - } - Some(Annotation::Optional) => { - let _ = optional.insert(lineno); - } - Some(Annotation::TaggedExact(tag)) => { - let _ = tagged_exact.entry(tag).or_default().insert(lineno); +fn collect_diagnostics(path: &Path, source: &str) -> Diagnostics { + let mut by_line: HashMap> = HashMap::new(); + let mut rules_seen = BTreeSet::new(); + + if let Ok(parsed) = parse_file(path.to_string_lossy().as_ref()) { + if let Ok(resolved) = resolve(&parsed) { + for diag in check(&resolved) + .iter() + .filter(|d| d.severity == basilisk_checker::Severity::Error) + { + let _ = rules_seen.insert(diag.code.code.to_owned()); + let line = byte_offset_to_line(source, diag.span.start); + by_line.entry(line).or_default().push(diag.code.code.to_owned()); } - Some(Annotation::TaggedMulti(tag)) => { - let _ = tagged_multi.entry(tag).or_default().insert(lineno); - } - None => {} } } - Annotations { - required, - optional, - tagged_exact, - tagged_multi, - } + Diagnostics { by_line, rules_seen } } // --------------------------------------------------------------------------- -// Diagnostic collection +// The official diff — faithful port of `diff_expected_errors` (main.py) // --------------------------------------------------------------------------- -struct DiagnosticOutput { - diag_lines: HashSet, - rules_seen: std::collections::BTreeSet, - diag_line_rules: HashMap>, +/// One scored conformance 
file. +#[derive(Debug, Default)] +struct FileResult { + /// Required lines Basilisk caught. + required_caught: usize, + /// `# E` lines + tag groups Basilisk missed (false negatives). + missed: usize, + /// Lines Basilisk flagged that no annotation expected (false positives). + false_positives: usize, + /// Distinct Basilisk codes that fired on this file. + rules_fired: Vec, + /// The upstream-style discrepancy strings (empty ⇒ Pass). + diffs: Vec, } -/// Run the Basilisk pipeline on `path` and collect diagnostic lines, filtering -/// out strictness-only rules. -fn collect_diagnostics(path: &Path, source: &str) -> DiagnosticOutput { - // Rules that are Basilisk-specific strictness requirements not covered by - // the PEP conformance suite. These codes are excluded from both the - // "caught" count and the false-positive count so they do not inflate or - // deflate the conformance score: - // - // - E0001–E0005: annotation completeness (PEP suite fixtures are unannotated) - // - E0010, E0011: import strictness and Any warnings - // - E0023: non-exhaustive match — PEP conformance suite tests type narrowing - // inside match arms but does not require a wildcard `case _:` branch - // - E0025: missing @override (PEP 698 makes @override optional documentation) - const STRICTNESS_ONLY: &[&str] = &[ - "BSK-E0001", - "BSK-E0002", - "BSK-E0003", - "BSK-E0004", - "BSK-E0005", - "BSK-E0010", - "BSK-E0011", - "BSK-E0023", - "BSK-E0025", - ]; - - let mut rules_seen = std::collections::BTreeSet::new(); - let mut diag_line_rules: HashMap> = HashMap::new(); - - let diag_lines: HashSet = match parse_file(path.to_string_lossy().as_ref()) { - Ok(parsed) => match resolve(&parsed) { - Ok(resolved) => { - let diags = check(&resolved); - diags - .iter() - .filter(|d| d.severity == basilisk_checker::Severity::Error) - .filter(|d| !STRICTNESS_ONLY.contains(&d.code.code)) - .map(|d| { - let _ = rules_seen.insert(d.code.code.to_owned()); - let line = byte_offset_to_line(source, d.span.start); 
- diag_line_rules - .entry(line) - .or_default() - .push(d.code.code.to_owned()); - line - }) - .collect() - } - Err(_) => HashSet::new(), - }, - Err(_) => HashSet::new(), - }; - - DiagnosticOutput { - diag_lines, - rules_seen, - diag_line_rules, +impl FileResult { + /// A file passes iff the official `errors_diff` is empty. + fn passes(&self) -> bool { + self.diffs.is_empty() } } -// --------------------------------------------------------------------------- -// Run one conformance file -// --------------------------------------------------------------------------- - fn run_file(path: &Path) -> FileResult { let Ok(source) = fs::read_to_string(path) else { return FileResult::default(); }; - let annotations = collect_annotations(&source); + let expected = get_expected_errors(&source); let diagnostics = collect_diagnostics(path, &source); + let errors = &diagnostics.by_line; + + let mut diffs: Vec = Vec::new(); + let mut missed = 0usize; + let mut false_positives = 0usize; + + // 1. Missed required lines. + let mut required_caught = 0usize; + for (&lineno, &(required, _optional)) in &expected.lines { + if required > 0 { + if errors.contains_key(&lineno) { + required_caught += 1; + } else { + missed += 1; + diffs.push(format!("Line {lineno}: Expected {required} errors")); + } + } + } - // Score required lines. - let caught = annotations - .required - .iter() - .filter(|l| diagnostics.diag_lines.contains(l)) - .count(); - let missed = annotations.required.len() - caught; - - // Score optional lines. - let optional_caught = annotations - .optional - .iter() - .filter(|l| diagnostics.diag_lines.contains(l)) - .count(); - - // Score tagged-exact groups: a group passes if at least one line errored. - let mut tagged_exact_satisfied = 0usize; - let mut tagged_exact_missed = 0usize; - for lines in annotations.tagged_exact.values() { - if lines.iter().any(|l| diagnostics.diag_lines.contains(l)) { - tagged_exact_satisfied += 1; + // 2. 
Tag groups (and the set of group lines that "absorb" an error so they + // are not later counted as unexpected). + let mut linenos_used_by_groups: HashSet = HashSet::new(); + for (tag, (linenos, allow_multiple)) in &expected.groups { + let num_errors = linenos.iter().filter(|l| errors.contains_key(l)).count(); + if num_errors == 0 { + missed += 1; + diffs.push(format!("Lines {linenos:?}: Expected error (tag {tag:?})")); + } else if num_errors == 1 || *allow_multiple { + linenos_used_by_groups.extend(linenos.iter().copied()); } else { - tagged_exact_missed += 1; + missed += 1; + diffs.push(format!("Lines {linenos:?}: Expected exactly one error (tag {tag:?})")); } } - // All annotated lines (don't count false positives on annotated lines). - let all_annotated: HashSet = annotations - .required - .iter() - .chain(annotations.optional.iter()) - .chain(annotations.tagged_exact.values().flatten()) - .chain(annotations.tagged_multi.values().flatten()) - .copied() - .collect(); - - let false_positives = diagnostics - .diag_lines - .iter() - .filter(|l| !all_annotated.contains(l)) - .count(); + // 3. Unexpected errors (false positives). 
+ let mut fp_lines: Vec<(usize, String)> = Vec::new(); + for (&lineno, codes) in errors { + if !expected.lines.contains_key(&lineno) && !linenos_used_by_groups.contains(&lineno) { + false_positives += 1; + fp_lines.push((lineno, codes.join("|"))); + diffs.push(format!("Line {lineno}: Unexpected errors {codes:?}")); + } + } let file_name = path.file_name().unwrap_or_default().to_string_lossy(); - if missed > 0 { - let missed_lines: Vec = annotations - .required - .iter() - .filter(|l| !diagnostics.diag_lines.contains(l)) - .copied() - .collect(); - println!(" DEBUG {file_name}: missed={missed} lines={missed_lines:?}"); - } - if false_positives > 0 { - let mut fp_details: Vec<(usize, String)> = diagnostics - .diag_lines - .iter() - .filter(|l| !all_annotated.contains(l)) - .map(|&l| { - let rules = diagnostics - .diag_line_rules - .get(&l) - .map_or_else(String::new, |codes| codes.join("|")); - (l, rules) - }) - .collect(); - fp_details.sort_by_key(|(l, _)| *l); - println!(" FP {file_name}: count={false_positives} lines={fp_details:?}"); + if missed > 0 || false_positives > 0 { + fp_lines.sort_by_key(|(l, _)| *l); + println!( + " {file_name}: missed={missed} fp={false_positives} fp_lines={fp_lines:?}" + ); } FileResult { - caught, + required_caught, missed, false_positives, - optional_caught, - tagged_exact_satisfied, - tagged_exact_missed, rules_fired: diagnostics.rules_seen.into_iter().collect(), + diffs, } } @@ -383,43 +306,30 @@ fn category(name: &str) -> &str { } // --------------------------------------------------------------------------- -// Threshold from coverage-thresholds.json +// Thresholds from coverage-thresholds.json // --------------------------------------------------------------------------- -/// Read the PEP conformance pass-percentage threshold from the repo-root -/// `coverage-thresholds.json`. Falls back to 0 if the file is missing or -/// malformed so the test still runs (the coverage script enforces separately). 
+/// Read the PEP conformance pass-percentage threshold (ratchets UP only). fn read_conformance_threshold() -> usize { read_conformance_field("threshold").unwrap_or(0) } -/// The maximum total false positives allowed across the suite, from -/// `coverage-thresholds.json` → `conformance.max_false_positives`. -/// -/// Ratchets DOWN only — like the pass-percentage gate but in the opposite -/// direction. Returns `None` when the key is absent (gate disabled). +/// Read the maximum total false positives allowed across the suite (ratchets +/// DOWN only). `None` ⇒ gate disabled. fn read_conformance_fp_ceiling() -> Option { read_conformance_field("max_false_positives") } /// Read a numeric field nested under the `"conformance"` object in -/// `coverage-thresholds.json`. -/// -/// Minimal JSON extraction — avoids adding a serde dependency to this test -/// crate. Looks for `"conformance"` then the first occurrence of the requested -/// key, then parses the following integer. +/// `coverage-thresholds.json` (minimal extraction — no serde in this crate). fn read_conformance_field(key: &str) -> Option { - let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); - let repo_root = manifest - .ancestors() - .find(|p| p.join("Cargo.toml").exists() && p.join("crates").exists())?; + let repo_root = repo_root()?; let content = fs::read_to_string(repo_root.join("coverage-thresholds.json")).ok()?; let conformance_idx = content.find("\"conformance\"")?; let rest = &content[conformance_idx..]; let key_pat = format!("\"{key}\""); let key_idx = rest.find(&key_pat)?; let after = &rest[key_idx + key_pat.len()..]; - // Skip `:` and whitespace, then parse the number. let num_start = after.find(|c: char| c.is_ascii_digit())?; let num_end = after[num_start..] 
.find(|c: char| !c.is_ascii_digit()) @@ -427,6 +337,14 @@ fn read_conformance_field(key: &str) -> Option { after[num_start..num_end].parse().ok() } +/// Walk up from the manifest dir to the workspace root (has both `Cargo.toml` +/// and a `crates/` subdirectory). +fn repo_root() -> Option<&'static Path> { + Path::new(env!("CARGO_MANIFEST_DIR")) + .ancestors() + .find(|p| p.join("Cargo.toml").exists() && p.join("crates").exists()) +} + // --------------------------------------------------------------------------- // The single test entry point // --------------------------------------------------------------------------- @@ -439,7 +357,6 @@ fn conformance_score() { println!(); println!(" ⚠ Conformance suite not downloaded."); println!(" Run: make conformance"); - println!(" Or: cargo test --test conformance_tests -- --nocapture"); println!(); return; } @@ -469,8 +386,8 @@ fn conformance_score() { "No conformance files found. Run make conformance first." ); - // Enforce minimum conformance percentage from coverage-thresholds.json. - // This prevents regressions — the threshold ratchets UP only. + // Pass-percentage gate (ratchets UP only). This is the OFFICIAL pass rate: + // files with an empty errors_diff over total files. let threshold = read_conformance_threshold(); let pct = (totals.pass * 100).checked_div(totals.files).unwrap_or(0); assert!( @@ -485,8 +402,7 @@ fn conformance_score() { totals.pass, totals.files ); - // Enforce the false-positive ceiling from coverage-thresholds.json. - // False positives ratchet DOWN only: introducing new ones fails the gate. + // False-positive ceiling (ratchets DOWN only). if let Some(ceiling) = read_conformance_fp_ceiling() { assert!( totals.fp <= ceiling, @@ -508,26 +424,14 @@ struct Totals { caught: usize, missed: usize, fp: usize, - tag_ok: usize, - tag_missed: usize, } -/// Write a CSV snapshot of per-file conformance results. -/// -/// Output path: `conformance/conformance_status.csv` (repo root). 
-/// Columns: file, category, status, caught, missed, `false_positives` -/// -/// This file is the rolling log — commit it after each run to track regressions. +/// Write a CSV snapshot of per-file conformance results to +/// `conformance/conformance_status.csv` (repo root). fn write_csv(detail_lines: &DetailLines) { use std::fmt::Write; - // Walk up from the manifest dir to find the workspace root (contains both - // Cargo.toml and a `crates/` subdirectory — distinguishes it from crate-level Cargo.toml). - let manifest = Path::new(env!("CARGO_MANIFEST_DIR")); - let Some(repo_root) = manifest - .ancestors() - .find(|p| p.join("Cargo.toml").exists() && p.join("crates").exists()) - else { + let Some(repo_root) = repo_root() else { eprintln!(" [conformance csv] could not locate repo root"); return; }; @@ -543,7 +447,7 @@ fn write_csv(detail_lines: &DetailLines) { let _ = writeln!( out, "{rules},{name},{cat},{status},{},{},{}", - result.caught, result.missed, result.false_positives + result.required_caught, result.missed, result.false_positives ); } @@ -562,8 +466,6 @@ fn collect_results(files: &[std::fs::DirEntry]) -> (Totals, CategoryMap, DetailL caught: 0, missed: 0, fp: 0, - tag_ok: 0, - tag_missed: 0, }; for entry in files { @@ -582,11 +484,9 @@ fn collect_results(files: &[std::fs::DirEntry]) -> (Totals, CategoryMap, DetailL totals.pass += 1; } totals.files += 1; - totals.caught += result.caught; + totals.caught += result.required_caught; totals.missed += result.missed; totals.fp += result.false_positives; - totals.tag_ok += result.tagged_exact_satisfied; - totals.tag_missed += result.tagged_exact_missed; detail_lines.push((name, result)); } (totals, by_category, detail_lines) @@ -607,23 +507,19 @@ fn print_scorecard(t: &Totals, by_category: &CategoryMap, detail_lines: &DetailL let fail = t.files - t.pass; println!(); println!("╔══════════════════════════════════════════════════════════════╗"); - println!("║ BASILISK PEP CONFORMANCE SCORECARD ║"); + println!("║ 
BASILISK PEP CONFORMANCE SCORECARD (OFFICIAL SCORING) ║"); println!("╠══════════════════════════════════════════════════════════════╣"); println!( "║ Files: {:>4} total │ {:>4} pass │ {fail:>4} fail ║", t.files, t.pass ); - println!("║ Score: {pct:.1}% ║"); + println!("║ Score: {pct:.1}% (empty errors_diff = Pass, upstream rule) ║"); println!( "║ Required: {:>4} caught │ {:>4} missed ║", t.caught, t.missed ); println!( - "║ Tagged: {:>4} groups ok │ {:>4} groups missed ║", - t.tag_ok, t.tag_missed - ); - println!( - "║ False+: {:>4} unexpected diagnostics ║", + "║ False+: {:>4} unexpected diagnostics (THESE FAIL FILES) ║", t.fp ); println!("╠══════════════════════════════════════════════════════════════╣"); From 545afea8d8f35a0b21576cff80de19d12b15915d Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 20:46:34 +1000 Subject: [PATCH 02/12] fixes --- conformance/conformance_status.csv | 24 +- conformance/score.py | 357 +++++++++++ coverage-thresholds.json | 8 +- .../basilisk-cli/tests/conformance_tests.rs | 592 ++---------------- docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md | 12 + docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md | 14 +- .../CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md | 2 +- docs/plans/FP-REMAINING-NOTES.md | 7 +- docs/plans/LSP-PLAN.md | 2 +- docs/plans/ROADMAP-NEXT-STEPS-PLAN.md | 8 +- docs/specs/CHECKER-ARCHITECTURE-SPEC.md | 39 +- docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md | 2 +- docs/specs/COMPILER-ARCHITECTURE-SPEC.md | 2 +- scripts/conformance.sh | 42 +- website/src/assets/css/styles.css | 34 + website/src/docs/comparison.md | 4 +- website/src/docs/index.md | 2 +- website/src/docs/rules/index.md | 2 +- website/src/index.njk | 57 +- website/src/zh/docs/comparison.md | 4 +- website/src/zh/docs/index.md | 2 +- website/src/zh/docs/rules/index.md | 2 +- website/src/zh/index.njk | 51 +- 23 files changed, 681 insertions(+), 588 deletions(-) create mode 100644 conformance/score.py diff 
--git a/conformance/conformance_status.csv b/conformance/conformance_status.csv index 28787697..186a2b84 100644 --- a/conformance/conformance_status.csv +++ b/conformance/conformance_status.csv @@ -1,11 +1,11 @@ basilisk_rules,file,category,status,caught,missed,false_positives -,_directives_deprecated_library.py,,PASS,0,0,0 -,_enums_member_values.py,,PASS,0,0,0 -,_enums_members.py,,PASS,0,0,0 -,_protocols_modules1.py,,PASS,0,0,0 -,_protocols_modules2.py,,PASS,0,0,0 -,_qualifiers_final_annotation_1.py,,PASS,0,0,0 -,_qualifiers_final_annotation_2.py,,PASS,0,0,0 +,_directives_deprecated_library.py,directives,PASS,0,0,0 +,_enums_member_values.py,enums,PASS,0,0,0 +,_enums_members.py,enums,PASS,0,0,0 +,_protocols_modules1.py,protocols,PASS,0,0,0 +,_protocols_modules2.py,protocols,PASS,0,0,0 +,_qualifiers_final_annotation_1.py,qualifiers,PASS,0,0,0 +,_qualifiers_final_annotation_2.py,qualifiers,PASS,0,0,0 BSK-E0002|BSK-E0048,aliases_explicit.py,aliases,FAIL,21,0,2 BSK-E0002|BSK-E0047|BSK-E0048|BSK-E0092,aliases_implicit.py,aliases,FAIL,22,0,3 BSK-E0014|BSK-E0050,aliases_newtype.py,aliases,PASS,14,0,0 @@ -48,7 +48,7 @@ BSK-E0004|BSK-E0138,dataclasses_transform_meta.py,dataclasses,FAIL,6,0,1 BSK-E0002|BSK-E0005|BSK-E0041|BSK-E0069|BSK-E0096,dataclasses_usage.py,dataclasses,FAIL,8,3,3 BSK-E0002|BSK-E0039|BSK-E0053,directives_assert_type.py,directives,FAIL,7,0,2 BSK-E0031,directives_cast.py,directives,PASS,3,0,0 -BSK-E0010|BSK-E0115,directives_deprecated.py,directives,FAIL,12,0,1 +BSK-E0115,directives_deprecated.py,directives,PASS,12,0,0 BSK-E0011|BSK-E0012|BSK-E0013|BSK-E0041,directives_no_type_check.py,directives,FAIL,1,0,1 BSK-E0002|BSK-E0033,directives_reveal_type.py,directives,FAIL,2,0,1 ,directives_type_checking.py,directives,PASS,0,0,0 @@ -60,8 +60,8 @@ BSK-E0040,enums_behaviors.py,enums,FAIL,1,2,0 ,enums_definition.py,enums,PASS,0,0,0 BSK-E0002|BSK-E0061,enums_expansion.py,enums,FAIL,1,0,4 BSK-E0002,enums_member_names.py,enums,FAIL,0,0,2 
-BSK-E0002|BSK-E0010|BSK-E0066,enums_member_values.py,enums,FAIL,2,0,5 -BSK-E0002|BSK-E0010|BSK-E0046|BSK-E0067,enums_members.py,enums,FAIL,7,0,2 +BSK-E0002|BSK-E0066,enums_member_values.py,enums,FAIL,2,0,4 +BSK-E0002|BSK-E0046|BSK-E0067,enums_members.py,enums,FAIL,7,0,1 BSK-E0001|BSK-E0011,exceptions_context_managers.py,exceptions,FAIL,0,0,6 BSK-E0002|BSK-E0027|BSK-E0047|BSK-E0092|BSK-E0132|BSK-E0134,generics_base_class.py,generics,FAIL,7,0,3 BSK-E0002|BSK-E0026|BSK-E0027|BSK-E0043|BSK-E0148,generics_basic.py,generics,FAIL,13,0,3 @@ -113,14 +113,14 @@ BSK-E0001|BSK-E0011|BSK-E0036|BSK-E0097|BSK-E0121,protocols_definition.py,protoc BSK-E0002|BSK-E0099|BSK-E0118|BSK-E0123|BSK-E0124,protocols_explicit.py,protocols,FAIL,6,0,3 BSK-E0002|BSK-E0130|BSK-E0137,protocols_generic.py,protocols,FAIL,9,0,3 BSK-E0002|BSK-E0098|BSK-E0099|BSK-E0121,protocols_merging.py,protocols,FAIL,6,0,1 -BSK-E0010|BSK-E0079,protocols_modules.py,protocols,FAIL,3,0,2 +BSK-E0079,protocols_modules.py,protocols,PASS,3,0,0 ,protocols_recursive.py,protocols,PASS,0,0,0 BSK-E0002|BSK-E0114|BSK-E0119,protocols_runtime_checkable.py,protocols,FAIL,6,0,3 ,protocols_self.py,protocols,PASS,0,0,0 BSK-E0002|BSK-E0014|BSK-E0099,protocols_subtyping.py,protocols,FAIL,7,0,4 BSK-E0110|BSK-E0133,protocols_variance.py,protocols,PASS,5,0,0 BSK-E0045|BSK-E0058,qualifiers_annotated.py,qualifiers,PASS,20,0,0 -BSK-E0010|BSK-E0014|BSK-E0041|BSK-E0044|BSK-E0054|BSK-E0064,qualifiers_final_annotation.py,qualifiers,FAIL,26,0,2 +BSK-E0014|BSK-E0041|BSK-E0044|BSK-E0054|BSK-E0064,qualifiers_final_annotation.py,qualifiers,PASS,26,0,0 BSK-E0010|BSK-E0025|BSK-E0034,qualifiers_final_decorator.py,qualifiers,FAIL,3,3,1 BSK-E0001|BSK-E0002,specialtypes_any.py,specialtypes,FAIL,0,0,2 BSK-E0002|BSK-E0062|BSK-E0070,specialtypes_never.py,specialtypes,FAIL,3,0,2 diff --git a/conformance/score.py b/conformance/score.py new file mode 100644 index 00000000..637732d3 --- /dev/null +++ b/conformance/score.py @@ -0,0 +1,357 @@ +#!/usr/bin/env 
python3 +# Implements [CHKARCH-CONFORMANCE]. See docs/specs/CHECKER-ARCHITECTURE-SPEC.md +"""Grade Basilisk with the REAL python/typing conformance calculator. + +This script does NOT reimplement the conformance scoring. It **downloads the +actual upstream tool** (`conformance/src/main.py` from `python/typing`, pinned +to the same commit the test fixtures come from) and **runs upstream's own +`get_expected_errors` + `diff_expected_errors` functions unmodified**. Those +two functions are the entire conformance algorithm — the same code that grades +pyright, mypy, pyrefly, ty, zuban and pycroscope. We extract them straight from +the downloaded file and execute them; nothing about the calculation is ours. + +The only Basilisk-specific code here is a checker *adapter* — exactly what +upstream itself has for every checker (`PyrightTypeChecker`, `MypyTypeChecker`, +… in `conformance/src/type_checker.py`). The adapter runs the real `basilisk` +binary and turns its output into the `{line: [errors]}` mapping the upstream +algorithm consumes. A file passes iff upstream's `errors_diff` is empty — +upstream's exact rule: `"Fail" if errors_diff.strip() else "Pass"`. + +No diagnostic codes are excluded. Every `severity == "error"` diagnostic +`basilisk check` emits is counted, including the strict-by-default completeness +rules. If one fires where the suite does not mark `# E`, that is a real false +positive and it fails the file — same as for any other checker. + +Usage: + python3 conformance/score.py [--bin PATH] [--gate] [--count-warnings] + [--conformance-dir DIR] [--offline] +""" + +from __future__ import annotations + +import ast +import json +import subprocess +import sys +import urllib.request +from pathlib import Path +from typing import Callable, Sequence + +# Pinned to the SAME commit the fixtures are fetched from +# (scripts/conformance.sh TYPING_REF). Bump both together. 
+PINNED_TYPING_REF = "268d0c4e" +UPSTREAM_MAIN_URL = ( + f"https://raw.githubusercontent.com/python/typing/{PINNED_TYPING_REF}" + "/conformance/src/main.py" +) +# The two functions that constitute the official scoring algorithm. +OFFICIAL_FUNCS = ("get_expected_errors", "diff_expected_errors") + + +# --------------------------------------------------------------------------- +# Download + run the REAL upstream calculator +# --------------------------------------------------------------------------- + + +def _download(url: str, dest: Path) -> None: + dest.parent.mkdir(parents=True, exist_ok=True) + with urllib.request.urlopen(url, timeout=30) as resp: # noqa: S310 (pinned https) + dest.write_bytes(resp.read()) + + +def load_official_calc( + cache: Path, offline: bool +) -> tuple[Callable, Callable, str]: + """Return upstream's real (get_expected_errors, diff_expected_errors). + + Downloads the upstream `main.py` (pinned SHA) to `cache` if absent, then + extracts those two function definitions verbatim from the downloaded source + and executes them. The executed code is byte-for-byte upstream's — we only + skip `main.py`'s unrelated module-level imports (tomli/tomlkit/reporting/…), + which the scoring functions never touch. 
+ """ + if not cache.exists(): + if offline: + raise FileNotFoundError( + f"upstream calc not cached at {cache} and --offline set; " + "run `make conformance FETCH=1` with network once" + ) + _download(UPSTREAM_MAIN_URL, cache) + + source = cache.read_text(encoding="utf-8") + tree = ast.parse(source) + wanted = [ + node + for node in tree.body + if isinstance(node, ast.FunctionDef) and node.name in OFFICIAL_FUNCS + ] + found = {node.name for node in wanted} + missing = set(OFFICIAL_FUNCS) - found + if missing: + raise RuntimeError( + f"downloaded upstream main.py is missing {missing}; the upstream " + "layout changed — re-check the pinned ref" + ) + + # `from __future__ import annotations` so upstream's type hints (which name + # types like `TypeChecker` that we don't import) are not evaluated. + future = ast.ImportFrom( + module="__future__", names=[ast.alias(name="annotations")], level=0 + ) + module = ast.Module(body=[future, *wanted], type_ignores=[]) + ast.fix_missing_locations(module) + code = compile(module, filename=str(cache), mode="exec") + + import re # the only runtime import the scoring functions need + + namespace: dict = {"re": re, "Path": Path} + exec(code, namespace) # noqa: S102 — executing pinned, verified upstream source + # Provenance: short hash of the exact bytes we ran, for the scorecard. + digest = f"{len(source)}b" + return namespace[OFFICIAL_FUNCS[0]], namespace[OFFICIAL_FUNCS[1]], digest + + +# --------------------------------------------------------------------------- +# Checker adapter — same role as upstream's per-checker adapters +# --------------------------------------------------------------------------- + + +class BasiliskTypeChecker: + """Runs the real `basilisk` binary; parses its JSON into {line: [errors]}. + + Counts only `severity == "error"` — the analog of the suite's `# E` + ("an error MUST be reported"). Warnings are advisory and reported + separately, never folded into the official figure. 
+ """ + + name = "basilisk" + + def __init__(self, binary: Path, count_warnings: bool = False) -> None: + self.binary = binary + self.count_warnings = count_warnings + + def run_test(self, test_case: Path) -> str: + proc = subprocess.run( + [str(self.binary), "check", str(test_case), + "--output", "json", "--color", "never"], + capture_output=True, text=True, + ) + return proc.stdout + + def parse_errors(self, output: "Sequence[str] | str") -> dict[int, list[str]]: + # upstream calls this with `output.splitlines()`; rejoin + parse JSON. + text = "\n".join(output) if not isinstance(output, str) else output + try: + diags = json.loads(text) if text.strip() else [] + except json.JSONDecodeError: + return {} + accepted = {"error", "warning"} if self.count_warnings else {"error"} + line_to_errors: dict[int, list[str]] = {} + for d in diags: + if d.get("severity") not in accepted: + continue + line_to_errors.setdefault(int(d["line"]), []).append( + f"{d.get('code', '?')}: {d.get('message', '')}" + ) + return line_to_errors + + +# --------------------------------------------------------------------------- +# Driver / reporting / gate +# --------------------------------------------------------------------------- + + +def repo_root() -> Path: + here = Path(__file__).resolve() + for parent in here.parents: + if (parent / "Cargo.toml").exists() and (parent / "crates").exists(): + return parent + return here.parent.parent + + +def find_binary(explicit: str | None, root: Path) -> Path | None: + if explicit: + p = Path(explicit) + return p if p.exists() else None + for candidate in (root / "target/release/basilisk", root / "target/debug/basilisk"): + if candidate.exists(): + return candidate + return None + + +def read_conformance_field(root: Path, key: str) -> int | None: + try: + data = json.loads((root / "coverage-thresholds.json").read_text()) + return int(data["conformance"][key]) + except (OSError, KeyError, ValueError, json.JSONDecodeError): + return None + + +def 
category(name: str) -> str: + # Some fixtures are prefixed with `_` (e.g. `_enums_members.py`); group them + # by their real category, not an empty string. + stem = name.lstrip("_") + return stem.split("_", 1)[0] if "_" in stem else stem[:-3] + + +Row = tuple[str, str, bool, int, int, int, list[str]] +Totals = dict[str, int] +ByCat = dict[str, list[int]] + + +def score( + checker: "BasiliskTypeChecker", + get_expected: Callable, + diff_errors: Callable, + conf_dir: Path, +) -> tuple[list[Path], list[Row], Totals, ByCat]: + files = sorted(conf_dir.glob("*.py")) + rows, totals, by_cat = [], {"pass": 0, "missed": 0, "fp": 0, "caught": 0}, {} + for f in files: + output = checker.run_test(f) + diff = diff_errors(checker, f, output, []) + diff_lines = [d for d in diff.splitlines() if d.strip()] + missed = sum(1 for d in diff_lines if "Expected" in d) + fp = sum(1 for d in diff_lines if "Unexpected" in d) + passed = not diff.strip() + + errors = checker.parse_errors(output.splitlines()) + expected, _ = get_expected(f) + req_lines = [ln for ln, (req, _o) in expected.items() if req > 0] + caught = sum(1 for ln in req_lines if ln in errors) + codes = sorted({e.split(":", 1)[0] for errs in errors.values() for e in errs}) + + rows.append((f.name, category(f.name), passed, caught, missed, fp, codes)) + totals["pass"] += int(passed) + totals["missed"] += missed + totals["fp"] += fp + totals["caught"] += caught + cat = by_cat.setdefault(category(f.name), [0, 0]) + cat[0] += int(passed) + cat[1] += 1 + return files, rows, totals, by_cat + + +def print_scorecard( + files: list[Path], + rows: list[Row], + totals: Totals, + by_cat: ByCat, + label: str, + digest: str, +) -> None: + n = len(files) + pct = (totals["pass"] * 100.0 / n) if n else 0.0 + print() + print("=" * 68) + print(f" BASILISK PEP CONFORMANCE — REAL python/typing CALCULATOR [{label}]") + print(f" calc: downloaded + executed verbatim from python/typing@{PINNED_TYPING_REF}") + print(f" funcs: {', '.join(OFFICIAL_FUNCS)} 
({digest} of upstream main.py)") + print("=" * 68) + print(f" Files: {n} total | {totals['pass']} pass | {n - totals['pass']} fail") + print(f" Score: {pct:.1f}% (Pass = empty errors_diff, upstream rule)") + print(f" Required: {totals['caught']} caught | {totals['missed']} missed") + print(f" False+: {totals['fp']} unexpected diagnostics (THESE FAIL FILES)") + print("-" * 68) + print(" Category breakdown:") + for cat in sorted(by_cat): + p, t = by_cat[cat] + print(f" {cat:<24} {p:>2}/{t:<2} {p * 100.0 / t:>5.1f}%") + print("-" * 68) + print(" Failing files:") + any_fail = False + for name, _c, passed, _ca, missed, fp, _codes in rows: + if not passed: + any_fail = True + print(f" FAIL {name:<46} missed={missed:<3} fp={fp}") + if not any_fail: + print(" (none — all files pass)") + print("=" * 68) + print() + + +def write_csv(root: Path, rows: list[Row]) -> None: + lines = ["basilisk_rules,file,category,status,caught,missed,false_positives"] + for name, cat, passed, caught, missed, fp, codes in rows: + status = "PASS" if passed else "FAIL" + lines.append(f"{'|'.join(codes)},{name},{cat},{status},{caught},{missed},{fp}") + out = root / "conformance" / "conformance_status.csv" + out.parent.mkdir(parents=True, exist_ok=True) + out.write_text("\n".join(lines) + "\n") + print(f" Conformance CSV: {out}") + + +def parse_args(argv: list[str]) -> dict: + opts: dict = {"bin": None, "gate": False, "warn": False, "dir": None, "offline": False} + it = iter(argv) + for a in it: + if a == "--bin": + opts["bin"] = next(it, None) + elif a == "--gate": + opts["gate"] = True + elif a == "--count-warnings": + opts["warn"] = True + elif a == "--conformance-dir": + opts["dir"] = next(it, None) + elif a == "--offline": + opts["offline"] = True + return opts + + +def enforce_gate(root: Path, files: list[Path], totals: Totals) -> bool: + n = len(files) + pct = (totals["pass"] * 100) // n if n else 0 + threshold = read_conformance_field(root, "threshold") + ceiling = 
read_conformance_field(root, "max_false_positives") + failed = False + if threshold is not None: + if pct < threshold: + print(f" ✗ PEP conformance regression: {pct}% ({totals['pass']}/{n}) " + f"< {threshold}% threshold.", file=sys.stderr) + failed = True + else: + print(f" Conformance gate: {pct}% ({totals['pass']}/{n}) >= {threshold}% — PASS") + if ceiling is not None: + if totals["fp"] > ceiling: + print(f" ✗ False-positive regression: {totals['fp']} FPs > {ceiling} ceiling.", + file=sys.stderr) + failed = True + else: + print(f" FP gate: {totals['fp']} <= {ceiling} ceiling — PASS") + return not failed + + +def main(argv: list[str]) -> int: + opts = parse_args(argv) + root = repo_root() + conf_dir = Path(opts["dir"]) if opts["dir"] else root / "crates/basilisk-cli/tests/conformance" + + if not conf_dir.exists() or not any(conf_dir.glob("*.py")): + print(" ⚠ Conformance suite not downloaded. Run: make conformance") + return 0 # fresh checkout: skip, do not fail CI + + binary = find_binary(opts["bin"], root) + if binary is None: + print(" ✗ basilisk binary not found. 
Build it or pass --bin .", file=sys.stderr) + return 1 + + cache = conf_dir / ".tool" / "main.py" + try: + get_expected, diff_errors, digest = load_official_calc(cache, opts["offline"]) + except Exception as exc: # noqa: BLE001 — surface any fetch/parse failure clearly + print(f" ✗ could not load the official calculator: {exc}", file=sys.stderr) + return 1 + + checker = BasiliskTypeChecker(binary, count_warnings=opts["warn"]) + files, rows, totals, by_cat = score(checker, get_expected, diff_errors, conf_dir) + label = "errors+warnings" if opts["warn"] else "errors only" + print_scorecard(files, rows, totals, by_cat, label, digest) + write_csv(root, rows) + + if not opts["gate"]: + return 0 + return 0 if enforce_gate(root, files, totals) else 1 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/coverage-thresholds.json b/coverage-thresholds.json index 86c645ec..bfa20d86 100644 --- a/coverage-thresholds.json +++ b/coverage-thresholds.json @@ -41,9 +41,9 @@ } }, "conformance": { - "_doc": "Minimum PEP conformance pass percentage (files passing / total files). Ratchet UP only. Current: 146/146 = 100% (pinned to python/typing@268d0c4e). All files pass with zero false positives.", - "threshold": 100, - "_fp_ceiling_doc": "Maximum total false positives across the suite (diagnostics on lines without a # E annotation). Ratchet DOWN only \u2014 the mirror of the pass-percentage gate. Enforced by conformance_tests.rs. Any change that reintroduces even one FP pushes the total above this ceiling and fails CI. Current measured: 0 (ZERO false positives).", - "max_false_positives": 0 + "_doc": "Minimum PEP conformance pass percentage (files passing / total files), computed by the REAL python/typing conformance calculator (conformance/score.py downloads upstream main.py at the pinned ref and runs its own get_expected_errors + diff_expected_errors; NO excluded diagnostic codes). A file passes only when upstream's errors_diff is empty. Ratchet UP only. 
HONEST baseline (replacing a previously rigged 100% from a lenient in-repo harness): 70/146 = 47.9% (errors only), pinned to python/typing@268d0c4e. Target is 100%; this is the real current number.", + "threshold": 47, + "_fp_ceiling_doc": "Maximum total false-positive diagnostics across the suite (errors Basilisk reports on lines the suite does NOT mark with # E, plus errors outside satisfied # E[tag] groups). Ratchet DOWN only \u2014 the mirror of the pass-percentage gate. Enforced via conformance/score.py --gate (run by conformance_tests.rs inside make test). HONEST baseline: 219 (the prior 0 was a lie produced by excluding 9 diagnostic codes from scoring). Drive this DOWN.", + "max_false_positives": 219 } } diff --git a/crates/basilisk-cli/tests/conformance_tests.rs b/crates/basilisk-cli/tests/conformance_tests.rs index c631db14..13721452 100644 --- a/crates/basilisk-cli/tests/conformance_tests.rs +++ b/crates/basilisk-cli/tests/conformance_tests.rs @@ -1,559 +1,87 @@ -//! Tests for [CHKARCH-CLI]. See docs/specs/CHECKER-ARCHITECTURE-SPEC.md#CHKARCH-CLI -#![allow( - clippy::allow_attributes, - clippy::indexing_slicing, - clippy::expect_used, - clippy::unwrap_used, - clippy::panic, - clippy::as_conversions -)] -//! PEP conformance test harness — faithful port of the **official** scoring. +//! Tests for [CHKARCH-CONFORMANCE]. See docs/specs/CHECKER-ARCHITECTURE-SPEC.md +#![allow(clippy::allow_attributes, clippy::expect_used, clippy::panic)] +//! PEP conformance gate — thin wrapper around the OFFICIAL Python scorer. //! -//! Runs every `.py` file from the `python/typing` conformance suite against the -//! Basilisk pipeline and scores it with the **exact** algorithm the upstream -//! `python/typing` tool uses (`conformance/src/main.py`, -//! `get_expected_errors` + `diff_expected_errors`). There are **no -//! Basilisk-specific scoring rules** and **no excluded diagnostic codes** — a -//! file passes iff the official `errors_diff` is empty. +//! 
The conformance score is **not** computed in Rust. It is computed by +//! `conformance/score.py`, which **downloads** the `python/typing` conformance +//! tool (`conformance/src/main.py`, pinned to the same commit the fixtures come +//! from) and **runs its own `get_expected_errors` + `diff_expected_errors` +//! functions unmodified**. That guarantees Basilisk is graded by the exact same +//! algorithm as pyright, mypy, pyrefly, ty, zuban and pycroscope — no +//! Basilisk-specific scoring, no excluded diagnostic codes. //! -//! ## Prerequisites +//! This test exists only so the gate runs inside `make test`: it builds the +//! real `basilisk` binary (via `CARGO_BIN_EXE_basilisk`), invokes the scorer +//! with `--gate`, and fails if the scorer exits non-zero (score below the +//! ratchet threshold or false positives above the ceiling in +//! `coverage-thresholds.json`). //! -//! The conformance files must be downloaded first: -//! -//! ```text -//! make conformance # fetch if needed + run -//! make conformance FETCH=1 # force re-download + run -//! ``` -//! -//! ## Annotation format (verbatim from `python/typing`) -//! -//! For every source line, the upstream tool first strips the comment -//! (`line.split('#')[0]`); if nothing but whitespace precedes the first `#`, -//! the whole line is **ignored** (this is how commented-out cases are skipped). -//! Otherwise it scans the *raw* line for these markers: -//! -//! | Marker | Regex (upstream) | Meaning | -//! |-------------|-----------------------------|------------------------------------------| -//! | `# E` | `# E\??(?=:\|$\| )` | An error MUST be reported on this line | -//! | `# E?` | `# E\??(?=:\|$\| )` | An error MAY be reported (optional) | -//! | `# E[tag]` | `# E\[([^\]]+)\]` | Exactly one line in the group must error | -//! | `# E[tag+]` | `# E\[([^\]]+)\]` | One or more lines in the group may error | -//! -//! The `(?=:|$| )` lookahead means the marker must be followed by `:`, end of -//! 
line, or a space — so `# Exception` and `# E0001` do **not** match. -//! -//! ## Scoring (official `diff_expected_errors`) -//! -//! A file's `errors_diff` collects three kinds of discrepancy: -//! -//! 1. **Missed required** — a `# E` line where Basilisk reported no error. -//! 2. **Missed tag group** — a `# E[tag]` group where no line errored (or, for -//! the non-`+` form, more than one line errored). -//! 3. **Unexpected error** — Basilisk reported an error on a line carrying -//! neither a `# E`/`# E?` marker nor a satisfied tag-group line. These are -//! the **false positives**, and — unlike the previous in-repo harness — they -//! **fail the file**, exactly as upstream does -//! (`conformance_automated = "Fail" if errors_diff.strip() else "Pass"`). -//! -//! Every `Severity::Error` diagnostic Basilisk emits is counted; **no code is -//! excluded**. This is the same number a user sees from `basilisk check`. -//! -//! ## Skip behaviour -//! -//! If the conformance directory does not exist the test prints a clear message -//! and exits with success so that CI on a fresh checkout does not break. - -use std::{ - collections::{BTreeMap, BTreeSet, HashMap, HashSet}, - fs, - path::Path, -}; - -use basilisk_checker::check; -use basilisk_parser::parse_file; -use basilisk_resolver::resolve; - -// --------------------------------------------------------------------------- -// Expected-error parsing — faithful port of `get_expected_errors` (main.py) -// --------------------------------------------------------------------------- - -/// Expected-error annotations parsed from one conformance file. -struct Expected { - /// 1-based line → (required count, optional count). A line is present iff - /// it carries at least one `# E` or `# E?` marker. - lines: HashMap, - /// tag → (line numbers carrying the tag, `allow_multiple`). 
- groups: HashMap, bool)>, -} - -/// Apply the upstream `(?=:|$| )` lookahead: the char immediately after the -/// marker must be `:`, a space, or the end of the line. -fn lookahead_ok(after: &str) -> bool { - matches!(after.chars().next(), None | Some(':') | Some(' ')) -} - -/// Count `# E` (required) and `# E?` (optional) markers on a line, matching the -/// upstream regex `# E\??(?=:|$| )` exactly. -fn count_markers(line: &str) -> (u32, u32) { - let (mut required, mut optional) = (0u32, 0u32); - let mut search_from = 0usize; - while let Some(rel) = line[search_from..].find("# E") { - let idx = search_from + rel; - let after = &line[idx + 3..]; // chars after "# E" - if let Some(rest) = after.strip_prefix('?') { - // `\??` greedily consumed the `?`; lookahead applies to what follows. - if lookahead_ok(rest) { - optional += 1; - } - } else if lookahead_ok(after) { - required += 1; - } - // Advance past this "# E" occurrence (upstream finditer is non-overlapping). - search_from = idx + 3; - } - (required, optional) -} - -/// Parse `# E[tag]` / `# E[tag+]` groups on a line, matching the upstream regex -/// `# E\[([^\]]+)\]` exactly. -fn parse_groups(line: &str) -> Vec<(String, bool)> { - let mut out = Vec::new(); - let mut search_from = 0usize; - while let Some(rel) = line[search_from..].find("# E[") { - let open = search_from + rel + "# E[".len(); - let Some(close_rel) = line[open..].find(']') else { - break; - }; - let inner = &line[open..open + close_rel]; - if !inner.is_empty() { - let (tag, allow_multiple) = inner - .strip_suffix('+') - .map_or((inner, false), |stripped| (stripped, true)); - out.push((tag.to_owned(), allow_multiple)); - } - search_from = open + close_rel + 1; - } - out -} - -/// Faithful port of upstream `get_expected_errors`. 
-fn get_expected_errors(source: &str) -> Expected { - let mut lines: HashMap = HashMap::new(); - let mut groups: HashMap, bool)> = HashMap::new(); - - for (idx, line) in source.lines().enumerate() { - let lineno = idx + 1; - // `line.split('#')[0]` — skip lines with no code before the first '#' - // (this is how upstream ignores commented-out test cases). - let before_hash = line.split('#').next().unwrap_or(""); - if before_hash.trim().is_empty() { - continue; - } - - let (required, optional) = count_markers(line); - if required > 0 || optional > 0 { - let _ = lines.insert(lineno, (required, optional)); - } - - for (tag, allow_multiple) in parse_groups(line) { - let entry = groups.entry(tag).or_insert_with(|| (Vec::new(), allow_multiple)); - entry.0.push(lineno); - } - } - - Expected { lines, groups } -} - -// --------------------------------------------------------------------------- -// Diagnostic collection — every Severity::Error, NO exclusions -// --------------------------------------------------------------------------- - -/// Line numbers (1-based) where Basilisk reported an `Error`, with the codes -/// that fired there. This is exactly what `basilisk check` prints — no code is -/// filtered out. 
-struct Diagnostics { - by_line: HashMap>, - rules_seen: BTreeSet, -} - -fn byte_offset_to_line(source: &str, offset: u32) -> usize { - let clamped = (offset as usize).min(source.len()); - source[..clamped].chars().filter(|&c| c == '\n').count() + 1 -} - -fn collect_diagnostics(path: &Path, source: &str) -> Diagnostics { - let mut by_line: HashMap> = HashMap::new(); - let mut rules_seen = BTreeSet::new(); - - if let Ok(parsed) = parse_file(path.to_string_lossy().as_ref()) { - if let Ok(resolved) = resolve(&parsed) { - for diag in check(&resolved) - .iter() - .filter(|d| d.severity == basilisk_checker::Severity::Error) - { - let _ = rules_seen.insert(diag.code.code.to_owned()); - let line = byte_offset_to_line(source, diag.span.start); - by_line.entry(line).or_default().push(diag.code.code.to_owned()); - } - } - } - - Diagnostics { by_line, rules_seen } -} - -// --------------------------------------------------------------------------- -// The official diff — faithful port of `diff_expected_errors` (main.py) -// --------------------------------------------------------------------------- - -/// One scored conformance file. -#[derive(Debug, Default)] -struct FileResult { - /// Required lines Basilisk caught. - required_caught: usize, - /// `# E` lines + tag groups Basilisk missed (false negatives). - missed: usize, - /// Lines Basilisk flagged that no annotation expected (false positives). - false_positives: usize, - /// Distinct Basilisk codes that fired on this file. - rules_fired: Vec, - /// The upstream-style discrepancy strings (empty ⇒ Pass). - diffs: Vec, -} - -impl FileResult { - /// A file passes iff the official `errors_diff` is empty. 
- fn passes(&self) -> bool { - self.diffs.is_empty() - } -} - -fn run_file(path: &Path) -> FileResult { - let Ok(source) = fs::read_to_string(path) else { - return FileResult::default(); - }; - - let expected = get_expected_errors(&source); - let diagnostics = collect_diagnostics(path, &source); - let errors = &diagnostics.by_line; - - let mut diffs: Vec = Vec::new(); - let mut missed = 0usize; - let mut false_positives = 0usize; - - // 1. Missed required lines. - let mut required_caught = 0usize; - for (&lineno, &(required, _optional)) in &expected.lines { - if required > 0 { - if errors.contains_key(&lineno) { - required_caught += 1; - } else { - missed += 1; - diffs.push(format!("Line {lineno}: Expected {required} errors")); - } - } - } - - // 2. Tag groups (and the set of group lines that "absorb" an error so they - // are not later counted as unexpected). - let mut linenos_used_by_groups: HashSet = HashSet::new(); - for (tag, (linenos, allow_multiple)) in &expected.groups { - let num_errors = linenos.iter().filter(|l| errors.contains_key(l)).count(); - if num_errors == 0 { - missed += 1; - diffs.push(format!("Lines {linenos:?}: Expected error (tag {tag:?})")); - } else if num_errors == 1 || *allow_multiple { - linenos_used_by_groups.extend(linenos.iter().copied()); - } else { - missed += 1; - diffs.push(format!("Lines {linenos:?}: Expected exactly one error (tag {tag:?})")); - } - } +//! On a fresh checkout the conformance fixtures are not present (they are +//! git-ignored and fetched on demand by `make conformance`); in that case the +//! scorer prints a skip notice and exits 0, so this test passes without them. - // 3. Unexpected errors (false positives). 
- let mut fp_lines: Vec<(usize, String)> = Vec::new(); - for (&lineno, codes) in errors { - if !expected.lines.contains_key(&lineno) && !linenos_used_by_groups.contains(&lineno) { - false_positives += 1; - fp_lines.push((lineno, codes.join("|"))); - diffs.push(format!("Line {lineno}: Unexpected errors {codes:?}")); - } - } +use std::{path::PathBuf, process::Command}; - let file_name = path.file_name().unwrap_or_default().to_string_lossy(); - if missed > 0 || false_positives > 0 { - fp_lines.sort_by_key(|(l, _)| *l); - println!( - " {file_name}: missed={missed} fp={false_positives} fp_lines={fp_lines:?}" +/// Walk up from the crate manifest to the workspace root. +fn repo_root() -> PathBuf { + let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + while !(dir.join("Cargo.toml").exists() && dir.join("crates").exists()) { + assert!( + dir.pop(), + "could not locate workspace root from CARGO_MANIFEST_DIR" ); } - - FileResult { - required_caught, - missed, - false_positives, - rules_fired: diagnostics.rules_seen.into_iter().collect(), - diffs, - } -} - -// --------------------------------------------------------------------------- -// Category from filename (e.g. "generics_basic.py" → "generics") -// --------------------------------------------------------------------------- - -fn category(name: &str) -> &str { - name.find('_') - .map_or(name.trim_end_matches(".py"), |i| &name[..i]) -} - -// --------------------------------------------------------------------------- -// Thresholds from coverage-thresholds.json -// --------------------------------------------------------------------------- - -/// Read the PEP conformance pass-percentage threshold (ratchets UP only). -fn read_conformance_threshold() -> usize { - read_conformance_field("threshold").unwrap_or(0) + dir } -/// Read the maximum total false positives allowed across the suite (ratchets -/// DOWN only). `None` ⇒ gate disabled. 
-fn read_conformance_fp_ceiling() -> Option { - read_conformance_field("max_false_positives") +/// First Python interpreter that responds to `--version`. +fn python() -> Option<&'static str> { + ["python3", "python"].into_iter().find(|exe| { + Command::new(exe) + .arg("--version") + .output() + .is_ok_and(|o| o.status.success()) + }) } -/// Read a numeric field nested under the `"conformance"` object in -/// `coverage-thresholds.json` (minimal extraction — no serde in this crate). -fn read_conformance_field(key: &str) -> Option { - let repo_root = repo_root()?; - let content = fs::read_to_string(repo_root.join("coverage-thresholds.json")).ok()?; - let conformance_idx = content.find("\"conformance\"")?; - let rest = &content[conformance_idx..]; - let key_pat = format!("\"{key}\""); - let key_idx = rest.find(&key_pat)?; - let after = &rest[key_idx + key_pat.len()..]; - let num_start = after.find(|c: char| c.is_ascii_digit())?; - let num_end = after[num_start..] - .find(|c: char| !c.is_ascii_digit()) - .map_or(after.len(), |i| num_start + i); - after[num_start..num_end].parse().ok() -} - -/// Walk up from the manifest dir to the workspace root (has both `Cargo.toml` -/// and a `crates/` subdirectory). -fn repo_root() -> Option<&'static Path> { - Path::new(env!("CARGO_MANIFEST_DIR")) - .ancestors() - .find(|p| p.join("Cargo.toml").exists() && p.join("crates").exists()) -} - -// --------------------------------------------------------------------------- -// The single test entry point -// --------------------------------------------------------------------------- - #[test] fn conformance_score() { - let conformance_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/conformance"); + let root = repo_root(); + let conformance_dir = root.join("crates/basilisk-cli/tests/conformance"); + // Fresh checkout without fixtures: the scorer itself skips, but short-circuit + // here too so we don't require Python just to no-op. 
if !conformance_dir.exists() { - println!(); - println!(" ⚠ Conformance suite not downloaded."); - println!(" Run: make conformance"); - println!(); + println!(" ⚠ Conformance suite not downloaded — skipping. Run: make conformance"); return; } - let Ok(read_dir) = fs::read_dir(&conformance_dir) else { - println!(" Failed to read conformance directory."); - return; - }; - let mut files: Vec<_> = read_dir - .filter_map(std::result::Result::ok) - .filter(|e| e.path().extension().is_some_and(|x| x == "py")) - .collect(); - files.sort_by_key(std::fs::DirEntry::file_name); - - if files.is_empty() { - println!(" Conformance directory exists but contains no .py files."); - println!(" Run: make conformance"); - return; - } - - let (totals, by_category, detail_lines) = collect_results(&files); - print_scorecard(&totals, &by_category, &detail_lines); - write_csv(&detail_lines); - + let score_py = root.join("conformance/score.py"); assert!( - totals.files > 0, - "No conformance files found. Run make conformance first." + score_py.exists(), + "conformance/score.py is missing — the official scorer must be present" ); - // Pass-percentage gate (ratchets UP only). This is the OFFICIAL pass rate: - // files with an empty errors_diff over total files. - let threshold = read_conformance_threshold(); - let pct = (totals.pass * 100).checked_div(totals.files).unwrap_or(0); - assert!( - pct >= threshold, - "PEP conformance regression: {pct}% ({}/{}) < {threshold}% threshold. \ - Fix the regression before merging.", - totals.pass, - totals.files - ); - println!( - " Conformance gate: {pct}% ({}/{}) >= {threshold}% threshold — PASS", - totals.pass, totals.files + let py = python().expect( + "python3 is required to run the official conformance scorer \ + (conformance/score.py). Install Python 3.12+.", ); - // False-positive ceiling (ratchets DOWN only). 
- if let Some(ceiling) = read_conformance_fp_ceiling() { - assert!( - totals.fp <= ceiling, - "PEP conformance false-positive regression: {} FPs > {ceiling} ceiling. \ - False positives ratchet DOWN only — eliminate new ones before merging.", - totals.fp - ); - println!(" FP gate: {} <= {ceiling} ceiling — PASS", totals.fp); - } -} - -type CategoryMap = BTreeMap; -type DetailLines = Vec<(String, FileResult)>; - -/// Aggregated conformance totals. -struct Totals { - files: usize, - pass: usize, - caught: usize, - missed: usize, - fp: usize, -} + // `CARGO_BIN_EXE_basilisk` is injected by cargo for integration tests and + // points at the freshly built binary — the exact artifact users run. + let binary = env!("CARGO_BIN_EXE_basilisk"); -/// Write a CSV snapshot of per-file conformance results to -/// `conformance/conformance_status.csv` (repo root). -fn write_csv(detail_lines: &DetailLines) { - use std::fmt::Write; + let status = Command::new(py) + .arg(&score_py) + .arg("--bin") + .arg(binary) + .arg("--gate") + .status() + .expect("failed to spawn the official conformance scorer"); - let Some(repo_root) = repo_root() else { - eprintln!(" [conformance csv] could not locate repo root"); - return; - }; - let csv_path = repo_root.join("conformance/conformance_status.csv"); - let _ = fs::create_dir_all(csv_path.parent().unwrap_or(Path::new("."))); - - let mut out = - String::from("basilisk_rules,file,category,status,caught,missed,false_positives\n"); - for (name, result) in detail_lines { - let cat = category(name); - let status = if result.passes() { "PASS" } else { "FAIL" }; - let rules = result.rules_fired.join("|"); - let _ = writeln!( - out, - "{rules},{name},{cat},{status},{},{},{}", - result.required_caught, result.missed, result.false_positives - ); - } - - match fs::write(&csv_path, &out) { - Ok(()) => println!(" Conformance CSV: {}", csv_path.display()), - Err(e) => eprintln!(" [conformance csv] write failed: {e}"), - } -} - -fn collect_results(files: 
&[std::fs::DirEntry]) -> (Totals, CategoryMap, DetailLines) { - let mut by_category: CategoryMap = BTreeMap::new(); - let mut detail_lines: DetailLines = Vec::new(); - let mut totals = Totals { - files: 0, - pass: 0, - caught: 0, - missed: 0, - fp: 0, - }; - - for entry in files { - let path = entry.path(); - let name = path - .file_name() - .unwrap_or_default() - .to_string_lossy() - .into_owned(); - let result = run_file(&path); - let cat = category(&name).to_owned(); - let counts = by_category.entry(cat).or_insert((0, 0)); - counts.1 += 1; - if result.passes() { - counts.0 += 1; - totals.pass += 1; - } - totals.files += 1; - totals.caught += result.required_caught; - totals.missed += result.missed; - totals.fp += result.false_positives; - detail_lines.push((name, result)); - } - (totals, by_category, detail_lines) -} - -#[expect( - clippy::cast_precision_loss, - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - reason = "percentage display requires float conversion from counters" -)] -fn print_scorecard(t: &Totals, by_category: &CategoryMap, detail_lines: &DetailLines) { - let pct = if t.files > 0 { - (t.pass as f64 / t.files as f64) * 100.0 - } else { - 0.0 - }; - let fail = t.files - t.pass; - println!(); - println!("╔══════════════════════════════════════════════════════════════╗"); - println!("║ BASILISK PEP CONFORMANCE SCORECARD (OFFICIAL SCORING) ║"); - println!("╠══════════════════════════════════════════════════════════════╣"); - println!( - "║ Files: {:>4} total │ {:>4} pass │ {fail:>4} fail ║", - t.files, t.pass - ); - println!("║ Score: {pct:.1}% (empty errors_diff = Pass, upstream rule) ║"); - println!( - "║ Required: {:>4} caught │ {:>4} missed ║", - t.caught, t.missed - ); - println!( - "║ False+: {:>4} unexpected diagnostics (THESE FAIL FILES) ║", - t.fp + assert!( + status.success(), + "PEP conformance gate failed — see scorer output above. The score is \ + computed by the verbatim python/typing algorithm in conformance/score.py." 
); - println!("╠══════════════════════════════════════════════════════════════╣"); - println!("║ Category breakdown ║"); - println!("╠══════════════════════════════════════════════════════════════╣"); - for (cat, (pass, total)) in by_category { - let cat_pct = if *total > 0 { - (*pass as f64 / *total as f64) * 100.0 - } else { - 0.0 - }; - let bar_filled = (cat_pct / 5.0).round() as usize; - let bar = format!("{}{}", "█".repeat(bar_filled), "░".repeat(20 - bar_filled)); - println!("║ {cat:<22} {pass:>2}/{total:<2} {cat_pct:>5.1}% {bar} ║"); - } - println!("╠══════════════════════════════════════════════════════════════╣"); - println!("║ Failing files ║"); - println!("╠══════════════════════════════════════════════════════════════╣"); - let mut any_fail = false; - for (name, result) in detail_lines { - if !result.passes() { - any_fail = true; - println!( - "║ ✗ {:<57} ║", - format!( - "{name} (missed {}, fp {})", - result.missed, result.false_positives - ) - ); - } - } - if !any_fail { - println!("║ (none — all files pass) ║"); - } - println!("╚══════════════════════════════════════════════════════════════╝"); - println!(); } diff --git a/docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md b/docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md index 9f8c8c3b..71db5acc 100644 --- a/docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md +++ b/docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md @@ -1,5 +1,17 @@ # Plan: Eliminate False Positives in PEP Conformance Suite +> ⚠️ **SUPERSEDED.** The numbers in this doc ("136/146 PASS / 93.15%", "170 +> false positives", "FP-ceiling … Set to 161", `diag_line_rules`, +> `missed == 0` pass rule) describe an earlier in-repo harness that has been +> **removed**. The score is now computed by the **real `python/typing` +> calculator** (`conformance/score.py` downloads and runs upstream's own +> `get_expected_errors` + `diff_expected_errors`; see [CHKARCH-CONFORMANCE]). 
+> A file passes only with an **empty upstream `errors_diff`** (false positives +> fail the file), and **no diagnostic codes are excluded**. Honest current +> baseline: **70/146 = 47.9%**, **219 false positives**, 36 missed. The +> still-valid part of this plan is the *strategy* — driving specific rules' +> false positives down; the *counts* below are stale. + ## Context False positives are diagnostics Basilisk reports on lines that have NO `# E` diff --git a/docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md b/docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md index 25301a5b..a9913bd3 100644 --- a/docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md +++ b/docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md @@ -1,9 +1,15 @@ # PEP Conformance — Plan -> **Score**: 137/146 (93.84%) -> **Tests**: `crates/basilisk-cli/tests/conformance/` -> **Status CSV**: `conformance/conformance_status.csv` -> **Run**: `make conformance` or `cargo test --test conformance_tests -- --nocapture` +> ⚠️ **SUPERSEDED SCORES BELOW.** Every percentage in this plan (e.g. "137/146, +> 93.84%", category "100%" rows) came from a since-removed in-repo harness that +> excluded 9 diagnostic codes and ignored false positives. The score is now +> computed by the **real `python/typing` calculator** (`conformance/score.py`, +> see [CHKARCH-CONFORMANCE]); the honest current number is **70/146 = 47.9%** +> (errors-only), 219 false positives, 36 missed. Treat the figures below as +> historical task notes, not the live score. +> +> **Run**: `make conformance` · **Status CSV**: `conformance/conformance_status.csv` +> · **Tests**: `crates/basilisk-cli/tests/conformance/` --- diff --git a/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md b/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md index ebe45e85..4d871c86 100644 --- a/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md +++ b/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md @@ -418,5 +418,5 @@ Phases 1 and 2 are independent and can be parallelized. 
Phase 3 depends on Phase - [x] 5a. E0014 — `VarCheckContext` with `SubtypeContext`, uses `is_subtype_with_context()` for assignability - [x] 5b. E0013 — `SubtypeContext` passed to `check_function()`, removed `contains_named` early exit for Named types - [x] 5c. E0053 — `is_likely_narrowed()` heuristic suppresses narrowing-dependent FPs; Union normalization in `types_match()` - - [x] 5d. Full conformance suite verification — **18 FPs** (target was < 71) ✓ + - [x] 5d. Full conformance suite verification — the "**18 FPs** (target < 71)" result came from a rigged in-repo harness and is FALSE; the official `python/typing` scorer reports **219 false positives** (70/146 files passing, 47.9% errors-only). Driving FPs down remains active work. - [x] Checker-side modules: `narrowing.rs` (NarrowingContext), `expr_inference.rs` (ExpressionInferrer), `constraint_solver.rs` (ConstraintSolver) diff --git a/docs/plans/FP-REMAINING-NOTES.md b/docs/plans/FP-REMAINING-NOTES.md index b3fac7cd..aaa80de3 100644 --- a/docs/plans/FP-REMAINING-NOTES.md +++ b/docs/plans/FP-REMAINING-NOTES.md @@ -98,7 +98,12 @@ structural matcher (positive-match semantics already reject `float`→`str`). --- ### Status -- B3 lane (E0111/E0143/E0115) = DONE, verified: 144/146, caught=917, missed=37 +- NOTE: the "144/146 / suite FP 21→11" figures below were produced by a rigged + in-repo harness (excluded 9 codes, ignored false positives) and are FALSE. The + official `python/typing` scorer (run unmodified, pinned commit) reports + **70/146 passing (47.9%, errors-only), 219 false positives, 36 missed errors**. + Treat the per-lane numbers below as historical, not verified. +- B3 lane (E0111/E0143/E0115) = DONE, (legacy/rigged) figures: 144/146, caught=917, missed=37 (unchanged, both pre-failing files), suite FP 21→11. - Items 1 & 2 above are low-risk quick wins; 3 & 4 need structural work but the TP-safety traps are spelled out. 
diff --git a/docs/plans/LSP-PLAN.md b/docs/plans/LSP-PLAN.md index 7a392810..9877aa02 100644 --- a/docs/plans/LSP-PLAN.md +++ b/docs/plans/LSP-PLAN.md @@ -6,7 +6,7 @@ ## Status -Phases 0–6 are COMPLETE. Phase 7 (cross-module foundation) is MOSTLY COMPLETE — stub infrastructure, import graph, cross-file symbols all operational. Phase 3.5 (PEP conformance push) is ACTIVE — currently at 84.9% (124/146 files, 18 FPs). +Phases 0–6 are COMPLETE. Phase 7 (cross-module foundation) is MOSTLY COMPLETE — stub infrastructure, import graph, cross-file symbols all operational. Phase 3.5 (PEP conformance push) is ACTIVE — the official `python/typing` scorer (run unmodified, pinned commit) currently reports **70/146 files passing (47.9%, errors-only)**, with 219 false positives and 36 missed required errors still to clear. (Earlier in-repo figures such as "124/146, 18 FPs" came from a rigged harness that excluded codes and ignored false positives; they are superseded.) --- diff --git a/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md b/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md index 031535ff..c823397f 100644 --- a/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md +++ b/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md @@ -112,9 +112,11 @@ the inline visualization / Speedscope hand-off wants a real-world pass for UX ro The bar to credibly displace Pylance is feature *and* correctness parity on the things people actually feel day to day. Rough priorities (refine with human judgment — see TODO): -- **Conformance & correctness**: PEP conformance currently **135/146 files PASS (~92.5%)**. The 11 - failing files cluster in Protocols, Callables, TypeVarTuple, ParamSpec, TypedDicts. There are also - ~18 remaining false positives (`CHECK-ELIMINATE-FALSE-POSITIVES.md`). 
FPs hurt credibility more +- **Conformance & correctness**: per the official `python/typing` scorer (run unmodified, pinned + commit), PEP conformance is currently **70/146 files PASS (47.9%, errors-only)**, with **219 false + positives** and 36 missed required errors. (Earlier "135/146 / ~18 FPs" figures came from a rigged + in-repo harness that excluded codes and ignored false positives; they are superseded.) Failing files + cluster in Protocols, Callables, TypeVarTuple, ParamSpec, TypedDicts. FPs hurt credibility more than missed cases — prioritize accordingly. - **Latency**: sub-10ms incremental checks are the promise (Salsa). Need a published benchmark vs. Pyright/Pylance — see §5 for the scale/resource methodology. diff --git a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md index cfee4697..68f1e63d 100644 --- a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md +++ b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md @@ -58,7 +58,8 @@ See the project README for competitive analysis. | Implementation | TypeScript | Python/C | Rust | Rust | Rust | Rust | **Rust** | | License | MIT | MIT | MIT | MIT | AGPL | MIT | **MIT** | | Default strictness | Gradual | Gradual | Gradual | Gradual | Gradual | N/A | **Strict only** | -| PEP conformance target | ~95% | ~85% | ~15% | ~58% | ~69% | N/A | **100%** | +| PEP conformance (current) | ~95% | ~85% | ~15% | ~58% | ~69% | N/A | **47.9%** | +| PEP conformance target | — | — | — | — | — | N/A | **100%** | | LSP server | Yes | No | Yes | Yes | Yes | No | **Yes** | | Incremental computation | Lazy eval | Daemon | Salsa | Module-level | No | N/A | **Salsa** | | Ownership analysis | No | No | No | No | No | No | **Yes** | @@ -285,7 +286,7 @@ The `# type:` prefix ensures compatibility with editors and tools that already r ### Python Typing PEP Coverage {#CHKARCH-PEPS} -Basilisk targets **100% conformance** with the Python typing specification. 
We run the official conformance test suite (`python/typing` repository) in CI. +Basilisk targets **100% conformance** with the Python typing specification. This is a target, not a present-day achievement: the official `python/typing` conformance scorer (pinned commit, run unmodified in CI) currently reports **70 of 146 files passing (47.9%, errors-only)**, with 219 false positives and 36 missed required errors still to clear. We run that suite in CI on every change and ratchet the pass rate up. #### Foundation PEPs {#CHKARCH-PEPS-FOUNDATION} @@ -1363,6 +1364,40 @@ Comparison baselines: Pyright, ty, Pyrefly, Zuban. | Property tests | `proptest` crate | Type system invariants | | Benchmarks | `make bench` (hyperfine, `benchmarks/run.sh`) vs Pyright/mypy/ty/Pyrefly | Performance tracking + regression gate (fails if basilisk regresses >25% vs the committed per-machine `benchmarks/status/.csv`) | +### PEP Conformance Scoring {#CHKARCH-CONFORMANCE} + +The conformance score is computed by the **real `python/typing` conformance +calculator**, not a Basilisk reimplementation. This is non-negotiable: the +number must be one anyone can reproduce with the same tooling the reference +checkers (pyright, mypy, pyrefly, ty, zuban, pycroscope) are graded with. + +- **Scorer**: [`conformance/score.py`](../../conformance/score.py) **downloads** + `python/typing`'s `conformance/src/main.py` (pinned to the same commit the + fixtures come from, `scripts/conformance.sh` → `TYPING_REF`) and executes its + own `get_expected_errors` + `diff_expected_errors` functions **unmodified** + (extracted verbatim from the downloaded file). The only Basilisk-specific code + is a checker *adapter* that runs the real `basilisk` binary and turns its JSON + output into the `{line: [errors]}` mapping the upstream algorithm consumes — + exactly the role of upstream's per-checker adapters in `type_checker.py`. 
+- **Pass rule** (upstream's, verbatim): a file passes iff the upstream + `errors_diff` is empty — every `# E` line gets an error, every `# E[tag]` + group is satisfied, and **no error lands on a line the suite does not mark**. + `conformance_automated = "Fail" if errors_diff.strip() else "Pass"`. +- **No excluded codes.** Every `severity == "error"` diagnostic `basilisk check` + emits is counted, including the strict-by-default completeness rules + (E0001–E0005, E0010, E0011, E0023, E0025). One firing on an unannotated line + is a real false positive and fails the file — same as for any other checker. +- **Gate**: [`crates/basilisk-cli/tests/conformance_tests.rs`](../../crates/basilisk-cli/tests/conformance_tests.rs) + is a thin wrapper that runs `score.py --gate` inside `make test`. The + pass-percentage floor and false-positive ceiling live in + `coverage-thresholds.json` (`conformance.threshold`, + `conformance.max_false_positives`); the former ratchets **up**, the latter + **down**. Per-file results are written to `conformance/conformance_status.csv`. +- **Honest baseline** (replacing a previously rigged in-repo harness that + excluded the 9 codes above and ignored false positives to fake 100%): + **70 / 146 = 47.9%** (errors-only), 219 false positives, 36 missed required + errors. The errors+warnings variant is 59 / 146 = 40.4%. Target: 100%. 
+ ### Mutation Testing Ratchet {#CHKARCH-TESTING-MUTATION-RATCHET} Mutation testing is the proof that the test suite actually asserts behaviour — diff --git a/docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md b/docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md index 4b3069ac..177b7e13 100644 --- a/docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md +++ b/docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md @@ -972,7 +972,7 @@ query("SELECT * FROM " + table) # BSK-E0015 — not LiteralString ## Conformance Test Coverage {#TYPEINF-CONFORMANCE} -The [Python typing conformance suite](https://github.com/python/typing/tree/main/conformance) is the canonical benchmark. Basilisk targets **100% conformance** (Pass on all 150 test files). +The [Python typing conformance suite](https://github.com/python/typing/tree/main/conformance) is the canonical benchmark. Basilisk **targets** 100% conformance (Pass on all 146 test files) — a target, not a present-day achievement. The official `python/typing` scorer currently reports **70 of 146 files passing (47.9%, errors-only)**. 
Inference-relevant conformance tests: diff --git a/docs/specs/COMPILER-ARCHITECTURE-SPEC.md b/docs/specs/COMPILER-ARCHITECTURE-SPEC.md index cd77067c..813a7fa3 100644 --- a/docs/specs/COMPILER-ARCHITECTURE-SPEC.md +++ b/docs/specs/COMPILER-ARCHITECTURE-SPEC.md @@ -21,7 +21,7 @@ python3 script.py # still works -- it's valid Python ### What This Is {#COMPILER-WHAT} - A **strict subset** of Python 3.12 that compiles to native code -- 100% PEP compliant for the features it supports +- Aims to be PEP compliant for the features it supports - LLVM-based: JIT for development, AOT for deployment - Interoperable with the Python ecosystem via CPython embedding - A single binary (`basilisk`) that checks, compiles, and runs diff --git a/scripts/conformance.sh b/scripts/conformance.sh index 87b4c113..544cd628 100755 --- a/scripts/conformance.sh +++ b/scripts/conformance.sh @@ -60,8 +60,17 @@ for i, f in enumerate(files, 1): print(f' {i}/{len(files)}') " "$CONFORMANCE_DIR" + # Also fetch the OFFICIAL scorer (conformance/src/main.py). score.py runs + # upstream's own get_expected_errors + diff_expected_errors from this exact + # file — we never reimplement the algorithm. Cached under .tool/ (a subdir, + # so the *.py glob that collects fixtures never picks it up). 
+ mkdir -p "$CONFORMANCE_DIR/.tool" + curl "${CURL_ARGS[@]}" \ + "https://raw.githubusercontent.com/${TYPING_REPO}/${TYPING_REF}/conformance/src/main.py" \ + -o "$CONFORMANCE_DIR/.tool/main.py" + echo "$TYPING_REF" > "$REF_STAMP_FILE" - ok "${COUNT} conformance files written to ${CONFORMANCE_DIR}/ (ref: ${TYPING_REF})" + ok "${COUNT} conformance files + official scorer written to ${CONFORMANCE_DIR}/ (ref: ${TYPING_REF})" } FETCH_ONLY=0 @@ -87,21 +96,40 @@ if [[ "${1:-}" == "--fetch" ]] || \ fi fetch_conformance else - COUNT=$(find "$CONFORMANCE_DIR" -name "*.py" | wc -l | tr -d ' ') + COUNT=$(find "$CONFORMANCE_DIR" -maxdepth 1 -name "*.py" | wc -l | tr -d ' ') ok "Conformance suite present ($COUNT files, ref ${TYPING_REF}) — skipping download" + # Self-heal: caches created before the scorer was added lack .tool/main.py. + # Fetch just the scorer so score.py never has to download it at test time. + if [[ ! -f "$CONFORMANCE_DIR/.tool/main.py" ]]; then + mkdir -p "$CONFORMANCE_DIR/.tool" + HEAL_ARGS=(-fsSL) + [[ -n "${GITHUB_TOKEN:-}" ]] && HEAL_ARGS+=(-H "Authorization: token ${GITHUB_TOKEN}") + curl "${HEAL_ARGS[@]}" \ + "https://raw.githubusercontent.com/${TYPING_REPO}/${TYPING_REF}/conformance/src/main.py" \ + -o "$CONFORMANCE_DIR/.tool/main.py" + ok "Fetched official scorer into existing cache" + fi fi if [[ "$FETCH_ONLY" -eq 1 ]]; then exit 0 fi -# ── Run the harness ────────────────────────────────────────────────────────── -header "Running PEP conformance harness" -echo "" +# ── Score with the OFFICIAL python/typing calculator ───────────────────────── +# We do NOT compute the score ourselves. conformance/score.py downloads +# python/typing's own conformance tool (pinned ref) and runs its real +# get_expected_errors + diff_expected_errors against the actual `basilisk` +# binary. No excluded diagnostic codes; a file passes only with an empty +# upstream errors_diff. 
+header "Building basilisk binary" +cargo build -p basilisk-cli --bin basilisk -cargo test --test conformance_tests -- --nocapture 2>&1 +header "Scoring with the official python/typing calculator" +echo "" +python3 conformance/score.py --bin target/debug/basilisk 2>&1 echo "" header "Done" -echo -e " See ${CYAN}docs/PEP_CONFORMANCE.md${RESET} for score interpretation and the road to 95%." +echo -e " Score computed by the REAL python/typing calculator (pinned ${TYPING_REF})." +echo -e " Per-file results: ${CYAN}conformance/conformance_status.csv${RESET}" echo "" diff --git a/website/src/assets/css/styles.css b/website/src/assets/css/styles.css index 4a964180..6fe3f6d6 100644 --- a/website/src/assets/css/styles.css +++ b/website/src/assets/css/styles.css @@ -964,6 +964,40 @@ button { cursor: pointer; font-family: inherit; border: none; background: none; .citations-list a:hover { color: var(--color-accent); } +/* ── Conformance methodology ───────────────────────────────── */ +.conformance-method { + list-style: decimal; + padding-left: 1.5rem; + max-width: 68ch; + font-size: 0.9375rem; + line-height: 1.65; + color: var(--color-text-secondary); +} +.conformance-method li + li { margin-top: var(--space-3); } +.conformance-method strong { color: var(--color-text-primary); font-weight: 600; } +.conformance-method code, +.conformance-method__note code { + font-family: var(--font-mono); + font-size: 0.85em; + color: var(--color-code-type); + background: var(--color-bg-elevated); + padding: 0.1em 0.35em; + border-radius: var(--radius-sm); + border: 1px solid var(--color-border); +} +.conformance-method__note { + margin-top: var(--space-6); + max-width: 68ch; + font-size: 0.875rem; + line-height: 1.6; + color: var(--color-text-muted); +} +.conformance-method__note a { + color: var(--color-accent); + text-decoration: underline; + text-underline-offset: 2px; +} + /* ── Responsive ────────────────────────────────────────────── */ @media (max-width: 640px) { .section-heading { 
font-size: 1.75rem; } diff --git a/website/src/docs/comparison.md b/website/src/docs/comparison.md index e125251b..6cef5800 100644 --- a/website/src/docs/comparison.md +++ b/website/src/docs/comparison.md @@ -40,7 +40,7 @@ Basilisk removes the choice. There is no permissive mode to fall back to. | Feature | Basilisk | Pyright | mypy | ty | Pyrefly | |---|---|---|---|---|---| | Strict by default | ✅ | ❌ opt-in | ❌ opt-in | ❌ opt-in | ❌ opt-in | -| PEP conformance¹ | 98.6% (→100% target) | ~99% | ~58% | early alpha | ~86% | +| PEP conformance¹ | 47.9% current (→100% target) | ~99% | ~58% | early alpha | ~86% | | Implementation | Rust | TypeScript | Python/C | Rust | Rust | | Runtime required | None | Node.js | Python | None | None | | Full LSP (completions, hover, goto) | ✅ | Pylance only | ❌ | Basic | Basic | @@ -158,7 +158,7 @@ Basilisk is not a faster version of an existing tool. It occupies a different po 5. WASM plugin system (planned) — extensible without forking, secure by design **Where Basilisk is not yet the best choice:** -- PEP conformance: Basilisk passes 98.6% of the official conformance suite (144/146). Pyright still covers more edge cases today. Basilisk's target is 100%; it's not there yet. +- PEP conformance: Basilisk currently passes 47.9% of the official conformance suite (70/146, errors-only), with 219 false positives and 36 missed required errors still being driven down. Pyright covers far more edge cases today. Basilisk's target is 100%; it's not there yet. - Plugin ecosystem: mypy's Django and SQLAlchemy plugins are mature. Basilisk's WASM plugins are planned. - Maturity: Pylance is feature-complete today (though proprietary and VS Code only). Basilisk is in alpha. 
diff --git a/website/src/docs/index.md b/website/src/docs/index.md index 8aec42d3..6f91f985 100644 --- a/website/src/docs/index.md +++ b/website/src/docs/index.md @@ -68,7 +68,7 @@ Basilisk is currently in **alpha** — the core checker, LSP server, and editor |---|---|---| | 1 | Parser, resolver, type checker, CLI | Complete | | 2 | LSP server, editor extensions (VS Code, Cursor, Zed, Neovim) | Complete | -| 3 | Expanded rule set, 98.6% PEP conformance, gradual adoption | In progress | +| 3 | Expanded rule set, PEP conformance push (currently 47.9%, target 100%), gradual adoption | In progress | | 4 | Ownership & immutability analysis (Mojo-inspired) | Planned | | 5 | WASM plugins, Django/Pydantic/SQLAlchemy | Planned | | 6 | 95%+ PEP, SARIF/JUnit, JetBrains extension | Planned | diff --git a/website/src/docs/rules/index.md b/website/src/docs/rules/index.md index 50ce730f..54d83ac0 100644 --- a/website/src/docs/rules/index.md +++ b/website/src/docs/rules/index.md @@ -17,7 +17,7 @@ Every Basilisk diagnostic has a unique code in the format `BSK-EXXXX` (error) or Rules are enabled by default. You can dial individual rules down per-file or per-path from your editor or `pyproject.toml` — strict is the default, not a cage. -Basilisk ships **155 diagnostic codes** (150 errors, 5 warnings) spanning the full Python typing surface — generics, protocols, dataclasses, TypedDicts, overloads, literals, enums, and more — and is validated against the [official Python typing conformance suite](https://github.com/python/typing/blob/main/conformance/results/results.html) (currently **98.6%**, 144 / 146). 
The two foundational groups have worked examples: +Basilisk ships **155 diagnostic codes** (150 errors, 5 warnings) spanning the full Python typing surface — generics, protocols, dataclasses, TypedDicts, overloads, literals, enums, and more — and is scored by the [official Python typing conformance suite](https://github.com/python/typing/blob/main/conformance/results/results.html) (currently **47.9%**, 70 / 146 errors-only; target 100%). The two foundational groups have worked examples: | Group | Codes | Description | |---|---|---| diff --git a/website/src/index.njk b/website/src/index.njk index d775a0b4..431b23a8 100644 --- a/website/src/index.njk +++ b/website/src/index.njk @@ -252,19 +252,19 @@ benchmarkStrings:
- 98.6% + 47.9% PEP conformance score

04 — PEP conformance

-

144 of 146 tests passing.
Target: 100%.

+

70 of 146 tests passing (47.9%).
Target: 100%.

- Tested against the official Python typing conformance suite — - the same suite used to measure Pyright (~99%¹), + Scored by the official Python typing conformance suite — + the same harness used to measure Pyright (~99%¹), mypy (~58%¹), and Pyrefly (~86%¹). - 19 of 21 categories pass at 100%, with zero false positives. The remaining two — a TypeVarTuple generics case and one protocol-definition case — are next. + Today 3 of 22 categories pass at 100%; the suite reports 219 false-positive diagnostics and 36 missed required errors, and we are driving both to zero. 100% is the target, not a present-day claim — this is honest, in-progress work, measured by a scorer we don’t control.

@@ -372,7 +372,7 @@ benchmarkStrings: PEP conformance ¹ - 98.6% (144/146) + 47.9% (70/146, target 100%) ~99% ~58% full-pass alpha @@ -440,6 +440,51 @@ benchmarkStrings: + +
+
+ +

A scorer we don’t control.

+

+ Our 47.9% is not graded by us. We download and run python/typing’s own + conformance tooling — the exact harness that grades pyright, mypy, + pyrefly, ty, zuban, and pycroscope. +

+
    +
  1. + We pin python/typing’s conformance tool to commit 268d0c4e and + run its get_expected_errors and diff_expected_errors + functions unmodified — we do not reimplement or relax the scorer. +
  2. +
  3. + For each of the 146 test files, the real basilisk check output is + compared against the suite’s # E annotations. Every + severity == error diagnostic counts — no diagnostic + codes are excluded. +
  4. +
  5. + A file passes only with zero discrepancies: every + # E line gets an error, every # E[tag] group is + satisfied, and no error lands on a line the suite does not mark. + One extra diagnostic (a false positive) fails the whole file. +
  6. +
  7. + Today that yields 70 of 146 files passing (47.9%, errors-only), + with 219 false positives and 36 missed required errors still to clear. A stricter + errors-plus-warnings variant scores 40.4% (59/146). The headline figure is the + errors-only 47.9%. +
  8. +
+

+ Full methodology and annotation rules are documented in the + python/typing conformance README. + 100% remains the target — not a claim we make today. +

+
+
+ diff --git a/website/src/zh/docs/comparison.md b/website/src/zh/docs/comparison.md index 26afde5c..b1fc5f8d 100644 --- a/website/src/zh/docs/comparison.md +++ b/website/src/zh/docs/comparison.md @@ -35,7 +35,7 @@ Basilisk 消除了这个选择。没有宽松模式可以回退。 | 功能 | Basilisk | Pyright | mypy | ty | Pyrefly | |---|---|---|---|---|---| | 默认严格 | ✅ | ❌ 选择加入 | ❌ 选择加入 | ❌ 选择加入 | ❌ 选择加入 | -| PEP 符合性¹ | 98.6%(目标 →100%) | ~99% | ~58% | 早期 alpha | ~86% | +| PEP 符合性¹ | 当前 47.9%(目标 →100%) | ~99% | ~58% | 早期 alpha | ~86% | | 实现语言 | Rust | TypeScript | Python/C | Rust | Rust | | 需要运行时 | 无 | Node.js | Python | 无 | 无 | | 完整 LSP(补全、悬停、跳转) | ✅ | 仅 Pylance | ❌ | 基础 | 基础 | @@ -153,7 +153,7 @@ Basilisk 不是现有工具的更快版本。它占据了不同的位置: 5. WASM 插件系统(计划中)——无需分叉即可扩展,设计安全 **Basilisk 尚不是最佳选择的地方:** -- PEP 符合性:Basilisk 通过官方符合性套件的 98.6%(144/146)。Pyright 今天覆盖更多边缘情况。Basilisk 的目标是 100%;还未达到。 +- PEP 符合性:Basilisk 当前通过官方符合性套件的 47.9%(70/146,仅错误),仍有 219 处误报和 36 处遗漏的必需错误正在被压低。Pyright 今天覆盖远更多边缘情况。Basilisk 的目标是 100%;还未达到。 - 插件生态系统:mypy 的 Django 和 SQLAlchemy 插件已经成熟。Basilisk 的 WASM 插件是计划中的。 - 成熟度:Pylance 今天功能完整(虽然是专有的,且仅限 VS Code)。Basilisk 处于 alpha 阶段。 diff --git a/website/src/zh/docs/index.md b/website/src/zh/docs/index.md index adfa6d3d..f296db36 100644 --- a/website/src/zh/docs/index.md +++ b/website/src/zh/docs/index.md @@ -63,7 +63,7 @@ Basilisk 目前处于 **alpha**——核心检查器、LSP 服务器和编辑器 |---|---|---| | 1 | 解析器、解析器、类型检查器、CLI | 完成 | | 2 | LSP 服务器、编辑器扩展(VS Code、Cursor、Zed、Neovim) | 完成 | -| 3 | 扩展规则集,98.6% PEP 符合性,渐进式采用 | 进行中 | +| 3 | 扩展规则集,PEP 符合性攻坚(当前 47.9%,目标 100%),渐进式采用 | 进行中 | | 4 | 所有权与不可变性分析(Mojo 启发) | 计划中 | | 5 | WASM 插件,Django/Pydantic/SQLAlchemy | 计划中 | | 6 | 95%+ PEP,SARIF/JUnit,JetBrains 扩展 | 计划中 | diff --git a/website/src/zh/docs/rules/index.md b/website/src/zh/docs/rules/index.md index a36ddcd6..a6d15766 100644 --- a/website/src/zh/docs/rules/index.md +++ b/website/src/zh/docs/rules/index.md @@ -12,7 +12,7 @@ lang: zh 规则默认全部启用。您可以通过编辑器或 `pyproject.toml`,按文件或路径将单个规则调低——严格是默认值,而不是牢笼。 -Basilisk 内置 **155 个诊断代码**(150 个错误,5 
个警告),覆盖完整的 Python 类型表面(泛型、协议、dataclass、TypedDict、重载、字面量、枚举等),通过[官方 Python 类型符合性套件](https://github.com/python/typing/blob/main/conformance/results/results.html)验证(当前符合率 **98.6%**,144 / 146)。下面记录了两个基础组;完整集合由检查器强制执行。 +Basilisk 内置 **155 个诊断代码**(150 个错误,5 个警告),覆盖完整的 Python 类型表面(泛型、协议、dataclass、TypedDict、重载、字面量、枚举等),由[官方 Python 类型符合性套件](https://github.com/python/typing/blob/main/conformance/results/results.html)评分(当前符合率 **47.9%**,70 / 146,仅错误;目标 100%)。下面记录了两个基础组;完整集合由检查器强制执行。 | 组 | 代码 | 描述 | |---|---|---| diff --git a/website/src/zh/index.njk b/website/src/zh/index.njk index 4ceb0dae..b1013491 100644 --- a/website/src/zh/index.njk +++ b/website/src/zh/index.njk @@ -263,19 +263,19 @@ benchmarkStrings:
- 98.6% + 47.9% PEP 符合性得分

04 — PEP 符合性

-

146 个测试中 144 个通过。
目标:100%。

+

146 个测试中 70 个通过(47.9%)。
目标:100%。

- 通过官方 Python 类型符合性套件测试—— + 由官方 Python 类型符合性套件评分—— 与衡量 Pyright(约 99%¹)、 mypy(约 58%¹) 和 Pyrefly(约 86%¹)的套件相同。 - 21 个类别中的 19 个达到 100%,且零误报。剩余两个 —— 一个 TypeVarTuple 泛型用例和一个协议定义用例 —— 是下一步。 + 目前 22 个类别中有 3 个达到 100%;套件报告 219 处误报和 36 处遗漏的必需错误,我们正将两者都降到零。100% 是目标,而非当下的宣称 —— 这是诚实、进行中的工作,由我们无法操控的评分器衡量。

@@ -382,7 +382,7 @@ benchmarkStrings: PEP 符合性 ¹ - 98.6% (144/146) + 47.9% (70/146,目标 100%) ~99% ~58% 完全通过 alpha @@ -450,6 +450,47 @@ benchmarkStrings: + +
+
+ +

一个我们无法操控的评分器。

+

+ 我们的 47.9% 不是我们自己打的分。我们下载并运行 python/typing 自己的符合性工具—— + 正是用来为 pyright、mypy、pyrefly、ty、zuban 和 pycroscope 打分的同一套程序。 +

+
    +
  1. + 我们将 python/typing 的符合性工具固定在提交 268d0c4e,并 + 原样运行其 get_expected_errors 与 + diff_expected_errors 函数——我们既不重新实现也不放宽评分器。 +
  2. +
  3. + 对全部 146 个测试文件,真实的 basilisk check 输出会与套件的 + # E 注解逐一比对。每一个 severity == error 诊断都计入—— + 不排除任何诊断代码。 +
  4. +
  5. + 文件只有在零差异时才通过:每个 # E 行都得到一个错误, + 每个 # E[tag] 组都被满足,且没有错误落在套件未标记的行上。 + 哪怕多出一个诊断(一处误报)也会使整个文件失败。 +
  6. +
  7. + 今天的结果是 146 个文件中 70 个通过(47.9%,仅错误), + 还有 219 处误报和 36 处遗漏的必需错误待清除。更严格的「错误加警告」变体得分为 + 40.4%(59/146)。头条数字是仅错误的 47.9%。 +
  8. +
+

+ 完整方法论与注解规则记录在 + python/typing 符合性 README。 + 100% 仍是目标——而非我们当下作出的宣称。 +

+
+
+ From 3f1a15eb3c5f7394034f458a7f5f9831007b5d6d Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 20:46:42 +1000 Subject: [PATCH 03/12] fixes --- conformance/conformance_status.csv | 66 +++++++++---------- conformance/score.py | 18 +++-- coverage-thresholds.json | 8 +-- docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md | 2 +- docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md | 4 +- .../CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md | 2 +- docs/plans/FP-REMAINING-NOTES.md | 2 +- docs/plans/LSP-PLAN.md | 2 +- docs/plans/ROADMAP-NEXT-STEPS-PLAN.md | 2 +- docs/specs/CHECKER-ARCHITECTURE-SPEC.md | 20 +++--- docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md | 2 +- website/src/docs/comparison.md | 4 +- website/src/docs/index.md | 2 +- website/src/docs/rules/index.md | 2 +- website/src/index.njk | 18 ++--- website/src/zh/docs/comparison.md | 4 +- website/src/zh/docs/index.md | 2 +- website/src/zh/docs/rules/index.md | 2 +- website/src/zh/index.njk | 16 ++--- 19 files changed, 95 insertions(+), 83 deletions(-) diff --git a/conformance/conformance_status.csv b/conformance/conformance_status.csv index 186a2b84..5233c37c 100644 --- a/conformance/conformance_status.csv +++ b/conformance/conformance_status.csv @@ -4,7 +4,7 @@ basilisk_rules,file,category,status,caught,missed,false_positives ,_enums_members.py,enums,PASS,0,0,0 ,_protocols_modules1.py,protocols,PASS,0,0,0 ,_protocols_modules2.py,protocols,PASS,0,0,0 -,_qualifiers_final_annotation_1.py,qualifiers,PASS,0,0,0 +BSK-W0050,_qualifiers_final_annotation_1.py,qualifiers,FAIL,0,0,1 ,_qualifiers_final_annotation_2.py,qualifiers,PASS,0,0,0 BSK-E0002|BSK-E0048,aliases_explicit.py,aliases,FAIL,21,0,2 BSK-E0002|BSK-E0047|BSK-E0048|BSK-E0092,aliases_implicit.py,aliases,FAIL,22,0,3 @@ -17,20 +17,20 @@ BSK-E0107,aliases_variance.py,aliases,PASS,4,0,0 BSK-E0002|BSK-E0047,annotations_forward_refs.py,annotations,FAIL,19,0,1 
BSK-E0120|BSK-E0131,annotations_generators.py,annotations,PASS,10,0,0 ,annotations_methods.py,annotations,PASS,0,0,0 -BSK-E0002|BSK-E0024|BSK-E0047|BSK-E0048,annotations_typeexpr.py,annotations,FAIL,15,0,2 -BSK-E0002|BSK-E0014|BSK-E0015|BSK-E0122|BSK-E0140,callables_annotation.py,callables,FAIL,16,0,2 +BSK-E0002|BSK-E0011|BSK-E0024|BSK-E0047|BSK-E0048,annotations_typeexpr.py,annotations,FAIL,15,0,3 +BSK-E0002|BSK-E0011|BSK-E0014|BSK-E0015|BSK-E0122|BSK-E0140,callables_annotation.py,callables,FAIL,16,0,5 BSK-E0012|BSK-E0140|BSK-E0141,callables_kwargs.py,callables,PASS,13,0,0 -BSK-E0001|BSK-E0002|BSK-E0140,callables_protocol.py,callables,FAIL,17,0,6 +BSK-E0001|BSK-E0002|BSK-E0011|BSK-E0140,callables_protocol.py,callables,FAIL,17,0,18 BSK-E0002|BSK-E0014|BSK-E0136,callables_subtyping.py,callables,FAIL,32,0,9 -BSK-E0001|BSK-E0002|BSK-E0011|BSK-E0014|BSK-E0036|BSK-E0044|BSK-E0121,classes_classvar.py,classes,FAIL,17,0,3 -BSK-E0002,classes_override.py,classes,FAIL,0,5,2 -BSK-E0111|BSK-E0128,constructors_call_init.py,constructors,PASS,5,0,0 +BSK-E0001|BSK-E0002|BSK-E0011|BSK-E0014|BSK-E0036|BSK-E0044|BSK-E0121|BSK-W0050,classes_classvar.py,classes,FAIL,17,0,4 +BSK-E0002|BSK-E0011,classes_override.py,classes,FAIL,0,5,4 +BSK-E0011|BSK-E0111|BSK-E0128,constructors_call_init.py,constructors,FAIL,5,0,1 BSK-E0002|BSK-E0004|BSK-E0011|BSK-E0041,constructors_call_metaclass.py,constructors,FAIL,2,0,4 BSK-E0002|BSK-E0004|BSK-E0011|BSK-E0074,constructors_call_new.py,constructors,FAIL,2,0,11 BSK-E0002|BSK-E0004|BSK-E0144,constructors_call_type.py,constructors,FAIL,8,0,8 -BSK-E0004|BSK-E0153,constructors_callable.py,constructors,FAIL,12,0,1 +BSK-E0004|BSK-E0011|BSK-E0153,constructors_callable.py,constructors,FAIL,12,0,4 ,constructors_consistency.py,constructors,PASS,0,0,0 -,dataclasses_descriptors.py,dataclasses,PASS,0,0,0 +BSK-E0011,dataclasses_descriptors.py,dataclasses,FAIL,0,0,6 BSK-E0054,dataclasses_final.py,dataclasses,PASS,5,0,0 
BSK-E0052,dataclasses_frozen.py,dataclasses,PASS,2,0,0 BSK-E0001|BSK-E0063,dataclasses_hash.py,dataclasses,FAIL,4,0,1 @@ -40,17 +40,17 @@ BSK-E0005|BSK-E0059,dataclasses_match_args.py,dataclasses,FAIL,1,0,1 BSK-E0060,dataclasses_order.py,dataclasses,PASS,1,0,0 BSK-E0095,dataclasses_postinit.py,dataclasses,PASS,4,0,0 BSK-E0002|BSK-E0005|BSK-E0108,dataclasses_slots.py,dataclasses,FAIL,4,1,5 -BSK-E0142,dataclasses_transform_class.py,dataclasses,PASS,6,0,0 +BSK-E0011|BSK-E0142,dataclasses_transform_class.py,dataclasses,FAIL,6,0,1 BSK-E0142,dataclasses_transform_converter.py,dataclasses,PASS,9,0,0 -BSK-E0069,dataclasses_transform_field.py,dataclasses,PASS,2,0,0 -BSK-E0014|BSK-E0052|BSK-E0060|BSK-E0069|BSK-E0111,dataclasses_transform_func.py,dataclasses,PASS,5,0,0 -BSK-E0004|BSK-E0138,dataclasses_transform_meta.py,dataclasses,FAIL,6,0,1 +BSK-E0011|BSK-E0069,dataclasses_transform_field.py,dataclasses,FAIL,2,0,4 +BSK-E0011|BSK-E0014|BSK-E0052|BSK-E0060|BSK-E0069|BSK-E0111,dataclasses_transform_func.py,dataclasses,FAIL,5,0,1 +BSK-E0004|BSK-E0011|BSK-E0138,dataclasses_transform_meta.py,dataclasses,FAIL,6,0,2 BSK-E0002|BSK-E0005|BSK-E0041|BSK-E0069|BSK-E0096,dataclasses_usage.py,dataclasses,FAIL,8,3,3 -BSK-E0002|BSK-E0039|BSK-E0053,directives_assert_type.py,directives,FAIL,7,0,2 +BSK-E0002|BSK-E0011|BSK-E0039|BSK-E0053,directives_assert_type.py,directives,FAIL,7,0,3 BSK-E0031,directives_cast.py,directives,PASS,3,0,0 BSK-E0115,directives_deprecated.py,directives,PASS,12,0,0 BSK-E0011|BSK-E0012|BSK-E0013|BSK-E0041,directives_no_type_check.py,directives,FAIL,1,0,1 -BSK-E0002|BSK-E0033,directives_reveal_type.py,directives,FAIL,2,0,1 +BSK-E0002|BSK-E0011|BSK-E0033,directives_reveal_type.py,directives,FAIL,2,0,1 ,directives_type_checking.py,directives,PASS,0,0,0 ,directives_type_ignore.py,directives,PASS,0,0,0 ,directives_type_ignore_file1.py,directives,PASS,0,0,0 @@ -61,23 +61,23 @@ BSK-E0040,enums_behaviors.py,enums,FAIL,1,2,0 
BSK-E0002|BSK-E0061,enums_expansion.py,enums,FAIL,1,0,4 BSK-E0002,enums_member_names.py,enums,FAIL,0,0,2 BSK-E0002|BSK-E0066,enums_member_values.py,enums,FAIL,2,0,4 -BSK-E0002|BSK-E0046|BSK-E0067,enums_members.py,enums,FAIL,7,0,1 +BSK-E0002|BSK-E0046|BSK-E0067|BSK-W0040,enums_members.py,enums,FAIL,7,0,2 BSK-E0001|BSK-E0011,exceptions_context_managers.py,exceptions,FAIL,0,0,6 BSK-E0002|BSK-E0027|BSK-E0047|BSK-E0092|BSK-E0132|BSK-E0134,generics_base_class.py,generics,FAIL,7,0,3 -BSK-E0002|BSK-E0026|BSK-E0027|BSK-E0043|BSK-E0148,generics_basic.py,generics,FAIL,13,0,3 +BSK-E0002|BSK-E0011|BSK-E0026|BSK-E0027|BSK-E0043|BSK-E0148,generics_basic.py,generics,FAIL,13,0,5 BSK-E0002|BSK-E0030|BSK-E0091|BSK-E0092,generics_defaults.py,generics,FAIL,5,1,6 BSK-E0002|BSK-E0102|BSK-E0128|BSK-E0130,generics_defaults_referential.py,generics,FAIL,7,0,1 BSK-E0002|BSK-E0014|BSK-E0092,generics_defaults_specialization.py,generics,FAIL,3,0,1 BSK-E0026|BSK-E0047,generics_paramspec_basic.py,generics,PASS,7,0,0 -BSK-E0122,generics_paramspec_components.py,generics,PASS,16,0,0 +BSK-E0011|BSK-E0122,generics_paramspec_components.py,generics,PASS,16,0,0 BSK-E0122,generics_paramspec_semantics.py,generics,PASS,9,0,0 BSK-E0092|BSK-E0122,generics_paramspec_specialization.py,generics,PASS,5,0,0 BSK-E0117|BSK-E0130,generics_scoping.py,generics,FAIL,10,4,0 -,generics_self_advanced.py,generics,PASS,0,0,0 +BSK-W0050,generics_self_advanced.py,generics,FAIL,0,0,1 BSK-E0075,generics_self_attributes.py,generics,PASS,2,0,0 BSK-E0078,generics_self_basic.py,generics,PASS,3,0,0 -BSK-E0077,generics_self_protocols.py,generics,PASS,2,0,0 -BSK-E0025|BSK-E0078|BSK-E0094,generics_self_usage.py,generics,FAIL,11,0,1 +BSK-E0077|BSK-W0050,generics_self_protocols.py,generics,FAIL,2,0,4 +BSK-E0011|BSK-E0025|BSK-E0078|BSK-E0094|BSK-W0050,generics_self_usage.py,generics,FAIL,11,0,3 BSK-E0042,generics_syntax_compatibility.py,generics,PASS,2,0,0 
BSK-E0002|BSK-E0043|BSK-E0089|BSK-E0105,generics_syntax_declarations.py,generics,FAIL,10,0,1 BSK-E0002|BSK-E0055|BSK-E0130,generics_syntax_infer_variance.py,generics,FAIL,18,0,4 @@ -87,15 +87,15 @@ BSK-E0002|BSK-E0085,generics_typevartuple_args.py,generics,FAIL,8,0,1 BSK-E0002|BSK-E0055|BSK-E0083|BSK-E0084|BSK-E0085|BSK-E0086,generics_typevartuple_basic.py,generics,FAIL,13,1,3 BSK-E0002|BSK-E0082,generics_typevartuple_callable.py,generics,FAIL,1,0,1 BSK-E0002,generics_typevartuple_concat.py,generics,FAIL,0,0,2 -BSK-E0002,generics_typevartuple_overloads.py,generics,FAIL,0,0,1 +BSK-E0002|BSK-E0011,generics_typevartuple_overloads.py,generics,FAIL,0,0,2 BSK-E0002|BSK-E0086|BSK-E0130|BSK-E0139,generics_typevartuple_specialization.py,generics,FAIL,6,0,11 BSK-E0002|BSK-E0081,generics_typevartuple_unpack.py,generics,FAIL,1,0,2 BSK-E0002|BSK-E0026|BSK-E0055|BSK-E0080,generics_upper_bound.py,generics,FAIL,3,1,1 BSK-E0055|BSK-E0107,generics_variance.py,generics,PASS,9,0,0 BSK-E0002|BSK-E0130,generics_variance_inference.py,generics,FAIL,23,0,3 BSK-E0071,historical_positional.py,historical,PASS,4,0,0 -BSK-E0002|BSK-E0127,literals_interactions.py,literals,FAIL,4,0,2 -BSK-E0002|BSK-E0014|BSK-E0051|BSK-E0109|BSK-E0126|BSK-E0129,literals_literalstring.py,literals,FAIL,9,0,9 +BSK-E0002|BSK-E0011|BSK-E0127,literals_interactions.py,literals,FAIL,4,0,3 +BSK-E0002|BSK-E0011|BSK-E0014|BSK-E0051|BSK-E0109|BSK-E0126|BSK-E0129,literals_literalstring.py,literals,FAIL,9,0,10 BSK-E0002|BSK-E0014|BSK-E0051|BSK-E0068|BSK-E0117|BSK-E0129|BSK-E0130,literals_parameterizations.py,literals,FAIL,17,0,1 BSK-E0002|BSK-E0014|BSK-E0129,literals_semantics.py,literals,FAIL,4,0,4 BSK-E0111|BSK-E0116|BSK-E0143,namedtuples_define_class.py,namedtuples,PASS,14,0,0 @@ -104,28 +104,28 @@ BSK-E0073,namedtuples_type_compat.py,namedtuples,PASS,2,0,0 BSK-E0143,namedtuples_usage.py,namedtuples,PASS,8,0,0 BSK-E0002|BSK-E0011|BSK-E0101|BSK-E0112,narrowing_typeguard.py,narrowing,FAIL,4,0,2 
BSK-E0002|BSK-E0011|BSK-E0101|BSK-E0112|BSK-E0113,narrowing_typeis.py,narrowing,FAIL,9,0,2 -BSK-E0072,overloads_basic.py,overloads,PASS,1,0,0 +BSK-E0011|BSK-E0072,overloads_basic.py,overloads,FAIL,1,0,1 ,overloads_consistency.py,overloads,FAIL,0,2,0 BSK-E0020|BSK-E0034,overloads_definitions.py,overloads,FAIL,0,7,0 -BSK-E0012|BSK-E0041|BSK-E0076,overloads_evaluation.py,overloads,PASS,4,0,0 -BSK-E0004|BSK-E0099|BSK-E0146,protocols_class_objects.py,protocols,FAIL,8,0,1 -BSK-E0001|BSK-E0011|BSK-E0036|BSK-E0097|BSK-E0121,protocols_definition.py,protocols,FAIL,21,0,8 +BSK-E0011|BSK-E0012|BSK-E0041|BSK-E0076,overloads_evaluation.py,overloads,FAIL,4,0,5 +BSK-E0004|BSK-E0011|BSK-E0099|BSK-E0146|BSK-W0050,protocols_class_objects.py,protocols,FAIL,8,0,3 +BSK-E0001|BSK-E0011|BSK-E0036|BSK-E0097|BSK-E0121,protocols_definition.py,protocols,FAIL,21,0,9 BSK-E0002|BSK-E0099|BSK-E0118|BSK-E0123|BSK-E0124,protocols_explicit.py,protocols,FAIL,6,0,3 BSK-E0002|BSK-E0130|BSK-E0137,protocols_generic.py,protocols,FAIL,9,0,3 BSK-E0002|BSK-E0098|BSK-E0099|BSK-E0121,protocols_merging.py,protocols,FAIL,6,0,1 BSK-E0079,protocols_modules.py,protocols,PASS,3,0,0 ,protocols_recursive.py,protocols,PASS,0,0,0 -BSK-E0002|BSK-E0114|BSK-E0119,protocols_runtime_checkable.py,protocols,FAIL,6,0,3 +BSK-E0002|BSK-E0011|BSK-E0114|BSK-E0119|BSK-W0050,protocols_runtime_checkable.py,protocols,FAIL,6,0,4 ,protocols_self.py,protocols,PASS,0,0,0 BSK-E0002|BSK-E0014|BSK-E0099,protocols_subtyping.py,protocols,FAIL,7,0,4 BSK-E0110|BSK-E0133,protocols_variance.py,protocols,PASS,5,0,0 BSK-E0045|BSK-E0058,qualifiers_annotated.py,qualifiers,PASS,20,0,0 -BSK-E0014|BSK-E0041|BSK-E0044|BSK-E0054|BSK-E0064,qualifiers_final_annotation.py,qualifiers,PASS,26,0,0 +BSK-E0014|BSK-E0041|BSK-E0044|BSK-E0054|BSK-E0064|BSK-W0050,qualifiers_final_annotation.py,qualifiers,FAIL,26,0,3 BSK-E0010|BSK-E0025|BSK-E0034,qualifiers_final_decorator.py,qualifiers,FAIL,3,3,1 -BSK-E0001|BSK-E0002,specialtypes_any.py,specialtypes,FAIL,0,0,2 
-BSK-E0002|BSK-E0062|BSK-E0070,specialtypes_never.py,specialtypes,FAIL,3,0,2 +BSK-E0001|BSK-E0002|BSK-E0011,specialtypes_any.py,specialtypes,FAIL,0,0,3 +BSK-E0002|BSK-E0011|BSK-E0062|BSK-E0070,specialtypes_never.py,specialtypes,FAIL,3,0,2 BSK-E0002|BSK-E0012|BSK-E0014,specialtypes_none.py,specialtypes,FAIL,3,0,1 -BSK-E0002|BSK-E0065,specialtypes_promotions.py,specialtypes,FAIL,1,0,1 +BSK-E0002|BSK-E0065|BSK-W0050,specialtypes_promotions.py,specialtypes,FAIL,1,0,2 BSK-E0002|BSK-E0015|BSK-E0092|BSK-E0145,specialtypes_type.py,specialtypes,FAIL,9,0,6 BSK-E0002|BSK-E0014|BSK-E0023|BSK-E0045|BSK-E0147,tuples_type_compat.py,tuples,FAIL,16,0,11 BSK-E0011|BSK-E0014|BSK-E0049|BSK-E0090,tuples_type_form.py,tuples,FAIL,11,0,1 diff --git a/conformance/score.py b/conformance/score.py index 637732d3..3bb2cd5b 100644 --- a/conformance/score.py +++ b/conformance/score.py @@ -17,10 +17,12 @@ algorithm consumes. A file passes iff upstream's `errors_diff` is empty — upstream's exact rule: `"Fail" if errors_diff.strip() else "Pass"`. -No diagnostic codes are excluded. Every `severity == "error"` diagnostic -`basilisk check` emits is counted, including the strict-by-default completeness -rules. If one fires where the suite does not mark `# E`, that is a real false -positive and it fails the file — same as for any other checker. +No diagnostic codes are excluded. By default this counts EVERY diagnostic +`basilisk check` emits — both errors AND warnings — which is the strictest +grading and matches how the reference checker pyright is graded upstream +(`if kind not in ("error", "warning")`). Pass `--errors-only` for the looser +errors-only view. Either way, any diagnostic on a line the suite does not mark +`# E` is a real false positive and fails the file — same as for any checker. 
Usage: python3 conformance/score.py [--bin PATH] [--gate] [--count-warnings] @@ -282,7 +284,11 @@ def write_csv(root: Path, rows: list[Row]) -> None: def parse_args(argv: list[str]) -> dict: - opts: dict = {"bin": None, "gate": False, "warn": False, "dir": None, "offline": False} + # Default is the STRICTEST grading: every diagnostic basilisk emits (errors + # AND warnings) is counted as "an error was reported", which is also how the + # reference checker pyright is graded upstream. `--errors-only` reports the + # looser errors-only view. `--count-warnings` is accepted for back-compat. + opts: dict = {"bin": None, "gate": False, "warn": True, "dir": None, "offline": False} it = iter(argv) for a in it: if a == "--bin": @@ -291,6 +297,8 @@ def parse_args(argv: list[str]) -> dict: opts["gate"] = True elif a == "--count-warnings": opts["warn"] = True + elif a == "--errors-only": + opts["warn"] = False elif a == "--conformance-dir": opts["dir"] = next(it, None) elif a == "--offline": diff --git a/coverage-thresholds.json b/coverage-thresholds.json index bfa20d86..c3a899e3 100644 --- a/coverage-thresholds.json +++ b/coverage-thresholds.json @@ -41,9 +41,9 @@ } }, "conformance": { - "_doc": "Minimum PEP conformance pass percentage (files passing / total files), computed by the REAL python/typing conformance calculator (conformance/score.py downloads upstream main.py at the pinned ref and runs its own get_expected_errors + diff_expected_errors; NO excluded diagnostic codes). A file passes only when upstream's errors_diff is empty. Ratchet UP only. HONEST baseline (replacing a previously rigged 100% from a lenient in-repo harness): 70/146 = 47.9% (errors only), pinned to python/typing@268d0c4e. Target is 100%; this is the real current number.", - "threshold": 47, - "_fp_ceiling_doc": "Maximum total false-positive diagnostics across the suite (errors Basilisk reports on lines the suite does NOT mark with # E, plus errors outside satisfied # E[tag] groups). 
Ratchet DOWN only \u2014 the mirror of the pass-percentage gate. Enforced via conformance/score.py --gate (run by conformance_tests.rs inside make test). HONEST baseline: 219 (the prior 0 was a lie produced by excluding 9 diagnostic codes from scoring). Drive this DOWN.", - "max_false_positives": 219 + "_doc": "Minimum PEP conformance pass percentage (files passing / total files), computed by the REAL python/typing conformance calculator (conformance/score.py downloads upstream main.py at the pinned ref and runs its own get_expected_errors + diff_expected_errors; NO excluded diagnostic codes). A file passes only when upstream's errors_diff is empty. The score uses the STRICTEST grading: every basilisk diagnostic (errors AND warnings) counts, matching how the reference checker pyright is graded. Ratchet UP only. HONEST baseline (replacing a previously rigged 100% from a lenient in-repo harness): 59/146 = 40.4%, pinned to python/typing@268d0c4e. (The looser errors-only view is 70/146 = 47.9%, available via score.py --errors-only.) Target is 100%; this is the real current number.", + "threshold": 40, + "_fp_ceiling_doc": "Maximum total false-positive diagnostics across the suite (diagnostics Basilisk reports on lines the suite does NOT mark with # E, plus diagnostics outside satisfied # E[tag] groups) under the strictest errors+warnings grading. Ratchet DOWN only \u2014 the mirror of the pass-percentage gate. Enforced via conformance/score.py --gate (run by conformance_tests.rs inside make test). HONEST baseline: 285 (the prior 0 was a lie produced by excluding 9 diagnostic codes from scoring). 
Drive this DOWN.", + "max_false_positives": 285 } } diff --git a/docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md b/docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md index 71db5acc..5d00cd51 100644 --- a/docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md +++ b/docs/plans/CHECK-ELIMINATE-FALSE-POSITIVES.md @@ -8,7 +8,7 @@ > `get_expected_errors` + `diff_expected_errors`; see [CHKARCH-CONFORMANCE]). > A file passes only with an **empty upstream `errors_diff`** (false positives > fail the file), and **no diagnostic codes are excluded**. Honest current -> baseline: **70/146 = 47.9%**, **219 false positives**, 36 missed. The +> baseline: **59/146 = 40.4%**, **285 false positives**, 36 missed. The > still-valid part of this plan is the *strategy* — driving specific rules' > false positives down; the *counts* below are stale. diff --git a/docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md b/docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md index a9913bd3..0ea3bd80 100644 --- a/docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md +++ b/docs/plans/CHECKER-PEP-CONFORMANCE-PLAN.md @@ -4,8 +4,8 @@ > 93.84%", category "100%" rows) came from a since-removed in-repo harness that > excluded 9 diagnostic codes and ignored false positives. The score is now > computed by the **real `python/typing` calculator** (`conformance/score.py`, -> see [CHKARCH-CONFORMANCE]); the honest current number is **70/146 = 47.9%** -> (errors-only), 219 false positives, 36 missed. Treat the figures below as +> see [CHKARCH-CONFORMANCE]); the honest current number is **59/146 = 40.4%** +> (errors+warnings, strictest), 285 false positives, 36 missed. Treat the figures below as > historical task notes, not the live score. 
> > **Run**: `make conformance` · **Status CSV**: `conformance/conformance_status.csv` diff --git a/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md b/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md index 4d871c86..bd2c4217 100644 --- a/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md +++ b/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md @@ -418,5 +418,5 @@ Phases 1 and 2 are independent and can be parallelized. Phase 3 depends on Phase - [x] 5a. E0014 — `VarCheckContext` with `SubtypeContext`, uses `is_subtype_with_context()` for assignability - [x] 5b. E0013 — `SubtypeContext` passed to `check_function()`, removed `contains_named` early exit for Named types - [x] 5c. E0053 — `is_likely_narrowed()` heuristic suppresses narrowing-dependent FPs; Union normalization in `types_match()` - - [x] 5d. Full conformance suite verification — the "**18 FPs** (target < 71)" result came from a rigged in-repo harness and is FALSE; the official `python/typing` scorer reports **219 false positives** (70/146 files passing, 47.9% errors-only). Driving FPs down remains active work. + - [x] 5d. Full conformance suite verification — the "**18 FPs** (target < 71)" result came from a rigged in-repo harness and is FALSE; the official `python/typing` scorer reports **285 false positives** (59/146 files passing, 40.4% counting errors+warnings). Driving FPs down remains active work. - [x] Checker-side modules: `narrowing.rs` (NarrowingContext), `expr_inference.rs` (ExpressionInferrer), `constraint_solver.rs` (ConstraintSolver) diff --git a/docs/plans/FP-REMAINING-NOTES.md b/docs/plans/FP-REMAINING-NOTES.md index aaa80de3..55ce8aa5 100644 --- a/docs/plans/FP-REMAINING-NOTES.md +++ b/docs/plans/FP-REMAINING-NOTES.md @@ -101,7 +101,7 @@ structural matcher (positive-match semantics already reject `float`→`str`). - NOTE: the "144/146 / suite FP 21→11" figures below were produced by a rigged in-repo harness (excluded 9 codes, ignored false positives) and are FALSE. 
The official `python/typing` scorer (run unmodified, pinned commit) reports - **70/146 passing (47.9%, errors-only), 219 false positives, 36 missed errors**. + **59/146 passing (40.4%, errors+warnings strictest), 285 false positives, 36 missed errors**. Treat the per-lane numbers below as historical, not verified. - B3 lane (E0111/E0143/E0115) = DONE, (legacy/rigged) figures: 144/146, caught=917, missed=37 (unchanged, both pre-failing files), suite FP 21→11. diff --git a/docs/plans/LSP-PLAN.md b/docs/plans/LSP-PLAN.md index 9877aa02..4e9f8b33 100644 --- a/docs/plans/LSP-PLAN.md +++ b/docs/plans/LSP-PLAN.md @@ -6,7 +6,7 @@ ## Status -Phases 0–6 are COMPLETE. Phase 7 (cross-module foundation) is MOSTLY COMPLETE — stub infrastructure, import graph, cross-file symbols all operational. Phase 3.5 (PEP conformance push) is ACTIVE — the official `python/typing` scorer (run unmodified, pinned commit) currently reports **70/146 files passing (47.9%, errors-only)**, with 219 false positives and 36 missed required errors still to clear. (Earlier in-repo figures such as "124/146, 18 FPs" came from a rigged harness that excluded codes and ignored false positives; they are superseded.) +Phases 0–6 are COMPLETE. Phase 7 (cross-module foundation) is MOSTLY COMPLETE — stub infrastructure, import graph, cross-file symbols all operational. Phase 3.5 (PEP conformance push) is ACTIVE — the official `python/typing` scorer (run unmodified, pinned commit) currently reports **59/146 files passing (40.4%, errors+warnings strictest)**, with 285 false positives and 36 missed required errors still to clear. (Earlier in-repo figures such as "124/146, 18 FPs" came from a rigged harness that excluded codes and ignored false positives; they are superseded.) 
--- diff --git a/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md b/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md index c823397f..6f1d0af7 100644 --- a/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md +++ b/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md @@ -113,7 +113,7 @@ The bar to credibly displace Pylance is feature *and* correctness parity on the actually feel day to day. Rough priorities (refine with human judgment — see TODO): - **Conformance & correctness**: per the official `python/typing` scorer (run unmodified, pinned - commit), PEP conformance is currently **70/146 files PASS (47.9%, errors-only)**, with **219 false + commit), PEP conformance is currently **59/146 files PASS (40.4%, errors+warnings strictest)**, with **285 false positives** and 36 missed required errors. (Earlier "135/146 / ~18 FPs" figures came from a rigged in-repo harness that excluded codes and ignored false positives; they are superseded.) Failing files cluster in Protocols, Callables, TypeVarTuple, ParamSpec, TypedDicts. FPs hurt credibility more diff --git a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md index 68f1e63d..3ff8405a 100644 --- a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md +++ b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md @@ -58,7 +58,7 @@ See the project README for competitive analysis. 
| Implementation | TypeScript | Python/C | Rust | Rust | Rust | Rust | **Rust** | | License | MIT | MIT | MIT | MIT | AGPL | MIT | **MIT** | | Default strictness | Gradual | Gradual | Gradual | Gradual | Gradual | N/A | **Strict only** | -| PEP conformance (current) | ~95% | ~85% | ~15% | ~58% | ~69% | N/A | **47.9%** | +| PEP conformance (current) | ~95% | ~85% | ~15% | ~58% | ~69% | N/A | **40.4%** | | PEP conformance target | — | — | — | — | — | N/A | **100%** | | LSP server | Yes | No | Yes | Yes | Yes | No | **Yes** | | Incremental computation | Lazy eval | Daemon | Salsa | Module-level | No | N/A | **Salsa** | @@ -286,7 +286,7 @@ The `# type:` prefix ensures compatibility with editors and tools that already r ### Python Typing PEP Coverage {#CHKARCH-PEPS} -Basilisk targets **100% conformance** with the Python typing specification. This is a target, not a present-day achievement: the official `python/typing` conformance scorer (pinned commit, run unmodified in CI) currently reports **70 of 146 files passing (47.9%, errors-only)**, with 219 false positives and 36 missed required errors still to clear. We run that suite in CI on every change and ratchet the pass rate up. +Basilisk targets **100% conformance** with the Python typing specification. This is a target, not a present-day achievement: the official `python/typing` conformance scorer (pinned commit, run unmodified in CI) currently reports **59 of 146 files passing (40.4%, counting errors and warnings — the strictest grading)**, with 285 false positives and 36 missed required errors still to clear. We run that suite in CI on every change and ratchet the pass rate up. #### Foundation PEPs {#CHKARCH-PEPS-FOUNDATION} @@ -1383,10 +1383,13 @@ checkers (pyright, mypy, pyrefly, ty, zuban, pycroscope) are graded with. `errors_diff` is empty — every `# E` line gets an error, every `# E[tag]` group is satisfied, and **no error lands on a line the suite does not mark**. 
`conformance_automated = "Fail" if errors_diff.strip() else "Pass"`. -- **No excluded codes.** Every `severity == "error"` diagnostic `basilisk check` - emits is counted, including the strict-by-default completeness rules - (E0001–E0005, E0010, E0011, E0023, E0025). One firing on an unannotated line - is a real false positive and fails the file — same as for any other checker. +- **No excluded codes.** By default the scorer counts **every** diagnostic + `basilisk check` emits — errors **and** warnings, the strictest grading and how + pyright is graded — including the strict-by-default completeness rules + (E0001–E0005, E0010, E0011, E0023, E0025). (`score.py` defaults to this strict + grading; pass `--errors-only` for the looser errors-only view.) One firing on an + unannotated line is a real false positive and fails the file — same as for any + other checker. - **Gate**: [`crates/basilisk-cli/tests/conformance_tests.rs`](../../crates/basilisk-cli/tests/conformance_tests.rs) is a thin wrapper that runs `score.py --gate` inside `make test`. The pass-percentage floor and false-positive ceiling live in @@ -1395,8 +1398,9 @@ checkers (pyright, mypy, pyrefly, ty, zuban, pycroscope) are graded with. **down**. Per-file results are written to `conformance/conformance_status.csv`. - **Honest baseline** (replacing a previously rigged in-repo harness that excluded the 9 codes above and ignored false positives to fake 100%): - **70 / 146 = 47.9%** (errors-only), 219 false positives, 36 missed required - errors. The errors+warnings variant is 59 / 146 = 40.4%. Target: 100%. + **59 / 146 = 40.4%** (strictest grading: every diagnostic, errors AND warnings, + counted — as pyright is graded), 285 false positives, 36 missed required errors. + The looser errors-only view is 70 / 146 = 47.9%. Target: 100%. 
### Mutation Testing Ratchet {#CHKARCH-TESTING-MUTATION-RATCHET} diff --git a/docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md b/docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md index 177b7e13..4969ee2f 100644 --- a/docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md +++ b/docs/specs/CHECKER-TYPE-INFERENCE-SPEC.md @@ -972,7 +972,7 @@ query("SELECT * FROM " + table) # BSK-E0015 — not LiteralString ## Conformance Test Coverage {#TYPEINF-CONFORMANCE} -The [Python typing conformance suite](https://github.com/python/typing/tree/main/conformance) is the canonical benchmark. Basilisk **targets** 100% conformance (Pass on all 146 test files) — a target, not a present-day achievement. The official `python/typing` scorer currently reports **70 of 146 files passing (47.9%, errors-only)**. +The [Python typing conformance suite](https://github.com/python/typing/tree/main/conformance) is the canonical benchmark. Basilisk **targets** 100% conformance (Pass on all 146 test files) — a target, not a present-day achievement. The official `python/typing` scorer currently reports **59 of 146 files passing (40.4%, counting errors and warnings — the strictest grading)**. Inference-relevant conformance tests: diff --git a/website/src/docs/comparison.md b/website/src/docs/comparison.md index 6cef5800..4d9bfa9a 100644 --- a/website/src/docs/comparison.md +++ b/website/src/docs/comparison.md @@ -40,7 +40,7 @@ Basilisk removes the choice. There is no permissive mode to fall back to. 
| Feature | Basilisk | Pyright | mypy | ty | Pyrefly | |---|---|---|---|---|---| | Strict by default | ✅ | ❌ opt-in | ❌ opt-in | ❌ opt-in | ❌ opt-in | -| PEP conformance¹ | 47.9% current (→100% target) | ~99% | ~58% | early alpha | ~86% | +| PEP conformance¹ | 40.4% current (→100% target) | ~99% | ~58% | early alpha | ~86% | | Implementation | Rust | TypeScript | Python/C | Rust | Rust | | Runtime required | None | Node.js | Python | None | None | | Full LSP (completions, hover, goto) | ✅ | Pylance only | ❌ | Basic | Basic | @@ -158,7 +158,7 @@ Basilisk is not a faster version of an existing tool. It occupies a different po 5. WASM plugin system (planned) — extensible without forking, secure by design **Where Basilisk is not yet the best choice:** -- PEP conformance: Basilisk currently passes 47.9% of the official conformance suite (70/146, errors-only), with 219 false positives and 36 missed required errors still being driven down. Pyright covers far more edge cases today. Basilisk's target is 100%; it's not there yet. +- PEP conformance: Basilisk currently passes 40.4% of the official conformance suite (59/146, counting errors+warnings — the strictest grading), with 285 false positives and 36 missed required errors still being driven down. Pyright covers far more edge cases today. Basilisk's target is 100%; it's not there yet. - Plugin ecosystem: mypy's Django and SQLAlchemy plugins are mature. Basilisk's WASM plugins are planned. - Maturity: Pylance is feature-complete today (though proprietary and VS Code only). Basilisk is in alpha. 
diff --git a/website/src/docs/index.md b/website/src/docs/index.md index 6f91f985..d78b1707 100644 --- a/website/src/docs/index.md +++ b/website/src/docs/index.md @@ -68,7 +68,7 @@ Basilisk is currently in **alpha** — the core checker, LSP server, and editor |---|---|---| | 1 | Parser, resolver, type checker, CLI | Complete | | 2 | LSP server, editor extensions (VS Code, Cursor, Zed, Neovim) | Complete | -| 3 | Expanded rule set, PEP conformance push (currently 47.9%, target 100%), gradual adoption | In progress | +| 3 | Expanded rule set, PEP conformance push (currently 40.4%, target 100%), gradual adoption | In progress | | 4 | Ownership & immutability analysis (Mojo-inspired) | Planned | | 5 | WASM plugins, Django/Pydantic/SQLAlchemy | Planned | | 6 | 95%+ PEP, SARIF/JUnit, JetBrains extension | Planned | diff --git a/website/src/docs/rules/index.md b/website/src/docs/rules/index.md index 54d83ac0..0120defe 100644 --- a/website/src/docs/rules/index.md +++ b/website/src/docs/rules/index.md @@ -17,7 +17,7 @@ Every Basilisk diagnostic has a unique code in the format `BSK-EXXXX` (error) or Rules are enabled by default. You can dial individual rules down per-file or per-path from your editor or `pyproject.toml` — strict is the default, not a cage. -Basilisk ships **155 diagnostic codes** (150 errors, 5 warnings) spanning the full Python typing surface — generics, protocols, dataclasses, TypedDicts, overloads, literals, enums, and more — and is scored by the [official Python typing conformance suite](https://github.com/python/typing/blob/main/conformance/results/results.html) (currently **47.9%**, 70 / 146 errors-only; target 100%). 
The two foundational groups have worked examples: +Basilisk ships **155 diagnostic codes** (150 errors, 5 warnings) spanning the full Python typing surface — generics, protocols, dataclasses, TypedDicts, overloads, literals, enums, and more — and is scored by the [official Python typing conformance suite](https://github.com/python/typing/blob/main/conformance/results/results.html) (currently **40.4%**, 59 / 146 (errors+warnings, strictest); target 100%). The two foundational groups have worked examples: | Group | Codes | Description | |---|---|---| diff --git a/website/src/index.njk b/website/src/index.njk index 431b23a8..f8535b2f 100644 --- a/website/src/index.njk +++ b/website/src/index.njk @@ -252,19 +252,19 @@ benchmarkStrings:
- 47.9% + 40.4% PEP conformance score

04 — PEP conformance

-

70 of 146 tests passing (47.9%).
Target: 100%.

+

59 of 146 tests passing (40.4%).
Target: 100%.

Scored by the official Python typing conformance suite — the same harness used to measure Pyright (~99%¹), mypy (~58%¹), and Pyrefly (~86%¹). - Today 3 of 22 categories pass at 100%; the suite reports 219 false-positive diagnostics and 36 missed required errors, and we are driving both to zero. 100% is the target, not a present-day claim — this is honest, in-progress work, measured by a scorer we don’t control. + Today 3 of 21 categories pass at 100%; the suite reports 285 false-positive diagnostics and 36 missed required errors, and we are driving both to zero. 100% is the target, not a present-day claim — this is honest, in-progress work, measured by a scorer we don’t control.

@@ -372,7 +372,7 @@ benchmarkStrings: PEP conformance ¹ - 47.9% (70/146, target 100%) + 40.4% (59/146, target 100%) ~99% ~58% full-pass alpha @@ -448,7 +448,7 @@ benchmarkStrings:

A scorer we don’t control.

- Our 47.9% is not graded by us. We download and run python/typing’s own + Our 40.4% is not graded by us. We download and run python/typing’s own conformance tooling — the exact harness that grades pyright, mypy, pyrefly, ty, zuban, and pycroscope.

@@ -471,10 +471,10 @@ benchmarkStrings: One extra diagnostic (a false positive) fails the whole file.
  • - Today that yields 70 of 146 files passing (47.9%, errors-only), - with 219 false positives and 36 missed required errors still to clear. A stricter - errors-plus-warnings variant scores 40.4% (59/146). The headline figure is the - errors-only 47.9%. + Today that yields 59 of 146 files passing (40.4%) — counting + every diagnostic basilisk emits, errors and warnings, the strictest grading (and how + pyright is graded). 285 false positives and 36 missed required errors remain to clear. + The looser errors-only view is 70 of 146 (47.9%). The headline is the strict 40.4%.
  • diff --git a/website/src/zh/docs/comparison.md b/website/src/zh/docs/comparison.md index b1fc5f8d..559f5843 100644 --- a/website/src/zh/docs/comparison.md +++ b/website/src/zh/docs/comparison.md @@ -35,7 +35,7 @@ Basilisk 消除了这个选择。没有宽松模式可以回退。 | 功能 | Basilisk | Pyright | mypy | ty | Pyrefly | |---|---|---|---|---|---| | 默认严格 | ✅ | ❌ 选择加入 | ❌ 选择加入 | ❌ 选择加入 | ❌ 选择加入 | -| PEP 符合性¹ | 当前 47.9%(目标 →100%) | ~99% | ~58% | 早期 alpha | ~86% | +| PEP 符合性¹ | 当前 40.4%(目标 →100%) | ~99% | ~58% | 早期 alpha | ~86% | | 实现语言 | Rust | TypeScript | Python/C | Rust | Rust | | 需要运行时 | 无 | Node.js | Python | 无 | 无 | | 完整 LSP(补全、悬停、跳转) | ✅ | 仅 Pylance | ❌ | 基础 | 基础 | @@ -153,7 +153,7 @@ Basilisk 不是现有工具的更快版本。它占据了不同的位置: 5. WASM 插件系统(计划中)——无需分叉即可扩展,设计安全 **Basilisk 尚不是最佳选择的地方:** -- PEP 符合性:Basilisk 当前通过官方符合性套件的 47.9%(70/146,仅错误),仍有 219 处误报和 36 处遗漏的必需错误正在被压低。Pyright 今天覆盖远更多边缘情况。Basilisk 的目标是 100%;还未达到。 +- PEP 符合性:Basilisk 当前通过官方符合性套件的 40.4%(59/146,错误加警告,最严格评分),仍有 285 处误报和 36 处遗漏的必需错误正在被压低。Pyright 今天覆盖远更多边缘情况。Basilisk 的目标是 100%;还未达到。 - 插件生态系统:mypy 的 Django 和 SQLAlchemy 插件已经成熟。Basilisk 的 WASM 插件是计划中的。 - 成熟度:Pylance 今天功能完整(虽然是专有的,且仅限 VS Code)。Basilisk 处于 alpha 阶段。 diff --git a/website/src/zh/docs/index.md b/website/src/zh/docs/index.md index f296db36..e23211f9 100644 --- a/website/src/zh/docs/index.md +++ b/website/src/zh/docs/index.md @@ -63,7 +63,7 @@ Basilisk 目前处于 **alpha**——核心检查器、LSP 服务器和编辑器 |---|---|---| | 1 | 解析器、解析器、类型检查器、CLI | 完成 | | 2 | LSP 服务器、编辑器扩展(VS Code、Cursor、Zed、Neovim) | 完成 | -| 3 | 扩展规则集,PEP 符合性攻坚(当前 47.9%,目标 100%),渐进式采用 | 进行中 | +| 3 | 扩展规则集,PEP 符合性攻坚(当前 40.4%,目标 100%),渐进式采用 | 进行中 | | 4 | 所有权与不可变性分析(Mojo 启发) | 计划中 | | 5 | WASM 插件,Django/Pydantic/SQLAlchemy | 计划中 | | 6 | 95%+ PEP,SARIF/JUnit,JetBrains 扩展 | 计划中 | diff --git a/website/src/zh/docs/rules/index.md b/website/src/zh/docs/rules/index.md index a6d15766..af72091e 100644 --- a/website/src/zh/docs/rules/index.md +++ b/website/src/zh/docs/rules/index.md @@ -12,7 +12,7 @@ lang: zh 规则默认全部启用。您可以通过编辑器或 
`pyproject.toml`,按文件或路径将单个规则调低——严格是默认值,而不是牢笼。 -Basilisk 内置 **155 个诊断代码**(150 个错误,5 个警告),覆盖完整的 Python 类型表面(泛型、协议、dataclass、TypedDict、重载、字面量、枚举等),由[官方 Python 类型符合性套件](https://github.com/python/typing/blob/main/conformance/results/results.html)评分(当前符合率 **47.9%**,70 / 146,仅错误;目标 100%)。下面记录了两个基础组;完整集合由检查器强制执行。 +Basilisk 内置 **155 个诊断代码**(150 个错误,5 个警告),覆盖完整的 Python 类型表面(泛型、协议、dataclass、TypedDict、重载、字面量、枚举等),由[官方 Python 类型符合性套件](https://github.com/python/typing/blob/main/conformance/results/results.html)评分(当前符合率 **40.4%**,59 / 146(错误加警告,最严格);目标 100%)。下面记录了两个基础组;完整集合由检查器强制执行。 | 组 | 代码 | 描述 | |---|---|---| diff --git a/website/src/zh/index.njk b/website/src/zh/index.njk index b1013491..94546413 100644 --- a/website/src/zh/index.njk +++ b/website/src/zh/index.njk @@ -263,19 +263,19 @@ benchmarkStrings:

    - 47.9% + 40.4% PEP 符合性得分

    04 — PEP 符合性

    -

    146 个测试中 70 个通过(47.9%)。
    目标:100%。

    +

    146 个测试中 59 个通过(40.4%)。
    目标:100%。

    官方 Python 类型符合性套件评分—— 与衡量 Pyright(约 99%¹)、 mypy(约 58%¹) 和 Pyrefly(约 86%¹)的套件相同。 - 目前 22 个类别中有 3 个达到 100%;套件报告 219 处误报和 36 处遗漏的必需错误,我们正将两者都降到零。100% 是目标,而非当下的宣称 —— 这是诚实、进行中的工作,由我们无法操控的评分器衡量。 + 目前 21 个类别中有 3 个达到 100%;套件报告 285 处误报和 36 处遗漏的必需错误,我们正将两者都降到零。100% 是目标,而非当下的宣称 —— 这是诚实、进行中的工作,由我们无法操控的评分器衡量。

    @@ -382,7 +382,7 @@ benchmarkStrings: PEP 符合性 ¹ - 47.9% (70/146,目标 100%) + 40.4% (59/146,目标 100%) ~99% ~58% 完全通过 alpha @@ -458,7 +458,7 @@ benchmarkStrings:

    一个我们无法操控的评分器。

    - 我们的 47.9% 不是我们自己打的分。我们下载并运行 python/typing 自己的符合性工具—— + 我们的 40.4% 不是我们自己打的分。我们下载并运行 python/typing 自己的符合性工具—— 正是用来为 pyright、mypy、pyrefly、ty、zuban 和 pycroscope 打分的同一套程序。

      @@ -478,9 +478,9 @@ benchmarkStrings: 哪怕多出一个诊断(一处误报)也会使整个文件失败。
    1. - 今天的结果是 146 个文件中 70 个通过(47.9%,仅错误), - 还有 219 处误报和 36 处遗漏的必需错误待清除。更严格的「错误加警告」变体得分为 - 40.4%(59/146)。头条数字是仅错误的 47.9%。 + 今天的结果是 146 个文件中 59 个通过(40.4%)—— + 计入 basilisk 发出的所有诊断(错误和警告),这是最严格的评分(也是 pyright 的评分方式); + 285 处误报、36 处遗漏待清除;更宽松的「仅错误」视图为 70/146(47.9%)。头条数字是严格的 40.4%。

    From d82ba400d477b213ff46d909ff512d990a3e3633 Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 21:16:26 +1000 Subject: [PATCH 04/12] fixes --- conformance/score.py | 162 ++++++----- conformance/upstream_main.py | 275 ++++++++++++++++++ .../basilisk-cli/tests/conformance_tests.rs | 14 +- docs/specs/CHECKER-ARCHITECTURE-SPEC.md | 19 +- scripts/conformance.sh | 31 +- 5 files changed, 397 insertions(+), 104 deletions(-) create mode 100644 conformance/upstream_main.py diff --git a/conformance/score.py b/conformance/score.py index 3bb2cd5b..22f87f9d 100644 --- a/conformance/score.py +++ b/conformance/score.py @@ -2,13 +2,14 @@ # Implements [CHKARCH-CONFORMANCE]. See docs/specs/CHECKER-ARCHITECTURE-SPEC.md """Grade Basilisk with the REAL python/typing conformance calculator. -This script does NOT reimplement the conformance scoring. It **downloads the -actual upstream tool** (`conformance/src/main.py` from `python/typing`, pinned -to the same commit the test fixtures come from) and **runs upstream's own -`get_expected_errors` + `diff_expected_errors` functions unmodified**. Those -two functions are the entire conformance algorithm — the same code that grades -pyright, mypy, pyrefly, ty, zuban and pycroscope. We extract them straight from -the downloaded file and execute them; nothing about the calculation is ours. +This script does NOT reimplement the conformance scoring. It **imports the +committed upstream tool** — `conformance/upstream_main.py`, a byte-identical, +sha256-verified copy of `conformance/src/main.py` from `python/typing` pinned to +the same commit the test fixtures come from — and **calls upstream's own +`get_expected_errors` + `diff_expected_errors` functions unmodified**. Those two +functions are the entire conformance algorithm: the same code that grades +pyright, mypy, pyrefly, ty, zuban and pycroscope. 
Nothing about the calculation +is ours, and nothing is downloaded at score time. The only Basilisk-specific code here is a checker *adapter* — exactly what upstream itself has for every checker (`PyrightTypeChecker`, `MypyTypeChecker`, @@ -24,93 +25,115 @@ errors-only view. Either way, any diagnostic on a line the suite does not mark `# E` is a real false positive and fails the file — same as for any checker. +The vendored calculator is committed at `conformance/upstream_main.py`. Refresh +it ONLY when bumping the pinned ref: + python3 conformance/score.py --refresh-upstream + Usage: - python3 conformance/score.py [--bin PATH] [--gate] [--count-warnings] - [--conformance-dir DIR] [--offline] + python3 conformance/score.py [--bin PATH] [--gate] [--errors-only] + [--conformance-dir DIR] [--refresh-upstream] """ from __future__ import annotations -import ast +import hashlib +import importlib.util import json import subprocess import sys -import urllib.request +import types from pathlib import Path from typing import Callable, Sequence # Pinned to the SAME commit the fixtures are fetched from -# (scripts/conformance.sh TYPING_REF). Bump both together. +# (scripts/conformance.sh TYPING_REF). Bump both together, then --refresh-upstream. PINNED_TYPING_REF = "268d0c4e" UPSTREAM_MAIN_URL = ( f"https://raw.githubusercontent.com/python/typing/{PINNED_TYPING_REF}" "/conformance/src/main.py" ) +# The committed, byte-identical copy of upstream's calculator, and its sha256. +UPSTREAM_MAIN = Path(__file__).resolve().parent / "upstream_main.py" +UPSTREAM_MAIN_SHA256 = "b4e3bd089c73856f9920ef494350d622c2914fac238c9193ec0bb3f93f0fc6a2" # The two functions that constitute the official scoring algorithm. 
OFFICIAL_FUNCS = ("get_expected_errors", "diff_expected_errors") # --------------------------------------------------------------------------- -# Download + run the REAL upstream calculator +# Import the REAL upstream calculator (committed, sha256-verified — no download) # --------------------------------------------------------------------------- -def _download(url: str, dest: Path) -> None: - dest.parent.mkdir(parents=True, exist_ok=True) - with urllib.request.urlopen(url, timeout=30) as resp: # noqa: S310 (pinned https) - dest.write_bytes(resp.read()) +def _stub_module(name: str, **attrs: object) -> None: + """Register an empty stand-in module so upstream's unrelated top-level + imports resolve. The two scoring functions touch none of these.""" + module = types.ModuleType(name) + for attr, value in attrs.items(): + setattr(module, attr, value) + sys.modules[name] = module -def load_official_calc( - cache: Path, offline: bool -) -> tuple[Callable, Callable, str]: +def load_official_calc() -> tuple[Callable, Callable, str]: """Return upstream's real (get_expected_errors, diff_expected_errors). - Downloads the upstream `main.py` (pinned SHA) to `cache` if absent, then - extracts those two function definitions verbatim from the downloaded source - and executes them. The executed code is byte-for-byte upstream's — we only - skip `main.py`'s unrelated module-level imports (tomli/tomlkit/reporting/…), - which the scoring functions never touch. + Reads the committed `conformance/upstream_main.py`, verifies it is byte-for- + byte the pinned upstream `conformance/src/main.py` (sha256), imports it, and + hands back its two functions unmodified. No network access; no code of ours + in the calculation. `main.py` also imports tomli/tomlkit/options/reporting/ + test_groups/type_checker at module scope — the scoring functions use none of + them, so empty stubs let the import succeed. 
""" - if not cache.exists(): - if offline: - raise FileNotFoundError( - f"upstream calc not cached at {cache} and --offline set; " - "run `make conformance FETCH=1` with network once" - ) - _download(UPSTREAM_MAIN_URL, cache) - - source = cache.read_text(encoding="utf-8") - tree = ast.parse(source) - wanted = [ - node - for node in tree.body - if isinstance(node, ast.FunctionDef) and node.name in OFFICIAL_FUNCS - ] - found = {node.name for node in wanted} - missing = set(OFFICIAL_FUNCS) - found + raw = UPSTREAM_MAIN.read_bytes() + digest = hashlib.sha256(raw).hexdigest() + if digest != UPSTREAM_MAIN_SHA256: + raise RuntimeError( + f"{UPSTREAM_MAIN.name} sha256 {digest[:12]}… != pinned " + f"{UPSTREAM_MAIN_SHA256[:12]}… — the vendored upstream calculator was " + "modified. Restore it from git, or run --refresh-upstream to re-pin." + ) + + _stub_module("tomli") + _stub_module("tomlkit") + _stub_module("options", parse_options=None) + _stub_module("reporting", generate_summary=None) + _stub_module("test_groups", get_test_cases=None, get_test_groups=None) + _stub_module("type_checker", TYPE_CHECKERS=(), TypeChecker=object) + + spec = importlib.util.spec_from_file_location("typing_conformance_main", UPSTREAM_MAIN) + if spec is None or spec.loader is None: + raise RuntimeError(f"cannot build an import spec for {UPSTREAM_MAIN}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + missing = [name for name in OFFICIAL_FUNCS if not hasattr(module, name)] if missing: raise RuntimeError( - f"downloaded upstream main.py is missing {missing}; the upstream " + f"committed upstream main.py is missing {missing}; the upstream " "layout changed — re-check the pinned ref" ) + get_expected = getattr(module, OFFICIAL_FUNCS[0]) + diff_errors = getattr(module, OFFICIAL_FUNCS[1]) + return get_expected, diff_errors, f"sha256:{digest[:12]}" - # `from __future__ import annotations` so upstream's type hints (which name - # types like `TypeChecker` that we 
don't import) are not evaluated. - future = ast.ImportFrom( - module="__future__", names=[ast.alias(name="annotations")], level=0 - ) - module = ast.Module(body=[future, *wanted], type_ignores=[]) - ast.fix_missing_locations(module) - code = compile(module, filename=str(cache), mode="exec") - import re # the only runtime import the scoring functions need +def refresh_upstream() -> int: + """Re-download upstream main.py to the committed path and print its sha256. - namespace: dict = {"re": re, "Path": Path} - exec(code, namespace) # noqa: S102 — executing pinned, verified upstream source - # Provenance: short hash of the exact bytes we ran, for the scorecard. - digest = f"{len(source)}b" - return namespace[OFFICIAL_FUNCS[0]], namespace[OFFICIAL_FUNCS[1]], digest + Maintenance only — run when bumping PINNED_TYPING_REF. This is the ONLY code + path that touches the network; the normal score path never does. + """ + import urllib.request # local import: never loaded on the score path + + with urllib.request.urlopen(UPSTREAM_MAIN_URL, timeout=30) as resp: # noqa: S310 (pinned https) + raw = resp.read() + UPSTREAM_MAIN.write_bytes(raw) + digest = hashlib.sha256(raw).hexdigest() + print(f" fetched {UPSTREAM_MAIN_URL}") + print(f" wrote {UPSTREAM_MAIN} ({len(raw)} bytes)") + print(f" sha256 {digest}") + if digest != UPSTREAM_MAIN_SHA256: + print(f' -> update UPSTREAM_MAIN_SHA256 = "{digest}" (ref changed)') + return 0 # --------------------------------------------------------------------------- @@ -121,9 +144,10 @@ def load_official_calc( class BasiliskTypeChecker: """Runs the real `basilisk` binary; parses its JSON into {line: [errors]}. - Counts only `severity == "error"` — the analog of the suite's `# E` - ("an error MUST be reported"). Warnings are advisory and reported - separately, never folded into the official figure. + Each diagnostic is the analog of the suite's `# E` ("an error MUST be + reported on this line"). 
When `count_warnings` is set (the default for the + strictest grading, matching how pyright is graded upstream) both `error` and + `warning` severities count; otherwise only `error` does. """ name = "basilisk" @@ -247,8 +271,8 @@ def print_scorecard( print() print("=" * 68) print(f" BASILISK PEP CONFORMANCE — REAL python/typing CALCULATOR [{label}]") - print(f" calc: downloaded + executed verbatim from python/typing@{PINNED_TYPING_REF}") - print(f" funcs: {', '.join(OFFICIAL_FUNCS)} ({digest} of upstream main.py)") + print(" calc: imported verbatim from committed conformance/upstream_main.py") + print(f" ref: python/typing@{PINNED_TYPING_REF} ({digest}) funcs: {', '.join(OFFICIAL_FUNCS)}") print("=" * 68) print(f" Files: {n} total | {totals['pass']} pass | {n - totals['pass']} fail") print(f" Score: {pct:.1f}% (Pass = empty errors_diff, upstream rule)") @@ -288,7 +312,7 @@ def parse_args(argv: list[str]) -> dict: # AND warnings) is counted as "an error was reported", which is also how the # reference checker pyright is graded upstream. `--errors-only` reports the # looser errors-only view. `--count-warnings` is accepted for back-compat. 
- opts: dict = {"bin": None, "gate": False, "warn": True, "dir": None, "offline": False} + opts: dict = {"bin": None, "gate": False, "warn": True, "dir": None, "refresh": False} it = iter(argv) for a in it: if a == "--bin": @@ -301,8 +325,8 @@ def parse_args(argv: list[str]) -> dict: opts["warn"] = False elif a == "--conformance-dir": opts["dir"] = next(it, None) - elif a == "--offline": - opts["offline"] = True + elif a == "--refresh-upstream": + opts["refresh"] = True return opts @@ -331,6 +355,9 @@ def enforce_gate(root: Path, files: list[Path], totals: Totals) -> bool: def main(argv: list[str]) -> int: opts = parse_args(argv) + if opts["refresh"]: + return refresh_upstream() + root = repo_root() conf_dir = Path(opts["dir"]) if opts["dir"] else root / "crates/basilisk-cli/tests/conformance" @@ -343,10 +370,9 @@ def main(argv: list[str]) -> int: print(" ✗ basilisk binary not found. Build it or pass --bin .", file=sys.stderr) return 1 - cache = conf_dir / ".tool" / "main.py" try: - get_expected, diff_errors, digest = load_official_calc(cache, opts["offline"]) - except Exception as exc: # noqa: BLE001 — surface any fetch/parse failure clearly + get_expected, diff_errors, digest = load_official_calc() + except Exception as exc: # noqa: BLE001 — surface any load/verify failure clearly print(f" ✗ could not load the official calculator: {exc}", file=sys.stderr) return 1 diff --git a/conformance/upstream_main.py b/conformance/upstream_main.py new file mode 100644 index 00000000..62a827bc --- /dev/null +++ b/conformance/upstream_main.py @@ -0,0 +1,275 @@ +""" +Type system conformance test for static type checkers. 
+""" + +import os +from pathlib import Path +import re +import sys +from time import time +from typing import Sequence + +import tomli +import tomlkit + +from options import parse_options +from reporting import generate_summary +from test_groups import get_test_cases, get_test_groups +from type_checker import TYPE_CHECKERS, TypeChecker + + +def run_tests( + root_dir: Path, + type_checker: TypeChecker, + test_cases: Sequence[Path], +): + print(f"Running tests for {type_checker.name}") + + test_start_time = time() + tests_output = type_checker.run_tests([file.name for file in test_cases]) + test_duration = time() - test_start_time + + print(f"Completed tests for {type_checker.name} in {test_duration:.2f} seconds") + + for _, output in tests_output.items(): + type_checker.parse_errors(output.splitlines()) + + results_dir = root_dir / "results" / type_checker.name + + for test_case in test_cases: + update_output_for_test( + type_checker, results_dir, test_case, tests_output.get(test_case.name, "") + ) + + update_type_checker_info(type_checker, root_dir) + + +def get_expected_errors(test_case: Path) -> tuple[ + dict[int, tuple[int, int]], + dict[str, tuple[list[int], bool]], +]: + """Return the line numbers where type checkers are expected to produce an error. + + The return value is a tuple of two dictionaries: + - The format of the first is {line number: (number of required errors, number of optional errors)}. + - The format of the second is {error tag: ([lines where the error may appear], allow multiple}. + If allow multiple is True, the error may appear on multiple lines; otherwise, it must + appear exactly once. + + For example, the following test case: + + x: int = "x" # E + y: int = "y" # E? 
+ @final # E[final] + def f(): pass # E[final] + + will return: + + ( + {1: (1, 0), 2: (0, 1)}, + {"final": ([3, 4], False)} + ) + """ + with open(test_case, "r", encoding="utf-8") as f: + lines = f.readlines() + output: dict[int, tuple[int, int]] = {} + groups: dict[str, tuple[list[int], bool]] = {} + for i, line in enumerate(lines, start=1): + line_without_comment, *_ = line.split("#") + # Ignore lines with no non-comment content. This allows commenting out test cases. + if not line_without_comment.strip(): + continue + required = 0 + optional = 0 + for match in re.finditer(r"# E\??(?=:|$| )", line): + if match.group() == "# E": + required += 1 + else: + optional += 1 + if required or optional: + output[i] = (required, optional) + for match in re.finditer(r"# E\[([^\]]+)\]", line): + tag = match.group(1) + if tag.endswith("+"): + allow_multiple = True + tag = tag[:-1] + else: + allow_multiple = False + if tag not in groups: + groups[tag] = ([i], allow_multiple) + else: + if groups[tag][1] != allow_multiple: + raise ValueError(f"Error group {tag} has inconsistent allow_multiple value in {test_case}") + groups[tag][0].append(i) + for group, linenos in groups.items(): + if len(linenos) == 1: + raise ValueError(f"Error group {group} only appears on a single line in {test_case}") + return output, groups + + +def diff_expected_errors( + type_checker: TypeChecker, + test_case: Path, + output: str, + ignored_errors: Sequence[str], +) -> str: + """Return a list of errors that were expected but not produced by the type checker.""" + expected_errors, error_groups = get_expected_errors(test_case) + errors = type_checker.parse_errors(output.splitlines()) + if ignored_errors: + errors = { + lineno: [ + error + for error in errors_list + if not any(ignored in error for ignored in ignored_errors)] + for lineno, errors_list in errors.items() + } + errors = {lineno: errors_list for lineno, errors_list in errors.items() if errors_list} + + differences: list[str] = [] + for 
expected_lineno, (expected_count, _) in expected_errors.items(): + if expected_lineno not in errors and expected_count > 0: + differences.append(f"Line {expected_lineno}: Expected {expected_count} errors") + # We don't report an issue if the count differs, because type checkers may produce + # multiple error messages for a single line. + linenos_used_by_groups: set[int] = set() + for group, (linenos, allow_multiple) in error_groups.items(): + num_errors = sum(1 for lineno in linenos if lineno in errors) + if num_errors == 0: + differences.append(f"Lines {', '.join(map(str, linenos))}: Expected error (tag {group!r})") + elif num_errors == 1 or allow_multiple: + linenos_used_by_groups.update(linenos) + else: + differences.append(f"Lines {', '.join(map(str, linenos))}: Expected exactly one error (tag {group!r})") + for actual_lineno, actual_errors in errors.items(): + if actual_lineno not in expected_errors and actual_lineno not in linenos_used_by_groups: + differences.append(f"Line {actual_lineno}: Unexpected errors {actual_errors}") + return "".join(f"{diff}\n" for diff in differences) + + +def update_output_for_test( + type_checker: TypeChecker, + results_dir: Path, + test_case: Path, + output: str, +): + test_name = test_case.stem + output = f"\n{output}" + + results_file = results_dir / f"{test_name}.toml" + results_file.parent.mkdir(parents=True, exist_ok=True) + should_write = False + + # Read the existing results file if present. 
+ try: + with open(results_file, "rb") as f: + existing_results = tomli.load(f) + except FileNotFoundError: + should_write = True + existing_results = {} + except tomli.TOMLDecodeError: + print(f"Error decoding {results_file}") + existing_results = {} + + ignored_errors = existing_results.get("ignore_errors", []) + errors_diff = "\n" + diff_expected_errors(type_checker, test_case, output, ignored_errors) + old_errors_diff = "\n" + existing_results.get("errors_diff", "") + + if errors_diff != old_errors_diff: + should_write = True + print(f"Result changed for {test_name} when running {type_checker.name}") + print(f"Old output: {old_errors_diff}") + print(f"New output: {errors_diff}") + print("") + + conformance_automated = "Fail" if errors_diff.strip() else "Pass" + if existing_results.get("conformance_automated") != conformance_automated: + should_write = True + existing_results["conformance_automated"] = conformance_automated + + old_output = existing_results.get("output", "") + old_output = f"\n{old_output}" + + # Did the type checker output change since last time the + # test was run? + if old_output != output: + should_write = True + print(f"Output changed for {test_name} when running {type_checker.name}") + print(f"Old output: {old_output}") + print(f"New output: {output}") + print("") + + # Use multiline formatting for any strings that contain newlines. + for key, value in existing_results.items(): + if isinstance(value, str) and "\n" in value: + existing_results[key] = tomlkit.string(f"\n{value}", multiline=True) + + if should_write: + # Always reapply tomlkit.string, or it will turn into a single line. 
+ existing_results["errors_diff"] = tomlkit.string(errors_diff, multiline=True) + existing_results["output"] = tomlkit.string(output, multiline=True) + if "notes" in existing_results: + notes = existing_results["notes"] + if not notes.startswith("\n"): + notes = "\n" + notes + existing_results["notes"] = tomlkit.string(notes, multiline=True) + results_file.parent.mkdir(parents=True, exist_ok=True) + with open(results_file, "w", encoding="utf-8") as f: + tomlkit.dump(existing_results, f) + + +def update_type_checker_info(type_checker: TypeChecker, root_dir: Path): + # Record the version of the type checker used for the latest run. + version_file = root_dir / "results" / type_checker.name / "version.toml" + + # Read the existing version file if present. + try: + with open(version_file, "rb") as f: + existing_info = tomli.load(f) + except FileNotFoundError: + existing_info = {} + except tomli.TOMLDecodeError: + print(f"Error decoding {version_file}") + existing_info = {} + + existing_info["version"] = type_checker.get_version() + + version_file.parent.mkdir(parents=True, exist_ok=True) + with open(version_file, "w") as f: + tomlkit.dump(existing_info, f) + + +def main(): + # Some tests cover features that are available only in the + # latest version of Python (3.12), so we need this version. + assert sys.version_info >= (3, 12) + + options = parse_options(sys.argv[1:]) + + root_dir = Path(__file__).resolve().parent.parent + + if not options.report_only: + tests_dir = root_dir / "tests" + assert tests_dir.is_dir() + + test_groups = get_test_groups(root_dir) + test_cases = get_test_cases(test_groups, tests_dir) + + # Switch to the tests directory. + os.chdir(tests_dir) + + # Run each test case with each type checker. 
+ for type_checker in TYPE_CHECKERS: + if options.only_run and options.only_run != type_checker.name: + continue + if not type_checker.install(): + print(f"Skipping tests for {type_checker.name}") + else: + run_tests(root_dir, type_checker, test_cases) + + # Generate a summary report. + generate_summary(root_dir) + + +if __name__ == "__main__": + main() diff --git a/crates/basilisk-cli/tests/conformance_tests.rs b/crates/basilisk-cli/tests/conformance_tests.rs index 13721452..e9dc1150 100644 --- a/crates/basilisk-cli/tests/conformance_tests.rs +++ b/crates/basilisk-cli/tests/conformance_tests.rs @@ -3,12 +3,14 @@ //! PEP conformance gate — thin wrapper around the OFFICIAL Python scorer. //! //! The conformance score is **not** computed in Rust. It is computed by -//! `conformance/score.py`, which **downloads** the `python/typing` conformance -//! tool (`conformance/src/main.py`, pinned to the same commit the fixtures come -//! from) and **runs its own `get_expected_errors` + `diff_expected_errors` -//! functions unmodified**. That guarantees Basilisk is graded by the exact same -//! algorithm as pyright, mypy, pyrefly, ty, zuban and pycroscope — no -//! Basilisk-specific scoring, no excluded diagnostic codes. +//! `conformance/score.py`, which **imports the committed, sha256-verified +//! `conformance/upstream_main.py`** (a byte-identical copy of the +//! `python/typing` conformance tool `conformance/src/main.py`, pinned to the +//! same commit the fixtures come from) and **runs its own `get_expected_errors` +//! + `diff_expected_errors` functions unmodified**. That guarantees Basilisk is +//! graded by the exact same algorithm as pyright, mypy, pyrefly, ty, zuban and +//! pycroscope — no Basilisk-specific scoring, no excluded diagnostic codes, and +//! nothing fetched from the network at score time. //! //! This test exists only so the gate runs inside `make test`: it builds the //! 
real `basilisk` binary (via `CARGO_BIN_EXE_basilisk`), invokes the scorer diff --git a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md index 3ff8405a..e748e17a 100644 --- a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md +++ b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md @@ -1371,13 +1371,18 @@ calculator**, not a Basilisk reimplementation. This is non-negotiable: the number must be one anyone can reproduce with the same tooling the reference checkers (pyright, mypy, pyrefly, ty, zuban, pycroscope) are graded with. -- **Scorer**: [`conformance/score.py`](../../conformance/score.py) **downloads** - `python/typing`'s `conformance/src/main.py` (pinned to the same commit the - fixtures come from, `scripts/conformance.sh` → `TYPING_REF`) and executes its - own `get_expected_errors` + `diff_expected_errors` functions **unmodified** - (extracted verbatim from the downloaded file). The only Basilisk-specific code - is a checker *adapter* that runs the real `basilisk` binary and turns its JSON - output into the `{line: [errors]}` mapping the upstream algorithm consumes — +- **Scorer**: [`conformance/score.py`](../../conformance/score.py) **imports the + committed [`conformance/upstream_main.py`](../../conformance/upstream_main.py)** — + a byte-identical, sha256-verified copy of `python/typing`'s + `conformance/src/main.py`, pinned to the same commit the fixtures come from + (`scripts/conformance.sh` → `TYPING_REF`, currently `268d0c4e`, sha256 + `b4e3bd08…0fc6a2`) — and calls its own `get_expected_errors` + + `diff_expected_errors` functions **unmodified**. Nothing is downloaded at score + time; the verbatim upstream file lives in the repo and `score.py` refuses to run + if its hash drifts. Refresh it only when bumping the ref: + `python3 conformance/score.py --refresh-upstream`. 
The only Basilisk-specific + code is a checker *adapter* that runs the real `basilisk` binary and turns its + JSON output into the `{line: [errors]}` mapping the upstream algorithm consumes — exactly the role of upstream's per-checker adapters in `type_checker.py`. - **Pass rule** (upstream's, verbatim): a file passes iff the upstream `errors_diff` is empty — every `# E` line gets an error, every `# E[tag]` diff --git a/scripts/conformance.sh b/scripts/conformance.sh index 544cd628..88cab662 100755 --- a/scripts/conformance.sh +++ b/scripts/conformance.sh @@ -60,17 +60,12 @@ for i, f in enumerate(files, 1): print(f' {i}/{len(files)}') " "$CONFORMANCE_DIR" - # Also fetch the OFFICIAL scorer (conformance/src/main.py). score.py runs - # upstream's own get_expected_errors + diff_expected_errors from this exact - # file — we never reimplement the algorithm. Cached under .tool/ (a subdir, - # so the *.py glob that collects fixtures never picks it up). - mkdir -p "$CONFORMANCE_DIR/.tool" - curl "${CURL_ARGS[@]}" \ - "https://raw.githubusercontent.com/${TYPING_REPO}/${TYPING_REF}/conformance/src/main.py" \ - -o "$CONFORMANCE_DIR/.tool/main.py" - + # The OFFICIAL scorer (conformance/src/main.py) is NOT fetched here — it is + # committed to the repo at conformance/upstream_main.py (byte-identical, + # sha256-verified) and score.py imports it directly. To re-pin it after a + # ref bump: `python3 conformance/score.py --refresh-upstream`. echo "$TYPING_REF" > "$REF_STAMP_FILE" - ok "${COUNT} conformance files + official scorer written to ${CONFORMANCE_DIR}/ (ref: ${TYPING_REF})" + ok "${COUNT} conformance files written to ${CONFORMANCE_DIR}/ (ref: ${TYPING_REF})" } FETCH_ONLY=0 @@ -98,17 +93,6 @@ if [[ "${1:-}" == "--fetch" ]] || \ else COUNT=$(find "$CONFORMANCE_DIR" -maxdepth 1 -name "*.py" | wc -l | tr -d ' ') ok "Conformance suite present ($COUNT files, ref ${TYPING_REF}) — skipping download" - # Self-heal: caches created before the scorer was added lack .tool/main.py. 
- # Fetch just the scorer so score.py never has to download it at test time. - if [[ ! -f "$CONFORMANCE_DIR/.tool/main.py" ]]; then - mkdir -p "$CONFORMANCE_DIR/.tool" - HEAL_ARGS=(-fsSL) - [[ -n "${GITHUB_TOKEN:-}" ]] && HEAL_ARGS+=(-H "Authorization: token ${GITHUB_TOKEN}") - curl "${HEAL_ARGS[@]}" \ - "https://raw.githubusercontent.com/${TYPING_REPO}/${TYPING_REF}/conformance/src/main.py" \ - -o "$CONFORMANCE_DIR/.tool/main.py" - ok "Fetched official scorer into existing cache" - fi fi if [[ "$FETCH_ONLY" -eq 1 ]]; then @@ -116,8 +100,9 @@ if [[ "$FETCH_ONLY" -eq 1 ]]; then fi # ── Score with the OFFICIAL python/typing calculator ───────────────────────── -# We do NOT compute the score ourselves. conformance/score.py downloads -# python/typing's own conformance tool (pinned ref) and runs its real +# We do NOT compute the score ourselves. conformance/score.py imports the +# committed, sha256-verified conformance/upstream_main.py (byte-identical to +# python/typing's own conformance tool at the pinned ref) and runs its real # get_expected_errors + diff_expected_errors against the actual `basilisk` # binary. No excluded diagnostic codes; a file passes only with an empty # upstream errors_diff. 
From 4d7e97265f20862834d3d8fbaa7b2cec090af503 Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 21:16:32 +1000 Subject: [PATCH 05/12] fixes --- .github/workflows/ci.yml | 8 +- .gitignore | 2 +- Makefile | 3 +- conformance/score.py | 93 ++++++++++++++++-- docs/specs/CHECKER-ARCHITECTURE-SPEC.md | 2 +- scripts/conformance.sh | 120 ------------------------ scripts/test-rust.sh | 7 +- 7 files changed, 96 insertions(+), 139 deletions(-) delete mode 100755 scripts/conformance.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd027dec..c2f34dba 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -227,14 +227,14 @@ jobs: # The PEP conformance suite is fetched fresh from the upstream typing repo # on every run (network-bound, one HTTP request per test file). Cache it - # keyed on conformance.sh, which holds the pinned TYPING_REF — bumping the - # ref edits that file and busts the cache; conformance.sh itself re-fetches - # whenever the stamped ref differs, so a stale restore self-heals. + # keyed on score.py, which holds the pinned ref (PINNED_TYPING_REF) — bumping + # the ref edits that file and busts the cache; score.py itself re-fetches + # whenever the stamped ref differs, so a stale prefix restore self-heals. 
- name: Cache conformance suite uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: path: crates/basilisk-cli/tests/conformance - key: conformance-suite-${{ hashFiles('scripts/conformance.sh') }} + key: conformance-suite-${{ hashFiles('conformance/score.py') }} restore-keys: conformance-suite- - name: Run Rust tests with coverage diff --git a/.gitignore b/.gitignore index 34051a68..5d6a96d5 100644 --- a/.gitignore +++ b/.gitignore @@ -106,7 +106,7 @@ deslop-*.log benchmarks/results/ # ============================================================================= -# Conformance test suite (fetched via make conformance / scripts/conformance.sh) +# Conformance test suite (fetched on demand via make conformance / score.py) # ============================================================================= crates/basilisk-cli/tests/conformance/ diff --git a/Makefile b/Makefile index 70d8d93e..bcef2c3e 100644 --- a/Makefile +++ b/Makefile @@ -167,7 +167,8 @@ mutation-test: ## conformance/conformance_status.csv. Fetches the upstream suite if missing; ## use FETCH=1 to force a re-download. conformance: - @bash scripts/conformance.sh $(if $(filter 1,$(FETCH)),--fetch,) + @cargo build -p basilisk-cli --bin basilisk + @python3 conformance/score.py --bin target/debug/basilisk $(if $(filter 1,$(FETCH)),--fetch,) ## bench: Benchmark Basilisk vs pyright/mypy/ty/pyrefly on the fixture suite. ## Requires hyperfine; competitor tools are skipped if not installed. diff --git a/conformance/score.py b/conformance/score.py index 22f87f9d..544f2e16 100644 --- a/conformance/score.py +++ b/conformance/score.py @@ -25,13 +25,18 @@ errors-only view. Either way, any diagnostic on a line the suite does not mark `# E` is a real false positive and fails the file — same as for any checker. -The vendored calculator is committed at `conformance/upstream_main.py`. 
Refresh -it ONLY when bumping the pinned ref: +This one file is the whole Basilisk side of conformance: it fetches the +git-ignored `# E`-annotated test fixtures on demand (`--fetch` / `--fetch-only`), +runs the binary, scores with the official functions, writes +`conformance/conformance_status.csv`, and enforces the ratchet gate (`--gate`). +There is no separate shell script. The vendored calculator is committed at +`conformance/upstream_main.py`; refresh it ONLY when bumping the pinned ref: python3 conformance/score.py --refresh-upstream Usage: python3 conformance/score.py [--bin PATH] [--gate] [--errors-only] - [--conformance-dir DIR] [--refresh-upstream] + [--conformance-dir DIR] [--fetch | --fetch-only] + [--refresh-upstream] """ from __future__ import annotations @@ -45,8 +50,9 @@ from pathlib import Path from typing import Callable, Sequence -# Pinned to the SAME commit the fixtures are fetched from -# (scripts/conformance.sh TYPING_REF). Bump both together, then --refresh-upstream. +# The single home for the pinned upstream commit. The fixtures (FIXTURES_API) +# and the vendored calculator (UPSTREAM_MAIN) both track it. To bump: edit this, +# run `--refresh-upstream` (re-pins upstream_main.py + its sha256), then `--fetch`. PINNED_TYPING_REF = "268d0c4e" UPSTREAM_MAIN_URL = ( f"https://raw.githubusercontent.com/python/typing/{PINNED_TYPING_REF}" @@ -57,6 +63,12 @@ UPSTREAM_MAIN_SHA256 = "b4e3bd089c73856f9920ef494350d622c2914fac238c9193ec0bb3f93f0fc6a2" # The two functions that constitute the official scoring algorithm. OFFICIAL_FUNCS = ("get_expected_errors", "diff_expected_errors") +# The `# E`-annotated test fixtures live under conformance/tests at the same +# pinned ref. They are git-ignored and fetched on demand (one HTTP GET each). 
+FIXTURES_API = ( + "https://api.github.com/repos/python/typing/contents/conformance/tests" + f"?ref={PINNED_TYPING_REF}" +) # --------------------------------------------------------------------------- @@ -136,6 +148,50 @@ def refresh_upstream() -> int: return 0 +# --------------------------------------------------------------------------- +# Fetch the test fixtures (the `# E`-annotated .py files) — git-ignored +# --------------------------------------------------------------------------- + + +def ensure_fixtures(conf_dir: Path, force: bool) -> None: + """Download python/typing's conformance `.py` fixtures into `conf_dir`. + + No-op when they are already present at the pinned ref (a `.ref-sha` stamp + records it) unless `force`. Bumping `PINNED_TYPING_REF` invalidates the stamp + and triggers a re-fetch. Honors `GITHUB_TOKEN` to raise the API rate limit. + """ + import os + import urllib.request # local: network only happens here and in refresh + + stamp = conf_dir / ".ref-sha" + cached_ref = stamp.read_text(encoding="utf-8").strip() if stamp.exists() else "" + present = conf_dir.exists() and any(conf_dir.glob("*.py")) + if present and cached_ref == PINNED_TYPING_REF and not force: + return + + headers = {"Accept": "application/vnd.github+json"} + token = os.environ.get("GITHUB_TOKEN") + if token: + headers["Authorization"] = f"token {token}" + + listing_req = urllib.request.Request(FIXTURES_API, headers=headers) + with urllib.request.urlopen(listing_req, timeout=60) as resp: # noqa: S310 (pinned https) + entries = json.loads(resp.read()) + fixtures = [e for e in entries if e.get("type") == "file" and e["name"].endswith(".py")] + if not fixtures: + raise RuntimeError(f"no .py fixtures found at {FIXTURES_API}") + + conf_dir.mkdir(parents=True, exist_ok=True) + for stale in conf_dir.glob("*.py"): + stale.unlink() + for entry in fixtures: + with urllib.request.urlopen(entry["download_url"], timeout=60) as resp: # noqa: S310 + (conf_dir / 
entry["name"]).write_bytes(resp.read()) + stamp.write_text(PINNED_TYPING_REF + "\n", encoding="utf-8") + print(f" fetched {len(fixtures)} conformance fixtures " + f"(python/typing@{PINNED_TYPING_REF}) -> {conf_dir}") + + # --------------------------------------------------------------------------- # Checker adapter — same role as upstream's per-checker adapters # --------------------------------------------------------------------------- @@ -312,7 +368,10 @@ def parse_args(argv: list[str]) -> dict: # AND warnings) is counted as "an error was reported", which is also how the # reference checker pyright is graded upstream. `--errors-only` reports the # looser errors-only view. `--count-warnings` is accepted for back-compat. - opts: dict = {"bin": None, "gate": False, "warn": True, "dir": None, "refresh": False} + opts: dict = { + "bin": None, "gate": False, "warn": True, "dir": None, + "refresh": False, "fetch": False, "fetch_only": False, + } it = iter(argv) for a in it: if a == "--bin": @@ -327,6 +386,10 @@ def parse_args(argv: list[str]) -> dict: opts["dir"] = next(it, None) elif a == "--refresh-upstream": opts["refresh"] = True + elif a == "--fetch": + opts["fetch"] = True + elif a == "--fetch-only": + opts["fetch_only"] = True return opts @@ -361,9 +424,21 @@ def main(argv: list[str]) -> int: root = repo_root() conf_dir = Path(opts["dir"]) if opts["dir"] else root / "crates/basilisk-cli/tests/conformance" - if not conf_dir.exists() or not any(conf_dir.glob("*.py")): - print(" ⚠ Conformance suite not downloaded. Run: make conformance") - return 0 # fresh checkout: skip, do not fail CI + # Fetch fixtures when forced, in fetch-only mode, or when they are absent. + # A network failure is fatal only if a fetch was explicitly requested; on the + # plain score path a missing suite is skipped (fresh checkout, offline CI). 
+ present = conf_dir.exists() and any(conf_dir.glob("*.py")) + if opts["fetch"] or opts["fetch_only"] or not present: + try: + ensure_fixtures(conf_dir, force=opts["fetch"]) + except Exception as exc: # noqa: BLE001 — surface fetch failure clearly + if opts["fetch"] or opts["fetch_only"]: + print(f" ✗ could not fetch conformance fixtures: {exc}", file=sys.stderr) + return 1 + print(" ⚠ Conformance suite not present and fetch failed — skipping.") + return 0 + if opts["fetch_only"]: + return 0 binary = find_binary(opts["bin"], root) if binary is None: diff --git a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md index e748e17a..a8a4ac7b 100644 --- a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md +++ b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md @@ -1375,7 +1375,7 @@ checkers (pyright, mypy, pyrefly, ty, zuban, pycroscope) are graded with. committed [`conformance/upstream_main.py`](../../conformance/upstream_main.py)** — a byte-identical, sha256-verified copy of `python/typing`'s `conformance/src/main.py`, pinned to the same commit the fixtures come from - (`scripts/conformance.sh` → `TYPING_REF`, currently `268d0c4e`, sha256 + (`score.py` → `PINNED_TYPING_REF`, currently `268d0c4e`, sha256 `b4e3bd08…0fc6a2`) — and calls its own `get_expected_errors` + `diff_expected_errors` functions **unmodified**. Nothing is downloaded at score time; the verbatim upstream file lives in the repo and `score.py` refuses to run diff --git a/scripts/conformance.sh b/scripts/conformance.sh deleted file mode 100755 index 88cab662..00000000 --- a/scripts/conformance.sh +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env bash -# Run the PEP conformance test suite. -# -# Downloads the python/typing conformance files first if they are missing. -# Outputs: conformance/conformance_status.csv (committed to the repo). 
-# -# Usage: -# ./scripts/conformance.sh # fetch if needed, then score -# ./scripts/conformance.sh --fetch # force re-download, then score -# ./scripts/conformance.sh --fetch-only # fetch only, no test run - -set -euo pipefail - -REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -source "$REPO_ROOT/scripts/common.sh" -cd "$REPO_ROOT" - -CONFORMANCE_DIR="crates/basilisk-cli/tests/conformance" - -# ── Fetch configuration ────────────────────────────────────────────────────── -TYPING_REPO="python/typing" -# Pinned upstream SHA. Bump deliberately, then re-run tests and update -# the conformance threshold in coverage-thresholds.json. Leaving this on -# `main` makes CI non-deterministic — upstream suite changes break us. -TYPING_REF="268d0c4e" -API_URL="https://api.github.com/repos/${TYPING_REPO}/contents/conformance/tests?ref=${TYPING_REF}" - -# ── Fetch if missing or forced ─────────────────────────────────────────────── -REF_STAMP_FILE="$CONFORMANCE_DIR/.ref-sha" - -fetch_conformance() { - header "Fetching conformance suite from ${TYPING_REPO} (ref: ${TYPING_REF})" - rm -rf "$CONFORMANCE_DIR" - mkdir -p "$CONFORMANCE_DIR" - - CURL_ARGS=(-fsSL) - if [[ -n "${GITHUB_TOKEN:-}" ]]; then - CURL_ARGS+=(-H "Authorization: token ${GITHUB_TOKEN}") - fi - FILE_LIST=$(curl "${CURL_ARGS[@]}" "$API_URL") - - COUNT=$(echo "$FILE_LIST" | python3 -c " -import json, sys -files = [f for f in json.load(sys.stdin) if f['type'] == 'file' and f['name'].endswith('.py')] -print(len(files)) -") - - echo "Downloading ${COUNT} test files to ${CONFORMANCE_DIR}..." 
- - echo "$FILE_LIST" | python3 -c " -import json, sys, urllib.request, os - -dest = sys.argv[1] -files = [f for f in json.load(sys.stdin) if f['type'] == 'file' and f['name'].endswith('.py')] - -for i, f in enumerate(files, 1): - out = os.path.join(dest, f['name']) - urllib.request.urlretrieve(f['download_url'], out) - if i % 25 == 0 or i == len(files): - print(f' {i}/{len(files)}') -" "$CONFORMANCE_DIR" - - # The OFFICIAL scorer (conformance/src/main.py) is NOT fetched here — it is - # committed to the repo at conformance/upstream_main.py (byte-identical, - # sha256-verified) and score.py imports it directly. To re-pin it after a - # ref bump: `python3 conformance/score.py --refresh-upstream`. - echo "$TYPING_REF" > "$REF_STAMP_FILE" - ok "${COUNT} conformance files written to ${CONFORMANCE_DIR}/ (ref: ${TYPING_REF})" -} - -FETCH_ONLY=0 -for arg in "$@"; do - case "$arg" in - --fetch-only) FETCH_ONLY=1 ;; - esac -done - -CURRENT_REF="" -if [[ -f "$REF_STAMP_FILE" ]]; then - CURRENT_REF=$(cat "$REF_STAMP_FILE") -fi - -# --fetch forces a re-download; --fetch-only only ensures the pinned ref is -# present (fetch if missing/stale, then exit) so `make test` works offline. -if [[ "${1:-}" == "--fetch" ]] || \ - [[ ! -d "$CONFORMANCE_DIR" ]] || \ - [[ -z "$(ls -A "$CONFORMANCE_DIR" 2>/dev/null)" ]] || \ - [[ "$CURRENT_REF" != "$TYPING_REF" ]]; then - if [[ -n "$CURRENT_REF" ]] && [[ "$CURRENT_REF" != "$TYPING_REF" ]]; then - warn "Cached conformance ref ($CURRENT_REF) != pinned ($TYPING_REF) — refetching" - fi - fetch_conformance -else - COUNT=$(find "$CONFORMANCE_DIR" -maxdepth 1 -name "*.py" | wc -l | tr -d ' ') - ok "Conformance suite present ($COUNT files, ref ${TYPING_REF}) — skipping download" -fi - -if [[ "$FETCH_ONLY" -eq 1 ]]; then - exit 0 -fi - -# ── Score with the OFFICIAL python/typing calculator ───────────────────────── -# We do NOT compute the score ourselves. 
conformance/score.py imports the -# committed, sha256-verified conformance/upstream_main.py (byte-identical to -# python/typing's own conformance tool at the pinned ref) and runs its real -# get_expected_errors + diff_expected_errors against the actual `basilisk` -# binary. No excluded diagnostic codes; a file passes only with an empty -# upstream errors_diff. -header "Building basilisk binary" -cargo build -p basilisk-cli --bin basilisk - -header "Scoring with the official python/typing calculator" -echo "" -python3 conformance/score.py --bin target/debug/basilisk 2>&1 - -echo "" -header "Done" -echo -e " Score computed by the REAL python/typing calculator (pinned ${TYPING_REF})." -echo -e " Per-file results: ${CYAN}conformance/conformance_status.csv${RESET}" -echo "" diff --git a/scripts/test-rust.sh b/scripts/test-rust.sh index 06dd81e2..70a1c01a 100755 --- a/scripts/test-rust.sh +++ b/scripts/test-rust.sh @@ -25,10 +25,11 @@ HTML_DIR="$REPO_ROOT/target/llvm-cov/html" rustup component add llvm-tools-preview 2>/dev/null || true # ── Fetch conformance suite if missing or stale ────────────────────────────── -# `conformance.sh` is the single source of truth — it pins TYPING_REF and -# re-fetches when the cached ref differs. Do not duplicate that logic here. +# `conformance/score.py` is the single source of truth — it pins the upstream +# ref (PINNED_TYPING_REF) and re-fetches when the cached ref differs. Do not +# duplicate that logic here. 
header "Ensuring PEP conformance suite is current" -bash "$REPO_ROOT/scripts/conformance.sh" --fetch-only +python3 "$REPO_ROOT/conformance/score.py" --fetch-only # ── Rust tests with coverage ───────────────────────────────────────────────── # cargo-llvm-cov uses target/llvm-cov-target/ as its target directory, From ec55c6215f3756479633107782d858a5bd3b2048 Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 21:24:30 +1000 Subject: [PATCH 06/12] fixes --- .github/workflows/ci.yml | 2 +- .gitignore | 2 +- conformance/score.py | 2 +- .../basilisk-checker/tests/checker_tests.rs | 4 +- .../basilisk-cli/tests/conformance_tests.rs | 89 ------------------- scripts/test-rust.sh | 8 ++ 6 files changed, 13 insertions(+), 94 deletions(-) delete mode 100644 crates/basilisk-cli/tests/conformance_tests.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c2f34dba..f092db00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -233,7 +233,7 @@ jobs: - name: Cache conformance suite uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: - path: crates/basilisk-cli/tests/conformance + path: conformance/tests key: conformance-suite-${{ hashFiles('conformance/score.py') }} restore-keys: conformance-suite- diff --git a/.gitignore b/.gitignore index 5d6a96d5..702e03f6 100644 --- a/.gitignore +++ b/.gitignore @@ -108,7 +108,7 @@ benchmarks/results/ # ============================================================================= # Conformance test suite (fetched on demand via make conformance / score.py) # ============================================================================= -crates/basilisk-cli/tests/conformance/ +conformance/tests/ # ============================================================================= # Tool / agent state diff --git a/conformance/score.py b/conformance/score.py index 544f2e16..4b7a5a1b 100644 --- a/conformance/score.py +++ 
b/conformance/score.py @@ -422,7 +422,7 @@ def main(argv: list[str]) -> int: return refresh_upstream() root = repo_root() - conf_dir = Path(opts["dir"]) if opts["dir"] else root / "crates/basilisk-cli/tests/conformance" + conf_dir = Path(opts["dir"]) if opts["dir"] else root / "conformance/tests" # Fetch fixtures when forced, in fetch-only mode, or when they are absent. # A network failure is fatal only if a fetch was explicitly requested; on the diff --git a/crates/basilisk-checker/tests/checker_tests.rs b/crates/basilisk-checker/tests/checker_tests.rs index eafb5c42..a88d4ed0 100644 --- a/crates/basilisk-checker/tests/checker_tests.rs +++ b/crates/basilisk-checker/tests/checker_tests.rs @@ -2100,7 +2100,7 @@ fn debug_e0047_qualifiers_annotated_fp() -> Result<(), Box Result<(), Box PathBuf { - let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - while !(dir.join("Cargo.toml").exists() && dir.join("crates").exists()) { - assert!( - dir.pop(), - "could not locate workspace root from CARGO_MANIFEST_DIR" - ); - } - dir -} - -/// First Python interpreter that responds to `--version`. -fn python() -> Option<&'static str> { - ["python3", "python"].into_iter().find(|exe| { - Command::new(exe) - .arg("--version") - .output() - .is_ok_and(|o| o.status.success()) - }) -} - -#[test] -fn conformance_score() { - let root = repo_root(); - let conformance_dir = root.join("crates/basilisk-cli/tests/conformance"); - - // Fresh checkout without fixtures: the scorer itself skips, but short-circuit - // here too so we don't require Python just to no-op. - if !conformance_dir.exists() { - println!(" ⚠ Conformance suite not downloaded — skipping. Run: make conformance"); - return; - } - - let score_py = root.join("conformance/score.py"); - assert!( - score_py.exists(), - "conformance/score.py is missing — the official scorer must be present" - ); - - let py = python().expect( - "python3 is required to run the official conformance scorer \ - (conformance/score.py). 
Install Python 3.12+.", - ); - - // `CARGO_BIN_EXE_basilisk` is injected by cargo for integration tests and - // points at the freshly built binary — the exact artifact users run. - let binary = env!("CARGO_BIN_EXE_basilisk"); - - let status = Command::new(py) - .arg(&score_py) - .arg("--bin") - .arg(binary) - .arg("--gate") - .status() - .expect("failed to spawn the official conformance scorer"); - - assert!( - status.success(), - "PEP conformance gate failed — see scorer output above. The score is \ - computed by the verbatim python/typing algorithm in conformance/score.py." - ); -} diff --git a/scripts/test-rust.sh b/scripts/test-rust.sh index 70a1c01a..d4677f3c 100755 --- a/scripts/test-rust.sh +++ b/scripts/test-rust.sh @@ -64,6 +64,14 @@ BASILISK_BIN=$(find_basilisk_bin) || { } ok "basilisk binary ready: $BASILISK_BIN" +# ── PEP conformance gate ────────────────────────────────────────────────────── +# Score the REAL compiled binary with the official python/typing calculator +# (conformance/score.py imports the committed, sha256-verified upstream_main.py) +# and enforce the ratchet gate from coverage-thresholds.json. This is the whole +# conformance system: two Python files + the gitignored fixtures. No Rust test. 
+header "Enforcing PEP conformance gate (official python/typing calculator)" +python3 "$REPO_ROOT/conformance/score.py" --bin "$BASILISK_BIN" --gate + cargo llvm-cov report --profile ci --html --output-dir "$HTML_DIR" ok "HTML report → $HTML_DIR/index.html" From da6066d4a7e733ece3488c8c5f3f41ee328bddd3 Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 21:24:33 +1000 Subject: [PATCH 07/12] fixes --- coverage-thresholds.json | 2 +- docs/specs/CHECKER-ARCHITECTURE-SPEC.md | 12 +++++++----- scripts/fp_verify.sh | 8 ++++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/coverage-thresholds.json b/coverage-thresholds.json index c3a899e3..c69cf6e0 100644 --- a/coverage-thresholds.json +++ b/coverage-thresholds.json @@ -43,7 +43,7 @@ "conformance": { "_doc": "Minimum PEP conformance pass percentage (files passing / total files), computed by the REAL python/typing conformance calculator (conformance/score.py downloads upstream main.py at the pinned ref and runs its own get_expected_errors + diff_expected_errors; NO excluded diagnostic codes). A file passes only when upstream's errors_diff is empty. The score uses the STRICTEST grading: every basilisk diagnostic (errors AND warnings) counts, matching how the reference checker pyright is graded. Ratchet UP only. HONEST baseline (replacing a previously rigged 100% from a lenient in-repo harness): 59/146 = 40.4%, pinned to python/typing@268d0c4e. (The looser errors-only view is 70/146 = 47.9%, available via score.py --errors-only.) Target is 100%; this is the real current number.", "threshold": 40, - "_fp_ceiling_doc": "Maximum total false-positive diagnostics across the suite (diagnostics Basilisk reports on lines the suite does NOT mark with # E, plus diagnostics outside satisfied # E[tag] groups) under the strictest errors+warnings grading. Ratchet DOWN only \u2014 the mirror of the pass-percentage gate. 
Enforced via conformance/score.py --gate (run by conformance_tests.rs inside make test). HONEST baseline: 285 (the prior 0 was a lie produced by excluding 9 diagnostic codes from scoring). Drive this DOWN.", + "_fp_ceiling_doc": "Maximum total false-positive diagnostics across the suite (diagnostics Basilisk reports on lines the suite does NOT mark with # E, plus diagnostics outside satisfied # E[tag] groups) under the strictest errors+warnings grading. Ratchet DOWN only \u2014 the mirror of the pass-percentage gate. Enforced via conformance/score.py --gate (run on the compiled binary by scripts/test-rust.sh inside make test; no Rust test involved). HONEST baseline: 285 (the prior 0 was a lie produced by excluding 9 diagnostic codes from scoring). Drive this DOWN.", "max_false_positives": 285 } } diff --git a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md index a8a4ac7b..fb70727e 100644 --- a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md +++ b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md @@ -819,7 +819,7 @@ operating directly on the module AST so it is independent of resolver state: `extra_items=T` whose type the argument matches. Implemented in `crates/basilisk-checker/src/rules/e0156/`; conformance fixture is -`crates/basilisk-cli/tests/conformance/typeddicts_extra_items.py`. +`conformance/tests/typeddicts_extra_items.py`. #### `ReadOnly` `TypedDict` inheritance {#CHKARCH-DIAG-TYPEDDICT-READONLY-INHERITANCE} @@ -1395,10 +1395,12 @@ checkers (pyright, mypy, pyrefly, ty, zuban, pycroscope) are graded with. grading; pass `--errors-only` for the looser errors-only view.) One firing on an unannotated line is a real false positive and fails the file — same as for any other checker. -- **Gate**: [`crates/basilisk-cli/tests/conformance_tests.rs`](../../crates/basilisk-cli/tests/conformance_tests.rs) - is a thin wrapper that runs `score.py --gate` inside `make test`. 
The - pass-percentage floor and false-positive ceiling live in - `coverage-thresholds.json` (`conformance.threshold`, +- **Gate**: `make test` (via [`scripts/test-rust.sh`](../../scripts/test-rust.sh)) + builds the `basilisk` binary, then runs `python3 conformance/score.py --gate` + on it — there is **no Rust conformance test**; the whole conformance system is + the two committed Python files plus the git-ignored downloaded fixtures under + `conformance/tests/`. The pass-percentage floor and false-positive ceiling live + in `coverage-thresholds.json` (`conformance.threshold`, `conformance.max_false_positives`); the former ratchets **up**, the latter **down**. Per-file results are written to `conformance/conformance_status.csv`. - **Honest baseline** (replacing a previously rigged in-repo harness that diff --git a/scripts/fp_verify.sh b/scripts/fp_verify.sh index 4b862088..724cb53e 100755 --- a/scripts/fp_verify.sh +++ b/scripts/fp_verify.sh @@ -15,7 +15,6 @@ ROOT="/Users/christianfindlay/Documents/Code/Basilisk" cd "$ROOT" BASELINE="/tmp/conf_baseline.csv" CSV="conformance/conformance_status.csv" -FPLOG="/tmp/fp_current.txt" if [[ "${1:-}" == "--save-baseline" ]]; then cp "$CSV" "$BASELINE" @@ -23,9 +22,10 @@ if [[ "${1:-}" == "--save-baseline" ]]; then exit 0 fi -# Run the conformance suite (release), capturing FP lines. -cargo test --test conformance_tests --release -- --nocapture 2>&1 \ - | grep -E '^ FP ' | sort > "$FPLOG" || true +# Regenerate the conformance CSV with the official scorer against the release +# binary. score.py writes per-file caught/missed/fp to $CSV, which we diff below. 
+cargo build --release -p basilisk-cli --bin basilisk >/dev/null 2>&1 +python3 conformance/score.py --bin target/release/basilisk >/dev/null 2>&1 || true echo "=== totals (current) ===" awk -F, 'NR>1{c+=$5;m+=$6;f+=$7; if($4=="PASS")p++; else if($4=="FAIL")fl++} \ From 19d34ce4c81881ac3babea32819892e8d77d9185 Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 21:31:05 +1000 Subject: [PATCH 08/12] fixes --- .github/workflows/ci.yml | 17 ++---------- .gitignore | 4 ++- conformance/score.py | 59 +++++++++++++++++++++++----------------- scripts/test-rust.sh | 9 ++---- 4 files changed, 43 insertions(+), 46 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f092db00..080efd8e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -225,21 +225,10 @@ jobs: - name: Install cargo-llvm-cov uses: taiki-e/install-action@81ee9698f20724138a785d788c7567d40f14cd2d # cargo-llvm-cov - # The PEP conformance suite is fetched fresh from the upstream typing repo - # on every run (network-bound, one HTTP request per test file). Cache it - # keyed on score.py, which holds the pinned ref (PINNED_TYPING_REF) — bumping - # the ref edits that file and busts the cache; score.py itself re-fetches - # whenever the stamped ref differs, so a stale prefix restore self-heals. - - name: Cache conformance suite - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 - with: - path: conformance/tests - key: conformance-suite-${{ hashFiles('conformance/score.py') }} - restore-keys: conformance-suite- - + # The PEP conformance suite (fixtures + the official calculator) is + # committed under conformance/ — nothing is fetched at test time, so there + # is no cache step and no network dependency. 
- name: Run Rust tests with coverage - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: ./scripts/test-rust.sh # No artifact upload of the HTML coverage report ([GITHUB-NO-ARTIFACTS]). diff --git a/.gitignore b/.gitignore index 702e03f6..87909bf7 100644 --- a/.gitignore +++ b/.gitignore @@ -106,7 +106,9 @@ deslop-*.log benchmarks/results/ # ============================================================================= -# Conformance test suite (fetched on demand via make conformance / score.py) +# Conformance test FIXTURES: downloaded on demand into conformance/tests/ (via +# make conformance / score.py --fetch) — never committed. NOTE: the official +# calculator conformance/upstream_main.py IS committed and is NOT ignored. # ============================================================================= conformance/tests/ diff --git a/conformance/score.py b/conformance/score.py index 4b7a5a1b..b327157c 100644 --- a/conformance/score.py +++ b/conformance/score.py @@ -25,13 +25,17 @@ errors-only view. Either way, any diagnostic on a line the suite does not mark `# E` is a real false positive and fails the file — same as for any checker. -This one file is the whole Basilisk side of conformance: it fetches the -git-ignored `# E`-annotated test fixtures on demand (`--fetch` / `--fetch-only`), -runs the binary, scores with the official functions, writes -`conformance/conformance_status.csv`, and enforces the ratchet gate (`--gate`). -There is no separate shell script. The vendored calculator is committed at -`conformance/upstream_main.py`; refresh it ONLY when bumping the pinned ref: - python3 conformance/score.py --refresh-upstream +This one file is the whole Basilisk side of conformance: it runs the binary, +scores with the official functions, writes `conformance/conformance_status.csv`, +and enforces the ratchet gate (`--gate`). There is no Rust test and no shell +script. 
Everything it needs is committed to the repo — nothing is a moving +target, nothing is fetched at score time: + • the official calculator → `conformance/upstream_main.py` (sha256-pinned) + • the `# E`-annotated test fixtures → `conformance/tests/*.py` + +Both are refreshed ONLY as deliberate maintenance, then committed: + python3 conformance/score.py --refresh-upstream # re-pin the calculator + python3 conformance/score.py --fetch # re-download the fixtures Usage: python3 conformance/score.py [--bin PATH] [--gate] [--errors-only] @@ -63,8 +67,8 @@ UPSTREAM_MAIN_SHA256 = "b4e3bd089c73856f9920ef494350d622c2914fac238c9193ec0bb3f93f0fc6a2" # The two functions that constitute the official scoring algorithm. OFFICIAL_FUNCS = ("get_expected_errors", "diff_expected_errors") -# The `# E`-annotated test fixtures live under conformance/tests at the same -# pinned ref. They are git-ignored and fetched on demand (one HTTP GET each). +# The `# E`-annotated test fixtures are committed under conformance/tests. This +# API lists them at the pinned ref for the maintenance-only `--fetch` refresh. FIXTURES_API = ( "https://api.github.com/repos/python/typing/contents/conformance/tests" f"?ref={PINNED_TYPING_REF}" @@ -149,16 +153,18 @@ def refresh_upstream() -> int: # --------------------------------------------------------------------------- -# Fetch the test fixtures (the `# E`-annotated .py files) — git-ignored +# MAINTENANCE: re-download the committed test fixtures (run periodically, commit) # --------------------------------------------------------------------------- def ensure_fixtures(conf_dir: Path, force: bool) -> None: """Download python/typing's conformance `.py` fixtures into `conf_dir`. - No-op when they are already present at the pinned ref (a `.ref-sha` stamp - records it) unless `force`. Bumping `PINNED_TYPING_REF` invalidates the stamp - and triggers a re-fetch. Honors `GITHUB_TOKEN` to raise the API rate limit. 
+ The fixtures are COMMITTED to the repo — this is a maintenance helper invoked + only by `--fetch` / `--fetch-only`, after which the result is committed. The + normal score path never calls it. No-op when already present at the pinned ref + (a `.ref-sha` stamp records it) unless `force`. Honors `GITHUB_TOKEN` to raise + the API rate limit. """ import os import urllib.request # local: network only happens here and in refresh @@ -424,21 +430,24 @@ def main(argv: list[str]) -> int: root = repo_root() conf_dir = Path(opts["dir"]) if opts["dir"] else root / "conformance/tests" - # Fetch fixtures when forced, in fetch-only mode, or when they are absent. - # A network failure is fatal only if a fetch was explicitly requested; on the - # plain score path a missing suite is skipped (fresh checkout, offline CI). - present = conf_dir.exists() and any(conf_dir.glob("*.py")) - if opts["fetch"] or opts["fetch_only"] or not present: + # MAINTENANCE ONLY: --fetch / --fetch-only re-download the fixtures so they can + # be committed. The fixtures are committed to the repo (not a moving target); + # the normal score path NEVER touches the network. + if opts["fetch"] or opts["fetch_only"]: try: - ensure_fixtures(conf_dir, force=opts["fetch"]) + ensure_fixtures(conf_dir, force=True) except Exception as exc: # noqa: BLE001 — surface fetch failure clearly - if opts["fetch"] or opts["fetch_only"]: - print(f" ✗ could not fetch conformance fixtures: {exc}", file=sys.stderr) - return 1 - print(" ⚠ Conformance suite not present and fetch failed — skipping.") + print(f" ✗ could not fetch conformance fixtures: {exc}", file=sys.stderr) + return 1 + if opts["fetch_only"]: return 0 - if opts["fetch_only"]: - return 0 + + if not conf_dir.exists() or not any(conf_dir.glob("*.py")): + print(" ✗ conformance fixtures missing at conformance/tests/. 
They are " + "committed to the repo; restore them from git, or run " + "`python3 conformance/score.py --fetch` to re-download then commit.", + file=sys.stderr) + return 1 binary = find_binary(opts["bin"], root) if binary is None: diff --git a/scripts/test-rust.sh b/scripts/test-rust.sh index d4677f3c..a7419013 100755 --- a/scripts/test-rust.sh +++ b/scripts/test-rust.sh @@ -24,12 +24,9 @@ HTML_DIR="$REPO_ROOT/target/llvm-cov/html" # Ensure llvm-tools-preview is installed so cargo-llvm-cov never prompts. rustup component add llvm-tools-preview 2>/dev/null || true -# ── Fetch conformance suite if missing or stale ────────────────────────────── -# `conformance/score.py` is the single source of truth — it pins the upstream -# ref (PINNED_TYPING_REF) and re-fetches when the cached ref differs. Do not -# duplicate that logic here. -header "Ensuring PEP conformance suite is current" -python3 "$REPO_ROOT/conformance/score.py" --fetch-only +# The PEP conformance suite (fixtures + the official calculator) is committed +# under conformance/ — nothing is fetched here. The gate runs after the build +# (see below): build the binary, then score it with the official calculator. 
# ── Rust tests with coverage ───────────────────────────────────────────────── # cargo-llvm-cov uses target/llvm-cov-target/ as its target directory, From 3353f4505fb4a40dadc9efb4380a45e9a30a51fa Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 21:36:24 +1000 Subject: [PATCH 09/12] Fixes --- .github/workflows/ci.yml | 17 +++++- conformance/score.py | 116 ++++++++++++++++----------------------- scripts/test-rust.sh | 9 ++- 3 files changed, 66 insertions(+), 76 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 080efd8e..e98d645e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -225,10 +225,21 @@ jobs: - name: Install cargo-llvm-cov uses: taiki-e/install-action@81ee9698f20724138a785d788c7567d40f14cd2d # cargo-llvm-cov - # The PEP conformance suite (fixtures + the official calculator) is - # committed under conformance/ — nothing is fetched at test time, so there - # is no cache step and no network dependency. + # Only the conformance FIXTURES are fetched (the calculator + # conformance/upstream_main.py is committed). Cache the downloaded fixtures + # keyed on score.py, which holds the pinned ref (PINNED_TYPING_REF) — bumping + # the ref edits that file and busts the cache; score.py re-fetches whenever + # the stamped ref differs, so a stale prefix restore self-heals. + - name: Cache conformance fixtures + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: conformance/tests + key: conformance-suite-${{ hashFiles('conformance/score.py') }} + restore-keys: conformance-suite- + - name: Run Rust tests with coverage + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: ./scripts/test-rust.sh # No artifact upload of the HTML coverage report ([GITHUB-NO-ARTIFACTS]). 
diff --git a/conformance/score.py b/conformance/score.py index b327157c..0fa15618 100644 --- a/conformance/score.py +++ b/conformance/score.py @@ -28,14 +28,11 @@ This one file is the whole Basilisk side of conformance: it runs the binary, scores with the official functions, writes `conformance/conformance_status.csv`, and enforces the ratchet gate (`--gate`). There is no Rust test and no shell -script. Everything it needs is committed to the repo — nothing is a moving -target, nothing is fetched at score time: - • the official calculator → `conformance/upstream_main.py` (sha256-pinned) - • the `# E`-annotated test fixtures → `conformance/tests/*.py` - -Both are refreshed ONLY as deliberate maintenance, then committed: - python3 conformance/score.py --refresh-upstream # re-pin the calculator - python3 conformance/score.py --fetch # re-download the fixtures +script. Two upstream inputs, handled differently: + • the official calculator → `conformance/upstream_main.py`: COMMITTED and + sha256-pinned, never downloaded at score time (re-pin with --refresh-upstream). + • the `# E`-annotated test fixtures → `conformance/tests/*.py`: git-ignored and + DOWNLOADED on demand (--fetch / --fetch-only; auto-fetched if missing). Usage: python3 conformance/score.py [--bin PATH] [--gate] [--errors-only] @@ -67,8 +64,8 @@ UPSTREAM_MAIN_SHA256 = "b4e3bd089c73856f9920ef494350d622c2914fac238c9193ec0bb3f93f0fc6a2" # The two functions that constitute the official scoring algorithm. OFFICIAL_FUNCS = ("get_expected_errors", "diff_expected_errors") -# The `# E`-annotated test fixtures are committed under conformance/tests. This -# API lists them at the pinned ref for the maintenance-only `--fetch` refresh. +# The `# E`-annotated test fixtures are downloaded (git-ignored) into +# conformance/tests. This API lists them at the pinned ref for the fetch. 
FIXTURES_API = ( "https://api.github.com/repos/python/typing/contents/conformance/tests" f"?ref={PINNED_TYPING_REF}" @@ -80,24 +77,22 @@ # --------------------------------------------------------------------------- -def _stub_module(name: str, **attrs: object) -> None: - """Register an empty stand-in module so upstream's unrelated top-level - imports resolve. The two scoring functions touch none of these.""" - module = types.ModuleType(name) - for attr, value in attrs.items(): - setattr(module, attr, value) - sys.modules[name] = module +class _StubModule(types.ModuleType): + """Stand-in that resolves ANY attribute to a dummy, so upstream main.py's + unrelated top-level imports (tomli/tomlkit/options/reporting/test_groups/ + type_checker) succeed. The two scoring functions reference none of them.""" + + def __getattr__(self, _name: str) -> object: + return object def load_official_calc() -> tuple[Callable, Callable, str]: """Return upstream's real (get_expected_errors, diff_expected_errors). Reads the committed `conformance/upstream_main.py`, verifies it is byte-for- - byte the pinned upstream `conformance/src/main.py` (sha256), imports it, and - hands back its two functions unmodified. No network access; no code of ours - in the calculation. `main.py` also imports tomli/tomlkit/options/reporting/ - test_groups/type_checker at module scope — the scoring functions use none of - them, so empty stubs let the import succeed. + byte the pinned upstream `conformance/src/main.py` (sha256), imports it behind + module stubs (above), and hands back its two functions unmodified. No network + access; no code of ours in the calculation. """ raw = UPSTREAM_MAIN.read_bytes() digest = hashlib.sha256(raw).hexdigest() @@ -108,12 +103,8 @@ def load_official_calc() -> tuple[Callable, Callable, str]: "modified. Restore it from git, or run --refresh-upstream to re-pin." 
) - _stub_module("tomli") - _stub_module("tomlkit") - _stub_module("options", parse_options=None) - _stub_module("reporting", generate_summary=None) - _stub_module("test_groups", get_test_cases=None, get_test_groups=None) - _stub_module("type_checker", TYPE_CHECKERS=(), TypeChecker=object) + for dep in ("tomli", "tomlkit", "options", "reporting", "test_groups", "type_checker"): + sys.modules.setdefault(dep, _StubModule(dep)) spec = importlib.util.spec_from_file_location("typing_conformance_main", UPSTREAM_MAIN) if spec is None or spec.loader is None: @@ -121,15 +112,13 @@ def load_official_calc() -> tuple[Callable, Callable, str]: module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) - missing = [name for name in OFFICIAL_FUNCS if not hasattr(module, name)] - if missing: + funcs = tuple(getattr(module, name, None) for name in OFFICIAL_FUNCS) + if not all(funcs): raise RuntimeError( - f"committed upstream main.py is missing {missing}; the upstream " + f"committed upstream main.py is missing {OFFICIAL_FUNCS}; the upstream " "layout changed — re-check the pinned ref" ) - get_expected = getattr(module, OFFICIAL_FUNCS[0]) - diff_errors = getattr(module, OFFICIAL_FUNCS[1]) - return get_expected, diff_errors, f"sha256:{digest[:12]}" + return funcs[0], funcs[1], f"sha256:{digest[:12]}" def refresh_upstream() -> int: @@ -153,18 +142,17 @@ def refresh_upstream() -> int: # --------------------------------------------------------------------------- -# MAINTENANCE: re-download the committed test fixtures (run periodically, commit) +# Download the (git-ignored) test fixtures on demand # --------------------------------------------------------------------------- def ensure_fixtures(conf_dir: Path, force: bool) -> None: """Download python/typing's conformance `.py` fixtures into `conf_dir`. - The fixtures are COMMITTED to the repo — this is a maintenance helper invoked - only by `--fetch` / `--fetch-only`, after which the result is committed. 
The - normal score path never calls it. No-op when already present at the pinned ref - (a `.ref-sha` stamp records it) unless `force`. Honors `GITHUB_TOKEN` to raise - the API rate limit. + The fixtures are git-ignored and fetched on demand (auto when missing, or via + `--fetch` / `--fetch-only`). No-op when already present at the pinned ref (a + `.ref-sha` stamp records it) unless `force`; bumping `PINNED_TYPING_REF` + invalidates the stamp. Honors `GITHUB_TOKEN` to raise the API rate limit. """ import os import urllib.request # local: network only happens here and in refresh @@ -284,7 +272,6 @@ def category(name: str) -> str: Row = tuple[str, str, bool, int, int, int, list[str]] Totals = dict[str, int] -ByCat = dict[str, list[int]] def score( @@ -292,9 +279,9 @@ def score( get_expected: Callable, diff_errors: Callable, conf_dir: Path, -) -> tuple[list[Path], list[Row], Totals, ByCat]: +) -> tuple[list[Path], list[Row], Totals]: files = sorted(conf_dir.glob("*.py")) - rows, totals, by_cat = [], {"pass": 0, "missed": 0, "fp": 0, "caught": 0}, {} + rows, totals = [], {"pass": 0, "missed": 0, "fp": 0, "caught": 0} for f in files: output = checker.run_test(f) diff = diff_errors(checker, f, output, []) @@ -314,17 +301,13 @@ def score( totals["missed"] += missed totals["fp"] += fp totals["caught"] += caught - cat = by_cat.setdefault(category(f.name), [0, 0]) - cat[0] += int(passed) - cat[1] += 1 - return files, rows, totals, by_cat + return files, rows, totals def print_scorecard( files: list[Path], rows: list[Row], totals: Totals, - by_cat: ByCat, label: str, digest: str, ) -> None: @@ -341,11 +324,6 @@ def print_scorecard( print(f" Required: {totals['caught']} caught | {totals['missed']} missed") print(f" False+: {totals['fp']} unexpected diagnostics (THESE FAIL FILES)") print("-" * 68) - print(" Category breakdown:") - for cat in sorted(by_cat): - p, t = by_cat[cat] - print(f" {cat:<24} {p:>2}/{t:<2} {p * 100.0 / t:>5.1f}%") - print("-" * 68) print(" Failing files:") 
any_fail = False for name, _c, passed, _ca, missed, fp, _codes in rows: @@ -430,24 +408,22 @@ def main(argv: list[str]) -> int: root = repo_root() conf_dir = Path(opts["dir"]) if opts["dir"] else root / "conformance/tests" - # MAINTENANCE ONLY: --fetch / --fetch-only re-download the fixtures so they can - # be committed. The fixtures are committed to the repo (not a moving target); - # the normal score path NEVER touches the network. - if opts["fetch"] or opts["fetch_only"]: + # The fixtures are downloaded (git-ignored), unlike the committed calculator. + # Fetch them when forced (--fetch), in fetch-only mode, or when absent. A + # network failure is fatal only if a fetch was explicitly requested; on the + # plain score path a missing suite is skipped (fresh checkout, offline). + present = conf_dir.exists() and any(conf_dir.glob("*.py")) + if opts["fetch"] or opts["fetch_only"] or not present: try: - ensure_fixtures(conf_dir, force=True) + ensure_fixtures(conf_dir, force=opts["fetch"]) except Exception as exc: # noqa: BLE001 — surface fetch failure clearly - print(f" ✗ could not fetch conformance fixtures: {exc}", file=sys.stderr) - return 1 - if opts["fetch_only"]: + if opts["fetch"] or opts["fetch_only"]: + print(f" ✗ could not fetch conformance fixtures: {exc}", file=sys.stderr) + return 1 + print(" ⚠ Conformance suite not present and fetch failed — skipping.") return 0 - - if not conf_dir.exists() or not any(conf_dir.glob("*.py")): - print(" ✗ conformance fixtures missing at conformance/tests/. 
They are " - "committed to the repo; restore them from git, or run " - "`python3 conformance/score.py --fetch` to re-download then commit.", - file=sys.stderr) - return 1 + if opts["fetch_only"]: + return 0 binary = find_binary(opts["bin"], root) if binary is None: @@ -461,9 +437,9 @@ def main(argv: list[str]) -> int: return 1 checker = BasiliskTypeChecker(binary, count_warnings=opts["warn"]) - files, rows, totals, by_cat = score(checker, get_expected, diff_errors, conf_dir) + files, rows, totals = score(checker, get_expected, diff_errors, conf_dir) label = "errors+warnings" if opts["warn"] else "errors only" - print_scorecard(files, rows, totals, by_cat, label, digest) + print_scorecard(files, rows, totals, label, digest) write_csv(root, rows) if not opts["gate"]: diff --git a/scripts/test-rust.sh b/scripts/test-rust.sh index a7419013..fc50cd54 100755 --- a/scripts/test-rust.sh +++ b/scripts/test-rust.sh @@ -24,9 +24,12 @@ HTML_DIR="$REPO_ROOT/target/llvm-cov/html" # Ensure llvm-tools-preview is installed so cargo-llvm-cov never prompts. rustup component add llvm-tools-preview 2>/dev/null || true -# The PEP conformance suite (fixtures + the official calculator) is committed -# under conformance/ — nothing is fetched here. The gate runs after the build -# (see below): build the binary, then score it with the official calculator. +# ── Fetch the (git-ignored) conformance fixtures if missing or stale ────────── +# Only the fixtures are downloaded; the official calculator +# (conformance/upstream_main.py) is committed and never fetched. score.py pins +# the ref and re-fetches when the cached ref differs — single source of truth. 
+header "Ensuring PEP conformance fixtures are current" +python3 "$REPO_ROOT/conformance/score.py" --fetch-only # ── Rust tests with coverage ───────────────────────────────────────────────── # cargo-llvm-cov uses target/llvm-cov-target/ as its target directory, From f3f437106d9869364ab6f1bbbf3f249f90f16876 Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 21:59:22 +1000 Subject: [PATCH 10/12] score methodology --- .github/workflows/ci.yml | 5 + .github/workflows/deploy-pages.yml | 5 + conf-chart.png | Bin 0 -> 100818 bytes website/src/_data/conformance.js | 236 ++++++++++++++++++++ website/src/_includes/conformance-chart.njk | 45 ++++ website/src/assets/css/styles.css | 91 ++++++++ website/src/docs/conformance.md | 129 +++++++++++ 7 files changed, 511 insertions(+) create mode 100644 conf-chart.png create mode 100644 website/src/_data/conformance.js create mode 100644 website/src/_includes/conformance-chart.njk create mode 100644 website/src/docs/conformance.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e98d645e..d15c0edf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -96,7 +96,12 @@ jobs: runs-on: ubuntu-24.04 timeout-minutes: 10 steps: + # fetch-depth: 0 — mirrors deploy-pages.yml so the conformance over-time + # chart (built from the git history of conformance_status.csv) is exercised + # by the pre-merge build check, not just the post-merge deploy. 
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + fetch-depth: 0 - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml index 8e9a8722..5a79df21 100644 --- a/.github/workflows/deploy-pages.yml +++ b/.github/workflows/deploy-pages.yml @@ -37,7 +37,12 @@ jobs: name: github-pages url: ${{ steps.deploy.outputs.page_url }} steps: + # fetch-depth: 0 — the conformance over-time chart (_data/conformance.js) + # reads the full git history of conformance/conformance_status.csv. A + # shallow clone would collapse the chart to a single point. - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + fetch-depth: 0 - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: diff --git a/conf-chart.png b/conf-chart.png new file mode 100644 index 0000000000000000000000000000000000000000..b6b5d95224c4e5a7b7ea21d2c797e3509f72b6c6 GIT binary patch literal 100818 zcmeFZXHZjX`!|Z)Ep7|2w+#USl`c)`EnuY=>C#0y5s(@Jgs3QpNDW20(px}k2q8p8 zrG#EW2_YZ^gg^p>8VH=_^UVLune*+;`S8B)%>IU2>r+eRwjqP|Y8yma(Uw;5ciiE5mv$6fnrg#62MQF|n>14!(Mf479!eLA1W3=$? z3orgQ+MEuje^(m+k${-wrpfr^qcjvk{@UvEy6P-o%_4N&%OC?7{O(@(G8y-o9rfWQ`c0}@y{ z+~In&nRlXi9d>~-w@O{EIL6jyBr}N{TS3d_f&vc)4jp)TKlhSAZ3g}ACwDBYExjO} zo^q^B+|lkK@hCett(=rrRx*ev)uPQ&A@ehNTi|q6o;n+b6~aT~a7^n^zvbjv0iG+o zZBGYFgQHIcK(y3g1uhkjP$ozd)aR#dY{rWJ(mz)IM zdb^Fiu)O3S@WYjgkI$&-nQpy@5p-M~r|mT>y`a(R?DFvVquFY#SY6=UAIF=FFFU|z zOu`{1N=?>CMa2Ofoq&Bdy!`@;{>rvdW7j)S+0DL+OI_ z69UrqXT)h2Q9~0WJ+3V1H=y@S=Ery)K+m?kcJ1YO8! 
z-52ElDg3u*UxGf zK4lIKnQ^eS!RBpMgi9;Gemz*;_h%1Q&dhropZsKWObNYB;q>1itJ;OEw0KvG8{~i# z12?#c9eOc?k3-90#Es?UX}opDBi^f9_0Xt&+<4n+JrUBuzAHArU~oY*1my9oBV@x@ z-YFlxNo`bFPr2{9C|zDv)!BJk7@q%@3?sIVH%n!>u=7RZ0OB0rNk@HLM=o5J^3hxD1Bs%*?A(tnhMQ5EX9(}-J ze1<1|vd2`(RRNX<{qTdO{nn*pW#8Q9$0fWZ^vvo{arjB?JU_j`nEwMdFmOxSw(%CP zQJ3B##n{azbI%N8o{<+BdDxX9qaQ1Iz0T{$D1LwEsYy#HK@FLH%LlXl>paiJTo`K- z{p$Qibs>8D7w7}5)R)~z9lkx%x5|f%Y2D6k385=~CTrcMaSDQ3t$h=hSyTlxn%6d; zN<{l4g>^Fio%f`M*#2an51-tStX(O&lKx@5+4$ItJ6;kQ^Mu95cR^U30!AnQg;_Bs z9ct+yCF5T1f$&gZIL>_@D{t#@>Fclbnh1)L*OAjP#raFP6Rzl{^^M z#H$Rodi?O&?b}H?KNd7t0S};i1^rsTMj@Z3Fd5lngf9ykTN_zvar3@yA?otBN?hA( zyK{qxfUNrMck4U7HD%)&{zh;KvBRjb>YdYP)**om|KRli|A|ml?7aKIWP0AUTd1`B z{2+K*4ztpC{Y2Wuxokz&4H3(`44o*ZgM>`-6J$ut=`$NQZro4^yfX-%6f%*{J2IW< z{c6_HN8&pX<4bDRu3vdk_n;LPF$q~&o?UXA_Cr_NpFR+tRX>NwNK~&M!#f! z?`31&#f;xFDvbQ(19A6SqUKI=z55Fe9n)NT<98uQPHe}d!&sF2(nNv`FdOBha%orE9l$qz^6Bo83@YNoX1P2*mmM>2{)|2sWm^# z-agC1P?9|4GE4d*LRxRL1FAlqLr}_2DH29L($J8^kHsI7ui+c=SRE9b+bfp`T}xb1_@# zJuNHve&F`G_eN@8|8SUIbpDice#9E;-aaJ$vb@d9DtG9H-^i|)4(DpxArFtWsB!M9 zEuG3=w*m^|dP-;=jdeL$CnulN^I(!7)iPS-RUdbn;U=wjg{~Fq+OK(hYpD}9t|I_< zlzWlWO8FprK4-hrzUy!1n}9Hyvga%N8@ka+=i9$v)f4z9Wc~O-&DJfHRC>fx^jX2N zMjbL^(OA}DSI^)7BlN`NoGZvKSJVIgg82HcFsqWFE2<4TVeIURb$te6$|4R_7!+Tv zW2Im1-gtUz(@;vhs!s2Q*+AM+bZFQsTi?#SUVMF<$T8){vPaV?wAZnEM~z1h zi%>h5((}_Q0YBsONChxh-gHyt3a20S298ZsTPmqa$MX$_5_)-#E=G!r;iB>I0)0=t z@xf-nY!ues7k%8y!A>2@a-Xn9!b%$7q#t#|*ja9=|A`hvRJ zRnPQX^Xk)mA~}Evln<=;4HGbyq2b67M+2n z4ktea1rrbfuSKozoAD=Y=+GnY`8M6Omx(I6moQYn*cPP7+Bh&FgEF24^nBwv_99+^ ztr^~<<1AsKKWYS*?hsWax3U)Kn-xDOsRpt~yVgTGaARXm6UOvWr6W%op2?HQx_vI& z;;(r(+zX-kh!S#tT1BZvAmtpKPD5VY%9GpN+6t$_;e}%@XW0vj6tHF#+QPL(Rn-@I zs1d)(;FX~wuZC3Nt=gx2td`rs?$N8%qy_yl^>N`;+X^&$$MIi&mKI}D#)j$eAr=5?bzE$FNGd4*R4g9yCPm1b>eO1ru~($BrDDJs_8iEhOI#B9rD85&Qzt7lxgRl89eP{*iptbM5IZ}tPBes&M4 zx*y^*YW>Fk*|g^)l%|=0WBu2I@{D_Lr%vaqfvcq+Zy~K^`w}%aj_s|HaIS8SX};)^ z-2Ex=Ipp%6>*2L5?;j>R{1xR$mH}O$-VU{LZb%niR>Emr=x>4OMAzPewYM8n=6$gN 
zquUN#2tDnwFfd#-QvYpRh=;IGHT?F#3B}WgzeqUydZ|K-DkNT<`^cDc!OECZUrvXM ztG{;AaH+2!KicCqw}54yt@E{#IQ?hLD=NwC<}?Z%7kM@{$j;G9E#zkpD;!0Plo%fC zcLJ`>lpv*!?s<@)g+VF*ngL8h&V7yLU1W_Eaa7X$b5CsFC9@%fdN%Xp!=E2kR}bqw z5~!Av<;Qegr)syrRJ#;KN;P(#U3GXQlk%N$ion;Lk)sFk9H?h8h%RexqibIi0;h&`pIRuM4`>gND*lQsFyme|h-5L|FXX&1?P z=Y`+q3bSvi9wT238Z1eh;&e<)X_5vN{#b915c&RHFLrU0Ch65>wvj4*9a9;ip0I7V z=t7#iLJ7~|6g3!HO5QW5-TR>Dn6tGTXr_=x5Or!1w}wr^ zo*XLJSUFg_xtThD+!9aD&R(5eDu&=I5KoZ7vGXDEgOC3@lbG7$n%n1-sFCu1>!GBp zC0@)yrq3r3nV${b|M0o;dvV5Tz90vSj-et~mY9QW%G>QaD zbpFYTa!kS{azu-}-$J8kzO0;Q^g^4a-}dU>obf77-xvf&g9o40y&GGRE@VWZ(-;m7 zFXmtBBpCCJAwO;F>1!PAwL+F7@(sOv09MAw9ubB zlft($^gT?Ip)zLZTG0Hcs%CmxdiUT|h+4qVuoRg){o2;&3dG;$_CbZtl~z(uCvG%Z zQk$Y;;?wloyG5LIXU!Ap0f3QyAwCQBbQr1Qq*`Tg$KqT>t~*ib^PYXtEs=EDGt_za zhMcb;DP@A&>yh${T?SO3>Nx!AFXvSLx$9-BbwvH_=`*YO{JAxY(TweNY)9bG)==H0 zwl>kIkCV(-Ww^bk5NuV|)VZ+D3^nu~A7wT7Y5hqi1`SgvBPUZQ;3^R-bXFLIi(?w9 zI9Gq|_<}E~alNkn)BB8Yl2U#bjCcw8WOc#T$w`Zt0j!ta^6;sjTN6IfU3WvyQpHdj zk_-TV?Ad8TRL&4Z^>kD2KDOTvMaRo~j*lC}E~dlxY_?dtMh$A`YhhlbVpl&|DXCAO zORH0DwS99RQJ@W1MD|;Nu9w+ogW;MsG_eCTWJmJ52u>>^VySq3^Cw?0!P)d5lO`{b4A*d}{tSb3*ijF9 zhkbMD;-#=sG+|IqP3yg#D)|AQE{&`CySZ#FOhXcC3}Rj>HeJ%Vo~?jT0B)v`U4s|#i+vjsnbw|pLCCJ72^YyI zH7mYx)q38#rpTK^6kWv&DxcZ}nrFSbQ;f4tv(e zc1`Dxc`_w&%7RKsh*=^l9NN;Xfq|if8H2zuQb&J&q5ZA~sHDkrL3oT{YGYF#!-l?) 
ztmgD6^i?DusqVRd)3puS6i`~OqPde1fey40IuUC`7s@x6pia;E*_;&w>FGBCqj?5A zfPeRZ3oe=#NoDRebxq5lkVxfnO8rEcscx>U#PoKjc_nE9ylefoH{%1S{qAN)opLu^ zUf*}VzTxK+wAcswD;f5vm841jFwN@T#-X}vPDp%x1ZeOEK(mEfxLO*m=4F$!M~F!g zi*7DrC+~@!WcaaT1TkeRFD4fDO?x*2{8IVuwhM_GM$Qi>9y)r}&kW_gGVd5e+E(FG zsUjR~w91yz*YibZ0%EnEb$Y7GcJHlAq&hUAl}7d*fv1#x zAL|*vrBBL>Hs$6-&a?k|eywoc%HR6RYg)=yM6%) zjNP{3emD}N&x>*5$ARb|I&@GxpcUKGK5X$U{Y0yb{@6?iEK4`Ap&GAvA(ch~Gqv$hc7s zb9^9qt}$HbIa5(=9z~vf1IztaGSZc*Yhe6 zxp0u@QDXLUD2No243ueUGK!dj5#YP#)H}*C^*^L z4vDLLYqfsVchz||d3st?<1O2aW<~h>bj%t<3+DHAq@!hL#oESFLTo?HKod1>oTC3R z^>rR>No*tij{KEL+%VHX2Giv8%-^3!**|?SzPNPUf!g;g`UWtCXJx@*el*K)w6*KSwJQaq{4 zq9>nmsWOnZZMT_%pa$!^t5vG~1Pkk~Q#0L^)aV0kY$4h+oi`=5w*(|N=6SQbwMgS& zbVzm+HUIRQd|KgA(mx9w*NgR>BA9syzuHc_-%}#GQ*{N4!(Mwq?25qU9L=Wkm}so` zNM58+%wYCyyYh@TJAR;2Q8C|%X4?gsQjsoYu$4^M=K5k|_$Lr(PAi0dM6b@xd#mX5 zG}Y4jh`ero&SML56mgI`H42ld?uGxzKGSb@H{7TH7`9F_t$uyb=y0)f9h<4V-J`{? zz6TIfCLKR?rQXAD>B-m@EzDB|KZSRcM57if-53F~Cb8&Y zXeQnT?B>M%~O?9HK$U6S2x zKCGwjgw7{}j?nxVGL3BK`&Xxy#N<&B`OBF-Wu<`{F8))Djmx||*U~00W|(n0Vq}h? zX3y^g_}}6=n>_89V(+uRm}>ph2&d-08VxYNB%J?vQs<9zC5i5CxKDv5UTM%{*$O@~ zda(V4MgrNEFsgM`B@+YSsZs1oK~qtk4ByXMv@C%z(t^R@zqdv{Zw!7+-}b6sXxknp zTUuIn{}Vcko6gsO^|_d|tpSJ&m^KB^9^ToeS3yI=^lypE%8oIkk_os#@!E&y?)iqK z=awGli2m?FJK%FgOC{e2GyhX(p9-TNCUj0>lZC3_w;%iZ8ne!Sd}S@6n>xC`uMAMD zuV0^l1W}s~?W^FXR(-WEcR$M*$Xmx5y3P8ROs<8LNZa}oc;AryBhEwOr4`c zlwT>1d9yvU4%^{7HzNLRJA=btwzKhbdlatl3>#TC9700nTO5BL)6G8uL=8e`9@d-? z5P%k}-I&V3-7lZp?g!5vbR4;T3 zJk?0g)3hr+!W<7XdE*18O8tpvSfO=fC>kYXseGrkK;Dc4#3ea~>Zd_a| zd-EkZejjPiSez?8tPeaXi*Gj7oThKmu}8L-KK{;bphgGmB~`b$9n>8u?j-`M#KNs4tg!$}5N^pUJ3%h;1`Tl>1o91V0CQ+u(t-bRg&Si zM;NQY{J>m}$Ek%DT{9oam%TZ%?-tczhkDDgcGNW;^*SwWt7Fn%@!p}H4)OEzqt(&5 zs8@l25~m3K=MT>IW+~?l3=9@D_m|PH5anI5C%0EqpM%x!M?4#N^{XU54-?u~A|7tn z*oe}oo^h6Ac`S085m-O(79eUP92~R+M(7sCG{J-`g2M{l_Ys{~o-wQ9qIHGexuTrF zazK-uk3v0}Q&aMrD6vay6Z!tYsqaWdR+*@FUtUnKcLQ;PD3&lNm3Y1r9%JKpNZBo? 
zx=}Xj(s1N-N$aiIEiv%cp%t((?$Bu3y`S8^dSXq?1^vpGy6-DF7$1>lUBdctTT}{t zLCKA^P%d2nAH(A7ic+0I;xkUu{5bjlyR_fswrsuwbHg{rKh~s2t`qX%l8wY&8IVtm zcv+cYXD_~{tgLY$oZ!u@oQzA?c^vaV`%#SGxrs-I*^eI61c4j+$j~6R!m?3OF1b=f zq*0mlKEcRl=X*%4Q1*RIl*`U&%8=e*ua&(%Ykm4SJ;(U{P(da;@Dr#|p2%eal_e!& z=}Dwx!i9%Mq8U>;)TVnt2Wi==tR!SO=jDmH8%{>-RUATiibBI@Yzd#=tSP!g*BI+j zxdlYgN|g;y^~~y8_Z^QkN%x|NCr%th9L?TwLe}T5Uy=$wG% zZ?n;fvJ`>`iXK^LjRM%-me+q8JET_2w_fd)1kW_|ndLucnZTcpT) zc>KoB1vvwK#*G_K_m#IdT5X3uXkA`#mP;cs(;C?zsMZa?k_GCb)1zl6$qvWaK0i+7 zL--bd&(UPtf+2u9{E!QMDKu1Jlm6@XN+WyxAD93tT-(?leC*E+ZU>|j`O7p|D8R;c ziuO*CKn;Fha+Nm1`v!kDx3qNIiE?x62z9g31jyaI1U9zMklt}|m7qRzS@5UkxW@Gj zFfJ(gN)fMIYg`=kXn{58DMk=2m-IdM zo%eS87#!q7n0nJ$lAygZj_ydBool39>68-#}G z7yqRdS~Td&Bpw}>R8;6!!~&e`lMZ8&@fGzlSpWQjy;ex5lVxz@Hdfew&HihbqU0qn zvR|;33nLvF7+5b~QZFvvICUoyVd()&u{( zDJnW)dwU%O-%i~eF+oMeB;tF!-o{gfOBVraN*8`)J`QQy(Y2&1tL!n?a&cm;W~mUe zI;h-cV8R1h`ib5GXINTU==RHy_P;?^DKuK-dc@a-EvM0u5tU^qWVye=nhe-y@`y?` zin7Q`>PV&uZMt?nz`JhHGqu~(Fgs-E+L>l(dj)UyH_>b5;GH(G)%0I_fR+)XoG?`z zj14-;eu7I7BvGN)@GTiQCAEAEV>Z|*yK$-!7WK+@Nd_P+p(~H$tbCT|ZP8en^In%o zb!DGsIzM7{IBA3X`^-HqKE+yqn0|d?Ppcm>(aVf*3wcDIObUGO9N&!{%1iRwtv!|DopM9gP z^0{ffi>HQe_rwDB0QM6nsQcN!ls)f9I`#qnmxF`jQrQ?cV`D;_gPmI_)OtZ&Qb!X> zaVzdfSV7VF*w{3l_K^7)TU&_0gOVESY9~zVy$IAcKStj~HOe?kGaUb8^yr{NQ1Zr& zuuXxo#J?|J9>I_Y=5)BOjAI<&szLMLTBlYXw zQ>CeX{m^7pPi4$DJv9NaXD#vY5*~eJjY^i+$&wx!M%{U`IV$P3&C+B5cu=g3xJvm2&~)6NI`AWCuFvo0O3qmJo#mgdt_2AM$<@Q z(#Hm;UtuN3eeEc%>gupP#(N#`HYfzXY#3%_Z}Y?ls6jUSAGcC=uB`Mmw6?BP7CXJ; zRc-`~Gre|A$#NC$5_b9*?9lUZIV)7Nn8UE%y62y-B995{=|(A6ImpDxx~WR!+UCD* zRELPQ+vz2x%*ciAKJprRA1hz<&>CKH@gjw&T!Z*aa@`XjJsNiN?K?SmzhO#4RRsD3 z&SUXX7r~gPzfqRUh6Je+gLr4GQt?6q3`wTsA|8lk8y2JitnB7 z${0>5ra09&UeD*wY;})xCE$mLD_csmy?+pukd3*i4d%9zc1l`W)@rU2ReoK(;xaNr zivm2O7EZS(rqV1eQ&QZ%omn)_SBq>mB67+-FD)&VN@%#VJ-?TKXimVfvwgnTWzU^@ z&OMs44lqW+M!wz#F;h>|%o|f{g#Xaje-<2EpCB~&E#zFl%Q5we%1Vkf`i$%eRrHa% z``69MNiU{d00L!N_;+U9D|v}q-AQr;fN@X^Jlo=kD^0tw$Iii5sCsqluId*%C$+5d 
z`y6N&Nk70&lRO^`l~fU~x~gS;H&L^w>R7p~cLI+e`>#)HokiaQ8`JIjoA5Nx|7`7+ z^!1uVvhP@J-2K^aj8hrrGL7EnJ=C9D%>4E`IXP;;wGWqA#lqz_VtJv)yjL^(2MyiI z-P8|fUJKfZ6?4r+oFC!7a^=QgH}m-m=uubm6WG|`VN&zv8R|`7Ex%Sw7>H@087E|o zB?Ujy@i8(2_;3%McRQaBm@iU^AdOa6dK2ekNU|GD$%|0n5YxxJsa>ddgQ>^uml+W7 z6G~;=0E9W96@`a}GEcJWm85ztv!qEjb*q5C4V!fV>@os-Q&3ovP{?C4C$*^Jh9tF^4#CP294(Z7}ovv%#DlVc73_R;n8`7e=|1~U@m!B^tA3UNPyD@5MNoTP* zKvRGa^5>S`oyoB=6JLr!%cxZU(^s!v*^z@Av~;Bqr^_1kQpT4#=RQD<3232f&hdbv zzR`8kPtTl^HJ>&HH}_xzN-C9M`$L9mjwG-;tUN>Kl><+ms;mNBgR=7Ti_E=~x}QhE z)pd7WnR1FtLc{NO`jvl|Tdo|?&Ue(|CK&1Ly5E<>>RH&T6eh_y-ZZISVGn>Z_F7R? z1z8OZv$xG=C;YAqx~{GHYa_gNR;vvGM|Vm)4u7@ORW(t+EG?Y1?hw`ur$m%1J~I2c z2Q(6D|3hshukCsH8r?UW>+Afg?sbMb6*d9fmN-)|>OY;ZT-hW`)6V#SY9*ci$Li-V z3n#Awlca0NIZp)xQYGLQx#crCIvHWJZ0p`Kb$cw|DK-0$@CYg*ZIkj40C+>vkXlR- zof-k`l%wc7)M0qp6p|pNr677tTv23j8-~X4|1)6-XCM1M8oV}eiy8RWkd}%{)q~1M zy&LCE9a?sh)yfds>FKGu$&GgXhhckYz%88ek>BT!a{KPac=yC2lw^kaJ{PZqztI8_ zGP9<}^=d?{xVMi4$U2!BrrkqkFO4cQ!2F6ecc^PtbvxB~+=U}80*2i2E2Iq5JD zsBY@tAQ`VJG7VZC`&-vsu*7tWXT)#8skz$POOy5a=qI*zlF}KH2paZer@d>IL-=+t z9xe0HsrvN~;~a^LU};5u}1CrP2D zI0z5;1?FOv(o3VVl>8*Z>!t*o15_V)_wQ_+`h#ue#ljk-4FK8Vl!8*H-P(+dox%3~ z`2!X}F9RLXsUdoz3A)-GEAJGxmGSxn)NI$jG2%te9OYYtB4~BJ)CoYNjP30SZ|Ky3 zZ$AduHfCQ^Uit5S_T_oJ_4c)i)=?qg98zV;+T8T(?hzAys#j~p{%D(0+JpcMl_@XW zICTGzk&u`~g=ag8DsZ8H4f`Y9(=#&gZ|sz-feJ9)HI;kT4YnBG?Ce=bik4Gn4jlk1 zYnHiOZ|!-H*0t__w~`@Yn=2TcyBiA1VG|Rq`xTRyw*IpcSJt3`Z2|B9@Jj4q&9hKd z$HyJ!;}-iBPIE$+(SluF`7=SaPj=g)eu7%WQ{9FDNS8XFvvcU=7B$(H z_H5x>XRSHs%0x9T6~f(r^vEjQ&8;}%2I5@5h^>H*YO~)WFnv6VgrEtoay51wY|mp- zzG#V0brH>KQlu>1>3sqBbocb{-phUVSZ0dJwDHqZUxEsHEWb(Rb0efEt>AoQ@aR#r zboI^-gONcX5bzP(i?q;PKW&A~hzOR*E3br@^({umD+@~-uTr>P?V*7)M`pQHV=7rh zhOD+LoqcS$CRDa}VoYynTBbOrJfu zvTSIinZm5;w9&ifbfYoYnwO2i;!r);oOX#E!#MMtjwa6AYBWqXtX|by8i{QWAqNLS z5`6^{+J*x1%!x3QPlKFBQtd~SQ$Z|Z|f^Z)e6%g6}iMP{{lH%?|PcXz*)`~Bz~ z{lHsUBQg`b_9TrdjR^hIf!<)S0&rZso8nyN*3trnhMi2kK=A|)Jsi2W9^vEmAhzrK zsAs?+aO6W-3?;9x{f!D(z@M_-ogse*fV|&>XQ)(INiYtqLKN50EU|PT?%Vmt< 
zCv&O)Wf^k0!pFN~=clRr(sm5-T{~u-dl92usPOWce0FNYL->(@8Uo zoz0^01$(1Wfj{_vVzOzgzrT5AcK%6_fBZYThyyf3_M%h&$UZar!Cv$Odl0mfi8`xs zocDEuuhV;fRb@p}`uV|+`;Cm!uf*UkJx!++gq!-{r>1pw1b);!rbzvRo&jC^nkXdk z(|Bmjhoqs_m7MuAA8T`kOVcTf-E5lI;vKN9f#x#$FJmC`*4&i=XhGaD6>k{s;4A-mC@fM$GjN9ZfBjw2xY`7l}l7hmr|JIYt6 z!_bT8e0DYhwmC43=-?XV!FP=L?f%m1EwHh+fl zO7C-C7Cqn78uozQz_{h=T$9h3gt&^r%`($x&&I_BTZ@5BaeLVCxD&GxlbHW!Z$VY% ziKX&$m)eOCpQ$#?D^h}ao<=?4N*^zZuC(6o)V#mjVFK6wgO=tJUcA_oNZ42OJvZK9 zN%w$@UQ?8n?ou1{O~eLmeS1+U#-$Ib{4!_D!OM4nG)d|}nc|y7tx-MlI+&g?O^xcn z$yo=v&mRPF&I1kLMdNSrxuecQbEXda55Div6jqnyE4e!{MlD-${EEoc_nD(<7kNq( ze+{G5skd-PAE0#Sk3MRRyb3=ibtSpEQ56+$v$Aerz}Hk1A_yyjE7P@J051e^D}V|c z22c$fbwkN6q8Ky==y9^rBL0At69^JiS5$3oZ+j6n57%Be$W&Jc=7sGA*lsLeJcB^= zaRC9OqGrGp><+-{=IwRBO)?-RFxmhGj2ZJ|0Bd&6RW?s+4v4pmBV?;s-s?@hV4=bS zx93H1i*iH5GC3fAa^u)nBEMS1V^r%18(RVo5Mdbv%60Xzhq(q1{AZcuHdmqn=P|xWkI2Z)S`*ZtsHc*R zL)+&UujC!|bzRJdo{Ry>`3--Bvydg-SN`s=c1nqifK#0jlkeV#dF}k*GY{JFRQ2r1 z=ua4S%Cp^*-4jDhZuQZt8XaTtbL3rDatu7cf9L5?BYhipXhxSUhivudr6$-2BDs8q zO~3m7+-}^cp~vB>hG}Z`>Ct+i-Qi5EcSWVn!H!mg`USo*(;D|$GFOFKGR z8L%BDue{IBmYy&@6mifXsx^_;g-%wv=our+#K~>Y+#o!gol;f8o2Mzyn7ck6)~1ea zz>Hqrwb1DQjGQ1Axz-KvI}5H(85xa@jR3=!mXUe%m;FM!(c1J7M);fp95+6CyCY$cilisep)5%ZYsQ6~23`ty} zqBMQ~5lBux>>cJbdy@v=5|erTQVj--qm#kEMEIajaeY z`MQ4SJs>?3cqFxCQBb^Jq z%9fX)63{o5vC^bpBJ1X|>R!jek(i|UyDG2T%9@&D2^HpY311v=$3}az-wVZ5o1I58 zq;J`ofjGQ|XNU6QgP8^cXJftmh>Jf7qjHU(OOR$$Qwp>Q0+Gb2nctroK z&tOG-99}x(7{EUWUC(}?=u>TT0}~aKaOxCC1`V9Kl=AzqK5DbJw@rY6ZZF7&T!&)GTajzng$cgR6eAUdH)W_!m>(vd}%HgB&f}L=!{rX7RqvetAey)g( zlFf1{Nh2-)55md@@X*^3&@Jb$M~9N1Yz({Gq-_>W^zzr?2MR^O;d(7*!nBP}Mya** z_|ba)YG2qMSl&g)5GgL*77GL#EaAW%KhXWy z^b7>xB}GTBZU8RW1&X^9zIFZ013mx2p$GYf?>BTGK+!c$w|~BV1yWZwII5_)QN95T zk%r|KFoU<-tJ9Zw3F9FlVhQ}Iko?Hm4oAmn9WJ_ok#WTGgtb=0j_d|u;?;Sr7z^Wt z|KS2c*SA)xdf>2wZ6K@pN7P*)1l68WaMw=|2O!BjP~cr)cRmM2&#e7o=Uo0tGu8S; zSRXI7Q)0nu-14w2NaZ060=*_gY*BdR zbZmufd)9ZC-%lkUbc!wj%NTl zYs+?K*8lg14USIOFT!X+FLzXcCm^zEG`H2;@6+9HcJjl1qoucD;sMAnS5Y7BD=TmO 
z2juCq?7=sANhDae!RcMlZ;<;M9U8g^)<-ZG(f| zpf;v%4}Qz7ZEjrJ)mz(V(tnwmT3OiH6&~Z_EbJN`Rv++`N)4(!#`fHA_;?bS1cIGq zzX$dD$gbg8QF)`=yFz%C4Ne-+k;gJ^?bjFbTlV0CJuoZnDkW zMh!l)+owZ~8R0)3|MfOuaBT+R3HBT>oc%8}Zii7;=OlL`6cnB(*5v_fbo_?}aOBi{%-O@S1Y@{FNyrDg?!=5;puRJupAK_dDfBy$O(<`U`xe zoRsYdJNX};JtV*ay6`<;w_#L3&N$(-aqGdPtb>EVd;0m~xA}ME$AC))BAGQ;wM^hV z=LkTcH<5` zLGX!IH?YtiEyrgN;w3+sFOzL6VrLj_At#LDDFLA4;{LAdBk&O;1-}s1TRf1 zYsq?$D!%&=gO^8&txcT|v(lXO^NE}f&CP+Ia~%jD0%G8npvDl9sMAL<%4*bYP5K1H zjhF}XqGOvn`GEGt*x^e0X{6^r3uH1=0e}-r9~#!W`q0=SdkmhIpAo*_#CNvs<3F#* zh7_|)FW6&S<3-0dzVb~>eka|Fu0$gJ{Tbr2-6&2%*mw~m+giY3H*a-wIi4yZ)V*pD z)n#F0CIx#ZBO}Ufjlk&GL{`VZKOBsyRhPmaKJfAnetcy8<6!eS^m)12jmsAX? zz3&<|(w1t=^HO)B&wP4Y@UpvZNG(2=rWm?|fq&Xg9;zF$NA_ZFk)LbJ#Z_$Z^$UjY zo=Cko4?XcZb>4M?li>VwE_-^{)cxx%V&mfZD^?M#tS!8u9IJci+@9Y9e`g<~#%Z1O z&iMsj8lB?cXz`f1@N)&s%u>!Wlo4>eDDuGZ#h2g(DoNa8(%iwu8N*%#eVu9DKwJk; z-M32BtbRvXUb=XJACVn!C%v|$Lf;+Y!ZBV2jr#J7MqLyf98s3iXGdttK_~&nY#2%)F$#FTn!M4?=sUykbGm0_6-4DNze(-jqut= z$Pz{fG7c&xbaF*7@i0n9Z(?O6Rwz8gx9yhB(R1fQQ6(ROoR}DxT#a1p&#RDU{XZ4m zml1E~8NWQMfCfidaQ*%xIHd|5*((>Tm3EPm$+NVPe&z4~x1!0sADwVnHlI=j<{8RM zHQ@GH1oJy+N&-1JZ=T_%)~e0%WELRM*76%R-2=qi1c6E7e-+HGb>W-hMdZ)_U{HRk zRueu4%5yC(376&3mWITyUta>rNg$lB8MyH)4KHzcD3_U3x#Sg+cw?)pP0e+;`Mr|= zkOkwNQdLEDMSvWOX+FRem1$kmIp{aKaiZ=nz*0+g$dsQHbI?FVuGV#*X2>`z@W#Bm zEI3?}8`L?}_0C)Bg=2^be!BXV4-F-7rbN#mt+Wk1$D9Zt_OK$P% z1>~|J_~!1CSc1SOrQn4XL0~xXe-!G<68aPbq;T$JS^VGfGReN+@r1XjQ30UNkJZ$| zuQ|YLJjvzYhY!~hkkqYXMP@Rq8qk%8_qLTWKIs<=j(r9N@@VZP%Pzu&W4ySzt@Vnb z=!v*R_&DVAGc?bQQh(Bs0RajCkHJdsWvwUZKj*5UQZTd3eZadSdIu$_8zp-M~X~j zRy%5jAMR>LSb3QGzhGlmA3~+Qt{g9r6s8TPKP+RZ2<7 zJbxU{3Z42wYdc(2me#8KoRg+=$n=SYwauDFIwz|itrO?53!c}V zTgY*^9!$#h(v})HF@v7=A#G*qA00INDI=bbO=ayN$2Pe3sC7A4bnM!bchJjNDfAf?jE_ZagV?6 z?|bXoZHMw@2d2Z%J3?by7&4qN9VLlT zDdC&hn@-2N=WSWOX(u90ZwmNxsb1w}F zlgqUXEyEzVz91pw=w|sjW+3i|*x9i_tG`oo-T`+<&_SKdPbVUFHQnXG&c+er z&qUA>c`(Zu*|*8T-o5qGbn+KV6i20Dn!G!(Ht8LmV}aJVLssRHxLx*L$Q-p>MncKQ zA#qG7H7fz}V=W^ij@NBwp%Y(X%nNA6>kYE4fzI7Z6mulEhs!K&{4U$>w7Wa88vMDJ 
zbh2}oR5xXLbF&c$5!}5_dO$_)@A3?rcPIh89PfJGtIO z-;#4*FN zi9+ge`^S_{`Ayv6RHc*a1JlQ)!bAc=;}q#HPt(M zgepXdAnRebA`;be+x2S#<dtVHq**?bHXl6-l_p#g~7*BKdQE2)3qjTo<1kX?0P5Z(-Xhpd+buO z()tNakx88s_fN#;7SD3(aldfw41P3CKFsA%9G|ua@+x zhZGeSK4;+^DL>4c#_XdU3F<}EZ4?YK!N+;zBtI!{=8(PqHx?i#-;owhc5mFU{H>Ld zAp9h8NEah*ogtQm+Kx*xe5Fa)@+H;hr2ax@m#=? z&ZRs<*y3^BtHt=0m|Jddcv0PDg@v#(9{pCMr}vMD!nV)Hwr&+QH#GR&x}{^xhhQkv zTTM-m|Nfiiwx_I4-3&RJ$go9{{URp?`&m}LQlpWP|6IVySB$%TMj|`e#5=A=N+ETDSo(x!-6hEgBu|u9Kv$@3%86Yz7 zcSGJh{jNZ`r9Ymi6M}#&8bIsx+g!M4Fo(usJ{3NhX6mmz(6J0&OB*d)3b!#d=i9l} zRn0u%4L$|LTYY&HJb+I<*N&>L!t;IVux`>IaiTjhfdKl#NT`w77U3)RNk|_}6ytM} z6wYReIEo|Saxk{V=6>dAvY&H8lX;ao3Yo@l$8lgHX3j!&;C@dsOT$Y9GDf=x8<|c^ z^PaJbO7_~8H{me>Lk|78b`P(oLEzz;Sy7AF>7goPMX%*1$knO|shsvMnV)~# zqdn9#ATwb0C8v}cDWASEWzKO1`sq%^IHswn*U&kF@TweSK zD!w~0gb5!n47UPUlfUB)KbOt{u)K6gnUOIg&gGbTDq6}OAX1P zIgD$EEix1Ix($X^!rYD{o%Oa6^ zTI=biuvv#Hg3V8F0Fi_yPq<6{{yI3ls zF~!*0c6utXDT-p$c>)+gJrl*a8sF{R2ZxujmezxTR%nb?tP=%SQURYS*SJEeSeva| zPUwCC>lJ^$aF?B>Jn`ZHDmy+zYMZfUX4uR`TXG*KWBT3!+;%Br{TD)!2Ri-pA@%B&m zv|YCho!;?AI2GxY$>tAwXDb`ZO<(o*_m_)IW>-_Ui*~(43{?G7M^h5{FNqBzDF2ms z)L(_o2Gn^1W_|e(hLooUH)B7%zr=|c-z7b|%E6eRIH0~eth6#TFfiNGY&YR8$O0Ouf7Q`@JJ*y%V7E7Dj#RbAIn<;uC#5cBTZl)*3DJm`4+Z zEekls_I>F)zyrLyK4~9lJaIVPwLq!{MAJv=HnP6@)1C*bdrpmwo;$;*6MbWlNU-&)M?-fC zDdhZ49`pLwoD1cFzzA24VV-=LdsCC2`^H=y-hXHJK}th!hL)P~M435-LO@sx~atLCOr!k$4(cbS}6pqtbEk)w0%*x$a4!aFqy3o9Gm#gWe2J&GQ-%J5BB2aY)`ZZ{-k8MEug%)z5>*^`4X*V$(g}M z`WrJX+G~yy5P~dL|4m!f||DMgphU9Krly1jvq-X zt~0W@YSG-JwfvI$L+RDWbqrX2EkN_5aDRsD^L(bB1>rnpT(YFClhfgs$#6n~%d?1+ z`&P_57rtu}fm=2M%zLvlo=4%#5BJy_Jw1BD?pqE}>+SE3wQ(eTiK!1r$7|f}oI>1c z#WZEOnZ%7kHa|Gte4Kdf!q{0$%fB&i4e7aNM?BG-v)c1($y#)2IcpANG$}3?q0O%? 
zI{f@f*L>E<@Ck4pwK*rlJP-)o`5Qiq zQelQ#{(p&%9z0w=2dYcB)+K|}T!r1ynJrm}sA$U_G@6V?^qIReY7l_E6&H9X*yo4m zPed{iU%2CdxmjQj*M@X#hA|SZ^{9(p_6{*f(#*86aWEF)Z){O(Z4bI1TttCXuqu#0 zxGHz^&f&v=gDmt$-@qxbJVnuo%@-CoI--#-_pMuxHn}V#6o^~_2<2O4SYL6{4Bs4-9_)S7k~(_fgMj~pE}$Zjjug|+93WBS9&`#2eH4n}NgualH)qpFRp zj(VelI5t4iXgVf}d3}@m0|nQa_hu<`ljoj>krAUDa>XPiGlZwkakyZKX@H%JD>1*< zx8%~mHuB_-TijsUBO%Y;4`S2Di9|D~lf7AS%aDeFDq4%ohMi&BE0lxtbnXIPN3BGz z2b`flF>NP{RQL4NwAD4t*GgE+Xfr$lY5Pohdv^9H2~hSfyT)A}fXgmOK7 z++VB6Y-2g!=uZjukd41!@OL0Bj++nGeDgt9=qWwF0JyrSb|jI^3`o_ zk3ij5t_E6`!8=?}CmDmKSn8qV6d<5gZ3&h;Q7Eth`~cH+ekEh1@Fse=`X{Th;yF(Y zQ``pd5eJ?OOQZ$;TApNdX$};BIV9g($LL|rkeXAITfeD7%vX+IyTtC#O7!Wr23|cs zZ-Cc1QL;Sh>vHTyS;jLxj>vB`kgKiFY%BF))^M)TnH-PCDff10X(f!0kN*|&af$mN zdal=>s))D!J*|sdg7H@2p5M_Ht2#@ipW;h8TbddD();%Yt|K%NbNE7gR7?2uzN;;c zAMyAU5T7s-B$6k{Z(|jLG!Vl4)R3AnDjatzE!+&bYwHD{EW28yZc=q_m9(?S`+|?( zc>n=?(c0s)jhG=agc|c0n0WpjQn2I|wLYL|KixqV&r+>Ppjq51MSjrW1PkB%s@=BM zvV(Yx>j0Wh%Li*u+l2NQQ|WOW9)MgZ+uqiV_$bB3+6!;e($+`**r>Ck@ml_oF>Nf+ zTiNF{T!{~N_re*#Vz2G2O02bg^TskIx2=1o;--d;%@B?h6+mi=;~MJGiY_S7SrMcD zedWi*_5G3$;|Z>x`dhLwPQ8UB^f+rGS~6Dw6TEFP$T@^QT;sbqz4|tk{JlZfFMShQ z@d^=~Yq960U`p$8RC2(Y3ax~zQF)ErJA>NJpS;t?TJxEQ;!9K_XgbLFGCHsZt94CJK#_x zDklp-=d>n%n$mZA=Bl*NLU`qU2$l)KZ;MD*md9cmgrv7gf*oc_>@@Ei< z%F4#3nl9LE{hLWHjP>+aXjH}+dm{*)YLAifi=qaO^3!899SGSRwc-E~464*3-Zqz< z8!UY$_fj(xtDVLdYj=b_Hb+VdUjz;Ga^(#<5=3|Y%guGWX`l=xDslNf z>seP{*ox~J(K3Tkv)fMe%g2v%g2t!_ztMsHXre(ypaltic7G1@u=V4 zqoa6}6^*hFIQqH<_Ngaq7iTdzKw`w(>OX}2rzH$1-d1c@j&=p4SLq$a-8KtLJM5G~ zfOGj|Q^ZtPcd5Guq>gs7v9YE(@ND^J;MYghXF&32Jo<34|t|GmgzODj~#TxURl1FE!m4U65&_0 zi;2U5$QxB!iRXF+N_EDSJBz1#Q=Xg)(t5T<9;{@F3^9g+Ub30-3?Y^#fho?LNuTv0 zbS4cgBA26HE26Vm8UjFweZ*E&%kfBJMeX*hEp0BLfI}NF_kq86h&e5GM%1<6Lcr^k zk#IFHk0CNyaB2CVDw?;VKr!gKHKi)6(3Ykj zB*gfwhXeB15@^VIVxB(hQn=EXkzl)V87tl6Q374Ius+@u(anAX_#6|HOHy+tAVF!g z>?q#vQj6udbLS9*hd!nzo2w1DNRE?(h`jH2U++EuHou|1o-))nhJ#wrz2jXB~V{hrjNrgtmX);{zyA$&s?lSxB^C)n>?>zB2m`T15 z6;g>#Xz;_ze$9gG@1!+w87Yc#C5r%P4dfq#ykJ5?^wyAYnbg{@7?#86Xrr>jujzD? 
z%YYw?;R~=^hphO*i!Laf6U1pqezACJ^k(EP;`{mNN|Iw}~`-_Y6o}?!nyt2V`(4L;IR-DJKG_H~{taR0`DX&84LlAXd2QHRCIQ zlE(6E?Zv5_m~qjW=t)$O0a2Jm4yy1K)m;#-Sqo^gZ@qWc{@!50pF{c*mV3+z{(8m` z%vH4-|DhY~s)17@vCmpEFG#=eqEHJ;lb z{_|@N!LkKcR{QR<(;E=y{xQZ~p4`*TElxkZsY%X9Zwjduw(`b7q*?R$-L&WA*>NwM zEsJ`Uew$~*XHAfP_esGiZq(mO))&Btz&o3ekkHE;7DfnsBDqqBb!meJb&g!-mL@~7 zHdX(!t?lr$gQRI0nR9?w9o=@Cs?b*tK8gyrV{)9Xa0>p#(?HZ>y#@u$#nTARffRO1)@e0_U%t~J;pYi(hzw%fHP+A$0=xXd)z(Qvu7Q5>md_mUiOny`yB ziiq0La1CpL3z)<}x2-y>-la+#FwJSAs_Kk}B2QW-J=0^kjN;?jON=DlfPl|s#|a%? zT7zt^v$pJ&je(&+Zq=<_BcSEEyB+j^iC2`~WrJcO@n3E+ko-9t>kb!r%JO<}9XLwa z&R^xfU90r~Y07G9Sq!YWyz^f{$Gs8IhQ>kfBin`!w%Z*8vr_ajxUYTdK%^+)z3_WN zoTp;e>T|H1eAXc5aPj=*q3T|y`$3wgTPd)=fqeVK62A@TmnVEO8-<01(Mh_-#`#_b zbWP1WSG+8?y2f+5lyM7NZBS2Iv1V11IpCTr;U6$=+`sQO)D@#H9si)@w8q^!_;aw* zpD_piX!aEHCni2P1V*VrFy@zo*91VRv;%E)%^%B3R>xDpARAOh7S=|9)EZ55;$e1b z;$zh9OZR2*%x7R$Bpp-0`W?|ef1n2f!>YihW;3nQ;aeq{&frFHeakh^=28EnDY;pQ!h1C{%cuh!Zv8{p7kjp z^szC&o-w-uHJ77h12g_4wT;A%oZd@~Svghga`d!xZ*^_0SJ@2IK|R4Kc@H0vQ@;!B zFpYk?(z?@0Xw8`TCY28nD1vq?DJh=AVyWs{PFV#`Q|lZY1E!ucu=Da7x_&)MY}%i| zSLFglTgoTri=CJk3~c_OyE8H}O67}+Iny%KjE1@@(3b#xXRE8UKO%!-h`C!`=~X;y z(by)h5|OHQ75-H*$Hp_4O|%chU~R#sAtaJI)8+Uu_9mUu;MQD8kb4*es6E0An6_-< zgp5Sj3^brJ`T~2IkY0pNLyq_wjn@&QDXv-N|g3Tqab z$$;h?*wRsGG-@`@n0SA@ZGj>rf|Sm_sdeQpg8b_3RRm&>t$j7+R1-zfa<9aM1Tc(0 zu401{7t}}2|HF>uV+*ym&a+GMlxwK1)$T3yEr^QR>*kJ&V2T?fUwH^S_H0mbDp#5+ z@&n=QOYx1G0ERgtUC9dzE1AL9=0<-9c#%JvO$nZ|H9%(?JG^4$QkQ`{PSG9`|CGC0 zpw|i+gUmZmHx|v*M0{~zB7I#wMod6LlY=6J;@jKJIP^R1peC!ax8z;)vHX*~20-qb zW9!}rnJa}Qq z^{(k;J2lbG=u$kHLyxzpYRfH#9~hI`fD%>$K9y*7)Ac4XG%%&8{l(~B&@^I! 
zDMzRKN38urN;NaOs9K-d$=ffD19uWZfWR(ohSI~q(M(TW(aW5_GP)619mCI$ADd-N z{B?sOt8CP-KxN`ew^vp;ycX_r)U|C5>1%iXDvI*h8P>7uFyJqv6?G^zq;)W?`#D@S zISVcwGKCCveAOb$)5%C;X}AFFYyJT^Vwy>gjj5_6Pb37S3pX!MSm-9~Izy6HGZ9usZF(I3oxa@m2li;F{6Og_4fJZo8t9J-N%XRU>4$K6P#N zkFoC(W<<(bjtkm1<7zB;Q%X;_ECbb}X2~NY-Z$ejGp=E>5vTNRi+6td*@G3( zbSSC7Mp2`EO~qB&hZCzkA<<`Amm`u;Q+7~q|7V=4O&&>0g zdc_G1N5fc@IapasU%pB|K?qoOEv=EMC_apri3|8%Wkz*ci(O7{bQIay^8;$!p!W_l z!cif8vI4rTsfDvbUL+*RO2?;X1YRG4m_N;=b2lVBM8gzc1q{0GeEMs?c5G5<@^ol) ze1b1K+7x-taU6?RE)TVyK#_?dGHNesDX)zEhe^zCRbNz7*lR1@=U6cvC3EWv&EMFV zzh{QCK0{n1`M+KD`hRo?`h51)E8jUkSpm1e@D_YB*D?`&pVeFop>9di4V1O zv}~@>GJ$}e_jm5f#tpC2UjossQs3d3!QCHH-JRnDO4qN9SMd(WsG_ytgYyBa)vfu3)t@F5H-K3A+F3dmKq>zBC%~UwPBoF7J=&8Xo>x7Y zBW2+ELdwQZ>bKW&>Jau&t61CMvQ|HyIKH+J99v&0=TNEeh5hO6$KA%4L$gu8k7Lsa zhh1^aCZLgU8xq6ro{OSTZbPKtv2krxpB_9$ZuY~a!6TP2$^Pq?FJI&-ULeT8^D&&J zWsHAcVD!UK>F)~~-#0+P^IzPrx^evf`5&Z3zUTZm7VzJV$A_$pjEuh{I9gR*%}77u z5t6^k4~8&iw|8(*QNfp-lOB8E)-(eHeGR@}7`lP{4jd(^udkjILN( zT9}0{FRc9&K|@nh&QhGJ_&M%WA~rTQH%M{1*0ETpJTfw0IrR)PA?|Y+e{}241J~~s zKu--)6`+*JX;kZ==WbIbYO9Nee1gLxCR2uoxkp6i>5To=Ut0A)vbVQaaKQ*){J1Ky zLC0&^OKxw^=5E?#zSW1j>?F~mr%#WMGNdARmvfhvmBn{twt4M}X1ZHht+x0*9Z9XM zs**gvPqVPFuou|;f`?~wvhve9@!h+3iHKTUSMD@6xo@ybI?Bp+Ep*5+u!wMTA1G^& zNmK-&%K1#@@k?Qsl+(-`ugGl7)`WkFd>C(j*qTxQZ9T4#2#>22T4hFWXpsJ3kr(*pS=Kw1Br+ju)f7$ zFiBd}gnT3+>eN=(Z)TO2va?83bcu?I-6tb^_s)1&I|B-33f0%DtgI|1LF=nsgWimj z8m3^;+hG})M$F7mD^TV!Nv~aSng`mpseG4@p|`TKI^CU(o1=X848%T&5+ zF`R6sZJj8aI+C+#-xbjG9wMhOW*l1u8AgrsSFPkESE25JPD~5kk%0 z2iJse5m7R%v`7 zry(F1+zDyI9z4&VJ73}h)&|o%A2F$sBs)you(PnB1L2Rjej* z7$8gaUT#E0&`B8g>mtg4-?`ayd-CbYX7kC_v~z#V#=7Hl?YjYL(e=<%LjI{bRN%An z?2BD*o-`J-tQV5olaqFnH22F`7}Sor&T5w%<3>RILzdk* zYq2>kpI!U~Fo`3x^hg*z@CkDxeRx8$%>Y$=kehG7z+9x6}T!q(n_=JgPO_ol>e-( z@YtG=*TG>)z*B|sbtDoO^IPR@+AWqJn$&Y-BYv%R3)LMeM#rOG#bOOu6uwRS>8&t(`JlaFO1srB*{$7#iUZS-gm zt^;kZ>D{;hfB$=Dno$Q}<<{Mapc0^!z=!eKPsl@l^!BRv8>9<+FsWvIZqe*_F2i0? 
zxw|@mg&8%~7KdpY-^NNw@?|AdvI0Ya3n>%C2-6y@K1AVA}`VoFF3XEHP2hxP3 zjsrrSnicF3C$=NG8#!`OV-vSevLUVKnW8%0gii`4inU50sMbUF6?;!Yb=pD^!} zp13ynbhh^=&!BJumE7Y3r`lI}o9GMu*kYm2{(L)F{OI+wdc$&-7CXZ# z6w8g8pj-VxK-&C|b2fH+s=7es+(2Ky#%B1hWpa+%<`21>y^|A@lW++C47g~1zR*{N zVtyxvFnvCmr}xe4J+}CTIgOi>wwB*YJ`CYKN#IU}nq!*XvrPBimxNpEKh*5ptJM>DWMh$el$N|Z)*!&$0&B**Kd*!8JW1fcBx4Z121tT1J+4g6UO8F`}<(6)GF4z z-6W=M#Fw|ezOJ0gpUA%PHI6O(-in$3Aho!^?#S-!2gEx@^G6o%NG&S)GsH9u)$Wm# z$0Pb}8s>|+9&^9**3=|dTnY)1{pT0G*iaHTjR=W$k+k$tX4cudTaV z-+7ImhPY?F-g)6#RU`~H02aSLi^*ulLPeFN@AYW5$RK*-?f}=|hq7u~7(ABlMdoai zmj-7{c{lI#xbGPpEK62k+38HIu^PhP=DsFl6YuzzxsMOA_+7;J$Yxl~V1ap9EC6$t zw>^i91wzaXDWv#j7)F#OTql7S)L2G>5z2YX?Av?ON^hk%p_7~E*)#KBUr24bp;*XV z^KJXPzxyt6dsGcP1+47scc*KNnmqq8b6Xpf$&?BuW@HlYVEL9?N@JI7Z_=;|H(`#g z3efwk{|gkUp73|?+DtDKUqpF|PBq)8`F<@L%s>p5eJr;La{OtXu<=yYd9r=qdhVp@S zYJ4WwQEfqO5L93t1oe;vZc-xwy-V>X!A3p>e(#4HRuqyMs zRi2I}F1GAxjb7UQG3;?L65LQ?nq{U^ zW8yrnV-_U|!yGLAUdL~ilt4Z`{w`NnGC_r?*h_dvhD_$ePLu>bK2-Yb?69vq_W~5evCrKXezXPR8lOsqeE)Me zsw$DC|7q#@ow$+I*D6{q=WKU`vUBXl?Xs_HNe#O!{FHgqI8NxbcdPBuci%GeE*GV- z1?MN1D~X8|<7Qf_>6nYP>#k0bh>1cqso@Ww%BbzGTArngc)j~uh(HmYRf4Y1Sf9dk zcp`c;>tbhaZSL+qeQ<9Far;{4jJ}x9A^EwVybW3=J#C*@@@WI8^{x{T zOKLyVXip*M>h~afPS-2+U~y@QbWGZ7kVf3(`jX~rVz|0EVh4P_U%&s^Ed9s75U13O z4Slj_cTU7Q*08eY_jU5l?{E+cC*vogmtqf8t9$ z)tDl0kkE<$8rJD0p)UJF38pV1>AX=fc2N*#af!!`<|~JN`v#p02Qw<>noQ^gjzn$v zD(^~3K!RMMu9{1O@n8q5dg6LLxOn=?q?W7A{J-tvn7!~9H{k`c5tO*z#2oj}wMOKW zZpIh~)95xjvx!$&k9Ura9GtDc#F+v!B|{o%K9b(ieD_%ZG8a47&vts%r`FB*=p)n% zZgxn>O!YjU-x0}cO=R5bxYBkgmb9h$RxF)wOD;|4yG4dMS%cvj8<)f{4k4jdKoXiz{SXoGN4Lne zA4}CAWhfn!wfJ1vl|SA*eGM*ToZ7p5Xw(5J_`UGQqo_?13PNQ0eV=Z_z5Mv(93;)zQ8t_VmU85bzMJC3hLcr=676}SQKc56G9GgyA^LqDVJI2lg|Dhnx zEQhHoJAzL2JjDlRt6#$XjSEZEWm;s&f_K0hA|G)}Ui_n>5`Z~+==SB-Te?i7Wv}-F ziQ|O&>(Rh;X*szthtI6}`e!>sqYq=^*0k>%f*guiw+tMWEgy%*Z^=6!uW}O}+Q3Q?*%GNC>yzLE`%1 zAxCc8#bH={(5G8O!fvZa8>4tolS+#mcZMYf;yj3X%Dii>22zmW(lZll$7v#7D(>z( zNSR~$Z=kL;?Yc!w{oU7IS1Xargf#g9!7Lt!?ZK(F$3j%K-={7u_pIgEA#1*F9+T11 
z?&2`VGE$q%J#oJT^v6tBzGpDAu;sMj`01{06~&g9my5nXY8*2z!3n=E6mwfGT}O}W zm=r%OyZ5JaX9yJ4gDYGNa*mq z?uAzQp9FBwpH|7q1#2ScZp`kg-rLL+=Mxa%GC1*3=jHXsC2+-+6YE#L>pb22On#p`6^QUVbf%civFjwb=gfpmw3VPc=EcI z>LiKFy5_kM$T}QFQ$BN4-l73eH-_EN=!}e3)Aqd)gIf2ED%;W4xOmi9nMoV%X1bMC zKHTp7i_$-bY)_xIkB>i~(I`-n8DAb_36hfw&5{cJW-SvkNGQp_NE$gMFGTpTpXAOR z$xKEisN(sD*q-E<)^_8?!Ak=@am*(Gl-Qm;pyVqCt2JfbFVL6Ib$1IQOvn`9yfJJ( zD<95zP-iUp3+#DIdqKTFf0A8oF>G*Ee+?nF2$J5X$6*E(RPN`84;orUb$npw z{;)VyAolMNK2iXqXJ|AVtq~WeLCNp%Z1 z+kDHXvj^*H;b#%yLT;;|)|6MDF4H3BsKf-{;K^%DGcU8HnNM zm)n~QfN~gwyyJ+(i9IkD4>m{VD#RW7E&v2ky0g%ruI%i-7->j`m{O@h9kB}YTRt$@ zhOMrS#`eZo57)E(1-ZJaHOaJgg=EMT3H5%3R8UqGEw zDE@$q>?1?5uK?$cS2F-YrDbHE-G16zh$kBd<#H$q9jDV95%2wZfqpnx)*lHvKRLWV z@pRBfTGb`)h8o}RqMse1YQ+r?Z^Pdt^DGA1@qG5+O<2CjHg3H&sL`shRlCQDqMas2|V{W+b0E0;nzrTKBIA_pv&Uffr$-%=h!gIn8)VNe1qFs*YDpY zCvC1Pil7SPGP?8U!Zlv^EpiwiLee04ZGF51fjOqqJ3B zxI+-=^MZQW`6z%Y^}6})D8aC&kiODNhAxDP-@yUZ+5y%hwC@6#g&+QWZ-hF{)E^AX zb%5SJoX1(VYis6>az_dxFdcM^1moJ8waW&wxdqk$C-K>ybSVtAO%mR;10b+p1df9% zNhWP6zD%B zLBG`rOT4+=0wA1r9g_gK+gh2S2E|-6_}xrXGk%5uz8v#R&g@KpT9Das!VxUTp+s1D z^*{}~{-mK_5rJ17S;8K2K;v~Z`qM?$;N|m(Ct%z`Nx#&g=%Hm3jY|La<%@;yD)c6r zCgtMifh^v)w=<057h|!wI9#hbk^k+;bK~av>S{MTJ4J7A=|B_Xx4HfsEiInJe_S6F zP&PKYWrjjyeWZ49)e-=r08CBhY<;F7Ow$~`$IqjVy9zupOkbMyGu6!fBt(FX&3If1^_#Uxn@0j z-_#5MW&*-NK3+8@CT206iiSp`;yQT!gYQ8*e&&BEHtCz|9)Zmne`%t1qmKU%Xt!Ik zT~;GhY+Cs<`>M<|63wc<*8yRX-gv5{suUJLy`)*cKG}oHlyJL7;zo0ZW|3g{^}ADA zEE=y>jAn1ST0lRz+o{wgEiv}u&5E5k3ro*g9F}}uaUE&@HZkKm6~AM^7G?$TOeaRZ z00TNwkTh&pxtR+0_)Qx_YF4b2#W+I-BFJ}G#h{C zM63RqO|!mGXX+bqWN_|9iE)Y3&AnoM;tQtxn7ZYe#F{BKaDnBm0EtqCz-zkdA!J0B>hShM~m@R1NMDAPK(KG?T1Ve9`oEO=_H-&{BOjVob%oK#EiuwJP#CkeCBncC4ny~#tUM}P3B|RzD z)-s*J@GGpq_XszC^i`_e%PA~0Ki?`MwT?rtF)bs*c)djgoH^ZF(tv7GB-&%;OAl)h z`xfYp##ml1g}T!8SI1M_F@3)Jv}3mUq4UO1TOrxWNhF`s9C2R)6zaR@^klVo?>p(V z04$LFez5h&ENA;m7g&W&k zvxl5JXsS#d$I7OkA52wyuu=40KbwH)&ozCE$*O$k>Q|T7nv~YsD*o`H{m@)HH@)c5 z#l})`PJ)1PXYV)>QN*9wmZ9kI@I*aNRN263htzlo;)Tjfx3ET{R3GS(B*8@2MqFqP 
z?Tn6}kT!^&xIV#-6B80r7@lppL_Q#_d`ZrwCT!5#8nCok*@4a7b{J25^oWx9kuVue zWbhqx9r6cTL?xH8h@i5ta^bAziDpWN00%<=?~yU~-7pk%JD6c%*~2L|TSn5(%;w7Y zHOo~D%@8$j)Ipm3dA=8vo0OZOWApmnEs1{#ulK+m>Rp;2)*PnL`IwAU=x7#d5|wkT zZqm|GP?1r87Lf79pzS>>ux4SVC^uF^_uo)HL5as_3+uDf6B7QFPb;#pBshyTD17+x z-W!XXb6)$u3+HG=>M2~V?qtbVeh?`C8b5D5 zQ+@iwtTKv@MyP^q*~xUH>Opg_PtF4^LlcAAW`Uo-SHi4UY`;fL7gfAmFm0OC#k^~r8SRQMxcR8~?G~4k3UN*RC_>^= z_1S`EukrWjf=-(ipNF%$>YvXDtucNn_y3IrsLqj)uxJcUH@OnUL~qmFA-dQf4YO9= znQdTU!n_`Z(vaQHlo4x=q7H$Z+1S|F+6J8c7~KD3`s;ON__YxUR-g5#N}~q%6J$b| z;Nvf00Swxb3)!8erKK^+&UPPaK1cg*q)BA_On$XSdwnJ>Q8eh}#z<08k!>7whpto9 zpZY$mQDtOwui?E`f3*XaT)8eAT6PYiu+xiD2+X#kS11 z#wRdi(Y?&{ggjv(dwYXla?-|cbcScMjYzF2Zs;AgBln9&?{Rakg(xZdhv@yBk~iF5 zs>aI=)7(FP!N#W1u~?$=yqvF`KVcKTsSVpmg+e{~=C9S#Fcq7%{WQpTh3hja`k0y1 zh}9Dey&Dm!+%tHtg2~ zr5gn4lvjq`P5LD&2x~cf*$0z^0Y%ZUN~A>4vj-o_XfXoIl{qyz~C(s5`!U zt#!xsx$b4)_0p`$w6S8D{1@WIp^TiIL>H541x3L(7`Z{zKEW9)R`t<8Q3)wCvw1}! zs6PBV#=~vS=jYGcxrx2>Uh}eK%7U|TJ9cxLoPdzPPVXeq`?hqtsI$E%1lIq6gYKC0 zmhXV!ZK}Kd(wAUYLeBO|=Y_CMYVps5%ek~xe9d4JBQCT=g~fOf5IVWf^svxto`@!M zY__qDqSx7JW$&Sgof8Xq>VU6{8KNF63Wg)+5SM6}D-naYxG_@7q#^Sqw=MG79O&r4 zAr}v@mq46&z~}}eUf;dzbK=4zyHDbv*HSL4C{M-bW(6q7D5gSO17*IpGVLZ(N-<2eH8);PPYwNo zCkY@W@JWYIFy3}7!Lg$eTEboOhO9d|b+=U6~QC?+Nb-im(Q zU)O5C6nDj5_$6Amx=JGNIWDN^jLcBxoyX>KxKs_(5PM!mG0>EU=>{iGoz8k5L-^9t z#;{drUJda*JKic(Z*o&sEG+>XQ3?ju!n6aAAxh$c;etV)!Fo^dDLzgi51mag0_X~J zBa~yjDv(SICwFAlw)ix27}=48(w9uyiCi-PI)=E6x;^X?H+e^d|?@AV__Y4`pT%OV^!O@uInx3&!6FSEbO$3SNiaaZ=4rf48 z&WUb`m1lFOLYX92c9Gyhiy`j(Bs*HMAAsk4gd+zBQ5Q2?quyy-k7+3==jJHaBH(pg zpUwVW<*Oc**s58fX}$0wa(7BtZX|(oNM0ZoSyG7$W<)+bdZ)t!+kw|BWb}j3x0{33ESnFC zeKix#Phz$J${n=B(5zV$*_N0*~qqFr@SrX~h>^x7!=e)vI z+d$r%Ij?WV2?~Q12x*BoWLv?3i|uP`dR~Wi-@);HYIr6%ZB|@^zxvnK;xiR9-*;+S z>NlxLf__sY80eE3ldPz^x;L99Q{x=$oN_l_Z`4~pvKOYQirT0Ub@gbmX<8^q>;42V zSyUvSu8z+xSMhui-?N5#8T>Qut9OiXIFJTqWHrmW1a_x{A)}2()O52Rc^Eba z0Nobi`|mJt_7|43ahpm@5_0=AO9=6>ZdYc1hQ*iaH{Mp+sb#CU!ikg%r1r0!^f55u zYRSh&*?rSyrjh}{9v%r|1hK)=^N#=0b-U3woruK(`zA;385&|V`@A%pea~{{SP95D 
zC@J~)u6`$D=b)=dDkmEAwHj%_3J!a7eQBdfFwnhDncj&6N|ugiZ6ECKuFK;$l}^NE zmTSQgtn5>#m~qjqCoSpLNi~#}H;oa`ZE$VJwGyKAZ^Dq@=hu-5o*Lwu*(&q<;1E+^ z^YZeOxeSWAI#2-%oote?nQr{#|Uhx3+Xy$t8yX%`6Xj0pDsRg$Nr{3|jA=18xnCX7gzeao-!v8BI_dLD3tY2(d-DwvI*?_FZ*_P58E70drwe4T?EI~N!ce(t+g@C3kk1}!+6Gg z>6LI=GXL?2F++O3ve`fPcJBM%_c*4b*e^#ZhC1im4+3LjZ}$SLX-8WO^C7mBCMHd9 z{g>;*RV*s1zjA(TFC;-*3``5Fv0EA(7`WAGdkHhb5N1xlfU35>K7RU}bE8(guEkd1 zjGa|2dhhj-qeZ3UDmzX2B{#%T30qaQGd~{!v6%lL|6zajQ2a>WiP7G1c9hAfE*#zaB7||ny0b#uaP9-joss=rPo|eN zI{6LJ(xECd%t=L76`g{9?C0`LgO6;de$36CHPjFe5|NpomZ?D>bSMWpyM%S>oMOD; zJ0LgvpfI?@V>Mf3xjRrckTuWaW(R1ML&O#Bw@uSfoj(O8=%2rJbeMtds`G710U)t;F6zn-ybh?m3|Q(a5?jsbFb^FTM*mDOukW- zSU_fbc7(h-;7K|H#!tE&a-5#96jG9tuY!S^h4RB+DZ71yR{d5%m|`*HtdpD{B6z;B zW8gqmQcdc|E!l*z&c+jsmFasV!ugc)n~io|S(7`A#3$G|9gG0bgv4Uh`Ay@krK0KI zWHp4sYFyiquB?Kybf>MaC!~koUtAbQ51uVnn%_lF+K!-@KP!y7D%QVC!^ZPlUs>Y6 zcst@g@=7*26C9`W&9?rh2;fW#Svqh_2X@m7+!aQ-RCj;7bUsftdR|TDbn9vOv^jQb zBPk}hr_E;Hu*h@JI?J;)EIL}q(L)3eUR_-LwZ-@IXI?jQ$3(WWuejlxt~;Zx(o5+7 z&P4?~eXD%NcW#H2hMJ9s&ry?Y*;DEcW@=5))1oD%0zmYXZcjWpw^^Dg_VTk~*_(_e z?Y5c!>Gbd4aEI!7l%mXl%p{>P+hYqta`=zRIJG->ts6K`;iSB0r*GdPQB!rSyFT|r zmSs(Dxwntu0)i?2NxF?F>gGb?3R2a2&Mer@8u?y{h8|8 zJ$&6)w>VDwdgZ_a8gLrH5t-LvKI8DCCHZdpP((Ul;YVEJd{;MaTOvN7l@~Fgm(7)l zJhDCS1Ri5EDs;Au>5%m8%gZ^v24+ieZidE#`;259w!yiU+@f`%sIL!5(a;j?4FXM3 z4rGbH3Djvl9ICc3#ZM@znw`2^P#jZ3oevez&vGC)d~xx+glB#&EwFs=q2WgKP1+>eTL$nhqh1H)dEbG6K(Pc~=T(dh1o85U z`2F1OcwwE|`|s9+gM<2ddQHvE`F{S@)f@@0{D1_dw*O#Xe$}+Gxo~lJv1C&F2*NzB zkwxdAFyWT&WKZ?goHFg+VjB z&5SgkG{4hY573eC0UhX+34pZWRGy3D^(VJi08172;U_#YAyk<-!^TgH~2Wq44TYKEk3({MO%S# zyLLSXBvsHEe%=0QpIF%20<1EbwBu@^FLb^=KU{)3u6FHCL))t|g}ltp_q3;`G>pIS zIs0}#5pdhXKxd24qB~;UObMPU*M$OQvwR<)#h}fslS)rtUmcN7zu~mt1Cjr?XKdww zpZl~nS)v|alk4f**57YEniHti4z!96!B2{S*v>Ty01_y;o$TS>Ycc~UPoheDPAl0> zq!i`Gtqs0+gy7!9wM0W-fSarM7y;-=vqI1DWCInY+I-k<543B;VESOGv1RVh!8+T) zQwR$t^*PPX6WsNvELH2gtX#xnF`$wPiR7{zldv(|WA#Y2S!l=yxNC`O2lLT(VeXNR z0#1Jfs*vxkN5n^xC-bj{jk`;A>zLgRy)F)nY>atGI4v;TUZ09S0M0`eoTzc6K|S6@ 
z^s;@cY{%Ft_zlV-U_n?q=F7#?pY6>euPewN;Zks*L>rF*gjv~F^}KhTUs0_DLzj|* zqC-65^JGap0hUh`K!o8=rfXp5_POm$Xp5g!s1>UK{javVIy@qRQLPA!OW6e6b2(_f zcy&BUdO8ph7zp1f-PR^> zXfq>lsQIP4#XM$!91T{=&MwhsQ&T*EocF#yna!0}QbQzu`&JDfX;jbUZyFU~;B5!R zf%pIpBJyJ%qy(VR45o0E0z?8xlwh+2Wh6aK>m?)08Svg#c^`YqPZ zx79g^(cLm8OErxaa?r!ZYinx(G$~4-4z#;4Tn1-i@}OU@*fL9m%F7r>IoQ~gAuac~ zH{?^xU6DkkYQ+&c_h1?P{NZxAIN09atO;Gri2zQB%}n2v$k}?&6B+|uxD)OGy3@Y< z(g*P0HPUTuJ|1BRBqezsVVJ)vGGe%4SYel&3xPjEkvs(+jn8Hsgug`YL7ig%dv!emJ z9!u@#x;1iiyxtFL5llO=j4`+Rr6Q!E?@MU0MXlZf981nYE>hA8Q3?yr?^uTr61TO_NW{qY@ z|Q=p!Qw9+J_~!9pV-1GFeP3^TCwBr}P?StbqMZ6LZ;{Q7+#I zK0g2UvNe%WyUJMd5FiM-N@qKh%D*I~AMT(3e5LinxChu$!UDx0{d^{L9XTse`!Mip zf^7kCG&Ghb&l9|<{f<)QhSr7v>Qmy-?k!NuxvQDBXrvG0K$NpR~5Vlv6Lit8hg&iN7|VS(G(3V>(PV+_*?uu@mD z^DNHK)EoWA0cQgudcc#@K4jVD#byjy&mORc1XR$%azjL6*17K<(t4+UkM14C#vXtxDAa)~1!MaK_4~N5%AdMa z@$m4d@6jAtr(c897x@7J0YNc^Q&tTrMs4KJ?OHGPXs5CY zcUdV~s4*XsM~@PLO>b;|(QS5Nz4x@v#FULvJooT#xjlC3_U;F}%Wp?!t1wi^YW@7=tEE+Rd2%5_&ForkpdUj9ICqzo4#&yXVc{Fb>B`WK(D5ge z{q)~IDBEigqdb*YF|a2jB&1NP0W|3`!Dj%|s702T578Yo{N31~U<=}XL1POk(s zTW-{RgDcT!S+qhBI}=jT)2E3Y%?$v?B6~M~ozJtnnj3N>GX`{CCnbeb;O~-+Irhop z@z)pI_r;kRr;@yCLUQAljggxU3JS^@^#d%8sOVP^*$LP`hs?dKPa)=akvYqr`Su*) zG?9mtJHKcm%m)F)_R-U?OmrFr;PNH6&LKc#tpE60U+;12wc7Q;lOq&b|Aq*EhBy%< zg@8BnGkLu%b!+o=?j;nEml(uWJpa>Ij~MH|N5f?%SYUF zsgnUDH70v2_xuDY$$2uEnRzG{Z!IP(C^0tYKHg2)`GdT?v=NK>Ls~~4W_6*QEoKK# z0*Ro<7lmAIsuGtH^utBeAwCiq^<>a1sqJk)BEA)}zJTNQ*=qApc{H#sS?#yzRPu`E zaB-mBJu+o@XZn(RYAdr%YcpkV?|uyAoZ27`4&3qIy`dg_D(2N@gJ7L3)mZ)jaZsbQ z;Bt4NQS;`U_+Y7(#ZollqiHD4E_SL?8J+sYAw!jb+yv0t>uPE`2+@#`e(xD6IyLt? 
z4(&=gHOB^k)VJnBaGJg}+dP%aI_&o&B)?XOi9lr<`N6w;>x?We>*>W*`Mia5w^0j- zoq(uf!s`0+^f}ks9Qb2yw%&cocPBF(cLIQVNMj6|Ew|5p1P3=-oCf>coX0b08l+aHi};CNnU(?0aP%KA2F|vkyS0G<eIm zyRqXLKYn1R+;jb5-uaNF68kgoXt#d-5cx392kzKHuNO5ewDensS2ci<>_#dsU6aqORmw;#tm1o5GLkqZ|?Ug`a z3Djy{!Bab?CY1?8CFJ~$Q&h)+`0ofB_$D; zt?xx~_{@3|wt_=LlCZ0npDJGh%Bro7N>@O?v0hwUW5`qE}ltY-j9mk4kyq18e3!wxn22h<9&CSjA#JF8wP^i`s z<)VSjY6dB1vH-(iZBI18{ms&{)mpXk0KaxoG>Z$l@ zZm)F8ze)j5eH-Mh*mJV$c*4 zAMv!q{qQHrT0>(Bh%TXTF8Av^RSSJ1_XEqLH=YeB4SlVg1Ya?##>&nvO0vBVSZUKl z{J!Qj-7jA(JtPTwVkvTu=zVD}^1ebUDqbDv8y;37d43*ABGizZyaHMQGY4R|kB@Lx zIxPQ&obBWsG}_FLB_$@QN!fK8Ha*I1^*kg*<=8Mjxh z%j>|C!}8c0n9pWea=;lZ07Njb$x6DoaQ2y4v`gl_4*`!*RR&ySj_}=d^w{@ggX!%8 zsF5O_$f`Pl?ExGW=BQ9H8gZ3~@Ni%W z!(rAV*uU@j27Z3!JI`lET@i0YZdr-n{h64k0dVU|zXLEmiiy|8k!rm*^f}`iV;$p2 zkcoTk%;j~y_fEB?$^9@x#@7STi!%kZih>W{{*Gje4`m32|7O}beFf7}2L70KzULaq zD!JV13LqE7vIMB9a<;UegnEF^K<~7nQYWOxT8%dw;P!QyOQJ4x_J!a1EXSxOttHif zPxT(~gGFqhyd@N;z`ocTRjm+uJeD4AmL4$PD{mjp*Fo#Y_v;-qO>+B9W&jKe3Yp52 zPl#=4*7qfNi^TN=Im{TSNqka<90`=!W?#PVp#F9Z8R| zf6lOHE70)p6Yv>27hv~Kd*Tib?2?Qt&NL;oB-lE3HHWTtMeUe%rx-Xn6>i&txqs^0 z*w}QMsrzdLf*2$d*5!jgVWAw+ecj}^Dpe64bW{(AQ&tRqDojRro{~`8twNR{{@79E+i_Av|GGpaiY~0y~9D z-vz$^;yZA*fR2s&(EUoIGLfsT3Sk0vo5g({!Maa*yPwmC?r~dTOP;ZAyoOa=iPW5my z(};mzfu6UiQAnbNOERUDPT}Tc1$Js7-|kl)=QqVvykhD zdo!`xjdet|FJN>9_~1Yk%K3Jpgh*q8Tq?C%Fwfg4Z?BrXkwM34Cn_q=4{(`g;# z@cnG+W7{#qkAFJBW@4(@ti4d#{-}Y!BT>h{%PcQn&b|P$6tnNAmu+uB)$GN{ST0P+ zwOo3*sc>~}E~`n9oPx`&tE~k6dm(!`^8=l9Wi-3wFe5w>VYjt9q5443~tZxT2sujHcAg2{vW;nGS`9Aa#h`^{!9m zSpWTHtDS65ZS<;QawlnaZi%l+9KsPqL(}u-@Hf{JzIeKAQ)_dPhfT}gUfD6Po}NEB zH>8c{-d9F8GFgAdhjLvu`H1jg7m6$+f$Wtv%m#(CE%wq@8Q0e19IULYY?2Cbiwctl z?N+A`oHScHW{{L-uVSAiUo6inlxto5cT;vDY;QD6m zW;AbRglspbBHG{nFpZav>8O~y+E%?@tL|$4w!jaM)K?QSoctdklI`4a004(M(Q`zD zv$K=A62;Vhw^;Z$bZke-bN$VI>T+^&g02Hff;E+uw&RuEk=9Rp-#q-eQ<&ITKD%Xb zR+La9dt_-hH$w52DXVE^9@m)uK{|iZF_i&?qY!^Mc2PY<8d5HJHwAKWk;ZXV1 z=Bm$!RGfVOwF+w2CGLLhw1znD}Em#57Zsq zLzf%j3h}Ik=Tg|z1AqcbhQDlC+PuwLw@qKla5o|W^_XN32`p93VyVsnP9LK=WUy3; 
zgvfm)0i{M{w%6O6&4QVPR*43?>uK?^@R4f>1}3T<^7Pee9fgF`LJ#$beedhHqBnXF zGJSW~)UUK%)+WpQ*|=e$%RR){9$eg1Tc(+2=}kJWEGiXNd(HDa7iF!rKKWZq7JKNx zyCczFWZtZ?j2~sFQ}@sD--kg}e1`~4oXz`-%G;kenF7!6YK_NeeQydjz9?1Vc(syb zTr-MB&`+D%-(D#s(!)l-3g!`N_tow1AB=DPnsmk$K=r<675vn`;zfHQPsl~`U?TRp zDEcDVJ1NJB2&OLEvJ^)&tKTl!~tNE}3s{v2Jh(6RrbnInQ$mZm};GX$r6 zD|G^wL;H_-r-)Azpa&EK!w+VvHjF;MpdUOV01L6=QaTsdKm9uE7rm4>YfVUeRw;U2Ip3LZ=xdVMx% zvF4#bRgrrn@#;;urqM8T)PCaH$hMlD{dg_}jk>S){y>TC7H-R@hWCfGVbD?oHe>8vX~x-`;J<5Y*vdPa-heuNeu+)adW zs9pI`NK1`CJQp=cD-u4aT;OSrjcjc*%aEGb5!hVTudySDaq{zG*Qi8QnnBv1Z3(Uc z4OS0+mJsyEI4ElK(Zz15TH?^xhCg=j+QM|hlO;A-&2r0vYWWOD~l zZ~^^M?|2q_D@LR%!1)W%$K{2E!FBD+&l`_ZUgTJzt&&kTh*4g@8gdjaV-SsHVuPt!A1Zq1|!zK2wcoot5a-viV-Vjq2uw?%C zUs>6Jz*nAV;<-|8S`Q*_`*C8-i-@fx~X9gmNlAI~#LxT1(!ewRG@#!%?!sO3(do$+V-&Po}p1iKH{lTgZ*W0OKSbW>VSu=4Jt#iGkr*fc688J^cv0=tW5$dr<{yRKgioIJ7iw>Ajl>~5)zKN-)F+A5B;7;G0RaWhbA5cyueT@=F zUC!yCbb)i@9EcXi=NO{KDehaFxv9$L0h_&W^-{`rP`Lr?Sxjj3-L?A2FWZE`u%f6x zGry}o79(}xO*PV@hoUuw$xSloX0q|ct>oCh{7aoQ$xqpWb65E)1V4=QK*d04dostl zgSuRJ1**sIb)2-6KF{O8DTa7&oP)dH!`%7Dfl5(H=|seFOIGY{)IoZ~S*b?#&_b4O zY2tlU+?QALHR_0LSYQMX2;Sh%T-Tc}bwHydJ`lFL$yyg zE$fMlSSVzO?&Yzn^5b=Nq{83`sqL0j!N#$AAvIC0XuYU4Z%L|F9j#wxPzL_=ZIs<@ zXW(Lt^wysK?ns2=rp7=qmxaP|RlKB9St-4cNJEt=eCZ-HMN6md-Y%wZg3A_D;38Hg z_}HM(XeZko4YzcK;qdFsU5=gMkV;TV6E{UwF2VB^%D%Lc@xk4F{|N(aZ6u+yO_wwc zBi18Z?rs%aERuliT(uu)LU;#u_v=joH{@ESVN5Naq2A3ecItZ_qu2V!9XA{vj^CgJ zJ+ZYhqE@<_tHoVRM=<`iV9oVA&2R}Ob;@!uKhW6!{vnWq_4xr8QD3`Bem=u0+M@L( z{=%`>Q*$)+20wzIzRMX<5%y{r^rm;3Wx71od{ERR`j+!`p+=^`3F>am2riiHl$+M!Jwf8hwAZ#_jc(R^6h%bNY*C+S7~v3~D7o3QbPE-f5$*dsFDiS|m>n zDLK{)2Ck&qmjI)A6!&lhrd6S%Y7oKwbFp*i`Nh8Zv$m}xhxN;VLC;|a0MD63;0E|L zySE-hXRZMk8C;tXk-vLwoWb^E;Rc0CGaE^TF+&noZ*t+A-xpU`RfYNdbW30Yo;1Hm zoI1sLL;RYcKkBxe8{Jr&kjq-PAdP3}DTYHhFaEB;rHLm3YZ>m`}11LH)IClPR(;16Z=!JmbOET%0dC^d?-xD)9+ zW{a-RpZ#{!tndglWUJ`K_wBB57=4z;$G3yzNMKx)Ui#z=FC8z9QCU#RXCvq*MAaR0?`x zI9fl@xTNU{7tSBfJN#4hew#i$v96iT>~+(o0keN4)Z#ThGPAaP7G6Z$CH<$`(M^1~ 
zY#^d3L0_-L!Xj4QO!66T-7S7c`ekQzU+vDa4lHViePd_4J%p%n`W@cqS(fxoI|)p} z{Q9=%x;{Y>EE?qoGqaQ+uO*iXVc%p3TvS!-9~fYiODqpjE4fztTW-y}7JiV!T*2$X zqRUHrnn6($7`)(h`PaAb;O;h>RE!;?J?%^ca+PB|xxKsVzRl;bnnIoP#LUKK>=EQ| zS<@*tL+C0nPrOTskBPcmpR6jijzpkiSTXwht2{r|4aZe_Q)=gjo{V;gSJQx--wbvNAy}`>+@g3d#N#yuT`JU&{ggNTK;8o9!Y_^Hya zxcHrmLJjjXe{%ZuI_J$d1HP{C>(d$3N&Y*!qw5YU&c%3tyYatX08%Nxuf)mM-QmlJ zD}O7cx)js{k|qCfF_+Ac`vP!$e<-(A&FEN-E9=FtK6pDDN=$HMG1D?X-*w(4fVoh? zwL^O689U=(p)C5Nr&y#VvF3{vot!^iA3eA9#B)w!kN6YenujY|1o|jm#705~w1tZU z1b%#HmP7rQH-aP(&0%XEYN_OV`~l)6e55rn#aT z;N~uLspOQD?2Mh26^#-Fc)Z_TP*tTd4W*cJ)c)ed6{-PBWP~wkXPSWXv(i`SF`*0^ zB?mhiw^NY}e1*ogCLHclJUIfw%y>WE_e6yI1Plhtv3|4MGwDpV*i7Kc$nPirX~nVG z(+$0~8GX`TW(b#|pnwRVjN*jj8RWf?)mGtx)y8E0AbP7z`unS%i!MK3cd_?80`;3w zqxH44_>3qG<)e{?SM0k>PT88_6?B7P$WLB$ym}E%5`Iqh^iS(rw8>|woqmA_u)*bB z{u1~(T=R~8PMe;1=@qS_XxfB6c&j(|jJ={5rf<-nkw}aFTLKlmI_$HF+2+4KkU%ZO z2sljC(eHi3r|1j5yve2u(fU}W3?gm8rrWFDL@#$k126z|`HG61gelr7^S{(~BRsL* z^R8-(GAadw^hWE99L=kXhUOXq(Vkn^S?5rf>c{PCHleCsg}?{#29Pv~PD- z%S~8k;9ST=`Kyo+DWA+Z`P_V34`{A;l7fjoTp9Djljddu(*?q?zjXhqQ_K9FjIKlL zKTu3o(jM&?_f2Up;r&{Q3hC zaj~8kH}k<)-o_r2otfv)ryfGPP+x%g;){;Qo~zYK!HtF9WDN|i#KvZtTWjI{+@D`{ z%MA1!F~?hnC+hO!#@ArG+(_x@5cbvNTyJUh^eW^dWh5uRQz`rMN#|E%%9fieOH`p6 zG!0JZKr*<*N4vaZ@C;6Wd0-1y`dhwRT>S7GJxb6QCz4F7Swmk}z$ZO<<@p3O#~E z;Nc}yQhd9xRN<@?ftZPv{mXqQQ<&LOI&~NiRV^_*q^RwU+r6t0z10d?*eCktr`{Sl zThtAsU!TR^;zTue`lM!As_avz3U99*jK7*J*truvBIb4+Y%@DnZ(_$WlnmUC|5z3& z=E$s2dT5G&VLNB>=k%M-`Lu)cVON6TLy($&liN5b|0$X)_x2K7Q@^AzpnadE|Ak{p z{&q4#O_96m{$2oC`=t*NW7_wuIb6i%0@~u7bxOfIb%~`Ie zXc`iIMKhn*M7D(XQQYBA*?Yes40R(P@l`^9;rVlI7FlEqqL{ed{dn;?c%ocX_`7*I zN1vBF-d4jo?=vp7j`SJ4(O!_wGgx-l8T@F7Ovydc$H4^d%LneE8R?Q4^bg$Hh z`G7yjkDxQRrkM4~Y);?#A&8my z3Yg!8JDaAssaZEnJ_%mCWxiq1=H*=%?8^F_KS#bK{CXF0AMP6TK~YIItX`{aDf1ON zsvEuDgzUOl9Ts)7g9HjX(`71~Ha{Mwe5BuKImE9h)r-f2+d{obvHE>0(-@IVl-K7w zRz~1tU2FE>Z|rxQ*LN>bp8mNC>p>! 
zfDsl6g1gR0|F<5a;LR=WA?glK9o^Af7C%5BOkKMnaW0xWC;T^>D?JJT?{ZzZ!#&HmoJWG= z#mEj#M~aBBIpBoM*7GLo&){0>qQzf@UkEWgn#6tS(!{_%BT0Ow% zHbo4HW7MB&8^Msidvpo3VdLNI@Fv+`U$4)d|4w0Cs;Em&H#sd&w2g5`+kZb*$6_G0 zzhb%eJD}$y`VviTXPC*^PU{ZZznOy!R2}@%mGB64*hL5}I%S5`!t^bc4LfJ|CSeJu zf<9))F6{kl!?RKw5KTr0x(xP65TvA>>bjC+4@l<2pUipnemxhdXv<){7*_c33u!<> z9#!yX{`j!9zQD_}d2j3_)+dS(ImmO5}Ykdj_}F zBC`_{3*zb91%6xp+C=;l-cOI1KbS%RCL`bfWGJMO+>y zvFE!Lvi_+Nnd&{I=PSyJOzgL1{63?jD(8L(2qpa zVABpAFU#;+NrhGK*fqAc#bubZ9E!8DO&vN{JkuekhocwI-!?d8HTazDoPQ|Iulu|0 z@1S;b4HXKsaj~0)mWVw4o^Xot4)I27X#RDk0dF;=RjK4cUGICd)%#W27$ZWOuQ-7c z^G-X@#5t-#LgTJPLeVS-j_e;RB+_dZ6eEIXH;K275WFOl@U;fa(sPoi2|9$LsZEpH z?A2%u18q;gV!CDDp5qMHkL^;nxPD2pqokwb&2HEdlew+Fq4CMbREkR@GJocg%?+i@ zHf&oSWh8nAAQKA(qa`nTdEw0s`XGv<%)fW8R-6jWX@vmv~NMAM@A7`nR^Gr=h;>*yUIBz zZ`Ccbx9}EedszEsIac~M?aDKpW%FF$laqsK{};@;+wVqdZ0LSLx8dT=^;P0HfZV@3 zCNigu=Wn*fthQtvwH~Dy5@4TwI;&a|aM@Qawi+CWh)7B*iY$G&{=ApS7cAk=w}`(0 zK|kR7M6g&MI-GGZ*HEG8&vW;+;?E{M%?|J;3O^5~OoSR-H{R-^|K@uzu_K`tzFYAF z$h*b(Qk9*y=b@F*HJW~%6Xs$GL5up0T~9jGyqhS(LbZ?2$<%P`TCh{`L#Vh9LZnPP z{Cnv3sK3`YJALcLf*&m{_xJj9zOkRe1nMem=c>ekf14NgS9fQnxYs%njzxy^U?!7H z-L58+-FT&R`t=83s!kVirZ_*M#MeX*4cQ{i-qey%1~;NGwrXPM@TsWTIc+~MigGn) z?^nsp_N7X@_xqroq7Ll8#TpGrNsnfDI#CcMHunW~-&lx(+Ug+I7d@@(hH9dEbFG(Sv$3jBD<$-pNzRaB}x8r+vrt#-4ulSxu*W?zHtt-Xg zDm?vGC(KZdQ)$`x#YHw@=5y56Q&7$u8(CgK26;jJ{HYg~`I#Y$2N^Ra0moJWdhy{_ zPu#ZRP8mt4dX4rSy2Upo@7o>z%8gTVp#Ci>Q(eijEhI|Z9+A1 z7~kTMN3rWV`e*Y+tRHj{;6XaF+Kj`n)}YLH zL%W*StQ=QJu$01_+0@S;)I4Et*{S*7trK6O?f}n`Y-kFAYhXp4DJjXtzKCQ(OiVjQ zd8b`qF3q^H;fb{~_{uf#O?*t{uQ#;5()b^e$8^tmFAHw{KOe>kNV{#|#txM2AG}?} zyu3K1ZFVPFtdGmMICHj;716)CocdocK={IsFwD*VwIb^+zLl4DVM#!U7vK2LpvX%? 
zm5dG=;aZ6|xY!Vf{cnFr^VfB6xcR-Nf6}kekNTyS>KiNul8@2m$&t@r|AHxD!& zUW6_h*?SO+iT0lH2lNRQ%1h|f!>u)T##q7}dZ(meG+yj37Z2lH zP{S_h^kLJi(>rI_2f?5`^mW~JVslwN5)t;qu-jcZ&?q(PkbDR)_4~n*AiYmBc@tU1 zq#?Mn(z`352?V=)?`Je-s9oNhI?VOnTei>vZbBPlj;(`)v}0Fr*g0QX*;KfSigK`|DO_?+$?bS zJ|&b-t1F{_$L)b)CK4Xi@y&3lQZ8c>1W~%#EG`wNkXFOCAW26f_tENw2-Bp%b$8!yYXs>J)S4>TNOk`8ff1|n7K#za;73Fd2!wu4@A*8BhHHUOpAjlQQ@ z14B5CY+1^0a)Ex7M0$!Y0`OC)A^B(Xy!;&TTT_PBsu;NBGW+7jd6WPxs;|So_qc2; z*-hA$<-=-U;b=^%@PUWX+3g&u;=g~Ej0Gzi)#O;4pbO>pfyCHq_v1}xRq8zc?*@KW zTTYFMz{yr}W3cXXj3Rfpca5H6(w)FI+F9Oq4`dpgQW{u)`b-r)IP)8;oj_O1BwYg0 zz*f+=;A>@Qp0YtH+vz|yKtMW6e#8|bDc7-V5?U=E(01<`pXb6?jgG^(@z#IHbc|VhYM2r*y}T~sPkki9WV5;Sv;`bK%9pWT!07^D)(%Q z69^0M4FwqycO=dau{^-QVN$^Dre*JO$$P0WkiNV9t~cp=wXyEwfbaS@!T)Fk5;t7Y zIqdhWooTsZ--YVlBFAF3dT7jQ2wRSNM+GPo?a8tYLTEgo3HtjBxJ!-FFfKrf+?H8*2h%EFC@*kUYOLwV({9#(fA2{U zXC{f*mqU{_~|n8w2yv=(MjA_)jBp-&*RZdVAme(t0n>EQv+l#=G4OasAE ziRKMi$7l+c%$Wb2kl64w9ks*ja(*|^aFcegST)L4S zKTpi;*)w~d+0V;0xQ%Zl26jZgS0lbc*!6s45WnaNLtuO7Wdo|~;CUiEY@8X+ z62TntOjEs2pa!VjZuu6=PEM6aafHLBLamoqu%z17*iTNwz{$xfRICfH-J$+PHQISc z$=0e;+j8TDDWmUjeShXmpqPnLwbe$i%D;DMV25+fvn|ija{EMMh5iA{+)oV|zNH7$ z$8_-paPO4J*d|l{J3{eqHW!jVcQQM4azApW#gFmrqKy`d99Y@M*~>yN%{iP1jy)`$ zApOaOYk!EZ(9mVH0lQF(qui8n+^rS);c`h@DZI+|DUVbxk?*xcBvkvVFl>pbqJ|gi zXS4pzv}NArcJQwy13iPat%1?zriO}E8k3Vl$`uCz1w;Xp|1q=4XYsz@241`!u3zlr zU)~hTb>#Ld{#5TG{j70i>`ncup;*#}rbhXgx*1dUayVwCdJv^B2NdB$l#`Q{zYA4- zpvM%~9(k=}i6jDwfbOQrf6sf~SBK-h41!y+9KDwTv6RJJWj6F1ov001%*?GKgMK0{ z%O4MM_Us#aYFw?Bn+wXN7LmkIx-Y7l!Y$8pIie!Nj%LTp7iyhJ&yxOlJzhI*h=|eL zS7k|3hz~f=;|zU@8`%tkyT3zTr5zY}R z=qp7rlJKj4*uEUzWBmF|5virVM}shTMOm)lt5#~-X4KoVwi9_72d8gB_u@xwZ`%IB z)h2F@+X%)rf#`Ua=7&zJw7vODiq888^(*^Zrq_+ND0!-rd-}(6{tPsvPmofE8E!rf z*raF?*{S&~^)>Txp4d0bd3p8x_q#7n6E#37zL)tePmowNomvJDEt>Hd>p5OzWYV7u z+XFKO&#TF}{KF96s~{z6r@J=_qF7FgJzs3g+K%6GfI|ngX7-$+>jp0eei=5pFW9@^ zVAdcFJ8t{)SifVU-svxWA)$cn0?8ABw|N2q3XJphq|Zo*TAlA4cgYZZKD_zs4$L@g zcEcglX!?4QBc&EQFJJcI^;Y4=a|J){QX=3`-Y5J*xP8qJ<>2;Dj=Vgw_Rt_Nds&^O 
z|CF)HiFBy?U9F>?b#;`xLR>EUU*U{fiC5P%(WgMw=cl7(WWCyT%dVZVh`9L!<)6Ch zi1t__udcRm`)d{5s9RlQzXTOT zCM$x(mev?wGI(A>JY;1PguI;}1ods%21UbI97qSGDZY-`FyiENIS+)7>dN`U3;1;aMsGU6!5FNBkV=dF)U2&%8|X$q z4-|YqbZz*A!&;i3mvN+Sw6U+y$mqSAgGI21#3~*xCbi7FVlk(a>OtK{EBV%ko$p0m zFbSyiwH7w~$L#i=E5$KW={T8zl?^W%TDh&HQRhJVO?D(Cge+<27MGUR+H7Xl z*1CdVKIN;XmKGq}93Yn#Y}_KJZ^8NwjzZkGdS)Q#1ivykMLBgkBjPw6mCMZDwR5uv z^v5n6DGg35^6DI3n%&u3T!I21puMrZq^pQO6Xp0LfjLyhRwFn}k8lnKiMaI$dHrrD zF-S^*3I%nFpJrHZe=0E~Z5Lud9nQ|!n~z$hJ^IfCMY@Nb1s;#hQHE46Mrapa4P;1yfn{U|yGUxHD{K4a-N1nQ3gB+oa5uP+> zgKr+Tu>5BhfHgIA=A<-Pnz1td_Lt8JRjOC?ge1LdycplFC>SUk&;1(fUjnV0Y(5Bn?k)OH>LSx+HB(au6ZR z?5VOWfPQmN>dZqxv?fAZ&FDrHG30{)`fog!Bj;VVE>s)3Y?U3DWUQzrI~7B*#2UI= zQ|)Sb{h*eqO7(1^$7D?G|6H%=&Q}>*Pfq@2str*Fum~h=Vee6HW^Ak0Z zNbh+)dBl^>Z!63*xp_CbnjIxL&A(IOuikKd7VSLONQDFw;cn6j&4Yg&UX4_=fSyeT zYoKzV4r)1Lq7hFlVhCdGa}=%xh>$&}=h;fVyQ@lt)h@WMz*&}+mX>xAl9WVx2COkQ z+pP@<-;l{s;CE@St5dr;*+oDhtg>ETg760pPLWK%68XYZ@J)f~830+1KoL{ZI@qK5 zoe3uPG{}bcpmznX0;{e*yt*eN6BBsX3i9$`b!?_vB@c1da_pfPi@XRw*j8{93M;$QF#}`?Q97@C7SH7 zD2DrexVAx2gJwI?b|oMrJIypO zDEOPW5AuO;l?5_Y;WUL5)Q@<6(Wow0d`@K~(W&HI1!gaO=bEsyeb76E#n7e}CysLa zFB?7|A{{$j21VvLPl?rFUeY&tj?i(%SK}UvW^aB)-VW4#%pj%P7;+xogB>Y<<4oY& zrntnQk$o<@@xYP!xAZQ7di2YxDFOhHnKaTE(J@k&D~eF6>>g0HF8RG!YCs~k)D~9wy9iF)LxqY z%Si6_C*@qx079hWh_R}Rv699J1Z?ia%`xrT=WIS{8=S&p6?P}rteoUKYm1^HZzKm@ zTW_HmGo4}$aYaPZCRK(T4p{c4?t^%D<7$t$k9%)V{>3U=19?KP;xCB_Nl`YwnHrKf zi>$HPtzAxLn;d>`PMEc%laQiiiKaVEsMrQVXIsDcAbWURG4DkMP*naHWn+DX*>Ofim{%IQ^_P~zGWI7!a7RCjzCypBc z16cAQJLai+(We3puuVB1tVjU12cU=Ir!hb>srzzQT7aF3le608@_~eegx$&cb}evw zav=|3xhI|ooW{k)Lnz!I;@NFq#aWn}zl&vA#!pL5{`85m-5Y>Zr~5tbOG`Qk$oPRp zm{>M~Tltn!TIQsdJ`XfgW0lW-qW zM=#pk!?)Mn-sy`{>u9#L3N#Lv>ktN{HrWvt+;*c%N*Mg)7S2t6vTcASy zo*+)#lw~lS{YA;gmEp}B@fVn4Q?;%$;DXm)sL~<03y@P82Th$}8=Vs5)M1;@3gq7n z^^2M5T+wGQiOE+gRs=kia6*ao1e93R7v_npBjLn28R^?!EZIpPn5;;j7*EI%FLQ>e z%5^b?k8CzE&vI-7`z{=JGbT%WKY1r4?hac}Yb#m{ZV2FZAC7@P0rpnJH&sxzCpuQ} 
z8cp7kw6;!anc7>ftHqq8{{H^pA3yq3RaI+w+!4Vqh{Gl=p%RKd!`m9T`SopUW&^WD2B@y!|@NTIou ze?O2ci|guw-riowS2#NxekGaweMPF)#aUVl0GQFCp+mp4MWmz#AWH)dA>EZ;l9P;F;{I`zJ1wK4g6(7Jz4A2oZ-v- zm0&*BvI&>BMCF$??U$>9V-u{yT!{irS1H2@NxyMcxlA1M)kQM2)+B6h9`(>IB5)MC zB;9tOZfv2KaYSY)D6%=$H`*uRgv*0}s>*lRHLJb}u6c0t;G3@&oy5AQM~8dMAzOvQhBU9Qp?^$LBWf<&|JSQEI>M#AU}p* z$7ExBs=&1broVf^XBFB?J|zy~I0Lp--Mp|V#L!)n!_D}JQekC;5h7MV)<(80;#KbfV2Oe*+oRUeU`GN&iA@)0}c{=)_09SqGE;-H3lrEUx*kwnHxC3WN05JY&TSwuam5SPEFyrk7JZ zAgemReMN!wLCU!VE|A|8M;YW((DM+eJ~m?8O}okx(@IX6Ox~`86SuXM^3|)4))8_`3e^AnO}9Jnx3kW5ww2D3HSSB~4-3PD z?1jh3$lZpom&RS6{SE!ACY=tL=`)_GnS$7p*0Q$U#8wz(7xX``^9PmGSm~^+Sa>uU{9@NWWyI=VE6UNi6Mb9DKvb z$hxW@bMs^zFQZ?^VRtdt~CF5(c{H; zYLl`|)%0%u33@O3CW2x^L!*L2!h-)E@O?=4V+=*RUP3cJFd+5o4Z{lu_Je~1+MLY+ z(XROBmcReXWa+?iS=E_a2~1?Ck9apH!h+&740Mc5)PICTJioanBz~)?`e*srn4MTvs%wcBAla8dDK5|;BisA!!? z$6FGV5$U}9rmr+t3-+jtzObc8(j6NOtULJg;z?3J^V^rwnO_a!Pj(z(`8E^#>wnhh zOD_I~pJmf0ytxh*R`c~+nf|{A$6-abH<132|9Zoy>jy(&L8*<+xnkbNM!I77ZY{6Z zFR-9mY$uOKfF$_u@Wz4mnlHC_pK{8+yP1Epno&!328H6estMtc=zjW#Y$LF|C7+pX z527A_+!C~nRsRwAPV92mZHaym`!yRW<6)~9TAy-}h9A=mt*||=shU=HT3)TD({;DSFkk?G0ms}(w_#*Qr5*MX==EOASb4b$Fg`K;T^jX$&7r&4Ue zL$g-Q@raLOkoonSw+!di_WD-L`1`SbK9wt`S|--c=j~|wwV^lTrjT?u+Np!W`Ub{2`n*FdIJ;Pj^g z>KeGTjPd*CLRHyC{2dSEme+!R#VH&w&`CU~aa|2LsEBgfT`1n9`FPEUr^$Pk&3!gO z8*d}%8ZOKQw|6dXFZPhCbWkPj=*@C;Fc=oEdpK+^Bgwzjm;EEhM~g4jGqwfMkbC0N z?Ww)!QREIX=qj=fPs_?orTVJ%wXpgmGR@DU+_)-h4E_GT9C;mPr+eBXc{2c`m4#Lc z5XIp4XEx&Jq{IHtEMW9I1*B{po^K=h`uajqqN}MnkfS&PX$25*w+>-vq15(r8Im{+ zvt-fCro$G(-P6V5G>ipBd=>1}kp>U;Vg#?LJt6y|4J&<+8>NBcyP) z;l|XhEwiYboNmiBE4ZF~*8c=rn5FuakJ3f4%XABCM+gpB$An`L1x z*aAN+Uim2Wz0z5&(02LV-e8b?$J=d?ju1F$%!GD{G|sE3p*uI-df&6Ca9oz}P_w4t zV2k^yMmcYD;cSQG9=u+O%n8?R6!AsSZ(1(DIvE%kz~gi%veTGveDtsw8m30o1`j{- z4-vDov#%}wP}r@(jKuY-+zcKZ=wEtWgu%-~7{7lAA~YutUmAsiipkoo?{xn-(}A ze_2eSO8vR=YZes~`#8>MPnk&FpR=N`XXNYJM?@trHuId6>`?+H!3vK3BEFJ*NGWFlc=bZ#@we**G*#gpuP`q*cH}?05`vlX(J=i*_-QTHTk3#>Z@Mik>edWo@_E@2uRV;*CzJ8t0=k=^L 
z@8idh5E@!#vx)0nJTH%K4w;=2z5KQS*cr@_f-IqVXfDUasCH&c;N&pfk|#3XmG*L= z)vtPl(9({u;&Nfg#n4HE$cZ_AXZy>yK`dCD^d z6-v>BU}&Q+EC@fRXy&LpK{f7qb$-83ogkE7= zZTivgP9vPDYNhBnH>OxyN(r6ga3b%kpzt0Np_`GR`*g71c$h&y@!1i^Zir&n1KWV@ z@&ZFz$qlcWns*pDk)MotVBS&TsHF=lf=LQiE=-Bh0@VMqrY3d73)=9otmm- zMlLSG>nJKJqM8cy>=<*Hyo<11WfIU_pl=s-(oXtZuH0VF-nJ~{3uND z(!Z3rZcwcjgS@puS|p6n_-dkASBr&)tWUg1u66N;Af5- zmi_mWBp=c%A!Kp|k^(;gsy;C>5#E*^NT>3I&{-dJs;|f#*{ZcQfCZ1A`JMf;$+`ew zdjMht_urXo%xUYDOlw8c*Jrl6dymlxkJCJgwMfr4yGExQBmV5BDyfDuwIYDLjQkj z9-z^d%@$Xp_y!b>?@})k=R4k5pMjd6IV9O^cY4n)wO}`}Shge}b^t+$fE4dwa zr!xMWw2VbY!r({_PY6Vx9mQJ(#!5uHy_3_cpmojsA-?(E+wq$#nx^Js(GM`7m3%he zD78CxjQ->OcYdQ{T3P&odfsS_CaL&lU7h5lN)Tk+?3AM!w31W7mN)lDM*fdNg4TkK zyRh6RhV;lu1kwW8Fi*0@sg}&c)06tlnb$mh2u+&MD&1vz8R993QGRHPWcvY|(xeQO z^XrxTG}3~%@d~DM1l$4#JYP4wzaB@>yE*T!$uA-QvJhB|kfut!mIt{iarBz0b{f-F z_A^fVq6!LUkemavwX+@d!QKR-?{wboo}Lus-Yf{v6P2JIiZpQNf_++JDo)>2#7W9GVG$EOT>nI0m^$%39(p!7*&Q zJ%jEt_igD*l8F)b8LQ1>1RZy)P&UL+|;2^2tuvh5&=LNbi zgXE9h*R-q>@MMvY_}y&EQtoKzFTiRlFfJ^3fh#h~M=*iV$#8P)`F}QB za1itiP9B*{a|tiq99e7|z{ZpQ3>a@X0HmvzL@RGjg74E*_ahAp3xDB+T)%VJGex~g z;+A;V+*>sGf|pd&>k1W`lBz#$@HgMf29{&e)6g=GHFgS%#mh{TcKm10nSJvO_=NAD zR@48UY#$!%R8UnFODyF_Sw`uJXZtT1;Xyx9a|b3 zb#EHaYEKo3JmaC7iRA09tF?1hWdDze0q!X>rChw^PMrI0s0+;>qmKI0A9cSn+1r7f zQy_=LvRGrD1>hbZmI)1juP&FZUcnmIKlfXb{-cRUZwmGZ7GN$w}XGYp3!jf|^UD>rUqOE^out>&}%T3@9 z8+X*P83{={ooIX-z)EHrY=N3%%x2{}Eh3gDIFoR8_Mo;Wl91rCx4$2eXyDSr>0+X1 zAdb`d>oS1Pq{?0!2^$n z#j%ygJ?))U=CrqZY*zXwhNdjH{Y3#@R8UZyCEHNCJ&Wq&5{*G{_z$&s6Q_t=hRQjx zw}Sntl|~_oDqpWJv7ftXY;3HxwY6_oyYXV9um_OCkTh%oA#)rTvu+9SxBy-DAtGYG zOA!-O3Cqy$t)Q5op#3o(cOQq)9J}3(wv)0J$*a>reXGTO)*&v!CXuPKBLPQ#uV9rD zo1=Z#xUiU*Z=N8DX_w2kSQ-p!)D=r{4ej3Ex8J3fgUtHfL3-}p9aU8ngZ(6X6bh2W zi~bP9=+aemtMjvJ_R>YJII?Q*atzfPz1-HQ)?bb#rrEQ~Ibo&^Zs#Rn9xYIHQ8cc-z z+>IRPMBFb_{CAT&fl2vQDkxE%*p^O}#qA~8qVd{bhD@fk?55@H=NU+%M$ame&Ilsq z&VX&Bj?OSp&mrv9-`AJV_3dKbr-FHic!UsMXpTF8zYE#+Mj9G#OeRO6S?1&vQG#vP z_0=T=REHEv!7KnWCu>Zm&KM1cTS<@i*-vn9=8_q%5B7#-E>^ 
zkvPDN*=BdX77*64>v}LP?oSijhM;gyclWLcdfvkf$onof7+UL~t3`bDNWR>{J-O`8 zMA`cw5*T4)+_kf}pSuZB1$57csJExkh(}>1c!*U4A_@;D`e$WN-5ijxN4xwXj)u)Q zwwgEXFaJ>)U9h^{Q4^`c(XN?KP(8YxjGmEp+@{&zSj)&)bfUlz#>g9%X#uJ%j?d^m zh>0|a&6hdIk^kh?0+q(LXbGfnK-(k^UvaI%ZI$K*GqEtI>^m6U`_vZIx?Y&%-sWle~sr#>_pYpiZvzy|15{UL?=_DNe(J+j*N zC%u{OuJYU`G2aewSd7k7hcr@(jYfvXnhwVok>(dtfLjQ_r98z1yV|!# z#1G){O^mZj=dAkt@#cU3J)q$W7KS!(rtIHyY7=Rq@yFAIcY}{4oJz5vOgHjJ`=ncB zOYM;J8lzD^?q-wc(dIvT=`v}={)#S+C36|=Lr&tRD&U*)S0ob-`)zd{!M*@+8!8k4 z+Y%(_Ir5U92YilJhJiTh?fA|R5VeiFscQ>lv1Kp1V5G!r} zj3|%kG0FWqlpj5N{i=%;w~5bfjmIh>;Xk3Cpz@Gc^A3}3H@#}D#cKB^9-}iJm(Y6_ zbG{(fMnWIouRn^w^Ez2$zu4JE8k#w1G?Z&-H(Ko(glY$6##)zx^*{5hHg-hK0S`xO zM79A086rqylk+T28_;Z=rzDc3?}>ZVQKX|0pz^6&onO!>2v9f8lP;Fc-F_#7DLXuR ziI0yo_YyhH_cRhwuR* z%LQR@$zv|to0?7!HSjwq}a7J-L^=?1G;LkbW_}B!_gK)Ai$c(rwR0{JT7~OG;xn z-`KiF+XnI{_R*y%QHc-@bG` zne&9xy*rXY@Ns}R*`l$3;}3Rr=I1nBzeoG8=ya@^9& z3jej*?3cUVGSy~t@y6pt88RIRC^r~;@k%I+@@md-#X_m1mOu#viPPhSx^%_#PG_rn z+381{?2r(w(h1agx>nW%;ARnHCXuq7ydtSw@qn1t=O-HGMG%ucf#nLE)t~2Jn*5W@ z*VRC(EY#k7dxPVo;Sq|YX)dq84c5vLH@F3ouPj^x=2P-N8-`tf>HOyTo;Lk7GFtH` z9&sLT_%xpEHQ6s@T%7D2;{w-Af7uNM9UaV+4%rafoIlCQO3Sx}9tN4rRS^a4?e0uZ z)hW%m8DwW0n`PQF&c@h1I-1*?mT7#M;Ng8iyKGeC=!<_GDi-TeS!&v99WSS;q2ZlP zHLw3+@a{d^Jd0^oOIpN003lu;?Y~7W%qQ%ZloChabe@0x@zL8(w-q;h4%^{t@p^Q{ zxzz!Rhh`K3XI`O!s&?@j$7L2m)Cxt|)7s$ZKeOvZH?rz{w8X$6eMy47nRyo-_(`{Q6y`Drg~#Rqc)o zVjMmi3D~oTW#x_}Ek$}0@>@w=j!zrXJ#s4FF$cZDPf$`qs7*#@Z$embR))$PL-^38atP)(pY9qyOiX6l|NN4MXVXWwhuLJfxjaHKqTO1w zoW~k}Ld2y^z3Z9NFh?2DmP%E!My_{wBlc^Zd)jYL?#-0@<=YKkrzaP4$Qcp-7|uU^ z{W>abCxMtd^khSSXX+mz!@T1!ynK9L)vX5aUg)3pd1dm>C3fqb%2ca76(mQ;?%kC> zrMokKxg&l$miWp#oR2=!GT!CTFyKuT#<%@GMmX{g+@^1~Ylr>IR$lEjWEpsgs zWgj&()RrpLJf4BBBv@en8S@9+ppRSSx-9}h`DpVCz~@cUTK%;}CY)f8I+W36819w# zdoe{YV2eTb>bvz&u^}_-Ly`rOY)?;5?dmdbQc6n7H+ze6^MA-4h0F=Q&ZpLArgWX0 z+|zco6e`bc8%gSvFO~H-|*OL@kJyTSIHWw z-27V6N2IiMvuLT}c#uMqV@FQvh)GBy=fE%CH0jOl?ULTrtCz&5KagbCHjBk9R3}&k zs7k%>At5cd@Rx-hWu&``Q>xors<7<~WeEZZ1ja|>Ejy1%d4fVaWU@(IpWOFslT^Ww 
z&XW|ENw8on_xFKm{C4I@o(dncz}|t-oiNIS>HVWKfX0j9s&ho*;CwfSGWw~rSvKvO z&FV0UlVt3md?1) zP`zqq#DAl;)9e>5%}e|3+qa)=>kCpHV`K78BRTNOLvnaOdlJ+t{INa+qlGd?wi_q} zl}DV;duF+{cwb+j<66iS%iMPE)rv{6m@5<9l~kwm_|1B2JZg~y|LB8U>HP7Y)Hf+N z*}Y6gr-IrxslhAG2(%;nss(C0Iho56sZ(K@0zL)*1Ow5TAA*;EjuirG$SesS)7%ovh;iQK<=xe0 zY;vsqaGH-b^6clYXiFWEZO^1F|N8Q1&$P1l%r8!msbkY8xgF2`tp4~~zA2D<^B|Cr z)-x3TqL$7icilxNBGF zO8*I1m!lif?;WfZ7Uk20O3fzLA@LesWy@GJd~x&Vjl`ImbsTDcM((`&p4RL8DWw9X zpQ%|YMPM_g9CL1>zWx~l>6&E3waD`2`k?98*IIgdTDse#35qi%u?}9*DGOyjnO-OI zh~Mn+*b&Snl)Zwrxvq>5Gu;eGx5uQ+>F-J!fsPnN#-D3ds?kR>a}|ma-G=~-bp?zl zkdaLl=`R3*2vmrH>-_{QX3t^b4_g}8JlABMAE`WxA^{M)TT8w0~_*dr#q3 ze#+aKO2VKlZ;cij3M$vx2ABu(^;fxEcMj#;6ZXPRQxfI#eo}p0inpGqK%V(II1%@_ z7ZR{HEU+ylbVpP!x$FJCi;1gWNeiG(DU_^xUm{kN#A#R)6-j3 z?sC-|Y0Avf_EuR7X4^|-IxTV}r>2x_PvW6I?NBMOIAlAFCaLfm@tAz_=FJ-cz2*%J z^H;;UQh9PEJU^YUg?N+}4T@j;%r8-lem$9_Tv`fR4;2~DMxce#;>4^B*DN>8l=p>E z_6J=oW^^T~E0?fzChq>3#lHl4BL*g4gq`(syM}I~CF=}Ck!@BU=?_RxJkeQd(yz6@z4>>7c%;R2<_u@>;ntMm7MJ<_ zpD0Gdk8vFCCoBL}xqbIc#GC2z1k#a+SOU+h(={|SppiV7Fx?08&ep=hkC2ce&6aL@ z&1Uoylpz5@LEF`SYgk3y@MB=1sGu+&@A~QjI4qXPtj6PSfvE|FI5OT5>XI(7x!o13I;g^8;kTI)&;3+b1$Kd@+}mC2#e|2-#8I2tqso zG#^@v1r@#Dl#6v@g=Gx4rb`I*UoOWQ7UW^RQhNLL#J+MZJyUTX|!EqR~4&)0N zzEy4fmwFu2kurH?KNP+_Q=Q@%pAhfL%E`cR7sqzBo>kca8J~4)9p@S^?@V*VPB8n^ z+dQd&+ji$7-iAws&{Gvzt}DpQka$bS=$-Vi<@oE}CIW^0zTJ0!q|*A@I81hr+7Ew{J&8sE-tt6= zl$evhy7?QkOxO9>9=7I(t=7`4e;?k?E?RNX?>*<4<|HI{IFV>PH#VGRsPNtIIXbpL zD;50A@-&g{4w)np^{YpZ%q6)5{Ygm-FesbS3B~un_Be+aSzeY?eZwodMNY=WyAylR zwNdMGE$@VS75bw&fq8_6pnxwT%PMpoiAE+zrg4lcPtjfZ|D%ciEqP&9Z+_jY#5V0;(dgR*w8YkuD~P+&^LJg=a1=Uhb(?R9B> zqt~!umFnJCf>DwdmI56!os*VHoKvEBElopD(R~LWQ^V!YrMIR<^OXM4F7&wj5!aFv z4m-k%LqcEX1t@KIa~toF94u=IX~4DJ+f-Ro}x+vNcwwSws^~9!H6R)gkuD*@(FT9gq;CAI&L|sd9hI`aSWm zICzhFC%F{R?o{PJYa`<<5d^e081x!XvBDje!{;%(^F+?$Nd-FWK1|EqHH`BtDw2Lu z`<&90tI8N@=VFewZ|Br){K(Mt0U~R3CgNJ6ItdAZ4&`cM!H?9oaPCAipZ*{Z^ecyY zUbp3I4p9F-p=I>HNMZL(F=*(R zrcp0_AUD0&Hz^^+P+!~6#^zLAQUkObWMrU!?C<)Qym8H#=bCr3Aq2Z3!vFtvUv>ZI 
z^Fy|+av7PJj`sF~S2hcaib7VV_D!qrmvKJ@KixnX$< z66tTAp8NvSEj&Wk51gHy4auz1kFGZg=|KvoRAQ9Dm#5wM9o1B`aOn{9^_w^K0!6=m z1yB2fo_4Cr{w@+(wKk{(>K*XdZL$yIdR@Af4^fHRz^lK2c~8=HUch9s%^n!GUug6W6uO=P*oWUwxm= zdhNTP-P)kgD(ycJM~8<^{^M0ZqPzNd&KN)9=m(qKOk3s=LT;Cvf(R7fz`JX17U*}( z1-17HTq4@=^R12-XC1tX1Q8$16Kl@5Dnmlhy{2)l+ZCS^6VG`Pr2+!z3}*Z{lAdXr z)9zwWHl;7>VgH1oyq+_3=jzVRUCw4KL@*1|jdu=%foM=r##u-~0ez;hq|4|SXh0%` z?$a(y>S3ay5?9lAq47VcClB0f0C#5E$5>!;MeI`ALv!|@4rIo%|;jma$ zJ=a;aw;&Q7g#J^myHimC1&$u!>x)y%6J8$*!jrRMfnzVU7ETThCZka^P(!OXa6N07 zv)ch-OB}l`)AQMWKfIA}T1DAH9nm3F;M@AqQV$tm$yrc?y&w-xv z1IT=WrQ_ab;1uh%tp7&W*bqc2I8Q8FkqI!gm6AT0m%k zsM+SYOtzVvoZRkU3~Q~%i@)?^_%jL}-ne@u8}Y7J8EhpmCxK}jrY3B2<`+9|!wg$8 zHZd&gPR=sa+D%pknrhOcHKSkq6=BC@&?yOzQzP$2%H9JE)o;`?nIU$1p!fFpy#?^Q zMmL1Z5KBA5^VwmFKz3gzlKL^0no4Y_@fWxYiORb_bGR6xS3Py~5A&(A zT79;GGMWvnWdu;?!*$6y+H03Hst;;rN?zN6^yFYOL+Ps~(eIW2YSJGG{p9F&#< zfy62qYq$@M+S4#1`0JA+MxJ2o$%Ze7pxnaF`8-39O3+ZDM?=||;9 zmD{O{6-GcXe#i!HM80F#JBWVprJ=&0Q@O*K-`NQps`mval-@s2hbAhY5qf<4Hm{}x ziw^o{_|&~%B*2_{Z=_*|Z()Ys@&RP=AyV~hr)TRKtjoXq`-Q9c>iR_R>wJCDyV^ft z*bUd$Uu+j{evV^e>TwGP<{csIH;fP07;mUr>F)0Lk-fpDu-%(LO5%>44ZoiEJBjbz zw159%21Yb^=m3FM4jr1Co7}xuazq67 z!Aq&sPIn|MZ!9|hEqIt3TUtcg4aTbMEn#{Xuyr$S9v+nDNxFt#IOOgh$)J}MjHp+0 z+U?w;IRgC?A|j$B-s8$KAf@F~_xfRc*KUzV+ZzA?uw3>hDi74{@5lsfKs%OEoq0|} zgSL&EZC}*~GbyiQ|BwaR&vT2vm`nUe@2RV+mv?7kUjxn^*t*`HqjSYdS{sC;MAO_U{Bk0^;x?G2f4duZ6wdr%11PS%P#t*{2rD~D;y3Tbien8vr2I^9pS;Y zA%<%W@aF18Bm_&zT~*c1Ev+=%HI2(EAWikg=m?sk*Z0L_Pz6$DcD6b^eBC2aBNr-Y zQ?v?ii8vk53DuqF|DbK7cB=HmRW}N_imZS6zEb_e?qK_C&67RB9c&@SiI0OO2y7YJP2STJr8MmrL{6`zjU8C( z7R)3otRlf_Ez-`5>^}*u4sUz{FLz)_3kv>HQyoi10MYe*jYvDn8t6jKkH9T6_i#K* zP7I#{aM2yb%#1Ux*PJ}YfbjFLp^OWCDZd7DrFk;=IyYVagMTQ|5ga9kC>3SRdC3~l zACr!d^SAAT+&fb=kous7;FQ$$<7#Zr_cNomt$v51Z*ZPfMi=dvQ&qCm3e!GYD=V@>*k z8CbOQN^Y?}RA+?&4K@>{2EGQ{^nI9cw4BewrT!aNis11h9T3E~ioNe2<)+u!nEqb) z$(li@^PdAbf-<1uAz@zlpK+#g+3#ry*;6r5m(9x;m);s?D8aSSwBmNT_+-=w4kE)( z6c3BBUDt<=UsL22w7xOAglZy;PWAS-`3q|;x~iN&aj=2jV4^nyzso}7D(?KKo_^SM 
zD03*^RPD)2g`$U?5~_S!oz~CBdlYN91XNCU$~2l( za6MIo`bfCX+e5QXvIP(C$LCX;{*?Y)<(`I&ztiAp3Ly-`|Nd(i(ORDudGo`H?~_7#6KB8nV375^mamj#$^Mm&+07q@+9 zEThz#6=n#Snf#nL?3W<*eQFe$3 z3>QAa4BZ0aand%PA0l4gM&qgAamLEyVg5eX)KH~!R|3Nj#Y?OHn-?WkLeJ&-v2OQ; zgk)IVP_Aol6?NlPZ?1AB5WF>S*2%~ky>N6zty`HJNb?xJmtbxAypf0>pkYWdxd2;f zvj^t9E-On%rG@CI`Qi_c_xBr8e|zC*&+3yKQm$q%vWi`vDl~eA71_#fLaCQk`q%V{T`l28P2L!K2ZAk7oWn1hnyP2F zQMtNEb$`5S9#$(_TwQg78?=`Z>F=DKw+qnK8OD-f8zP1sEl~-#+?6fh83aN z$R~3yL&~)SKV5G=Um;7Dw(sTl?+bHt{$*|?_=puOowI76DZfP#-H9o?xMj|#bqB|v z#KKlf>xIhKIc4RU`4l5?kte##u{fW-#m!#ySr(UXsRib?1ZQtzy?4@^y>B66^P~1> z2Q$qcL*GAtEeiYE7pqy!2=;t5s;070int9VuR9_#v`0wGSC^^6&j4-4vX8)Uxz6 z4fAV&7_Cs`stiKI{K5f>=&9aqxl8Zq1YQIgUU-<&nvkh50%1#+#nrh@(y@~oSg4>B zGymZkjqEh}RjF=ls>YLAiw*5Pfu-deIRa z3a8OJTaD*n&{vRUR!5=O^~!}W;57nET<6~s7uilV*?ITpYEw*FG}81*PBv=B4;jrT zL+>?S!<|pI+n|V5GQHn3Q+bqb&-8Az<{PIX+4pp}a*x*+fQIpHVFr2qcbfIIb>5+A z5j6~_?hx}iA_{|Jf zqzIdmdC`b@Y15xrGtZBaY<)CwTzlCwv$2tpR;AuRO3my3Q!mx~m782h*7d~g8)?l# zKRDy+TPcqoH>2-bbN|9`kq(Tky1*+D5_}>0j_^hMtOdnP=Bde6y3wepM3$;#g1n}n zg^&+0Bd|V#H!)oO>aTmNie*P64Gl_$PpxULtbe)A*!SF4_uR%B>`s2%DERxu#9diM zbJI_9w)(77Zyg4@7RCA^4EV01-t!{7zX)nKx%vP*+|kp0PLVtc+`lk&U7DI*2FaSJ z__%{2Vc1Hw#~wcxD3vO8dxF9nX5Vdau(H-SHFcH#h71t#XmfvNHhWl@pLYbo3ZTD2 zQ&J{j0$<=s=FazI!7Z7+xeOIk~Ld46&*1+`NDR$?au|`__d*t4k3?eM+)(ddN+K`(0DIxBHCCVa--u?E z;652KlGZi93`|6Hv@LFr%=%P@CV%&E+f8+}Wkf^_c7MFo#W`|1Z5*{z2}(+ywtGHn zxZn3+S)B9m$SWxwoSluWw$d{e@7a_De2hi|9bCh?9tQR9 z<*H5R5g#ts4*poFF16cEf)n`Xg8=3P-g+XOI9&D;sB0Hs5g3=isPpNmDR1dg3gZeH zenV4s`s_WIvzDRNFW!%>?$6=C)H_ZV8Cl16@Eg*7yMF`71j`qO(NVZ;_YsoHDRx+_ zlHO8C2N>=DUS3vZZitph^hp)wU)LZZiZyMC5eNvUB~ z5ruea`fJ_$n^>-!dxa};VC4AB+~dn7l`6HI?c~Z7OS03v?NXwiYJh?Gp0Aq0abrUZ zy+j|05o>cRToIew^+WhH&~J4Gx(#1 ziSDR35S4Q*6dm`54OD*?;3PTjrPQsgJZf4l3=}XNyH8 z`f;j-L2rWnsu%nlsH&@U{=jzH-=!?KxtsCx^D8M~=n^iIlu|t}f?Wh`V<^JAzA&wp z;fzf8Z6s;Q$;;DalHl3|Bv*Ju1`Jv&o6Ajcbv5HnFRq_b+ta1^hStHs0s4YZSBp-c zy)A#KddtJCiuhNbn^l*yj2~90#Ui?phXY{3`%%gpX_|dI)n-Sl^>$BCXz*e1&GFB+ 
zvvuWQ$ud*zb$I`h=ebgki0%xRO#cN8>RsIZi7_!>lNW)da@PI%q4RF7;qMsGRBmp5 zxq@cpEBnU8lw-b*P=n`wHxbVBvc9#2go*hD3F)0D+=kaDB68OBoCq19@bsl_uscFE z$Jl0jK%cMC>gx4d7CZ1v^Y>7Ym&cD37Xu?m=g$H8Cgx-=7rp%D{Qd8XGk^#*q3E+c zE(6zgAGKOzH&VAIzo%@5VCYbQ->FFu$a?U+Lj*Noh*1Kn8 z-`;4qRz=4h!U2y?`=;@*W=>tD$^LUP|Nfswi*TlJu!10WDR8dQ+zx`VE&G?2TsjZUmy9prD}PsTPm4GN1KpZ3OB1iW6HZ zpNS9*27uLP_<;VLc9hj%R29_uxtCq=Jg%1m%*%auy7#vPb#yz;4uYPw5goCcV1%y5o-IjFJ2CHO+(;P%{@8z`39bR${1>k?k zOd-h1ob}20;pBv?D$8kh^o-AjjMD1?^XU^XOJW%}D#LARYH0=L5|WZaqeb;vb^OA$ zHM6?dinkZW!J00;?Shkl{4zK>x%u*JIN=mJ0=4_oAMWQlN6%`x(U)`Q3_~VH#(8KY z^-eY~bYc+$UXW~LUl!xX_{0;U-4z&F1L<|vnBrLovTeUs)-}KpCHR@j3fF0w&*>ft+PkpY+ynhk?{R13w^X9(hKL-f9{I;d~pF`hI;Nt&#qJKYBLw}$9 z@}C1v-cJ7i{m^x*_ZQ2NTi^2YjV>KOCBup|vro@X|CEo3;~d=kpHuT0{N9M08nJzi-%jf$0iwmjZsx1XfMSjd~RJ6Kf_S5d4t z)--H(9uZbxvMasLcabrorKRr(RK@a+X>B+r`g$KtGiPJ=V+GmiKE(EnCxGY$rvDom zroDCK&VY@N;PwuRr9gs`?fSoKK`~_SWKLOWiH((=f}HGA9V`aj==^lDiTI(qJSO}6 z{Ov8~U=7`DEu8Wqdt2$N^K)HM19Pr<5H@zxB8Tnn%Mk3Vu}Pj*bi*s$NLYDd>&0ED zzq}y3=l@(j0hi5f{+18(o;G=5@`_w@bAIamLOGxqC@rpLB;|;9K{Q>cuZ?@O$SO3* zPG3wsbyxYD7EJr^8hlb*Z)vy^kf1uLhrT@%Tn=eDF?l4`+P_Pv$tg&ft#&$;6Q4&}$Z9W#GysNwwTp3oTn0(v$4I0Rix))sr@5&b zgS6+S)b2K$xzo{7bN$^o$a|;$(%{xSateyWmY~AK#>7UbrokMTJgf6%mKSJ(7h0mA z?dygAVgVg z*crWO>$)+EbVVuRUggrKnyM`y2yMfwzE*i35E|Fg^3YoI(0!R81TR;?Zh2pK88o=p zrtGIz20x3EOIm>S$Nn5W85P&n1laIou;tZ=K!XqcPwsUMMD%t!YQk0PPztWpFQTLPxc7m&v;4FKWION`JOzM zRMUq0&YCS|hI#jq8y)H|fBZaOUig(p#af!29m?Eht@fDd&BS-Oj~`ZRPi6GUNpYC% z;n1Z(Jnras@dgJ{&SQPrqhMy`lZ*FV4Bdl^xYs<~%$RjpB4D*NNA(Z%UqVo)?toe8 z39LXAPRBz7ts*f(1tL)dyvi&gWH{VL`2!k;&_=UKhW|WWH&6gd3lo4Q@nAML>HO=}&c&!cMe(czD?9)f|(3 z7Zx@esl{?CxGE+g**=r;*ZJzyK;K+JMh4Zkr!70XzOL^LxN;a3r69Ts6CXl>>Z*$2 zF*Bv=B;P<28GAo1KK|%{sX8;0lZsfBGO{xmj3^OF;}Z}`<90tE?6EOC6OsB8Rv#OO z9E_`>{xdN$wR7uYdYX~?Ayx1z1H)tx9uFq1;rq$hds+$MpZ1;8Z2Pdkme_p!p|)a9 z&2fzn59_)FDweYpHS$+ux*gm8k|KB*E{BDwrT3V;|J*jOU~FP+XM0VP%rROjtE9wc zy*$kk$pQnD`a}|)jBU|9|1EMOL&z5r6H{4TKo}$8Po@xjt8+#DT?IbN^sn7?89WxJ zIB&VRqIkD5C{@R&{Rv=*(%sok=R1;+aZ>J93lD;L8r_V6W4;-$ 
z-)D=7i_<(Ck3@onm9V|KB8Zui{UT#LO3fv+Iq!Qn=C*YL#6&4vPOKah@7!<&VH@Nv ze4`bmqqYVB0XDnBw0U%*=j#;DQv6d|;fU2y)yn**Y&lOYM_BT#*jN55z=yrvoh1BG}7uw$Gbg`Q?>_qHTJRe9X=(<~}xNLzs|yvF_sB zO&+I~dPY`S#$H0_vS!;{N{W@{H5juQAeTP{Msgaf-z{ z;mKJ~$*5n0XNS1QnNs=w{B%SF0%y(T#hTbmP4FA_diCG>hD&nt1H`#|BRsmAz2rKJ z(*@VFEBAg|e*z?>QSlFhc85v*rQ$rL^$xqgMd#FW)m-{C*i#VvJ!BoBpkjttvVy8= zQ^5<(>Imv(XkgIi#%m0^gv9KM_deQt_rtQC%HZ`>JzcOA2E9 zLK1HYJ}^YDT5DyQbJnBklQC>ju0HB)Ee?TGgCC#GFM0V01hmg0v`_l9FFOOlZ2{3S z2>u>5)pioYltM&tsd1*drkEJL7+6Ugf&mA0tM`KH>TlJHGb+Ay6OtmtuL=nBMQZrj z3|k=t$kcVMy{t2EK$Cu7YE+c@`LpJ1TF4QlU%HeoB7xFI`j=(XeS*y{IuA>T1P6!1 ztKRcm1Dh)c(qK(yGi(ncwqr#Gav((3t(Nm)Yn!`4kpqzhm753C`}YrTOoRP$cgNN| zI^rT+nMO&zLqSno=j!1?13dih{@2+sAqY;Hab-)|XjfG|9 zCfV8rw>W+&ik~e@uDpi5s$N=l;D1?^j`%{#2@^zhc;#+?eZhk#J3C25)o(^dhJm5f zzgE=do`8PU4SBZC{fCMXSxfCGtk4;%oLT&@&1Ljf@#6!p3h5_CH~&_F&9Y?w#oo%y zL}8J%izuZ2Hs`UI__?zFkWIzfCemA5T&};17A8K>-FLVH?v`jC(!&UYmgx&O)hctc z=1|&i2WO0?uOd>g%1t+z@d?g38Nw;Vg8M93`>oaar6V2_Wa|Ee@!a04Bu|r(lSAI6 z30WWwEhJz#Kls6gjlS$jeS$a4U(`u|(|LO(ee^yeQIrw9|xjDHp zvsQ-o(>GPWYF}vBzkZ?4M7$)%eD3bJ!b=JHJ7v2&%1ei$a2Rog;50D) zJglD3d*XfjjE-A#tQidX1k({T#N{lnYrgQd74hmc6u&jq50u}s`n^ z#t*&xb~9fQkcDrQq+N|o5Rp|VM+cf~?~mS|m5v0qV7Xg1?JITmYrgH2Ss6!etj$Q@ zR**+rYVL+MWhhp95)F%O^c0oQ(;HI~Unum8WIJ1Pz-E%5(xJwFzCE;3)hRDhxy8~y zob=C89BKb`^?TXJuoj`wBrCHqxBrF|XK{WgPyCa0s^IWA=9gKI6n`OG(cg@n%0~Ywl+$`e} z(G$jj)ZTVQ0H0QsLdBaNs9P*Z!pFMMwZ%n;%IaDyT0_?3VWLO-%!HWT<&f%ab=-I5 znnq?x=woQqq95TYIp%kC4KhDVN=fX#$AF-yYi|B=)jc_>t@~l_y{?ZqXcIOk4DoG{ zkx|G~W6S3$cBG4oiz!`?CJl()jYK~l{iVg#5BL$|^P!|bYfi}d_IO#_NKZ#+Z+vFP z>~4R6e@nu-kb**&6@v}3=P<}xl2)`@_A7mO6~j7A5GNxhVFbut0N~pOGv(-sJ7QXhly$0%iW)@2YFuXe5WS)e~YTrQ!BC z+==urr4Og6OzB(e7s8S&BrndCrfYFn9%9#TId0uo-Jl=RDSC2kiwaBvaK<%j6s2{m z@_Yg$Ckh0z9(tvwQ<&(>RrV!)nhNvoSpmC{$v-`>Zs8&tI9-bgWhxqx8)&~ z%TDl)NH!& zv)E9vK-vA-W2sZ$b7;8puqt_B@49mkwLCIU6oR~hlBj}HL1U!SjW@?Hru}`>)vDK# zm87rTh1vLulIjFt--fwQP-{>+d{tMg$A4>oUHqqCSVG%~Y-DVLmR@JbGLnt25Q__D z^tPlWsbbt{{%c*snOK2IQ$vGBi|fXro<=RZUM>|W&NtBuaQjVTbC;fHINO}`{s(-h 
zi|S?=&M+~yxC(ccPEF4Gl7HdYe3%j6jhoFP{lQYV|NG*$B3hqs>ErEcXfrV>DU0(H zqkxVau^tMGgVY?Z8>oMC!rrVo)6p{eaS<7RuEmKsC^xdZhGl&jqWF}!*r=co`&sCR zf`WvIqJ|+3ia-nMZjbe9_rINeKYbEq=6TP=Nm6(0(-?scpb z2GPH#F3T>?wO~Af*8r{G@%ml6&Fr)P27h5ARP8$vtI&?Nhm-NqEQ3wQ_osgcdOe5Q zb`oJG4Y$~qhi7mOYnQHiKqrQ$mn+Jxch$H8Xk}UXDf!89`f>+-i#E}B&YkBLNqOgLIy*&$1-LHRN3nSr%`cBJY|u8AL#|@7pzT|(Pe~_}KZv%NEam3G`!W9) z3$T*grBuB^?DLpuzA@LGAIN(An)m%fBN->IMSD8pyJmkK9~K%KgV$3t(?(z2^y{&- zhd!5mL&vUt90I8@++|2lU*0QNSb|E0nduzSm)JC(VD_4^{sNcwUMa1_Pj#S|C^XV$ zbKiQKF9qwQq5djb({CHxPvqf9smi8zWQ3vAAP!>P&xZLv#`lMOYN|Y_@;vh>N-tE8 ziNtPxe)K|aB{-pX^7Jd%Ihb+%2#0}l;?n)-Z3m4Os*ffHdwIre7Fi&U7qAFpvZj|7 zZ%jtEetYsL@bj5-=Twj3gIA7biem9q27S5<)46iHf4}fVq@WcxEj0< zO@(kkRhrXVPF_qr3~8$4f4Pf{4*Q$;n!u^kajCE-uc*kk_0nLswWPw|h{y>P)l7a`t?B|^{K0|2z>%rg8`V_KkkaMl zMw*Edhw^CoR2zV163 zEu(XhSf4)aQe3t*ena^oqXyGhdrG_=tAHOVRCoKjhNjJPdcLoh^nX~%@OoNaTqJqb{BTHRz89sP zs0twGi^I;LQD?H9jLx;AO^>+($*gx9gwOtv>QIZSUURcS?uqFI?LSI3$s6=#6coO> zRm|or*km2uglTf}{E&*R)YimqTblO$B|X9PTh!^|5nkw)pGb_znC<0h)c^QzQD%VK z+)ptX3bB=evM6ukCxU=Nrnl>o9mIm*(Qt(?xaN2R(1>2W1QLD zi^LWk$i=u^|9@{30h_IjFKZJhbFrpJbm11Y7$J+YxtC@!0 zHcAY|U=rv3pMMX;6e4en!&eSoOibJqaUwTfMA_+Kni+AW3%eU5?2?n_@5{xn6XMAi z{)aDL^EbvhuEwCEDJUv@D>5P{D-3wirkyz5@KDakJeF~{m-Bc@{P(Qce1*5Y|MTWS zgG=n6%l7|73gl(z^w(g_D8%OU2r zhr~NFIpMCAm4NziXTsk)5X~M+>&iOxp zxyjoof%2pM`pcnf)hU^uGAF0I1f<%|*T+*H7aSA~`dXU0oAW)fk2Jp>!%`TOVx z5QdM-*x77f;XtqqH0Fs@P@m5PzO82(2-yNz)6;IJMlA|l-nwo|o9Qg$-Mk~i=>Z~OY)Lcfru2g1f z8>m=KlAxx+=0oriRBtY|vphCF8&5EKO9h{y&d5|F9Fmfmp59*rg=2dB=<<2z1%_9N z9bo$}NtP9^yjA`RW;k4>X8W6yS?Ty^eKl#=JWnh?Whz0DIxrF89E?aYizXy)UNqZ6 z{E<$*@OXxW(SCngNKs0oK1ZaiH3hOT1^)v-1dTfNKjhwb3ReXef?)7K2YNJxT~_T{ zoBKC8Y2VP;i2Kw2fIke}$zwEWpNmYSkyP`%HgN!n70c!9bN=_j()WCfv1nx`(6E^5 zKb4(Ng7D(<#?+7<51KO~Me|0f4>1!50eX7nK|tL3C16O=Fmp#!B3AL`z}tQf}&7!Rh?ZFPxFiz^E=Bg4y; z_&hXpFC9W_ae+~a;fn%u8tN%Km7GTS>fEWC9$`1{w!%`cp(YCj?=`o!Vp*KU_B zHb6u;z&(Dd58l}sz6)K$*n0DR>b2eegk@YrYK@lo+s1I9z>Zh3{qeEa6HoKmSmXU~ 
zNahGG0AU7QRvs7olZGAZO<3BW1fnPYQQFq!0_kCd`$28Amr>$AZ$<1!y>fnyPcW_5 zE62%%6c2|)zaAI41SK>moVlyV{bc^YOu*fHc6<>OWF2PsMyta5semI3z^(kL4hM@Z zm4%nP*X{k~2&UCGmi!e^8ZF3n93d$^=Az5aDQeF92N<9q%Bbw&swag-ov~YAM@0R} zTv%wFFO)I&&evr@Ax1&I*o?z(t(lsXbZ*>i2hawC-kHOrgWV+{^ug=bc9NSovu&HW z2S

    lACad$cn&Z=BbarFBBI!Ir2!6AFjrCZu;)ptSmo2eP`vMvjdNMKRRZ*+jDFm zmE6Wgb|$us&IKI+-G&OM6Da>_5fd8()sB+#l0rfTm4gOHZY zgBI%x>^=)`g}vt1;@sLB5UF(jZXAIhX-ia%WW0}`oPzw^mU?)suHwvg=jyz27OrqhS83mUZ&!(f0b#;GWbgW9{zb+XZ$H1{b^eN$x24%0f}u?PX5OA_UL7(b)Vk8)~`b7u!Q#}2NH+kF|ZBS zEhdtFKMSu>GMR>yCe4l~>f0L$xgvrhkJGwim`6SxD@)LyNfJsD<=rN?{16pz=`Tz( z8vJ1uAvOt&2P^b8uun1ru>M;es* zDQrJpPKS`2pN~Lz`RS9lf2i++Rveo#nqWy-sk`BMEC*r+I?l}%<|ie;ppnCq|C^^|%EjwHUK;$p^AC-dHj5#;?E3*}p^gEXV5F7H=QQ z596HlF%uJcJYFGHgarcjNPZwQ22@&G1EA-eZ{oVI6zsP(6%Ib?h>wm)sj1~}Z_XA6 ze{Q^`d|2!2Y*A~v=EqlIDJd!}>^A0C9nP#%tiRdc@_6wv#4jhf@ZZHAs-Tm}A(m=$ z37Q&9WrdwW914IPen)DcsNYKXW1h#cRfmm~0%;Oy6*f#%Y@Ol@s2h+NhW7IxAVaHy zv82@9$9C}&V3H!^Aat)p33q2Rdm`nYZb5*JwKkTDCp8gbY!E&K(RAef82+OO|*w=||W$rtY0 zT}-ZuIn?g&8B#N4%bd<;BNzJ zo}XIJH)s14tH90UkZw(nj}-t&oT{@K6c6D3d%*8*N<1A7VDuT5Oi}^8D_b?O4P;jKC?#D5%j| z*(=s=?@eN}plh@ojyXA86jxEXePp7L9;YM=k%Ya%_-NHN>YL}NdCR;Z1lE-F3>#n1 zZ{omz90=rJDau-~l(1UXl59ENFfm;}${^Nb!yu%Pai(h;e3DoI_dybpT^tU(`^WGR z%C$+^D0&82QpvQw8R$whSryi(=2^o3VgbT$N9rsO{Ha%5ws0*NA?%E_ZXg0QnJK;1 zE-x?pc?~s;Diss=Wba1SA03Fkc5B?}S{}?c<(Fw+1p^bp{cNx&&oVbi6wk< z*}r3{wQF|x5fMN)iLAFFEw({8@%Bf`gCo3@f+>r8jK=I!H4&ZR^m0ltDe?85W~KxP zW}SMR;~9TpA-jSN+uqTn;W4=v>Y$++Hf7(ug>HB*+riYTc^?^~`X8cNwR1E_-PP~Y2WnlsQSX_I%uI5d=UBra`abgDVlladm*?;%XoOqjk!6}MARa9xg8(7(+*K}@CKti-2Lv_)YH zF&rA~pJsd}DtkgFCr|j3*rlVdq6_&aQHg}4TZ}qVpO8q$eBy0s#(B^i;^0iv!cwz* zxFkCpkK`1@W;2|)s!XDp(BZ2S_B@f*_8cl_XZ58gyG{jn?-FyvNvm_#ToVksR*_1) z<(@Q&SFPGqqI46x5{|+{416-!kaqi;(<7Y)lXP7*m~G@3^W){;R#DB}=>8cYkPu*YAHGM#nBH*K3Or+CiBY?GsC*M4q54^tJpJ zLwBQz0z8VT(A;TDtb%+3v)8Nrp)(aCHWy6pbeo%RF)VXG>K% zJzmYTQv?jeDV99Fg};(;&bPT>U6r0&8h(SljgRAGDB!h4Q71AjEFOm4K&1?p;E#Y! zQp8s+DJ?`5wm2-F16b}jRC%7gogI?jz&gAF_mK^Zj_(ahYV|K9FyL@FkC zZ2kG~c41+8v6ks*?HY&8xYLu#xwlDC(bY-QU8$45397b`#@W+nDDowSB4T1|bgsy) z48JVvOypFqdY=QTToV&4xSt;iZ&OoLC`(LDkFgsY8$aW5v9-UNt{9PjCfJBGy>hoW zJi-4XUq!aNih~}?RKrwQxQW+pyno^QhqongSJ=)@LCA?qKqY_alJh<&$iRJ7l{LP4 z;=^=~?*a_{q?}MD)gRqgi_FBhmYGmRC?FX_?Q~i9Xci@n+1Be(=NyQ`GvHZf8tEE! 
z39*V#^;R_J4R*guDNtQ{v(#nMpYPUGs}G~@JU%_CetAmHSuFIKz;L-bW}BLNUsB?cjFzJxwho2Kxt3DiQ%H1?PA4!-*k*%apua|5gRK9gWBNK z1}{*n1)5{9I(`+FzkO<8qhi81us+_z{DRzW-$7YXVD=>(nz-O3#Hz^xUn@m286!z*Q4y{WxC%9M&7c2?S`?0e_Z|K)=q+uqs`MUFP ztuKnPvAU};BCpXVI*qm`1P`d1d!;K33=H!W&i$5-V16qOB6;oT=t{cnLb1s=J~vus zy1!gDF)d^V+skIh-3c9vG(WkEP7?Ww*3Q&aoe2kyS2)IpbbiN>5|X@w4yUE7X&>pV z{bgU?Ban3(89<&&0NO%JVP7mzwZrN>?9a0hLA+)h69crGO{z^pB`Phr<7X`;5d}0_ zVKsi;#9xXbu`J0G?<70Ji(gx#T$)5ow%I`Gt{E2$}rm^XHP6cmA|RynwX` z>nke!i}L3@iI2G!&JFr5ePn!mx;G*r>~v}&0sZUFzz{qR6JjqpEDtp4-ooE_TiavV zGn51d&keiohKaL0?HB}|Fy@<-bVDm6nzYFJh`3s6#(Y{v1qCL-rd8~EEGL*IztGg| z!dy6+fZY6ee}ihxD8jEgfD8e&8o2j#;djuk0fvgMv`;;x8r(Qd!h)URAou=^r8bB%XQG{ir?K~Z1|>AJbaWVf5eIxcDi zC`WvA-KQ1UZ{CGzJBNM{a4?Fc>@k>3jGQu%?PvYuGW9}^vOt2Ew~Fx%LEwu>N(WPZ_s zKWu@+7S**U=fyKPiKR4Cn~2n^ZL73{>Rro+@LDHC$APN3!A0|nB?_ghukZX-6xZHZ z$;COlqX9S!UkXqC0I{QGuWij}emnapp8X+2QFd;)9*6K=7jS9lY<7P&Znc!OQY)&k z5}!7gp*W7l&7g!yGOPHL-lz7EhM!_F+QKeM)@uN=3thrn3*Y`%cL`Pykj_EF;jkJy z+DTk{@8^T^HHGu9T`da>smX1ahOpG1iItIep1hg&X*>h!*ahPD_9T?ifT7@1I=;6F z&UceV6Ws5PY{}}C5a@!hfu0DIBN_X{RFU<)vKw2zU0@fjk5mh$CgOY!DJbbxr$G(A6J%n^(BiFvquxylD0VBk?JdY0SYz}GkHXSc3OY?O~m=L#* zr6)-w5@{@Eqx^#JSZI1x#%IcIHD^_L*Bn8XePtyu zZny583KzR#3Uiv{eMg}wE}gQ2;;xo^^QAy6Gu`6X?DZx2cb{(qE#?N_r?psf+;Rp$ zKwbX*ny5=p-`M!z>wB+#Df6M3xIPb^tgOUrB+ZtW==~8yNz=IjWa1~n+|10x_;|Uo zA9{S5xF{&C)fz^=t%8?e!mxZe;>@o=Ml1=cl*L8NjX5q1AXfkaTSBz>875kqZ>0v7 zs{A;ooX+}V3$JpM)1!f*jc%rM&sES1_IgxmX;RFXYh#jG zX(?oTkifdxgqF$5V@eu|@!^LOj5f^Qe>7qJB|0pSr_F3PjzHau<{8iI(o}!-n;>+R z1AHPA+Nl#Yu#hM?M3-u8f7s-a6tnK6;c@Q)pnvt3 zu_Fgt6`Sfb{6#kStWNT@=f*g)h-pRlm*&RGirWR1t=vbG$6OM(<=_8seRcfnVF5!s zFOu#adD>z+bRR7t(|oD~;0QGSN<>;3Lq zn8-foUQuzRYi>lf(?L^9h5>2<6Qd(*ny?GG-DUtg2$SfmKJr&vVv!qy0^{@<+Dbx1 zNvNhN$saqMba`?*DH6~_Z4luu1M3%#V8lpbVq#%oA$Y&46k6uyU?g@k_yT?*NNDKb z6DB%3Pl4jR+v*OGr6_G{d)4l#zAQc#`ObsRMP;qs@yh1_+TOzDDj%T;V3yby)>B!E zYnh!-G1-o%Je?7UTVZ$$DiLWzE%@45S;NC6wzk)Jxp~%Q=x||b;$lH%kgDs60lB%e z0Tprpo{s;iUF=bAwCuv-40MU)8Oe&2J!Z=LT_tP`5%Q9@m@+m06Y{-UiVe2UvC1b 
z4<;r@<>lwVzeYez?2D6;mse-K^9de4d~k3O1m56syWUleaRcl39T=GK75ZdjgWKVB z8MpOmdiuXu0GBh+9|M=D`EorSHFektmJPRHLJ#AQ}!N z^Squm+4c1FfKe>>`&sZa)6*{VRbVM>V)?CiqCXn_kDL)QP7R8ebl{&w2IWh8sDzI}Xr=%)q-^U>X&O`C)Rr%$I4UHMXu$?ag_ zVA9;XNP>C8y>&3(8`bq%cKU?YKD*hg6X?^;j|FejO`FOLrSOrnSBUf5>W^3k3`nfV$`FWqU|NFfeoJ z=$r!it)9y@ENqLLFcx~k_TEl<;z|<|HK)vAA4E_WXayE$Gn$JTsc&9TUTt!6h@<2V z=NBM_{3ZpNgi(8zoBqMLRVHKRTq{;pYfV^I17{($IyoUwfX+owVyue@K_UrRYp}^H zrCys(=5(3_vEb?XtS@ikc0J!c=KV;>%+3Z@Q6{U+Y#ff4_6zjccx-CjO|Tl%=?Um4 z$jL#F3-GD)jN6-@fiS35_eYQMaS#x9IGHm9#yo>z{ znJo%-P>{{geFOZwpK(>kzE%M@u-oHFopi;AsrmUk;5`>1NATE(%3q3#lEv`zR@l%DK?D6eR2Or+8wvep6dRbWgGoAoxBxZ90o(fl$c#Z!SE)TteO>}44q z8v6I*;(jZfCn9Yg5;4T}Xr&1)4n+I-={Y$8VBO0Lg79O*FukgX2y{~%jmK+ISt}rH zz|cSCW=BLpo;e6g_Ic;e7`{+jPX$Eu^p(=`n2E}!@dXV#Ax^=;cqn}GkgA*}hPv!- zr~L9CD-a?kt)dMwHavyzZH{`&$2HFgvhdsHSAq2M;q#T}o5hQzhw1I^ZwzR>y;idC z9!G$DD>it~ed&&ByYudwjqNq8k4pwA5VL~%8hWrMzG*L=AZy-UhToE8VC1!#*pIdbw9@hKyi?c<^j6do4t=YOa4QEwxJHJiROiU{QMd=c`!_xd_)Z+YlUakaSSVd{4W zI8b|$KI&u5kN=jYFDE~=-?bz095w-$^ED2exv>l%w~vvo;|Cx5AQj*qXc|{KoNl1t z7+xHGn+Qukx9r8Ur6eOWkRgYr54)vF_IM$u52%d$6b`sps1{bk{r$!{Ss4YPjQs;F zHCC77Gae5KanZz!#Y|w1`L`}3*vU+_?D@(w>A&{&;Ri<$RNcq&J%zFdI3lA?v&EHZ zG==N{Phh25zW-Jp@GcKDZYbt-r3vx>)4#;t_+fm?&x%cx4I9e4C)&jlR?xXm{T!3`<_cqjSp?$C z1x-Ql7ckU!_`~B7S~|ARuNFk>08>Bv7ZB6dM|Zpo23~#W;)CuQDIu-=@Ug*(0XgT* zjEekKfC;MQ3v=;faXCKTkmdTSMgtTf)aIL_^O-N&I-|*sm$i5yIj?T#4SLX6zbG>a z+7S+qm!IRWHTHIOwNOg%DH-z<*4u*-0ALiG7mnBb4==L*zbZS+uqN2B4UcY^!eA1T zQqq#bND+`yVWc7r5+aC*qzo9+F@}@^6O5`C;$>1dv0YPzm&#UioeBa-1 zKX>fsv*)?*`?|05s#6ejc2r4-6`(*WHR5onm}XB`DXGiH1MElKv`MAx1_#c9%A$-) z{b0xfOgSz{Q`6rOmehMf_!OjguUfTi^rD5;LGd`HQxniz1`neQlJvjl2eTl; ze#fQ>xtN=@BXct}+dlA76+ItXT4o_ckEzZLvMwrEaE{9jla{6?S-!D;yYI~U@pA5W zkCX$+y-WBmZrAk6GQ63wDd**E3iX4%S}LE9Jrk7W#3s{4`5ifFB_l8~dVV4hl(1SZ zWSW)WU>)$mY3Uo~DK-Wgb@qyy`_KRCZ&#aYnA5LIOY`le7cNy*Ze2ZpB_R>$;-6i1 zc)0Tlue&A8EE9sPV5gi8r;f{mzhuu4^KZ^W;}Cg~u0(8l7K)qZ373SI;*n`@&&2V| z9ZkL8PHLj1%&g8>LG^zG$xGz{DD~2YVw+}TTU(l@;jORGEForYRkha&9yj~nqbl=x 
zDLdc2nfIxDcI;7|+$OBROCzXO5?rZc(yhKmt^80$C3SzX()lKRAZEm%&3~(IZxqz4 zP+~kjmS-@U-Qpr3nPVnT|-KF3`sO zjwcHsgdwo%^=ozZ9%&I7nQidoQPHu+oJoo;Ec7Om$dH90p?=@wVnFBQt_-6; z^&$q6J6GJJnl!fy+~0+@W8>H5jwLnV(giIgjWz>OI>?3;6CZ@pY};HJ&uQDwEd2B? ziEua)8g`yVGc6%OQcMY05uM&hM|cpUNv|c080cFz7mG{;E-ELW&yac9s~FjlHc z`|^oUzdI`ir{vt2om*DXkBj$*iLh&i+&4{*v3yw~va+a>C1pTWkio12TNrV5YiRBw zqKp)+(Z0Obxh=Q0yO z#EMN!DU&x)b#)44X>oCmJ7uM%9VD$+1O5GV%b^n8z?^dJnk{m_!7aAo!vOmeZb`{w zOZ#HhoNKHMF;aKd97xgGe$F?kyP&G9PKE@lQif!&v7Ee ztpVIoTDr>Y>BXl2qNl(m1pf`VOD3DJaPKVufLK;w+FD%kWMvz-xMW>XQ6U%jm?o2Y zf;RVE<>1R|l9QUc#c?u%Mn9G@{e>OW@cw9f3*!FNKX5_)ieU$gpCxTPMG@NH)&7@R z)>IG`ElV8Yq3faUJuSiu>L32ocqstefb~T9j7bGs;(hnC%iVdZ-4uv5v64X zyqTT7!ccY@IbVM-u>{Lf2{6`ox(&=(ziK6>Z?}%3b;E-iUu?^#)%ZyrLHgmiqPn{4 z(1%91rZD-#H@fw0xkil?Y^Ekc++$jc20Mw+$y*V}jCx~G=)!_V;oQOYJY{32l8Nuu zc*FDPsG)3|PdG)}FcE9m?K(@2KBYCIFb>$Y7eu0)bDsyFp!fyVx|$0)r^B<;8HeGz zjG_o-_hQk+XOmZvVP66m6R^W(5Eyn=`i(_T+FOMQ;|K?{mdE1N_;UBYbaE}^e^tN0 zK3Z1p^z5&WU|pRs`Hr#m%g4!(-6Equx6T;h2Xx8QEJxwtRAml`!46g~lFwH5<(`HE z)-60Jj@(_S!C6xRpj%*~!6kFJ9FLY$dux(^IBw1}V=|?7&s~MX^6J%*7zTEbcj5Oke^U4lyWuo2sxvy0#4XSf4AHQ9wZ(9cr#;Xh;|&3G*EHGZHM#^DZi#gq@CoQmq4U?x6C5gu7JsV%&YJK#QO{G|& z3*4g6vu>Vk@&2(ozXmb|={<}t=~xa9G9Qe^$5=ktp6#CL83d-l1AHpxUQ;c*2UzSQ zIEZ*jDhEB~z-9)WeDlU?Pv}ZYF!PHo2QwVqn2yiX)^SM5(WhH3kHt35cs%0i=S?ay z&(%Bu#W`46TQ&!_?By}G<@jAwgLP0<-Ti79+y)PKw=eX&TcN93?(O-iru?y>>4&H? 
zh8%EZA#LUf4)&q%Eew9tPwq9i3bS9hL>nQt!oX*B+)Nnk>L<)Ft92-y$}D^ujUjQa z(xMN6{?{^zS-fm`V)JNtOUZTO?M?f7njI_LPQ&T~lEnVS)#H^$$(TVjSX<4!{PPE( zrS?LP0YVCVl>Nv`8X8H-#VY+Ne39BGM@KggR}w*b(R>ZSS#{1s$dGVn=P?k8$IQYa zEYuYV8X}PJV){&@r0DJe$WL%^cz@QxvNH7z6Fp&zB>!$# z)M4}i%GcA;Q|ecBZB<#-XN0SwGo{h=%L1+piTn_8!AZkR?_tdmgw5qE`+F?c#&fqS zI(>CDZRjH&QV2N~4vUjqZiZ7W=idzWu|Zl)J7U;HR_|HJ3>3)R_H|)v1Sp@F*_Cy~ zjX>v;n87GATJ+$IJgZm$%4m=vG`gZ~C4f2Q6iXXXAbU;o1}KT$g1V_UyFgwnH3^bxYR7m#HzD?MT@mzgQei1CaWuddwKC>jnBK^zsr-!yS68>Zx}UKxP#s zKm7h7mWfkpiQ4V}4D4KXN}`%R+2ubY_oWGHUL8zNPxpKC?Hw2wZm_J)HWu2}AMWJW z-eqKB0&(!)0p)rM5wJZ!{VyebxjU}y`_No)3WzS1z1eDc*jV?X4uDYEG!G$M`t$%< z7quPsW8ncHi64Mw#rX}V!p~nj?Qt1+6##I>^C5Ci@QTG7Y+(_up*gn(G`|Q;G~Rt4 zxLIGuJ7c%#fY-PD;1kIvLl-z)#}F+}baIP3|bKOHbIW zJ1Nqg1mTFtAD^ooO?^-H=@^dYMW9l-kx4HB{{}$41 zKhe$65xUhLVjH}YTnGvFBeVj#$qZANvjD^ErW1vTjr^SIZ(5FYw?ox6R`+V7Z+Kh6 zbE)jIZ|BQ^pDOhj&B7#kXzkzYo;+N)*L%^YWfx zv7HyI9>{VBYy2h>AIe<aU(QT9&`x^|{acAJUgE?ukZ-Oktn;v9XM-FggB= z-`B1=xw)}lK6YB|fDa6Oz(*X!WoAXzWwoVz(9%Y-P(9zb;$bL5*Y}=~=03YeMC)?Q z#9X`>BCb?x`|Px*ctIZZwyhfso3R3Bz+XaEn}M6YzB~KVcJywGk#^M- ztPy8O;N|dSN|ui}y%E~`5?P)Azdo5iJG*v!AB)YJP!BDZ&R7gGJhIWYE)p_-(=?JG zQNn*!gkCp live per-file pass/fail (score.py) +// conformance/score.py -> pinned upstream ref + sha256 +// conformance/upstream_main.py -> re-hashed here to re-verify the pin +// git log of conformance_status.csv -> the over-time chart (real commits) +// +// A file passes iff the official calculator's `errors_diff` is empty. Every +// number the website shows is whatever that scorer last produced and committed — +// and the over-time chart is read straight from this file's GIT history, not a +// hand-maintained ledger, so it cannot drift from what actually happened. 
+import { readFileSync, existsSync, statSync } from "fs"; +import { execFileSync } from "child_process"; +import { createHash } from "crypto"; +import { dirname, join } from "path"; +import { fileURLToPath } from "url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const REPO_ROOT = join(__dirname, "../../.."); +const CONF_DIR = join(REPO_ROOT, "conformance"); +const STATUS_REL = "conformance/conformance_status.csv"; +const STATUS_CSV = join(CONF_DIR, "conformance_status.csv"); +const SCORE_PY = join(CONF_DIR, "score.py"); +const UPSTREAM_MAIN = join(CONF_DIR, "upstream_main.py"); + +// The day the REAL python/typing calculator replaced the rigged in-repo harness +// (the harness excluded 9 diagnostic codes and ignored false positives, inflating +// the score to a fake 100%). Commits dated on/after this used the official scorer. +const OFFICIAL_SINCE = "2026-06-23"; + +// The CSV stores lowercase category slugs; these render the few that are not a +// plain title-case word. Everything else falls back to capitalising the slug. +const CATEGORY_LABELS = { + typeddicts: "TypedDicts", + namedtuples: "NamedTuples", + typeforms: "TypeForms", + specialtypes: "Special types", +}; + +const MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]; + +const round1 = (n) => Math.round(n * 10) / 10; +const labelFor = (slug) => CATEGORY_LABELS[slug] || (slug ? slug.charAt(0).toUpperCase() + slug.slice(1) : "—"); + +// "2026-06-21" -> "Jun 21" (manual parse — no timezone surprises). +function shortDate(iso) { + const [, m, d] = iso.split("-").map((p) => parseInt(p, 10)); + return Number.isFinite(m) && Number.isFinite(d) ? `${MONTHS[m - 1]} ${d}` : iso; +} + +// Pull a `NAME = "value"` string constant straight out of score.py so the pin +// shown on the website is the exact one the scorer enforces, not a copy. 
+function constFromScorePy(name) { + if (!existsSync(SCORE_PY)) return null; + const src = readFileSync(SCORE_PY, "utf-8"); + const m = src.match(new RegExp(`^${name}\\s*=\\s*"([^"]+)"`, "m")); + return m ? m[1] : null; +} + +// Tally one CSV body (pass/total/fp/missed) from its raw text. +function tally(csvText) { + const rows = csvText.split(/\r?\n/).slice(1).filter((l) => l.trim() && !l.startsWith("#")); + const t = { pass: 0, total: 0, fp: 0, missed: 0, byFile: rows }; + for (const line of rows) { + const f = line.split(","); + if (f.length < 7) continue; + t.total += 1; + if (f[3] === "PASS") t.pass += 1; + t.missed += parseInt(f[5], 10) || 0; + t.fp += parseInt(f[6], 10) || 0; + } + return t; +} + +function parseStatus() { + if (!existsSync(STATUS_CSV)) return null; + const text = readFileSync(STATUS_CSV, "utf-8"); + const t = tally(text); + if (!t.total) return null; + + const cats = new Map(); + const failing = []; + let caught = 0; + for (const line of t.byFile) { + const f = line.split(","); + if (f.length < 7) continue; + const passed = f[3] === "PASS"; + const slug = f[2]; + const missed = parseInt(f[5], 10) || 0; + const fp = parseInt(f[6], 10) || 0; + caught += parseInt(f[4], 10) || 0; + if (!cats.has(slug)) cats.set(slug, { slug, label: labelFor(slug), pass: 0, total: 0 }); + const entry = cats.get(slug); + entry.total += 1; + entry.pass += passed ? 
1 : 0; + if (!passed) failing.push({ file: f[1], category: slug, missed, fp }); + } + + const categories = [...cats.values()] + .filter((c) => c.slug) + .map((c) => ({ ...c, pct: round1((c.pass / c.total) * 100) })) + .sort((a, b) => a.label.localeCompare(b.label)); + + return { + pass: t.pass, + total: t.total, + fail: t.total - t.pass, + caught, + missed: t.missed, + fp: t.fp, + scorePct: round1((t.pass / t.total) * 100), + categories, + categoriesTotal: categories.length, + categoriesPass100: categories.filter((c) => c.pass === c.total).length, + failing: failing.sort((a, b) => b.fp + b.missed - (a.fp + a.missed)), + }; +} + +function git(args) { + // stderr ignored: early commits hold the file under an old path, so `git show` + // legitimately fails for those — we skip them, no need to spam the build log. + return execFileSync("git", args, { cwd: REPO_ROOT, encoding: "utf-8", maxBuffer: 1 << 26, stdio: ["ignore", "pipe", "ignore"] }); +} + +// The over-time series, read from the GIT history of conformance_status.csv. +// One real data point per commit that changed the file: its commit date and the +// score that commit recorded. Points dated before OFFICIAL_SINCE were produced +// by the rigged harness; on/after, by the official calculator. 
+function gitHistory() { + let log; + try { + log = git(["log", "--follow", "--format=%H|%cs", "--", STATUS_REL]); + } catch { + return []; + } + const points = []; + for (const line of log.split(/\r?\n/).filter(Boolean)) { + const [hash, date] = line.split("|"); + let csv; + try { + csv = git(["show", `${hash}:${STATUS_REL}`]); + } catch { + continue; + } + const t = tally(csv); + if (!t.total) continue; + points.push({ + hash: hash.slice(0, 8), + date, + shortDate: shortDate(date), + pass: t.pass, + total: t.total, + fp: t.fp, + missed: t.missed, + score: round1((t.pass / t.total) * 100), + official: date >= OFFICIAL_SINCE, + }); + } + return points.reverse(); // oldest -> newest +} + +// Inline-SVG geometry for the over-time chart. Computed here (testable, DRY) so +// the Nunjucks include only loops over coordinates. Points are spaced evenly by +// commit (each is a real event); the y-axis is the pass percentage 0–100. +function buildChart(points) { + if (points.length < 2) return null; + const width = 760, height = 360, left = 48, right = 24, top = 28, bottom = 64; + const plotW = width - left - right, plotH = height - top - bottom; + const n = points.length; + const xAt = (i) => round1(left + (i / (n - 1)) * plotW); + const yAt = (score) => round1(top + (1 - score / 100) * plotH); + + let lastLabel = null; + const pts = points.map((p, i) => { + const showDate = p.shortDate !== lastLabel; + lastLabel = p.shortDate; + return { ...p, i, x: xAt(i), y: yAt(p.score), showDate }; + }); + const yTicks = [0, 25, 50, 75, 100].map((value) => ({ value, y: yAt(value) })); + + const rigged = pts.filter((p) => !p.official); + const official = pts.filter((p) => p.official); + const lastRigged = rigged[rigged.length - 1]; + const firstOfficial = official[0]; + const peak = pts.reduce((a, b) => (b.score > a.score ? 
b : a), pts[0]); + + return { + width, height, left, right, top, bottom, + baselineY: yAt(0), + pts, + yTicks, + riggedPolyline: rigged.map((p) => `${p.x},${p.y}`).join(" "), + officialPolyline: official.map((p) => `${p.x},${p.y}`).join(" "), + // The correction "cliff": last rigged point down to the first official one. + drop: lastRigged && firstOfficial + ? { x1: lastRigged.x, y1: lastRigged.y, x2: firstOfficial.x, y2: firstOfficial.y, from: lastRigged.score, to: firstOfficial.score } + : null, + peak, + current: pts[pts.length - 1], + }; +} + +export default function () { + const status = parseStatus(); + if (!status) { + return { hasData: false, scorePct: null, categories: [], failing: [], history: [], chart: null }; + } + + const pinnedRef = constFromScorePy("PINNED_TYPING_REF"); + const sha256 = constFromScorePy("UPSTREAM_MAIN_SHA256"); + // Re-verify the committed calculator at build time — the page states this. + let liveSha = null, upstreamBytes = null, verified = false; + if (existsSync(UPSTREAM_MAIN)) { + const raw = readFileSync(UPSTREAM_MAIN); + liveSha = createHash("sha256").update(raw).digest("hex"); + upstreamBytes = statSync(UPSTREAM_MAIN).size; + verified = sha256 != null && liveSha === sha256; + } + + const history = gitHistory(); + return { + hasData: true, + ...status, + pinnedRef, + sha256, + sha256Short: sha256 ? sha256.slice(0, 12) : null, + liveSha256Short: liveSha ? liveSha.slice(0, 12) : null, + verified, + upstreamBytes, + officialSince: OFFICIAL_SINCE, + history, + chart: buildChart(history), + }; +} diff --git a/website/src/_includes/conformance-chart.njk b/website/src/_includes/conformance-chart.njk new file mode 100644 index 00000000..8377d459 --- /dev/null +++ b/website/src/_includes/conformance-chart.njk @@ -0,0 +1,45 @@ +{# + Shared PEP-conformance over-time chart — the SINGLE source of truth for + rendering the history of conformance/conformance_status.csv in EVERY locale. 
+ Pure inline SVG (no JS, no chart library), data-driven from _data/conformance.js + (which reads the file's real git history). Pages supply only translated prose. + + WHITESPACE: this macro is embedded inside MARKDOWN pages. markdown-it ends a + raw-HTML block at the first blank line, so the rendered SVG MUST contain no + blank lines or it gets shredded (text nodes leak out of ). Every njk + control tag therefore uses `{%- ... -%}` trimming to keep the output contiguous. + + Args: + c — the global `conformance` data object (from _data/conformance.js) + t — locale strings: { label, heading, subhead, prevLegend, officialLegend, + dropNote, caption } — rendered with `| safe` (may contain inline HTML). +#} +{%- macro chart(c, t) -%} +{%- if c.chart -%} +{%- set ch = c.chart -%} +
    +
    {{ t.label }}{{ t.heading | safe }} + +{%- for tick in ch.yTicks %} +{{ tick.value }}% +{%- endfor %} + +{%- if ch.drop %} + +{%- endif %} + +{%- for p in ch.pts %} +{{ p.shortDate }} ({{ p.hash }}): {{ p.score }}% — {{ p.pass }}/{{ p.total }}, {{ p.fp }} false positives{{ ' · official calculator' if p.official else ' · earlier in-repo harness' }} +{%- if p.showDate %} +{{ p.shortDate }} +{%- endif %} +{%- endfor %} +{{ ch.peak.score }}% +{{ ch.current.score }}% + +

    {{ t.dropNote | safe }}

    +
    • {{ t.prevLegend | safe }}
    • {{ t.officialLegend | safe }}
    +

    {{ t.caption | safe }}

    + +{%- endif -%} +{%- endmacro -%} diff --git a/website/src/assets/css/styles.css b/website/src/assets/css/styles.css index 6fe3f6d6..d9c2137d 100644 --- a/website/src/assets/css/styles.css +++ b/website/src/assets/css/styles.css @@ -998,6 +998,97 @@ button { cursor: pointer; font-family: inherit; border: none; background: none; text-underline-offset: 2px; } +/* ── PEP conformance over-time chart ───────────────────────── */ +.conf-chart { + margin: var(--space-8) 0; + padding: var(--space-6); + background: var(--color-bg-secondary); + border: 1px solid var(--color-border); + border-radius: var(--radius-lg); +} +.conf-chart__head { display: flex; flex-direction: column; gap: var(--space-1); margin-bottom: var(--space-4); } +.conf-chart__label { + font-size: 0.75rem; + letter-spacing: 0.08em; + text-transform: uppercase; + color: var(--color-primary); + font-weight: 600; +} +.conf-chart__title { font-size: 1.125rem; font-weight: 600; color: var(--color-text-primary); } +.conf-chart__svg { display: block; width: 100%; height: auto; overflow: visible; } +.conf-chart__grid { stroke: var(--color-border); stroke-width: 1; } +.conf-chart__axis, .conf-chart__date { fill: var(--color-text-muted); font-family: var(--font-mono); font-size: 11px; } +.conf-chart__line { fill: none; stroke-width: 2.5; stroke-linejoin: round; stroke-linecap: round; } +.conf-chart__line--rigged { stroke: var(--color-warning); stroke-dasharray: 5 4; opacity: 0.85; } +.conf-chart__line--official { stroke: var(--color-primary); } +.conf-chart__drop { stroke: var(--color-error); stroke-width: 3; stroke-linecap: round; } +.conf-chart__dot--rigged { fill: var(--color-bg-secondary); stroke: var(--color-warning); stroke-width: 2; } +.conf-chart__dot--official { fill: var(--color-primary); } +.conf-chart__value { font-family: var(--font-mono); font-size: 14px; font-weight: 700; } +.conf-chart__value--peak { fill: var(--color-warning); } +.conf-chart__value--current { fill: var(--color-error); } 
+.conf-chart__drop-note { + margin: var(--space-5) 0 0; + font-size: 0.9375rem; + line-height: 1.6; + color: var(--color-text-secondary); +} +.conf-chart__drop-note strong { color: var(--color-error); } +.conf-chart__legend { + list-style: none; + display: flex; + flex-wrap: wrap; + gap: var(--space-5); + margin: var(--space-4) 0 0; + padding: 0; + font-size: 0.8125rem; + color: var(--color-text-secondary); +} +.conf-chart__legend li { display: flex; align-items: center; gap: var(--space-2); } +.conf-chart__swatch { width: 18px; height: 3px; border-radius: 2px; display: inline-block; } +.conf-chart__swatch--rigged { background: var(--color-warning); } +.conf-chart__swatch--official { background: var(--color-primary); } +.conf-chart__caption { margin: var(--space-4) 0 0; font-size: 0.8125rem; color: var(--color-text-muted); line-height: 1.6; } +.conf-chart__caption code { font-family: var(--font-mono); font-size: 0.85em; } + +/* ── Conformance methodology page bits ─────────────────────── */ +.conf-correction { + display: flex; + align-items: center; + gap: var(--space-4); + flex-wrap: wrap; + margin: var(--space-6) 0; + padding: var(--space-5) var(--space-6); + background: var(--color-bg-secondary); + border: 1px solid var(--color-border-bright); + border-radius: var(--radius-lg); +} +.conf-correction__old, .conf-correction__new { font-family: var(--font-mono); font-weight: 700; font-size: 1.75rem; } +.conf-correction__old { color: var(--color-warning); text-decoration: line-through; text-decoration-thickness: 2px; } +.conf-correction__new { color: var(--color-primary); } +.conf-correction__arrow { color: var(--color-text-muted); font-size: 1.5rem; } +.conf-correction__text { font-size: 0.9375rem; color: var(--color-text-secondary); flex: 1; min-width: 220px; } +.conf-cat-bar { + display: inline-block; + height: 8px; + border-radius: 4px; + background: var(--color-primary); + vertical-align: middle; + min-width: 2px; +} +.conf-verified { + display: inline-flex; + 
align-items: center; + gap: var(--space-2); + padding: var(--space-1) var(--space-3); + border-radius: var(--radius-sm); + background: rgba(52, 211, 153, 0.12); + border: 1px solid rgba(52, 211, 153, 0.35); + color: var(--color-success); + font-family: var(--font-mono); + font-size: 0.8125rem; +} + /* ── Responsive ────────────────────────────────────────────── */ @media (max-width: 640px) { .section-heading { font-size: 1.75rem; } diff --git a/website/src/docs/conformance.md b/website/src/docs/conformance.md new file mode 100644 index 00000000..4b878da6 --- /dev/null +++ b/website/src/docs/conformance.md @@ -0,0 +1,129 @@ +--- +layout: layouts/docs.njk +title: "How Basilisk Calculates PEP Conformance" +description: "Exactly how Basilisk's PEP conformance score is computed — the byte-identical, sha256-pinned python/typing calculator, the wrapper, the proof it isn't an approximation, and the honest correction from a rigged 100% to the real number." +keywords: pep conformance, python typing conformance suite, basilisk conformance score, type checker scoring, python/typing calculator +date: 2026-06-23 +dateModified: 2026-06-23 +author: The Basilisk Project +eleventyNavigation: + key: Conformance + order: 8 +--- +{% from "conformance-chart.njk" import chart %} + +# How we calculate PEP conformance + +Basilisk's headline conformance number is **{{ conformance.scorePct }}%** — **{{ conformance.pass }} of {{ conformance.total }}** test files passing, with **{{ conformance.fp }} false positives** and **{{ conformance.missed }} missed required errors** still to clear. {{ conformance.caught }} required errors are caught. {{ conformance.categoriesPass100 }} of {{ conformance.categoriesTotal }} categories pass at 100%. + +We do not grade ourselves. The number above is produced by the **official `python/typing` conformance calculator** — the exact tool that grades pyright, mypy, pyrefly, ty, zuban, and pycroscope — run **unmodified**. 
This page shows precisely how, proves it is the real tool and not an approximation, and is fully transparent about the bug that once inflated this number to a fake 100%. + +{% if conformance.verified %} +

    ✓ verified at build — upstream_main.py sha256 {{ conformance.sha256Short }}… matches the pin

    +{% endif %} + +## Full transparency: the number used to be wrong + +For months this site reported a number that climbed all the way to **100%**. That was a lie produced by a since-removed *in-repo* harness that **excluded 9 diagnostic codes from scoring and ignored false positives entirely**. When we replaced it with the real `python/typing` calculator, the honest number dropped to **{{ conformance.scorePct }}%**. + +
    + 100% + + {{ conformance.scorePct }}% + Not a regression — a correction. The checker did not get worse; the scorer got honest. 100% remains the target, not a present-day claim. +
    + +The chart below is read straight from the **git history of `conformance/conformance_status.csv`** at build time — one point per commit that changed the file, plotting the score that commit actually recorded. Nothing here is hand-typed. + +{{ chart(conformance, { + "label": "Conformance score over time", + "heading": "A rigged climb to 100%, then the official calculator told the truth", + "riggedLegend": "Old in-repo harness — excluded 9 codes, ignored false positives", + "officialLegend": "Official python/typing calculator", + "dropNote": "On " + conformance.chart.peak.shortDate + " the in-repo harness reported a full " + conformance.chart.peak.score + "%. Run for the first time on " + conformance.chart.current.shortDate + ", the official calculator reports " + conformance.chart.current.score + "%.", + "caption": "Each dot is a real commit to conformance/conformance_status.csv; the series is its git log, recomputed every build. Hover a point for its date, commit, score, and false-positive count." +}) }} + +## Proof the scoring is the official tool, not an approximation + +Four checks, all reproducible against the files in this repository. + +### (a) The calculator is byte-identical to upstream + +Download `conformance/src/main.py` from [`python/typing@{{ conformance.pinnedRef }}`](https://github.com/python/typing/blob/main/conformance/src/main.py) and diff it against our committed [`conformance/upstream_main.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/upstream_main.py): + +- **upstream (downloaded):** `{{ conformance.sha256Short }}…` +- **committed in repo:** `{{ conformance.liveSha256Short }}…` + +Same sha256, same {{ conformance.upstreamBytes }} bytes, **zero-line diff**. It is *the* file, not a copy-with-edits. + +### (b) We call upstream's own functions — we don't reimplement them + +The entire scoring algorithm is two functions, `get_expected_errors` and `diff_expected_errors`, inside that committed file. 
`score.py` imports them and calls them; it contains **zero scoring logic of its own**. The call shapes match upstream's own call in the same file: + +- upstream (`upstream_main.py:175`): `diff_expected_errors(type_checker, test_case, output, ignored_errors)` +- ours (`score.py:287`): `diff_errors(checker, f, output, [])` + +Same four arguments, same order. + +### (c) Pass/fail is upstream's exact rule + +A file passes **iff** the diff string is empty — upstream's literal rule: + +- upstream (`upstream_main.py:185`): `"Fail" if errors_diff.strip() else "Pass"` +- ours (`score.py:291`): `passed = not diff.strip()` + +### (d) Tamper-proofing is live + +`score.py` re-hashes the calculator on **every run** and refuses to score if the sha256 doesn't match the pin (`score.py:99`), so the official file cannot silently drift. This website re-hashes it again at build time — that is the green badge above. + +### (e) It runs and produces the number + +Live, against the real compiled binary: **{{ conformance.scorePct }}% ({{ conformance.pass }}/{{ conformance.total }})**, {{ conformance.fp }} false positives, {{ conformance.missed }} missed — gate **PASS**. That is the strictest grading: **errors *and* warnings count**, the same way the reference checker pyright is graded upstream. + +## Why a wrapper exists at all + +The only Basilisk-specific code is a `BasiliskTypeChecker` **adapter** — and even that is not a departure from the method. Upstream requires one adapter per checker (`PyrightTypeChecker`, `MypyTypeChecker`, …); ours runs `basilisk check --output json` and shapes the result into the `{line: [errors]}` dict the official functions consume. That is the contract every checker fulfills. + +`upstream_main.py` cannot be run directly to score Basilisk — **by design**. 
It is a batch test harness for the `python/typing` maintainers, not a single-checker scorer: + +- it imports `tomli`, `tomlkit`, `options`, `reporting`, `test_groups`, `type_checker` at module load — extra deps and a TOML config/reporting pipeline irrelevant to "score this one binary"; +- it has no Basilisk adapter — it only knows pyright/mypy/pyrefly/ty, with no way to invoke our binary; +- it writes per-checker TOML result files and an HTML matrix across all checkers — not a CI gate. + +So the wrapper is the **minimum glue** to use upstream's real scoring without forking it: + +1. **Adapter** — run the `basilisk` binary, turn its JSON into the `{line: errors}` dict (the one thing upstream genuinely can't do for us). +2. **Loader** — import the two scoring functions out of the committed file behind stubs for those unrelated imports, *after* verifying the sha256. The stub module is not manipulation of the scoring — the two functions touch none of those imports; it just lets the file import when `tomlkit` et al. aren't installed. +3. **Gate** — compare the live {{ conformance.scorePct }}% / {{ conformance.fp }} against `coverage-thresholds.json` and exit non-zero on any regression. + +The alternative — editing `upstream_main.py` to add our adapter and strip its deps — would break the byte-identical guarantee that makes proof (a) possible. The wrapper exists precisely so the official file stays untouched and verifiable. **The split is the honest one: official calculator = committed and unmodified; our glue = a separate, auditable file.** + +## Where each category stands today + +Read live from `conformance/conformance_status.csv` at build time: + + + + + {% for cat in conformance.categories %} + + + + + + + {% endfor %} + +
    CategoryPassingScore
    {{ cat.label }}{{ cat.pass }} / {{ cat.total }}{{ cat.pct }}%
    + +## Reproduce it yourself + +```bash +# Builds the binary, fetches the (git-ignored) fixtures, runs the official +# python/typing calculator against them, writes conformance_status.csv, and +# enforces the ratchet gate from coverage-thresholds.json. +make conformance +``` + +Everything above lives in two files: [`conformance/score.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/score.py) (our auditable glue) and [`conformance/upstream_main.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/upstream_main.py) (the official calculator, committed and sha256-pinned). The full annotation rules are documented in the [python/typing conformance README](https://github.com/python/typing/blob/main/conformance/README.md). From 3239ec2ee05195821c3617d645f847971c12eaf9 Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 22:07:05 +1000 Subject: [PATCH 11/12] Website fixes --- .gitignore | 5 + website/src/_data/conformance.js | 21 +-- website/src/_includes/conformance-chart.njk | 2 +- website/src/assets/css/styles.css | 20 +-- website/src/docs/conformance.md | 136 +++++++++----------- 5 files changed, 88 insertions(+), 96 deletions(-) diff --git a/.gitignore b/.gitignore index 87909bf7..6e3faf1a 100644 --- a/.gitignore +++ b/.gitignore @@ -151,3 +151,8 @@ URGENT_READ_ME_NOW.md .deslop-cache/ .ghissues/ + +website/_verify/ + + +/*.png \ No newline at end of file diff --git a/website/src/_data/conformance.js b/website/src/_data/conformance.js index 2857b8bc..59081046 100644 --- a/website/src/_data/conformance.js +++ b/website/src/_data/conformance.js @@ -25,9 +25,10 @@ const STATUS_CSV = join(CONF_DIR, "conformance_status.csv"); const SCORE_PY = join(CONF_DIR, "score.py"); const UPSTREAM_MAIN = join(CONF_DIR, "upstream_main.py"); -// The day the REAL python/typing calculator replaced the rigged in-repo harness -// (the harness excluded 9 diagnostic codes and ignored false 
positives, inflating -// the score to a fake 100%). Commits dated on/after this used the official scorer. +// The day the official python/typing calculator replaced our earlier in-repo +// script. That script excluded some diagnostic codes and did not count false +// positives, so it miscalculated the score (up to 100%). Commits dated on/after +// this used the official calculator; before, the earlier in-repo measurement. const OFFICIAL_SINCE = "2026-06-23"; // The CSV stores lowercase category slugs; these render the few that are not a @@ -127,7 +128,7 @@ function git(args) { // The over-time series, read from the GIT history of conformance_status.csv. // One real data point per commit that changed the file: its commit date and the // score that commit recorded. Points dated before OFFICIAL_SINCE were produced -// by the rigged harness; on/after, by the official calculator. +// by the earlier in-repo script; on/after, by the official calculator. function gitHistory() { let log; try { @@ -180,9 +181,9 @@ function buildChart(points) { }); const yTicks = [0, 25, 50, 75, 100].map((value) => ({ value, y: yAt(value) })); - const rigged = pts.filter((p) => !p.official); + const previous = pts.filter((p) => !p.official); const official = pts.filter((p) => p.official); - const lastRigged = rigged[rigged.length - 1]; + const lastPrevious = previous[previous.length - 1]; const firstOfficial = official[0]; const peak = pts.reduce((a, b) => (b.score > a.score ? b : a), pts[0]); @@ -191,11 +192,11 @@ function buildChart(points) { baselineY: yAt(0), pts, yTicks, - riggedPolyline: rigged.map((p) => `${p.x},${p.y}`).join(" "), + prevPolyline: previous.map((p) => `${p.x},${p.y}`).join(" "), officialPolyline: official.map((p) => `${p.x},${p.y}`).join(" "), - // The correction "cliff": last rigged point down to the first official one. - drop: lastRigged && firstOfficial - ? 
{ x1: lastRigged.x, y1: lastRigged.y, x2: firstOfficial.x, y2: firstOfficial.y, from: lastRigged.score, to: firstOfficial.score } + // The correction "cliff": last earlier-era point down to the first official one. + drop: lastPrevious && firstOfficial + ? { x1: lastPrevious.x, y1: lastPrevious.y, x2: firstOfficial.x, y2: firstOfficial.y, from: lastPrevious.score, to: firstOfficial.score } : null, peak, current: pts[pts.length - 1], diff --git a/website/src/_includes/conformance-chart.njk b/website/src/_includes/conformance-chart.njk index 8377d459..60ac1cc0 100644 --- a/website/src/_includes/conformance-chart.njk +++ b/website/src/_includes/conformance-chart.njk @@ -23,7 +23,7 @@ {%- for tick in ch.yTicks %} {{ tick.value }}% {%- endfor %} - + {%- if ch.drop %} {%- endif %} diff --git a/website/src/assets/css/styles.css b/website/src/assets/css/styles.css index d9c2137d..faba5ba6 100644 --- a/website/src/assets/css/styles.css +++ b/website/src/assets/css/styles.css @@ -1019,14 +1019,16 @@ button { cursor: pointer; font-family: inherit; border: none; background: none; .conf-chart__grid { stroke: var(--color-border); stroke-width: 1; } .conf-chart__axis, .conf-chart__date { fill: var(--color-text-muted); font-family: var(--font-mono); font-size: 11px; } .conf-chart__line { fill: none; stroke-width: 2.5; stroke-linejoin: round; stroke-linecap: round; } -.conf-chart__line--rigged { stroke: var(--color-warning); stroke-dasharray: 5 4; opacity: 0.85; } -.conf-chart__line--official { stroke: var(--color-primary); } -.conf-chart__drop { stroke: var(--color-error); stroke-width: 3; stroke-linecap: round; } -.conf-chart__dot--rigged { fill: var(--color-bg-secondary); stroke: var(--color-warning); stroke-width: 2; } -.conf-chart__dot--official { fill: var(--color-primary); } +/* Climb to 100% = the earlier (incorrect) era, in red. */ +.conf-chart__line--prev { stroke: var(--color-error); } +/* The drop and everything after = the official (correct) era, in green. 
*/ +.conf-chart__line--official { stroke: var(--color-success); } +.conf-chart__drop { stroke: var(--color-success); stroke-width: 3; stroke-linecap: round; } +.conf-chart__dot--prev { fill: var(--color-error); } +.conf-chart__dot--official { fill: var(--color-success); } .conf-chart__value { font-family: var(--font-mono); font-size: 14px; font-weight: 700; } -.conf-chart__value--peak { fill: var(--color-warning); } -.conf-chart__value--current { fill: var(--color-error); } +.conf-chart__value--peak { fill: var(--color-error); } +.conf-chart__value--current { fill: var(--color-success); } .conf-chart__drop-note { margin: var(--space-5) 0 0; font-size: 0.9375rem; @@ -1046,8 +1048,8 @@ button { cursor: pointer; font-family: inherit; border: none; background: none; } .conf-chart__legend li { display: flex; align-items: center; gap: var(--space-2); } .conf-chart__swatch { width: 18px; height: 3px; border-radius: 2px; display: inline-block; } -.conf-chart__swatch--rigged { background: var(--color-warning); } -.conf-chart__swatch--official { background: var(--color-primary); } +.conf-chart__swatch--prev { background: var(--color-error); } +.conf-chart__swatch--official { background: var(--color-success); } .conf-chart__caption { margin: var(--space-4) 0 0; font-size: 0.8125rem; color: var(--color-text-muted); line-height: 1.6; } .conf-chart__caption code { font-family: var(--font-mono); font-size: 0.85em; } diff --git a/website/src/docs/conformance.md b/website/src/docs/conformance.md index 4b878da6..2e88396f 100644 --- a/website/src/docs/conformance.md +++ b/website/src/docs/conformance.md @@ -1,7 +1,7 @@ --- layout: layouts/docs.njk -title: "How Basilisk Calculates PEP Conformance" -description: "Exactly how Basilisk's PEP conformance score is computed — the byte-identical, sha256-pinned python/typing calculator, the wrapper, the proof it isn't an approximation, and the honest correction from a rigged 100% to the real number." 
+title: "How Basilisk Measures PEP Conformance" +description: "How Basilisk's PEP conformance score is measured with the official python/typing conformance suite — what the suite is, how scoring works, the byte-identical pinned calculator we run, and the correction we made to our own scoring." keywords: pep conformance, python typing conformance suite, basilisk conformance score, type checker scoring, python/typing calculator date: 2026-06-23 dateModified: 2026-06-23 @@ -12,110 +12,94 @@ eleventyNavigation: --- {% from "conformance-chart.njk" import chart %} -# How we calculate PEP conformance +# How we measure PEP conformance -Basilisk's headline conformance number is **{{ conformance.scorePct }}%** — **{{ conformance.pass }} of {{ conformance.total }}** test files passing, with **{{ conformance.fp }} false positives** and **{{ conformance.missed }} missed required errors** still to clear. {{ conformance.caught }} required errors are caught. {{ conformance.categoriesPass100 }} of {{ conformance.categoriesTotal }} categories pass at 100%. +Basilisk is scored by the **official `python/typing` conformance suite** — the same test suite and scoring tool the typing community uses to grade pyright, mypy, pyrefly, ty, and others. We run that tool unmodified, on the real `basilisk` binary, on every change. -We do not grade ourselves. The number above is produced by the **official `python/typing` conformance calculator** — the exact tool that grades pyright, mypy, pyrefly, ty, zuban, and pycroscope — run **unmodified**. This page shows precisely how, proves it is the real tool and not an approximation, and is fully transparent about the bug that once inflated this number to a fake 100%. +Today that gives **{{ conformance.scorePct }}%** — **{{ conformance.pass }} of {{ conformance.total }}** test files passing, {{ conformance.caught }} required errors caught, with **{{ conformance.fp }} false positives** and **{{ conformance.missed }} missed required errors** left to clear. 
{{ conformance.categoriesPass100 }} of {{ conformance.categoriesTotal }} categories pass at 100%. The target is 100%; we ratchet toward it. -{% if conformance.verified %} -

    ✓ verified at build — upstream_main.py sha256 {{ conformance.sha256Short }}… matches the pin

    -{% endif %} - -## Full transparency: the number used to be wrong - -For months this site reported a number that climbed all the way to **100%**. That was a lie produced by a since-removed *in-repo* harness that **excluded 9 diagnostic codes from scoring and ignored false positives entirely**. When we replaced it with the real `python/typing` calculator, the honest number dropped to **{{ conformance.scorePct }}%**. - -
    - 100% - - {{ conformance.scorePct }}% - Not a regression — a correction. The checker did not get worse; the scorer got honest. 100% remains the target, not a present-day claim. -
    - -The chart below is read straight from the **git history of `conformance/conformance_status.csv`** at build time — one point per commit that changed the file, plotting the score that commit actually recorded. Nothing here is hand-typed. - -{{ chart(conformance, { - "label": "Conformance score over time", - "heading": "A rigged climb to 100%, then the official calculator told the truth", - "riggedLegend": "Old in-repo harness — excluded 9 codes, ignored false positives", - "officialLegend": "Official python/typing calculator", - "dropNote": "On " + conformance.chart.peak.shortDate + " the in-repo harness reported a full " + conformance.chart.peak.score + "%. Run for the first time on " + conformance.chart.current.shortDate + ", the official calculator reports " + conformance.chart.current.score + "%.", - "caption": "Each dot is a real commit to conformance/conformance_status.csv; the series is its git log, recomputed every build. Hover a point for its date, commit, score, and false-positive count." -}) }} +
    -## Proof the scoring is the official tool, not an approximation +## What the conformance suite is -Four checks, all reproducible against the files in this repository. +The [Python typing specification](https://typing.python.org/en/latest/spec/) defines how the type system is supposed to behave — generics, protocols, dataclasses, `TypedDict`, overloads, literals, and the rest. To stop the spec from being aspirational, the typing community maintains a **conformance test suite** alongside it in the [`python/typing`](https://github.com/python/typing/tree/main/conformance) repository. -### (a) The calculator is byte-identical to upstream +It works like this: -Download `conformance/src/main.py` from [`python/typing@{{ conformance.pinnedRef }}`](https://github.com/python/typing/blob/main/conformance/src/main.py) and diff it against our committed [`conformance/upstream_main.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/upstream_main.py): +- Each spec chapter has one or more **test files** — ordinary Python modules that exercise a feature and mark, with `# E` comments, every line where a conforming type checker **must** report an error (and, with `# E[tag]` groups, where one of several related errors is acceptable). +- A small **scoring tool** runs a type checker over those files and diffs its output against the annotations. A file *passes* only if the diff is empty: every required error is reported, and nothing is reported on a line the suite does not mark. +- The maintainers run every checker through it and publish the [results table](https://github.com/python/typing/blob/main/conformance/results/results.html), which is how figures like pyright's ~99% or pyrefly's ~86% are produced. 
-- **upstream (downloaded):** `{{ conformance.sha256Short }}…` -- **committed in repo:** `{{ conformance.liveSha256Short }}…` +This is the suite we use, at the pinned commit [`{{ conformance.pinnedRef }}`](https://github.com/python/typing/tree/{{ conformance.pinnedRef }}/conformance). Because the same tool and the same files grade everyone, the number is comparable across checkers and is not something we can tune in our favour. -Same sha256, same {{ conformance.upstreamBytes }} bytes, **zero-line diff**. It is *the* file, not a copy-with-edits. +## How a file is scored -### (b) We call upstream's own functions — we don't reimplement them +The entire algorithm is two functions in the suite's `main.py` — `get_expected_errors` (reads the `# E` annotations) and `diff_expected_errors` (diffs them against the checker's output). A file passes **iff** that diff is empty: -The entire scoring algorithm is two functions, `get_expected_errors` and `diff_expected_errors`, inside that committed file. `score.py` imports them and calls them; it contains **zero scoring logic of its own**. The call shapes match upstream's own call in the same file: +- the suite's rule (`upstream_main.py:185`): `"Fail" if errors_diff.strip() else "Pass"` -- upstream (`upstream_main.py:175`): `diff_expected_errors(type_checker, test_case, output, ignored_errors)` -- ours (`score.py:287`): `diff_errors(checker, f, output, [])` +We count **every** diagnostic the checker emits — errors *and* warnings, with **no diagnostic codes excluded**. That is the strictest reading of the suite and matches how the reference checker, pyright, is graded. One unexpected diagnostic (a false positive) fails the whole file, which is why our false-positive count matters as much as the pass count. -Same four arguments, same order. 
+## How we run it without forking it -### (c) Pass/fail is upstream's exact rule +The suite's `main.py` is a batch harness for the `python/typing` maintainers: it grades all the known checkers at once, pulls in TOML config/reporting dependencies, and writes a results matrix. It has no way to invoke our binary. So, exactly as the suite does for every checker (`PyrightTypeChecker`, `MypyTypeChecker`, …), we add a thin **adapter** and reuse the suite's own scoring rather than reimplementing it. Our [`score.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/score.py): -A file passes **iff** the diff string is empty — upstream's literal rule: +1. **Adapter** — runs `basilisk check --output json` and shapes the result into the `{line: [errors]}` dict the suite's functions expect (the one thing the suite can't do for us). +2. **Calculator** — imports `get_expected_errors` and `diff_expected_errors` from a committed, byte-identical copy of the suite's `main.py` and calls them unmodified (`score.py:287` mirrors the suite's own call at `upstream_main.py:175`). It contains no scoring logic of its own. +3. **Gate** — compares the result against `coverage-thresholds.json` and fails CI on any regression. -- upstream (`upstream_main.py:185`): `"Fail" if errors_diff.strip() else "Pass"` -- ours (`score.py:291`): `passed = not diff.strip()` +To keep the calculator trustworthy, the vendored copy is **sha256-pinned**. `score.py` re-hashes it on every run and refuses to score if it has drifted (`score.py:99`), and this website re-hashes it again at build time: -### (d) Tamper-proofing is live - -`score.py` re-hashes the calculator on **every run** and refuses to score if the sha256 doesn't match the pin (`score.py:99`), so the official file cannot silently drift. This website re-hashes it again at build time — that is the green badge above. 
- -### (e) It runs and produces the number - -Live, against the real compiled binary: **{{ conformance.scorePct }}% ({{ conformance.pass }}/{{ conformance.total }})**, {{ conformance.fp }} false positives, {{ conformance.missed }} missed — gate **PASS**. That is the strictest grading: **errors *and* warnings count**, the same way the reference checker pyright is graded upstream. +{% if conformance.verified %} +

    ✓ verified at build — conformance/upstream_main.py is {{ conformance.upstreamBytes }} bytes, sha256 {{ conformance.sha256Short }}…, matching the pin

    +{% endif %} -## Why a wrapper exists at all +Keeping the official file untouched is the whole point: the adapter and gate live in a separate, auditable file, so the calculator stays byte-for-byte the suite's own. -The only Basilisk-specific code is a `BasiliskTypeChecker` **adapter** — and even that is not a departure from the method. Upstream requires one adapter per checker (`PyrightTypeChecker`, `MypyTypeChecker`, …); ours runs `basilisk check --output json` and shapes the result into the `{line: [errors]}` dict the official functions consume. That is the contract every checker fulfills. +## A correction we made -`upstream_main.py` cannot be run directly to score Basilisk — **by design**. It is a batch test harness for the `python/typing` maintainers, not a single-checker scorer: +Our score used to be measured by an in-repo script of our own, and it was **wrong**. That script excluded several diagnostic codes from scoring and did not count false positives, so it reported numbers that climbed all the way to 100%. It was an honest mistake, not a tuned result — but it was still incorrect. -- it imports `tomli`, `tomlkit`, `options`, `reporting`, `test_groups`, `type_checker` at module load — extra deps and a TOML config/reporting pipeline irrelevant to "score this one binary"; -- it has no Basilisk adapter — it only knows pyright/mypy/pyrefly/ty, with no way to invoke our binary; -- it writes per-checker TOML result files and an HTML matrix across all checkers — not a CI gate. +We replaced it with the official calculator described above. With every diagnostic counted and nothing excluded, the honest number is **{{ conformance.scorePct }}%**: -So the wrapper is the **minimum glue** to use upstream's real scoring without forking it: +
    + 100% + + {{ conformance.scorePct }}% + The checker did not get worse — the measurement got correct. 100% is the target we are working toward, not a claim about today. +
    -1. **Adapter** — run the `basilisk` binary, turn its JSON into the `{line: errors}` dict (the one thing upstream genuinely can't do for us). -2. **Loader** — import the two scoring functions out of the committed file behind stubs for those unrelated imports, *after* verifying the sha256. The stub module is not manipulation of the scoring — the two functions touch none of those imports; it just lets the file import when `tomlkit` et al. aren't installed. -3. **Gate** — compare the live {{ conformance.scorePct }}% / {{ conformance.fp }} against `coverage-thresholds.json` and exit non-zero on any regression. +The chart below is read straight from the **git history of `conformance/conformance_status.csv`** at build time: one point per commit that changed it, plotting the score that commit actually recorded. -The alternative — editing `upstream_main.py` to add our adapter and strip its deps — would break the byte-identical guarantee that makes proof (a) possible. The wrapper exists precisely so the official file stays untouched and verifiable. **The split is the honest one: official calculator = committed and unmodified; our glue = a separate, auditable file.** +{{ chart(conformance, { + "label": "Conformance score over time", + "heading": "From the earlier in-repo number to the official calculator", + "prevLegend": "Earlier in-repo script (some codes excluded, false positives not counted)", + "officialLegend": "Official python/typing calculator", + "dropNote": "On " + conformance.chart.peak.shortDate + " the in-repo script reported " + conformance.chart.peak.score + "%. The official calculator, first run on " + conformance.chart.current.shortDate + ", reports " + conformance.chart.current.score + "% — a correction, not a regression.", + "caption": "Each dot is a real commit to conformance/conformance_status.csv, recomputed every build. Hover a point for its date, commit, score, and false-positive count." 
+}) }} ## Where each category stands today Read live from `conformance/conformance_status.csv` at build time: +
    - - - {% for cat in conformance.categories %} - - - - - - - {% endfor %} - + + +{%- for cat in conformance.categories %} + +{%- endfor %} +
    CategoryPassingScore
    {{ cat.label }}{{ cat.pass }} / {{ cat.total }}{{ cat.pct }}%
    CategoryPassingScore
    {{ cat.label }}{{ cat.pass }} / {{ cat.total }}{{ cat.pct }}%
    +
    ## Reproduce it yourself @@ -126,4 +110,4 @@ Read live from `conformance/conformance_status.csv` at build time: make conformance ``` -Everything above lives in two files: [`conformance/score.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/score.py) (our auditable glue) and [`conformance/upstream_main.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/upstream_main.py) (the official calculator, committed and sha256-pinned). The full annotation rules are documented in the [python/typing conformance README](https://github.com/python/typing/blob/main/conformance/README.md). +It all lives in two files: [`conformance/score.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/score.py) (our adapter + gate) and [`conformance/upstream_main.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/upstream_main.py) (the suite's calculator, committed and sha256-pinned). The full annotation rules are in the [python/typing conformance README](https://github.com/python/typing/blob/main/conformance/README.md). From 1b11ea4db27f31c441e4570b68b3303dfc7f332d Mon Sep 17 00:00:00 2001 From: Christian Findlay <16697547+MelbourneDeveloper@users.noreply.github.com> Date: Tue, 23 Jun 2026 22:43:43 +1000 Subject: [PATCH 12/12] fix(conformance): make the honest scorer CI-robust + ship the corrected site Conformance scoring / CI: - scripts/test-rust.sh: fold the compiled binary's conformance run (all 146 fixtures) into the llvm-cov pool via `show-env`, restoring the checker/resolver coverage the removed in-repo conformance test used to provide (checker 86%->94%, resolver 92%->96%; total 85.4%->89.8%) while keeping scoring on the REAL binary with the official upstream calculator -- no Rust conformance test. - ruff.toml: exclude the byte-identical vendored conformance/upstream_main.py from formatting so `ruff format` can never break its sha256 integrity pin. 
- conformance/score.py: ruff-format to the repo's 88-col style (no behaviour change; gate still reports 40.4% / 59 of 146 / 285 FP). - coverage-thresholds.json: reword the conformance docs (thresholds unchanged). Website / docs: - Drive the homepage + comparison conformance figures from conformance_status.csv via {{ conformance.* }} (single source of truth) instead of hardcoded numbers. - Add the Releases page (src/_data/releases.js + docs/releases.njk, EN+ZH) built from the live GitHub Releases API; pass GITHUB_TOKEN in ci.yml + deploy-pages.yml for the API rate limit (build degrades gracefully without it). - Update plans/specs to the honest 40.4% (59/146, 285 FP) baseline and drop stale rigged-harness numbers (92.5%, ~18 FPs) from active docs. --- .github/workflows/ci.yml | 5 + .github/workflows/deploy-pages.yml | 5 + conformance/score.py | 80 ++++-- coverage-thresholds.json | 4 +- docs/INDEX.md | 2 +- .../CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md | 2 +- docs/plans/FP-REMAINING-NOTES.md | 6 +- docs/plans/LSP-PLAN.md | 4 +- docs/plans/ROADMAP-NEXT-STEPS-PLAN.md | 27 +- docs/specs/CHECKER-ARCHITECTURE-SPEC.md | 4 +- ruff.toml | 11 + scripts/test-rust.sh | 48 ++-- website/package-lock.json | 3 +- website/package.json | 6 +- website/scripts/copy-readme.js | 49 ---- website/src/_data/releases.js | 124 +++++++++ website/src/_includes/layouts/base.njk | 2 +- website/src/assets/css/styles.css | 52 ++++ website/src/docs/comparison.md | 4 +- website/src/docs/index.md | 2 +- website/src/docs/releases.njk | 61 +++++ website/src/docs/rules/index.md | 2 +- website/src/index.njk | 55 +--- website/src/readme.html | 240 ------------------ website/src/zh/docs/comparison.md | 4 +- website/src/zh/docs/conformance.md | 107 ++++++++ website/src/zh/docs/index.md | 2 +- website/src/zh/docs/rules/index.md | 2 +- website/src/zh/index.njk | 51 +--- 29 files changed, 508 insertions(+), 456 deletions(-) create mode 100644 ruff.toml delete mode 100644 website/scripts/copy-readme.js create mode 
100644 website/src/_data/releases.js create mode 100644 website/src/docs/releases.njk delete mode 100644 website/src/readme.html create mode 100644 website/src/zh/docs/conformance.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d15c0edf..43ca52b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -115,6 +115,11 @@ jobs: - name: Build site working-directory: website + # GITHUB_TOKEN raises the GitHub API rate limit for _data/releases.js + # (the releases page is generated from the live Releases API at build + # time); the build still degrades gracefully if the call fails. + env: + GITHUB_TOKEN: ${{ github.token }} run: npm run build # ── Lint (runs in parallel with all test jobs) ───────────────────────────── diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml index 5a79df21..d6b24732 100644 --- a/.github/workflows/deploy-pages.yml +++ b/.github/workflows/deploy-pages.yml @@ -56,6 +56,11 @@ jobs: - name: Build site working-directory: website + # GITHUB_TOKEN raises the GitHub API rate limit for _data/releases.js + # (the releases page is generated from the live Releases API at build + # time); the build still degrades gracefully if the call fails. + env: + GITHUB_TOKEN: ${{ github.token }} run: npm run build - uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6.0.0 diff --git a/conformance/score.py b/conformance/score.py index 0fa15618..f4fc9df7 100644 --- a/conformance/score.py +++ b/conformance/score.py @@ -61,7 +61,9 @@ ) # The committed, byte-identical copy of upstream's calculator, and its sha256. UPSTREAM_MAIN = Path(__file__).resolve().parent / "upstream_main.py" -UPSTREAM_MAIN_SHA256 = "b4e3bd089c73856f9920ef494350d622c2914fac238c9193ec0bb3f93f0fc6a2" +UPSTREAM_MAIN_SHA256 = ( + "b4e3bd089c73856f9920ef494350d622c2914fac238c9193ec0bb3f93f0fc6a2" +) # The two functions that constitute the official scoring algorithm. 
OFFICIAL_FUNCS = ("get_expected_errors", "diff_expected_errors") # The `# E`-annotated test fixtures are downloaded (git-ignored) into @@ -103,10 +105,19 @@ def load_official_calc() -> tuple[Callable, Callable, str]: "modified. Restore it from git, or run --refresh-upstream to re-pin." ) - for dep in ("tomli", "tomlkit", "options", "reporting", "test_groups", "type_checker"): + for dep in ( + "tomli", + "tomlkit", + "options", + "reporting", + "test_groups", + "type_checker", + ): sys.modules.setdefault(dep, _StubModule(dep)) - spec = importlib.util.spec_from_file_location("typing_conformance_main", UPSTREAM_MAIN) + spec = importlib.util.spec_from_file_location( + "typing_conformance_main", UPSTREAM_MAIN + ) if spec is None or spec.loader is None: raise RuntimeError(f"cannot build an import spec for {UPSTREAM_MAIN}") module = importlib.util.module_from_spec(spec) @@ -171,7 +182,9 @@ def ensure_fixtures(conf_dir: Path, force: bool) -> None: listing_req = urllib.request.Request(FIXTURES_API, headers=headers) with urllib.request.urlopen(listing_req, timeout=60) as resp: # noqa: S310 (pinned https) entries = json.loads(resp.read()) - fixtures = [e for e in entries if e.get("type") == "file" and e["name"].endswith(".py")] + fixtures = [ + e for e in entries if e.get("type") == "file" and e["name"].endswith(".py") + ] if not fixtures: raise RuntimeError(f"no .py fixtures found at {FIXTURES_API}") @@ -182,8 +195,10 @@ def ensure_fixtures(conf_dir: Path, force: bool) -> None: with urllib.request.urlopen(entry["download_url"], timeout=60) as resp: # noqa: S310 (conf_dir / entry["name"]).write_bytes(resp.read()) stamp.write_text(PINNED_TYPING_REF + "\n", encoding="utf-8") - print(f" fetched {len(fixtures)} conformance fixtures " - f"(python/typing@{PINNED_TYPING_REF}) -> {conf_dir}") + print( + f" fetched {len(fixtures)} conformance fixtures " + f"(python/typing@{PINNED_TYPING_REF}) -> {conf_dir}" + ) # 
--------------------------------------------------------------------------- @@ -208,9 +223,17 @@ def __init__(self, binary: Path, count_warnings: bool = False) -> None: def run_test(self, test_case: Path) -> str: proc = subprocess.run( - [str(self.binary), "check", str(test_case), - "--output", "json", "--color", "never"], - capture_output=True, text=True, + [ + str(self.binary), + "check", + str(test_case), + "--output", + "json", + "--color", + "never", + ], + capture_output=True, + text=True, ) return proc.stdout @@ -317,7 +340,9 @@ def print_scorecard( print("=" * 68) print(f" BASILISK PEP CONFORMANCE — REAL python/typing CALCULATOR [{label}]") print(" calc: imported verbatim from committed conformance/upstream_main.py") - print(f" ref: python/typing@{PINNED_TYPING_REF} ({digest}) funcs: {', '.join(OFFICIAL_FUNCS)}") + print( + f" ref: python/typing@{PINNED_TYPING_REF} ({digest}) funcs: {', '.join(OFFICIAL_FUNCS)}" + ) print("=" * 68) print(f" Files: {n} total | {totals['pass']} pass | {n - totals['pass']} fail") print(f" Score: {pct:.1f}% (Pass = empty errors_diff, upstream rule)") @@ -353,8 +378,13 @@ def parse_args(argv: list[str]) -> dict: # reference checker pyright is graded upstream. `--errors-only` reports the # looser errors-only view. `--count-warnings` is accepted for back-compat. 
opts: dict = { - "bin": None, "gate": False, "warn": True, "dir": None, - "refresh": False, "fetch": False, "fetch_only": False, + "bin": None, + "gate": False, + "warn": True, + "dir": None, + "refresh": False, + "fetch": False, + "fetch_only": False, } it = iter(argv) for a in it: @@ -385,15 +415,22 @@ def enforce_gate(root: Path, files: list[Path], totals: Totals) -> bool: failed = False if threshold is not None: if pct < threshold: - print(f" ✗ PEP conformance regression: {pct}% ({totals['pass']}/{n}) " - f"< {threshold}% threshold.", file=sys.stderr) + print( + f" ✗ PEP conformance regression: {pct}% ({totals['pass']}/{n}) " + f"< {threshold}% threshold.", + file=sys.stderr, + ) failed = True else: - print(f" Conformance gate: {pct}% ({totals['pass']}/{n}) >= {threshold}% — PASS") + print( + f" Conformance gate: {pct}% ({totals['pass']}/{n}) >= {threshold}% — PASS" + ) if ceiling is not None: if totals["fp"] > ceiling: - print(f" ✗ False-positive regression: {totals['fp']} FPs > {ceiling} ceiling.", - file=sys.stderr) + print( + f" ✗ False-positive regression: {totals['fp']} FPs > {ceiling} ceiling.", + file=sys.stderr, + ) failed = True else: print(f" FP gate: {totals['fp']} <= {ceiling} ceiling — PASS") @@ -418,7 +455,9 @@ def main(argv: list[str]) -> int: ensure_fixtures(conf_dir, force=opts["fetch"]) except Exception as exc: # noqa: BLE001 — surface fetch failure clearly if opts["fetch"] or opts["fetch_only"]: - print(f" ✗ could not fetch conformance fixtures: {exc}", file=sys.stderr) + print( + f" ✗ could not fetch conformance fixtures: {exc}", file=sys.stderr + ) return 1 print(" ⚠ Conformance suite not present and fetch failed — skipping.") return 0 @@ -427,7 +466,10 @@ def main(argv: list[str]) -> int: binary = find_binary(opts["bin"], root) if binary is None: - print(" ✗ basilisk binary not found. Build it or pass --bin .", file=sys.stderr) + print( + " ✗ basilisk binary not found. 
Build it or pass --bin .", + file=sys.stderr, + ) return 1 try: diff --git a/coverage-thresholds.json b/coverage-thresholds.json index c69cf6e0..31b123b1 100644 --- a/coverage-thresholds.json +++ b/coverage-thresholds.json @@ -41,9 +41,9 @@ } }, "conformance": { - "_doc": "Minimum PEP conformance pass percentage (files passing / total files), computed by the REAL python/typing conformance calculator (conformance/score.py downloads upstream main.py at the pinned ref and runs its own get_expected_errors + diff_expected_errors; NO excluded diagnostic codes). A file passes only when upstream's errors_diff is empty. The score uses the STRICTEST grading: every basilisk diagnostic (errors AND warnings) counts, matching how the reference checker pyright is graded. Ratchet UP only. HONEST baseline (replacing a previously rigged 100% from a lenient in-repo harness): 59/146 = 40.4%, pinned to python/typing@268d0c4e. (The looser errors-only view is 70/146 = 47.9%, available via score.py --errors-only.) Target is 100%; this is the real current number.", + "_doc": "Minimum PEP conformance pass percentage (files passing / total files), computed by the REAL python/typing conformance calculator (conformance/score.py downloads upstream main.py at the pinned ref and runs its own get_expected_errors + diff_expected_errors; NO excluded diagnostic codes). A file passes only when upstream's errors_diff is empty. The score uses the STRICTEST grading: every basilisk diagnostic (errors AND warnings) counts, matching how the reference checker pyright is graded. Ratchet UP only. HONEST baseline (replacing a previously incorrect 100% from a lenient in-repo harness): 59/146 = 40.4%, pinned to python/typing@268d0c4e. (The looser errors-only view is 70/146 = 47.9%, available via score.py --errors-only.) 
Target is 100%; this is the real current number.", "threshold": 40, - "_fp_ceiling_doc": "Maximum total false-positive diagnostics across the suite (diagnostics Basilisk reports on lines the suite does NOT mark with # E, plus diagnostics outside satisfied # E[tag] groups) under the strictest errors+warnings grading. Ratchet DOWN only \u2014 the mirror of the pass-percentage gate. Enforced via conformance/score.py --gate (run on the compiled binary by scripts/test-rust.sh inside make test; no Rust test involved). HONEST baseline: 285 (the prior 0 was a lie produced by excluding 9 diagnostic codes from scoring). Drive this DOWN.", + "_fp_ceiling_doc": "Maximum total false-positive diagnostics across the suite (diagnostics Basilisk reports on lines the suite does NOT mark with # E, plus diagnostics outside satisfied # E[tag] groups) under the strictest errors+warnings grading. Ratchet DOWN only \u2014 the mirror of the pass-percentage gate. Enforced via conformance/score.py --gate (run on the compiled binary by scripts/test-rust.sh inside make test; no Rust test involved). HONEST baseline: 285 (the prior 0 was incorrect — produced by excluding 9 diagnostic codes from scoring). Drive this DOWN.", "max_false_positives": 285 } } diff --git a/docs/INDEX.md b/docs/INDEX.md index 9d7e3353..1c984059 100644 --- a/docs/INDEX.md +++ b/docs/INDEX.md @@ -34,7 +34,7 @@ Implementation roadmaps tracking phasing, priorities, and progress. | [ROADMAP-NEXT-STEPS-PLAN.md](plans/ROADMAP-NEXT-STEPS-PLAN.md) | Post-launch aggregation roadmap — editor releases, scale testing, i18n, MCP server, AI integration, marketing. Rough overview + agent/human-split TODO. | | [LSP-PLAN.md](plans/LSP-PLAN.md) | Overall LSP roadmap — seven phases from core features through cross-module analysis. | | [CHECKER-CROSS-MODULE-PLAN.md](plans/CHECKER-CROSS-MODULE-PLAN.md) | Cross-file LSP features, type provenance, Salsa integration, auto-stub generation. 
| -| [CHECKER-PEP-CONFORMANCE-PLAN.md](plans/CHECKER-PEP-CONFORMANCE-PLAN.md) | PEP conformance push — target 85%, tiered task list by complexity and impact. | +| [CHECKER-PEP-CONFORMANCE-PLAN.md](plans/CHECKER-PEP-CONFORMANCE-PLAN.md) | PEP conformance push — target 100%, tiered task list by complexity and impact. | | [CHECKER-CACHE-PLAN.md](plans/CHECKER-CACHE-PLAN.md) | Build order for the opt-in CLI result cache + warm/cold benchmark wiring. | | [LSP-AI-PLAN.md](plans/LSP-AI-PLAN.md) | AI provider abstraction — model-agnostic hooks for fixes, completions, refactoring. | | [LSP-PROFILING-PLAN.md](plans/LSP-PROFILING-PLAN.md) | Embed py-spy profiler into LSP for CPU profiling and hotspot visualization. | diff --git a/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md b/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md index bd2c4217..a036d4ae 100644 --- a/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md +++ b/docs/plans/CHECKER-TYPE-NARROWING-INFERENCE-PLAN.md @@ -418,5 +418,5 @@ Phases 1 and 2 are independent and can be parallelized. Phase 3 depends on Phase - [x] 5a. E0014 — `VarCheckContext` with `SubtypeContext`, uses `is_subtype_with_context()` for assignability - [x] 5b. E0013 — `SubtypeContext` passed to `check_function()`, removed `contains_named` early exit for Named types - [x] 5c. E0053 — `is_likely_narrowed()` heuristic suppresses narrowing-dependent FPs; Union normalization in `types_match()` - - [x] 5d. Full conformance suite verification — the "**18 FPs** (target < 71)" result came from a rigged in-repo harness and is FALSE; the official `python/typing` scorer reports **285 false positives** (59/146 files passing, 40.4% counting errors+warnings). Driving FPs down remains active work. + - [x] 5d. 
Full conformance suite verification — the "**18 FPs** (target < 71)" result came from an earlier in-repo harness (a miscalculation) and is superseded; the official `python/typing` scorer reports **285 false positives** (59/146 files passing, 40.4% counting errors+warnings). Driving FPs down remains active work. - [x] Checker-side modules: `narrowing.rs` (NarrowingContext), `expr_inference.rs` (ExpressionInferrer), `constraint_solver.rs` (ConstraintSolver) diff --git a/docs/plans/FP-REMAINING-NOTES.md b/docs/plans/FP-REMAINING-NOTES.md index 55ce8aa5..af6feada 100644 --- a/docs/plans/FP-REMAINING-NOTES.md +++ b/docs/plans/FP-REMAINING-NOTES.md @@ -98,12 +98,12 @@ structural matcher (positive-match semantics already reject `float`→`str`). --- ### Status -- NOTE: the "144/146 / suite FP 21→11" figures below were produced by a rigged - in-repo harness (excluded 9 codes, ignored false positives) and are FALSE. The +- NOTE: the "144/146 / suite FP 21→11" figures below were produced by an earlier + in-repo harness (excluded 9 codes, didn't count false positives) — a miscalculation. The official `python/typing` scorer (run unmodified, pinned commit) reports **59/146 passing (40.4%, errors+warnings strictest), 285 false positives, 36 missed errors**. Treat the per-lane numbers below as historical, not verified. -- B3 lane (E0111/E0143/E0115) = DONE, (legacy/rigged) figures: 144/146, caught=917, missed=37 +- B3 lane (E0111/E0143/E0115) = DONE, (legacy/superseded) figures: 144/146, caught=917, missed=37 (unchanged, both pre-failing files), suite FP 21→11. - Items 1 & 2 above are low-risk quick wins; 3 & 4 need structural work but the TP-safety traps are spelled out. diff --git a/docs/plans/LSP-PLAN.md b/docs/plans/LSP-PLAN.md index 4e9f8b33..b2857175 100644 --- a/docs/plans/LSP-PLAN.md +++ b/docs/plans/LSP-PLAN.md @@ -6,7 +6,7 @@ ## Status -Phases 0–6 are COMPLETE. 
Phase 7 (cross-module foundation) is MOSTLY COMPLETE — stub infrastructure, import graph, cross-file symbols all operational. Phase 3.5 (PEP conformance push) is ACTIVE — the official `python/typing` scorer (run unmodified, pinned commit) currently reports **59/146 files passing (40.4%, errors+warnings strictest)**, with 285 false positives and 36 missed required errors still to clear. (Earlier in-repo figures such as "124/146, 18 FPs" came from a rigged harness that excluded codes and ignored false positives; they are superseded.) +Phases 0–6 are COMPLETE. Phase 7 (cross-module foundation) is MOSTLY COMPLETE — stub infrastructure, import graph, cross-file symbols all operational. Phase 3.5 (PEP conformance push) is ACTIVE — the official `python/typing` scorer (run unmodified, pinned commit) currently reports **59/146 files passing (40.4%, errors+warnings strictest)**, with 285 false positives and 36 missed required errors still to clear. (Earlier in-repo figures such as "124/146, 18 FPs" came from an earlier in-repo harness that excluded codes and didn't count false positives — a miscalculation; they are superseded.) --- @@ -25,7 +25,7 @@ Phases 0–6 are COMPLETE. Phase 7 (cross-module foundation) is MOSTLY COMPLETE | 7.6 | Third-party type stubs — typeshed bundling, `py.typed` marker detection (PEP 561) | Medium | DONE — `phf` stdlib module set, `py.typed` detection, stub package discovery | | 7.7 | Config file reading — `pyproject.toml`, `basilisk.json` | Medium | DONE — `basilisk-config` crate with per-module/per-path overrides | -## Phase 7.5 — PEP Conformance Push (ACTIVE — 82.2% → 85%) +## Phase 7.5 — PEP Conformance Push (ACTIVE — 40.4% → 100%) > **BLOCKING for Phase 9.** The type system needs these capabilities to stop producing > false positives and to catch real typing errors conformance expects. 
diff --git a/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md b/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md index 6f1d0af7..2440efbe 100644 --- a/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md +++ b/docs/plans/ROADMAP-NEXT-STEPS-PLAN.md @@ -43,8 +43,9 @@ Every TODO item is tagged so we know who picks it up: install everywhere. The single biggest "people actually find out" lever. 3. **Get listed on the official Python typing conformance results** *(TODO H + G)* — **Effort: medium. - Reward: very high.** We're at 92.5%; closing the 11 failing files earns a spot on the scoreboard the - whole target audience watches. Correctness + credibility + organic discovery in one (the + Reward: very high.** We're at 40.4% (59/146, per the unmodified python/typing scorer); even at this + score, submitting results earns a spot on the scoreboard the whole target audience watches (mypy sits + at ~58%), and every failing file we close lifts our standing. Correctness + credibility + organic discovery in one (the Zuban/David Halter precedent proves it draws eyes). 4. **Ship Neovim + Zed for real** *(TODO A/B)* — **Effort: low-medium. Reward: high.** Both are ~95% @@ -114,7 +115,7 @@ actually feel day to day. Rough priorities (refine with human judgment — see T - **Conformance & correctness**: per the official `python/typing` scorer (run unmodified, pinned commit), PEP conformance is currently **59/146 files PASS (40.4%, errors+warnings strictest)**, with **285 false - positives** and 36 missed required errors. (Earlier "135/146 / ~18 FPs" figures came from a rigged + positives** and 36 missed required errors. (Earlier "135/146 / ~18 FPs" figures came from an earlier, miscalculating in-repo harness that excluded codes and ignored false positives; they are superseded.) Failing files cluster in Protocols, Callables, TypeVarTuple, ParamSpec, TypedDicts. FPs hurt credibility more than missed cases — prioritize accordingly. 
@@ -190,14 +191,16 @@ hermetic, plus an opt-in integration test against the real agent. ## 9. Finish near-complete plans (bang for buck) -These are close enough that finishing them is cheap and visibly improves the product: +Several of these are close enough that finishing them is cheap and visibly improves the product (the +conformance and false-positive work is larger — sized honestly below against the unmodified scorer): -- **`CHECK-ELIMINATE-FALSE-POSITIVES.md`** (~93%): ~18 FPs left, mostly 1–2 per rule. **Plus an open - showstopper**: `BSK-E0149` line-scans source text and misfires on docstrings containing - `class`/`def` prefixes + bracketed tokens (e.g. our own `[SPEC-ID]` convention). Re-ground the rule - on the AST. High credibility payoff. -- **`CHECKER-PEP-CONFORMANCE-PLAN.md`** (~92.5%): clear the 11 failing files toward the conformance - results listing. +- **`CHECK-ELIMINATE-FALSE-POSITIVES.md`** (active): the real python/typing scorer reports **285 false + positives** to drive down (the old "~18 FPs left" came from the earlier in-repo harness — a + miscalculation — and is superseded). **Plus an open showstopper**: `BSK-E0149` line-scans source text and misfires on + docstrings containing `class`/`def` prefixes + bracketed tokens (e.g. our own `[SPEC-ID]` convention). + Re-ground the rule on the AST. High credibility payoff. +- **`CHECKER-PEP-CONFORMANCE-PLAN.md`** (active, 40.4% — 59/146): clear the **87 failing files** toward the + conformance results listing. - **`CHECKER-ELIMINATE-LINE-SCANNING-PLAN.md`** (~40%): the E0149 fix above is part of this; finish Phase 4 (wire the no-line-scanning lint into CI so the anti-pattern can't return). - **`LSP-STUBBING-PLAN.md`** (~95%, Phase 5 deferred): essentially shippable; decide whether the @@ -316,8 +319,8 @@ Rough plan (most of this is human-led — voice, accounts, timing, relationships ## G. 
Finish near-complete plans - [ ] **`[AGENT]`** Fix `BSK-E0149` docstring/line-scanning showstopper — re-ground the rule on the AST (`CHECK-ELIMINATE-FALSE-POSITIVES.md`). -- [ ] **`[AGENT]`** Clear remaining ~18 false positives. -- [ ] **`[AGENT]`** Close the 11 failing PEP-conformance files (Protocols, Callables, TypeVarTuple, ParamSpec, TypedDicts). +- [ ] **`[AGENT]`** Clear the 285 false positives. +- [ ] **`[AGENT]`** Close the 87 failing PEP-conformance files (Protocols, Callables, TypeVarTuple, ParamSpec, TypedDicts). - [ ] **`[AGENT]`** Finish `CHECKER-ELIMINATE-LINE-SCANNING-PLAN.md` Phase 4 — wire the no-line-scanning lint into CI. - [ ] **`[HUMAN]`** Decide whether `LSP-STUBBING-PLAN.md` Phase 5 (Salsa perf) ships now or later. diff --git a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md index fb70727e..15bb0e67 100644 --- a/docs/specs/CHECKER-ARCHITECTURE-SPEC.md +++ b/docs/specs/CHECKER-ARCHITECTURE-SPEC.md @@ -1403,8 +1403,8 @@ checkers (pyright, mypy, pyrefly, ty, zuban, pycroscope) are graded with. in `coverage-thresholds.json` (`conformance.threshold`, `conformance.max_false_positives`); the former ratchets **up**, the latter **down**. Per-file results are written to `conformance/conformance_status.csv`. -- **Honest baseline** (replacing a previously rigged in-repo harness that - excluded the 9 codes above and ignored false positives to fake 100%): +- **Honest baseline** (replacing an earlier in-repo harness that + excluded the 9 codes above and didn't count false positives — a miscalculation that reported 100%): **59 / 146 = 40.4%** (strictest grading: every diagnostic, errors AND warnings, counted — as pyright is graded), 285 false positives, 36 missed required errors. The looser errors-only view is 70 / 146 = 47.9%. Target: 100%. 
diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 00000000..e02c0346 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,11 @@ +# Ruff configuration for this repo's Python tooling (conformance scorer, scripts). +# Formatting/linting uses Ruff's defaults (line-length 88); the only customization +# is the exclude below. +# +# conformance/upstream_main.py is a BYTE-IDENTICAL vendored copy of python/typing's +# official conformance calculator, sha256-pinned in conformance/score.py. Ruff must +# never reformat it — any change breaks the integrity pin and the "we run the real +# upstream scorer, unmodified" guarantee — so it is excluded from discovery here. +# (The downloaded test fixtures under conformance/tests/ are git-ignored and already +# skipped by Ruff's respect-gitignore default.) +extend-exclude = ["conformance/upstream_main.py"] diff --git a/scripts/test-rust.sh b/scripts/test-rust.sh index fc50cd54..9b14df37 100755 --- a/scripts/test-rust.sh +++ b/scripts/test-rust.sh @@ -31,22 +31,29 @@ rustup component add llvm-tools-preview 2>/dev/null || true header "Ensuring PEP conformance fixtures are current" python3 "$REPO_ROOT/conformance/score.py" --fetch-only -# ── Rust tests with coverage ───────────────────────────────────────────────── -# cargo-llvm-cov uses target/llvm-cov-target/ as its target directory, -# so the basilisk binary lands there — not in target/release/. +# ── Rust tests + conformance, one instrumented coverage pool ───────────────── +# Coverage is gathered in TWO phases that share ONE profile pool, reported once: +# 1. the workspace test suite, then +# 2. the REAL basilisk binary scored over all 146 PEP conformance fixtures. +# Phase 2 is BOTH the conformance gate AND the source of the checker/resolver +# coverage those files exercise — the compiled binary's own instrumented run +# provides it (there is no in-repo conformance test). 
+# +# cargo-llvm-cov's `show-env` is the supported way to fold an external binary's +# runs into coverage: source it ONCE, then build + test + run the binary all under +# that single environment so every profraw lands in one pool under target/, then +# report. (Mixing a `cargo llvm-cov <subcommand>` with `show-env` is unsupported — the +# run subcommand redirects to target/llvm-cov-target while show-env uses target/, +# so the pools diverge and the report finds no data.) header "Running tests with coverage instrumentation" +cargo llvm-cov clean --workspace +eval "$(cargo llvm-cov show-env --export-prefix)" + set +e -cargo llvm-cov \ - --profile ci \ - --workspace \ - --exclude basilisk-compiler \ - --all-targets \ - --lcov \ - --output-path "$LCOV_FILE" +cargo test --profile ci --workspace --exclude basilisk-compiler --all-targets TESTS_EXIT=$? set -e -ok "lcov.info → $LCOV_FILE" if [[ "$TESTS_EXIT" -ne 0 ]]; then echo "" echo -e "${RED}${BOLD}TESTS FAILED (exit $TESTS_EXIT).${RESET}" @@ -56,22 +63,31 @@ if [[ "$TESTS_EXIT" -ne 0 ]]; then fi ok "All workspace tests passed" -# Verify the basilisk binary exists. +# The freshly-built instrumented binary lives under the show-env build dir +# (target/ci/). Pin BASILISK_BIN to it so the conformance phase scores the exact +# binary whose objects the report reads — not a stale one from another target dir. 
+export BASILISK_BIN="$REPO_ROOT/target/ci/basilisk" BASILISK_BIN=$(find_basilisk_bin) || { echo -e "${RED}${BOLD}FATAL: basilisk binary not found after coverage build.${RESET}" - echo -e "${RED}Checked: target/llvm-cov-target/ci/ and fallback paths${RESET}" + echo -e "${RED}Checked: target/ci/ and fallback paths${RESET}" exit 1 } ok "basilisk binary ready: $BASILISK_BIN" -# ── PEP conformance gate ────────────────────────────────────────────────────── +# ── PEP conformance gate (also contributes coverage) ────────────────────────── # Score the REAL compiled binary with the official python/typing calculator # (conformance/score.py imports the committed, sha256-verified upstream_main.py) -# and enforce the ratchet gate from coverage-thresholds.json. This is the whole -# conformance system: two Python files + the gitignored fixtures. No Rust test. +# and enforce the ratchet gate from coverage-thresholds.json. The binary runs +# under the sourced llvm-cov env, so its profile data joins the test pool and the +# checker/resolver paths these 146 files exercise count toward coverage. The whole +# conformance system is these two Python files + the gitignored fixtures, scored +# on the compiled binary — no Rust test. 
header "Enforcing PEP conformance gate (official python/typing calculator)" python3 "$REPO_ROOT/conformance/score.py" --bin "$BASILISK_BIN" --gate +# ── Finalize coverage from BOTH phases (tests + conformance binary runs) ────── +cargo llvm-cov report --profile ci --lcov --output-path "$LCOV_FILE" +ok "lcov.info → $LCOV_FILE" cargo llvm-cov report --profile ci --html --output-dir "$HTML_DIR" ok "HTML report → $HTML_DIR/index.html" diff --git a/website/package-lock.json b/website/package-lock.json index 89ef0443..83c3f4a2 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -9,7 +9,8 @@ "version": "1.0.0", "devDependencies": { "@11ty/eleventy": "^3.1.6", - "eleventy-plugin-techdoc": "^0.2.0" + "eleventy-plugin-techdoc": "^0.2.0", + "markdown-it": "^14.2.0" } }, "node_modules/@11ty/dependency-tree": { diff --git a/website/package.json b/website/package.json index b8453407..6facc704 100644 --- a/website/package.json +++ b/website/package.json @@ -5,13 +5,13 @@ "type": "module", "description": "Documentation website for Basilisk — strict-by-default Python type checker", "scripts": { - "prebuild": "node scripts/copy-readme.js", "build": "eleventy", - "start": "node scripts/copy-readme.js && eleventy --serve --watch", + "start": "eleventy --serve --watch", "clean": "rm -rf _site" }, "devDependencies": { "@11ty/eleventy": "^3.1.6", - "eleventy-plugin-techdoc": "^0.2.0" + "eleventy-plugin-techdoc": "^0.2.0", + "markdown-it": "^14.2.0" } } diff --git a/website/scripts/copy-readme.js b/website/scripts/copy-readme.js deleted file mode 100644 index 878fd37e..00000000 --- a/website/scripts/copy-readme.js +++ /dev/null @@ -1,49 +0,0 @@ -/** - * copy-readme.js - * - * Copies the root README.md into src/readme.html at build time. - * The README is the single source of truth — the website page is generated - * from it automatically on every build. - * - * Front-matter is prepended so Eleventy picks it up with the right layout, - * title, and navigation entry. 
- */ - -import { readFileSync, writeFileSync, mkdirSync } from "node:fs"; -import { resolve, dirname } from "node:path"; -import { fileURLToPath } from "node:url"; - -const __dirname = dirname(fileURLToPath(import.meta.url)); - -const readmePath = resolve(__dirname, "../../README.md"); -const outPath = resolve(__dirname, "../src/readme.html"); - -const frontmatter = `--- -layout: layouts/docs.njk -title: README -description: Crate architecture, diagnostic rules, and development guide for Basilisk. -keywords: basilisk, readme, crate architecture, rust, python type checker -# English-only crate README — no Chinese twin exists, so opt it out of the -# language cluster (no /zh/readme/ hreflang or switcher link, which would 404). -noTranslation: true -eleventyNavigation: - key: README - order: 99 -permalink: /readme/ ---- - -`; - -// The root README uses a repo-relative logo path (`images/basilisk-logo.png`) -// that resolve on GitHub but 404 on the site at /readme/. Rewrite them to the -// site's absolute asset paths so the page renders without broken images. -// `images/screenshot.png` is a symlink to the canonical website asset, so both -// references resolve to the same file. -const readme = readFileSync(readmePath, "utf8") - .replace(/images\/basilisk-logo\.png/g, "/assets/images/logo.svg") - .replace(/images\/screenshot\.png/g, "/assets/images/screenshot.png"); - -mkdirSync(dirname(outPath), { recursive: true }); -writeFileSync(outPath, frontmatter + readme, "utf8"); - -console.log("✓ README.md copied to src/readme.html"); diff --git a/website/src/_data/releases.js b/website/src/_data/releases.js new file mode 100644 index 00000000..6cf6e7b1 --- /dev/null +++ b/website/src/_data/releases.js @@ -0,0 +1,124 @@ +// Eleventy global data: the Basilisk GitHub Releases, fetched FRESH at every +// build from the public GitHub REST API — never hand-maintained. 
This mirrors +// the build-time data pattern of _data/conformance.js and _data/benchmarks.js: +// everything the /docs/releases/ page shows is whatever the API returns at build +// time (tag, title, date, release notes rendered from the release's markdown +// body, and downloadable assets). +// +// Drafts are excluded (not yet published). Prereleases are kept and badged. +// +// The build NEVER fails on a network/API error: exactly like conformance.js it +// degrades to `{ hasData: false }` and the page renders an empty state linking +// to GitHub, so an offline dev build or a rate-limited CI run still produces a +// valid site. When `GITHUB_TOKEN`/`GH_TOKEN` is present (CI) it is used to raise +// the API rate limit; the public, unauthenticated path works too. +import markdownIt from "markdown-it"; + +const OWNER = "Nimblesite"; +const REPO = "Basilisk"; +const API = `https://api.github.com/repos/${OWNER}/${REPO}/releases?per_page=100`; +const RELEASES_URL = `https://github.com/${OWNER}/${REPO}/releases`; + +// Release notes are authored by the maintainers (trusted), so raw HTML is +// allowed. `breaks: true` matches how GitHub itself renders release bodies. +const md = markdownIt({ html: true, linkify: true, breaks: true }); + +const MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]; + +// "2026-06-23T10:16:43Z" -> "Jun 23, 2026". UTC getters keep the output +// deterministic regardless of the build machine's timezone. +function formatDate(iso) { + if (!iso) return null; + const date = new Date(iso); + return Number.isNaN(date.getTime()) + ? iso + : `${MONTHS[date.getUTCMonth()]} ${date.getUTCDate()}, ${date.getUTCFullYear()}`; +} + +// Bytes -> "1.2 MB" style, base-1024. 
+function formatBytes(bytes) { + if (!Number.isFinite(bytes) || bytes <= 0) return "0 B"; + const units = ["B", "KB", "MB", "GB"]; + const exp = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1); + const value = bytes / 1024 ** exp; + return `${exp === 0 ? value : Math.round(value * 10) / 10} ${units[exp]}`; +} + +// Pull the `rel="next"` URL out of a GitHub `Link` response header (string +// splitting, no regex). Returns null when there is no next page. +function nextPageUrl(linkHeader) { + if (!linkHeader) return null; + for (const part of linkHeader.split(",")) { + const [target, ...attrs] = part.split(";"); + if (attrs.some((attr) => attr.trim() === 'rel="next"')) { + return target.trim().slice(1, -1); // strip the surrounding < > + } + } + return null; +} + +async function fetchAllReleases() { + const headers = { + Accept: "application/vnd.github+json", + "User-Agent": `${OWNER}-${REPO}-website-build`, + "X-GitHub-Api-Version": "2022-11-28", + }; + const token = process.env.GITHUB_TOKEN || process.env.GH_TOKEN; + if (token) headers.Authorization = `Bearer ${token}`; + + const releases = []; + let url = API; + while (url) { + const response = await fetch(url, { headers }); + if (!response.ok) { + throw new Error(`GitHub API ${response.status} ${response.statusText}`); + } + releases.push(...(await response.json())); + url = nextPageUrl(response.headers.get("link")); + } + return releases; +} + +// Shape one API release into the flat record the template renders. +function toRecord(release) { + return { + tag: release.tag_name, + name: release.name || release.tag_name, + url: release.html_url, + date: formatDate(release.published_at || release.created_at), + dateIso: release.published_at || release.created_at, + prerelease: release.prerelease === true, + bodyHtml: release.body ? 
md.render(release.body) : "", + assets: (release.assets || []).map((asset) => ({ + name: asset.name, + url: asset.browser_download_url, + size: formatBytes(asset.size), + downloads: asset.download_count || 0, + })), + }; +} + +const EMPTY = { hasData: false, releasesUrl: RELEASES_URL, count: 0, releases: [] }; + +export default async function () { + try { + const published = (await fetchAllReleases()) + .filter((release) => release.draft !== true) + .sort((a, b) => new Date(b.published_at || b.created_at) - new Date(a.published_at || a.created_at)) + .map(toRecord); + + if (!published.length) return EMPTY; + + return { + hasData: true, + releasesUrl: RELEASES_URL, + count: published.length, + latest: published[0], + releases: published, + }; + } catch (error) { + // Degrade gracefully — a broken build is worse than a stale releases page. + console.warn(`⚠ releases.js: ${error.message} — rendering empty state`); + return EMPTY; + } +} diff --git a/website/src/_includes/layouts/base.njk b/website/src/_includes/layouts/base.njk index 438d1b49..e4d48c62 100644 --- a/website/src/_includes/layouts/base.njk +++ b/website/src/_includes/layouts/base.njk @@ -3,7 +3,7 @@ path straight from the URL, so language alternates never double-prefix (/zh/zh/...) even when an auto-generated page reports the wrong `lang`. Set `noTranslation: true` in a page's front matter to opt it out of the - language cluster entirely (e.g. the English-only README). -#} + language cluster entirely (e.g. the English-only Releases page). 
-#} {%- set effLang = 'zh' if (page.url == '/zh/' or page.url.startsWith('/zh/')) else (lang | default('en')) -%} {%- set basePath = (page.url | replace('/zh/', '/')) if effLang == 'zh' else page.url -%} diff --git a/website/src/assets/css/styles.css b/website/src/assets/css/styles.css index faba5ba6..6dfb6a13 100644 --- a/website/src/assets/css/styles.css +++ b/website/src/assets/css/styles.css @@ -1054,6 +1054,26 @@ button { cursor: pointer; font-family: inherit; border: none; background: none; .conf-chart__caption code { font-family: var(--font-mono); font-size: 0.85em; } /* ── Conformance methodology page bits ─────────────────────── */ +.conf-links { + display: flex; + flex-wrap: wrap; + gap: var(--space-3); + margin: var(--space-6) 0; +} +.conf-links a { + display: inline-flex; + align-items: center; + padding: var(--space-2) var(--space-4); + border: 1px solid var(--color-border-bright); + border-radius: var(--radius-md); + background: var(--color-bg-secondary); + color: var(--color-text-primary); + font-size: 0.875rem; + font-weight: 600; + text-decoration: none; + transition: border-color 0.15s, color 0.15s; +} +.conf-links a:hover { border-color: var(--color-primary); color: var(--color-primary); } .conf-correction { display: flex; align-items: center; @@ -1091,6 +1111,38 @@ button { cursor: pointer; font-family: inherit; border: none; background: none; font-size: 0.8125rem; } +/* ── Releases (/docs/releases/) ────────────────────────────── */ +.releases-intro { font-size: 1.0625rem; } +.release-list { display: flex; flex-direction: column; gap: var(--space-6); margin-top: var(--space-8); } +.release { + padding: var(--space-6); + background: var(--color-bg-secondary); + border: 1px solid var(--color-border-bright); + border-radius: var(--radius-lg); +} +.release-head { display: flex; align-items: center; flex-wrap: wrap; gap: var(--space-3); margin-bottom: var(--space-4); } +.release-title { font-family: var(--font-mono); font-size: 1.25rem; font-weight: 
700; margin: 0; padding: 0; border: none; } +.release-title a { color: var(--color-text-primary); text-decoration: none; } +.release-title a:hover { color: var(--color-primary); } +.release-badge { font-size: 0.6875rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.06em; padding: 0.15em 0.5em; border-radius: var(--radius-sm); } +.release-badge--latest { color: var(--color-success); background: rgba(52, 211, 153, 0.12); border: 1px solid rgba(52, 211, 153, 0.35); } +.release-badge--pre { color: var(--color-warning); background: rgba(251, 191, 36, 0.12); border: 1px solid rgba(251, 191, 36, 0.35); } +.release-date { margin-left: auto; font-size: 0.8125rem; color: var(--color-text-muted); font-family: var(--font-mono); } +/* Tame the heading scale of the maintainer-authored notes so a release's own + `## headings` don't impersonate the docs page's top-level sections. */ +.release-notes > :first-child { margin-top: 0; } +.release-notes h1, .release-notes h2, .release-notes h3, .release-notes h4 { + font-size: 1rem; font-weight: 600; margin-top: var(--space-5); margin-bottom: var(--space-2); + padding: 0; border: none; color: var(--color-text-primary); letter-spacing: 0; +} +.release-assets { margin-top: var(--space-5); border-top: 1px solid var(--color-border); padding-top: var(--space-4); } +.release-assets summary { cursor: pointer; font-size: 0.875rem; font-weight: 600; color: var(--color-text-secondary); } +.release-assets summary:hover { color: var(--color-primary); } +.release-asset-list { list-style: none !important; padding-left: 0 !important; margin-top: var(--space-3); } +.release-asset-list li { display: flex; flex-wrap: wrap; align-items: baseline; gap: var(--space-3); margin-bottom: var(--space-2); } +.release-asset-list a { font-family: var(--font-mono); font-size: 0.875rem; } +.release-asset-meta { font-size: 0.75rem; color: var(--color-text-muted); font-family: var(--font-mono); } + /* ── Responsive 
────────────────────────────────────────────── */ @media (max-width: 640px) { .section-heading { font-size: 1.75rem; } diff --git a/website/src/docs/comparison.md b/website/src/docs/comparison.md index 4d9bfa9a..1991ab7b 100644 --- a/website/src/docs/comparison.md +++ b/website/src/docs/comparison.md @@ -40,7 +40,7 @@ Basilisk removes the choice. There is no permissive mode to fall back to. | Feature | Basilisk | Pyright | mypy | ty | Pyrefly | |---|---|---|---|---|---| | Strict by default | ✅ | ❌ opt-in | ❌ opt-in | ❌ opt-in | ❌ opt-in | -| PEP conformance¹ | 40.4% current (→100% target) | ~99% | ~58% | early alpha | ~86% | +| PEP conformance¹ | {{ conformance.scorePct }}% current (→100% target) | ~99% | ~58% | early alpha | ~86% | | Implementation | Rust | TypeScript | Python/C | Rust | Rust | | Runtime required | None | Node.js | Python | None | None | | Full LSP (completions, hover, goto) | ✅ | Pylance only | ❌ | Basic | Basic | @@ -158,7 +158,7 @@ Basilisk is not a faster version of an existing tool. It occupies a different po 5. WASM plugin system (planned) — extensible without forking, secure by design **Where Basilisk is not yet the best choice:** -- PEP conformance: Basilisk currently passes 40.4% of the official conformance suite (59/146, counting errors+warnings — the strictest grading), with 285 false positives and 36 missed required errors still being driven down. Pyright covers far more edge cases today. Basilisk's target is 100%; it's not there yet. +- PEP conformance: Basilisk currently passes {{ conformance.scorePct }}% of the official conformance suite ({{ conformance.pass }}/{{ conformance.total }}, counting errors+warnings — the strictest grading), with {{ conformance.fp }} false positives and {{ conformance.missed }} missed required errors still being driven down. Pyright covers far more edge cases today. Basilisk's target is 100%; it's not there yet. See [how we measure conformance](/docs/conformance/). 
- Plugin ecosystem: mypy's Django and SQLAlchemy plugins are mature. Basilisk's WASM plugins are planned. - Maturity: Pylance is feature-complete today (though proprietary and VS Code only). Basilisk is in alpha. diff --git a/website/src/docs/index.md b/website/src/docs/index.md index d78b1707..1d7d38e8 100644 --- a/website/src/docs/index.md +++ b/website/src/docs/index.md @@ -68,7 +68,7 @@ Basilisk is currently in **alpha** — the core checker, LSP server, and editor |---|---|---| | 1 | Parser, resolver, type checker, CLI | Complete | | 2 | LSP server, editor extensions (VS Code, Cursor, Zed, Neovim) | Complete | -| 3 | Expanded rule set, PEP conformance push (currently 40.4%, target 100%), gradual adoption | In progress | +| 3 | Expanded rule set, PEP conformance push (currently {{ conformance.scorePct }}%, target 100%), gradual adoption | In progress | | 4 | Ownership & immutability analysis (Mojo-inspired) | Planned | | 5 | WASM plugins, Django/Pydantic/SQLAlchemy | Planned | | 6 | 95%+ PEP, SARIF/JUnit, JetBrains extension | Planned | diff --git a/website/src/docs/releases.njk b/website/src/docs/releases.njk new file mode 100644 index 00000000..98b7e47a --- /dev/null +++ b/website/src/docs/releases.njk @@ -0,0 +1,61 @@ +--- +layout: layouts/docs.njk +title: "Basilisk Releases — Downloads & Changelog" +description: "Every published Basilisk release — version, date, release notes, and downloadable binaries and editor extensions — generated at build time straight from GitHub Releases." +keywords: basilisk releases, download basilisk, changelog, release notes, python language server downloads, vsix +date: 2026-06-23 +dateModified: 2026-06-23 +author: The Basilisk Project +# English-only — the notes come verbatim from GitHub Releases, so opt this page +# out of the language cluster (no /zh/ hreflang or switcher link that would 404). +noTranslation: true +eleventyNavigation: + key: Releases + order: 9 +permalink: /docs/releases/ +--- +

    Releases

    + +{%- if releases.hasData %} +

    + All {{ releases.count }} published Basilisk releases, generated at + build time straight from GitHub Releases. + Each entry links to its tag and lists every downloadable asset. +

    + +
    + {%- for release in releases.releases %} +
    +
    +

    {{ release.name }}

    + {%- if loop.first %}Latest{% endif %} + {%- if release.prerelease %}Pre-release{% endif %} + {%- if release.date %}{% endif %} +
    + + {%- if release.bodyHtml %} +
    {{ release.bodyHtml | safe }}
    + {%- endif %} + + {%- if release.assets.length %} +
    + {{ release.assets.length }} download{{ "s" if release.assets.length != 1 }} +
      + {%- for asset in release.assets %} +
    • + {{ asset.name }} + {{ asset.size }} · {{ asset.downloads }} download{{ "s" if asset.downloads != 1 }} +
    • + {%- endfor %} +
    +
    + {%- endif %} +
    + {%- endfor %} +
    +{%- else %} +

    + Release data could not be loaded at build time. See + all Basilisk releases on GitHub. +

    +{%- endif %} diff --git a/website/src/docs/rules/index.md b/website/src/docs/rules/index.md index 0120defe..78e9d19d 100644 --- a/website/src/docs/rules/index.md +++ b/website/src/docs/rules/index.md @@ -17,7 +17,7 @@ Every Basilisk diagnostic has a unique code in the format `BSK-EXXXX` (error) or Rules are enabled by default. You can dial individual rules down per-file or per-path from your editor or `pyproject.toml` — strict is the default, not a cage. -Basilisk ships **155 diagnostic codes** (150 errors, 5 warnings) spanning the full Python typing surface — generics, protocols, dataclasses, TypedDicts, overloads, literals, enums, and more — and is scored by the [official Python typing conformance suite](https://github.com/python/typing/blob/main/conformance/results/results.html) (currently **40.4%**, 59 / 146 (errors+warnings, strictest); target 100%). The two foundational groups have worked examples: +Basilisk ships **155 diagnostic codes** (150 errors, 5 warnings) spanning the full Python typing surface — generics, protocols, dataclasses, TypedDicts, overloads, literals, enums, and more — and is scored by the [official Python typing conformance suite](https://github.com/python/typing/blob/main/conformance/results/results.html) (currently **{{ conformance.scorePct }}%**, {{ conformance.pass }} / {{ conformance.total }} (errors+warnings, strictest); target 100% — [how we measure](/docs/conformance/)). The two foundational groups have worked examples: | Group | Codes | Description | |---|---|---| diff --git a/website/src/index.njk b/website/src/index.njk index f8535b2f..cd84eb01 100644 --- a/website/src/index.njk +++ b/website/src/index.njk @@ -252,19 +252,19 @@ benchmarkStrings:
    - 40.4% + {{ conformance.scorePct }}% PEP conformance score

    04 — PEP conformance

    -

    59 of 146 tests passing (40.4%).
    Target: 100%.

    +

    {{ conformance.pass }} of {{ conformance.total }} tests passing ({{ conformance.scorePct }}%).
    Target: 100%.

    - Scored by the official Python typing conformance suite — + Scored by the official Python typing conformance suite — the same harness used to measure Pyright (~99%¹), mypy (~58%¹), and Pyrefly (~86%¹). - Today 3 of 21 categories pass at 100%; the suite reports 285 false-positive diagnostics and 36 missed required errors, and we are driving both to zero. 100% is the target, not a present-day claim — this is honest, in-progress work, measured by a scorer we don’t control. + Today {{ conformance.categoriesPass100 }} of {{ conformance.categoriesTotal }} categories pass at 100%; the suite reports {{ conformance.fp }} false-positive diagnostics and {{ conformance.missed }} missed required errors, and we are driving both to zero. 100% is the target, not a present-day claim. How we measure this →

    @@ -372,7 +372,7 @@ benchmarkStrings: PEP conformance ¹ - 40.4% (59/146, target 100%) + {{ conformance.scorePct }}% ({{ conformance.pass }}/{{ conformance.total }}, target 100%) ~99% ~58% full-pass alpha @@ -440,51 +440,6 @@ benchmarkStrings: - -
    -
    - -

    A scorer we don’t control.

    -

    - Our 40.4% is not graded by us. We download and run python/typing’s own - conformance tooling — the exact harness that grades pyright, mypy, - pyrefly, ty, zuban, and pycroscope. -

    -
      -
    1. - We pin python/typing’s conformance tool to commit 268d0c4e and - run its get_expected_errors and diff_expected_errors - functions unmodified — we do not reimplement or relax the scorer. -
    2. -
    3. - For each of the 146 test files, the real basilisk check output is - compared against the suite’s # E annotations. Every - severity == error diagnostic counts — no diagnostic - codes are excluded. -
    4. -
    5. - A file passes only with zero discrepancies: every - # E line gets an error, every # E[tag] group is - satisfied, and no error lands on a line the suite does not mark. - One extra diagnostic (a false positive) fails the whole file. -
    6. -
    7. - Today that yields 59 of 146 files passing (40.4%) — counting - every diagnostic basilisk emits, errors and warnings, the strictest grading (and how - pyright is graded). 285 false positives and 36 missed required errors remain to clear. - The looser errors-only view is 70 of 146 (47.9%). The headline is the strict 40.4%. -
    8. -
    -

    - Full methodology and annotation rules are documented in the - python/typing conformance README. - 100% remains the target — not a claim we make today. -

    -
    -
    - diff --git a/website/src/readme.html b/website/src/readme.html deleted file mode 100644 index 7e4628ee..00000000 --- a/website/src/readme.html +++ /dev/null @@ -1,240 +0,0 @@ ---- -layout: layouts/docs.njk -title: README -description: Crate architecture, diagnostic rules, and development guide for Basilisk. -keywords: basilisk, readme, crate architecture, rust, python type checker -# English-only crate README — no Chinese twin exists, so opt it out of the -# language cluster (no /zh/readme/ hreflang or switcher link, which would 404). -noTranslation: true -eleventyNavigation: - key: README - order: 99 -permalink: /readme/ ---- - -

    - Basilisk -

    - -

    Basilisk

    - -

    - The open-source Python language server.
    - Complete language server, type checker, debugger, and profiler — strict by default.
    - VS Code, Cursor & Windsurf (Open VSX) • Zed • Neovim. Built in Rust — single binary, no runtime. -

    - -

    - Website  •  - Install  •  - Quick Start  •  - Rules  •  - Refactoring  •  - Compare -

    - ---- - -

    - Basilisk in action — type checking, diagnostics, and refactoring in the editor -

    - -## Try it - -The `examples/` folder has ready-to-go Python files: - -```sh -basilisk check examples/bad.py # everything flagged -basilisk check examples/good.py # clean -basilisk check examples/mixed.py # some errors, some clean -basilisk check examples/ # all three at once -``` - ---- - -## Quick example - - - - - - - - - - -
    Basilisk rejects this | Fixed
    - -```python -def greet(name): - return "Hello " + name -``` - - - -```python -def greet(name: str) -> str: - return "Hello " + name -``` - -
    - ---- - -## Rules - -All rules are on by default. There is no way to relax them globally. - -### Annotation rules (E0001-E0005) - -| Code | Triggers when | -|------|---------------| -| `BSK-E0001` | Function parameter has no type annotation | -| `BSK-E0002` | Function is missing a return type annotation | -| `BSK-E0003` | Variable assignment has no type annotation | -| `BSK-E0004` | `*args` or `**kwargs` has no type annotation | -| `BSK-E0005` | Class attribute has no type annotation | - -### Type correctness (E0010-E0029) - -| Code | Triggers when | -|------|---------------| -| `BSK-E0010` | Import cannot be resolved | -| `BSK-E0011` | Explicit `Any` annotation (emitted as a warning), or a return type mismatch | -| `BSK-E0012` | Argument type does not match parameter type | -| `BSK-E0013` | Return type does not match declared return type | -| `BSK-E0014` | Assignment type does not match declared variable type | -| `BSK-E0015` | Wrong number of type arguments (e.g. `list[int, str]`) | -| `BSK-E0016` | Method override has incompatible signature | -| `BSK-E0017` | Class variable override has incompatible type | -| `BSK-E0018` | Reference to an undefined name | -| `BSK-E0019` | Variable used before it is assigned | -| `BSK-E0020` | `@overload` group has no non-decorated implementation | -| `BSK-E0021` | Two `@overload` signatures overlap | -| `BSK-E0022` | Dict key type is not hashable | -| `BSK-E0023` | `match` statement is not exhaustive | -| `BSK-E0024` | Type expression is not valid (e.g. a numeric literal used as a type) | -| `BSK-E0025` | Override method is missing the `@override` decorator | -| `BSK-E0026` | `TypeVar` declared with a single constraint | -| `BSK-E0027` | Duplicate `TypeVar` in a `Generic[...]` base | -| `BSK-E0029` | Method defined inside a `TypedDict` class | - -These are the most common rules. 
Basilisk ships **155 diagnostic codes** in total (150 errors, 5 warnings) — see the [complete diagnostic reference](https://www.basilisk-python.dev/docs/rules/) (generated from the checker source by `scripts/gen_rules_reference.py`). - ---- - -## Refactoring - -Basilisk ships a suite of refactoring code actions — available via the lightbulb (code actions) menu in VS Code, Cursor, and Windsurf (via Open VSX), plus Zed and Neovim. No extra extensions required. - -| Action | Kind | What it does | -|--------|------|-------------| -| **Extract variable** | `refactor.extract` | Extract expression into a named variable | -| **Extract variable (replace all)** | `refactor.extract` | Replace all identical occurrences | -| **Extract constant** | `refactor.extract` | Extract to module-level `SCREAMING_SNAKE` constant | -| **Extract function** | `refactor.extract` | Extract selected statements into a new function | -| **Inline variable** | `refactor.inline` | Replace variable with its value, delete assignment | -| **Inline function** | `refactor.inline` | Replace call with function body (single-expression) | -| **Move to new file** | `refactor.move` | Move class/function to a new file, leave import behind | -| **Move to existing file** | `refactor.move` | Move class/function to a chosen file via command | -| **Rename symbol** | — | Scope-aware rename with keyword arg, `self.attr`, docstring, and `__all__` updates | -| **Remove parameter** | `refactor.rewrite` | Remove parameter from function + all call sites | -| **Add parameter** | `refactor.rewrite` | Add `new_param=None` to function signature | -| **Sort parameters** | `refactor.rewrite` | Alphabetically sort parameters (keeps `self`/`cls` first) | -| **Implement abstract methods** | `refactor.rewrite` | Generate method stubs for abstract base class | -| **Convert Union/Optional** | `refactor.rewrite` | `Union[X, Y]` ↔ `X \| Y`, `Optional[X]` ↔ `X \| None` | -| **Convert constructs** | `refactor.rewrite` | f-string ↔ 
`.format()`, `dict()` ↔ `{}`, `list()` ↔ `[]`, ternary ↔ if/else, NamedTuple class ↔ functional | - -Extract function detects async functions, methods (`self`/`cls`), and rejects selections containing `yield`, `break`, or `continue`. - ---- - -## Output format - -Diagnostics use rustc-style output: - -``` -error[BSK-E0001]: Missing parameter type annotation for `data` - --> src/utils.py:14:13 - | -14 | def process(data): - | ^^^^ - | - = help: Add a type annotation: `data: ` - = note: In Basilisk, all function parameters require explicit types - = see: https://www.basilisk-python.dev/errors/BSK-E0001 -``` - -| Exit code | Meaning | -|-----------|---------| -| `0` | Clean — no errors | -| `1` | Type errors found | -| `3` | Internal error | - ---- - -## Architecture - -Basilisk is a Cargo workspace. Each crate owns one layer of the analysis pipeline. - -> **Pipeline:** source text → parser → AST → resolver → scopes → checker → diagnostics -> -> **Incremental:** `basilisk-db` caches ASTs and resolved modules by content hash so only changed files re-run the pipeline. 
- -### Analysis pipeline - -| Crate | What it does | Status | -|-------|-------------|--------| -| [basilisk-parser](crates/basilisk-parser/) | Wraps `ruff_python_parser` to parse `.py` source into a typed AST | Done | -| [basilisk-resolver](crates/basilisk-resolver/) | Name resolution and scope analysis — catches undefined names and use-before-assignment | Done | -| [basilisk-checker](crates/basilisk-checker/) | Core type checker — implements all E0001-E0025 rules | Done | -| [basilisk-cli](crates/basilisk-cli/) | The `basilisk` binary — wires the full pipeline together | Done | - -### LSP and infrastructure - -| Crate | What it does | Status | -|-------|-------------|--------| -| [basilisk-lsp](crates/basilisk-lsp/) | LSP server — diagnostics, hover, go-to-def, code actions, refactoring, debugging | Working | -| [basilisk-db](crates/basilisk-db/) | Salsa-based incremental computation for <10ms latency | Working | -| [basilisk-config](crates/basilisk-config/) | Configuration parsing (`pyproject.toml`, `basilisk.json`) | Done | -| [basilisk-stubs](crates/basilisk-stubs/) | Bundled type stubs (typeshed) — no internet needed | Working | -| [basilisk-uv](crates/basilisk-uv/) | uv package manager integration for the LSP | Working | -| [basilisk-common](crates/basilisk-common/) | Shared constants and types — zero deps, WASM-compatible | Done | -| [basilisk-test-utils](crates/basilisk-test-utils/) | Shared E2E test helpers | Done | - -### Future capabilities - -| Crate | What it does | Status | -|-------|-------------|--------| -| [basilisk-mojo](crates/basilisk-mojo/) | Mojo-inspired ownership/immutability analysis (`Borrowed`, `InOut`, `Owned`) | Phase 4 | -| [basilisk-compiler](crates/basilisk-compiler/) | Compiles typed Python to native code | Future | -| [basilisk-plugin](crates/basilisk-plugin/) | WASM plugin host for Django, Pydantic, SQLAlchemy type extensions | Phase 5 | - -### Editor extensions - -| Extension | Editor | Status | -|-----------|--------|--------| 
-| [vscode-extension](vscode-extension/) | VS Code | Working | -| [basilisk.nvim](basilisk.nvim/) | Neovim 0.10+ | Working | -| [basilisk-zed](basilisk-zed/) | Zed | Phase 2 | - ---- - -## Development - -```sh -cargo build # build all crates -cargo test # run all tests -cargo clippy # lint (zero warnings policy) -cargo fmt # format -``` - -Rust 1.87+ required. - ---- - -## License - -MIT. - -Built by [NIMBLESITE PTY LTD](https://www.nimblesite.co). diff --git a/website/src/zh/docs/comparison.md b/website/src/zh/docs/comparison.md index 559f5843..94091243 100644 --- a/website/src/zh/docs/comparison.md +++ b/website/src/zh/docs/comparison.md @@ -35,7 +35,7 @@ Basilisk 消除了这个选择。没有宽松模式可以回退。 | 功能 | Basilisk | Pyright | mypy | ty | Pyrefly | |---|---|---|---|---|---| | 默认严格 | ✅ | ❌ 选择加入 | ❌ 选择加入 | ❌ 选择加入 | ❌ 选择加入 | -| PEP 符合性¹ | 当前 40.4%(目标 →100%) | ~99% | ~58% | 早期 alpha | ~86% | +| PEP 符合性¹ | 当前 {{ conformance.scorePct }}%(目标 →100%) | ~99% | ~58% | 早期 alpha | ~86% | | 实现语言 | Rust | TypeScript | Python/C | Rust | Rust | | 需要运行时 | 无 | Node.js | Python | 无 | 无 | | 完整 LSP(补全、悬停、跳转) | ✅ | 仅 Pylance | ❌ | 基础 | 基础 | @@ -153,7 +153,7 @@ Basilisk 不是现有工具的更快版本。它占据了不同的位置: 5. 
WASM 插件系统(计划中)——无需分叉即可扩展,设计安全 **Basilisk 尚不是最佳选择的地方:** -- PEP 符合性:Basilisk 当前通过官方符合性套件的 40.4%(59/146,错误加警告,最严格评分),仍有 285 处误报和 36 处遗漏的必需错误正在被压低。Pyright 今天覆盖远更多边缘情况。Basilisk 的目标是 100%;还未达到。 +- PEP 符合性:Basilisk 当前通过官方符合性套件的 {{ conformance.scorePct }}%({{ conformance.pass }}/{{ conformance.total }},错误加警告,最严格评分),仍有 {{ conformance.fp }} 处误报和 {{ conformance.missed }} 处遗漏的必需错误正在被压低。Pyright 今天覆盖远更多边缘情况。Basilisk 的目标是 100%;还未达到。参见[我们如何衡量符合性](/zh/docs/conformance/)。 - 插件生态系统:mypy 的 Django 和 SQLAlchemy 插件已经成熟。Basilisk 的 WASM 插件是计划中的。 - 成熟度:Pylance 今天功能完整(虽然是专有的,且仅限 VS Code)。Basilisk 处于 alpha 阶段。 diff --git a/website/src/zh/docs/conformance.md b/website/src/zh/docs/conformance.md new file mode 100644 index 00000000..a389cbf7 --- /dev/null +++ b/website/src/zh/docs/conformance.md @@ -0,0 +1,107 @@ +--- +layout: layouts/docs.njk +title: "Basilisk 如何衡量 PEP 符合性" +description: "Basilisk 的 PEP 符合性得分如何用官方 python/typing 符合性套件衡量——套件是什么、评分如何进行、我们运行的字节级一致且 sha256 固定的计算器,以及我们对自己评分所做的更正。" +keywords: pep 符合性, python 类型符合性套件, basilisk 符合性得分, 类型检查器评分, python/typing 计算器 +lang: zh +--- +{% from "conformance-chart.njk" import chart %} + +# 我们如何衡量 PEP 符合性 + +Basilisk 由**官方 `python/typing` 符合性套件**评分——也就是类型社区用来为 pyright、mypy、pyrefly、ty 等打分的同一套测试与评分工具。我们在每次改动时,对真实的 `basilisk` 二进制文件原样运行该工具。 + +目前的结果是 **{{ conformance.scorePct }}%**——{{ conformance.total }} 个测试文件中 **{{ conformance.pass }}** 个通过,捕获 {{ conformance.caught }} 个必需错误,仍有 **{{ conformance.fp }} 处误报**和 **{{ conformance.missed }} 处遗漏的必需错误**待清除。{{ conformance.categoriesTotal }} 个类别中有 {{ conformance.categoriesPass100 }} 个达到 100%。目标是 100%,我们逐步逼近。 + + + +## 符合性套件是什么 + +[Python 类型规范](https://typing.python.org/en/latest/spec/)定义了类型系统应当如何运作——泛型、协议、dataclass、`TypedDict`、重载、字面量等。为了让规范不停留在纸面上,类型社区在 [`python/typing`](https://github.com/python/typing/tree/main/conformance) 仓库中与规范并行维护着一套**符合性测试套件**。 + +它的工作方式是: + +- 每个规范章节对应一个或多个**测试文件**——普通的 Python 模块,用 `# E` 注释标出每一行符合规范的类型检查器**必须**报告错误的位置(以及用 `# E[tag]` 组标出多个相关错误中报告其一即可的位置)。 +- 
一个小型**评分工具**对这些文件运行某个类型检查器,并将其输出与注释做差异比对。文件只有在差异为空时才*通过*:每个必需错误都被报告,且没有任何诊断落在套件未标记的行上。 +- 维护者用它为每个检查器打分,并发布[结果表](https://github.com/python/typing/blob/main/conformance/results/results.html)——pyright 约 99%、pyrefly 约 86% 等数字便是这样得出的。 + +我们使用的正是这套套件,固定在提交 [`{{ conformance.pinnedRef }}`](https://github.com/python/typing/tree/{{ conformance.pinnedRef }}/conformance)。因为同样的工具与文件为所有人打分,这个数字在各检查器之间可比,也不是我们能朝自己有利方向调整的。 + +## 一个文件如何评分 + +整个算法就是套件 `main.py` 中的两个函数——`get_expected_errors`(读取 `# E` 注释)与 `diff_expected_errors`(与检查器输出比对)。文件**当且仅当**该差异为空时通过: + +- 套件的规则(`upstream_main.py:185`):`"Fail" if errors_diff.strip() else "Pass"` + +我们计入检查器发出的**每一个**诊断——错误*和*警告,**不排除任何诊断代码**。这是套件最严格的读法,也是参考检查器 pyright 的评分方式。一个多余的诊断(一处误报)就会让整个文件失败,这正是误报数与通过数同样重要的原因。 + +## 我们如何在不分叉的情况下运行它 + +套件的 `main.py` 是给 `python/typing` 维护者用的批处理工具:它一次性为所有已知检查器打分,引入 TOML 配置/报告依赖,并写出结果矩阵。它无法调用我们的二进制文件。因此,正如套件为每个检查器所做的那样(`PyrightTypeChecker`、`MypyTypeChecker` 等),我们加一个薄薄的**适配器**,复用套件自己的评分而非重新实现。我们的 [`score.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/score.py): + +1. **适配器**——运行 `basilisk check --output json`,把结果整理成套件函数期望的 `{line: [errors]}` 字典(这是套件唯一无法替我们做的事)。 +2. **计算器**——从一份字节级一致的套件 `main.py` committed 副本中导入 `get_expected_errors` 与 `diff_expected_errors` 并原样调用(`score.py:287` 对应套件自己在 `upstream_main.py:175` 的调用)。它不含任何自己的评分逻辑。 +3. **门禁**——将结果与 `coverage-thresholds.json` 比较,任何回归都让 CI 失败。 + +为保证计算器可信,内置副本经 **sha256 固定**。`score.py` 在每次运行时重新哈希它,若有漂移则拒绝评分(`score.py:99`);本网站在构建时也会再次重新哈希: + +{% if conformance.verified %} +

    ✓ 构建时已校验 —— conformance/upstream_main.py 为 {{ conformance.upstreamBytes }} 字节,sha256 {{ conformance.sha256Short }}…,与固定值一致

    +{% endif %} + +保持官方文件不被改动正是要点所在:适配器与门禁住在另一个可审计的文件里,因此计算器逐字节就是套件自己的那一份。 + +## 我们做的一处更正 + +我们的得分过去由仓库内自己的一个脚本衡量,而它是**错误的**。该脚本将若干诊断代码排除在评分之外,且未计入误报,因此报出的数字一路爬到了 100%。这是一个诚实的失误,并非有意调高——但它仍然是错的。 + +我们用上面所述的官方计算器替换了它。在计入每个诊断、不排除任何代码之后,诚实的数字是 **{{ conformance.scorePct }}%**: + +
    + 100% + + {{ conformance.scorePct }}% + 检查器没有变差——是衡量变正确了。100% 是我们正在努力达成的目标,而非对当下的宣称。 +
    + +下面的图表在构建时直接读取 **`conformance/conformance_status.csv` 的 git 历史**:每个改动该文件的提交对应一个点,绘制该提交实际记录的得分。 + +{{ chart(conformance, { + "label": "符合性得分随时间变化", + "heading": "从早期仓库内数字到官方计算器", + "prevLegend": "早期仓库内脚本(排除部分代码、未计入误报)", + "officialLegend": "官方 python/typing 计算器", + "dropNote": "在 " + conformance.chart.peak.shortDate + ",仓库内脚本报告了 " + conformance.chart.peak.score + "%。官方计算器首次于 " + conformance.chart.current.shortDate + " 运行,报出 " + conformance.chart.current.score + "%——这是更正,而非回归。", + "caption": "每个点都是对 conformance/conformance_status.csv 的真实提交,每次构建重新计算。悬停某点可查看其日期、提交、得分与误报数。" +}) }} + +## 各类别现状 + +构建时从 `conformance/conformance_status.csv` 实时读取: + +
    + + + +{%- for cat in conformance.categories %} + +{%- endfor %} + +
    类别 | 通过 | 得分
    {{ cat.label }} | {{ cat.pass }} / {{ cat.total }} | {{ cat.pct }}%
    +
    + +## 自己复现 + +```bash +# 构建二进制、获取(被 git 忽略的)测试夹具、对其运行官方 python/typing +# 计算器、写出 conformance_status.csv,并强制执行 coverage-thresholds.json 中的棘轮门禁。 +make conformance +``` + +这一切都在两个文件里:[`conformance/score.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/score.py)(我们的适配器与门禁)和 [`conformance/upstream_main.py`](https://github.com/Nimblesite/Basilisk/blob/main/conformance/upstream_main.py)(套件的计算器,committed 且经 sha256 固定)。完整的注解规则见 [python/typing 符合性 README](https://github.com/python/typing/blob/main/conformance/README.md)。 diff --git a/website/src/zh/docs/index.md b/website/src/zh/docs/index.md index e23211f9..1528a5df 100644 --- a/website/src/zh/docs/index.md +++ b/website/src/zh/docs/index.md @@ -63,7 +63,7 @@ Basilisk 目前处于 **alpha**——核心检查器、LSP 服务器和编辑器 |---|---|---| | 1 | 解析器、解析器、类型检查器、CLI | 完成 | | 2 | LSP 服务器、编辑器扩展(VS Code、Cursor、Zed、Neovim) | 完成 | -| 3 | 扩展规则集,PEP 符合性攻坚(当前 40.4%,目标 100%),渐进式采用 | 进行中 | +| 3 | 扩展规则集,PEP 符合性攻坚(当前 {{ conformance.scorePct }}%,目标 100%),渐进式采用 | 进行中 | | 4 | 所有权与不可变性分析(Mojo 启发) | 计划中 | | 5 | WASM 插件,Django/Pydantic/SQLAlchemy | 计划中 | | 6 | 95%+ PEP,SARIF/JUnit,JetBrains 扩展 | 计划中 | diff --git a/website/src/zh/docs/rules/index.md b/website/src/zh/docs/rules/index.md index af72091e..227ffc7f 100644 --- a/website/src/zh/docs/rules/index.md +++ b/website/src/zh/docs/rules/index.md @@ -12,7 +12,7 @@ lang: zh 规则默认全部启用。您可以通过编辑器或 `pyproject.toml`,按文件或路径将单个规则调低——严格是默认值,而不是牢笼。 -Basilisk 内置 **155 个诊断代码**(150 个错误,5 个警告),覆盖完整的 Python 类型表面(泛型、协议、dataclass、TypedDict、重载、字面量、枚举等),由[官方 Python 类型符合性套件](https://github.com/python/typing/blob/main/conformance/results/results.html)评分(当前符合率 **40.4%**,59 / 146(错误加警告,最严格);目标 100%)。下面记录了两个基础组;完整集合由检查器强制执行。 +Basilisk 内置 **155 个诊断代码**(150 个错误,5 个警告),覆盖完整的 Python 类型表面(泛型、协议、dataclass、TypedDict、重载、字面量、枚举等),由[官方 Python 类型符合性套件](https://github.com/python/typing/blob/main/conformance/results/results.html)评分(当前符合率 **{{ conformance.scorePct }}%**,{{ conformance.pass }} / {{ conformance.total }}(错误加警告,最严格);目标 100% 
—— [我们如何衡量](/zh/docs/conformance/))。下面记录了两个基础组;完整集合由检查器强制执行。 | 组 | 代码 | 描述 | |---|---|---| diff --git a/website/src/zh/index.njk b/website/src/zh/index.njk index 94546413..a2f3dedb 100644 --- a/website/src/zh/index.njk +++ b/website/src/zh/index.njk @@ -263,19 +263,19 @@ benchmarkStrings:
    - 40.4% + {{ conformance.scorePct }}% PEP 符合性得分

    04 — PEP 符合性

    -

    146 个测试中 59 个通过(40.4%)。
    目标:100%。

    +

    {{ conformance.total }} 个测试中 {{ conformance.pass }} 个通过({{ conformance.scorePct }}%)。
    目标:100%。

    - 由官方 Python 类型符合性套件评分—— + 由官方 Python 类型符合性套件评分—— 与衡量 Pyright(约 99%¹)、 mypy(约 58%¹) 和 Pyrefly(约 86%¹)的套件相同。 - 目前 21 个类别中有 3 个达到 100%;套件报告 285 处误报和 36 处遗漏的必需错误,我们正将两者都降到零。100% 是目标,而非当下的宣称 —— 这是诚实、进行中的工作,由我们无法操控的评分器衡量。 + 目前 {{ conformance.categoriesTotal }} 个类别中有 {{ conformance.categoriesPass100 }} 个达到 100%;套件报告 {{ conformance.fp }} 处误报和 {{ conformance.missed }} 处遗漏的必需错误,我们正将两者都降到零。100% 是目标,而非当下的宣称。我们如何衡量 →

    @@ -382,7 +382,7 @@ benchmarkStrings: PEP 符合性 ¹ - 40.4% (59/146,目标 100%) + {{ conformance.scorePct }}% ({{ conformance.pass }}/{{ conformance.total }},目标 100%) ~99% ~58% 完全通过 alpha @@ -450,47 +450,6 @@ benchmarkStrings: - -
    -
    - -

    一个我们无法操控的评分器。

    -

    - 我们的 40.4% 不是我们自己打的分。我们下载并运行 python/typing 自己的符合性工具—— - 正是用来为 pyright、mypy、pyrefly、ty、zuban 和 pycroscope 打分的同一套程序。 -

    -
      -
    1. - 我们将 python/typing 的符合性工具固定在提交 268d0c4e,并 - 原样运行其 get_expected_errors 与 - diff_expected_errors 函数——我们既不重新实现也不放宽评分器。 -
    2. -
    3. - 对全部 146 个测试文件,真实的 basilisk check 输出会与套件的 - # E 注解逐一比对。每一个 severity == error 诊断都计入—— - 不排除任何诊断代码。 -
    4. -
    5. - 文件只有在零差异时才通过:每个 # E 行都得到一个错误, - 每个 # E[tag] 组都被满足,且没有错误落在套件未标记的行上。 - 哪怕多出一个诊断(一处误报)也会使整个文件失败。 -
    6. -
    7. - 今天的结果是 146 个文件中 59 个通过(40.4%)—— - 计入 basilisk 发出的所有诊断(错误和警告),这是最严格的评分(也是 pyright 的评分方式); - 285 处误报、36 处遗漏待清除;更宽松的「仅错误」视图为 70/146(47.9%)。头条数字是严格的 40.4%。 -
    8. -
    -

    - 完整方法论与注解规则记录在 - python/typing 符合性 README。 - 100% 仍是目标——而非我们当下作出的宣称。 -

    -
    -
    -