diff --git a/Makefile b/Makefile index 9a952ae..828475f 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ testcaches = .hypothesis .pytest_cache .pytype coverage.xml htmlcov .coverage all: version test build -develop: devversion package +develop: devversion package test python3 setup.py develop --uninstall python3 setup.py develop @@ -33,7 +33,9 @@ publish: distclean version package test @git push origin `cat VERSION` $(generatedcode): VERSION - python3 setup.py donothing + # this will generate the version subpackage inside clams package + python3 setup.py --help 2>/dev/null || echo "Ignore setuptools import error for now" + ls $(generatedcode)* # generating jsonschema depends on mmif-python and pydantic docs: mmif := $(shell grep mmif-python requirements.txt) @@ -97,5 +99,8 @@ distclean: @rm -rf dist $(artifact) build/bdist* clean: distclean @rm -rf VERSION VERSION.dev $(testcaches) $(buildcaches) $(generatedcode) + @rm -rf docs + @rm -rf .*cache + @rm -rf .hypothesis tests/.hypothesis cleandocs: @git checkout -- docs && git clean -fx docs diff --git a/clams/__init__.py b/clams/__init__.py index 6943815..bd1753f 100644 --- a/clams/__init__.py +++ b/clams/__init__.py @@ -1,10 +1,8 @@ +import argparse import sys -from mmif import __specver__ - +import mmif from clams import develop -from clams.mmif_utils import source -from clams.mmif_utils import rewind from clams.app import * from clams.app import __all__ as app_all from clams.appmetadata import AppMetadata @@ -16,34 +14,41 @@ def prep_argparser(): - import argparse parser = argparse.ArgumentParser() parser.add_argument( '-v', '--version', action='version', - version=version_template.format(__version__, __specver__) + version=version_template.format(__version__, mmif.__specver__) ) subparsers = parser.add_subparsers(title='sub-command', dest='subcmd') - for subcmd_module in [source, rewind, develop]: - subcmd_name = subcmd_module.__name__.rsplit('.')[-1] - subcmd_parser = 
subcmd_module.prep_argparser(add_help=False) - subparsers.add_parser(subcmd_name, parents=[subcmd_parser], - help=subcmd_module.describe_argparser()[0], - description=subcmd_module.describe_argparser()[1], - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - return parser + return parser, subparsers def cli(): - parser = prep_argparser() + parser, subparsers = prep_argparser() + cli_modules = {} + # thinly wrap all `mmif` subcommands + # this is primarily for backward compatibility for `source` and `rewind` subcmds + to_register = list(mmif.find_all_modules('mmif.utils.cli')) + # then add my own subcommands + to_register.append(develop) + for cli_module in to_register: + cli_module_name = cli_module.__name__.rsplit('.')[-1] + cli_modules[cli_module_name] = cli_module + subcmd_parser = cli_module.prep_argparser(add_help=False) + subparsers.add_parser(cli_module_name, parents=[subcmd_parser], + help=cli_module.describe_argparser()[0], + description=cli_module.describe_argparser()[1], + formatter_class=argparse.RawDescriptionHelpFormatter, + ) if len(sys.argv) == 1: parser.print_help(sys.stderr) sys.exit(1) args = parser.parse_args() - if args.subcmd == 'source': - source.main(args) - if args.subcmd == 'rewind': - rewind.main(args) - if args.subcmd == 'develop': - develop.main(args) + if args.subcmd not in cli_modules: + parser.print_help(sys.stderr) + else: + cli_modules[args.subcmd].main(args) + +if __name__ == '__main__': + cli() \ No newline at end of file diff --git a/clams/app/__init__.py b/clams/app/__init__.py index d873204..d6c6dcf 100644 --- a/clams/app/__init__.py +++ b/clams/app/__init__.py @@ -404,7 +404,13 @@ def cast(self, args: Dict[str, List[str]]) \ if valuetype == dict: casted.setdefault(k, {}).update(v) else: - casted.setdefault(k, []).append(v) + # pytype will complain about the next line, but it is actually correct + # casted.setdefault(k, []).append(v) + # so doing it in a more explicit way + if k in casted and isinstance(casted[k], 
list): + casted[k].append(v) + else: + casted[k] = [v] else: casted[k] = v # when an empty value is passed (usually as a default value) diff --git a/clams/appmetadata/__init__.py b/clams/appmetadata/__init__.py index 2265f2e..4a7bd3c 100644 --- a/clams/appmetadata/__init__.py +++ b/clams/appmetadata/__init__.py @@ -3,7 +3,7 @@ import subprocess import sys from pathlib import Path -from typing import Union, Dict, List, Optional, Literal +from typing import Union, Dict, List, Optional, Literal, Any import mmif import pydantic @@ -31,8 +31,8 @@ def get_clams_pyver(): import clams return clams.__version__ except ImportError: - version_fname = os.path.join(os.path.dirname(__file__), '..', '..', 'VERSION') - if os.path.exists(version_fname): + version_fname = Path(__file__).joinpath('../../VERSION') + if version_fname.exists(): with open(version_fname) as version_f: return version_f.read().strip() else: @@ -59,13 +59,21 @@ def get_mmif_specver(): return mmif.__specver__ +def pop_titles(js): + for prop in js.get('properties', {}).values(): + prop.pop('title', None) + + +def jsonschema_versioning(js): + js['$schema'] = pydantic.json_schema.GenerateJsonSchema.schema_dialect + js['$comment'] = f"clams-python SDK {get_clams_pyver()} was used to generate this schema" + + class _BaseModel(pydantic.BaseModel): - class Config: - @staticmethod - def json_schema_extra(schema, model) -> None: - for prop in schema.get('properties', {}).values(): - prop.pop('title', None) + model_config = { + "json_schema_extra": pop_titles + } class Output(_BaseModel): @@ -93,21 +101,53 @@ class Output(_BaseModel): "and also can be used as a expansion specification for the type definition beyond the base " "vocabulary." ) - properties: Dict[str, real_valued_primitives] = pydantic.Field( - {}, + # TODO (krim @ 5/12/21): currently there's no way to validate the property + # types based on vocabulary specification of an annotation type. 
As a result, + # we allow "any" type and do some basic validation below, but we need a + # better way for validation. + properties: Dict[str, Any] = pydantic.Field( + {}, description="(optional) Specification for type properties, if any. ``\"*\"`` indicates any value." ) + + @pydantic.field_validator('properties', mode='before') + @classmethod + def validate_properties(cls, value): + if not isinstance(value, dict): + raise ValueError("Properties must be a dictionary.") + for key, val in value.items(): + if not isinstance(key, str): + raise ValueError(f"Property key '{key}' must be a string.") + if isinstance(val, list): + if not all(isinstance(item, type(val[0])) for item in val): + raise ValueError(f"All elements in the list for key '{key}' must be of the same type.") + elif isinstance(val, dict): + if not all(isinstance(k, str) for k in val.keys()): + raise ValueError(f"All keys in the dictionary for key '{key}' must be strings.") + if not all(isinstance(v, type(next(iter(val.values())))) for v in val.values()): + raise ValueError(f"All values in the dictionary for key '{key}' must be of the same type.") + return value + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + @pydantic.field_validator('at_type', mode='after') # because pydantic v2 doesn't auto-convert url to string + @classmethod + def stringify(cls, val): + return str(val) - @pydantic.validator('at_type', pre=True) + @pydantic.field_validator('at_type', mode='before') + @classmethod def at_type_must_be_str(cls, v): if not isinstance(v, str): return str(v) return v - class Config: - title = 'CLAMS Output Specification' - extra = 'forbid' - allow_population_by_field_name = True + model_config = { + 'title': 'CLAMS Output Specification', + 'extra': 'forbid', + 'validate_by_name': True, + } def add_description(self, description: str): """ @@ -127,20 +167,21 @@ class Input(Output): Developers should take diligent care to include all input types and their properties in the app metadata. 
""" - required: bool = pydantic.Field( + required: Optional[bool] = pydantic.Field( None, description="(optional, True by default) Indicating whether this input type is mandatory or optional." ) - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) if self.required is None: self.required = True - class Config: - title = 'CLAMS Input Specification' - extra = 'forbid' - allow_population_by_field_name = True + model_config = { + 'title': 'CLAMS Input Specification', + 'extra': 'forbid', + 'validate_by_name': True, + } class RuntimeParameter(_BaseModel): @@ -178,12 +219,13 @@ class RuntimeParameter(_BaseModel): "desired dictionary is ``{'key1': 'value1', 'key2': 'value2'}``, the default value (used when " "initializing a parameter) should be ``['key1:value1','key2:value2']``\n." ) - choices: List[real_valued_primitives] = pydantic.Field( + choices: Optional[List[real_valued_primitives]] = pydantic.Field( None, description="(optional) List of string values that can be accepted." ) - default: Union[real_valued_primitives, List[real_valued_primitives]] = pydantic.Field( + default: Optional[Union[real_valued_primitives, List[real_valued_primitives]]] = pydantic.Field( None, + union_mode='left_to_right', description="(optional) Default value for the parameter.\n\n" "Notes for developers: \n\n" "Setting a default value makes a parameter `optional`. \n\n" @@ -208,9 +250,10 @@ def __init__(self, **kwargs): if self.multivalued and self.default is not None and not isinstance(self.default, list): self.default = [self.default] - class Config: - title = 'CLAMS App Runtime Parameter' - extra = 'forbid' + model_config = { + 'title': 'CLAMS App Runtime Parameter', + 'extra': 'forbid', + } class AppMetadata(pydantic.BaseModel): @@ -236,18 +279,19 @@ class AppMetadata(pydantic.BaseModel): description="A longer description of the app (what it does, how to use, etc.)." 
) app_version: str = pydantic.Field( - default_factory=generate_app_version, + '', # instead of using default_factory, I will use model_validator to set the default value + # this will work around the limitation of exclude_defaults=True condition when serializing description="(AUTO-GENERATED, DO NOT SET MANUALLY)\n\n" "Version of the app.\n\n" "When the metadata is generated using clams-python SDK, this field is automatically filled in" ) mmif_version: str = pydantic.Field( - default_factory=get_mmif_specver, + '', # same as above description="(AUTO-GENERATED, DO NOT SET MANUALLY)\n\n" "Version of MMIF specification the app.\n\n" "When the metadata is generated using clams-python SDK, this field is automatically filled in." ) - analyzer_version: str = pydantic.Field( + analyzer_version: Optional[str] = pydantic.Field( None, description="(optional) Version of an analyzer software, if the app is working as a wrapper for one. " ) @@ -255,7 +299,7 @@ class AppMetadata(pydantic.BaseModel): ..., description="License information of the app." ) - analyzer_license: str = pydantic.Field( + analyzer_license: Optional[str] = pydantic.Field( None, description="(optional) License information of an analyzer software, if the app works as a wrapper for one. " ) @@ -298,7 +342,7 @@ class AppMetadata(pydantic.BaseModel): [], description="List of runtime parameters. Can be empty." ) - dependencies: List[str] = pydantic.Field( + dependencies: Optional[List[str]] = pydantic.Field( None, description="(optional) List of software dependencies of the app. \n\n" "This list is completely optional, as in most cases such dependencies are specified in a separate " @@ -307,36 +351,38 @@ class AppMetadata(pydantic.BaseModel): "List items must be strings, not any kind of structured data. Thus, it is recommended to include " "a package name and its version in the string value at the minimum (e.g., ``clams-python==1.2.3``)." 
) - more: Dict[str, str] = pydantic.Field( + more: Optional[Dict[str, str]] = pydantic.Field( None, description="(optional) A string-to-string map that can be used to store any additional metadata of the app." ) - class Config: - title = "CLAMS AppMetadata" - extra = 'forbid' - allow_population_by_field_name = True - - @staticmethod - def json_schema_extra(schema, model) -> None: - for prop in schema.get('properties', {}).values(): - prop.pop('title', None) - schema['$schema'] = "http://json-schema.org/draft-07/schema#" # currently pydantic doesn't natively support the $schema field. See https://github.com/samuelcolvin/pydantic/issues/1478 - schema['$comment'] = f"clams-python SDK {get_clams_pyver()} was used to generate this schema" # this is only to hold version information - - @pydantic.validator('identifier', pre=True) + model_config = { + 'title': 'CLAMS AppMetadata', + 'extra': 'forbid', + 'validate_by_name': True, + 'json_schema_extra': lambda schema, model: [adjust(schema) for adjust in [pop_titles, jsonschema_versioning]], + } + + @pydantic.model_validator(mode='after') + @classmethod + def assign_versions(cls, data): + if data.app_version == '': + data.app_version = generate_app_version() + if data.mmif_version == '': + data.mmif_version = get_mmif_specver() + return data + + @pydantic.field_validator('identifier', mode='before') + @classmethod def append_version(cls, val): prefix = f'{app_directory_baseurl if "/" not in val else""}' suffix = generate_app_version() return '/'.join(map(lambda x: x.strip('/'), filter(None, (prefix, val, suffix)))) - @pydantic.validator('mmif_version', pre=True) - def auto_mmif_version(cls, val): - return get_mmif_specver() - - @pydantic.validator('app_version', pre=True) - def auto_app_version(cls, val): - return generate_app_version() + @pydantic.field_validator('url', 'identifier', mode='after') # because pydantic v2 doesn't auto-convert url to string + @classmethod + def stringify(cls, val): + return str(val) def 
_check_input_duplicate(self, a_input): for elem in self.input: @@ -400,9 +446,7 @@ def add_output(self, at_type: Union[str, vocabulary.ThingTypesBase], **propertie :param properties: additional property specifications :return: the newly added Output object """ - new = Output(at_type=at_type) - if len(properties) > 0: - new.properties = properties + new = Output(at_type=at_type, properties=properties) if new not in self.output: self.output.append(new) else: @@ -412,7 +456,7 @@ def add_output(self, at_type: Union[str, vocabulary.ThingTypesBase], **propertie def add_parameter(self, name: str, description: str, type: param_value_types, choices: Optional[List[real_valued_primitives]] = None, multivalued: bool = False, - default: Union[real_valued_primitives, List[real_valued_primitives]] = None): + default: Union[None, real_valued_primitives, List[real_valued_primitives]] = None): """ Helper method to add an element to the ``parameters`` list. """ @@ -456,10 +500,7 @@ def add_more(self, key: str, value: str): raise ValueError("Key and value should not be empty!") def jsonify(self, pretty=False): - if pretty: - return self.json(exclude_defaults=True, by_alias=True, indent=2) - else: - return self.json(exclude_defaults=True, by_alias=True) + return self.model_dump_json(exclude_defaults=True, by_alias=True, indent=2 if pretty else None) if __name__ == '__main__': diff --git a/clams/develop/templates/app/metadata.py.template b/clams/develop/templates/app/metadata.py.template index a074d5f..8b1f8c7 100644 --- a/clams/develop/templates/app/metadata.py.template +++ b/clams/develop/templates/app/metadata.py.template @@ -3,6 +3,7 @@ The purpose of this file is to define the metadata of the app with minimal impor DO NOT CHANGE the name of the file """ +import pathlib from mmif import DocumentTypes, AnnotationTypes @@ -36,7 +37,7 @@ def appmetadata() -> AppMetadata: # (but, when the app doesn't implementaion any additional algorithms/model/architecture, but simply use API's of 
existing, for exmaple, OCR software, it is a wrapper) # if the analyzer is a python app, and it's specified in the requirements.txt # this trick can also be useful (replace ANALYZER_NAME with the pypi dist name) - analyzer_version=[l.strip().rsplit('==')[-1] for l in open('requirements.txt').readlines() if re.match(r'^ANALYZER_NAME==', l)][0], + analyzer_version=[l.strip().rsplit('==')[-1] for l in open(pathlib.Path(__file__).parent / 'requirements.txt').readlines() if re.match(r'^ANALYZER_NAME==', l)][0], analyzer_license="", # short name for a software license ) # and then add I/O specifications: an app must have at least one input and one output diff --git a/clams/mmif_utils/__init__.py b/clams/mmif_utils/__init__.py deleted file mode 100644 index 4ece749..0000000 --- a/clams/mmif_utils/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from clams.mmif_utils import rewind -from clams.mmif_utils import source - diff --git a/clams/mmif_utils/rewind.py b/clams/mmif_utils/rewind.py deleted file mode 100644 index c8bb9e1..0000000 --- a/clams/mmif_utils/rewind.py +++ /dev/null @@ -1,116 +0,0 @@ -""" -This module provides a CLI to rewind a MMIF from a CLAMS pipeline. -""" -import argparse -import sys -import textwrap - -import mmif - - -def prompt_user(mmif_obj: mmif.Mmif) -> int: - """ - Function to ask user to choose the rewind range. 
- """ - - ## Give a user options (#, "app", "timestamp") - time order - n = len(mmif_obj.views) - i = 0 # option number - aname = "" - a = 0 - # header - print("\n" + "{:<8} {:<8} {:<30} {:<100}".format("view-num", "app-num", "timestamp", "app")) - for view in reversed(mmif_obj.views): - if view.metadata.app != aname: - aname = view.metadata.app - a += 1 - i += 1 - print("{:<8} {:<8} {:<30} {:<100}".format(i, a, str(view.metadata.timestamp), str(view.metadata.app))) - - ## User input - return int(input("\nEnter the number to delete from that point by rewinding: ")) - - -def rewind_mmif(mmif_obj: mmif.Mmif, choice: int, choice_is_viewnum: bool = True) -> mmif.Mmif: - """ - Rewind MMIF by deleting the last N views. - The number of views to rewind is given as a number of "views", or number of "producer apps". - By default, the number argument is interpreted as the number of "views". - Note that when the same app is repeatedly run in a CLAMS pipeline and produces multiple views in a row, - rewinding in "app" mode will rewind all those views at once. - - :param mmif_obj: mmif object - :param choice: number of views to rewind - :param choice_is_viewnum: if True, choice is the number of views to rewind. If False, choice is the number of producer apps to rewind. - :return: rewound mmif object - - """ - if choice_is_viewnum: - for vid in list(v.id for v in mmif_obj.views)[-1:-choice-1:-1]: - mmif_obj.views._items.pop(vid) - else: - app_count = 0 - cur_app = "" - vid_to_pop = [] - for v in reversed(mmif_obj.views): - vid_to_pop.append(v.id) - if app_count >= choice: - break - if v.metadata.app != cur_app: - app_count += 1 - cur_app = v.metadata.app - for vid in vid_to_pop: - mmif_obj.views._items.pop(vid) - return mmif_obj - - -def describe_argparser(): - """ - returns two strings: one-line description of the argparser, and addition material, - which will be shown in `clams --help` and `clams --help`, respectively. 
- """ - oneliner = 'provides CLI to rewind a MMIF from a CLAMS pipeline.' - additional = textwrap.dedent(""" - MMIF rewinder rewinds a MMIF by deleting the last N views. - N can be specified as a number of views, or a number of producer apps. """) - return oneliner, oneliner + '\n\n' + additional - - -def prep_argparser(**kwargs): - parser = argparse.ArgumentParser(description=describe_argparser()[1], - formatter_class=argparse.RawDescriptionHelpFormatter, **kwargs) - parser.add_argument("IN_MMIF_FILE", - nargs="?", type=argparse.FileType("r"), - default=None if sys.stdin.isatty() else sys.stdin, - help='input MMIF file path, or STDIN if `-` or not provided.') - parser.add_argument("OUT_MMIF_FILE", - nargs="?", type=argparse.FileType("w"), - default=sys.stdout, - help='output MMIF file path, or STDOUT if `-` or not provided.') - parser.add_argument("-p", '--pretty', action='store_true', - help="Pretty-print rewound MMIF") - parser.add_argument("-n", '--number', default="0", type=int, - help="Number of views or apps to rewind, must be a positive integer. " - "If 0, the user will be prompted to choose. (default: 0)") - parser.add_argument("-m", '--mode', choices=['app', 'view'], default='view', - help="Choose to rewind by number of views or number of producer apps. (default: view)") - return parser - - -def main(args): - mmif_obj = mmif.Mmif(args.IN_MMIF_FILE.read()) - - if args.number == 0: # If user doesn't know how many views to rewind, give them choices. - choice = prompt_user(mmif_obj) - else: - choice = args.number - if not isinstance(choice, int) or choice <= 0: - raise ValueError(f"Only can rewind by a positive number of views. 
Got {choice}.") - - args.OUT_MMIF_FILE.write(rewind_mmif(mmif_obj, choice, args.mode == 'view').serialize(pretty=args.pretty)) - - -if __name__ == "__main__": - parser = prep_argparser() - args = parser.parse_args() - main(args) diff --git a/clams/mmif_utils/source.py b/clams/mmif_utils/source.py deleted file mode 100644 index f07d482..0000000 --- a/clams/mmif_utils/source.py +++ /dev/null @@ -1,296 +0,0 @@ -""" -This module provides a class for creating a "source" MMIF JSON object. -""" -import argparse -import itertools -import json -import pathlib -import sys -import textwrap -from typing import Union, Generator, List, Optional, Iterable -from urllib.parse import urlparse - -from mmif import Mmif, Document, DocumentTypes, __specver__ -from mmif.serialize.mmif import MmifMetadata - -__all__ = ['WorkflowSource'] - -DOC_JSON = Union[str, dict] -DOC = Union[DOC_JSON, Document] -METADATA_JSON = Union[str, dict] -METADATA = Union[METADATA_JSON, MmifMetadata] - - -class WorkflowSource: - """ - A WorkflowSource object is used at the beginning of a - CLAMS workflow to populate a new MMIF file with media. - - The same WorkflowSource object can be used repeatedly - to generate multiple MMIF objects. - - :param common_documents_json: - JSON doc_lists for any documents that should be common - to all MMIF objects produced by this workflow. - - :param common_metadata_json: - JSON doc_lists for metadata that should be common to - all MMIF objects produced by this workflow. 
- """ - mmif: Mmif - - def __init__( - self, - common_documents_json: Optional[List[DOC_JSON]] = None, - common_metadata_json: Optional[METADATA_JSON] = None - ) -> None: - if common_documents_json is None: - common_documents_json = [] - if common_metadata_json is None: - common_metadata_json = dict() - self.mmif_start: dict = {"documents": [json.loads(document) - if isinstance(document, str) - else document - for document in common_documents_json], - "views": [], - "metadata": { - "mmif": f"http://mmif.clams.ai/{__specver__}", - **common_metadata_json - }} - self.prime() - - def add_document(self, document: Union[str, dict, Document]) -> None: - """ - Adds a document to the working source MMIF. - - When you're done, fetch the source MMIF with produce(). - - :param document: the medium to add, as a JSON dict - or string or as a MMIF Medium object - """ - if isinstance(document, (str, dict)): - document = Document(document) - self.mmif.add_document(document) - - def change_metadata(self, key: str, value): - """ - Adds or changes a metadata entry in the working source MMIF. - - :param key: the desired key of the metadata property - :param value: the desired value of the metadata property - """ - self.mmif.metadata[key] = value - - def prime(self) -> None: - """ - Primes the WorkflowSource with a fresh MMIF object. - - Call this method if you want to reset the WorkflowSource - without producing a MMIF object with produce(). - """ - self.mmif = Mmif(self.mmif_start) - - def produce(self) -> Mmif: - """ - Returns the source MMIF and resets the WorkflowSource. - - Call this method once you have added all the documents - for your Workflow. 
- - :return: the current MMIF object that has been prepared - """ - source = self.mmif - self.prime() - return source - - def __call__( - self, - documents: Optional[List[DOC]] = None, - metadata: Optional[METADATA] = None - ) -> Mmif: - """ - Callable API that produces a new MMIF object from - this workflow source given a list of documents and - a metadata object. - - Call with no parameters to produce the default MMIF - object from ``self.mmif_start``. - - :param documents: a list of additional documents to add - :param metadata: additional metadata fields to add - :return: the produced MMIF object - """ - if documents is None: - documents = [] - if metadata is None: - metadata = {} - - if isinstance(documents, str): - documents = json.loads(documents) - if isinstance(metadata, MmifMetadata): - metadata = metadata.serialize() # pytype: disable=attribute-error # bug in pytype? (https://github.com/google/pytype/issues/533) - if isinstance(metadata, str): - metadata = json.loads(metadata) - - for document in documents: - self.add_document(document) - for key, value in metadata.items(): - self.change_metadata(key, value) - return self.produce() - - def from_data( - self, - doc_lists: Iterable[List[DOC]], - metadata_objs: Optional[Iterable[Optional[METADATA]]] = None - ) -> Generator[Mmif, None, None]: - """ - Provided with an iterable of document lists and an - optional iterable of metadata objects, generates - MMIF objects produced from that data. - - ``doc_lists`` and ``metadata_objs`` should be matched pairwise, - so that if they are zipped together, each pair defines - a single MMIF object from this workflow source. 
- - :param doc_lists: an iterable of document lists to generate MMIF from - :param metadata_objs: an iterable of metadata objects paired with the document lists - :return: a generator of produced MMIF files from the data - """ - if metadata_objs is None: - metadata_objs = itertools.repeat(None) - for documents, metadata in zip(doc_lists, metadata_objs): - yield self(documents, metadata) - - def __iter__(self): - """ - Endlessly produces MMIF directly from ``self.mmif_start``. - - If called after adding documents or changing metadata, - these changes are discarded, as the workflow source - gets re-primed. - """ - self.prime() - while True: - yield self.produce() - - -def generate_source_mmif_from_file(documents, prefix=None, scheme='file', **ignored): - at_types = { - 'video': DocumentTypes.VideoDocument, - 'audio': DocumentTypes.AudioDocument, - 'text': DocumentTypes.TextDocument, - 'image': DocumentTypes.ImageDocument - } - pl = WorkflowSource() - if prefix: - prefix = pathlib.PurePosixPath(prefix) - if not prefix.is_absolute(): - raise ValueError(f"prefix must be an absolute path; given \"{prefix}\".") - for doc_id, arg in enumerate(documents, start=1): - arg = arg.strip() - if len(arg) < 1: - continue - result = arg.split(':', maxsplit=1) - if len(result) == 2 and result[0].split('/', maxsplit=1)[0] in at_types: - mime, location = result - else: - raise ValueError( - f'Invalid MIME types, or no MIME type and/or path provided, in argument {doc_id-1} to source' - ) - location_uri = urlparse(location, scheme=scheme) - if location_uri.scheme == 'file': - location = pathlib.PurePosixPath(location_uri.path) - if prefix and location.is_absolute(): - raise ValueError(f"when prefix is used, file location must not be an absolute path; given \"{location}\".") - elif not prefix and not location.is_absolute(): - raise ValueError(f'file location must be an absolute path, or --prefix must be used; given \"{location}\".') - elif prefix and not location.is_absolute(): - location 
= prefix / location - location = str(location) - doc = Document() - doc.at_type = at_types[mime.split('/', maxsplit=1)[0]] - doc.properties.location = f"{location_uri.scheme}://{location if not location.startswith(location_uri.scheme) else location[len(location_uri.scheme)+3:]}" - doc.properties.id = f'd{doc_id}' - doc.properties.mime = mime - pl.add_document(doc) - return pl.produce().serialize(pretty=True) - - -def describe_argparser(): - """ - returns two strings: one-line description of the argparser, and addition material, - which will be shown in `clams --help` and `clams --help`, respectively. - """ - oneliner = 'provides CLI to create a "source" MMIF json.' - additional = textwrap.dedent(""" - A source MMIF is a MMIF with a list of source documents but empty views. - It can be used as a starting point for a CLAMS workflow. """) - return oneliner, oneliner + '\n\n' + additional - - -def prep_argparser(**kwargs): - import pkgutil - import re - import importlib - discovered_docloc_plugins = { - name[len('mmif_docloc_'):]: importlib.import_module(name) for _, name, _ in pkgutil.iter_modules() if - re.match(r'mmif[-_]docloc[-_]', name) - } - parser = argparse.ArgumentParser(description=describe_argparser()[1], formatter_class=argparse.RawTextHelpFormatter, **kwargs) - parser.add_argument( - 'documents', - default=None, - action='store', - nargs='+', - help='This list of documents MUST be colon-delimited pairs of document types and file locations. A document ' - 'type can be one of `audio`, `video`, `text`, `image`, or a MIME type string (such as video/mp4). The ' - 'file locations MUST be valid URI strings (e.g. `file:///path/to/file.mp4`, or URI scheme part can be ' - 'omitted, when `--scheme` flag is used). Note that when `file://` scheme is used (default), locations ' - 'MUST BE POSIX forms (Windows forms are not supported). 
The output will be a MMIF file containing a ' - 'document for each of those file paths, with the appropriate ``@type`` and MIME type (if given).' - ) - parser.add_argument( - '-p', '--prefix', - default=None, - metavar='PATH', - nargs='?', - help='An absolute path to use as prefix for file paths (ONLY WORKS with the default `file://` scheme, ignored ' - 'otherwise. MUST BE a POSIX form, Windows form is not supported). If prefix is set, document file paths ' - 'MUST be relative. Useful when creating source MMIF files from a system that\'s different from the ' - 'environment that actually runs the workflow (e.g. in a container).' - ) - parser.add_argument( - '-o', '--output', - default=None, - action='store', - nargs='?', - help='A name of a file to capture a generated MMIF json. When not given, MMIF is printed to stdout.' - ) - scheme_help = 'A scheme to associate with the document location URI. When not given, the default scheme is `file://`.' - if len(discovered_docloc_plugins) > 0: - plugin_help = [f'"{scheme_name}" ({scheme_plugin.help() if "help" in dir(scheme_plugin) else "help msg not provided by developer"})' - for scheme_name, scheme_plugin in discovered_docloc_plugins.items()] - scheme_help += ' (AVAILABLE ADDITIONAL SCHEMES) ' + ', '.join(plugin_help) - parser.add_argument( - '-s', '--scheme', - default='file', - action='store', - nargs='?', - help=scheme_help - ) - return parser - - -def main(args): - if args.output: - out_f = open(args.output, 'w') - else: - out_f = sys.stdout - mmif = generate_source_mmif_from_file(windows_path=False, **vars(args)) - out_f.write(mmif) - return mmif - -if __name__ == '__main__': - parser = prep_argparser() - args = parser.parse_args() - main(args) diff --git a/container/ffmpeg-hf.containerfile b/container/ffmpeg-hf.containerfile new file mode 100644 index 0000000..51a2ab3 --- /dev/null +++ b/container/ffmpeg-hf.containerfile @@ -0,0 +1,7 @@ +ARG clams_version +FROM 
ghcr.io/clamsproject/clams-python-ffmpeg-torch2:$clams_version +LABEL org.opencontainers.image.description="clams-python-ffmpeg-hf image is shipped with clams-python, ffmpeg, and various huggingface libraries (PyTorch backend)" + +RUN pip install --no-cache-dir transformers[torch,tokenizers]==4.* +RUN pip install --no-cache-dir datasets + diff --git a/container/ffmpeg-tf2-hf.containerfile b/container/ffmpeg-tf2-hf.containerfile new file mode 100644 index 0000000..a981a63 --- /dev/null +++ b/container/ffmpeg-tf2-hf.containerfile @@ -0,0 +1,7 @@ +ARG clams_version +FROM ghcr.io/clamsproject/clams-python-ffmpeg-tf2-torch2:$clams_version +LABEL org.opencontainers.image.description="clams-python-ffmpeg-tf2-hf image is shipped with clams-python, ffmpeg, tensorflow2, and various huggingface libraries" + +RUN pip install --no-cache-dir transformers[tf,tokenizers]==4.* +RUN pip install --no-cache-dir datasets + diff --git a/container/ffmpeg-tf2.containerfile b/container/ffmpeg-tf2.containerfile index c75a071..428c590 100644 --- a/container/ffmpeg-tf2.containerfile +++ b/container/ffmpeg-tf2.containerfile @@ -1,6 +1,7 @@ ARG clams_version FROM ghcr.io/clamsproject/clams-python-ffmpeg:$clams_version -LABEL org.opencontainers.image.description="clams-python-ffmpeg-tf2 image is shipped with clams-python, ffmpeg and tensorflow2 libraries" +LABEL org.opencontainers.image.description="clams-python-ffmpeg-tf2 image is shipped with clams-python, ffmpeg, and tensorflow2" RUN apt-get install -y build-essential libhdf5-dev RUN pip install --no-cache-dir tensorflow==2.* + diff --git a/container/ffmpeg-torch2.containerfile b/container/ffmpeg-torch2.containerfile index 3ab1c75..5573c68 100644 --- a/container/ffmpeg-torch2.containerfile +++ b/container/ffmpeg-torch2.containerfile @@ -1,5 +1,6 @@ ARG clams_version FROM ghcr.io/clamsproject/clams-python-ffmpeg:$clams_version -LABEL org.opencontainers.image.description="clams-python-ffmpeg-torch image is shipped with clams-python, ffmpeg and 
PyTorch2" +LABEL org.opencontainers.image.description="clams-python-ffmpeg-torch2 image is shipped with clams-python, ffmpeg, and PyTorch2" RUN pip install --no-cache-dir torch==2.* + diff --git a/container/ffmpeg-torch.containerfile b/container/ffmpeg-transformers4.containerfile similarity index 52% rename from container/ffmpeg-torch.containerfile rename to container/ffmpeg-transformers4.containerfile index e317cc7..4f355a7 100644 --- a/container/ffmpeg-torch.containerfile +++ b/container/ffmpeg-transformers4.containerfile @@ -1,5 +1,5 @@ ARG clams_version FROM ghcr.io/clamsproject/clams-python-ffmpeg:$clams_version -LABEL org.opencontainers.image.description="clams-python-ffmpeg-torch image is shipped with clams-python, ffmpeg and PyTorch" +LABEL org.opencontainers.image.description="clams-python-ffmpeg-transformers4 image is shipped with clams-python, ffmpeg and HF's transformers4" -RUN pip install --no-cache-dir torch==1.* +RUN pip install --no-cache-dir transformers==4.* diff --git a/container/ffmpeg.containerfile b/container/ffmpeg.containerfile index 01d6ad8..5475bca 100644 --- a/container/ffmpeg.containerfile +++ b/container/ffmpeg.containerfile @@ -1,6 +1,7 @@ ARG clams_version FROM ghcr.io/clamsproject/clams-python:$clams_version -LABEL org.opencontainers.image.description="clams-python-ffmpeg image is shipped with clams-python and ffmpeg (+ python binding)" +LABEL org.opencontainers.image.description="clams-python-ffmpeg image is shipped with clams-python and ffmpeg" RUN apt-get update && apt-get install -y ffmpeg RUN pip install --no-cache-dir ffmpeg-python==0.2.* + diff --git a/container/hf.containerfile b/container/hf.containerfile new file mode 100644 index 0000000..a798c98 --- /dev/null +++ b/container/hf.containerfile @@ -0,0 +1,7 @@ +ARG clams_version +FROM ghcr.io/clamsproject/clams-python--torch2:$clams_version +LABEL org.opencontainers.image.description="clams-python-hf image is shipped with clams-python and vairous huggingface libraries 
(PyTorch backend)" + +RUN pip install --no-cache-dir transformers[torch,tokenizers]==4.* +RUN pip install --no-cache-dir datasets + diff --git a/container/jdk8.containerfile b/container/jdk8.containerfile index 834b15a..e2281e9 100644 --- a/container/jdk8.containerfile +++ b/container/jdk8.containerfile @@ -5,3 +5,4 @@ LABEL org.opencontainers.image.description="clams-python-jdk8 image is shipped w ENV JAVA_HOME=/opt/java/openjdk COPY --from=eclipse-temurin:8 $JAVA_HOME $JAVA_HOME ENV PATH="${JAVA_HOME}/bin:${PATH}" + diff --git a/container/opencv4-hf.containerfile b/container/opencv4-hf.containerfile new file mode 100644 index 0000000..b28670f --- /dev/null +++ b/container/opencv4-hf.containerfile @@ -0,0 +1,7 @@ +ARG clams_version +FROM ghcr.io/clamsproject/clams-python-opencv4-torch2:$clams_version +LABEL org.opencontainers.image.description="clams-python-opencv4-hf image is shipped with clams-python, opencv4 (ffmpeg backend), and vairous huggingface libraries (PyTorch backend)" + +RUN pip install --no-cache-dir transformers[torch,tokenizers]==4.* +RUN pip install --no-cache-dir datasets + diff --git a/container/opencv4-tf2-hf.containerfile b/container/opencv4-tf2-hf.containerfile new file mode 100644 index 0000000..a7fe5f3 --- /dev/null +++ b/container/opencv4-tf2-hf.containerfile @@ -0,0 +1,7 @@ +ARG clams_version +FROM ghcr.io/clamsproject/clams-python-opencv4-tf2-torch2:$clams_version +LABEL org.opencontainers.image.description="clams-python-opencv4-tf2-hf image is shipped with clams-python, opencv4 (ffmpeg backend), tensorflow2, and vairous huggingface libraries" + +RUN pip install --no-cache-dir transformers[tf,tokenizers]==4.* +RUN pip install --no-cache-dir datasets + diff --git a/container/opencv4-tf2.containerfile b/container/opencv4-tf2.containerfile index db5dd34..45c2cc6 100644 --- a/container/opencv4-tf2.containerfile +++ b/container/opencv4-tf2.containerfile @@ -1,6 +1,7 @@ ARG clams_version FROM 
ghcr.io/clamsproject/clams-python-opencv4:$clams_version -LABEL org.opencontainers.image.description="clams-python-opencv4-tf2 image is shipped with clams-python, ffmpeg, opencv4 and tensorflow2 libraries" +LABEL org.opencontainers.image.description="clams-python-opencv4-tf2 image is shipped with clams-python, opencv4 (ffmpeg backend), and tensorflow2" RUN apt-get install -y build-essential libhdf5-dev RUN pip install --no-cache-dir tensorflow==2.* + diff --git a/container/opencv4-torch2.containerfile b/container/opencv4-torch2.containerfile index 083f1a0..fc9489d 100644 --- a/container/opencv4-torch2.containerfile +++ b/container/opencv4-torch2.containerfile @@ -1,5 +1,6 @@ ARG clams_version FROM ghcr.io/clamsproject/clams-python-opencv4:$clams_version -LABEL org.opencontainers.image.description="clams-python-opencv4-torch image is shipped with clams-python, ffmpeg, opencv4, and PyTorch2" +LABEL org.opencontainers.image.description="clams-python-opencv4-torch2 image is shipped with clams-python, opencv4 (ffmpeg backend), and PyTorch2" RUN pip install --no-cache-dir torch==2.* + diff --git a/container/opencv4-torch.containerfile b/container/opencv4-transformers4.containerfile similarity index 50% rename from container/opencv4-torch.containerfile rename to container/opencv4-transformers4.containerfile index 801ad2e..66c3ccb 100644 --- a/container/opencv4-torch.containerfile +++ b/container/opencv4-transformers4.containerfile @@ -1,5 +1,5 @@ ARG clams_version FROM ghcr.io/clamsproject/clams-python-opencv4:$clams_version -LABEL org.opencontainers.image.description="clams-python-opencv4-torch image is shipped with clams-python, ffmpeg, opencv4, and PyTorch" +LABEL org.opencontainers.image.description="clams-python-opencv4-transformers4 image is shipped with clams-python, ffmpeg, opencv4, and HF's transformers4" -RUN pip install --no-cache-dir torch==1.* +RUN pip install --no-cache-dir transformers==4.* diff --git a/container/opencv4.containerfile 
b/container/opencv4.containerfile index 6b270c6..5ea7381 100644 --- a/container/opencv4.containerfile +++ b/container/opencv4.containerfile @@ -1,6 +1,6 @@ ARG clams_version -FROM ghcr.io/clamsproject/clams-python-ffmpeg:$clams_version -LABEL org.opencontainers.image.description="clams-python-opencv image is shipped with clams-python, ffmpeg, and opencv4 with their python bindings" +FROM ghcr.io/clamsproject/clams-python--ffmpeg:$clams_version +LABEL org.opencontainers.image.description="clams-python-opencv4 image is shipped with clams-python and opencv4 (ffmpeg backend)" ARG OPENCV_VERSION=4.10.0 ARG OPENCV_PATH=/opt/opencv-${OPENCV_VERSION} @@ -43,3 +43,4 @@ RUN rm -rf ${OPENCV_PATH} ${OPENCV_EXTRA_PATH} RUN pip uninstall opencv-python RUN pip install --no-cache-dir opencv-python-headless~=${OPENCV_VERSION} RUN apt-get remove -y g++ cmake make wget unzip libavcodec-dev libavformat-dev libavutil-dev libswscale-dev && apt-get autoremove -y + diff --git a/container/tf2-hf.containerfile b/container/tf2-hf.containerfile new file mode 100644 index 0000000..9f43e42 --- /dev/null +++ b/container/tf2-hf.containerfile @@ -0,0 +1,7 @@ +ARG clams_version +FROM ghcr.io/clamsproject/clams-python-tf2-torch2:$clams_version +LABEL org.opencontainers.image.description="clams-python-tf2-hf image is shipped with clams-python, tensorflow2, and vairous huggingface libraries" + +RUN pip install --no-cache-dir transformers[tf,tokenizers]==4.* +RUN pip install --no-cache-dir datasets + diff --git a/container/tf2.containerfile b/container/tf2.containerfile index f3a6c8d..1a8f4b1 100644 --- a/container/tf2.containerfile +++ b/container/tf2.containerfile @@ -4,3 +4,4 @@ LABEL org.opencontainers.image.description="clams-python-tf2 image is shipped wi RUN apt-get install -y build-essential libhdf5-dev RUN pip install --no-cache-dir tensorflow==2.* + diff --git a/container/torch.containerfile b/container/torch.containerfile deleted file mode 100644 index 1454d85..0000000 --- 
a/container/torch.containerfile +++ /dev/null @@ -1,5 +0,0 @@ -ARG clams_version -FROM ghcr.io/clamsproject/clams-python:$clams_version -LABEL org.opencontainers.image.description="clams-python-ffmpeg image is shipped with clams-python and PyTorch" - -RUN pip install --no-cache-dir torch==1.* diff --git a/container/torch2.containerfile b/container/torch2.containerfile index 62ea186..9a867f6 100644 --- a/container/torch2.containerfile +++ b/container/torch2.containerfile @@ -1,5 +1,6 @@ ARG clams_version FROM ghcr.io/clamsproject/clams-python:$clams_version -LABEL org.opencontainers.image.description="clams-python-ffmpeg image is shipped with clams-python and PyTorch2" +LABEL org.opencontainers.image.description="clams-python-torch2 image is shipped with clams-python and PyTorch2" RUN pip install --no-cache-dir torch==2.* + diff --git a/container/transformers4.containerfile b/container/transformers4.containerfile new file mode 100644 index 0000000..d1d01b4 --- /dev/null +++ b/container/transformers4.containerfile @@ -0,0 +1,5 @@ +ARG clams_version +FROM ghcr.io/clamsproject/clams-python:$clams_version +LABEL org.opencontainers.image.description="clams-python-transformers4 image is shipped with clams-python and HF's transformers4" + +RUN pip install --no-cache-dir transformers==4.* diff --git a/documentation/clamsapp.md b/documentation/clamsapp.md index f9967d8..27d1a6e 100644 --- a/documentation/clamsapp.md +++ b/documentation/clamsapp.md @@ -172,6 +172,9 @@ $ curl -H "Accept: application/json" -X POST -d@input.mmif -s http://localhost:5 $ clams source audio:/data/audio/some-audio-file.mp3 | curl -X POST -d@- -s http://localhost:5000 > output.mmif ``` +Windows PowerShell users may encounter an `Invoke-WebRequest` exception when attempting to send an input file with `curl`. +This can be resolved for the duration of the current session by using the command `remove-item alias:curl` before proceeding to use `curl`. 
+ #### Configuring the app Running as an HTTP server, CLAMS Apps are stateless, but can be configured for each HTTP request by providing configuration parameters as [query string](https://en.wikipedia.org/wiki/Query_string). diff --git a/requirements.dev b/requirements.dev index 242b1ac..e5d9f9d 100644 --- a/requirements.dev +++ b/requirements.dev @@ -9,4 +9,4 @@ sphinx-autobuild autodoc m2r2 pillow -setuptools>=62 +setuptools diff --git a/requirements.txt b/requirements.txt index 69e85a6..6e38737 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -mmif-python==1.0.19 +mmif-python==1.1.1 Flask>=2 Flask-RESTful>=0.3.9 gunicorn>=20 lapps>=0.0.2 -pydantic>=1.8,<2 +pydantic>=2 jsonschema>=3 diff --git a/setup.py b/setup.py index ba4758f..b96a0ea 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,8 @@ #! /usr/bin/env python3 -import distutils.cmd import os from os import path import shutil -import setuptools - name = "clams-python" cmdclass = {} @@ -25,22 +22,7 @@ init_mod.write(f'__version__ = "{version}"') init_mod.close() - -class DoNothing(distutils.cmd.Command): - description = "run base code until `setuptools.setup()` line and exits 0." 
- user_options = [] - - def initialize_options(self) -> None: - pass - - def finalize_options(self) -> None: - pass - - def run(self): - pass - - -cmdclass['donothing'] = DoNothing +import setuptools setuptools.setup( name=name, @@ -51,12 +33,12 @@ def run(self): long_description=long_desc, long_description_content_type="text/markdown", url="https://clams.ai", + license="Apache-2.0", classifiers=[ - 'Development Status :: 2 - Pre-Alpha', + 'Development Status :: 5 - Production/Stable', 'Framework :: Flask', 'Framework :: Pytest', - 'Intended Audience :: Developers ', - 'License :: OSI Approved :: Apache Software License', + 'Intended Audience :: Developers', 'Programming Language :: Python :: 3 :: Only', ], cmdclass=cmdclass, @@ -66,7 +48,7 @@ def run(self): 'clams': ['develop/templates/**/*', 'develop/templates/**/.*'] }, install_requires=requires, - python_requires='>=3.8', + python_requires='>=3.10', packages=setuptools.find_packages(), entry_points={ 'console_scripts': [ diff --git a/tests/metadata.py b/tests/metadata.py index 5cb7610..454e2fa 100644 --- a/tests/metadata.py +++ b/tests/metadata.py @@ -15,6 +15,5 @@ def appmetadata() -> AppMetadata: ) metadata.add_input(DocumentTypes.TextDocument) metadata.add_input_oneof(DocumentTypes.AudioDocument, str(DocumentTypes.VideoDocument)) - metadata.add_parameter(name='raise_error', description='force raise a ValueError', - type='boolean', default='false') + metadata.add_parameter(name='raise_error', description='force raise a ValueError', type='boolean', default='false') return metadata diff --git a/tests/test_clamscli.py b/tests/test_clamscli.py deleted file mode 100644 index 43f4e99..0000000 --- a/tests/test_clamscli.py +++ /dev/null @@ -1,220 +0,0 @@ -import contextlib -import copy -import io -import os -import unittest -import unittest.mock - -from mmif.serialize import Mmif -from mmif.vocabulary import DocumentTypes, AnnotationTypes - -import clams -from clams.mmif_utils import rewind -from clams.mmif_utils 
import source - - -class TestCli(unittest.TestCase): - def setUp(self) -> None: - self.parser = clams.prep_argparser() - - def test_clams_cli(self): - stdout = io.StringIO() - with self.assertRaises(SystemExit) as e, contextlib.redirect_stdout(stdout): - self.parser.parse_args("-v".split()) - self.assertEqual(e.exception.code, 0) - self.assertEqual(stdout.getvalue().strip(), - clams.version_template.format(clams.__version__, clams.__specver__)) - - -class TestSource(unittest.TestCase): - - def setUp(self) -> None: - self.parser = clams.source.prep_argparser() - self.prefix = None - self.scheme = None - self.docs = [] - - def get_params(self): - - params = [] - if self.prefix: - params.extend(f'--prefix {self.prefix}'.split()) - if self.scheme: - params.extend(f'--scheme {self.scheme}'.split()) - params.extend(self.docs) - return params - - def generate_source_mmif(self): - - # to suppress output (otherwise, set to stdout by default - args = self.parser.parse_args(self.get_params()) - args.output = os.devnull - - return source.main(args) - - def test_accept_file_paths(self): - self.docs.append("video:/a/b/c.mp4") - self.docs.append('text:/a/b/c.txt') - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 2) - self.assertTrue(all(map(lambda x: x.location_scheme() == 'file', source_mmif.documents))) - - # relative path - self.docs.append('audio:a/b/c.mp3') - with self.assertRaises(ValueError): - self.generate_source_mmif() - - @unittest.mock.patch('os.name', 'nt') - def test_on_windows(self): - self.test_accept_file_paths() - - def test_accept_prefixed_file_paths(self): - self.prefix = '/a/b' - self.docs.append("video:c.mp4") - self.docs.append("text:c.txt") - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 2) - - # absolute path + prefix flag - self.docs.append("audio:/c.mp3") - with self.assertRaises(ValueError): - self.generate_source_mmif() - - def 
test_reject_relative_prefix(self): - self.prefix = '/' - self.docs.append("video:c.mp4") - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 1) - - self.prefix = '.' - with self.assertRaises(ValueError): - self.generate_source_mmif() - - def test_reject_unknown_mime(self): - self.docs.append("unknown_mime/more_unknown:/c.mp4") - with self.assertRaises(ValueError): - self.generate_source_mmif() - - def test_accept_scheme_files(self): - self.scheme = 'baapb' - self.docs.append("video:cpb-aacip-123-4567890.video") - self.docs.append("audio:cpb-aacip-111-1111111.audio") - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 2) - self.assertTrue(all(map(lambda x: x.location_scheme() == self.scheme, source_mmif.documents))) - - def test_generate_mixed_scheme(self): - self.scheme = 'baapb' - self.docs.append("video:file:///data/cpb-aacip-123-4567890.mp4") - self.docs.append("audio:cpb-aacip-111-1111111.audio") - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 2) - schemes = set(doc.location_scheme() for doc in source_mmif.documents) - self.assertEqual(len(schemes), 2) - self.assertTrue('baapb' in schemes) - self.assertTrue('file' in schemes) - - -class TestRewind(unittest.TestCase): - def setUp(self): - self.dummy_app_one = ExampleApp() - self.dummy_app_one.metadata.identifier = "dummy_app_one" - self.dummy_app_two = ExampleApp() - self.dummy_app_two.metadata.identifier = "dummy_app_two" - - # mmif we add views to - self.mmif_one = Mmif( - { - "metadata": {"mmif": "http://mmif.clams.ai/1.0.0"}, - "documents": [], - "views": [], - } - ) - - # baseline empty mmif for comparison - self.empty_mmif = Mmif( - { - "metadata": {"mmif": "http://mmif.clams.ai/1.0.0"}, - "documents": [], - "views": [], - } - ) - - def test_view_rewind(self): - """ - Tests the use of "view-rewiding" to remove multiple views from a single app. 
- """ - # Regular Case - mmif_added_views = self.dummy_app_one.mmif_add_views(self.mmif_one, 10) - self.assertEqual(len(mmif_added_views.views), 10) - rewound = rewind.rewind_mmif(mmif_added_views, 5) - self.assertEqual(len(rewound.views), 5) - # rewinding is done "in-place" - self.assertEqual(len(rewound.views), len(mmif_added_views.views)) - - def test_app_rewind(self): - # Regular Case - app_one_views = 3 - app_two_views = 2 - app_one_out = self.dummy_app_one.mmif_add_views(self.mmif_one, app_one_views) - app_two_out = self.dummy_app_two.mmif_add_views(app_one_out, app_two_views) - self.assertEqual(len(app_two_out.views), app_one_views + app_two_views) - rewound = rewind.rewind_mmif(app_two_out, 1, choice_is_viewnum=False) - self.assertEqual(len(rewound.views), app_one_views) - -def compare_views(a: Mmif, b: Mmif) -> bool: - perfect_match = True - for view_a, view_b in zip(a.views, b.views): - if view_a != view_b: - perfect_match = False - return perfect_match - - -class ExampleApp(clams.app.ClamsApp): - """This is a barebones implementation of a CLAMS App - used to generate simple Views within a mmif object - for testing purposes. The three methods here all streamline - the mmif annotation process for the purposes of repeated insertion - and removal. 
- """ - - app_version = "lorem_ipsum" - - def _appmetadata(self): - pass - - def _annotate(self, mmif: Mmif, message: str, idx: int, **kwargs): - if type(mmif) is not Mmif: - mmif_obj = Mmif(mmif, validate=False) - else: - mmif_obj = mmif - - new_view = mmif_obj.new_view() - self.sign_view(new_view, runtime_conf=kwargs) - self.gen_annotate(new_view, message, idx) - - d1 = DocumentTypes.VideoDocument - d2 = DocumentTypes.from_str(f"{str(d1)[:-1]}99") - if mmif.get_documents_by_type(d2): - new_view.new_annotation(AnnotationTypes.TimePoint, "tp1") - if "raise_error" in kwargs and kwargs["raise_error"]: - raise ValueError - return mmif - - def gen_annotate(self, mmif_view, message, idx=0): - mmif_view.new_contain( - AnnotationTypes.TimeFrame, **{"producer": "dummy-producer"} - ) - ann = mmif_view.new_annotation( - AnnotationTypes.TimeFrame, "a1", start=10, end=99 - ) - ann.add_property("f1", message) - - def mmif_add_views(self, mmif_obj, idx: int): - """Helper Function to add an arbitrary number of views to a mmif""" - for i in range(idx): - mmif_obj = self._annotate(mmif_obj, message=f"message {i}", idx=idx) - return mmif_obj - -if __name__ == '__main__': - unittest.main()