diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@
 click
 chardet
 file_magic
+pyld
diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/codemeta.py
@@ -0,0 +1,135 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import csv
+import json
+import os.path
+
+import swh.indexer
+from pyld import jsonld
+
+_DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data')
+
+CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, 'codemeta', 'crosswalk.csv')
+
+CODEMETA_CONTEXT_PATH = os.path.join(_DATA_DIR, 'codemeta', 'codemeta.jsonld')
+
+
+# The data files are shipped with the package: decode them as UTF-8
+# explicitly rather than relying on the locale of the host.
+with open(CODEMETA_CONTEXT_PATH, encoding='utf-8') as fd:
+    CODEMETA_CONTEXT = json.load(fd)
+
+CODEMETA_CONTEXT_URL = 'https://doi.org/10.5063/schema/codemeta-2.0'
+CODEMETA_URI = 'https://codemeta.github.io/terms/'
+
+
+# CodeMeta properties that we cannot properly represent.
+PROPERTY_BLACKLIST = {
+    'https://codemeta.github.io/terms/softwareRequirements',
+    'https://codemeta.github.io/terms/softwareSuggestions',
+    }
+
+
+def _read_crosstable(fd):
+    """Parses the CodeMeta crosswalk table.
+
+    Args:
+        fd: iterable of CSV lines (eg. a file opened in text mode),
+            starting with the header row.
+
+    Returns:
+        dict: for each data source (ie. each column that is not one of
+        `Parent Type`, `Property`, `Type` or `Description`), a dict
+        mapping the property names local to that data source to the URI
+        of the corresponding CodeMeta property. Properties listed in
+        `PROPERTY_BLACKLIST` are left out.
+
+    Raises:
+        ValueError: if the table is empty or has no `codemeta-V1` column.
+
+    """
+    reader = csv.reader(fd)
+    try:
+        header = next(reader)
+    except StopIteration:
+        raise ValueError('empty file')
+
+    data_sources = set(header) - {'Parent Type', 'Property',
+                                  'Type', 'Description'}
+    if 'codemeta-V1' not in data_sources:
+        # Not an `assert`: those are skipped when running with `python -O`.
+        raise ValueError('missing codemeta-V1 column')
+
+    codemeta_translation = {data_source: {} for data_source in data_sources}
+
+    for line in reader:  # For each canonical name
+        if not line:
+            # csv.reader yields an empty list for blank lines
+            continue
+        canonical_name = CODEMETA_URI + dict(zip(header, line))['Property']
+        if canonical_name in PROPERTY_BLACKLIST:
+            continue
+        for (col, value) in zip(header, line):  # For each cell in the row
+            if col in data_sources:
+                # If that's not the parentType/property/type/description
+                for local_name in value.split('/'):
+                    # For each of the data source's properties that maps
+                    # to this canonical name
+                    if local_name.strip():
+                        codemeta_translation[col][local_name.strip()] = \
+                                canonical_name
+
+    return codemeta_translation
+
+
+# newline='' lets the csv module do its own newline handling
+with open(CROSSWALK_TABLE_PATH, newline='', encoding='utf-8') as fd:
+    CROSSWALK_TABLE = _read_crosstable(fd)
+
+
+def _document_loader(url):
+    """Document loader for pyld.
+
+    Reads the local codemeta.jsonld file instead of fetching it
+    from the Internet every single time.
+
+    Args:
+        url (str): URL of the document pyld wants to load.
+
+    Returns:
+        dict: the local copy of the CodeMeta context, wrapped in a
+        dict with the `contextUrl`, `documentUrl` and `document` keys.
+
+    Raises:
+        Exception: if `url` is not `CODEMETA_CONTEXT_URL`, as no other
+            document is available locally.
+
+    """
+    if url == CODEMETA_CONTEXT_URL:
+        return {
+            'contextUrl': None,
+            'documentUrl': url,
+            'document': CODEMETA_CONTEXT,
+        }
+    elif url == CODEMETA_URI:
+        raise Exception('{} is CodeMeta\'s URI, use {} as context url'.format(
+            CODEMETA_URI, CODEMETA_CONTEXT_URL))
+    else:
+        raise Exception(
+            'Cannot load {}: only {} is available locally'.format(
+                url, CODEMETA_CONTEXT_URL))
+
+
+def compact(doc):
+    """Same as `pyld.jsonld.compact`, but in the context of CodeMeta."""
+    return jsonld.compact(doc, CODEMETA_CONTEXT_URL,
+                          options={'documentLoader': _document_loader})
+
+
+def expand(doc):
+    """Same as `pyld.jsonld.expand`, but in the context of CodeMeta."""
+    return jsonld.expand(doc,
+                         options={'documentLoader': _document_loader})
diff --git a/swh/indexer/data/codemeta/CITATION b/swh/indexer/data/codemeta/CITATION
new file mode 100644
--- /dev/null
+++ b/swh/indexer/data/codemeta/CITATION
@@ -0,0 +1,2 @@
+Matthew B. Jones, Carl Boettiger, Abby Cabunoc Mayes, Arfon Smith, Peter Slaughter, Kyle Niemeyer, Yolanda Gil, Martin Fenner, Krzysztof Nowak, Mark Hahnel, Luke Coy, Alice Allen, Mercè Crosas, Ashley Sands, Neil Chue Hong, Patricia Cruse, Daniel S. Katz, Carole Goble. 2017. CodeMeta: an exchange schema for software metadata. Version 2.0. KNB Data Repository. 
doi:10.5063/schema/codemeta-2.0 +swh:1:dir:39c509fd2002f9e531fb4b3a321ceb5e6994e54a;origin=https://github.com/codemeta/codemeta diff --git a/swh/indexer/data/codemeta/codemeta.jsonld b/swh/indexer/data/codemeta/codemeta.jsonld new file mode 100644 --- /dev/null +++ b/swh/indexer/data/codemeta/codemeta.jsonld @@ -0,0 +1,80 @@ +{ + "@context": { + "type": "@type", + "id": "@id", + "schema":"http://schema.org/", + "codemeta": "https://codemeta.github.io/terms/", + "Organization": {"@id": "schema:Organization"}, + "Person": {"@id": "schema:Person"}, + "SoftwareSourceCode": {"@id": "schema:SoftwareSourceCode"}, + "SoftwareApplication": {"@id": "schema:SoftwareApplication"}, + "Text": {"@id": "schema:Text"}, + "URL": {"@id": "schema:URL"}, + "address": { "@id": "schema:address"}, + "affiliation": { "@id": "schema:affiliation"}, + "applicationCategory": { "@id": "schema:applicationCategory", "@type": "@id"}, + "applicationSubCategory": { "@id": "schema:applicationSubCategory", "@type": "@id"}, + "citation": { "@id": "schema:citation"}, + "codeRepository": { "@id": "schema:codeRepository", "@type": "@id"}, + "contributor": { "@id": "schema:contributor"}, + "copyrightHolder": { "@id": "schema:copyrightHolder"}, + "copyrightYear": { "@id": "schema:copyrightYear"}, + "creator": { "@id": "schema:creator"}, + "dateCreated": {"@id": "schema:dateCreated", "@type": "schema:Date" }, + "dateModified": {"@id": "schema:dateModified", "@type": "schema:Date" }, + "datePublished": {"@id": "schema:datePublished", "@type": "schema:Date" }, + "description": { "@id": "schema:description"}, + "downloadUrl": { "@id": "schema:downloadUrl", "@type": "@id"}, + "email": { "@id": "schema:email"}, + "editor": { "@id": "schema:editor"}, + "encoding": { "@id": "schema:encoding"}, + "familyName": { "@id": "schema:familyName"}, + "fileFormat": { "@id": "schema:fileFormat", "@type": "@id"}, + "fileSize": { "@id": "schema:fileSize"}, + "funder": { "@id": "schema:funder"}, + "givenName": { "@id": 
"schema:givenName"}, + "hasPart": { "@id": "schema:hasPart" }, + "identifier": { "@id": "schema:identifier", "@type": "@id"}, + "installUrl": { "@id": "schema:installUrl", "@type": "@id"}, + "isAccessibleForFree": { "@id": "schema:isAccessibleForFree"}, + "isPartOf": { "@id": "schema:isPartOf"}, + "keywords": { "@id": "schema:keywords"}, + "license": { "@id": "schema:license", "@type": "@id"}, + "memoryRequirements": { "@id": "schema:memoryRequirements", "@type": "@id"}, + "name": { "@id": "schema:name"}, + "operatingSystem": { "@id": "schema:operatingSystem"}, + "permissions": { "@id": "schema:permissions"}, + "position": { "@id": "schema:position"}, + "processorRequirements": { "@id": "schema:processorRequirements"}, + "producer": { "@id": "schema:producer"}, + "programmingLanguage": { "@id": "schema:programmingLanguage"}, + "provider": { "@id": "schema:provider"}, + "publisher": { "@id": "schema:publisher"}, + "relatedLink": { "@id": "schema:relatedLink", "@type": "@id"}, + "releaseNotes": { "@id": "schema:releaseNotes", "@type": "@id"}, + "runtimePlatform": { "@id": "schema:runtimePlatform"}, + "sameAs": { "@id": "schema:sameAs", "@type": "@id"}, + "softwareHelp": { "@id": "schema:softwareHelp"}, + "softwareRequirements": { "@id": "schema:softwareRequirements", "@type": "@id"}, + "softwareVersion": { "@id": "schema:softwareVersion"}, + "sponsor": { "@id": "schema:sponsor"}, + "storageRequirements": { "@id": "schema:storageRequirements", "@type": "@id"}, + "supportingData": { "@id": "schema:supportingData"}, + "targetProduct": { "@id": "schema:targetProduct"}, + "url": { "@id": "schema:url", "@type": "@id"}, + "version": { "@id": "schema:version"}, + + "author": { "@id": "schema:author", "@container": "@list" }, + + "softwareSuggestions": { "@id": "codemeta:softwareSuggestions", "@type": "@id"}, + "contIntegration": { "@id": "codemeta:contIntegration", "@type": "@id"}, + "buildInstructions": { "@id": "codemeta:buildInstructions", "@type": "@id"}, + 
"developmentStatus": { "@id": "codemeta:developmentStatus", "@type": "@id"}, + "embargoDate": { "@id":"codemeta:embargoDate", "@type": "schema:Date" }, + "funding": { "@id": "codemeta:funding" }, + "readme": { "@id":"codemeta:readme", "@type": "@id" }, + "issueTracker": { "@id":"codemeta:issueTracker", "@type": "@id" }, + "referencePublication": { "@id": "codemeta:referencePublication", "@type": "@id"}, + "maintainer": { "@id": "codemeta:maintainer" } + } +} diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -160,7 +160,7 @@ - id (str): rev's identifier (sha1_git) - indexer_configuration_id (bytes): tool used - - translated_metadata (bytes): dict of retrieved metadata + - translated_metadata: dict of retrieved metadata """ try: @@ -175,7 +175,7 @@ files = [entry for entry in dir_ls if entry['type'] == 'file'] detected_files = detect_metadata(files) result['translated_metadata'] = self.translate_revision_metadata( - detected_files) + detected_files) except Exception as e: self.log.exception( 'Problem when indexing rev: %r', e) diff --git a/swh/indexer/metadata_detector.py b/swh/indexer/metadata_detector.py --- a/swh/indexer/metadata_detector.py +++ b/swh/indexer/metadata_detector.py @@ -3,7 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information - +from swh.indexer.codemeta import compact, expand, CODEMETA_URI from swh.indexer.metadata_dictionary import MAPPINGS @@ -25,6 +25,15 @@ return results +_MINIMAL_PROPERTY_SET = { + "developmentStatus", "version", "operatingSystem", "description", + "keywords", "issueTracker", "name", "author", "relatedLink", + "url", "license", "maintainer", "email", "identifier", + "codeRepository"} + +MINIMAL_METADATA_SET = {CODEMETA_URI+prop for prop in _MINIMAL_PROPERTY_SET} + + def extract_minimal_metadata_dict(metadata_list): """ Every item in the metadata_list is a dict of 
translated_metadata in the
@@ -37,29 +46,20 @@
     Returns:
         - minimal_dict (dict): one dict with selected values of metadata
     """
-    minimal_dict = {
-        "developmentStatus": [],
-        "version": [],
-        "operatingSystem": [],
-        "description": [],
-        "keywords": [],
-        "issueTracker": [],
-        "name": [],
-        "author": [],
-        "relatedLink": [],
-        "url": [],
-        "license": [],
-        "maintainer": [],
-        "email": [],
-        "softwareRequirements": [],
-        "identifier": [],
-        "codeRepository": []
-    }
-    for term in minimal_dict.keys():
-        for metadata_item in metadata_list:
-            if term in metadata_item:
-                if not metadata_item[term] in minimal_dict[term]:
-                    minimal_dict[term].append(metadata_item[term])
-        if not minimal_dict[term]:
-            minimal_dict[term] = None
-    return minimal_dict
+    minimal_dict = {}
+    for document in metadata_list:
+        for metadata_item in expand(document):
+            for (term, values) in metadata_item.items():
+                if term not in MINIMAL_METADATA_SET:
+                    continue
+                # Expanded JSON-LD stores the values of a property in a
+                # list: merge those lists value by value, so that two
+                # documents with overlapping (but not identical) lists
+                # do not yield duplicates.
+                if not isinstance(values, list):
+                    values = [values]
+                merged = minimal_dict.setdefault(term, [])
+                for value in values:
+                    if value not in merged:
+                        merged.append(value)
+    return compact(minimal_dict)
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -4,47 +4,10 @@
 # See top-level LICENSE file for more information
 
 import abc
-import csv
 import json
-import os.path
 import logging
 
-import swh.indexer
-
-CROSSWALK_TABLE_PATH = os.path.join(os.path.dirname(swh.indexer.__file__),
-                                    'data', 'codemeta', 'crosswalk.csv')
-
-
-def read_crosstable(fd):
-    reader = csv.reader(fd)
-    try:
-        header = next(reader)
-    except StopIteration:
-        raise ValueError('empty file')
-
-    data_sources = set(header) - {'Parent Type', 'Property',
-                                  'Type', 'Description'}
-    assert 'codemeta-V1' in data_sources
-
-    codemeta_translation = {data_source: {} for data_source in data_sources}
-
-    for line in reader:  # For each canonical name
-        canonical_name = dict(zip(header, line))['Property']
-        for (col, value) 
in zip(header, line): # For each cell in the row - if col in data_sources: - # If that's not the parentType/property/type/description - for local_name in value.split('/'): - # For each of the data source's properties that maps - # to this canonical name - if local_name.strip(): - codemeta_translation[col][local_name.strip()] = \ - canonical_name - - return codemeta_translation - - -with open(CROSSWALK_TABLE_PATH) as fd: - CROSSWALK_TABLE = read_crosstable(fd) +from swh.indexer.codemeta import CROSSWALK_TABLE, compact MAPPINGS = {} @@ -85,6 +48,9 @@ def translate(self, file_content): pass + def normalize_translation(self, metadata): + return compact(metadata) + class DictMapping(BaseMapping): """Base class for mappings that take as input a file that is mostly @@ -110,33 +76,24 @@ """ translated_metadata = {} - default = 'other' - translated_metadata['other'] = {} - try: - for k, v in content_dict.items(): - try: - term = self.mapping.get(k, default) - if term not in translated_metadata: - translated_metadata[term] = v - continue - if isinstance(translated_metadata[term], str): - in_value = translated_metadata[term] - translated_metadata[term] = [in_value, v] - continue - if isinstance(translated_metadata[term], list): - translated_metadata[term].append(v) - continue - if isinstance(translated_metadata[term], dict): - translated_metadata[term][k] = v - continue - except KeyError: - self.log.exception( - "Problem during item mapping") - continue - except Exception: - raise - return None - return translated_metadata + for k, v in content_dict.items(): + # First, check if there is a specific translation + # method for this key + translation_method = getattr(self, 'translate_' + k, None) + if translation_method: + translation_method(translated_metadata, v) + elif k in self.mapping: + # if there is no method, but the key is known from the + # crosswalk table + + # if there is a normalization method, use it on the value + normalization_method = getattr(self, 'normalize_' + 
k, None)
+                if normalization_method:
+                    v = normalization_method(v)
+
+                # set the translation metadata with the normalized value
+                translated_metadata[self.mapping[k]] = v
+        return self.normalize_translation(translated_metadata)
 
 
 class JsonMapping(DictMapping):
@@ -188,6 +145,42 @@
     mapping = CROSSWALK_TABLE['NodeJS']
     filename = b'package.json'
 
+    def normalize_repository(self, d):
+        """Normalizes the `repository` field of a package.json.
+
+        npm allows it to be either a dict (with `type` and `url`) or a
+        plain string.
+
+        Returns:
+            str: `<type>+<url>` for a dict with both keys, the url alone
+            if there is no usable type, the string itself for a string,
+            and None for anything else.
+
+        """
+        if isinstance(d, str):
+            return d
+        if not isinstance(d, dict) or not isinstance(d.get('url'), str):
+            return None
+        if isinstance(d.get('type'), str):
+            return '{type}+{url}'.format(**d)
+        return d['url']
+
+    def normalize_bugs(self, d):
+        """Normalizes the `bugs` field of a package.json.
+
+        npm allows it to be either a dict (with `url` and/or `email`)
+        or a plain string holding the url.
+
+        Returns:
+            str: the url of the issue tracker, or None if there is none.
+
+        """
+        if isinstance(d, str):
+            return d
+        if isinstance(d, dict) and isinstance(d.get('url'), str):
+            return d['url']
+        return None
+
 
 @register_mapping
 class CodemetaMapping(JsonMapping):
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -84,27 +84,22 @@
 
     def test_crosstable(self):
         self.assertEqual(CROSSWALK_TABLE['NodeJS'], {
-            'repository': 'codeRepository',
-            'os': 'operatingSystem',
-            'cpu': 'processorRequirements',
-            'engines': 'processorRequirements',
-            'dependencies': 'softwareRequirements',
-            'bundleDependencies': 'softwareRequirements',
-            'bundledDependencies': 'softwareRequirements',
-            'peerDependencies': 'softwareRequirements',
-            'author': 'creator',
-            'author.email': 'email',
-            'author.name': 'name',
-            'contributor': 'contributor',
-            'keywords': 'keywords',
-            'license': 'license',
-            'version': 'version',
-            'description': 'description',
-            'name': 'name',
-            'devDependencies': 'softwareSuggestions',
-            'optionalDependencies': 'softwareSuggestions',
-            'bugs': 'issueTracker',
-            'homepage': 'url'
+            'repository': 'https://codemeta.github.io/terms/codeRepository',
+            'os': 'https://codemeta.github.io/terms/operatingSystem',
+            'cpu': 'https://codemeta.github.io/terms/processorRequirements',
+            'engines':
+                'https://codemeta.github.io/terms/processorRequirements',
+            'author': 'https://codemeta.github.io/terms/creator',
+            'author.email': 'https://codemeta.github.io/terms/email',
+            'author.name': 'https://codemeta.github.io/terms/name',
+            'contributor': 'https://codemeta.github.io/terms/contributor',
+            
'keywords': 'https://codemeta.github.io/terms/keywords', + 'license': 'https://codemeta.github.io/terms/license', + 'version': 'https://codemeta.github.io/terms/version', + 'description': 'https://codemeta.github.io/terms/description', + 'name': 'https://codemeta.github.io/terms/name', + 'bugs': 'https://codemeta.github.io/terms/issueTracker', + 'homepage': 'https://codemeta.github.io/terms/url' }) def test_compute_metadata_none(self): @@ -139,14 +134,12 @@ } """ declared_metadata = { - 'name': 'test_metadata', - 'version': '0.0.2', - 'description': 'Simple package.json test for indexer', - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }, - 'other': {} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:name': 'test_metadata', + 'codemeta:version': '0.0.2', + 'codemeta:description': 'Simple package.json test for indexer', + 'codemeta:codeRepository': + 'git+https://github.com/moranegg/metadata_test', } # when @@ -160,28 +153,24 @@ """ # given metadata_list = [{ - 'name': 'test_1', - 'version': '0.0.2', - 'description': 'Simple package.json test for indexer', - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }, - 'other': {} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:name': 'test_1', + 'codemeta:version': '0.0.2', + 'codemeta:description': 'Simple package.json test for indexer', + 'codemeta:codeRepository': + 'git+https://github.com/moranegg/metadata_test', }, { - 'name': 'test_0_1', - 'version': '0.0.2', - 'description': 'Simple package.json test for indexer', - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }, - 'other': {} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:name': 'test_0_1', + 'codemeta:version': '0.0.2', + 'codemeta:description': 'Simple package.json test for indexer', + 'codemeta:codeRepository': + 
'git+https://github.com/moranegg/metadata_test' }, { - 'name': 'test_metadata', - 'version': '0.0.2', - 'author': 'moranegg', - 'other': {} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:name': 'test_metadata', + 'codemeta:version': '0.0.2', + 'codemeta:author': 'moranegg', }] # when @@ -189,25 +178,13 @@ # then expected_results = { - "developmentStatus": None, - "version": ['0.0.2'], - "operatingSystem": None, - "description": ['Simple package.json test for indexer'], - "keywords": None, - "issueTracker": None, - "name": ['test_1', 'test_0_1', 'test_metadata'], - "author": ['moranegg'], - "relatedLink": None, - "url": None, - "license": None, - "maintainer": None, - "email": None, - "softwareRequirements": None, - "identifier": None, - "codeRepository": [{ - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }] + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + "codemeta:version": '0.0.2', + "codemeta:description": 'Simple package.json test for indexer', + "codemeta:name": ['test_1', 'test_0_1', 'test_metadata'], + "codemeta:author": 'moranegg', + "codemeta:codeRepository": + 'git+https://github.com/moranegg/metadata_test', } self.assertEqual(expected_results, results) @@ -233,56 +210,35 @@ expected_results = [('content_metadata', False, [{ 'indexer_configuration_id': 30, 'translated_metadata': { - 'other': {}, - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }, - 'description': 'Simple package.json test for indexer', - 'name': 'test_metadata', - 'version': '0.0.1' + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:codeRepository': + 'git+https://github.com/moranegg/metadata_test', + 'codemeta:description': 'Simple package.json test for indexer', + 'codemeta:name': 'test_metadata', + 'codemeta:version': '0.0.1' }, 'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5' }, { 'indexer_configuration_id': 30, 'translated_metadata': { - 
'softwareRequirements': { - 'JSONStream': '~1.3.1', - 'abbrev': '~1.1.0', - 'ansi-regex': '~2.1.1', - 'ansicolors': '~0.3.2', - 'ansistyles': '~0.1.3' - }, - 'issueTracker': { - 'url': 'https://github.com/npm/npm/issues' - }, - 'creator': + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:issueTracker': + 'https://github.com/npm/npm/issues', + 'codemeta:creator': 'Isaac Z. Schlueter (http://blog.izs.me)', - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/npm/npm' - }, - 'description': 'a package manager for JavaScript', - 'softwareSuggestions': { - 'tacks': '~1.2.6', - 'tap': '~10.3.2' - }, - 'license': 'Artistic-2.0', - 'version': '5.0.3', - 'other': { - 'preferGlobal': True, - 'config': { - 'publishtest': False - } - }, - 'name': 'npm', - 'keywords': [ + 'codemeta:codeRepository': + 'git+https://github.com/npm/npm', + 'codemeta:description': 'a package manager for JavaScript', + 'codemeta:license': 'Artistic-2.0', + 'codemeta:version': '5.0.3', + 'codemeta:name': 'npm', + 'codemeta:keywords': [ 'install', 'modules', 'package manager', 'package.json' ], - 'url': 'https://docs.npmjs.com/' + 'codemeta:url': 'https://docs.npmjs.com/' }, 'id': 'd4c647f0fc257591cc9ba1722484229780d1c607' }, { @@ -342,35 +298,20 @@ expected_results = [('revision_metadata', True, [{ 'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', 'translated_metadata': { - 'identifier': None, - 'maintainer': None, - 'url': [ - 'https://github.com/librariesio/yarn-parser#readme' - ], - 'codeRepository': [{ - 'type': 'git', - 'url': 'git+https://github.com/librariesio/yarn-parser.git' - }], - 'author': ['Andrew Nesbitt'], - 'license': ['AGPL-3.0'], - 'version': ['1.0.0'], - 'description': [ - 'Tiny web service for parsing yarn.lock files' - ], - 'relatedLink': None, - 'developmentStatus': None, - 'operatingSystem': None, - 'issueTracker': [{ - 'url': 'https://github.com/librariesio/yarn-parser/issues' - }], - 'softwareRequirements': [{ - 'express': '^4.14.0', - 
'yarn': '^0.21.0', - 'body-parser': '^1.15.2' - }], - 'name': ['yarn-parser'], - 'keywords': [['yarn', 'parse', 'lock', 'dependencies']], - 'email': None + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:url': + 'https://github.com/librariesio/yarn-parser#readme', + 'codemeta:codeRepository': + 'git+https://github.com/librariesio/yarn-parser.git', + 'codemeta:author': 'Andrew Nesbitt', + 'codemeta:license': 'AGPL-3.0', + 'codemeta:version': '1.0.0', + 'codemeta:description': + 'Tiny web service for parsing yarn.lock files', + 'codemeta:issueTracker': + 'https://github.com/librariesio/yarn-parser/issues', + 'codemeta:name': 'yarn-parser', + 'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'], }, 'indexer_configuration_id': 7 }])] diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -93,35 +93,20 @@ self.run_ready_tasks() # Run the second task metadata = { - 'identifier': None, - 'maintainer': None, - 'url': [ - 'https://github.com/librariesio/yarn-parser#readme' - ], - 'codeRepository': [{ - 'type': 'git', - 'url': 'git+https://github.com/librariesio/yarn-parser.git' - }], - 'author': ['Andrew Nesbitt'], - 'license': ['AGPL-3.0'], - 'version': ['1.0.0'], - 'description': [ - 'Tiny web service for parsing yarn.lock files' - ], - 'relatedLink': None, - 'developmentStatus': None, - 'operatingSystem': None, - 'issueTracker': [{ - 'url': 'https://github.com/librariesio/yarn-parser/issues' - }], - 'softwareRequirements': [{ - 'express': '^4.14.0', - 'yarn': '^0.21.0', - 'body-parser': '^1.15.2' - }], - 'name': ['yarn-parser'], - 'keywords': [['yarn', 'parse', 'lock', 'dependencies']], - 'email': None + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:url': + 'https://github.com/librariesio/yarn-parser#readme', + 'codemeta:codeRepository': + 
'git+https://github.com/librariesio/yarn-parser.git', + 'codemeta:author': 'Andrew Nesbitt', + 'codemeta:license': 'AGPL-3.0', + 'codemeta:version': '1.0.0', + 'codemeta:description': + 'Tiny web service for parsing yarn.lock files', + 'codemeta:issueTracker': + 'https://github.com/librariesio/yarn-parser/issues', + 'codemeta:name': 'yarn-parser', + 'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'], } rev_metadata = { 'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -299,31 +299,21 @@ }, 'id': b'cde', 'translated_metadata': { - 'issueTracker': { - 'url': 'https://github.com/librariesio/yarn-parser/issues' - }, - 'version': '1.0.0', - 'name': 'yarn-parser', - 'author': 'Andrew Nesbitt', - 'url': 'https://github.com/librariesio/yarn-parser#readme', - 'processorRequirements': {'node': '7.5'}, - 'other': { - 'scripts': { - 'start': 'node index.js' - }, - 'main': 'index.js' - }, - 'license': 'AGPL-3.0', - 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], - 'codeRepository': { - 'type': 'git', - 'url': 'git+https://github.com/librariesio/yarn-parser.git' - }, - 'description': 'Tiny web service for parsing yarn.lock files', - 'softwareRequirements': { - 'yarn': '^0.21.0', - 'express': '^4.14.0', - 'body-parser': '^1.15.2'} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:issueTracker': + 'https://github.com/librariesio/yarn-parser/issues', + 'codemeta:version': '1.0.0', + 'codemeta:name': 'yarn-parser', + 'codemeta:author': 'Andrew Nesbitt', + 'codemeta:url': + 'https://github.com/librariesio/yarn-parser#readme', + 'codemeta:processorRequirements': {'node': '7.5'}, + 'codemeta:license': 'AGPL-3.0', + 'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'], + 'codemeta:codeRepository': + 'git+https://github.com/librariesio/yarn-parser.git', + 'codemeta:description': + 
'Tiny web service for parsing yarn.lock files', } }] @@ -339,9 +329,8 @@ if origin[k] != v: break else: - # This block is run if and only if we didn't break, - # ie. if all supplied parts of the id are set to the - # expected value. + # This block is run iff we didn't break, ie. if all supplied + # parts of the id are set to the expected value. return origin assert False, id_