diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ click chardet file_magic +pyld diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py new file mode 100644 --- /dev/null +++ b/swh/indexer/codemeta.py @@ -0,0 +1,99 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import csv +import json +import os.path + +import swh.indexer +from pyld import jsonld + +_DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data') + +CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, 'codemeta', 'crosswalk.csv') + +CODEMETA_CONTEXT_PATH = os.path.join(_DATA_DIR, 'codemeta', 'codemeta.jsonld') + + +with open(CODEMETA_CONTEXT_PATH) as fd: + CODEMETA_CONTEXT = json.load(fd) + +CODEMETA_CONTEXT_URL = 'https://doi.org/10.5063/schema/codemeta-2.0' +CODEMETA_URI = 'https://codemeta.github.io/terms/' +SCHEMA_URI = 'http://schema.org/' + + +PROPERTY_BLACKLIST = { + # CodeMeta properties that we cannot properly represent. 
+ CODEMETA_URI + 'softwareRequirements', + CODEMETA_URI + 'softwareSuggestions', + + # Duplicate of 'author' + CODEMETA_URI + 'creator', + } + + +def _read_crosstable(fd): + reader = csv.reader(fd) + try: + header = next(reader) + except StopIteration: + raise ValueError('empty file') + + data_sources = set(header) - {'Parent Type', 'Property', + 'Type', 'Description'} + assert 'codemeta-V1' in data_sources + + codemeta_translation = {data_source: {} for data_source in data_sources} + + for line in reader: # For each canonical name + canonical_name = CODEMETA_URI + dict(zip(header, line))['Property'] + if canonical_name in PROPERTY_BLACKLIST: + continue + for (col, value) in zip(header, line): # For each cell in the row + if col in data_sources: + # If that's not the parentType/property/type/description + for local_name in value.split('/'): + # For each of the data source's properties that maps + # to this canonical name + if local_name.strip(): + codemeta_translation[col][local_name.strip()] = \ + canonical_name + + return codemeta_translation + + +with open(CROSSWALK_TABLE_PATH) as fd: + CROSSWALK_TABLE = _read_crosstable(fd) + + +def _document_loader(url): + """Document loader for pyld. 
+ + Reads the local codemeta.jsonld file instead of fetching it + from the Internet every single time.""" + if url == CODEMETA_CONTEXT_URL: + return { + 'contextUrl': None, + 'documentUrl': url, + 'document': CODEMETA_CONTEXT, + } + elif url == CODEMETA_URI: + raise Exception('{} is CodeMeta\'s URI, use {} as context url'.format( + CODEMETA_URI, CODEMETA_CONTEXT_URL)) + else: + raise Exception(url) + + +def compact(doc): + """Same as `pyld.jsonld.compact`, but in the context of CodeMeta.""" + return jsonld.compact(doc, CODEMETA_CONTEXT_URL, + options={'documentLoader': _document_loader}) + + +def expand(doc): + """Same as `pyld.jsonld.expand`, but in the context of CodeMeta.""" + return jsonld.expand(doc, + options={'documentLoader': _document_loader}) diff --git a/swh/indexer/data/codemeta/codemeta.jsonld b/swh/indexer/data/codemeta/codemeta.jsonld new file mode 100644 --- /dev/null +++ b/swh/indexer/data/codemeta/codemeta.jsonld @@ -0,0 +1,80 @@ +{ + "@context": { + "type": "@type", + "id": "@id", + "schema":"http://schema.org/", + "codemeta": "https://codemeta.github.io/terms/", + "Organization": {"@id": "schema:Organization"}, + "Person": {"@id": "schema:Person"}, + "SoftwareSourceCode": {"@id": "schema:SoftwareSourceCode"}, + "SoftwareApplication": {"@id": "schema:SoftwareApplication"}, + "Text": {"@id": "schema:Text"}, + "URL": {"@id": "schema:URL"}, + "address": { "@id": "schema:address"}, + "affiliation": { "@id": "schema:affiliation"}, + "applicationCategory": { "@id": "schema:applicationCategory", "@type": "@id"}, + "applicationSubCategory": { "@id": "schema:applicationSubCategory", "@type": "@id"}, + "citation": { "@id": "schema:citation"}, + "codeRepository": { "@id": "schema:codeRepository", "@type": "@id"}, + "contributor": { "@id": "schema:contributor"}, + "copyrightHolder": { "@id": "schema:copyrightHolder"}, + "copyrightYear": { "@id": "schema:copyrightYear"}, + "creator": { "@id": "schema:creator"}, + "dateCreated": {"@id": "schema:dateCreated", 
"@type": "schema:Date" }, + "dateModified": {"@id": "schema:dateModified", "@type": "schema:Date" }, + "datePublished": {"@id": "schema:datePublished", "@type": "schema:Date" }, + "description": { "@id": "schema:description"}, + "downloadUrl": { "@id": "schema:downloadUrl", "@type": "@id"}, + "email": { "@id": "schema:email"}, + "editor": { "@id": "schema:editor"}, + "encoding": { "@id": "schema:encoding"}, + "familyName": { "@id": "schema:familyName"}, + "fileFormat": { "@id": "schema:fileFormat", "@type": "@id"}, + "fileSize": { "@id": "schema:fileSize"}, + "funder": { "@id": "schema:funder"}, + "givenName": { "@id": "schema:givenName"}, + "hasPart": { "@id": "schema:hasPart" }, + "identifier": { "@id": "schema:identifier", "@type": "@id"}, + "installUrl": { "@id": "schema:installUrl", "@type": "@id"}, + "isAccessibleForFree": { "@id": "schema:isAccessibleForFree"}, + "isPartOf": { "@id": "schema:isPartOf"}, + "keywords": { "@id": "schema:keywords"}, + "license": { "@id": "schema:license", "@type": "@id"}, + "memoryRequirements": { "@id": "schema:memoryRequirements", "@type": "@id"}, + "name": { "@id": "schema:name"}, + "operatingSystem": { "@id": "schema:operatingSystem"}, + "permissions": { "@id": "schema:permissions"}, + "position": { "@id": "schema:position"}, + "processorRequirements": { "@id": "schema:processorRequirements"}, + "producer": { "@id": "schema:producer"}, + "programmingLanguage": { "@id": "schema:programmingLanguage"}, + "provider": { "@id": "schema:provider"}, + "publisher": { "@id": "schema:publisher"}, + "relatedLink": { "@id": "schema:relatedLink", "@type": "@id"}, + "releaseNotes": { "@id": "schema:releaseNotes", "@type": "@id"}, + "runtimePlatform": { "@id": "schema:runtimePlatform"}, + "sameAs": { "@id": "schema:sameAs", "@type": "@id"}, + "softwareHelp": { "@id": "schema:softwareHelp"}, + "softwareRequirements": { "@id": "schema:softwareRequirements", "@type": "@id"}, + "softwareVersion": { "@id": "schema:softwareVersion"}, + "sponsor": 
{ "@id": "schema:sponsor"}, + "storageRequirements": { "@id": "schema:storageRequirements", "@type": "@id"}, + "supportingData": { "@id": "schema:supportingData"}, + "targetProduct": { "@id": "schema:targetProduct"}, + "url": { "@id": "schema:url", "@type": "@id"}, + "version": { "@id": "schema:version"}, + + "author": { "@id": "schema:author", "@container": "@list" }, + + "softwareSuggestions": { "@id": "codemeta:softwareSuggestions", "@type": "@id"}, + "contIntegration": { "@id": "codemeta:contIntegration", "@type": "@id"}, + "buildInstructions": { "@id": "codemeta:buildInstructions", "@type": "@id"}, + "developmentStatus": { "@id": "codemeta:developmentStatus", "@type": "@id"}, + "embargoDate": { "@id":"codemeta:embargoDate", "@type": "schema:Date" }, + "funding": { "@id": "codemeta:funding" }, + "readme": { "@id":"codemeta:readme", "@type": "@id" }, + "issueTracker": { "@id":"codemeta:issueTracker", "@type": "@id" }, + "referencePublication": { "@id": "codemeta:referencePublication", "@type": "@id"}, + "maintainer": { "@id": "codemeta:maintainer" } + } +} diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -160,7 +160,7 @@ - id (str): rev's identifier (sha1_git) - indexer_configuration_id (bytes): tool used - - translated_metadata (bytes): dict of retrieved metadata + - translated_metadata: dict of retrieved metadata """ try: @@ -175,7 +175,7 @@ files = [entry for entry in dir_ls if entry['type'] == 'file'] detected_files = detect_metadata(files) result['translated_metadata'] = self.translate_revision_metadata( - detected_files) + detected_files) except Exception as e: self.log.exception( 'Problem when indexing rev: %r', e) diff --git a/swh/indexer/metadata_detector.py b/swh/indexer/metadata_detector.py --- a/swh/indexer/metadata_detector.py +++ b/swh/indexer/metadata_detector.py @@ -3,7 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE 
file for more information - +from swh.indexer.codemeta import compact, expand, CODEMETA_URI from swh.indexer.metadata_dictionary import MAPPINGS @@ -25,6 +25,15 @@ return results +_MINIMAL_PROPERTY_SET = { + "developmentStatus", "version", "operatingSystem", "description", + "keywords", "issueTracker", "name", "author", "relatedLink", + "url", "license", "maintainer", "email", "identifier", + "codeRepository"} + +MINIMAL_METADATA_SET = {CODEMETA_URI+prop for prop in _MINIMAL_PROPERTY_SET} + + def extract_minimal_metadata_dict(metadata_list): """ Every item in the metadata_list is a dict of translated_metadata in the @@ -37,29 +46,13 @@ Returns: - minimal_dict (dict): one dict with selected values of metadata """ - minimal_dict = { - "developmentStatus": [], - "version": [], - "operatingSystem": [], - "description": [], - "keywords": [], - "issueTracker": [], - "name": [], - "author": [], - "relatedLink": [], - "url": [], - "license": [], - "maintainer": [], - "email": [], - "softwareRequirements": [], - "identifier": [], - "codeRepository": [] - } - for term in minimal_dict.keys(): - for metadata_item in metadata_list: - if term in metadata_item: - if not metadata_item[term] in minimal_dict[term]: - minimal_dict[term].append(metadata_item[term]) - if not minimal_dict[term]: - minimal_dict[term] = None - return minimal_dict + minimal_dict = {} + for document in metadata_list: + for metadata_item in expand(document): + for (term, value) in metadata_item.items(): + if term in MINIMAL_METADATA_SET: + if term not in minimal_dict: + minimal_dict[term] = [value] + elif value not in minimal_dict[term]: + minimal_dict[term].append(value) + return compact(minimal_dict) diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py --- a/swh/indexer/metadata_dictionary.py +++ b/swh/indexer/metadata_dictionary.py @@ -3,48 +3,12 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information 
+import re import abc -import csv import json -import os.path import logging -import swh.indexer - -CROSSWALK_TABLE_PATH = os.path.join(os.path.dirname(swh.indexer.__file__), - 'data', 'codemeta', 'crosswalk.csv') - - -def read_crosstable(fd): - reader = csv.reader(fd) - try: - header = next(reader) - except StopIteration: - raise ValueError('empty file') - - data_sources = set(header) - {'Parent Type', 'Property', - 'Type', 'Description'} - assert 'codemeta-V1' in data_sources - - codemeta_translation = {data_source: {} for data_source in data_sources} - - for line in reader: # For each canonical name - canonical_name = dict(zip(header, line))['Property'] - for (col, value) in zip(header, line): # For each cell in the row - if col in data_sources: - # If that's not the parentType/property/type/description - for local_name in value.split('/'): - # For each of the data source's properties that maps - # to this canonical name - if local_name.strip(): - codemeta_translation[col][local_name.strip()] = \ - canonical_name - - return codemeta_translation - - -with open(CROSSWALK_TABLE_PATH) as fd: - CROSSWALK_TABLE = read_crosstable(fd) +from swh.indexer.codemeta import CROSSWALK_TABLE, CODEMETA_URI, compact MAPPINGS = {} @@ -85,6 +49,9 @@ def translate(self, file_content): pass + def normalize_translation(self, metadata): + return compact(metadata) + class DictMapping(BaseMapping): """Base class for mappings that take as input a file that is mostly @@ -110,33 +77,24 @@ """ translated_metadata = {} - default = 'other' - translated_metadata['other'] = {} - try: - for k, v in content_dict.items(): - try: - term = self.mapping.get(k, default) - if term not in translated_metadata: - translated_metadata[term] = v - continue - if isinstance(translated_metadata[term], str): - in_value = translated_metadata[term] - translated_metadata[term] = [in_value, v] - continue - if isinstance(translated_metadata[term], list): - translated_metadata[term].append(v) - continue - if 
isinstance(translated_metadata[term], dict): - translated_metadata[term][k] = v - continue - except KeyError: - self.log.exception( - "Problem during item mapping") - continue - except Exception: - raise - return None - return translated_metadata + for k, v in content_dict.items(): + # First, check if there is a specific translation + # method for this key + translation_method = getattr(self, 'translate_' + k, None) + if translation_method: + translation_method(translated_metadata, v) + elif k in self.mapping: + # if there is no method, but the key is known from the + # crosswalk table + + # if there is a normalization method, use it on the value + normalization_method = getattr(self, 'normalize_' + k, None) + if normalization_method: + v = normalization_method(v) + + # set the translation metadata with the normalized value + translated_metadata[self.mapping[k]] = v + return self.normalize_translation(translated_metadata) class JsonMapping(DictMapping): @@ -188,6 +146,64 @@ mapping = CROSSWALK_TABLE['NodeJS'] filename = b'package.json' + _schema_shortcuts = { + 'github': 'https://github.com/', + 'gist': 'https://gist.github.com/', + 'bitbucket': 'https://bitbucket.org/', + 'gitlab': 'https://gitlab.com/', + } + + def normalize_repository(self, d): + """https://docs.npmjs.com/files/package.json#repository""" + if isinstance(d, dict): + return '{type}+{url}'.format(**d) + elif isinstance(d, str): + if '://' in d: + return d + elif ':' in d: + (schema, rest) = d.split(':', 1) + if schema in self._schema_shortcuts: + return self._schema_shortcuts[schema] + rest + else: + return None + else: + return self._schema_shortcuts['github'] + d + + else: + return None + + def normalize_bugs(self, d): + return '{url}'.format(**d) + + _parse_author = re.compile(r'^ *' + r'(?P<name>.*?)' + r'( +<(?P<email>.*)>)?' + r'( +\((?P<url>.*)\))?'
+ r' *$') + + def normalize_author(self, d): + 'https://docs.npmjs.com/files/package.json' \ + '#people-fields-author-contributors' + author = {'@type': CODEMETA_URI+'Person'} + if isinstance(d, dict): + name = d.get('name', None) + email = d.get('email', None) + url = d.get('url', None) + elif isinstance(d, str): + match = self._parse_author.match(d) + name = match.group('name') + email = match.group('email') + url = match.group('url') + else: + return None + if name: + author[CODEMETA_URI+'name'] = name + if email: + author[CODEMETA_URI+'email'] = email + if url: + author[CODEMETA_URI+'url'] = url + return author + @register_mapping class CodemetaMapping(JsonMapping): diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -84,27 +84,22 @@ def test_crosstable(self): self.assertEqual(CROSSWALK_TABLE['NodeJS'], { - 'repository': 'codeRepository', - 'os': 'operatingSystem', - 'cpu': 'processorRequirements', - 'engines': 'processorRequirements', - 'dependencies': 'softwareRequirements', - 'bundleDependencies': 'softwareRequirements', - 'bundledDependencies': 'softwareRequirements', - 'peerDependencies': 'softwareRequirements', - 'author': 'creator', - 'author.email': 'email', - 'author.name': 'name', - 'contributor': 'contributor', - 'keywords': 'keywords', - 'license': 'license', - 'version': 'version', - 'description': 'description', - 'name': 'name', - 'devDependencies': 'softwareSuggestions', - 'optionalDependencies': 'softwareSuggestions', - 'bugs': 'issueTracker', - 'homepage': 'url' + 'repository': 'https://codemeta.github.io/terms/codeRepository', + 'os': 'https://codemeta.github.io/terms/operatingSystem', + 'cpu': 'https://codemeta.github.io/terms/processorRequirements', + 'engines': + 'https://codemeta.github.io/terms/processorRequirements', + 'author': 'https://codemeta.github.io/terms/author', + 'author.email': 
'https://codemeta.github.io/terms/email', + 'author.name': 'https://codemeta.github.io/terms/name', + 'contributor': 'https://codemeta.github.io/terms/contributor', + 'keywords': 'https://codemeta.github.io/terms/keywords', + 'license': 'https://codemeta.github.io/terms/license', + 'version': 'https://codemeta.github.io/terms/version', + 'description': 'https://codemeta.github.io/terms/description', + 'name': 'https://codemeta.github.io/terms/name', + 'bugs': 'https://codemeta.github.io/terms/issueTracker', + 'homepage': 'https://codemeta.github.io/terms/url' }) def test_compute_metadata_none(self): @@ -135,18 +130,25 @@ "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" + }, + "author": { + "email": "moranegg@example.com", + "name": "Morane G" } } """ declared_metadata = { - 'name': 'test_metadata', - 'version': '0.0.2', - 'description': 'Simple package.json test for indexer', - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }, - 'other': {} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:name': 'test_metadata', + 'codemeta:version': '0.0.2', + 'codemeta:description': 'Simple package.json test for indexer', + 'codemeta:codeRepository': + 'git+https://github.com/moranegg/metadata_test', + 'codemeta:author': { + 'type': 'codemeta:Person', + 'codemeta:name': 'Morane G', + 'codemeta:email': 'moranegg@example.com', + }, } # when @@ -160,28 +162,24 @@ """ # given metadata_list = [{ - 'name': 'test_1', - 'version': '0.0.2', - 'description': 'Simple package.json test for indexer', - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }, - 'other': {} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:name': 'test_1', + 'codemeta:version': '0.0.2', + 'codemeta:description': 'Simple package.json test for indexer', + 'codemeta:codeRepository': + 'git+https://github.com/moranegg/metadata_test', }, { - 'name': 
'test_0_1', - 'version': '0.0.2', - 'description': 'Simple package.json test for indexer', - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }, - 'other': {} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:name': 'test_0_1', + 'codemeta:version': '0.0.2', + 'codemeta:description': 'Simple package.json test for indexer', + 'codemeta:codeRepository': + 'git+https://github.com/moranegg/metadata_test' }, { - 'name': 'test_metadata', - 'version': '0.0.2', - 'author': 'moranegg', - 'other': {} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:name': 'test_metadata', + 'codemeta:version': '0.0.2', + 'codemeta:author': 'moranegg', }] # when @@ -189,25 +187,13 @@ # then expected_results = { - "developmentStatus": None, - "version": ['0.0.2'], - "operatingSystem": None, - "description": ['Simple package.json test for indexer'], - "keywords": None, - "issueTracker": None, - "name": ['test_1', 'test_0_1', 'test_metadata'], - "author": ['moranegg'], - "relatedLink": None, - "url": None, - "license": None, - "maintainer": None, - "email": None, - "softwareRequirements": None, - "identifier": None, - "codeRepository": [{ - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }] + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + "codemeta:version": '0.0.2', + "codemeta:description": 'Simple package.json test for indexer', + "codemeta:name": ['test_1', 'test_0_1', 'test_metadata'], + "codemeta:author": 'moranegg', + "codemeta:codeRepository": + 'git+https://github.com/moranegg/metadata_test', } self.assertEqual(expected_results, results) @@ -233,56 +219,39 @@ expected_results = [('content_metadata', False, [{ 'indexer_configuration_id': 30, 'translated_metadata': { - 'other': {}, - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/moranegg/metadata_test' - }, - 'description': 'Simple package.json test for indexer', - 'name': 'test_metadata', - 
'version': '0.0.1' + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:codeRepository': + 'git+https://github.com/moranegg/metadata_test', + 'codemeta:description': 'Simple package.json test for indexer', + 'codemeta:name': 'test_metadata', + 'codemeta:version': '0.0.1' }, 'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5' }, { 'indexer_configuration_id': 30, 'translated_metadata': { - 'softwareRequirements': { - 'JSONStream': '~1.3.1', - 'abbrev': '~1.1.0', - 'ansi-regex': '~2.1.1', - 'ansicolors': '~0.3.2', - 'ansistyles': '~0.1.3' + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:issueTracker': + 'https://github.com/npm/npm/issues', + 'codemeta:author': { + 'type': 'codemeta:Person', + 'codemeta:name': 'Isaac Z. Schlueter', + 'codemeta:email': 'i@izs.me', + 'codemeta:url': 'http://blog.izs.me', }, - 'issueTracker': { - 'url': 'https://github.com/npm/npm/issues' - }, - 'creator': - 'Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)', - 'codeRepository': { - 'type': 'git', - 'url': 'https://github.com/npm/npm' - }, - 'description': 'a package manager for JavaScript', - 'softwareSuggestions': { - 'tacks': '~1.2.6', - 'tap': '~10.3.2' - }, - 'license': 'Artistic-2.0', - 'version': '5.0.3', - 'other': { - 'preferGlobal': True, - 'config': { - 'publishtest': False - } - }, - 'name': 'npm', - 'keywords': [ + 'codemeta:codeRepository': + 'git+https://github.com/npm/npm', + 'codemeta:description': 'a package manager for JavaScript', + 'codemeta:license': 'Artistic-2.0', + 'codemeta:version': '5.0.3', + 'codemeta:name': 'npm', + 'codemeta:keywords': [ 'install', 'modules', 'package manager', 'package.json' ], - 'url': 'https://docs.npmjs.com/' + 'codemeta:url': 'https://docs.npmjs.com/' }, 'id': 'd4c647f0fc257591cc9ba1722484229780d1c607' }, { @@ -342,35 +311,20 @@ expected_results = [('revision_metadata', True, [{ 'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', 'translated_metadata': { - 'identifier': None, - 'maintainer': None, - 'url':
[ - 'https://github.com/librariesio/yarn-parser#readme' - ], - 'codeRepository': [{ - 'type': 'git', - 'url': 'git+https://github.com/librariesio/yarn-parser.git' - }], - 'author': ['Andrew Nesbitt'], - 'license': ['AGPL-3.0'], - 'version': ['1.0.0'], - 'description': [ - 'Tiny web service for parsing yarn.lock files' - ], - 'relatedLink': None, - 'developmentStatus': None, - 'operatingSystem': None, - 'issueTracker': [{ - 'url': 'https://github.com/librariesio/yarn-parser/issues' - }], - 'softwareRequirements': [{ - 'express': '^4.14.0', - 'yarn': '^0.21.0', - 'body-parser': '^1.15.2' - }], - 'name': ['yarn-parser'], - 'keywords': [['yarn', 'parse', 'lock', 'dependencies']], - 'email': None + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:url': + 'https://github.com/librariesio/yarn-parser#readme', + 'codemeta:codeRepository': + 'git+https://github.com/librariesio/yarn-parser.git', + 'codemeta:author': 'Andrew Nesbitt', + 'codemeta:license': 'AGPL-3.0', + 'codemeta:version': '1.0.0', + 'codemeta:description': + 'Tiny web service for parsing yarn.lock files', + 'codemeta:issueTracker': + 'https://github.com/librariesio/yarn-parser/issues', + 'codemeta:name': 'yarn-parser', + 'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'], }, 'indexer_configuration_id': 7 }])] diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -93,35 +93,20 @@ self.run_ready_tasks() # Run the second task metadata = { - 'identifier': None, - 'maintainer': None, - 'url': [ - 'https://github.com/librariesio/yarn-parser#readme' - ], - 'codeRepository': [{ - 'type': 'git', - 'url': 'git+https://github.com/librariesio/yarn-parser.git' - }], - 'author': ['Andrew Nesbitt'], - 'license': ['AGPL-3.0'], - 'version': ['1.0.0'], - 'description': [ - 'Tiny web service for parsing yarn.lock files' - ], - 'relatedLink': None, - 
'developmentStatus': None, - 'operatingSystem': None, - 'issueTracker': [{ - 'url': 'https://github.com/librariesio/yarn-parser/issues' - }], - 'softwareRequirements': [{ - 'express': '^4.14.0', - 'yarn': '^0.21.0', - 'body-parser': '^1.15.2' - }], - 'name': ['yarn-parser'], - 'keywords': [['yarn', 'parse', 'lock', 'dependencies']], - 'email': None + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'codemeta:url': + 'https://github.com/librariesio/yarn-parser#readme', + 'codemeta:codeRepository': + 'git+https://github.com/librariesio/yarn-parser.git', + 'codemeta:author': 'Andrew Nesbitt', + 'codemeta:license': 'AGPL-3.0', + 'codemeta:version': '1.0.0', + 'codemeta:description': + 'Tiny web service for parsing yarn.lock files', + 'codemeta:issueTracker': + 'https://github.com/librariesio/yarn-parser/issues', + 'codemeta:name': 'yarn-parser', + 'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'], } rev_metadata = { 'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -299,31 +299,21 @@ }, 'id': b'cde', 'translated_metadata': { - 'issueTracker': { - 'url': 'https://github.com/librariesio/yarn-parser/issues' - }, - 'version': '1.0.0', - 'name': 'yarn-parser', - 'author': 'Andrew Nesbitt', - 'url': 'https://github.com/librariesio/yarn-parser#readme', - 'processorRequirements': {'node': '7.5'}, - 'other': { - 'scripts': { - 'start': 'node index.js' - }, - 'main': 'index.js' - }, - 'license': 'AGPL-3.0', - 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], - 'codeRepository': { - 'type': 'git', - 'url': 'git+https://github.com/librariesio/yarn-parser.git' - }, - 'description': 'Tiny web service for parsing yarn.lock files', - 'softwareRequirements': { - 'yarn': '^0.21.0', - 'express': '^4.14.0', - 'body-parser': '^1.15.2'} + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 
'codemeta:issueTracker': + 'https://github.com/librariesio/yarn-parser/issues', + 'codemeta:version': '1.0.0', + 'codemeta:name': 'yarn-parser', + 'codemeta:author': 'Andrew Nesbitt', + 'codemeta:url': + 'https://github.com/librariesio/yarn-parser#readme', + 'codemeta:processorRequirements': {'node': '7.5'}, + 'codemeta:license': 'AGPL-3.0', + 'codemeta:keywords': ['yarn', 'parse', 'lock', 'dependencies'], + 'codemeta:codeRepository': + 'git+https://github.com/librariesio/yarn-parser.git', + 'codemeta:description': + 'Tiny web service for parsing yarn.lock files', } }] @@ -339,9 +329,8 @@ if origin[k] != v: break else: - # This block is run if and only if we didn't break, - # ie. if all supplied parts of the id are set to the - # expected value. + # This block is run iff we didn't break, ie. if all supplied + # parts of the id are set to the expected value. return origin assert False, id_