diff --git a/swh/indexer/data/package-json/CITATION b/swh/indexer/data/package-json/CITATION new file mode 100644 --- /dev/null +++ b/swh/indexer/data/package-json/CITATION @@ -0,0 +1 @@ +swh:1:dir:49dd6f75450a37243dfcc4b418ca5bf5e0010748;origin=https://github.com/Bartvds/package.json-schema diff --git a/swh/indexer/data/package-json/LICENSE b/swh/indexer/data/package-json/LICENSE new file mode 100644 --- /dev/null +++ b/swh/indexer/data/package-json/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2014 Bart van der Schoor + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. diff --git a/swh/indexer/data/package-json/schema.json b/swh/indexer/data/package-json/schema.json new file mode 100644 --- /dev/null +++ b/swh/indexer/data/package-json/schema.json @@ -0,0 +1,377 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema", + "id": "lib://package.json", + "title": "package.json-schema", + "description": "JSON Schema for node/npm package.json", + "$ref": "lib://package.json#/definitions/standard", + "definitions": { + "minimal": { + "allOf": [ + { + "$ref": "lib://package.json#/definitions/structure" + }, + { + "required": [ + "name", + "version" + ] + } + ] + }, + "standard": { + "allOf": [ + { + "$ref": "lib://package.json#/definitions/structure" + }, + { + "required": [ + "name", + "version", + "description", + "keywords", + "author", + "homepage", + "repository", + "bugs", + "licenses", + "engines", + "main", + "scripts", + "dependencies", + "devDependencies" + ], + "properties": { + "scripts": { + "type": "object", + "properties": { + "test": { + "type" : "string", + "pattern": "[a-zA-Z]" + } + } + }, + "author": { + "$ref": "lib://package.json#/definitions/person-object" + }, + "contributors": { + "type": "array", + "items": { + "$ref": "lib://package.json#/definitions/person-object" + } + }, + "maintainers": { + "type": "array", + "items": { + "$ref": "lib://package.json#/definitions/person-object" + } + } + } + } + ] + }, + "structure": { + "type": "object", + "properties": { + "name": { + "$ref": "lib://package.json#/definitions/name" + }, + "version": { + "$ref": "lib://package.json#/definitions/semver" + }, + "description": { + "type": "string", + "minLength": 1 + }, + "keywords": { + "type": "array", + "uniqueItems": true, + "items": { + "$ref": "lib://package.json#/definitions/name" + } + }, + "author": { + "$ref": "lib://package.json#/definitions/person" + }, + "contributors": { + "type": "array", + "uniqueItems": true, + "items": { + "$ref": "lib://package.json#/definitions/person" + } + }, + "maintainers": { + "type": "array", + "uniqueItems": true, + "items": { + "$ref": "lib://package.json#/definitions/person" + } + }, + "homepage": { + "$ref": "lib://package.json#/definitions/uri-http" + }, + "repository": { + "$ref": "lib://package.json#/definitions/repository" + }, + "man": { + "oneOf": [ + { + "$ref": "lib://package.json#/definitions/path" + }, + { + "type": "array", + "uniqueItems": true, + "items": { + "$ref": "lib://package.json#/definitions/path" + } + } + ] + }, + "bugs": { + "oneOf": [ + { + "$ref": "lib://package.json#/definitions/uri-http" + }, + { + "type": "object", + "required": [ + "url" + ], + "properties": { + "url": { + "$ref": "lib://package.json#/definitions/uri-http" + }, + "email": { + "$ref": "lib://package.json#/definitions/email" + } + } + } + ] + }, + "license": { + "$ref": "lib://package.json#/definitions/licence" + }, + "licenses": { + "type": "array", + "uniqueItems": true, + "items": { + "$ref": "lib://package.json#/definitions/licence" + } + }, + "private": { + "type": "boolean" + }, + "preferGlobal": { + "type": "boolean" + }, + "engines": { + "$ref": "lib://package.json#/definitions/string-map" + }, + "engineStrict": { + "type": "boolean" + }, + "main": { + "$ref": "lib://package.json#/definitions/path" + }, + "bin": { + "oneOf": [ + { + "$ref": "lib://package.json#/definitions/path" + }, + { + + "$ref": "lib://package.json#/definitions/path-map" + } + ] + }, + "files": { + "type": "array", + "uniqueItems": true, + "items": { + "$ref": "lib://package.json#/definitions/path" + } + }, + "os": { + "type": "array", + "uniqueItems": true, + "items": { + "$ref": "lib://package.json#/definitions/identifier" + } + }, + "cpu": { + "type": "array", + "uniqueItems": true, + "items": { + "$ref": "lib://package.json#/definitions/identifier" + } + }, + "config": { + "type": "object" + }, + "publishConfig": { + "type": "object" + }, + "directories": { + "type": "object", + "properties": { + "lib": { + "$ref": "lib://package.json#/definitions/path" + }, + "bin": { + "$ref": "lib://package.json#/definitions/path" + }, + "man": { + "$ref": "lib://package.json#/definitions/path" + }, + "doc": { + "$ref": "lib://package.json#/definitions/path" + }, + "example": { + "$ref": "lib://package.json#/definitions/path" + } + } + }, + "scripts": { + "$ref": "lib://package.json#/definitions/string-map" + }, + "dependencies": { + "$ref": "lib://package.json#/definitions/dependency-map" + }, + "devDependencies": { + "$ref": "lib://package.json#/definitions/dependency-map" + }, + "bundledDependencies": { + "$ref": "lib://package.json#/definitions/dependency-map" + }, + "bundleDependencies": { + "$ref": "lib://package.json#/definitions/dependency-map" + }, + "optionalDependencies": { + "$ref": "lib://package.json#/definitions/dependency-map" + }, + "peerDependencies": { + "$ref": "lib://package.json#/definitions/dependency-map" + } + } + }, + "uri-http": { + "type": "string", + "pattern": "^https?:\/\/" + }, + "email": { + "type": "string", + "pattern": "^([0-9a-zA-Z]([-\\.\\w]*[0-9a-zA-Z])*@([0-9a-zA-Z][-\\w]*[0-9a-zA-Z]\\.)+[a-zA-Z]{2,9})$" + }, + "path": { + "type": "string", + "minLength": 1 + }, + "name": { + "type": "string", + "pattern": "^[A-Za-z](?:[_\\.-]?[A-Za-z0-9]+)*$" + }, + "identifier": { + "type": "string", + "pattern": "^[A-Za-z](?:[_-]?[A-Za-z0-9]+)*$" + }, + "semver": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+(?:-[a-z]+(?:[_\\.-]*[a-z0-9]+)*)*$" + }, + "type-url": { + "type": "object", + "additionalProperties": false, + "required": [ + "type", + "url" + ], + "properties": { + "type": { + "type": "string", + "pattern": "[a-zA-Z]" + }, + "url": { + "$ref": "lib://package.json#/definitions/uri-http" + } + } + + }, + "repository": { + "$ref": "lib://package.json#/definitions/type-url" + }, + "licence": { + "oneOf": [ + { + "type": "string", + "pattern": "[a-zA-Z]" + }, + { + "$ref": "lib://package.json#/definitions/licence-object" + } + ] + }, + "licence-object": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "pattern": "[a-zA-Z]" + }, + "url": { + "$ref": "lib://package.json#/definitions/uri-http" + } + } + }, + "person": { + "oneOf": [ + { + "type": "string", + "pattern": "[a-zA-Z]" + }, + { + "$ref": "lib://package.json#/definitions/person-object" + } + ] + }, + "person-object": { + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string", + "pattern": "[a-zA-Z]" + }, + "email": { + "$ref": "lib://package.json#/definitions/email" + }, + "url": { + "$ref": "lib://package.json#/definitions/uri-http" + } + } + }, + "string-map": { + "type": "object", + "additionalProperties": false, + "patternProperties": { + ".+": { + "type": "string" + } + } + }, + "path-map": { + "type": "object", + "additionalProperties": false, + "patternProperties": { + ".+": { + "$ref": "lib://package.json#/definitions/path", + "pattern": "[a-zA-Z]" + } + } + }, + "dependency-map": { + "$ref": "lib://package.json#/definitions/string-map" + } + } +} diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -284,7 +284,7 @@ assert len(head_revs) == len(head_rev_ids) results = [] - for (orig, rev) in zip(origins, head_revs): + for (origin, rev) in zip(origins, head_revs): if not rev: self.warning('Missing head revision %s of origin %r', (hashutil.hash_to_bytes(rev['id']), origin)) diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -77,7 +77,7 @@ assert results == [origin_metadata] -def test_origin_metadata_indexer_duplicates( +def test_origin_metadata_indexer_duplicate_origin( idx_storage, storage, obj_storage, origin_metadata_indexer): indexer = OriginMetadataIndexer() indexer.storage = storage @@ -161,3 +161,28 @@ for result in results: del result['tool'] assert results == [origin_metadata] + + +def test_origin_metadata_indexer_duplicate_revision( + idx_storage, storage, obj_storage, origin_metadata_indexer): + indexer = OriginMetadataIndexer() + indexer.storage = storage + indexer.idx_storage = idx_storage + indexer.run(["git+https://github.com/librariesio/yarn-parser", + "git+https://github.com/librariesio/yarn-parser.git"]) + + origin1 = storage.origin_get({ + 'type': 'git', + 'url': 'https://github.com/librariesio/yarn-parser'}) + origin2 = storage.origin_get({ + 'type': 'git', + 'url': 'https://github.com/librariesio/yarn-parser.git'}) + assert origin1['id'] != origin2['id'] + rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') + + results = list(indexer.idx_storage.revision_metadata_get([rev_id])) + assert len(results) == 1 + + results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ + origin1['id'], origin2['id']])) + assert len(results) == 2 diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py --- a/swh/indexer/tests/utils.py +++ b/swh/indexer/tests/utils.py @@ -70,6 +70,12 @@ 'project': None, 'type': 'git', 'url': 'https://github.com/librariesio/yarn-parser'}, + { + 'id': 54974446, + 'lister': None, + 'project': None, + 'type': 'git', + 'url': 'https://github.com/librariesio/yarn-parser.git'}, ] SNAPSHOTS = { @@ -148,7 +154,13 @@ b'HEAD': { 'target': hash_to_bytes( '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), - 'target_type': 'revision'}}} + 'target_type': 'revision'}}}, + 54974446: { + 'branches': { + b'HEAD': { + 'target': hash_to_bytes( + '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), + 'target_type': 'revision'}}}, }