Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_utils.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | |||||
from swh.objstorage.exc import ObjNotFoundError | from swh.objstorage.exc import ObjNotFoundError | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.indexer.storage import INDEXER_CFG_KEY | from swh.indexer.storage import INDEXER_CFG_KEY | ||||
BASE_TEST_CONFIG = { | BASE_TEST_CONFIG = { | ||||
'storage': { | 'storage': { | ||||
'cls': 'remote', | 'cls': 'memory', | ||||
'args': { | 'args': { | ||||
'url': 'http://nowhere/', | |||||
}, | }, | ||||
}, | }, | ||||
'objstorage': { | 'objstorage': { | ||||
'cls': 'remote', | 'cls': 'memory', | ||||
'args': { | 'args': { | ||||
'url': 'http://nowhere2/', | |||||
}, | }, | ||||
}, | }, | ||||
INDEXER_CFG_KEY: { | INDEXER_CFG_KEY: { | ||||
'cls': 'memory', | 'cls': 'memory', | ||||
'args': { | 'args': { | ||||
}, | }, | ||||
}, | }, | ||||
} | } | ||||
▲ Show 20 Lines • Show All 112 Lines • ▼ Show 20 Lines | SNAPSHOTS = { | ||||
'branches': { | 'branches': { | ||||
b'HEAD': { | b'HEAD': { | ||||
'target': hash_to_bytes( | 'target': hash_to_bytes( | ||||
'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | ||||
'target_type': 'revision'}}} | 'target_type': 'revision'}}} | ||||
} | } | ||||
REVISIONS = [{ | |||||
'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | |||||
'committer': { | |||||
'id': 26, | |||||
'name': b'Andrew Nesbitt', | |||||
'fullname': b'Andrew Nesbitt <andrewnez@gmail.com>', | |||||
'email': b'andrewnez@gmail.com' | |||||
}, | |||||
'synthetic': False, | |||||
'date': { | |||||
'negative_utc': False, | |||||
'timestamp': { | |||||
'seconds': 1487596456, | |||||
'microseconds': 0 | |||||
}, | |||||
'offset': 0 | |||||
}, | |||||
'directory': b'10' | |||||
}] | |||||
DIRECTORY_ID = b'10' | |||||
DIRECTORY = [{ | |||||
'sha1_git': b'abc', | |||||
'name': b'index.js', | |||||
'target': b'abc', | |||||
'length': 897, | |||||
'status': 'visible', | |||||
'type': 'file', | |||||
'perms': 33188, | |||||
'sha1': b'bcd' | |||||
}, | |||||
{ | |||||
'sha1_git': b'aab', | |||||
'name': b'package.json', | |||||
'target': b'aab', | |||||
'length': 712, | |||||
'status': 'visible', | |||||
'type': 'file', | |||||
'perms': 33188, | |||||
'sha1': b'cde' | |||||
}, | |||||
{ | |||||
'target': b'11', | |||||
'type': 'dir', | |||||
'length': None, | |||||
'name': b'.github', | |||||
'sha1': None, | |||||
'perms': 16384, | |||||
'sha1_git': None, | |||||
'status': None, | |||||
'sha256': None | |||||
} | |||||
] | |||||
SHA1_TO_LICENSES = { | SHA1_TO_LICENSES = { | ||||
'01c9379dfc33803963d07c1ccc748d3fe4c96bb5': ['GPL'], | '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': ['GPL'], | ||||
'02fb2c89e14f7fab46701478c83779c7beb7b069': ['Apache2.0'], | '02fb2c89e14f7fab46701478c83779c7beb7b069': ['Apache2.0'], | ||||
'103bc087db1d26afc3a0283f38663d081e9b01e6': ['MIT'], | '103bc087db1d26afc3a0283f38663d081e9b01e6': ['MIT'], | ||||
'688a5ef812c53907562fe379d4b3851e69c7cb15': ['AGPL'], | '688a5ef812c53907562fe379d4b3851e69c7cb15': ['AGPL'], | ||||
'da39a3ee5e6b4b0d3255bfef95601890afd80709': [], | 'da39a3ee5e6b4b0d3255bfef95601890afd80709': [], | ||||
} | } | ||||
Show All 15 Lines | '688a5ef812c53907562fe379d4b3851e69c7cb15': [{ | ||||
'name': 'symbol', | 'name': 'symbol', | ||||
'kind': 'float', | 'kind': 'float', | ||||
'line': 99, | 'line': 99, | ||||
'lang': 'python', | 'lang': 'python', | ||||
}], | }], | ||||
} | } | ||||
class MockObjStorage: | OBJ_STORAGE_DATA = { | ||||
"""Mock an swh-objstorage objstorage with predefined contents. | |||||
""" | |||||
data = {} | |||||
def __init__(self): | |||||
self.data = { | |||||
'01c9379dfc33803963d07c1ccc748d3fe4c96bb5': b'this is some text', | '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': b'this is some text', | ||||
'688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text', | '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text', | ||||
'8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text', | '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text', | ||||
'02fb2c89e14f7fab46701478c83779c7beb7b069': b""" | '02fb2c89e14f7fab46701478c83779c7beb7b069': b""" | ||||
import unittest | import unittest | ||||
import logging | import logging | ||||
from swh.indexer.mimetype import ContentMimetypeIndexer | from swh.indexer.mimetype import ContentMimetypeIndexer | ||||
from swh.indexer.tests.test_utils import MockObjStorage | from swh.indexer.tests.test_utils import MockObjStorage | ||||
class MockStorage(): | class MockStorage(): | ||||
def content_mimetype_add(self, mimetypes): | def content_mimetype_add(self, mimetypes): | ||||
self.state = mimetypes | self.state = mimetypes | ||||
self.conflict_update = conflict_update | self.conflict_update = conflict_update | ||||
def indexer_configuration_add(self, tools): | def indexer_configuration_add(self, tools): | ||||
return [{ | return [{ | ||||
'id': 10, | 'id': 10, | ||||
}] | }] | ||||
""", | """, | ||||
'103bc087db1d26afc3a0283f38663d081e9b01e6': b""" | '103bc087db1d26afc3a0283f38663d081e9b01e6': b""" | ||||
#ifndef __AVL__ | #ifndef __AVL__ | ||||
#define __AVL__ | #define __AVL__ | ||||
typedef struct _avl_tree avl_tree; | typedef struct _avl_tree avl_tree; | ||||
typedef struct _data_t { | typedef struct _data_t { | ||||
int content; | int content; | ||||
} data_t; | } data_t; | ||||
""", | """, | ||||
'93666f74f1cf635c8c8ac118879da6ec5623c410': b""" | '93666f74f1cf635c8c8ac118879da6ec5623c410': b""" | ||||
(should 'pygments (recognize 'lisp 'easily)) | (should 'pygments (recognize 'lisp 'easily)) | ||||
""", | """, | ||||
'26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b""" | '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b""" | ||||
{ | { | ||||
"name": "test_metadata", | "name": "test_metadata", | ||||
"version": "0.0.1", | "version": "0.0.1", | ||||
"description": "Simple package.json test for indexer", | "description": "Simple package.json test for indexer", | ||||
"repository": { | "repository": { | ||||
"type": "git", | "type": "git", | ||||
"url": "https://github.com/moranegg/metadata_test" | "url": "https://github.com/moranegg/metadata_test" | ||||
} | } | ||||
} | } | ||||
""", | """, | ||||
'd4c647f0fc257591cc9ba1722484229780d1c607': b""" | 'd4c647f0fc257591cc9ba1722484229780d1c607': b""" | ||||
{ | { | ||||
"version": "5.0.3", | "version": "5.0.3", | ||||
"name": "npm", | "name": "npm", | ||||
"description": "a package manager for JavaScript", | "description": "a package manager for JavaScript", | ||||
"keywords": [ | "keywords": [ | ||||
"install", | "install", | ||||
"modules", | "modules", | ||||
"package manager", | "package manager", | ||||
"package.json" | "package.json" | ||||
], | ], | ||||
"preferGlobal": true, | "preferGlobal": true, | ||||
"config": { | "config": { | ||||
"publishtest": false | "publishtest": false | ||||
}, | }, | ||||
"homepage": "https://docs.npmjs.com/", | "homepage": "https://docs.npmjs.com/", | ||||
"author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", | "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", | ||||
"repository": { | "repository": { | ||||
"type": "git", | "type": "git", | ||||
"url": "https://github.com/npm/npm" | "url": "https://github.com/npm/npm" | ||||
}, | }, | ||||
"bugs": { | "bugs": { | ||||
"url": "https://github.com/npm/npm/issues" | "url": "https://github.com/npm/npm/issues" | ||||
}, | }, | ||||
"dependencies": { | "dependencies": { | ||||
"JSONStream": "~1.3.1", | "JSONStream": "~1.3.1", | ||||
"abbrev": "~1.1.0", | "abbrev": "~1.1.0", | ||||
"ansi-regex": "~2.1.1", | "ansi-regex": "~2.1.1", | ||||
"ansicolors": "~0.3.2", | "ansicolors": "~0.3.2", | ||||
"ansistyles": "~0.1.3" | "ansistyles": "~0.1.3" | ||||
}, | }, | ||||
"devDependencies": { | "devDependencies": { | ||||
"tacks": "~1.2.6", | "tacks": "~1.2.6", | ||||
"tap": "~10.3.2" | "tap": "~10.3.2" | ||||
}, | }, | ||||
"license": "Artistic-2.0" | "license": "Artistic-2.0" | ||||
} | } | ||||
""", | """, | ||||
'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" | 'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" | ||||
""", | """, | ||||
'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'', | 'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'', | ||||
} | } | ||||
CONTENT_METADATA = [{ | |||||
'tool': { | |||||
'configuration': { | |||||
'type': 'local', | |||||
'context': 'NpmMapping' | |||||
}, | |||||
'version': '0.0.1', | |||||
'id': 6, | |||||
'name': 'swh-metadata-translator' | |||||
}, | |||||
'id': b'cde', | |||||
'translated_metadata': { | |||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | |||||
'type': 'SoftwareSourceCode', | |||||
'codemeta:issueTracker': | |||||
'https://github.com/librariesio/yarn-parser/issues', | |||||
'version': '1.0.0', | |||||
'name': 'yarn-parser', | |||||
'schema:author': 'Andrew Nesbitt', | |||||
'url': | |||||
'https://github.com/librariesio/yarn-parser#readme', | |||||
'processorRequirements': {'node': '7.5'}, | |||||
'license': 'AGPL-3.0', | |||||
'keywords': ['yarn', 'parse', 'lock', 'dependencies'], | |||||
'schema:codeRepository': | |||||
'git+https://github.com/librariesio/yarn-parser.git', | |||||
'description': | |||||
'Tiny web service for parsing yarn.lock files', | |||||
} | |||||
}] | |||||
def fill_obj_storage(obj_storage): | |||||
"""Add some content in an object storage.""" | |||||
for (obj_id, content) in OBJ_STORAGE_DATA.items(): | |||||
obj_storage.add(content, obj_id=hash_to_bytes(obj_id)) | |||||
class MockObjStorage: | |||||
"""Mock an swh-objstorage objstorage with predefined contents. | |||||
""" | |||||
data = {} | |||||
def __init__(self): | |||||
self.data = OBJ_STORAGE_DATA.copy() | |||||
def __iter__(self): | def __iter__(self): | ||||
yield from self.data.keys() | yield from self.data.keys() | ||||
def __contains__(self, sha1): | def __contains__(self, sha1): | ||||
return self.data.get(sha1) is not None | return self.data.get(sha1) is not None | ||||
def get(self, sha1): | def get(self, sha1): | ||||
raw_content = self.data.get(sha1) | raw_content = self.data.get(sha1) | ||||
▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines | def revision_metadata_get(self, ids): | ||||
item['tool'] = self.tools[tool_id].copy() | item['tool'] = self.tools[tool_id].copy() | ||||
yield item | yield item | ||||
def origin_intrinsic_metadata_add(self, metadata, conflict_update=None): | def origin_intrinsic_metadata_add(self, metadata, conflict_update=None): | ||||
self.added_data.append( | self.added_data.append( | ||||
('origin_intrinsic_metadata', conflict_update, metadata)) | ('origin_intrinsic_metadata', conflict_update, metadata)) | ||||
def content_metadata_get(self, sha1s): | def content_metadata_get(self, sha1s): | ||||
return [{ | assert sha1s == [b'cde'] | ||||
'tool': { | return CONTENT_METADATA | ||||
'configuration': { | |||||
'type': 'local', | |||||
'context': 'NpmMapping' | def fill_storage(storage): | ||||
}, | for origin in ORIGINS: | ||||
'version': '0.0.1', | origin = origin.copy() | ||||
'id': 6, | del origin['id'] | ||||
'name': 'swh-metadata-translator' | last_origin_id = storage.origin_add_one(origin) | ||||
}, | visit = storage.origin_visit_add(last_origin_id, datetime.datetime.now()) | ||||
'id': b'cde', | for (snap_id, snap_branches) in SNAPSHOTS.items(): | ||||
'translated_metadata': { | storage.snapshot_add(last_origin_id, visit['visit'], { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | 'id': snap_id, | ||||
'type': 'SoftwareSourceCode', | 'branches': snap_branches | ||||
'codemeta:issueTracker': | }) | ||||
'https://github.com/librariesio/yarn-parser/issues', | storage.revision_add(REVISIONS) | ||||
'version': '1.0.0', | storage.directory_add([{ | ||||
'name': 'yarn-parser', | 'id': DIRECTORY_ID, | ||||
'schema:author': 'Andrew Nesbitt', | 'entries': DIRECTORY, | ||||
'url': | }]) | ||||
'https://github.com/librariesio/yarn-parser#readme', | |||||
'processorRequirements': {'node': '7.5'}, | |||||
'license': 'AGPL-3.0', | |||||
'keywords': ['yarn', 'parse', 'lock', 'dependencies'], | |||||
'schema:codeRepository': | |||||
'git+https://github.com/librariesio/yarn-parser.git', | |||||
'description': | |||||
'Tiny web service for parsing yarn.lock files', | |||||
} | |||||
}] | |||||
class MockStorage(): | class MockStorage(): | ||||
"""Mock a real swh-storage storage to simplify reading indexers' | """Mock a real swh-storage storage to simplify reading indexers' | ||||
outputs. | outputs. | ||||
""" | """ | ||||
def origin_get(self, id_): | def origin_get(self, id_): | ||||
Show All 9 Lines | class MockStorage(): | ||||
def snapshot_get_latest(self, origin_id): | def snapshot_get_latest(self, origin_id): | ||||
if origin_id in SNAPSHOTS: | if origin_id in SNAPSHOTS: | ||||
return SNAPSHOTS[origin_id] | return SNAPSHOTS[origin_id] | ||||
else: | else: | ||||
assert False, origin_id | assert False, origin_id | ||||
def revision_get(self, revisions): | def revision_get(self, revisions): | ||||
return [{ | return REVISIONS.copy() | ||||
'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | |||||
'committer': { | |||||
'id': 26, | |||||
'name': b'Andrew Nesbitt', | |||||
'fullname': b'Andrew Nesbitt <andrewnez@gmail.com>', | |||||
'email': b'andrewnez@gmail.com' | |||||
}, | |||||
'synthetic': False, | |||||
'date': { | |||||
'negative_utc': False, | |||||
'timestamp': { | |||||
'seconds': 1487596456, | |||||
'microseconds': 0 | |||||
}, | |||||
'offset': 0 | |||||
}, | |||||
'directory': b'10' | |||||
}] | |||||
def directory_ls(self, directory, recursive=False, cur=None): | def directory_ls(self, directory, recursive=False, cur=None): | ||||
# with directory: b'\x9d', | assert directory == DIRECTORY_ID | ||||
return [{ | return DIRECTORY | ||||
'sha1_git': b'abc', | |||||
'name': b'index.js', | |||||
'target': b'abc', | |||||
'length': 897, | |||||
'status': 'visible', | |||||
'type': 'file', | |||||
'perms': 33188, | |||||
'dir_id': b'10', | |||||
'sha1': b'bcd' | |||||
}, | |||||
{ | |||||
'sha1_git': b'aab', | |||||
'name': b'package.json', | |||||
'target': b'aab', | |||||
'length': 712, | |||||
'status': 'visible', | |||||
'type': 'file', | |||||
'perms': 33188, | |||||
'dir_id': b'10', | |||||
'sha1': b'cde' | |||||
}, | |||||
{ | |||||
'dir_id': b'10', | |||||
'target': b'11', | |||||
'type': 'dir', | |||||
'length': None, | |||||
'name': b'.github', | |||||
'sha1': None, | |||||
'perms': 16384, | |||||
'sha1_git': None, | |||||
'status': None, | |||||
'sha256': None | |||||
}] | |||||
class BasicMockStorage(): | class BasicMockStorage(): | ||||
"""In memory implementation to fake the content_get_range api. | """In memory implementation to fake the content_get_range api. | ||||
FIXME: To remove when the actual in-memory lands. | FIXME: To remove when the actual in-memory lands. | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 278 Lines • Show Last 20 Lines |