Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9339674
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
40 KB
Subscribers
None
View Options
diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py
index 0fed6fa..2e0ec04 100644
--- a/swh/indexer/tests/test_fossology_license.py
+++ b/swh/indexer/tests/test_fossology_license.py
@@ -1,227 +1,172 @@
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import unittest
import logging
from swh.indexer.fossology_license import (
ContentFossologyLicenseIndexer, FossologyLicenseRangeIndexer
)
from swh.indexer.tests.test_utils import (
MockObjStorage, BasicMockStorage, BasicMockIndexerStorage,
- SHA1_TO_LICENSES, IndexerRangeTest
+ SHA1_TO_LICENSES, CommonContentIndexerTest, CommonContentIndexerRangeTest
)
class NoDiskIndexer:
"""Mixin to override the DiskIndexer behavior avoiding side-effects in
tests.
"""
def write_to_temp(self, filename, data): # noop
return filename
def cleanup(self, content_path): # noop
return None
class InjectLicenseIndexer:
"""Override license computations.
"""
def compute_license(self, path, log=None):
"""path is the content identifier
"""
return {
'licenses': SHA1_TO_LICENSES.get(path)
}
class FossologyLicenseTestIndexer(
NoDiskIndexer, InjectLicenseIndexer, ContentFossologyLicenseIndexer):
- """Specific mimetype whose configuration is enough to satisfy the
- indexing tests.
+ """Specific fossology license whose configuration is enough to satisfy
+ the indexing checks.
"""
def prepare(self):
self.config = {
'tools': {
'name': 'nomos',
'version': '3.1.0rc2-31-ga2cbb8c',
'configuration': {
'command_line': 'nomossa <filepath>',
},
},
}
self.idx_storage = BasicMockIndexerStorage()
self.log = logging.getLogger('swh.indexer')
self.objstorage = MockObjStorage()
self.tools = self.register_tools(self.config['tools'])
self.tool = self.tools[0]
class FossologyLicenseIndexerUnknownToolTestStorage(
FossologyLicenseTestIndexer):
"""Specific fossology license indexer whose configuration is not
enough to satisfy the indexing checks
"""
def prepare(self):
super().prepare()
self.tools = None
class TestFossologyLicenseIndexerWithErrors(unittest.TestCase):
def test_wrong_unknown_configuration_tool(self):
"""Indexer with unknown configuration tool should fail the check"""
with self.assertRaisesRegex(ValueError, 'Tools None is unknown'):
FossologyLicenseIndexerUnknownToolTestStorage()
-class TestFossologyLicenseIndexer(unittest.TestCase):
+class TestFossologyLicenseIndexer(CommonContentIndexerTest, unittest.TestCase):
"""Fossology license tests.
"""
def setUp(self):
self.indexer = FossologyLicenseTestIndexer()
- def test_index_no_update(self):
- """Index sha1s results in new computed licenses
-
- """
- id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
- id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15'
- sha1s = [id0, id1]
-
- # when
- self.indexer.run(sha1s, policy_update='ignore-dups')
-
- # then
- expected_results = [{
- 'id': id0,
- 'indexer_configuration_id': 10,
- 'licenses': SHA1_TO_LICENSES[id0],
- }, {
- 'id': id1,
- 'indexer_configuration_id': 10,
- 'licenses': SHA1_TO_LICENSES[id1],
- }]
-
- self.assertFalse(self.indexer.idx_storage.conflict_update)
- self.assertEqual(expected_results, self.indexer.idx_storage.state)
-
- def test_index_update(self):
- """Index sha1s results in new computed licenses
-
- """
- id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
- id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15'
- id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' # empty content
- sha1s = [id0, id1, id2]
-
- # when
- self.indexer.run(sha1s, policy_update='update-dups')
-
- # then
- expected_results = [{
- 'id': id0,
- 'indexer_configuration_id': 10,
- 'licenses': SHA1_TO_LICENSES[id0],
- }, {
- 'id': id1,
- 'indexer_configuration_id': 10,
- 'licenses': SHA1_TO_LICENSES[id1],
- }, {
- 'id': id2,
- 'indexer_configuration_id': 10,
- 'licenses': SHA1_TO_LICENSES[id2],
- }]
-
- self.assertTrue(self.indexer.idx_storage.conflict_update)
- self.assertEqual(expected_results, self.indexer.idx_storage.state)
-
- def test_index_one_unknown_sha1(self):
- """Only existing contents are indexed
-
- """
- # given
- id0 = '688a5ef812c53907562fe379d4b3851e69c7cb15'
- sha1s = [id0,
- '799a5ef812c53907562fe379d4b3851e69c7cb15', # unknown
- '800a5ef812c53907562fe379d4b3851e69c7cb15'] # unknown
-
- # when
- self.indexer.run(sha1s, policy_update='update-dups')
-
+ self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
+ self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15'
+ self.id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' # empty content
# then
- expected_results = [{
- 'id': id0,
- 'indexer_configuration_id': 10,
- 'licenses': SHA1_TO_LICENSES[id0],
- }]
-
- self.assertTrue(self.indexer.idx_storage.conflict_update)
- self.assertEqual(expected_results, self.indexer.idx_storage.state)
+ self.expected_results = {
+ self.id0: {
+ 'id': self.id0,
+ 'indexer_configuration_id': 10,
+ 'licenses': SHA1_TO_LICENSES[self.id0],
+ },
+ self.id1: {
+ 'id': self.id1,
+ 'indexer_configuration_id': 10,
+ 'licenses': SHA1_TO_LICENSES[self.id1],
+ },
+ self.id2: {
+ 'id': self.id2,
+ 'indexer_configuration_id': 10,
+ 'licenses': SHA1_TO_LICENSES[self.id2],
+ }
+ }
class FossologyLicenseRangeIndexerTest(
NoDiskIndexer, InjectLicenseIndexer, FossologyLicenseRangeIndexer):
"""Testing the range indexer on fossology license.
"""
def prepare(self):
self.config = {
'tools': {
'name': 'nomos',
'version': '3.1.0rc2-31-ga2cbb8c',
'configuration': {
'command_line': 'nomossa <filepath>',
},
},
'write_batch_size': 100,
}
self.idx_storage = BasicMockIndexerStorage()
self.log = logging.getLogger('swh.indexer')
# this hardcodes some contents, will use this to setup the storage
self.objstorage = MockObjStorage()
# sync objstorage and storage
contents = [{'sha1': c_id} for c_id in self.objstorage]
self.storage = BasicMockStorage(contents)
self.tools = self.register_tools(self.config['tools'])
self.tool = self.tools[0]
-class TestFossologyLicenseRangeIndexer(IndexerRangeTest, unittest.TestCase):
+class TestFossologyLicenseRangeIndexer(
+ CommonContentIndexerRangeTest, unittest.TestCase):
def setUp(self):
self.indexer = FossologyLicenseRangeIndexerTest()
# will play along with the objstorage's mocked contents for now
self.contents = sorted(self.indexer.objstorage)
# FIXME: leverage swh.objstorage.in_memory_storage's
# InMemoryObjStorage, swh.storage.tests's gen_contents, and
# hypothesis to generate data to actually run indexer on those
self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
self.id1 = '02fb2c89e14f7fab46701478c83779c7beb7b069'
self.id2 = '103bc087db1d26afc3a0283f38663d081e9b01e6'
self.expected_results = {
self.id0: {
'id': self.id0,
'indexer_configuration_id': 10,
'licenses': SHA1_TO_LICENSES[self.id0]
},
self.id1: {
'id': self.id1,
'indexer_configuration_id': 10,
'licenses': SHA1_TO_LICENSES[self.id1]
},
self.id2: {
'id': self.id2,
'indexer_configuration_id': 10,
'licenses': SHA1_TO_LICENSES[self.id2]
}
}
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
index 6206e88..7fe178a 100644
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -1,198 +1,156 @@
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import unittest
import logging
from swh.indexer.mimetype import (
ContentMimetypeIndexer, MimetypeRangeIndexer
)
from swh.indexer.tests.test_utils import (
- MockObjStorage, BasicMockStorage, BasicMockIndexerStorage, IndexerRangeTest
+ MockObjStorage, BasicMockStorage, BasicMockIndexerStorage,
+ CommonContentIndexerTest, CommonContentIndexerRangeTest
)
class MimetypeTestIndexer(ContentMimetypeIndexer):
- """Specific mimetype whose configuration is enough to satisfy the
- indexing tests.
+ """Specific mimetype indexer instance whose configuration is enough to
+ satisfy the indexing tests.
"""
def prepare(self):
self.config = {
'tools': {
'name': 'file',
'version': '1:5.30-1+deb9u1',
'configuration': {
"type": "library",
"debian-package": "python3-magic"
},
},
}
self.idx_storage = BasicMockIndexerStorage()
self.log = logging.getLogger('swh.indexer')
self.objstorage = MockObjStorage()
self.tools = self.register_tools(self.config['tools'])
self.tool = self.tools[0]
class MimetypeIndexerUnknownToolTestStorage(MimetypeTestIndexer):
"""Specific mimetype whose configuration is not enough to satisfy the
- indexing tests.
+ indexing checks.
"""
def prepare(self):
super().prepare()
self.tools = None
class TestMimetypeIndexerWithErrors(unittest.TestCase):
def test_wrong_unknown_configuration_tool(self):
"""Indexer with unknown configuration tool should fail the check"""
with self.assertRaisesRegex(ValueError, 'Tools None is unknown'):
MimetypeIndexerUnknownToolTestStorage()
-class TestMimetypeIndexer(unittest.TestCase):
+class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase):
+ """Mimetype indexer test scenarios:
+
+ - new data within range are indexed
+ - no data outside a range are indexed
+ - with filtering existing indexed data prior to compute new index
+ - without filtering existing indexed data prior to compute new index
+
+ """
def setUp(self):
self.indexer = MimetypeTestIndexer()
- def test_index_no_update(self):
- # given
- sha1s = [
- '01c9379dfc33803963d07c1ccc748d3fe4c96bb5',
- '688a5ef812c53907562fe379d4b3851e69c7cb15',
- ]
-
- # when
- self.indexer.run(sha1s, policy_update='ignore-dups')
-
- # then
- expected_results = [{
- 'id': '01c9379dfc33803963d07c1ccc748d3fe4c96bb5',
- 'indexer_configuration_id': 10,
- 'mimetype': b'text/plain',
- 'encoding': b'us-ascii',
- }, {
- 'id': '688a5ef812c53907562fe379d4b3851e69c7cb15',
- 'indexer_configuration_id': 10,
- 'mimetype': b'text/plain',
- 'encoding': b'us-ascii',
- }]
-
- self.assertFalse(self.indexer.idx_storage.conflict_update)
- self.assertEqual(expected_results, self.indexer.idx_storage.state)
-
- def test_index_update(self):
- # given
- sha1s = [
- '01c9379dfc33803963d07c1ccc748d3fe4c96bb5',
- '688a5ef812c53907562fe379d4b3851e69c7cb15',
- 'da39a3ee5e6b4b0d3255bfef95601890afd80709', # empty content
- ]
-
- # when
- self.indexer.run(sha1s, policy_update='update-dups')
-
- # then
- expected_results = [{
- 'id': '01c9379dfc33803963d07c1ccc748d3fe4c96bb5',
- 'indexer_configuration_id': 10,
- 'mimetype': b'text/plain',
- 'encoding': b'us-ascii',
- }, {
- 'id': '688a5ef812c53907562fe379d4b3851e69c7cb15',
- 'indexer_configuration_id': 10,
- 'mimetype': b'text/plain',
- 'encoding': b'us-ascii',
- }, {
- 'id': 'da39a3ee5e6b4b0d3255bfef95601890afd80709',
- 'indexer_configuration_id': 10,
- 'mimetype': b'application/x-empty',
- 'encoding': b'binary',
- }]
-
- self.assertTrue(self.indexer.idx_storage.conflict_update)
- self.assertEqual(expected_results, self.indexer.idx_storage.state)
-
- def test_index_one_unknown_sha1(self):
- # given
- sha1s = ['688a5ef812c53907562fe379d4b3851e69c7cb15',
- '799a5ef812c53907562fe379d4b3851e69c7cb15', # unknown
- '800a5ef812c53907562fe379d4b3851e69c7cb15'] # unknown
-
- # when
- self.indexer.run(sha1s, policy_update='update-dups')
-
- # then
- expected_results = [{
- 'id': '688a5ef812c53907562fe379d4b3851e69c7cb15',
- 'indexer_configuration_id': 10,
- 'mimetype': b'text/plain',
- 'encoding': b'us-ascii',
- }]
-
- self.assertTrue(self.indexer.idx_storage.conflict_update)
- self.assertEqual(expected_results, self.indexer.idx_storage.state)
+ self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
+ self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15'
+ self.id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
+ self.expected_results = {
+ self.id0: {
+ 'id': self.id0,
+ 'indexer_configuration_id': 10,
+ 'mimetype': b'text/plain',
+ 'encoding': b'us-ascii',
+ },
+ self.id1: {
+ 'id': self.id1,
+ 'indexer_configuration_id': 10,
+ 'mimetype': b'text/plain',
+ 'encoding': b'us-ascii',
+ },
+ self.id2: {
+ 'id': self.id2,
+ 'indexer_configuration_id': 10,
+ 'mimetype': b'application/x-empty',
+ 'encoding': b'binary',
+ }
+ }
class MimetypeRangeIndexerTest(MimetypeRangeIndexer):
"""Specific mimetype whose configuration is enough to satisfy the
indexing tests.
"""
def prepare(self):
self.config = {
'tools': {
'name': 'file',
'version': '1:5.30-1+deb9u1',
'configuration': {
"type": "library",
"debian-package": "python3-magic"
},
},
'write_batch_size': 100,
}
self.idx_storage = BasicMockIndexerStorage()
self.log = logging.getLogger('swh.indexer')
# this hardcodes some contents, will use this to setup the storage
self.objstorage = MockObjStorage()
# sync objstorage and storage
contents = [{'sha1': c_id} for c_id in self.objstorage]
self.storage = BasicMockStorage(contents)
self.tools = self.register_tools(self.config['tools'])
self.tool = self.tools[0]
-class TestMimetypeRangeIndexer(IndexerRangeTest, unittest.TestCase):
- """Range Mimetype Indexer tests on """
+class TestMimetypeRangeIndexer(
+ CommonContentIndexerRangeTest, unittest.TestCase):
+ """Range Mimetype Indexer tests.
+
+ """
def setUp(self):
self.indexer = MimetypeRangeIndexerTest()
# will play along with the objstorage's mocked contents for now
self.contents = sorted(self.indexer.objstorage)
# FIXME: leverage swh.objstorage.in_memory_storage's
# InMemoryObjStorage, swh.storage.tests's gen_contents, and
# hypothesis to generate data to actually run indexer on those
self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5'
self.id1 = '02fb2c89e14f7fab46701478c83779c7beb7b069'
self.id2 = '103bc087db1d26afc3a0283f38663d081e9b01e6'
self.expected_results = {
self.id0: {
'encoding': b'us-ascii',
'id': self.id0,
'indexer_configuration_id': 10,
'mimetype': b'text/plain'},
self.id1: {
'encoding': b'us-ascii',
'id': self.id1,
'indexer_configuration_id': 10,
'mimetype': b'text/x-python'},
self.id2: {
'encoding': b'us-ascii',
'id': self.id2,
'indexer_configuration_id': 10,
'mimetype': b'text/plain'}
}
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
index 6a4e705..89c7afb 100644
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -1,600 +1,648 @@
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.objstorage.exc import ObjNotFoundError
from swh.model import hashutil
ORIGINS = [
{
'id': 52189575,
'lister': None,
'project': None,
'type': 'git',
'url': 'https://github.com/SoftwareHeritage/swh-storage'},
{
'id': 4423668,
'lister': None,
'project': None,
'type': 'ftp',
'url': 'rsync://ftp.gnu.org/gnu/3dldf'},
{
'id': 77775770,
'lister': None,
'project': None,
'type': 'deposit',
'url': 'https://forge.softwareheritage.org/source/jesuisgpl/'},
{
'id': 85072327,
'lister': None,
'project': None,
'type': 'pypi',
'url': 'https://pypi.org/project/limnoria/'},
{
'id': 49908349,
'lister': None,
'project': None,
'type': 'svn',
'url': 'http://0-512-md.googlecode.com/svn/'},
{
'id': 54974445,
'lister': None,
'project': None,
'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'},
]
SNAPSHOTS = {
52189575: {
'branches': {
b'refs/heads/add-revision-origin-cache': {
'target': b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0'
b's\xe7/\xe9l\x1e',
'target_type': 'revision'},
b'HEAD': {
'target': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}'
b'\xac\xefrm',
'target_type': 'revision'},
b'refs/tags/v0.0.103': {
'target': b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+'
b'\x0f\xdd',
'target_type': 'release'},
}},
4423668: {
'branches': {
b'3DLDF-1.1.4.tar.gz': {
'target': b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc'
b'"G\x99\x11',
'target_type': 'revision'},
b'3DLDF-2.0.2.tar.gz': {
'target': b'\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e='
b'\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V',
'target_type': 'revision'},
b'3DLDF-2.0.3-examples.tar.gz': {
'target': b'!H\x19\xc0\xee\x82-\x12F1\xbd\x97'
b'\xfe\xadZ\x80\x80\xc1\x83\xff',
'target_type': 'revision'},
b'3DLDF-2.0.3.tar.gz': {
'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
b'\xcc\x1a\xb4`\x8c\x8by',
'target_type': 'revision'},
b'3DLDF-2.0.tar.gz': {
'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G'
b'\xd3\xd1m',
b'target_type': 'revision'}
}},
77775770: {
'branches': {
b'master': {
'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
b'\xa6\xe9\x99\xb1\x9e]q\xeb',
'target_type': 'revision'}
},
'id': b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV"
b"\x1d\r "},
85072327: {
'branches': {
b'HEAD': {
'target': b'releases/2018.09.09',
'target_type': 'alias'},
b'releases/2018.09.01': {
'target': b'<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d'
b'\xbb\xdfF\xfdw\xcf',
'target_type': 'revision'},
b'releases/2018.09.09': {
'target': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
b'A\x10\x9d\xc5\xfa2\xf8t',
'target_type': 'revision'}},
'id': b'{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay'
b'\x12\x9e\xd6\xb3'},
49908349: {
'branches': {
b'master': {
'target': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
b'\xc9\xad#.\x1bw=\x18',
'target_type': 'revision'}},
'id': b'\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7'
b'\x05\xea\xb8\x1f\xc4H\xf4s'},
54974445: {
'branches': {
b'HEAD': {
'target': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
'target_type': 'revision'}}}
}
SHA1_TO_LICENSES = {
'01c9379dfc33803963d07c1ccc748d3fe4c96bb5': ['GPL'],
'02fb2c89e14f7fab46701478c83779c7beb7b069': ['Apache2.0'],
'103bc087db1d26afc3a0283f38663d081e9b01e6': ['MIT'],
'688a5ef812c53907562fe379d4b3851e69c7cb15': ['AGPL'],
'da39a3ee5e6b4b0d3255bfef95601890afd80709': [],
}
class MockObjStorage:
"""Mock an swh-objstorage objstorage with predefined contents.
"""
data = {}
def __init__(self):
self.data = {
'01c9379dfc33803963d07c1ccc748d3fe4c96bb5': b'this is some text',
'688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text',
'8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text',
'02fb2c89e14f7fab46701478c83779c7beb7b069': b"""
import unittest
import logging
from swh.indexer.mimetype import ContentMimetypeIndexer
from swh.indexer.tests.test_utils import MockObjStorage
class MockStorage():
def content_mimetype_add(self, mimetypes):
self.state = mimetypes
self.conflict_update = conflict_update
def indexer_configuration_add(self, tools):
return [{
'id': 10,
}]
""",
'103bc087db1d26afc3a0283f38663d081e9b01e6': b"""
#ifndef __AVL__
#define __AVL__
typedef struct _avl_tree avl_tree;
typedef struct _data_t {
int content;
} data_t;
""",
'93666f74f1cf635c8c8ac118879da6ec5623c410': b"""
(should 'pygments (recognize 'lisp 'easily))
""",
'26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b"""
{
"name": "test_metadata",
"version": "0.0.1",
"description": "Simple package.json test for indexer",
"repository": {
"type": "git",
"url": "https://github.com/moranegg/metadata_test"
}
}
""",
'd4c647f0fc257591cc9ba1722484229780d1c607': b"""
{
"version": "5.0.3",
"name": "npm",
"description": "a package manager for JavaScript",
"keywords": [
"install",
"modules",
"package manager",
"package.json"
],
"preferGlobal": true,
"config": {
"publishtest": false
},
"homepage": "https://docs.npmjs.com/",
"author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
"repository": {
"type": "git",
"url": "https://github.com/npm/npm"
},
"bugs": {
"url": "https://github.com/npm/npm/issues"
},
"dependencies": {
"JSONStream": "~1.3.1",
"abbrev": "~1.1.0",
"ansi-regex": "~2.1.1",
"ansicolors": "~0.3.2",
"ansistyles": "~0.1.3"
},
"devDependencies": {
"tacks": "~1.2.6",
"tap": "~10.3.2"
},
"license": "Artistic-2.0"
}
""",
'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b"""
""",
'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'',
}
def __iter__(self):
yield from self.data.keys()
def __contains__(self, sha1):
return self.data.get(sha1) is not None
def get(self, sha1):
raw_content = self.data.get(sha1)
if raw_content is None:
raise ObjNotFoundError(sha1)
return raw_content
class MockIndexerStorage():
"""Mock an swh-indexer storage.
"""
added_data = []
def indexer_configuration_add(self, tools):
tool = tools[0]
if tool['tool_name'] == 'swh-metadata-translator':
return [{
'id': 30,
'tool_name': 'swh-metadata-translator',
'tool_version': '0.0.1',
'tool_configuration': {
'type': 'local',
'context': 'NpmMapping'
},
}]
elif tool['tool_name'] == 'swh-metadata-detector':
return [{
'id': 7,
'tool_name': 'swh-metadata-detector',
'tool_version': '0.0.1',
'tool_configuration': {
'type': 'local',
'context': 'NpmMapping'
},
}]
elif tool['tool_name'] == 'origin-metadata':
return [{
'id': 8,
'tool_name': 'origin-metadata',
'tool_version': '0.0.1',
'tool_configuration': {},
}]
else:
assert False, 'Unknown tool {tool_name}'.format(**tool)
def content_metadata_missing(self, sha1s):
yield from []
def content_metadata_add(self, metadata, conflict_update=None):
self.added_data.append(
('content_metadata', conflict_update, metadata))
def revision_metadata_add(self, metadata, conflict_update=None):
self.added_data.append(
('revision_metadata', conflict_update, metadata))
def origin_intrinsic_metadata_add(self, metadata, conflict_update=None):
self.added_data.append(
('origin_intrinsic_metadata', conflict_update, metadata))
def content_metadata_get(self, sha1s):
return [{
'tool': {
'configuration': {
'type': 'local',
'context': 'NpmMapping'
},
'version': '0.0.1',
'id': 6,
'name': 'swh-metadata-translator'
},
'id': b'cde',
'translated_metadata': {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'codemeta:issueTracker':
'https://github.com/librariesio/yarn-parser/issues',
'version': '1.0.0',
'name': 'yarn-parser',
'schema:author': 'Andrew Nesbitt',
'url':
'https://github.com/librariesio/yarn-parser#readme',
'processorRequirements': {'node': '7.5'},
'license': 'AGPL-3.0',
'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
'schema:codeRepository':
'git+https://github.com/librariesio/yarn-parser.git',
'description':
'Tiny web service for parsing yarn.lock files',
}
}]
class MockStorage():
"""Mock a real swh-storage storage to simplify reading indexers'
outputs.
"""
def origin_get(self, id_):
for origin in ORIGINS:
for (k, v) in id_.items():
if origin[k] != v:
break
else:
# This block is run iff we didn't break, ie. if all supplied
# parts of the id are set to the expected value.
return origin
assert False, id_
def snapshot_get_latest(self, origin_id):
if origin_id in SNAPSHOTS:
return SNAPSHOTS[origin_id]
else:
assert False, origin_id
def revision_get(self, revisions):
return [{
'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
'committer': {
'id': 26,
'name': b'Andrew Nesbitt',
'fullname': b'Andrew Nesbitt <andrewnez@gmail.com>',
'email': b'andrewnez@gmail.com'
},
'synthetic': False,
'date': {
'negative_utc': False,
'timestamp': {
'seconds': 1487596456,
'microseconds': 0
},
'offset': 0
},
'directory': b'10'
}]
def directory_ls(self, directory, recursive=False, cur=None):
# with directory: b'\x9d',
return [{
'sha1_git': b'abc',
'name': b'index.js',
'target': b'abc',
'length': 897,
'status': 'visible',
'type': 'file',
'perms': 33188,
'dir_id': b'10',
'sha1': b'bcd'
},
{
'sha1_git': b'aab',
'name': b'package.json',
'target': b'aab',
'length': 712,
'status': 'visible',
'type': 'file',
'perms': 33188,
'dir_id': b'10',
'sha1': b'cde'
},
{
'dir_id': b'10',
'target': b'11',
'type': 'dir',
'length': None,
'name': b'.github',
'sha1': None,
'perms': 16384,
'sha1_git': None,
'status': None,
'sha256': None
}]
class BasicMockStorage():
"""In memory implementation to fake the content_get_range api.
FIXME: To remove when the actual in-memory lands.
"""
contents = []
def __init__(self, contents):
self.contents = contents
def content_get_range(self, start, end, limit=1000):
        # to make input test data consistent with actual runtime; the
        # other, proper way of doing things would be to rewrite all
# tests (that's another task entirely so not right now)
if isinstance(start, bytes):
start = hashutil.hash_to_hex(start)
if isinstance(end, bytes):
end = hashutil.hash_to_hex(end)
results = []
_next_id = None
counter = 0
for c in self.contents:
_id = c['sha1']
if start <= _id and _id <= end:
results.append(c)
if counter >= limit:
break
counter += 1
return {
'contents': results,
'next': _next_id
}
class BasicMockIndexerStorage():
"""Mock Indexer storage to simplify reading indexers' outputs.
"""
state = []
def _internal_add(self, data, conflict_update=None):
"""All content indexer have the same structure. So reuse `data` as the
same data. It's either mimetype, language,
fossology_license, etc...
"""
self.state = data
self.conflict_update = conflict_update
def content_mimetype_add(self, data, conflict_update=None):
self._internal_add(data, conflict_update=conflict_update)
def content_fossology_license_add(self, data, conflict_update=None):
self._internal_add(data, conflict_update=conflict_update)
def _internal_get_range(self, start, end,
indexer_configuration_id, limit=1000):
"""Same logic as _internal_add, we retrieve indexed data given an
identifier. So the code here does not change even though
the underlying data does.
"""
        # to make input test data consistent with actual runtime; the
        # other, proper way of doing things would be to rewrite all
# tests (that's another task entirely so not right now)
if isinstance(start, bytes):
start = hashutil.hash_to_hex(start)
if isinstance(end, bytes):
end = hashutil.hash_to_hex(end)
results = []
_next = None
counter = 0
for m in self.state:
_id = m['id']
_tool_id = m['indexer_configuration_id']
if (start <= _id and _id <= end and
_tool_id == indexer_configuration_id):
results.append(_id)
if counter >= limit:
break
counter += 1
return {
'ids': results,
'next': _next
}
def content_mimetype_get_range(
self, start, end, indexer_configuration_id, limit=1000):
return self._internal_get_range(
start, end, indexer_configuration_id, limit=limit)
def content_fossology_license_get_range(
self, start, end, indexer_configuration_id, limit=1000):
return self._internal_get_range(
start, end, indexer_configuration_id, limit=limit)
def indexer_configuration_add(self, tools):
return [{
'id': 10,
}]
-class IndexerRangeTest:
+class CommonContentIndexerTest:
+ def assert_results_ok(self, actual_results, expected_results=None):
+ if expected_results is None:
+ expected_results = self.expected_results
+
+ for indexed_data in actual_results:
+ _id = indexed_data['id']
+ self.assertEqual(indexed_data, expected_results[_id])
+ _tool_id = indexed_data['indexer_configuration_id']
+ self.assertEqual(_tool_id, self.indexer.tool['id'])
+
+ def test_index(self):
+ """Known sha1 have their data indexed
+
+ """
+ sha1s = [self.id0, self.id1, self.id2]
+
+ # when
+ self.indexer.run(sha1s, policy_update='update-dups')
+
+ actual_results = self.indexer.idx_storage.state
+ self.assertTrue(self.indexer.idx_storage.conflict_update)
+ self.assert_results_ok(actual_results)
+
+ # 2nd pass
+ self.indexer.run(sha1s, policy_update='ignore-dups')
+
+ self.assertFalse(self.indexer.idx_storage.conflict_update)
+ self.assert_results_ok(actual_results)
+
+ def test_index_one_unknown_sha1(self):
+ """Unknown sha1 are not indexed"""
+ sha1s = [self.id1,
+ '799a5ef812c53907562fe379d4b3851e69c7cb15', # unknown
+ '800a5ef812c53907562fe379d4b3851e69c7cb15'] # unknown
+
+ # when
+ self.indexer.run(sha1s, policy_update='update-dups')
+ actual_results = self.indexer.idx_storage.state
+
+ # then
+ expected_results = {
+ k: v for k, v in self.expected_results.items() if k in sha1s
+ }
+
+ self.assert_results_ok(actual_results, expected_results)
+
+
+class CommonContentIndexerRangeTest:
"""Allows to factorize tests on range indexer.
"""
def assert_results_ok(self, start, end, actual_results,
expected_results=None):
if expected_results is None:
expected_results = self.expected_results
for indexed_data in actual_results:
_id = indexed_data['id']
self.assertEqual(indexed_data, expected_results[_id])
self.assertTrue(start <= _id and _id <= end)
_tool_id = indexed_data['indexer_configuration_id']
self.assertEqual(_tool_id, self.indexer.tool['id'])
def test__index_contents(self):
"""Indexing contents without existing data results in indexed data
"""
start, end = [self.contents[0], self.contents[2]] # output hex ids
# given
actual_results = list(self.indexer._index_contents(
start, end, indexed={}))
self.assert_results_ok(start, end, actual_results)
def test__index_contents_with_indexed_data(self):
"""Indexing contents with existing data results in less indexed data
"""
start, end = [self.contents[0], self.contents[2]] # output hex ids
data_indexed = [self.id0, self.id2]
# given
actual_results = self.indexer._index_contents(
start, end, indexed=set(data_indexed))
# craft the expected results
expected_results = self.expected_results.copy()
for already_indexed_key in data_indexed:
expected_results.pop(already_indexed_key)
self.assert_results_ok(
start, end, actual_results, expected_results)
- def test_generate_content_mimetype_get(self):
+ def test_generate_content_get(self):
"""Optimal indexing should result in indexed data
"""
start, end = [self.contents[0], self.contents[2]] # output hex ids
# given
actual_results = self.indexer.run(start, end)
# then
self.assertTrue(actual_results)
- def test_generate_content_mimetype_get_input_as_bytes(self):
+ def test_generate_content_get_input_as_bytes(self):
"""Optimal indexing should result in indexed data
Input are in bytes here.
"""
_start, _end = [self.contents[0], self.contents[2]] # output hex ids
start, end = map(hashutil.hash_to_bytes, (_start, _end))
# given
actual_results = self.indexer.run( # checks the bytes input this time
start, end, skip_existing=False) # no data so same result
# then
self.assertTrue(actual_results)
- def test_generate_content_mimetype_get_no_result(self):
+ def test_generate_content_get_no_result(self):
"""No result indexed returns False"""
start, end = ['0000000000000000000000000000000000000000',
'0000000000000000000000000000000000000001']
# given
actual_results = self.indexer.run(
start, end, incremental=False)
# then
self.assertFalse(actual_results)
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 9:50 AM (5 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3252009
Attached To
rDCIDX Metadata indexer
Event Timeline
Log In to Comment