Page Menu | Home | Software Heritage

D669.diff
No One | Temporary

D669.diff

diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -456,6 +456,35 @@
cur=cur)
db.content_fossology_license_add_from_temp(conflict_update, cur)
+ @remote_api_endpoint('content/fossology_license/range')
+ @db_transaction()
+ def content_fossology_license_get_range(
+ self, start, end, indexer_configuration_id,
+ limit=1000, db=None, cur=None):
+ """Retrieve content ids within range [start, end] bound by limit.
+
+ Args:
+ **start** (bytes): Starting identifier range (expected smaller
+ than end)
+ **end** (bytes): Ending identifier range (expected larger
+ than start)
+ **indexer_configuration_id** (int): The tool used to index data
+ **limit** (int): Limit result (default to 1000)
+
+ Raises:
+ ValueError: if limit is None
+
+ Returns:
+ a dict with keys:
+ - **ids** [bytes]: iterable of content ids within the range.
+ - **next** (Optional[bytes]): sha1 at which the next range starts,
+ or None if there is no further result
+
+ """
+ return self._content_get_range('fossology_license', start, end,
+ indexer_configuration_id, limit=limit,
+ db=db, cur=cur)
+
@remote_api_endpoint('content_metadata/missing')
@db_transaction_generator()
def content_metadata_missing(self, metadata, db=None, cur=None):
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -115,7 +115,7 @@
content_indexer_names = {
'mimetype': 'content_mimetype',
- 'language': 'content_language',
+ 'fossology_license': 'content_fossology_license',
}
def content_get_range(self, content_type, start, end,
diff --git a/swh/indexer/tests/storage/__init__.py b/swh/indexer/tests/storage/__init__.py
--- a/swh/indexer/tests/storage/__init__.py
+++ b/swh/indexer/tests/storage/__init__.py
@@ -42,6 +42,16 @@
return one_of(sampled_from(ENCODINGS))
+def _init_content(uuid):
+ """Given a uuid, build a base content dict (sha1 id and tool id)
+
+ """
+ return {
+ 'id': MultiHash.from_data(uuid.bytes, {'sha1'}).digest()['sha1'],
+ 'indexer_configuration_id': 1,
+ }
+
+
@composite
def gen_content_mimetypes(draw, *, min_size=0, max_size=100):
"""Generate valid and consistent content_mimetypes.
@@ -73,11 +83,60 @@
content_mimetypes = []
for uuid, mimetype, encoding in _ids:
- content_id = MultiHash.from_data(uuid.bytes, {'sha1'}).digest()['sha1']
content_mimetypes.append({
- 'id': content_id,
+ **_init_content(uuid),
'mimetype': mimetype,
'encoding': encoding,
- 'indexer_configuration_id': 1,
})
return content_mimetypes
+
+
+FOSSOLOGY_LICENSES = [
+ b'3DFX',
+ b'BSD',
+ b'GPL',
+ b'Apache2',
+ b'MIT',
+]
+
+
+def gen_license():
+ return one_of(sampled_from(FOSSOLOGY_LICENSES))
+
+
+@composite
+def gen_content_fossology_licenses(draw, *, min_size=0, max_size=100):
+ """Generate valid and consistent content_fossology_licenses.
+
+ Context: Test purposes
+
+ Args:
+ **draw** (callable): Used by hypothesis to generate data
+ **min_size** (int): Minimal number of elements to generate
+ (default: 0)
+ **max_size** (int): Maximal number of elements to generate
+ (default: 100)
+
+ Returns:
+ List of content_fossology_licenses as expected by the
+ content_fossology_license_add api endpoint.
+
+ """
+ _ids = draw(
+ sets(
+ tuples(
+ uuids(),
+ gen_license(),
+ ),
+ min_size=min_size, max_size=max_size
+ )
+ )
+
+ content_licenses = []
+ for uuid, license in _ids:
+ content_licenses.append({
+ **_init_content(uuid),
+ 'licenses': [license],
+ 'indexer_configuration_id': 1,
+ })
+ return content_licenses
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -13,7 +13,9 @@
from swh.indexer.storage import get_indexer_storage
from swh.core.tests.db_testing import SingleDbTestFixture
-from swh.indexer.tests.storage import SQL_DIR, gen_content_mimetypes
+from swh.indexer.tests.storage import (
+ SQL_DIR, gen_content_mimetypes, gen_content_fossology_licenses
+)
@pytest.mark.db
@@ -1694,6 +1696,83 @@
expected_mimetypes2 = [content_ids[-1]]
self.assertEqual(expected_mimetypes2, actual_ids2)
+ def test_generate_content_fossology_license_get_range_limit_none(self):
+ """license_get_range call with wrong limit input should fail"""
+ with self.assertRaises(ValueError) as e:
+ self.storage.content_fossology_license_get_range(
+ start=None, end=None, indexer_configuration_id=None,
+ limit=None)
+
+ self.assertEqual(e.exception.args, (
+ 'Development error: limit should not be None',))
+
+ @given(gen_content_fossology_licenses(min_size=1, max_size=4))
+ def test_generate_content_fossology_license_get_range_no_limit(
+ self, fossology_licenses):
+ """license_get_range returns licenses within range provided"""
+ self.reset_storage_tables()
+ # add fossology_licenses to storage
+ self.storage.content_fossology_license_add(fossology_licenses)
+
+ # All ids from the db
+ content_ids = sorted([c['id'] for c in fossology_licenses])
+
+ start = content_ids[0]
+ end = content_ids[-1]
+
+ # retrieve fossology_licenses
+ tool_id = fossology_licenses[0]['indexer_configuration_id']
+ actual_result = self.storage.content_fossology_license_get_range(
+ start, end, indexer_configuration_id=tool_id)
+
+ actual_ids = actual_result['ids']
+ actual_next = actual_result['next']
+
+ self.assertEqual(len(fossology_licenses), len(actual_ids))
+ self.assertIsNone(actual_next)
+ self.assertEqual(content_ids, actual_ids)
+
+ @given(gen_content_fossology_licenses(min_size=4, max_size=4))
+ def test_generate_fossology_license_get_range_limit(
+ self, fossology_licenses):
+ """fossology_license_get_range paginates results if limit exceeded"""
+ self.reset_storage_tables()
+
+ # add fossology_licenses to storage
+ self.storage.content_fossology_license_add(fossology_licenses)
+
+ # input the list of sha1s we want from storage
+ content_ids = sorted([c['id'] for c in fossology_licenses])
+ start = content_ids[0]
+ end = content_ids[-1]
+
+ # retrieve fossology_licenses limited to 3 results
+ limited_results = len(fossology_licenses) - 1
+ tool_id = fossology_licenses[0]['indexer_configuration_id']
+ actual_result = self.storage.content_fossology_license_get_range(
+ start, end,
+ indexer_configuration_id=tool_id, limit=limited_results)
+
+ actual_ids = actual_result['ids']
+ actual_next = actual_result['next']
+
+ self.assertEqual(limited_results, len(actual_ids))
+ self.assertIsNotNone(actual_next)
+ self.assertEqual(actual_next, content_ids[-1])
+
+ expected_fossology_licenses = content_ids[:-1]
+ self.assertEqual(expected_fossology_licenses, actual_ids)
+
+ # retrieve next part
+ actual_results2 = self.storage.content_fossology_license_get_range(
+ start=end, end=end, indexer_configuration_id=tool_id)
+ actual_ids2 = actual_results2['ids']
+ actual_next2 = actual_results2['next']
+
+ self.assertIsNone(actual_next2)
+ expected_fossology_licenses2 = [content_ids[-1]]
+ self.assertEqual(expected_fossology_licenses2, actual_ids2)
+
class IndexerTestStorage(CommonTestStorage, unittest.TestCase):
"""Running the tests locally.

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:38 PM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215937

Event Timeline