Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345972
D669.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
D669.diff
View Options
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -456,6 +456,35 @@
cur=cur)
db.content_fossology_license_add_from_temp(conflict_update, cur)
+ @remote_api_endpoint('content/fossology_license/range')
+ @db_transaction()
+ def content_fossology_license_get_range(
+ self, start, end, indexer_configuration_id,
+ limit=1000, db=None, cur=None):
+ """Retrieve licenses within range [start, end] bound by limit.
+
+ Args:
+ **start** (bytes): Starting identifier range (expected smaller
+ than end)
+ **end** (bytes): Ending identifier range (expected larger
+ than start)
+ **indexer_configuration_id** (int): The tool used to index data
+ **limit** (int): Limit result (default to 1000)
+
+ Raises:
+ ValueError: if limit is None
+
+ Returns:
+ a dict with keys:
+ - **ids** [bytes]: iterable of content ids within the range.
+ - **next** (Optional[bytes]): The sha1 at which the next range
+ starts, or None if there are no more results
+
+ """
+ return self._content_get_range('fossology_license', start, end,
+ indexer_configuration_id, limit=limit,
+ db=db, cur=cur)
+
@remote_api_endpoint('content_metadata/missing')
@db_transaction_generator()
def content_metadata_missing(self, metadata, db=None, cur=None):
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -115,7 +115,7 @@
content_indexer_names = {
'mimetype': 'content_mimetype',
- 'language': 'content_language',
+ 'fossology_license': 'content_fossology_license',
}
def content_get_range(self, content_type, start, end,
diff --git a/swh/indexer/tests/storage/__init__.py b/swh/indexer/tests/storage/__init__.py
--- a/swh/indexer/tests/storage/__init__.py
+++ b/swh/indexer/tests/storage/__init__.py
@@ -42,6 +42,16 @@
return one_of(sampled_from(ENCODINGS))
+def _init_content(uuid):
+ """Given a uuid, build the base content dict (id and tool id).
+
+ """
+ return {
+ 'id': MultiHash.from_data(uuid.bytes, {'sha1'}).digest()['sha1'],
+ 'indexer_configuration_id': 1,
+ }
+
+
@composite
def gen_content_mimetypes(draw, *, min_size=0, max_size=100):
"""Generate valid and consistent content_mimetypes.
@@ -73,11 +83,60 @@
content_mimetypes = []
for uuid, mimetype, encoding in _ids:
- content_id = MultiHash.from_data(uuid.bytes, {'sha1'}).digest()['sha1']
content_mimetypes.append({
- 'id': content_id,
+ **_init_content(uuid),
'mimetype': mimetype,
'encoding': encoding,
- 'indexer_configuration_id': 1,
})
return content_mimetypes
+
+
+FOSSOLOGY_LICENSES = [
+ b'3DFX',
+ b'BSD',
+ b'GPL',
+ b'Apache2',
+ b'MIT',
+]
+
+
+def gen_license():
+ return one_of(sampled_from(FOSSOLOGY_LICENSES))
+
+
+@composite
+def gen_content_fossology_licenses(draw, *, min_size=0, max_size=100):
+ """Generate valid and consistent content_fossology_licenses.
+
+ Context: Test purposes
+
+ Args:
+ **draw** (callable): Used by hypothesis to generate data
+ **min_size** (int): Minimal number of elements to generate
+ (default: 0)
+ **max_size** (int): Maximal number of elements to generate
+ (default: 100)
+
+ Returns:
+ List of content_fossology_licenses as expected by the
+ content_fossology_license_add api endpoint.
+
+ """
+ _ids = draw(
+ sets(
+ tuples(
+ uuids(),
+ gen_license(),
+ ),
+ min_size=min_size, max_size=max_size
+ )
+ )
+
+ content_licenses = []
+ for uuid, license in _ids:
+ content_licenses.append({
+ **_init_content(uuid),
+ 'licenses': [license],
+ 'indexer_configuration_id': 1,
+ })
+ return content_licenses
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -13,7 +13,9 @@
from swh.indexer.storage import get_indexer_storage
from swh.core.tests.db_testing import SingleDbTestFixture
-from swh.indexer.tests.storage import SQL_DIR, gen_content_mimetypes
+from swh.indexer.tests.storage import (
+ SQL_DIR, gen_content_mimetypes, gen_content_fossology_licenses
+)
@pytest.mark.db
@@ -1694,6 +1696,83 @@
expected_mimetypes2 = [content_ids[-1]]
self.assertEqual(expected_mimetypes2, actual_ids2)
+ def test_generate_content_fossology_license_get_range_limit_none(self):
+ """license_get_range call with wrong limit input should fail"""
+ with self.assertRaises(ValueError) as e:
+ self.storage.content_fossology_license_get_range(
+ start=None, end=None, indexer_configuration_id=None,
+ limit=None)
+
+ self.assertEqual(e.exception.args, (
+ 'Development error: limit should not be None',))
+
+ @given(gen_content_fossology_licenses(min_size=1, max_size=4))
+ def test_generate_content_fossology_license_get_range_no_limit(
+ self, fossology_licenses):
+ """license_get_range returns licenses within range provided"""
+ self.reset_storage_tables()
+ # add fossology_licenses to storage
+ self.storage.content_fossology_license_add(fossology_licenses)
+
+ # All ids from the db
+ content_ids = sorted([c['id'] for c in fossology_licenses])
+
+ start = content_ids[0]
+ end = content_ids[-1]
+
+ # retrieve fossology_licenses
+ tool_id = fossology_licenses[0]['indexer_configuration_id']
+ actual_result = self.storage.content_fossology_license_get_range(
+ start, end, indexer_configuration_id=tool_id)
+
+ actual_ids = actual_result['ids']
+ actual_next = actual_result['next']
+
+ self.assertEqual(len(fossology_licenses), len(actual_ids))
+ self.assertIsNone(actual_next)
+ self.assertEqual(content_ids, actual_ids)
+
+ @given(gen_content_fossology_licenses(min_size=4, max_size=4))
+ def test_generate_fossology_license_get_range_limit(
+ self, fossology_licenses):
+ """fossology_license_get_range paginates results if limit exceeded"""
+ self.reset_storage_tables()
+
+ # add fossology_licenses to storage
+ self.storage.content_fossology_license_add(fossology_licenses)
+
+ # input the list of sha1s we want from storage
+ content_ids = sorted([c['id'] for c in fossology_licenses])
+ start = content_ids[0]
+ end = content_ids[-1]
+
+ # retrieve fossology_licenses limited to 3 results
+ limited_results = len(fossology_licenses) - 1
+ tool_id = fossology_licenses[0]['indexer_configuration_id']
+ actual_result = self.storage.content_fossology_license_get_range(
+ start, end,
+ indexer_configuration_id=tool_id, limit=limited_results)
+
+ actual_ids = actual_result['ids']
+ actual_next = actual_result['next']
+
+ self.assertEqual(limited_results, len(actual_ids))
+ self.assertIsNotNone(actual_next)
+ self.assertEqual(actual_next, content_ids[-1])
+
+ expected_fossology_licenses = content_ids[:-1]
+ self.assertEqual(expected_fossology_licenses, actual_ids)
+
+ # retrieve next part
+ actual_results2 = self.storage.content_fossology_license_get_range(
+ start=end, end=end, indexer_configuration_id=tool_id)
+ actual_ids2 = actual_results2['ids']
+ actual_next2 = actual_results2['next']
+
+ self.assertIsNone(actual_next2)
+ expected_fossology_licenses2 = [content_ids[-1]]
+ self.assertEqual(expected_fossology_licenses2, actual_ids2)
+
class IndexerTestStorage(CommonTestStorage, unittest.TestCase):
"""Running the tests locally.
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:38 PM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215937
Attached To
D669: storage: Open content_fossology_license_get_range endpoint
Event Timeline
Log In to Comment