Page Menu — Home — Software Heritage

D1564.id5248.diff
No OneTemporary

D1564.id5248.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,5 +2,5 @@
swh.model >= 0.0.15
swh.objstorage >= 0.0.28
swh.scheduler >= 0.0.47
-swh.storage >= 0.0.123
+swh.storage >= 0.0.141
swh.journal >= 0.0.6
diff --git a/sql/upgrades/125.sql b/sql/upgrades/125.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/125.sql
@@ -0,0 +1,11 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 124
+-- to_version: 125
+-- description: Add 'origin_url' column to origin_intrinsic_metadata.
+
+insert into dbversion(version, release, description)
+values(125, now(), 'Work In Progress');
+
+alter table origin_intrinsic_metadata
+ add column origin_url text;
+
diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -4,7 +4,6 @@
# See top-level LICENSE file for more information
import abc
-import ast
import os
import logging
import shutil
@@ -526,37 +525,6 @@
return with_indexed_data
-def origin_get_params(id_):
- """From any of the two types of origin identifiers (int or
- type+url), returns a dict that can be passed to Storage.origin_get.
- Also accepts JSON-encoded forms of these (used via the task scheduler).
-
- >>> from pprint import pprint
- >>> origin_get_params(123)
- {'id': 123}
- >>> pprint(origin_get_params(['git', 'https://example.com/foo.git']))
- {'type': 'git', 'url': 'https://example.com/foo.git'}
- >>> origin_get_params("123")
- {'id': 123}
- >>> pprint(origin_get_params('["git", "https://example.com/foo.git"]'))
- {'type': 'git', 'url': 'https://example.com/foo.git'}
- """
- if isinstance(id_, str):
- # Data coming from JSON, which requires string keys, so
- # one extra level of deserialization is needed
- id_ = ast.literal_eval(id_)
- if isinstance(id_, (tuple, list)):
- if len(id_) != 2:
- raise TypeError('Expected a (type, url) tuple.')
- (type_, url) = id_
- params = {'type': type_, 'url': url}
- elif isinstance(id_, int):
- params = {'id': id_}
- else:
- raise TypeError('Invalid value in "ids": %r' % id_)
- return params
-
-
class OriginIndexer(BaseIndexer):
"""An object type indexer, inherits from the :class:`BaseIndexer` and
implements Origin indexing using the run method
@@ -567,7 +535,7 @@
class.
"""
- def run(self, ids, policy_update='update-dups', parse_ids=True,
+ def run(self, origin_urls, policy_update='update-dups',
next_step=None, **kwargs):
"""Given a list of origin ids:
@@ -587,21 +555,7 @@
**kwargs: passed to the `index` method
"""
- if parse_ids:
- ids = [o.split('+', 1) if ':' in o else int(o) # type+url or id
- for o in ids]
-
- origins_filtered = []
- origins = self.storage.origin_get(
- [origin_get_params(id_) for id_ in ids])
- for (id_, origin) in zip(ids, origins):
- if not origin:
- self.log.warning('Origin %s not found in storage' %
- id_)
- continue
- origins_filtered.append(origin)
-
- results = self.index_list(origins_filtered, **kwargs)
+ results = self.index_list(origin_urls, **kwargs)
self.persist_index_computations(results, policy_update)
self.results = results
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -264,12 +264,15 @@
self.origin_head_indexer = OriginHeadIndexer(config=config)
self.revision_metadata_indexer = RevisionMetadataIndexer(config=config)
- def index_list(self, origins):
+ def index_list(self, origin_urls):
head_rev_ids = []
origins_with_head = []
+ origins = self.storage.origin_get(
+ [{'url': url} for url in origin_urls])
for origin in origins:
- head_result = self.origin_head_indexer.index(origin)
+ head_result = self.origin_head_indexer.index(origin['url'])
if head_result:
+ head_result['origin_id'] = origin['id']
origins_with_head.append(origin)
head_rev_ids.append(head_result['revision_id'])
@@ -280,13 +283,14 @@
for (origin, rev) in zip(origins_with_head, head_revs):
if not rev:
self.log.warning('Missing head revision of origin %r',
- origin)
+ origin['url'])
continue
rev_metadata = self.revision_metadata_indexer.index(rev)
orig_metadata = {
'from_revision': rev_metadata['id'],
'id': origin['id'],
+ 'origin_url': origin['url'],
'metadata': rev_metadata['metadata'],
'mappings': rev_metadata['mappings'],
'indexer_configuration_id':
diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py
--- a/swh/indexer/origin_head.py
+++ b/swh/indexer/origin_head.py
@@ -27,22 +27,25 @@
# Dispatch
- def index(self, origin):
- origin_id = origin['id']
- latest_snapshot = self.storage.snapshot_get_latest(origin_id)
- if latest_snapshot is None:
+ def index(self, origin_url):
+ latest_visit = self.storage.origin_visit_get_latest(
+ origin_url, allowed_statuses=['full'], require_snapshot=True)
+ if latest_visit is None:
return None
- method = getattr(self, '_try_get_%s_head' % origin['type'], None)
- if method is None:
- method = self._try_get_head_generic
+ latest_snapshot = self.storage.snapshot_get(latest_visit['snapshot'])
+ method = getattr(
+ self, '_try_get_%s_head' % latest_visit['type'],
+ self._try_get_head_generic)
+
rev_id = method(latest_snapshot)
- if rev_id is None:
- return None
- result = {
- 'origin_id': origin_id,
+ if rev_id is not None:
+ return {
+ 'origin_url': origin_url,
'revision_id': rev_id,
}
- return result
+
+ # could not find a head revision
+ return None
# VCSs
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(124, now(), 'Work In Progress');
+ values(125, now(), 'Work In Progress');
-- Computing metadata on sha1's contents
-- a SHA1 checksum (not necessarily originating from Git)
@@ -130,6 +130,7 @@
create table origin_intrinsic_metadata(
id bigserial not null,
+ origin_url text,
metadata jsonb,
indexer_configuration_id bigint not null,
from_revision sha1_git not null,
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -713,7 +713,8 @@
Args:
metadata (iterable): dictionaries with keys:
- - **id**: origin identifier
+ - **id**: legacy origin identifier
+ - **origin_url**: URL of the origin
- **from_revision**: sha1 id of the revision used to generate
these metadata.
- **metadata**: arbitrary dict
@@ -731,7 +732,8 @@
db.mktemp_origin_intrinsic_metadata(cur)
db.copy_to(metadata, 'tmp_origin_intrinsic_metadata',
- ['id', 'metadata', 'indexer_configuration_id',
+ ['id', 'origin_url', 'metadata',
+ 'indexer_configuration_id',
'from_revision', 'mappings'],
cur)
db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur)
@@ -763,7 +765,8 @@
Yields:
list: dictionaries with the following keys:
- - **id** (int)
+ - **id** (int): legacy origin identifier
+ - **origin_url** (str)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
@@ -796,7 +799,8 @@
list: list of origin ids (int) if `ids_only=True`, else
dictionaries with the following keys:
- - **id** (int)
+ - **id** (int): legacy origin identifier
+ - **origin_url** (str)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py
--- a/swh/indexer/tests/test_origin_head.py
+++ b/swh/indexer/tests/test_origin_head.py
@@ -41,20 +41,37 @@
self.indexer.catch_exceptions = False
fill_storage(self.indexer.storage)
- def _get_origin_id(self, type_, url):
- origin = self.indexer.storage.origin_get({
- 'type': type_, 'url': url})
- return origin['id']
-
def test_git(self):
self.indexer.run(
- ['git+https://github.com/SoftwareHeritage/swh-storage'])
- origin_id = self._get_origin_id(
- 'git', 'https://github.com/SoftwareHeritage/swh-storage')
+ ['https://github.com/SoftwareHeritage/swh-storage'])
self.assertEqual(self.indexer.results, [{
'revision_id': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{'
b'\xd7}\xac\xefrm',
- 'origin_id': origin_id}])
+ 'origin_url': 'https://github.com/SoftwareHeritage/swh-storage'}])
+
+ def test_git_partial_snapshot(self):
+ """Checks partial snapshots are ignored."""
+ origin_url = 'https://github.com/SoftwareHeritage/swh-core'
+ self.indexer.storage.origin_add_one({
+ 'type': 'git',
+ 'url': origin_url,
+ })
+ visit = self.indexer.storage.origin_visit_add(
+ origin_url, '2019-02-27')
+ self.indexer.storage.snapshot_add([{
+ 'id': b'foo',
+ 'branches': {
+ b'foo': None,
+ b'HEAD': {
+ 'target_type': 'alias',
+ 'target': b'foo',
+ }
+ }
+ }])
+ self.indexer.storage.origin_visit_update(
+ origin_url, visit['visit'], status='partial', snapshot=b'foo')
+ self.indexer.run([origin_url])
+ self.assertEqual(self.indexer.results, [])
def test_vcs_missing_snapshot(self):
self.indexer.storage.origin_add([{
@@ -62,18 +79,19 @@
'url': 'https://github.com/SoftwareHeritage/swh-indexer',
}])
self.indexer.run(
- ['git+https://github.com/SoftwareHeritage/swh-indexer'])
+ ['https://github.com/SoftwareHeritage/swh-indexer'])
self.assertEqual(self.indexer.results, [])
def test_pypi_missing_branch(self):
- origin_id = self.indexer.storage.origin_add_one({
+ origin_url = 'https://pypi.org/project/abcdef/'
+ self.indexer.storage.origin_add_one({
'type': 'pypi',
- 'url': 'https://pypi.org/project/abcdef/',
+ 'url': origin_url,
})
visit = self.indexer.storage.origin_visit_add(
- origin_id, '2019-02-27')
- self.indexer.storage.snapshot_add(origin_id, visit['visit'], {
- 'id': 'foo',
+ origin_url, '2019-02-27')
+ self.indexer.storage.snapshot_add([{
+ 'id': b'foo',
'branches': {
b'foo': None,
b'HEAD': {
@@ -81,19 +99,19 @@
'target': b'foo',
}
}
- })
- self.indexer.run(['pypi+https://pypi.org/project/abcdef/'])
+ }])
+ self.indexer.storage.origin_visit_update(
+ origin_url, visit['visit'], status='full', snapshot=b'foo')
+ self.indexer.run(['https://pypi.org/project/abcdef/'])
self.assertEqual(self.indexer.results, [])
def test_ftp(self):
self.indexer.run(
- ['ftp+rsync://ftp.gnu.org/gnu/3dldf'])
- origin_id = self._get_origin_id(
- 'ftp', 'rsync://ftp.gnu.org/gnu/3dldf')
+ ['rsync://ftp.gnu.org/gnu/3dldf'])
self.assertEqual(self.indexer.results, [{
'revision_id': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
b'\xcc\x1a\xb4`\x8c\x8by',
- 'origin_id': origin_id}])
+ 'origin_url': 'rsync://ftp.gnu.org/gnu/3dldf'}])
def test_ftp_missing_snapshot(self):
self.indexer.storage.origin_add([{
@@ -101,19 +119,18 @@
'url': 'rsync://ftp.gnu.org/gnu/foobar',
}])
self.indexer.run(
- ['ftp+rsync://ftp.gnu.org/gnu/foobar'])
+ ['rsync://ftp.gnu.org/gnu/foobar'])
self.assertEqual(self.indexer.results, [])
def test_deposit(self):
self.indexer.run(
- ['deposit+https://forge.softwareheritage.org/source/'
+ ['https://forge.softwareheritage.org/source/'
'jesuisgpl/'])
- origin_id = self._get_origin_id(
- 'deposit', 'https://forge.softwareheritage.org/source/jesuisgpl/')
self.assertEqual(self.indexer.results, [{
'revision_id': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
b'\xa6\xe9\x99\xb1\x9e]q\xeb',
- 'origin_id': origin_id}])
+ 'origin_url': 'https://forge.softwareheritage.org/source/'
+ 'jesuisgpl/'}])
def test_deposit_missing_snapshot(self):
self.indexer.storage.origin_add([{
@@ -121,25 +138,21 @@
'url': 'https://forge.softwareheritage.org/source/foobar',
}])
self.indexer.run(
- ['deposit+https://forge.softwareheritage.org/source/foobar'])
+ ['https://forge.softwareheritage.org/source/foobar'])
self.assertEqual(self.indexer.results, [])
def test_pypi(self):
self.indexer.run(
- ['pypi+https://pypi.org/project/limnoria/'])
- origin_id = self._get_origin_id(
- 'pypi', 'https://pypi.org/project/limnoria/')
+ ['https://pypi.org/project/limnoria/'])
self.assertEqual(self.indexer.results, [{
'revision_id': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
b'A\x10\x9d\xc5\xfa2\xf8t',
- 'origin_id': origin_id}])
+ 'origin_url': 'https://pypi.org/project/limnoria/'}])
def test_svn(self):
self.indexer.run(
- ['svn+http://0-512-md.googlecode.com/svn/'])
- origin_id = self._get_origin_id(
- 'svn', 'http://0-512-md.googlecode.com/svn/')
+ ['http://0-512-md.googlecode.com/svn/'])
self.assertEqual(self.indexer.results, [{
'revision_id': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
b'\xc9\xad#.\x1bw=\x18',
- 'origin_id': origin_id}])
+ 'origin_url': 'http://0-512-md.googlecode.com/svn/'}])
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -17,10 +17,9 @@
idx_storage, storage, obj_storage):
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -31,6 +30,7 @@
}
origin_metadata = {
'id': origin['id'],
+ 'origin_url': origin['url'],
'from_revision': rev_id,
'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
@@ -54,12 +54,11 @@
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
indexer.storage = storage
indexer.idx_storage = idx_storage
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
- indexer.run(["git+https://github.com/librariesio/yarn-parser"]*2)
+ indexer.run(["https://github.com/librariesio/yarn-parser"]*2)
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -81,10 +80,9 @@
}])
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
- indexer.run(["git+https://example.com"])
+ indexer.run(["https://example.com"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://example.com'})
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -101,14 +99,12 @@
}])
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
- indexer.run(["git+https://example.com",
- "git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://example.com",
+ "https://github.com/librariesio/yarn-parser"])
origin1 = storage.origin_get({
- 'type': 'git',
'url': 'https://example.com'})
origin2 = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -119,6 +115,7 @@
}
origin_metadata = {
'id': origin2['id'],
+ 'origin_url': origin2['url'],
'from_revision': rev_id,
'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
@@ -142,14 +139,12 @@
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
indexer.storage = storage
indexer.idx_storage = idx_storage
- indexer.run(["git+https://github.com/librariesio/yarn-parser",
- "git+https://github.com/librariesio/yarn-parser.git"])
+ indexer.run(["https://github.com/librariesio/yarn-parser",
+ "https://github.com/librariesio/yarn-parser.git"])
origin1 = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
origin2 = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser.git'})
assert origin1['id'] != origin2['id']
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -169,10 +164,9 @@
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
with patch('swh.indexer.metadata_dictionary.npm.NpmMapping.filename',
b'foo.json'):
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -192,10 +186,9 @@
with patch('swh.indexer.metadata.RevisionMetadataIndexer'
'.translate_revision_intrinsic_metadata',
return_value=(['npm'], {'@context': 'foo'})):
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -215,10 +208,9 @@
with patch('swh.indexer.metadata.RevisionMetadataIndexer'
'.translate_revision_intrinsic_metadata',
return_value=None):
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -235,10 +227,9 @@
idx_storage, storage, obj_storage):
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -252,7 +243,7 @@
with patch('swh.indexer.metadata_dictionary.npm.NpmMapping.filename',
b'foo.json'):
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
results = list(
indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py
--- a/swh/indexer/tests/utils.py
+++ b/swh/indexer/tests/utils.py
@@ -36,133 +36,133 @@
ORIGINS = [
{
- 'id': 52189575,
'lister': None,
'project': None,
'type': 'git',
'url': 'https://github.com/SoftwareHeritage/swh-storage'},
{
- 'id': 4423668,
'lister': None,
'project': None,
'type': 'ftp',
'url': 'rsync://ftp.gnu.org/gnu/3dldf'},
{
- 'id': 77775770,
'lister': None,
'project': None,
'type': 'deposit',
'url': 'https://forge.softwareheritage.org/source/jesuisgpl/'},
{
- 'id': 85072327,
'lister': None,
'project': None,
'type': 'pypi',
'url': 'https://pypi.org/project/limnoria/'},
{
- 'id': 49908349,
'lister': None,
'project': None,
'type': 'svn',
'url': 'http://0-512-md.googlecode.com/svn/'},
{
- 'id': 54974445,
'lister': None,
'project': None,
'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'},
{
- 'id': 54974446,
'lister': None,
'project': None,
'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser.git'},
]
-SNAPSHOTS = {
- 52189575: {
- 'branches': {
- b'refs/heads/add-revision-origin-cache': {
- 'target': b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0'
- b's\xe7/\xe9l\x1e',
- 'target_type': 'revision'},
- b'HEAD': {
- 'target': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}'
- b'\xac\xefrm',
- 'target_type': 'revision'},
- b'refs/tags/v0.0.103': {
- 'target': b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+'
- b'\x0f\xdd',
- 'target_type': 'release'},
- }},
- 4423668: {
- 'branches': {
- b'3DLDF-1.1.4.tar.gz': {
- 'target': b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc'
- b'"G\x99\x11',
- 'target_type': 'revision'},
- b'3DLDF-2.0.2.tar.gz': {
- 'target': b'\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e='
- b'\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V',
- 'target_type': 'revision'},
- b'3DLDF-2.0.3-examples.tar.gz': {
- 'target': b'!H\x19\xc0\xee\x82-\x12F1\xbd\x97'
- b'\xfe\xadZ\x80\x80\xc1\x83\xff',
- 'target_type': 'revision'},
- b'3DLDF-2.0.3.tar.gz': {
- 'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
- b'\xcc\x1a\xb4`\x8c\x8by',
- 'target_type': 'revision'},
- b'3DLDF-2.0.tar.gz': {
- 'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G'
- b'\xd3\xd1m',
- b'target_type': 'revision'}
- }},
- 77775770: {
- 'branches': {
- b'master': {
- 'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
- b'\xa6\xe9\x99\xb1\x9e]q\xeb',
- 'target_type': 'revision'}
- },
- 'id': b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV"
- b"\x1d\r "},
- 85072327: {
- 'branches': {
- b'HEAD': {
- 'target': b'releases/2018.09.09',
- 'target_type': 'alias'},
- b'releases/2018.09.01': {
- 'target': b'<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d'
- b'\xbb\xdfF\xfdw\xcf',
- 'target_type': 'revision'},
- b'releases/2018.09.09': {
- 'target': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
- b'A\x10\x9d\xc5\xfa2\xf8t',
- 'target_type': 'revision'}},
- 'id': b'{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay'
- b'\x12\x9e\xd6\xb3'},
- 49908349: {
- 'branches': {
- b'master': {
- 'target': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
- b'\xc9\xad#.\x1bw=\x18',
- 'target_type': 'revision'}},
- 'id': b'\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7'
- b'\x05\xea\xb8\x1f\xc4H\xf4s'},
- 54974445: {
- 'branches': {
- b'HEAD': {
- 'target': hash_to_bytes(
- '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
- 'target_type': 'revision'}}},
- 54974446: {
- 'branches': {
- b'HEAD': {
- 'target': hash_to_bytes(
- '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
- 'target_type': 'revision'}}},
- }
+SNAPSHOTS = [
+ {
+ 'origin': 'https://github.com/SoftwareHeritage/swh-storage',
+ 'branches': {
+ b'refs/heads/add-revision-origin-cache': {
+ 'target': b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0'
+ b's\xe7/\xe9l\x1e',
+ 'target_type': 'revision'},
+ b'HEAD': {
+ 'target': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}'
+ b'\xac\xefrm',
+ 'target_type': 'revision'},
+ b'refs/tags/v0.0.103': {
+ 'target': b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+'
+ b'\x0f\xdd',
+ 'target_type': 'release'},
+ }},
+ {
+ 'origin': 'rsync://ftp.gnu.org/gnu/3dldf',
+ 'branches': {
+ b'3DLDF-1.1.4.tar.gz': {
+ 'target': b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc'
+ b'"G\x99\x11',
+ 'target_type': 'revision'},
+ b'3DLDF-2.0.2.tar.gz': {
+ 'target': b'\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e='
+ b'\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V',
+ 'target_type': 'revision'},
+ b'3DLDF-2.0.3-examples.tar.gz': {
+ 'target': b'!H\x19\xc0\xee\x82-\x12F1\xbd\x97'
+ b'\xfe\xadZ\x80\x80\xc1\x83\xff',
+ 'target_type': 'revision'},
+ b'3DLDF-2.0.3.tar.gz': {
+ 'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
+ b'\xcc\x1a\xb4`\x8c\x8by',
+ 'target_type': 'revision'},
+ b'3DLDF-2.0.tar.gz': {
+ 'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G'
+ b'\xd3\xd1m',
+ b'target_type': 'revision'}
+ }},
+ {
+ 'origin': 'https://forge.softwareheritage.org/source/jesuisgpl/',
+ 'branches': {
+ b'master': {
+ 'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
+ b'\xa6\xe9\x99\xb1\x9e]q\xeb',
+ 'target_type': 'revision'}
+ },
+ 'id': b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV"
+ b"\x1d\r "},
+ {
+ 'origin': 'https://pypi.org/project/limnoria/',
+ 'branches': {
+ b'HEAD': {
+ 'target': b'releases/2018.09.09',
+ 'target_type': 'alias'},
+ b'releases/2018.09.01': {
+ 'target': b'<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d'
+ b'\xbb\xdfF\xfdw\xcf',
+ 'target_type': 'revision'},
+ b'releases/2018.09.09': {
+ 'target': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
+ b'A\x10\x9d\xc5\xfa2\xf8t',
+ 'target_type': 'revision'}},
+ 'id': b'{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay'
+ b'\x12\x9e\xd6\xb3'},
+ {
+ 'origin': 'http://0-512-md.googlecode.com/svn/',
+ 'branches': {
+ b'master': {
+ 'target': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
+ b'\xc9\xad#.\x1bw=\x18',
+ 'target_type': 'revision'}},
+ 'id': b'\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7'
+ b'\x05\xea\xb8\x1f\xc4H\xf4s'},
+ {
+ 'origin': 'https://github.com/librariesio/yarn-parser',
+ 'branches': {
+ b'HEAD': {
+ 'target': hash_to_bytes(
+ '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
+ 'target_type': 'revision'}}},
+ {
+ 'origin': 'https://github.com/librariesio/yarn-parser.git',
+ 'branches': {
+ b'HEAD': {
+ 'target': hash_to_bytes(
+ '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
+ 'target_type': 'revision'}}},
+]
REVISIONS = [{
@@ -551,24 +551,18 @@
def fill_storage(storage):
for origin in ORIGINS:
- origin = origin.copy()
- del origin['id']
storage.origin_add_one(origin)
- for (orig_pseudo_id, snap) in SNAPSHOTS.items():
- for orig in ORIGINS:
- if orig_pseudo_id == orig['id']:
- origin_id = storage.origin_get(
- {'type': orig['type'], 'url': orig['url']})['id']
- break
- else:
- assert False
- visit = storage.origin_visit_add(origin_id, datetime.datetime.now())
+ for snap in SNAPSHOTS:
+ origin_url = snap['origin']
+ visit = storage.origin_visit_add(origin_url, datetime.datetime.now())
snap_id = snap.get('id') or \
bytes([random.randint(0, 255) for _ in range(32)])
- storage.snapshot_add(origin_id, visit['visit'], {
+ storage.snapshot_add([{
'id': snap_id,
'branches': snap['branches']
- })
+ }])
+ storage.origin_visit_update(
+ origin_url, visit['visit'], status='full', snapshot=snap_id)
storage.revision_add(REVISIONS)
storage.directory_add([{
'id': DIRECTORY_ID,

File Metadata

Mime Type
text/plain
Expires
Fri, Jun 20, 7:39 PM (3 w, 21 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234078

Event Timeline