Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9124853
D1564.id5248.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
31 KB
Subscribers
None
D1564.id5248.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,5 +2,5 @@
swh.model >= 0.0.15
swh.objstorage >= 0.0.28
swh.scheduler >= 0.0.47
-swh.storage >= 0.0.123
+swh.storage >= 0.0.141
swh.journal >= 0.0.6
diff --git a/sql/upgrades/125.sql b/sql/upgrades/125.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/125.sql
@@ -0,0 +1,11 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 124
+-- to_version: 125
+-- description: Add 'origin_url' column to origin_intrinsic_metadata.
+
+insert into dbversion(version, release, description)
+values(125, now(), 'Work In Progress');
+
+alter table origin_intrinsic_metadata
+ add column origin_url text;
+
diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -4,7 +4,6 @@
# See top-level LICENSE file for more information
import abc
-import ast
import os
import logging
import shutil
@@ -526,37 +525,6 @@
return with_indexed_data
-def origin_get_params(id_):
- """From any of the two types of origin identifiers (int or
- type+url), returns a dict that can be passed to Storage.origin_get.
- Also accepts JSON-encoded forms of these (used via the task scheduler).
-
- >>> from pprint import pprint
- >>> origin_get_params(123)
- {'id': 123}
- >>> pprint(origin_get_params(['git', 'https://example.com/foo.git']))
- {'type': 'git', 'url': 'https://example.com/foo.git'}
- >>> origin_get_params("123")
- {'id': 123}
- >>> pprint(origin_get_params('["git", "https://example.com/foo.git"]'))
- {'type': 'git', 'url': 'https://example.com/foo.git'}
- """
- if isinstance(id_, str):
- # Data coming from JSON, which requires string keys, so
- # one extra level of deserialization is needed
- id_ = ast.literal_eval(id_)
- if isinstance(id_, (tuple, list)):
- if len(id_) != 2:
- raise TypeError('Expected a (type, url) tuple.')
- (type_, url) = id_
- params = {'type': type_, 'url': url}
- elif isinstance(id_, int):
- params = {'id': id_}
- else:
- raise TypeError('Invalid value in "ids": %r' % id_)
- return params
-
-
class OriginIndexer(BaseIndexer):
"""An object type indexer, inherits from the :class:`BaseIndexer` and
implements Origin indexing using the run method
@@ -567,7 +535,7 @@
class.
"""
- def run(self, ids, policy_update='update-dups', parse_ids=True,
+ def run(self, origin_urls, policy_update='update-dups',
next_step=None, **kwargs):
"""Given a list of origin ids:
@@ -587,21 +555,7 @@
**kwargs: passed to the `index` method
"""
- if parse_ids:
- ids = [o.split('+', 1) if ':' in o else int(o) # type+url or id
- for o in ids]
-
- origins_filtered = []
- origins = self.storage.origin_get(
- [origin_get_params(id_) for id_ in ids])
- for (id_, origin) in zip(ids, origins):
- if not origin:
- self.log.warning('Origin %s not found in storage' %
- id_)
- continue
- origins_filtered.append(origin)
-
- results = self.index_list(origins_filtered, **kwargs)
+ results = self.index_list(origin_urls, **kwargs)
self.persist_index_computations(results, policy_update)
self.results = results
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -264,12 +264,15 @@
self.origin_head_indexer = OriginHeadIndexer(config=config)
self.revision_metadata_indexer = RevisionMetadataIndexer(config=config)
- def index_list(self, origins):
+ def index_list(self, origin_urls):
head_rev_ids = []
origins_with_head = []
+ origins = self.storage.origin_get(
+ [{'url': url} for url in origin_urls])
for origin in origins:
- head_result = self.origin_head_indexer.index(origin)
+ head_result = self.origin_head_indexer.index(origin['url'])
if head_result:
+ head_result['origin_id'] = origin['id']
origins_with_head.append(origin)
head_rev_ids.append(head_result['revision_id'])
@@ -280,13 +283,14 @@
for (origin, rev) in zip(origins_with_head, head_revs):
if not rev:
self.log.warning('Missing head revision of origin %r',
- origin)
+ origin['url'])
continue
rev_metadata = self.revision_metadata_indexer.index(rev)
orig_metadata = {
'from_revision': rev_metadata['id'],
'id': origin['id'],
+ 'origin_url': origin['url'],
'metadata': rev_metadata['metadata'],
'mappings': rev_metadata['mappings'],
'indexer_configuration_id':
diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py
--- a/swh/indexer/origin_head.py
+++ b/swh/indexer/origin_head.py
@@ -27,22 +27,25 @@
# Dispatch
- def index(self, origin):
- origin_id = origin['id']
- latest_snapshot = self.storage.snapshot_get_latest(origin_id)
- if latest_snapshot is None:
+ def index(self, origin_url):
+ latest_visit = self.storage.origin_visit_get_latest(
+ origin_url, allowed_statuses=['full'], require_snapshot=True)
+ if latest_visit is None:
return None
- method = getattr(self, '_try_get_%s_head' % origin['type'], None)
- if method is None:
- method = self._try_get_head_generic
+ latest_snapshot = self.storage.snapshot_get(latest_visit['snapshot'])
+ method = getattr(
+ self, '_try_get_%s_head' % latest_visit['type'],
+ self._try_get_head_generic)
+
rev_id = method(latest_snapshot)
- if rev_id is None:
- return None
- result = {
- 'origin_id': origin_id,
+ if rev_id is not None:
+ return {
+ 'origin_url': origin_url,
'revision_id': rev_id,
}
- return result
+
+ # could not find a head revision
+ return None
# VCSs
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(124, now(), 'Work In Progress');
+ values(125, now(), 'Work In Progress');
-- Computing metadata on sha1's contents
-- a SHA1 checksum (not necessarily originating from Git)
@@ -130,6 +130,7 @@
create table origin_intrinsic_metadata(
id bigserial not null,
+ origin_url text,
metadata jsonb,
indexer_configuration_id bigint not null,
from_revision sha1_git not null,
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -713,7 +713,8 @@
Args:
metadata (iterable): dictionaries with keys:
- - **id**: origin identifier
+ - **id**: legacy origin identifier
+ - **origin_url**: URL of the origin
- **from_revision**: sha1 id of the revision used to generate
these metadata.
- **metadata**: arbitrary dict
@@ -731,7 +732,8 @@
db.mktemp_origin_intrinsic_metadata(cur)
db.copy_to(metadata, 'tmp_origin_intrinsic_metadata',
- ['id', 'metadata', 'indexer_configuration_id',
+ ['id', 'origin_url', 'metadata',
+ 'indexer_configuration_id',
'from_revision', 'mappings'],
cur)
db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur)
@@ -763,7 +765,8 @@
Yields:
list: dictionaries with the following keys:
- - **id** (int)
+ - **id** (int): legacy origin identifier
+ - **origin_url** (str)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
@@ -796,7 +799,8 @@
list: list of origin ids (int) if `ids_only=True`, else
dictionaries with the following keys:
- - **id** (int)
+ - **id** (int): legacy origin identifier
+ - **origin_url** (str)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py
--- a/swh/indexer/tests/test_origin_head.py
+++ b/swh/indexer/tests/test_origin_head.py
@@ -41,20 +41,37 @@
self.indexer.catch_exceptions = False
fill_storage(self.indexer.storage)
- def _get_origin_id(self, type_, url):
- origin = self.indexer.storage.origin_get({
- 'type': type_, 'url': url})
- return origin['id']
-
def test_git(self):
self.indexer.run(
- ['git+https://github.com/SoftwareHeritage/swh-storage'])
- origin_id = self._get_origin_id(
- 'git', 'https://github.com/SoftwareHeritage/swh-storage')
+ ['https://github.com/SoftwareHeritage/swh-storage'])
self.assertEqual(self.indexer.results, [{
'revision_id': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{'
b'\xd7}\xac\xefrm',
- 'origin_id': origin_id}])
+ 'origin_url': 'https://github.com/SoftwareHeritage/swh-storage'}])
+
+ def test_git_partial_snapshot(self):
+ """Checks partial snapshots are ignored."""
+ origin_url = 'https://github.com/SoftwareHeritage/swh-core'
+ self.indexer.storage.origin_add_one({
+ 'type': 'git',
+ 'url': origin_url,
+ })
+ visit = self.indexer.storage.origin_visit_add(
+ origin_url, '2019-02-27')
+ self.indexer.storage.snapshot_add([{
+ 'id': b'foo',
+ 'branches': {
+ b'foo': None,
+ b'HEAD': {
+ 'target_type': 'alias',
+ 'target': b'foo',
+ }
+ }
+ }])
+ self.indexer.storage.origin_visit_update(
+ origin_url, visit['visit'], status='partial', snapshot=b'foo')
+ self.indexer.run([origin_url])
+ self.assertEqual(self.indexer.results, [])
def test_vcs_missing_snapshot(self):
self.indexer.storage.origin_add([{
@@ -62,18 +79,19 @@
'url': 'https://github.com/SoftwareHeritage/swh-indexer',
}])
self.indexer.run(
- ['git+https://github.com/SoftwareHeritage/swh-indexer'])
+ ['https://github.com/SoftwareHeritage/swh-indexer'])
self.assertEqual(self.indexer.results, [])
def test_pypi_missing_branch(self):
- origin_id = self.indexer.storage.origin_add_one({
+ origin_url = 'https://pypi.org/project/abcdef/'
+ self.indexer.storage.origin_add_one({
'type': 'pypi',
- 'url': 'https://pypi.org/project/abcdef/',
+ 'url': origin_url,
})
visit = self.indexer.storage.origin_visit_add(
- origin_id, '2019-02-27')
- self.indexer.storage.snapshot_add(origin_id, visit['visit'], {
- 'id': 'foo',
+ origin_url, '2019-02-27')
+ self.indexer.storage.snapshot_add([{
+ 'id': b'foo',
'branches': {
b'foo': None,
b'HEAD': {
@@ -81,19 +99,19 @@
'target': b'foo',
}
}
- })
- self.indexer.run(['pypi+https://pypi.org/project/abcdef/'])
+ }])
+ self.indexer.storage.origin_visit_update(
+ origin_url, visit['visit'], status='full', snapshot=b'foo')
+ self.indexer.run(['https://pypi.org/project/abcdef/'])
self.assertEqual(self.indexer.results, [])
def test_ftp(self):
self.indexer.run(
- ['ftp+rsync://ftp.gnu.org/gnu/3dldf'])
- origin_id = self._get_origin_id(
- 'ftp', 'rsync://ftp.gnu.org/gnu/3dldf')
+ ['rsync://ftp.gnu.org/gnu/3dldf'])
self.assertEqual(self.indexer.results, [{
'revision_id': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
b'\xcc\x1a\xb4`\x8c\x8by',
- 'origin_id': origin_id}])
+ 'origin_url': 'rsync://ftp.gnu.org/gnu/3dldf'}])
def test_ftp_missing_snapshot(self):
self.indexer.storage.origin_add([{
@@ -101,19 +119,18 @@
'url': 'rsync://ftp.gnu.org/gnu/foobar',
}])
self.indexer.run(
- ['ftp+rsync://ftp.gnu.org/gnu/foobar'])
+ ['rsync://ftp.gnu.org/gnu/foobar'])
self.assertEqual(self.indexer.results, [])
def test_deposit(self):
self.indexer.run(
- ['deposit+https://forge.softwareheritage.org/source/'
+ ['https://forge.softwareheritage.org/source/'
'jesuisgpl/'])
- origin_id = self._get_origin_id(
- 'deposit', 'https://forge.softwareheritage.org/source/jesuisgpl/')
self.assertEqual(self.indexer.results, [{
'revision_id': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
b'\xa6\xe9\x99\xb1\x9e]q\xeb',
- 'origin_id': origin_id}])
+ 'origin_url': 'https://forge.softwareheritage.org/source/'
+ 'jesuisgpl/'}])
def test_deposit_missing_snapshot(self):
self.indexer.storage.origin_add([{
@@ -121,25 +138,21 @@
'url': 'https://forge.softwareheritage.org/source/foobar',
}])
self.indexer.run(
- ['deposit+https://forge.softwareheritage.org/source/foobar'])
+ ['https://forge.softwareheritage.org/source/foobar'])
self.assertEqual(self.indexer.results, [])
def test_pypi(self):
self.indexer.run(
- ['pypi+https://pypi.org/project/limnoria/'])
- origin_id = self._get_origin_id(
- 'pypi', 'https://pypi.org/project/limnoria/')
+ ['https://pypi.org/project/limnoria/'])
self.assertEqual(self.indexer.results, [{
'revision_id': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
b'A\x10\x9d\xc5\xfa2\xf8t',
- 'origin_id': origin_id}])
+ 'origin_url': 'https://pypi.org/project/limnoria/'}])
def test_svn(self):
self.indexer.run(
- ['svn+http://0-512-md.googlecode.com/svn/'])
- origin_id = self._get_origin_id(
- 'svn', 'http://0-512-md.googlecode.com/svn/')
+ ['http://0-512-md.googlecode.com/svn/'])
self.assertEqual(self.indexer.results, [{
'revision_id': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
b'\xc9\xad#.\x1bw=\x18',
- 'origin_id': origin_id}])
+ 'origin_url': 'http://0-512-md.googlecode.com/svn/'}])
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -17,10 +17,9 @@
idx_storage, storage, obj_storage):
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -31,6 +30,7 @@
}
origin_metadata = {
'id': origin['id'],
+ 'origin_url': origin['url'],
'from_revision': rev_id,
'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
@@ -54,12 +54,11 @@
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
indexer.storage = storage
indexer.idx_storage = idx_storage
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
- indexer.run(["git+https://github.com/librariesio/yarn-parser"]*2)
+ indexer.run(["https://github.com/librariesio/yarn-parser"]*2)
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -81,10 +80,9 @@
}])
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
- indexer.run(["git+https://example.com"])
+ indexer.run(["https://example.com"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://example.com'})
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -101,14 +99,12 @@
}])
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
- indexer.run(["git+https://example.com",
- "git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://example.com",
+ "https://github.com/librariesio/yarn-parser"])
origin1 = storage.origin_get({
- 'type': 'git',
'url': 'https://example.com'})
origin2 = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -119,6 +115,7 @@
}
origin_metadata = {
'id': origin2['id'],
+ 'origin_url': origin2['url'],
'from_revision': rev_id,
'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
@@ -142,14 +139,12 @@
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
indexer.storage = storage
indexer.idx_storage = idx_storage
- indexer.run(["git+https://github.com/librariesio/yarn-parser",
- "git+https://github.com/librariesio/yarn-parser.git"])
+ indexer.run(["https://github.com/librariesio/yarn-parser",
+ "https://github.com/librariesio/yarn-parser.git"])
origin1 = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
origin2 = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser.git'})
assert origin1['id'] != origin2['id']
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -169,10 +164,9 @@
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
with patch('swh.indexer.metadata_dictionary.npm.NpmMapping.filename',
b'foo.json'):
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -192,10 +186,9 @@
with patch('swh.indexer.metadata.RevisionMetadataIndexer'
'.translate_revision_intrinsic_metadata',
return_value=(['npm'], {'@context': 'foo'})):
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -215,10 +208,9 @@
with patch('swh.indexer.metadata.RevisionMetadataIndexer'
'.translate_revision_intrinsic_metadata',
return_value=None):
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -235,10 +227,9 @@
idx_storage, storage, obj_storage):
indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG)
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
origin = storage.origin_get({
- 'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
@@ -252,7 +243,7 @@
with patch('swh.indexer.metadata_dictionary.npm.NpmMapping.filename',
b'foo.json'):
- indexer.run(["git+https://github.com/librariesio/yarn-parser"])
+ indexer.run(["https://github.com/librariesio/yarn-parser"])
results = list(
indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py
--- a/swh/indexer/tests/utils.py
+++ b/swh/indexer/tests/utils.py
@@ -36,133 +36,133 @@
ORIGINS = [
{
- 'id': 52189575,
'lister': None,
'project': None,
'type': 'git',
'url': 'https://github.com/SoftwareHeritage/swh-storage'},
{
- 'id': 4423668,
'lister': None,
'project': None,
'type': 'ftp',
'url': 'rsync://ftp.gnu.org/gnu/3dldf'},
{
- 'id': 77775770,
'lister': None,
'project': None,
'type': 'deposit',
'url': 'https://forge.softwareheritage.org/source/jesuisgpl/'},
{
- 'id': 85072327,
'lister': None,
'project': None,
'type': 'pypi',
'url': 'https://pypi.org/project/limnoria/'},
{
- 'id': 49908349,
'lister': None,
'project': None,
'type': 'svn',
'url': 'http://0-512-md.googlecode.com/svn/'},
{
- 'id': 54974445,
'lister': None,
'project': None,
'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser'},
{
- 'id': 54974446,
'lister': None,
'project': None,
'type': 'git',
'url': 'https://github.com/librariesio/yarn-parser.git'},
]
-SNAPSHOTS = {
- 52189575: {
- 'branches': {
- b'refs/heads/add-revision-origin-cache': {
- 'target': b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0'
- b's\xe7/\xe9l\x1e',
- 'target_type': 'revision'},
- b'HEAD': {
- 'target': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}'
- b'\xac\xefrm',
- 'target_type': 'revision'},
- b'refs/tags/v0.0.103': {
- 'target': b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+'
- b'\x0f\xdd',
- 'target_type': 'release'},
- }},
- 4423668: {
- 'branches': {
- b'3DLDF-1.1.4.tar.gz': {
- 'target': b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc'
- b'"G\x99\x11',
- 'target_type': 'revision'},
- b'3DLDF-2.0.2.tar.gz': {
- 'target': b'\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e='
- b'\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V',
- 'target_type': 'revision'},
- b'3DLDF-2.0.3-examples.tar.gz': {
- 'target': b'!H\x19\xc0\xee\x82-\x12F1\xbd\x97'
- b'\xfe\xadZ\x80\x80\xc1\x83\xff',
- 'target_type': 'revision'},
- b'3DLDF-2.0.3.tar.gz': {
- 'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
- b'\xcc\x1a\xb4`\x8c\x8by',
- 'target_type': 'revision'},
- b'3DLDF-2.0.tar.gz': {
- 'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G'
- b'\xd3\xd1m',
- b'target_type': 'revision'}
- }},
- 77775770: {
- 'branches': {
- b'master': {
- 'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
- b'\xa6\xe9\x99\xb1\x9e]q\xeb',
- 'target_type': 'revision'}
- },
- 'id': b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV"
- b"\x1d\r "},
- 85072327: {
- 'branches': {
- b'HEAD': {
- 'target': b'releases/2018.09.09',
- 'target_type': 'alias'},
- b'releases/2018.09.01': {
- 'target': b'<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d'
- b'\xbb\xdfF\xfdw\xcf',
- 'target_type': 'revision'},
- b'releases/2018.09.09': {
- 'target': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
- b'A\x10\x9d\xc5\xfa2\xf8t',
- 'target_type': 'revision'}},
- 'id': b'{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay'
- b'\x12\x9e\xd6\xb3'},
- 49908349: {
- 'branches': {
- b'master': {
- 'target': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
- b'\xc9\xad#.\x1bw=\x18',
- 'target_type': 'revision'}},
- 'id': b'\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7'
- b'\x05\xea\xb8\x1f\xc4H\xf4s'},
- 54974445: {
- 'branches': {
- b'HEAD': {
- 'target': hash_to_bytes(
- '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
- 'target_type': 'revision'}}},
- 54974446: {
- 'branches': {
- b'HEAD': {
- 'target': hash_to_bytes(
- '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
- 'target_type': 'revision'}}},
- }
+SNAPSHOTS = [
+ {
+ 'origin': 'https://github.com/SoftwareHeritage/swh-storage',
+ 'branches': {
+ b'refs/heads/add-revision-origin-cache': {
+ 'target': b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0'
+ b's\xe7/\xe9l\x1e',
+ 'target_type': 'revision'},
+ b'HEAD': {
+ 'target': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}'
+ b'\xac\xefrm',
+ 'target_type': 'revision'},
+ b'refs/tags/v0.0.103': {
+ 'target': b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+'
+ b'\x0f\xdd',
+ 'target_type': 'release'},
+ }},
+ {
+ 'origin': 'rsync://ftp.gnu.org/gnu/3dldf',
+ 'branches': {
+ b'3DLDF-1.1.4.tar.gz': {
+ 'target': b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc'
+ b'"G\x99\x11',
+ 'target_type': 'revision'},
+ b'3DLDF-2.0.2.tar.gz': {
+ 'target': b'\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e='
+ b'\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V',
+ 'target_type': 'revision'},
+ b'3DLDF-2.0.3-examples.tar.gz': {
+ 'target': b'!H\x19\xc0\xee\x82-\x12F1\xbd\x97'
+ b'\xfe\xadZ\x80\x80\xc1\x83\xff',
+ 'target_type': 'revision'},
+ b'3DLDF-2.0.3.tar.gz': {
+ 'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee'
+ b'\xcc\x1a\xb4`\x8c\x8by',
+ 'target_type': 'revision'},
+ b'3DLDF-2.0.tar.gz': {
+ 'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G'
+ b'\xd3\xd1m',
+ 'target_type': 'revision'}
+ }},
+ {
+ 'origin': 'https://forge.softwareheritage.org/source/jesuisgpl/',
+ 'branches': {
+ b'master': {
+ 'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{'
+ b'\xa6\xe9\x99\xb1\x9e]q\xeb',
+ 'target_type': 'revision'}
+ },
+ 'id': b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV"
+ b"\x1d\r "},
+ {
+ 'origin': 'https://pypi.org/project/limnoria/',
+ 'branches': {
+ b'HEAD': {
+ 'target': b'releases/2018.09.09',
+ 'target_type': 'alias'},
+ b'releases/2018.09.01': {
+ 'target': b'<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d'
+ b'\xbb\xdfF\xfdw\xcf',
+ 'target_type': 'revision'},
+ b'releases/2018.09.09': {
+ 'target': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k'
+ b'A\x10\x9d\xc5\xfa2\xf8t',
+ 'target_type': 'revision'}},
+ 'id': b'{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay'
+ b'\x12\x9e\xd6\xb3'},
+ {
+ 'origin': 'http://0-512-md.googlecode.com/svn/',
+ 'branches': {
+ b'master': {
+ 'target': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8'
+ b'\xc9\xad#.\x1bw=\x18',
+ 'target_type': 'revision'}},
+ 'id': b'\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7'
+ b'\x05\xea\xb8\x1f\xc4H\xf4s'},
+ {
+ 'origin': 'https://github.com/librariesio/yarn-parser',
+ 'branches': {
+ b'HEAD': {
+ 'target': hash_to_bytes(
+ '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
+ 'target_type': 'revision'}}},
+ {
+ 'origin': 'https://github.com/librariesio/yarn-parser.git',
+ 'branches': {
+ b'HEAD': {
+ 'target': hash_to_bytes(
+ '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
+ 'target_type': 'revision'}}},
+]
REVISIONS = [{
@@ -551,24 +551,18 @@
def fill_storage(storage):
for origin in ORIGINS:
- origin = origin.copy()
- del origin['id']
storage.origin_add_one(origin)
- for (orig_pseudo_id, snap) in SNAPSHOTS.items():
- for orig in ORIGINS:
- if orig_pseudo_id == orig['id']:
- origin_id = storage.origin_get(
- {'type': orig['type'], 'url': orig['url']})['id']
- break
- else:
- assert False
- visit = storage.origin_visit_add(origin_id, datetime.datetime.now())
+ for snap in SNAPSHOTS:
+ origin_url = snap['origin']
+ visit = storage.origin_visit_add(origin_url, datetime.datetime.now())
snap_id = snap.get('id') or \
bytes([random.randint(0, 255) for _ in range(32)])
- storage.snapshot_add(origin_id, visit['visit'], {
+ storage.snapshot_add([{
'id': snap_id,
'branches': snap['branches']
- })
+ }])
+ storage.origin_visit_update(
+ origin_url, visit['visit'], status='full', snapshot=snap_id)
storage.revision_add(REVISIONS)
storage.directory_add([{
'id': DIRECTORY_ID,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jun 20, 7:39 PM (3 w, 21 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234078
Attached To
D1564: Manipulate origin URLs instead of origin ids.
Event Timeline
Log In to Comment