Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9696994
D2203.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D2203.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,3 @@
swh.core[db,http] >= 0.0.60
swh.model >= 0.0.40
-swh.storage >= 0.0.147
+swh.storage >= 0.0.156
diff --git a/swh/journal/replay.py b/swh/journal/replay.py
--- a/swh/journal/replay.py
+++ b/swh/journal/replay.py
@@ -89,16 +89,18 @@
good_visits = []
for visit in visits:
visit = visit.copy()
- if isinstance(visit['origin'], str):
- # note that it will crash with the pg and
- # in-mem storages if the origin is not already known,
- # but there is no other choice because we can't add an
- # origin without knowing its type. Non-pg storages
- # don't use a numeric FK internally,
- visit['origin'] = {'url': visit['origin']}
- else:
- if 'type' not in visit:
+ if 'type' not in visit:
+ if isinstance(visit['origin'], dict) and 'type' in visit['origin']:
+ # Very old version of the schema: visits did not have a type,
+ # but their 'origin' field was a dict with a 'type' key.
visit['type'] = visit['origin']['type']
+ else:
+ # Very very old version of the schema: 'type' is missing,
+ # so there is nothing we can do to fix it.
+ raise ValueError('Got an origin_visit too old to be replayed.')
+ if isinstance(visit['origin'], dict):
+ # Old version of the schema: visit['origin'] was a dict.
+ visit['origin'] = visit['origin']['url']
good_visits.append(visit)
return good_visits
@@ -183,17 +185,20 @@
[]
- `visit['origin']` is an URL instead of a dict:
+ `visit['origin']` is a dict instead of an URL:
- >>> fix_objects('origin_visit', [{'origin': 'http://foo'}])
- [{'origin': {'url': 'http://foo'}}]
+ >>> pprint(fix_objects('origin_visit', [{
+ ... 'origin': {'url': 'http://foo'},
+ ... 'type': 'git',
+ ... }]))
+ [{'origin': 'http://foo', 'type': 'git'}]
`visit['type']` is missing , but `origin['visit']['type']` exists:
- >>> pprint(fix_objects(
- ... 'origin_visit',
- ... [{'origin': {'type': 'hg', 'url': 'http://foo'}}]))
- [{'origin': {'type': 'hg', 'url': 'http://foo'}, 'type': 'hg'}]
+ >>> pprint(fix_objects('origin_visit', [
+ ... {'origin': {'type': 'hg', 'url': 'http://foo'}
+ ... }]))
+ [{'origin': 'http://foo', 'type': 'hg'}]
""" # noqa
if object_type == 'revision':
@@ -220,7 +225,7 @@
method(objects)
elif object_type == 'origin_visit':
for visit in objects:
- storage.origin_add_one(visit['origin'])
+ storage.origin_add_one({'url': visit['origin']})
if 'metadata' not in visit:
visit['metadata'] = None
storage.origin_visit_upsert(objects)
diff --git a/swh/journal/tests/conftest.py b/swh/journal/tests/conftest.py
--- a/swh/journal/tests/conftest.py
+++ b/swh/journal/tests/conftest.py
@@ -128,7 +128,7 @@
ORIGIN_VISITS = [
{
- 'origin': ORIGINS[0],
+ 'origin': ORIGINS[0]['url'],
'date': '2013-05-07 04:20:39.369271+00:00',
'snapshot': None, # TODO
'status': 'ongoing', # TODO
@@ -136,7 +136,7 @@
'type': 'git',
},
{
- 'origin': ORIGINS[0],
+ 'origin': ORIGINS[0]['url'],
'date': '2018-11-27 17:20:39+00:00',
'snapshot': None, # TODO
'status': 'ongoing', # TODO
diff --git a/swh/journal/tests/test_kafka_writer.py b/swh/journal/tests/test_kafka_writer.py
--- a/swh/journal/tests/test_kafka_writer.py
+++ b/swh/journal/tests/test_kafka_writer.py
@@ -123,12 +123,13 @@
elif object_type in ('origin_visit',):
for object_ in objects:
object_ = object_.copy()
- origin_id = storage.origin_add_one(object_.pop('origin'))
- visit = method(origin=origin_id, date=object_.pop('date'),
+ origin_url = object_.pop('origin')
+ storage.origin_add_one({'url': origin_url})
+ visit = method(origin=origin_url, date=object_.pop('date'),
type=object_.pop('type'))
expected_messages += 1
visit_id = visit['visit']
- storage.origin_visit_update(origin_id, visit_id, **object_)
+ storage.origin_visit_update(origin_url, visit_id, **object_)
expected_messages += 1
else:
assert False, object_type
diff --git a/swh/journal/tests/test_replay.py b/swh/journal/tests/test_replay.py
--- a/swh/journal/tests/test_replay.py
+++ b/swh/journal/tests/test_replay.py
@@ -12,9 +12,9 @@
import dateutil
from confluent_kafka import Producer
from hypothesis import strategies, given, settings
+import pytest
from swh.storage import get_storage
-from swh.storage.in_memory import ENABLE_ORIGIN_IDS
from swh.journal.client import JournalClient
from swh.journal.serializers import key_to_kafka, value_to_kafka
@@ -88,19 +88,18 @@
assert OBJECT_TYPE_KEYS['origin'][1] == \
[{'url': orig['url']} for orig in origins]
for origin in origins:
- origin_id_or_url = \
- origin['id'] if ENABLE_ORIGIN_IDS else origin['url']
+ origin_url = origin['url']
expected_visits = [
{
**visit,
- 'origin': origin_id_or_url,
+ 'origin': origin_url,
'date': dateutil.parser.parse(visit['date']),
}
for visit in OBJECT_TYPE_KEYS['origin_visit'][1]
- if visit['origin']['url'] == origin['url']
+ if visit['origin'] == origin['url']
]
actual_visits = list(storage.origin_visit_get(
- origin_id_or_url))
+ origin_url))
for visit in actual_visits:
del visit['visit'] # opaque identifier
assert expected_visits == actual_visits
@@ -151,17 +150,14 @@
for vin, vout in zip(visits, actual_visits):
vin = vin.copy()
vout = vout.copy()
- if ENABLE_ORIGIN_IDS:
- assert vout.pop('origin') == 1
- else:
- assert vout.pop('origin') == 'http://example.com/'
+ assert vout.pop('origin') == 'http://example.com/'
vin.pop('origin')
vin.setdefault('type', 'git')
vin.setdefault('metadata', None)
assert vin == vout
-def test_write_replay_legacy_origin_visit1():
+def test_write_replay_origin_visit():
"""Test origin_visit when the 'origin' is just a string."""
now = datetime.datetime.now()
visits = [{
@@ -175,8 +171,23 @@
_test_write_replay_origin_visit(visits)
+def test_write_replay_legacy_origin_visit1():
+ """Test origin_visit when there is no type."""
+ now = datetime.datetime.now()
+ visits = [{
+ 'visit': 1,
+ 'origin': 'http://example.com/',
+ 'date': now,
+ 'status': 'partial',
+ 'snapshot': None,
+ }]
+ with pytest.raises(ValueError, match='too old'):
+ _test_write_replay_origin_visit(visits)
+
+
def test_write_replay_legacy_origin_visit2():
- """Test origin_visit when 'type' is missing."""
+ """Test origin_visit when 'type' is missing from the visit, but not
+ from the origin."""
now = datetime.datetime.now()
visits = [{
'visit': 1,
@@ -192,6 +203,22 @@
_test_write_replay_origin_visit(visits)
+def test_write_replay_legacy_origin_visit3():
+ """Test origin_visit when the origin is a dict"""
+ now = datetime.datetime.now()
+ visits = [{
+ 'visit': 1,
+ 'origin': {
+ 'url': 'http://example.com/',
+ },
+ 'date': now,
+ 'type': 'git',
+ 'status': 'partial',
+ 'snapshot': None,
+ }]
+ _test_write_replay_origin_visit(visits)
+
+
hash_strategy = strategies.binary(min_size=20, max_size=20)
diff --git a/swh/journal/tests/test_write_replay.py b/swh/journal/tests/test_write_replay.py
--- a/swh/journal/tests/test_write_replay.py
+++ b/swh/journal/tests/test_write_replay.py
@@ -30,7 +30,7 @@
for (obj_type, obj) in objects:
obj = obj.copy()
if obj_type == 'origin_visit':
- storage1.origin_add_one(obj['origin'])
+ storage1.origin_add_one({'url': obj['origin']})
storage1.origin_visit_upsert([obj])
else:
method = getattr(storage1, obj_type + '_add')
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist=flake8,mypy,py3-no-origin-ids,py3
+envlist=flake8,mypy,py3
[testenv:py3]
passenv=SWH_KAFKA_ROOT
@@ -14,19 +14,6 @@
--cov-branch \
--doctest-modules {posargs}
-[testenv:py3-no-origin-ids]
-passenv=SWH_KAFKA_ROOT
-deps =
- .[testing]
- pytest-cov
-setenv =
- SWH_STORAGE_IN_MEMORY_ENABLE_ORIGIN_IDS=false
- SWH_KAFKA_ROOT = {env:SWH_KAFKA_ROOT:swh/journal/tests/kafka}
-commands =
- pytest --cov={envsitepackagesdir}/swh/journal \
- {envsitepackagesdir}/swh/journal \
- --cov-branch --doctest-modules {posargs}
-
[testenv:flake8]
skip_install = true
deps =
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Aug 17, 10:23 PM (6 d, 18 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220467
Attached To
D2203: Add support for origin_visit['origin'] with type 'str'.
Event Timeline
Log In to Comment