diff --git a/PKG-INFO b/PKG-INFO
index c7fe2f9..3a19336 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,72 +1,72 @@
 Metadata-Version: 2.1
 Name: swh.journal
-Version: 0.3.1
+Version: 0.3.2
 Summary: Software Heritage Journal utilities
 Home-page: https://forge.softwareheritage.org/diffusion/DJNL/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-journal
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-journal/
 Description: swh-journal
         ===========
         
         Persistent logger of changes to the archive, with publish-subscribe support.
         
         See the
         [documentation](https://docs.softwareheritage.org/devel/swh-journal/index.html#software-heritage-journal)
         for more details.
         
         # Local test
         
         As a prerequisite, you need a kafka installation path.
         The following target will take care of this:
         
         ```
         make install
         ```
         
         Then, provided you are in the right virtual environment as described
         in the
         [swh getting-started](https://docs.softwareheritage.org/devel/developer-setup.html#developer-setup):
         
         ```
         pytest
         ```
         
         or:
         
         ```
         tox
         ```
         
         # Running
         
         ## publisher
         
         Command:
         ```
         $ swh-journal --config-file ~/.config/swh/journal/publisher.yml \
             publisher
         ```
         
         # Auto-completion
         
         To have the completion, add the following in your
         ~/.virtualenvs/swh/bin/postactivate:
         
         ```
         eval "$(_SWH_JOURNAL_COMPLETE=$autocomplete_cmd swh-journal)"
         ```
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
diff --git a/swh.journal.egg-info/PKG-INFO b/swh.journal.egg-info/PKG-INFO
index c7fe2f9..3a19336 100644
--- a/swh.journal.egg-info/PKG-INFO
+++ b/swh.journal.egg-info/PKG-INFO
@@ -1,72 +1,72 @@
 Metadata-Version: 2.1
 Name: swh.journal
-Version: 0.3.1
+Version: 0.3.2
 Summary: Software Heritage Journal utilities
 Home-page: https://forge.softwareheritage.org/diffusion/DJNL/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-journal
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-journal/
 Description: swh-journal
         ===========
         
         Persistent logger of changes to the archive, with publish-subscribe support.
         
         See the
         [documentation](https://docs.softwareheritage.org/devel/swh-journal/index.html#software-heritage-journal)
         for more details.
         
         # Local test
         
         As a prerequisite, you need a kafka installation path.
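The README packaged above only shows the CLI entry points. For comparison, here is a minimal sketch of consuming the journal from Python with the `JournalClient` this package ships; the broker address, group id, and object count are illustrative assumptions, and `swh.journal.objects` is the topic prefix the writers use by default:

```python
# Hedged sketch: consuming journal topics from Python rather than the CLI.
# Broker address and group id below are assumptions for illustration only.
from swh.journal.client import JournalClient

client = JournalClient(
    brokers=["localhost:9092"],    # assumed local Kafka broker
    group_id="readme-example",     # any fresh consumer group id
    prefix="swh.journal.objects",  # default topic prefix used by the writers
    object_types=["origin"],
    stop_after_objects=10,         # return instead of polling forever
)

def worker_fn(all_objects):
    # The client batches deserialized objects by type before calling us.
    for object_type, objects in all_objects.items():
        print(f"{object_type}: {len(objects)} objects")

client.process(worker_fn)
```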
         The following target will take care of this:
         
         ```
         make install
         ```
         
         Then, provided you are in the right virtual environment as described
         in the
         [swh getting-started](https://docs.softwareheritage.org/devel/developer-setup.html#developer-setup):
         
         ```
         pytest
         ```
         
         or:
         
         ```
         tox
         ```
         
         # Running
         
         ## publisher
         
         Command:
         ```
         $ swh-journal --config-file ~/.config/swh/journal/publisher.yml \
             publisher
         ```
         
         # Auto-completion
         
         To have the completion, add the following in your
         ~/.virtualenvs/swh/bin/postactivate:
         
         ```
         eval "$(_SWH_JOURNAL_COMPLETE=$autocomplete_cmd swh-journal)"
         ```
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
diff --git a/swh.journal.egg-info/SOURCES.txt b/swh.journal.egg-info/SOURCES.txt
index d2d75c2..a17b80d 100644
--- a/swh.journal.egg-info/SOURCES.txt
+++ b/swh.journal.egg-info/SOURCES.txt
@@ -1,35 +1,34 @@
 MANIFEST.in
 Makefile
 README.md
 pyproject.toml
 requirements-swh.txt
 requirements-test.txt
 requirements.txt
 setup.cfg
 setup.py
 version.txt
 swh/__init__.py
 swh.journal.egg-info/PKG-INFO
 swh.journal.egg-info/SOURCES.txt
 swh.journal.egg-info/dependency_links.txt
 swh.journal.egg-info/entry_points.txt
 swh.journal.egg-info/requires.txt
 swh.journal.egg-info/top_level.txt
 swh/journal/__init__.py
 swh/journal/cli.py
 swh/journal/client.py
 swh/journal/py.typed
 swh/journal/pytest_plugin.py
 swh/journal/serializers.py
 swh/journal/tests/__init__.py
 swh/journal/tests/conftest.py
 swh/journal/tests/journal_data.py
 swh/journal/tests/log4j.properties
 swh/journal/tests/test_client.py
 swh/journal/tests/test_kafka_writer.py
 swh/journal/tests/test_pytest_plugin.py
 swh/journal/tests/test_serializers.py
-swh/journal/tests/utils.py
 swh/journal/writer/__init__.py
 swh/journal/writer/inmemory.py
 swh/journal/writer/kafka.py
\ No newline at end of file
diff --git a/swh/journal/tests/journal_data.py b/swh/journal/tests/journal_data.py
index dbf1d0c..c1ab404 100644
--- a/swh/journal/tests/journal_data.py
+++ b/swh/journal/tests/journal_data.py
@@ -1,287 +1,295 @@
 # Copyright (C) 2019-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import copy
 import datetime
 
 from typing import Any, Dict, List, Type
 
 from swh.model.hashutil import MultiHash, hash_to_bytes
 
 from swh.journal.serializers import ModelObject
 from swh.model.model import (
     BaseModel,
     Content,
     Directory,
     Origin,
     OriginVisit,
     OriginVisitStatus,
     Release,
     Revision,
     SkippedContent,
     Snapshot,
 )
 
 OBJECT_TYPES: Dict[Type[BaseModel], str] = {
     Content: "content",
     Directory: "directory",
     Origin: "origin",
     OriginVisit: "origin_visit",
     OriginVisitStatus: "origin_visit_status",
     Release: "release",
     Revision: "revision",
     SkippedContent: "skipped_content",
     Snapshot: "snapshot",
 }
 
 UTC = datetime.timezone.utc
 
 CONTENTS = [
     {
         **MultiHash.from_data(f"foo{i}".encode()).digest(),
         "length": 4,
         "status": "visible",
     }
     for i in range(10)
+] + [
+    {
+        **MultiHash.from_data(f"forbidden foo{i}".encode()).digest(),
+        "length": 14,
+        "status": "hidden",
+    }
+    for i in range(10)
 ]
+
 SKIPPED_CONTENTS = [
     {
         **MultiHash.from_data(f"bar{i}".encode()).digest(),
         "length": 4,
         "status": "absent",
         "reason": f"because chr({i}) != '*'",
     }
     for i in range(2)
 ]
 
 duplicate_content1 = {
     "length": 4,
     "sha1": hash_to_bytes("44973274ccef6ab4dfaaf86599792fa9c3fe4689"),
     "sha1_git": b"another-foo",
     "blake2s256": b"another-bar",
     "sha256": b"another-baz",
     "status": "visible",
 }
 
 # Craft a sha1 collision
 duplicate_content2 = duplicate_content1.copy()
 sha1_array = bytearray(duplicate_content1["sha1_git"])
 sha1_array[0] += 1
 duplicate_content2["sha1_git"] = bytes(sha1_array)
 
 DUPLICATE_CONTENTS = [duplicate_content1, duplicate_content2]
 
 COMMITTERS = [
     {"fullname": b"foo", "name": b"foo", "email": b"",},
     {"fullname": b"bar", "name": b"bar", "email": b"",},
 ]
 
 DATES = [
     {
         "timestamp": {"seconds": 1234567891, "microseconds": 0,},
         "offset": 120,
         "negative_utc": False,
     },
     {
         "timestamp": {"seconds": 1234567892, "microseconds": 0,},
         "offset": 120,
         "negative_utc": False,
     },
 ]
 
 REVISIONS = [
     {
         "id": hash_to_bytes("7026b7c1a2af56521e951c01ed20f255fa054238"),
         "message": b"hello",
         "date": DATES[0],
         "committer": COMMITTERS[0],
         "author": COMMITTERS[0],
         "committer_date": DATES[0],
         "type": "git",
         "directory": b"\x01" * 20,
         "synthetic": False,
         "metadata": None,
         "parents": (),
     },
     {
         "id": hash_to_bytes("368a48fe15b7db2383775f97c6b247011b3f14f4"),
         "message": b"hello again",
         "date": DATES[1],
         "committer": COMMITTERS[1],
         "author": COMMITTERS[1],
         "committer_date": DATES[1],
         "type": "hg",
         "directory": b"\x02" * 20,
         "synthetic": False,
         "metadata": None,
         "parents": (),
     },
 ]
 
 RELEASES = [
     {
         "id": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
         "name": b"v0.0.1",
         "date": {
             "timestamp": {"seconds": 1234567890, "microseconds": 0,},
             "offset": 120,
             "negative_utc": False,
         },
         "author": COMMITTERS[0],
         "target_type": "revision",
         "target": b"\x04" * 20,
         "message": b"foo",
         "synthetic": False,
     },
 ]
 
 ORIGINS = [
     {"url": "https://somewhere.org/den/fox",},
     {"url": "https://overtherainbow.org/fox/den",},
 ]
 
 ORIGIN_VISITS = [
     {
         "origin": ORIGINS[0]["url"],
         "date": datetime.datetime(2013, 5, 7, 4, 20, 39, 369271, tzinfo=UTC),
         "snapshot": None,
         "status": "ongoing",
-        "metadata": {"foo": "bar"},
+        "metadata": None,
         "type": "git",
         "visit": 1,
     },
     {
         "origin": ORIGINS[1]["url"],
         "date": datetime.datetime(2014, 11, 27, 17, 20, 39, tzinfo=UTC),
         "snapshot": None,
         "status": "ongoing",
-        "metadata": {"baz": "qux"},
+        "metadata": None,
         "type": "hg",
         "visit": 1,
     },
     {
         "origin": ORIGINS[0]["url"],
         "date": datetime.datetime(2018, 11, 27, 17, 20, 39, tzinfo=UTC),
         "snapshot": None,
         "status": "ongoing",
-        "metadata": {"baz": "qux"},
+        "metadata": None,
         "type": "git",
         "visit": 2,
     },
     {
         "origin": ORIGINS[0]["url"],
         "date": datetime.datetime(2018, 11, 27, 17, 20, 39, tzinfo=UTC),
         "snapshot": hash_to_bytes("742cdc6be7bf6e895b055227c2300070f056e07b"),
         "status": "full",
-        "metadata": {"baz": "qux"},
+        "metadata": None,
         "type": "git",
         "visit": 3,
     },
     {
         "origin": ORIGINS[1]["url"],
         "date": datetime.datetime(2015, 11, 27, 17, 20, 39, tzinfo=UTC),
         "snapshot": hash_to_bytes("ecee48397a92b0d034e9752a17459f3691a73ef9"),
         "status": "partial",
-        "metadata": {"something": "wrong occurred"},
+        "metadata": None,
         "type": "hg",
         "visit": 2,
     },
 ]
 
 ORIGIN_VISIT_STATUSES = []
 for visit in ORIGIN_VISITS:
     visit_status = copy.deepcopy(visit)
     visit_status.pop("type")
     ORIGIN_VISIT_STATUSES.append(visit_status)
 
 
 DIRECTORIES = [
     {"id": hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), "entries": ()},
     {
         "id": hash_to_bytes("cc13247a0d6584f297ca37b5868d2cbd242aea03"),
         "entries": (
             {
                 "name": b"file1.ext",
                 "perms": 0o644,
                 "type": "file",
                 "target": CONTENTS[0]["sha1_git"],
             },
             {
                 "name": b"dir1",
                 "perms": 0o755,
                 "type": "dir",
                 "target": hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
             },
             {
                 "name": b"subprepo1",
                 "perms": 0o160000,
                 "type": "rev",
                 "target": REVISIONS[1]["id"],
             },
         ),
     },
 ]
 
 SNAPSHOTS = [
     {
         "id": hash_to_bytes("742cdc6be7bf6e895b055227c2300070f056e07b"),
         "branches": {
             b"master": {"target_type": "revision", "target": REVISIONS[0]["id"]}
         },
     },
     {
         "id": hash_to_bytes("ecee48397a92b0d034e9752a17459f3691a73ef9"),
         "branches": {
             b"target/revision": {
                 "target_type": "revision",
                 "target": REVISIONS[0]["id"],
             },
             b"target/alias": {"target_type": "alias", "target": b"target/revision"},
             b"target/directory": {
                 "target_type": "directory",
                 "target": DIRECTORIES[0]["id"],
             },
             b"target/release": {"target_type": "release", "target": RELEASES[0]["id"]},
             b"target/snapshot": {
                 "target_type": "snapshot",
                 "target": hash_to_bytes("742cdc6be7bf6e895b055227c2300070f056e07b"),
             },
         },
     },
 ]
 
 TEST_OBJECT_DICTS: Dict[str, List[Dict[str, Any]]] = {
     "content": CONTENTS,
     "directory": DIRECTORIES,
     "origin": ORIGINS,
     "origin_visit": ORIGIN_VISITS,
     "origin_visit_status": ORIGIN_VISIT_STATUSES,
     "release": RELEASES,
     "revision": REVISIONS,
     "snapshot": SNAPSHOTS,
     "skipped_content": SKIPPED_CONTENTS,
 }
 
 MODEL_OBJECTS = {v: k for (k, v) in OBJECT_TYPES.items()}
 
 TEST_OBJECTS: Dict[str, List[ModelObject]] = {}
 
 for object_type, objects in TEST_OBJECT_DICTS.items():
     converted_objects: List[ModelObject] = []
     model = MODEL_OBJECTS[object_type]
 
     for (num, obj_d) in enumerate(objects):
         if object_type == "content":
             obj_d = {**obj_d, "data": b"", "ctime": datetime.datetime.now(tz=UTC)}
 
         converted_objects.append(model.from_dict(obj_d))
 
     TEST_OBJECTS[object_type] = converted_objects
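Two things change in the fixtures above: ten "hidden" contents join CONTENTS, and the visit metadata fields are nulled. Since the loop at the bottom of journal_data.py converts every dict into a swh.model object at import time, each added entry must survive that conversion with `data=b""` and a `ctime` attached. A minimal standalone sketch of that conversion for one of the new entries (the payload literal is illustrative):

```python
import datetime

from swh.model.hashutil import MultiHash
from swh.model.model import Content

UTC = datetime.timezone.utc

# One of the new "hidden" fixtures: 14 bytes of input, status "hidden".
d = {
    **MultiHash.from_data(b"forbidden foo0").digest(),
    "length": 14,
    "status": "hidden",
}

# TEST_OBJECTS adds data/ctime before handing the dict to the model class,
# exactly as the loop at the bottom of journal_data.py does.
content = Content.from_dict(
    {**d, "data": b"", "ctime": datetime.datetime.now(tz=UTC)}
)
assert content.status == "hidden"
```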
f"because chr({i}) != '*'", } for i in range(2) ] duplicate_content1 = { "length": 4, "sha1": hash_to_bytes("44973274ccef6ab4dfaaf86599792fa9c3fe4689"), "sha1_git": b"another-foo", "blake2s256": b"another-bar", "sha256": b"another-baz", "status": "visible", } # Craft a sha1 collision duplicate_content2 = duplicate_content1.copy() sha1_array = bytearray(duplicate_content1["sha1_git"]) sha1_array[0] += 1 duplicate_content2["sha1_git"] = bytes(sha1_array) DUPLICATE_CONTENTS = [duplicate_content1, duplicate_content2] COMMITTERS = [ {"fullname": b"foo", "name": b"foo", "email": b"",}, {"fullname": b"bar", "name": b"bar", "email": b"",}, ] DATES = [ { "timestamp": {"seconds": 1234567891, "microseconds": 0,}, "offset": 120, "negative_utc": False, }, { "timestamp": {"seconds": 1234567892, "microseconds": 0,}, "offset": 120, "negative_utc": False, }, ] REVISIONS = [ { "id": hash_to_bytes("7026b7c1a2af56521e951c01ed20f255fa054238"), "message": b"hello", "date": DATES[0], "committer": COMMITTERS[0], "author": COMMITTERS[0], "committer_date": DATES[0], "type": "git", "directory": b"\x01" * 20, "synthetic": False, "metadata": None, "parents": (), }, { "id": hash_to_bytes("368a48fe15b7db2383775f97c6b247011b3f14f4"), "message": b"hello again", "date": DATES[1], "committer": COMMITTERS[1], "author": COMMITTERS[1], "committer_date": DATES[1], "type": "hg", "directory": b"\x02" * 20, "synthetic": False, "metadata": None, "parents": (), }, ] RELEASES = [ { "id": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), "name": b"v0.0.1", "date": { "timestamp": {"seconds": 1234567890, "microseconds": 0,}, "offset": 120, "negative_utc": False, }, "author": COMMITTERS[0], "target_type": "revision", "target": b"\x04" * 20, "message": b"foo", "synthetic": False, }, ] ORIGINS = [ {"url": "https://somewhere.org/den/fox",}, {"url": "https://overtherainbow.org/fox/den",}, ] ORIGIN_VISITS = [ { "origin": ORIGINS[0]["url"], "date": datetime.datetime(2013, 5, 7, 4, 20, 39, 369271, tzinfo=UTC), "snapshot": None, "status": "ongoing", - "metadata": {"foo": "bar"}, + "metadata": None, "type": "git", "visit": 1, }, { "origin": ORIGINS[1]["url"], "date": datetime.datetime(2014, 11, 27, 17, 20, 39, tzinfo=UTC), "snapshot": None, "status": "ongoing", - "metadata": {"baz": "qux"}, + "metadata": None, "type": "hg", "visit": 1, }, { "origin": ORIGINS[0]["url"], "date": datetime.datetime(2018, 11, 27, 17, 20, 39, tzinfo=UTC), "snapshot": None, "status": "ongoing", - "metadata": {"baz": "qux"}, + "metadata": None, "type": "git", "visit": 2, }, { "origin": ORIGINS[0]["url"], "date": datetime.datetime(2018, 11, 27, 17, 20, 39, tzinfo=UTC), "snapshot": hash_to_bytes("742cdc6be7bf6e895b055227c2300070f056e07b"), "status": "full", - "metadata": {"baz": "qux"}, + "metadata": None, "type": "git", "visit": 3, }, { "origin": ORIGINS[1]["url"], "date": datetime.datetime(2015, 11, 27, 17, 20, 39, tzinfo=UTC), "snapshot": hash_to_bytes("ecee48397a92b0d034e9752a17459f3691a73ef9"), "status": "partial", - "metadata": {"something": "wrong occurred"}, + "metadata": None, "type": "hg", "visit": 2, }, ] ORIGIN_VISIT_STATUSES = [] for visit in ORIGIN_VISITS: visit_status = copy.deepcopy(visit) visit_status.pop("type") ORIGIN_VISIT_STATUSES.append(visit_status) DIRECTORIES = [ {"id": hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), "entries": ()}, { "id": hash_to_bytes("cc13247a0d6584f297ca37b5868d2cbd242aea03"), "entries": ( { "name": b"file1.ext", "perms": 0o644, "type": "file", "target": CONTENTS[0]["sha1_git"], }, { "name": b"dir1", 
"perms": 0o755, "type": "dir", "target": hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), }, { "name": b"subprepo1", "perms": 0o160000, "type": "rev", "target": REVISIONS[1]["id"], }, ), }, ] SNAPSHOTS = [ { "id": hash_to_bytes("742cdc6be7bf6e895b055227c2300070f056e07b"), "branches": { b"master": {"target_type": "revision", "target": REVISIONS[0]["id"]} }, }, { "id": hash_to_bytes("ecee48397a92b0d034e9752a17459f3691a73ef9"), "branches": { b"target/revision": { "target_type": "revision", "target": REVISIONS[0]["id"], }, b"target/alias": {"target_type": "alias", "target": b"target/revision"}, b"target/directory": { "target_type": "directory", "target": DIRECTORIES[0]["id"], }, b"target/release": {"target_type": "release", "target": RELEASES[0]["id"]}, b"target/snapshot": { "target_type": "snapshot", "target": hash_to_bytes("742cdc6be7bf6e895b055227c2300070f056e07b"), }, }, }, ] TEST_OBJECT_DICTS: Dict[str, List[Dict[str, Any]]] = { "content": CONTENTS, "directory": DIRECTORIES, "origin": ORIGINS, "origin_visit": ORIGIN_VISITS, "origin_visit_status": ORIGIN_VISIT_STATUSES, "release": RELEASES, "revision": REVISIONS, "snapshot": SNAPSHOTS, "skipped_content": SKIPPED_CONTENTS, } MODEL_OBJECTS = {v: k for (k, v) in OBJECT_TYPES.items()} TEST_OBJECTS: Dict[str, List[ModelObject]] = {} for object_type, objects in TEST_OBJECT_DICTS.items(): converted_objects: List[ModelObject] = [] model = MODEL_OBJECTS[object_type] for (num, obj_d) in enumerate(objects): if object_type == "content": obj_d = {**obj_d, "data": b"", "ctime": datetime.datetime.now(tz=UTC)} converted_objects.append(model.from_dict(obj_d)) TEST_OBJECTS[object_type] = converted_objects diff --git a/swh/journal/tests/utils.py b/swh/journal/tests/utils.py deleted file mode 100644 index 0c08958..0000000 --- a/swh/journal/tests/utils.py +++ /dev/null @@ -1,80 +0,0 @@ -from swh.journal.client import JournalClient -from swh.journal.writer.kafka import KafkaJournalWriter -from swh.journal.serializers import kafka_to_value, key_to_kafka, value_to_kafka - - -class FakeKafkaMessage: - def __init__(self, topic, key, value): - self._topic = topic - self._key = key_to_kafka(key) - self._value = value_to_kafka(value) - - def topic(self): - return self._topic - - def value(self): - return self._value - - def key(self): - return self._key - - def error(self): - return None - - -class MockedKafkaWriter(KafkaJournalWriter): - def __init__(self, queue, anonymize: bool = False): - self._prefix = "prefix" - self.queue = queue - self.anonymize = anonymize - - def send(self, topic, key, value): - msg = FakeKafkaMessage(topic=topic, key=key, value=value) - self.queue.append(msg) - - def flush(self): - pass - - -class MockedKafkaConsumer: - """Mimic the confluent_kafka.Consumer API, producing the messages stored - in `queue`. - - You're only allowed to subscribe to topics in which the queue has - messages. 
- """ - - def __init__(self, queue): - self.queue = queue - self.committed = False - - def consume(self, num_messages, timeout=None): - L = self.queue[0:num_messages] - self.queue[0:num_messages] = [] - return L - - def commit(self): - if self.queue == []: - self.committed = True - - def list_topics(self, timeout=None): - return set(message.topic() for message in self.queue) - - def subscribe(self, topics): - unknown_topics = set(topics) - self.list_topics() - if unknown_topics: - raise ValueError("Unknown topics %s" % ", ".join(unknown_topics)) - - def close(self): - pass - - -class MockedJournalClient(JournalClient): - def __init__(self, queue, object_types=None): - self._object_types = object_types - self.consumer = MockedKafkaConsumer(queue) - self.process_timeout = None - self.stop_after_objects = None - self.value_deserializer = kafka_to_value - self.stop_on_eof = False - self.batch_size = 200 diff --git a/version.txt b/version.txt index e484cb1..7c6af42 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.3.1-0-gc91a4cf \ No newline at end of file +v0.3.2-0-g6ea2d6e \ No newline at end of file