diff --git a/.gitignore b/.gitignore --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,3 @@ dist/ version.txt .mypy_cache/ -.hypothesis \ No newline at end of file diff --git a/requirements-test.txt b/requirements-test.txt --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,7 +1,6 @@ -confluent-kafka -hypothesis pytest pytest-mock +confluent-kafka pytest-redis pyyaml requests_mock diff --git a/swh/counters/journal_client.py b/swh/counters/journal_client.py --- a/swh/counters/journal_client.py +++ b/swh/counters/journal_client.py @@ -3,9 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from collections import defaultdict from typing import Dict -from urllib.parse import urlparse import msgpack @@ -22,9 +20,6 @@ for key in messages.keys(): counters.add(key, messages[key]) - if "origin" in messages: - process_origins(messages["origin"], counters) - if "revision" in messages: process_revisions(messages["revision"], counters) @@ -32,23 +27,6 @@ process_releases(messages["release"], counters) -def process_origins(origins: Dict[bytes, bytes], counters: CountersInterface): - """Count the number of different network locations in origin URLs.""" - origins_netloc = defaultdict(set) - for origin_bytes in origins.values(): - origin = msgpack.loads(origin_bytes) - parsed_url = urlparse(origin["url"]) - netloc = parsed_url.netloc - if netloc.endswith("googlecode.com"): - # special case for googlecode origins where URL netloc - # has the form {account}.googlecode.com - netloc = "googlecode.com" - origins_netloc[f"origin_netloc:{netloc}"].add(origin["url"]) - - for k, v in origins_netloc.items(): - counters.add(k, v) - - def process_revisions(revisions: Dict[bytes, bytes], counters: CountersInterface): """Count the number of different authors and committers on the revisions (in the person collection)""" diff --git a/swh/counters/tests/test_journal_client.py b/swh/counters/tests/test_journal_client.py --- a/swh/counters/tests/test_journal_client.py +++ b/swh/counters/tests/test_journal_client.py @@ -3,26 +3,19 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from collections import Counter from typing import Dict, Optional -from urllib.parse import urlparse -from hypothesis import given -from hypothesis.strategies import lists import msgpack from swh.counters.journal_client import ( process_journal_messages, - process_origins, process_releases, process_revisions, ) from swh.counters.redis import Redis from swh.model.hashutil import hash_to_bytes -from swh.model.hypothesis_strategies import origins from swh.model.model import ( ObjectType, - Origin, Person, Release, Revision, @@ -152,50 +145,3 @@ process_releases(releases, redis) assert redis.get_counts(redis.get_counters()) == {} - - -def test_journal_client_process_origins(local_redis_host, redisdb): - # hypothesis does not play well with pytest function scoped fixtures - # so we use an inner test function as workaround - @given(lists(origins())) - def inner(origins): - origins_ = { - msgpack.dumps(origin.to_dict()): msgpack.dumps(origin.to_dict()) - for origin in origins - } - - # explicitly flush redis db content due to hypothesis use - redisdb.flushall() - redis = Redis(host=local_redis_host) - - process_origins(origins_, redis) - - expected_counts = Counter( - [ - f"origin_netloc:{urlparse(origin.url).netloc}".encode() - for origin in set(origins) - ] - ) - - assert redis.get_counts(redis.get_counters()) == expected_counts - - inner() - - -def test_journal_client_process_googlecode_origins(local_redis_host): - origins = [ - Origin(url="https://foo.googlecode.com"), - Origin(url="https://bar.googlecode.com"), - ] - origins_ = { - msgpack.dumps(origin.to_dict()): msgpack.dumps(origin.to_dict()) - for origin in origins - } - - redis = Redis(host=local_redis_host) - - process_origins(origins_, redis) - - assert redis.get_counts(redis.get_counters()) == { - b"origin_netloc:googlecode.com": 2 - }