cli_runner = <click.testing.CliRunner object at 0x7f77177ef278>
swh_config = '/tmp/pytest-of-jenkins/pytest-0/test_cli_journal_client_index_4/indexer.yml'
kafka_prefix = 'ywzhyhpiqk', kafka_server = '127.0.0.1:33507'
consumer = <cimpl.Consumer object at 0x7f7717637188>
idx_storage = <swh.indexer.storage.IndexerStorage object at 0x7f7717825400>
obj_storage = <swh.objstorage.backends.pathslicing.PathSlicingObjStorage object at 0x7f7717825518>
storage = <swh.storage.postgresql.storage.Storage object at 0x7f77177ef828>
mocker = <pytest_mock.plugin.MockerFixture object at 0x7f7717598d30>
swh_indexer_config = {'compute_checksums': ['blake2b512'], 'indexer_storage': {'cls': 'local', 'db': "user=postgres password=xxx dbname=tes...cheduler': {'cls': 'local', 'db': "user=postgres password=xxx dbname=tests host=127.0.0.1 port=28130 options=''"}, ...}
def test_cli_journal_client_index__content_mimetype(
cli_runner,
swh_config,
kafka_prefix: str,
kafka_server,
consumer: Consumer,
idx_storage,
obj_storage,
storage,
mocker,
swh_indexer_config,
):
"""Test the 'swh indexer journal-client' cli tool."""
journal_writer = get_journal_writer(
"kafka",
brokers=[kafka_server],
prefix=kafka_prefix,
client_id="test producer",
value_sanitizer=lambda object_type, value: value,
flush_timeout=3, # fail early if something is going wrong
)
contents = []
expected_results = []
content_ids = []
for content_id, content_d in RAW_CONTENTS.items():
raw_content = content_d[0]
content = Content.from_data(raw_content)
assert content_id == content.sha1
contents.append(content)
content_ids.append(content_id)
if isinstance(content_d[1], tuple):
mimetype = content_d[1][1]
else:
mimetype = content_d[1]
encoding = content_d[2]
content_mimetype_row = ContentMimetypeRow(
id=content.sha1,
tool={"id": 1, **swh_indexer_config["tools"]},
mimetype=mimetype,
encoding=encoding,
)
expected_results.append(content_mimetype_row)
assert len(contents) == len(RAW_CONTENTS)
> storage.content_add(contents)
.tox/py3/lib/python3.7/site-packages/swh/indexer/tests/test_cli.py:786:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/py3/lib/python3.7/site-packages/swh/storage/postgresql/storage.py:289: in content_add
self._content_add_metadata(db, cur, contents)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <swh.storage.postgresql.storage.Storage object at 0x7f77177ef828>
db = <swh.storage.postgresql.db.Db object at 0x7f7717598198>
cur = <cursor object at 0x7f77177b8330; closed: -1>
content = [Content(sha1=hash_to_bytes('97bb695e696adafa79ba827f35908091ae9f6193'), sha1_git=hash_to_bytes('799cb6e6e9832bb22bde5...00\x00\x00\x00\xff\xfe\xff\xff', ctime=datetime.datetime(2022, 7, 22, 8, 9, 20, 894178, tzinfo=datetime.timezone.utc))]
def _content_add_metadata(self, db, cur, content):
"""Add content to the postgresql database but not the object storage."""
# create temporary table for metadata injection
db.mktemp("content", cur)
db.copy_to(
(c.to_dict() for c in content), "tmp_content", db.content_add_keys, cur
)
# move metadata in place
try:
db.content_add_from_temp(cur)
except psycopg2.IntegrityError as e:
if e.diag.sqlstate == "23505" and e.diag.table_name == "content":
message_detail = e.diag.message_detail
if message_detail:
hash_name, hash_id = extract_collision_hash(message_detail)
collision_contents_hashes = [
c.hashes() for c in content if c.get_hash(hash_name) == hash_id
]
else:
constraint_to_hash_name = {
"content_pkey": "sha1",
"content_sha1_git_idx": "sha1_git",
"content_sha256_idx": "sha256",
}
hash_name = constraint_to_hash_name.get(e.diag.constraint_name)
hash_id = None
collision_contents_hashes = None
raise HashCollision(
hash_name, hash_id, collision_contents_hashes
> ) from None
E swh.storage.exc.HashCollision: ('sha1', '091e348aaa7e23e4ace2396af36fd4e1dadb43d0', [{'blake2s256': 'fd95252a5c7c29d97395944bf80039d519b532a3546f23fe92817ede2318c32d', 'sha1': '091e348aaa7e23e4ace2396af36fd4e1dadb43d0', 'sha1_git': 'f694645b54518daa4169ad6936d9db7419fb901f', 'sha256': '1f8f4519a9f6e3604fff4570dc3ca10b4c314c355022a8b770ac1f598d579310'}])
.tox/py3/lib/python3.7/site-packages/swh/storage/postgresql/storage.py:260: HashCollision
TEST RESULT
TEST RESULT
- Run At
- Jul 22 2022, 10:11 AM