Changeset View
Changeset View
Standalone View
Standalone View
swh/clearlydefined/tests/test_orchestrator.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime | from datetime import datetime, timezone | ||||
from datetime import timezone | |||||
import gzip | import gzip | ||||
import os | import os | ||||
from typing import List, Optional, Tuple | from typing import List, Optional, Tuple | ||||
import uuid | import uuid | ||||
import psycopg2 | import psycopg2 | ||||
from swh.clearlydefined.orchestrator import get_last_run_date, orchestrator | from swh.clearlydefined.orchestrator import get_last_run_date, orchestrator | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | rows = [ | ||||
datetime(year=2021, month=2, day=2, tzinfo=timezone.utc), | datetime(year=2021, month=2, day=2, tzinfo=timezone.utc), | ||||
datetime(year=2021, month=2, day=2, tzinfo=timezone.utc), | datetime(year=2021, month=2, day=2, tzinfo=timezone.utc), | ||||
"", | "", | ||||
), | ), | ||||
( | ( | ||||
"npm/npmjs/@fluidframework/replay-driver/revision/0.31.0/tool/licensee/" | "npm/npmjs/@fluidframework/replay-driver/revision/0.31.0/tool/licensee/" | ||||
"9.13.0.json", | "9.13.0.json", | ||||
gzip_compress_data("licensee_true.json", datadir=datadir), | gzip_compress_data("licensee_true.json", datadir=datadir), | ||||
datetime(year=2021, month=2, day=3,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=3, tzinfo=timezone.utc), | ||||
datetime(year=2021, month=2, day=3,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=3, tzinfo=timezone.utc), | ||||
"", | "", | ||||
), | ), | ||||
( | ( | ||||
"npm/npmjs/@pixi/mesh-extras/revision/5.3.5/tool/clearlydefined/1.3.4.json", | "npm/npmjs/@pixi/mesh-extras/revision/5.3.5/tool/clearlydefined/1.3.4.json", | ||||
gzip_compress_data("clearlydefined_true.json", datadir=datadir), | gzip_compress_data("clearlydefined_true.json", datadir=datadir), | ||||
datetime(year=2021, month=2, day=4,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=4, tzinfo=timezone.utc), | ||||
datetime(year=2021, month=2, day=4,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=4, tzinfo=timezone.utc), | ||||
"", | "", | ||||
), | ), | ||||
( | ( | ||||
"maven/mavencentral/za.co.absa.cobrix/cobol/revision/0.4.0.json", | "maven/mavencentral/za.co.absa.cobrix/cobol/revision/0.4.0.json", | ||||
gzip_compress_data("def_not_mapped.json", datadir=datadir), | gzip_compress_data("def_not_mapped.json", datadir=datadir), | ||||
datetime(year=2021, month=2, day=5,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=5, tzinfo=timezone.utc), | ||||
datetime(year=2021, month=2, day=5,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=5, tzinfo=timezone.utc), | ||||
"", | "", | ||||
), | ), | ||||
( | ( | ||||
"npm/npmjs/@pixi/mesh-extras/revision/5.3.6/tool/clearlydefined/1.3.4.json", | "npm/npmjs/@pixi/mesh-extras/revision/5.3.6/tool/clearlydefined/1.3.4.json", | ||||
gzip_compress_data("clearydefined_not_mapped.json", datadir=datadir), | gzip_compress_data("clearydefined_not_mapped.json", datadir=datadir), | ||||
datetime(year=2021, month=2, day=6,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
datetime(year=2021, month=2, day=6,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
"", | "", | ||||
), | ), | ||||
( | ( | ||||
"npm/npmjs/@pixi/mesh-extras/revision/5.3.5/tool/fossology/1.3.4.json", | "npm/npmjs/@pixi/mesh-extras/revision/5.3.5/tool/fossology/1.3.4.json", | ||||
gzip_compress_data(None, datadir=datadir), | gzip_compress_data(None, datadir=datadir), | ||||
datetime(year=2021, month=2, day=1,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=1, tzinfo=timezone.utc), | ||||
datetime(year=2021, month=2, day=1,tzinfo=timezone.utc), | datetime(year=2021, month=2, day=1, tzinfo=timezone.utc), | ||||
"", | "", | ||||
), | ), | ||||
] | ] | ||||
fill_rows_in_table(rows=rows, cursor=cursor, connection=connection) | fill_rows_in_table(rows=rows, cursor=cursor, connection=connection) | ||||
def fill_data_after_updation_of_storage(connection, cursor, datadir):
    """Insert the extra row used after the storage has been updated.

    Builds a single row and hands it to ``fill_rows_in_table``.

    Args:
        connection: database connection, forwarded to ``fill_rows_in_table``.
        cursor: database cursor, forwarded to ``fill_rows_in_table``.
        datadir: directory passed through to ``gzip_compress_data``.
    """
    # NOTE(review): the column meaning of each tuple slot is defined by
    # ``fill_rows_in_table`` (not visible here) -- presumably
    # (path, gzipped content, two timestamps, trailing text field); confirm.
    rows = [
        (
            "maven/mavencentral/cobrix/cobol-parser/revision/0.4.0.json",
            gzip_compress_data(None, datadir=datadir),
            # The two timestamps differ on purpose: 1 Feb vs 8 Feb 2021 (UTC).
            datetime(year=2021, month=2, day=1, tzinfo=timezone.utc),
            datetime(year=2021, month=2, day=8, tzinfo=timezone.utc),
            "",
        ),
    ]
    fill_rows_in_table(rows=rows, cursor=cursor, connection=connection)
def get_length_of_unmapped_data(connection, cursor) -> int:
    """Return the number of rows currently in the ``unmapped_data`` table.

    Args:
        connection: not used by this function; kept so existing callers that
            pass ``connection=...`` keep working.
        cursor: DB-API cursor on which the ``COUNT(*)`` query is executed.

    Returns:
        The value of ``SELECT COUNT(*) FROM unmapped_data``.
    """
    cursor.execute("SELECT COUNT(*) FROM unmapped_data")
    # An aggregate without GROUP BY yields exactly one single-column row, so
    # fetch just that row and unpack it (fails loudly if the shape differs).
    (count,) = cursor.fetchone()
    return count
def test_orchestrator(swh_storage, clearcode_dsn, datadir): | def test_orchestrator(swh_storage, clearcode_dsn, datadir): | ||||
connection = psycopg2.connect(dsn=clearcode_dsn) | connection = psycopg2.connect(dsn=clearcode_dsn) | ||||
cursor = connection.cursor() | cursor = connection.cursor() | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
# Fill data in clearcode database, for first time orchestration | # Fill data in clearcode database, for first time orchestration | ||||
fill_data_before_updation_of_storage( | fill_data_before_updation_of_storage( | ||||
connection=connection, cursor=cursor, datadir=datadir | connection=connection, cursor=cursor, datadir=datadir | ||||
) | ) | ||||
orchestrator(storage=swh_storage, clearcode_dsn=clearcode_dsn) | orchestrator(storage=swh_storage, clearcode_dsn=clearcode_dsn) | ||||
# Check how much data is unmapped after first orchestration | # Check how much data is unmapped after first orchestration | ||||
assert 2 == get_length_of_unmapped_data(connection=connection, cursor=cursor) | assert 1 == get_length_of_unmapped_data(connection=connection, cursor=cursor) | ||||
assert datetime(2021, 2, 6, 0, 0, tzinfo=timezone.utc) == get_last_run_date( | assert datetime(2021, 2, 6, 0, 0, tzinfo=timezone.utc) == get_last_run_date( | ||||
cursor=cursor | cursor=cursor | ||||
) | ) | ||||
content_data.extend( | content_data.extend( | ||||
[Content.from_data(b"424242\n"), Content.from_data(b"42424242\n")] | [Content.from_data(b"424242\n"), Content.from_data(b"42424242\n")] | ||||
) | ) | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
# Run orchestration after insertion in swh storage and | # Run orchestration after insertion in swh storage and | ||||
Show All 13 Lines |