Changeset View
Changeset View
Standalone View
Standalone View
swh/fuse/tests/data/gen-api-data.py
#!/usr/bin/env python3 | #!/usr/bin/env python3 | ||||
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
from typing import Any, Dict | from typing import Any, Dict | ||||
import requests | import requests | ||||
from swh.fuse.tests.data.config import ALL_ENTRIES | from swh.fuse.tests.data.config import ALL_ENTRIES | ||||
from swh.model.identifiers import CONTENT, DIRECTORY, REVISION, SWHID, parse_swhid | from swh.model.identifiers import ( | ||||
CONTENT, | |||||
DIRECTORY, | |||||
RELEASE, | |||||
REVISION, | |||||
SWHID, | |||||
parse_swhid, | |||||
) | |||||
# Base URL of the live archive API that the fixtures are fetched from.
API_URL_real = "https://archive.softwareheritage.org/api/1"
# Deliberately unresolvable base URL written into the generated test data.
API_URL_test = "https://invalid-test-only.archive.softwareheritage.org/api/1"

# str(SWHID) -> relative API endpoint path (without any "raw/" suffix).
SWHID2URL: Dict[str, str] = {}
# Relative API endpoint path -> response payload (decoded JSON or raw text).
MOCK_ARCHIVE: Dict[str, Any] = {}
# Temporary map (swhid -> metadata) to ease data generation
METADATA: Dict[SWHID, Any] = {}
def swhid2url(swhid: SWHID) -> str:
    """Return the relative API endpoint path for ``swhid``.

    The path is the endpoint prefix selected by ``swhid.object_type``,
    followed by ``swhid.object_id`` and a trailing slash.

    Raises:
        KeyError: if the object type is not one of CONTENT, DIRECTORY,
            REVISION or RELEASE.
    """
    endpoint_by_type = {
        CONTENT: "content/sha1_git:",
        DIRECTORY: "directory/",
        REVISION: "revision/",
        RELEASE: "release/",
    }
    endpoint = endpoint_by_type[swhid.object_type]
    return f"{endpoint}{swhid.object_id}/"
def get_short_type(object_type: str) -> str:
    """Return the three-letter tag used in SWHID strings for ``object_type``.

    ``object_type`` must be one of the CONTENT, DIRECTORY, REVISION or
    RELEASE constants; any other value raises ``KeyError``.
    """
    tag_by_type = {
        CONTENT: "cnt",
        DIRECTORY: "dir",
        REVISION: "rev",
        RELEASE: "rel",
    }
    tag = tag_by_type[object_type]
    return tag
def generate_archive_data(
    swhid: SWHID, raw: bool = False, recursive: bool = False
) -> None:
    """Fetch ``swhid`` from the real archive API and record it for the mocks.

    The response is stored in ``MOCK_ARCHIVE`` under its relative endpoint
    path, and the SWHID -> endpoint mapping is stored in ``SWHID2URL``.
    Decoded JSON metadata is additionally cached in ``METADATA``.

    Args:
        swhid: the object to fetch.
        raw: if true, fetch the ``raw/`` endpoint and store the response text
            as-is instead of decoding it as JSON.
        recursive: if true, also fetch the extra data the artifact needs
            (content's blob data, revision parents, release target).

    Raises:
        requests.HTTPError: if the archive answers with an error status.
    """
    # Already in mock archive
    # NOTE(review): an object first fetched non-recursively (e.g. as a
    # revision parent) is skipped here even when it is later requested with
    # recursive=True, so the result depends on the processing order.
    if swhid in METADATA and not raw:
        return

    url = swhid2url(swhid)
    SWHID2URL[str(swhid)] = url

    if raw:
        url += "raw/"

    response = requests.get(f"{API_URL_real}/{url}")
    # Fail loudly rather than baking an HTTP error body into the fixtures.
    response.raise_for_status()

    if raw:
        # Raw blobs are not metadata: keep them out of METADATA so they never
        # overwrite the JSON entry nor trip the "already fetched" guard above.
        MOCK_ARCHIVE[url] = response.text
    else:
        data = json.loads(response.text)
        MOCK_ARCHIVE[url] = data
        METADATA[swhid] = data

    # Retrieve additional needed data for different artifacts (eg: content's
    # blob data, revision parents, etc.)
    if not recursive:
        return

    if swhid.object_type == CONTENT:
        generate_archive_data(swhid, raw=True)
    elif swhid.object_type == REVISION:
        for parent in METADATA[swhid]["parents"]:
            parent_swhid = parse_swhid(f"swh:1:rev:{parent['id']}")
            # Only retrieve one-level of parent (disable recursivity)
            generate_archive_data(parent_swhid)
    elif swhid.object_type == RELEASE:
        target_type = METADATA[swhid]["target_type"]
        target_id = METADATA[swhid]["target"]
        target = parse_swhid(f"swh:1:{get_short_type(target_type)}:{target_id}")
        # `recursive` is necessarily True at this point; it is spelled out
        # explicitly on purpose (this recursive behavior is likely to change
        # once T2700 is fixed).
        generate_archive_data(target, recursive=True)
# Populate the mock archive with every configured entry and its dependencies.
for raw_swhid in ALL_ENTRIES:
    generate_archive_data(parse_swhid(raw_swhid), recursive=True)

# Emit the generated module on stdout: banner first, then the three globals.
banner = (
    "# GENERATED FILE, DO NOT EDIT.",
    "# Run './gen-api-data.py > api_data.py' instead.",
    "# flake8: noqa",
    "",
)
for banner_line in banner:
    print(banner_line)

print(f"API_URL = '{API_URL_test}'\n")
print(f"SWHID2URL = {SWHID2URL}\n")
print(f"MOCK_ARCHIVE = {MOCK_ARCHIVE}")
seirl (inline comment on the `generate_archive_data(target, recursive=True)` call): Don't you want `recursive=recursive` here instead?