D6112.diff

diff --git a/swh/vault/api/client.py b/swh/vault/api/client.py
--- a/swh/vault/api/client.py
+++ b/swh/vault/api/client.py
@@ -7,9 +7,13 @@
from swh.vault.exc import NotFoundExc
from swh.vault.interface import VaultInterface
+from .serializers import DECODERS, ENCODERS
+
class RemoteVaultClient(RPCClient):
"""Client to the Software Heritage vault cache."""
backend_class = VaultInterface
reraise_exceptions = [NotFoundExc]
+ extra_type_decoders = DECODERS
+ extra_type_encoders = ENCODERS
diff --git a/swh/vault/api/serializers.py b/swh/vault/api/serializers.py
new file mode 100644
--- /dev/null
+++ b/swh/vault/api/serializers.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Callable, Dict, List, Tuple
+
+from swh.model.identifiers import CoreSWHID
+
+ENCODERS: List[Tuple[type, str, Callable]] = [
+ (CoreSWHID, "core_swhid", str),
+]
+
+
+DECODERS: Dict[str, Callable] = {
+ "core_swhid": CoreSWHID.from_string,
+}
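
For reference, a minimal sketch of how these serializers round-trip a SWHID over the RPC layer: the "core_swhid" encoder renders a CoreSWHID as its canonical string, and the decoder parses it back (the example SWHID is one used in the tests below).

    from swh.model.identifiers import CoreSWHID

    swhid = CoreSWHID.from_string("swh:1:dir:17a3e48bce37be5226490e750202ad3a9a1a3fe9")
    encoded = str(swhid)                      # what the "core_swhid" encoder emits
    decoded = CoreSWHID.from_string(encoded)  # what the "core_swhid" decoder returns
    assert decoded == swhid
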
diff --git a/swh/vault/api/server.py b/swh/vault/api/server.py
--- a/swh/vault/api/server.py
+++ b/swh/vault/api/server.py
@@ -17,12 +17,13 @@
from swh.vault.backend import NotFoundExc
from swh.vault.interface import VaultInterface
+from .serializers import DECODERS, ENCODERS
+
# do not define default services here
DEFAULT_CONFIG = {
"client_max_size": 1024 ** 3,
}
-
vault = None
app = None
@@ -37,6 +38,8 @@
class VaultServerApp(RPCServerApp):
client_exception_classes = (NotFoundExc,)
+ extra_type_decoders = DECODERS
+ extra_type_encoders = ENCODERS
@asyncio.coroutine
diff --git a/swh/vault/backend.py b/swh/vault/backend.py
--- a/swh/vault/backend.py
+++ b/swh/vault/backend.py
@@ -13,14 +13,13 @@
from swh.core.db import BaseDb
from swh.core.db.common import db_transaction
-from swh.model import hashutil
+from swh.model.identifiers import CoreSWHID
from swh.scheduler import get_scheduler
from swh.scheduler.utils import create_oneshot_task_dict
from swh.storage import get_storage
from swh.vault.cache import VaultCache
from swh.vault.cookers import COOKER_TYPES, get_cooker_cls
from swh.vault.exc import NotFoundExc
-from swh.vault.interface import ObjectId
cooking_task_name = "swh.vault.cooking_tasks.SWHCookingTask"
@@ -33,7 +32,7 @@
Vault:
Bundle Type: {bundle_type}
-Object ID: {hex_id}
+Object SWHID: {swhid}
This bundle is now available for download at the following address:
@@ -51,7 +50,7 @@
Vault:
Bundle Type: {bundle_type}
-Object ID: {hex_id}
+Object SWHID: {swhid}
This bundle could not be cooked for the following reason:
@@ -64,12 +63,6 @@
"""
-def batch_to_bytes(batch: List[Tuple[str, str]]) -> List[Tuple[str, bytes]]:
- return [
- (bundle_type, hashutil.hash_to_bytes(hex_id)) for bundle_type, hex_id in batch
- ]
-
-
class VaultBackend:
"""
Backend for the Software Heritage Vault.
@@ -100,138 +93,128 @@
if db is not self._db:
db.put_conn()
- def _compute_ids(self, obj_id: ObjectId) -> Tuple[str, bytes]:
- """Internal method to reconcile multiple possible inputs
-
- """
- if isinstance(obj_id, str):
- return obj_id, hashutil.hash_to_bytes(obj_id)
- return hashutil.hash_to_hex(obj_id), obj_id
-
@db_transaction()
def progress(
self,
bundle_type: str,
- obj_id: ObjectId,
+ swhid: CoreSWHID,
raise_notfound: bool = True,
db=None,
cur=None,
) -> Optional[Dict[str, Any]]:
- hex_id, obj_id = self._compute_ids(obj_id)
cur.execute(
"""
- SELECT id, type, object_id, task_id, task_status, sticky,
+ SELECT id, type, swhid, task_id, task_status, sticky,
ts_created, ts_done, ts_last_access, progress_msg
FROM vault_bundle
- WHERE type = %s AND object_id = %s""",
- (bundle_type, obj_id),
+ WHERE type = %s AND swhid = %s""",
+ (bundle_type, str(swhid)),
)
res = cur.fetchone()
if not res:
if raise_notfound:
- raise NotFoundExc(f"{bundle_type} {hex_id} was not found.")
+ raise NotFoundExc(f"{bundle_type} {swhid} was not found.")
return None
-
- res["object_id"] = hashutil.hash_to_hex(res["object_id"])
+ res["swhid"] = CoreSWHID.from_string(res["swhid"])
return res
- def _send_task(self, bundle_type: str, hex_id: ObjectId):
+ def _send_task(self, bundle_type: str, swhid: CoreSWHID):
"""Send a cooking task to the celery scheduler"""
- task = create_oneshot_task_dict("cook-vault-bundle", bundle_type, hex_id)
+ task = create_oneshot_task_dict("cook-vault-bundle", bundle_type, str(swhid))
added_tasks = self.scheduler.create_tasks([task])
return added_tasks[0]["id"]
@db_transaction()
def create_task(
- self, bundle_type: str, obj_id: bytes, sticky: bool = False, db=None, cur=None
+ self,
+ bundle_type: str,
+ swhid: CoreSWHID,
+ sticky: bool = False,
+ db=None,
+ cur=None,
):
"""Create and send a cooking task"""
- hex_id, obj_id = self._compute_ids(obj_id)
-
- cooker_class = get_cooker_cls(bundle_type)
- cooker = cooker_class(bundle_type, hex_id, backend=self, storage=self.storage)
+ cooker_class = get_cooker_cls(bundle_type, swhid.object_type)
+ cooker = cooker_class(swhid, backend=self, storage=self.storage)
if not cooker.check_exists():
- raise NotFoundExc(f"{bundle_type} {hex_id} was not found.")
+ raise NotFoundExc(f"{bundle_type} {swhid} was not found.")
cur.execute(
"""
- INSERT INTO vault_bundle (type, object_id, sticky)
+ INSERT INTO vault_bundle (type, swhid, sticky)
VALUES (%s, %s, %s)""",
- (bundle_type, obj_id, sticky),
+ (bundle_type, str(swhid), sticky),
)
db.conn.commit()
- task_id = self._send_task(bundle_type, hex_id)
+ task_id = self._send_task(bundle_type, swhid)
cur.execute(
"""
UPDATE vault_bundle
SET task_id = %s
- WHERE type = %s AND object_id = %s""",
- (task_id, bundle_type, obj_id),
+ WHERE type = %s AND swhid = %s""",
+ (task_id, bundle_type, str(swhid)),
)
@db_transaction()
def add_notif_email(
- self, bundle_type: str, obj_id: bytes, email: str, db=None, cur=None
+ self, bundle_type: str, swhid: CoreSWHID, email: str, db=None, cur=None
):
"""Add an e-mail address to notify when a given bundle is ready"""
cur.execute(
"""
INSERT INTO vault_notif_email (email, bundle_id)
VALUES (%s, (SELECT id FROM vault_bundle
- WHERE type = %s AND object_id = %s))""",
- (email, bundle_type, obj_id),
+ WHERE type = %s AND swhid = %s))""",
+ (email, bundle_type, str(swhid)),
)
- def put_bundle(self, bundle_type: str, obj_id: ObjectId, bundle) -> bool:
- _, obj_id = self._compute_ids(obj_id)
- self.cache.add(bundle_type, obj_id, bundle)
+ def put_bundle(self, bundle_type: str, swhid: CoreSWHID, bundle) -> bool:
+ self.cache.add(bundle_type, swhid, bundle)
return True
@db_transaction()
def cook(
self,
bundle_type: str,
- obj_id: ObjectId,
+ swhid: CoreSWHID,
*,
sticky: bool = False,
email: Optional[str] = None,
db=None,
cur=None,
) -> Dict[str, Any]:
- hex_id, obj_id = self._compute_ids(obj_id)
- info = self.progress(bundle_type, obj_id, raise_notfound=False)
+ info = self.progress(bundle_type, swhid, raise_notfound=False)
if bundle_type not in COOKER_TYPES:
raise NotFoundExc(f"{bundle_type} is an unknown type.")
# If there's a failed bundle entry, delete it first.
if info is not None and info["task_status"] == "failed":
- obj_id = hashutil.hash_to_bytes(obj_id)
cur.execute(
- "DELETE FROM vault_bundle WHERE type = %s AND object_id = %s",
- (bundle_type, obj_id),
+ "DELETE FROM vault_bundle WHERE type = %s AND swhid = %s",
+ (bundle_type, str(swhid)),
)
db.conn.commit()
info = None
# If there's no bundle entry, create the task.
if info is None:
- self.create_task(bundle_type, obj_id, sticky)
+ self.create_task(bundle_type, swhid, sticky)
if email is not None:
# If the task is already done, send the email directly
if info is not None and info["task_status"] == "done":
self.send_notification(
- None, email, bundle_type, hex_id, info["task_status"]
+ None, email, bundle_type, swhid, info["task_status"]
)
# Else, add it to the notification queue
else:
- self.add_notif_email(bundle_type, obj_id, email)
+ self.add_notif_email(bundle_type, swhid, email)
- return self.progress(bundle_type, obj_id)
+ return self.progress(bundle_type, swhid)
@db_transaction()
def batch_cook(
@@ -252,32 +235,31 @@
RETURNING id"""
)
batch_id = cur.fetchone()["id"]
- batch_bytes = batch_to_bytes(batch)
# Delete all failed bundles from the batch
cur.execute(
"""
DELETE FROM vault_bundle
WHERE task_status = 'failed'
- AND (type, object_id) IN %s""",
- (tuple(batch_bytes),),
+ AND (type, swhid) IN %s""",
+ (tuple(batch),),
)
# Insert all the bundles, return the new ones
execute_values(
cur,
"""
- INSERT INTO vault_bundle (type, object_id)
+ INSERT INTO vault_bundle (type, swhid)
VALUES %s ON CONFLICT DO NOTHING""",
- batch_bytes,
+ batch,
)
# Get the bundle ids and task status
cur.execute(
"""
- SELECT id, type, object_id, task_id FROM vault_bundle
- WHERE (type, object_id) IN %s""",
- (tuple(batch_bytes),),
+ SELECT id, type, swhid, task_id FROM vault_bundle
+ WHERE (type, swhid) IN %s""",
+ (tuple(batch),),
)
bundles = cur.fetchall()
@@ -294,16 +276,13 @@
# Get the tasks to fetch
batch_new = [
- (row["type"], bytes(row["object_id"]))
+ (row["type"], CoreSWHID.from_string(row["swhid"]))
for row in bundles
if row["task_id"] is None
]
# Send the tasks
- args_batch = [
- (bundle_type, hashutil.hash_to_hex(obj_id))
- for bundle_type, obj_id in batch_new
- ]
+ args_batch = [(bundle_type, swhid) for bundle_type, swhid in batch_new]
# TODO: change once the scheduler handles priority tasks
tasks = [
create_oneshot_task_dict("swh-vault-batch-cooking", *args)
@@ -312,8 +291,8 @@
added_tasks = self.scheduler.create_tasks(tasks)
tasks_ids_bundle_ids = [
- (task_id, bundle_type, obj_id)
- for task_id, (bundle_type, obj_id) in zip(
+ (task_id, bundle_type, swhid)
+ for task_id, (bundle_type, swhid) in zip(
[task["id"] for task in added_tasks], batch_new
)
]
@@ -324,8 +303,8 @@
"""
UPDATE vault_bundle
SET task_id = s_task_id
- FROM (VALUES %s) AS sub (s_task_id, s_type, s_object_id)
- WHERE type = s_type::cook_type AND object_id = s_object_id """,
+ FROM (VALUES %s) AS sub (s_task_id, s_type, s_swhid)
+ WHERE type = s_type::bundle_type AND swhid = s_swhid """,
tasks_ids_bundle_ids,
)
return {"id": batch_id}
@@ -335,7 +314,7 @@
cur.execute(
"""
SELECT vault_bundle.id as id,
- type, object_id, task_id, task_status, sticky,
+ type, swhid, task_id, task_status, sticky,
ts_created, ts_done, ts_last_access, progress_msg
FROM vault_batch_bundle
LEFT JOIN vault_bundle ON vault_bundle.id = bundle_id
@@ -347,7 +326,7 @@
raise NotFoundExc(f"Batch {batch_id} does not exist.")
for bundle in bundles:
- bundle["object_id"] = hashutil.hash_to_hex(bundle["object_id"])
+ bundle["swhid"] = CoreSWHID.from_string(bundle["swhid"])
counter = collections.Counter(b["status"] for b in bundles)
res = {
@@ -360,87 +339,82 @@
return res
@db_transaction()
- def is_available(self, bundle_type: str, obj_id: ObjectId, db=None, cur=None):
+ def is_available(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None):
"""Check whether a bundle is available for retrieval"""
- info = self.progress(bundle_type, obj_id, raise_notfound=False, cur=cur)
- obj_id = hashutil.hash_to_bytes(obj_id)
+ info = self.progress(bundle_type, swhid, raise_notfound=False, cur=cur)
return (
info is not None
and info["task_status"] == "done"
- and self.cache.is_cached(bundle_type, obj_id)
+ and self.cache.is_cached(bundle_type, swhid)
)
@db_transaction()
def fetch(
- self, bundle_type: str, obj_id: ObjectId, raise_notfound=True, db=None, cur=None
+ self, bundle_type: str, swhid: CoreSWHID, raise_notfound=True, db=None, cur=None
) -> Optional[bytes]:
"""Retrieve a bundle from the cache"""
- hex_id, obj_id = self._compute_ids(obj_id)
- available = self.is_available(bundle_type, obj_id, cur=cur)
+ available = self.is_available(bundle_type, swhid, cur=cur)
if not available:
if raise_notfound:
- raise NotFoundExc(f"{bundle_type} {hex_id} is not available.")
+ raise NotFoundExc(f"{bundle_type} {swhid} is not available.")
return None
- self.update_access_ts(bundle_type, obj_id, cur=cur)
- return self.cache.get(bundle_type, obj_id)
+ self.update_access_ts(bundle_type, swhid, cur=cur)
+ return self.cache.get(bundle_type, swhid)
@db_transaction()
- def update_access_ts(self, bundle_type: str, obj_id: bytes, db=None, cur=None):
+ def update_access_ts(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None):
"""Update the last access timestamp of a bundle"""
cur.execute(
"""
UPDATE vault_bundle
SET ts_last_access = NOW()
- WHERE type = %s AND object_id = %s""",
- (bundle_type, obj_id),
+ WHERE type = %s AND swhid = %s""",
+ (bundle_type, str(swhid)),
)
@db_transaction()
def set_status(
- self, bundle_type: str, obj_id: ObjectId, status: str, db=None, cur=None
+ self, bundle_type: str, swhid: CoreSWHID, status: str, db=None, cur=None
) -> bool:
- obj_id = hashutil.hash_to_bytes(obj_id)
req = (
"""
UPDATE vault_bundle
SET task_status = %s """
+ (""", ts_done = NOW() """ if status == "done" else "")
- + """WHERE type = %s AND object_id = %s"""
+ + """WHERE type = %s AND swhid = %s"""
)
- cur.execute(req, (status, bundle_type, obj_id))
+ cur.execute(req, (status, bundle_type, str(swhid)))
return True
@db_transaction()
def set_progress(
- self, bundle_type: str, obj_id: ObjectId, progress: str, db=None, cur=None
+ self, bundle_type: str, swhid: CoreSWHID, progress: str, db=None, cur=None
) -> bool:
- obj_id = hashutil.hash_to_bytes(obj_id)
cur.execute(
"""
UPDATE vault_bundle
SET progress_msg = %s
- WHERE type = %s AND object_id = %s""",
- (progress, bundle_type, obj_id),
+ WHERE type = %s AND swhid = %s""",
+ (progress, bundle_type, str(swhid)),
)
return True
@db_transaction()
- def send_notif(self, bundle_type: str, obj_id: ObjectId, db=None, cur=None) -> bool:
- hex_id, obj_id = self._compute_ids(obj_id)
+ def send_notif(self, bundle_type: str, swhid: CoreSWHID, db=None, cur=None) -> bool:
cur.execute(
"""
SELECT vault_notif_email.id AS id, email, task_status, progress_msg
FROM vault_notif_email
INNER JOIN vault_bundle ON bundle_id = vault_bundle.id
- WHERE vault_bundle.type = %s AND vault_bundle.object_id = %s""",
- (bundle_type, obj_id),
+ WHERE vault_bundle.type = %s AND vault_bundle.swhid = %s""",
+ (bundle_type, str(swhid)),
)
for d in cur:
self.send_notification(
d["id"],
d["email"],
bundle_type,
- hex_id,
+ swhid,
status=d["task_status"],
progress_msg=d["progress_msg"],
)
@@ -452,14 +426,14 @@
n_id: Optional[int],
email: str,
bundle_type: str,
- hex_id: str,
+ swhid: CoreSWHID,
status: str,
progress_msg: Optional[str] = None,
db=None,
cur=None,
) -> None:
"""Send the notification of a bundle to a specific e-mail"""
- short_id = hex_id[:7]
+ short_id = swhid.object_id.hex()[:7]
# TODO: instead of hardcoding this, we should probably:
# * add a "fetch_url" field in the vault_notif_email table
@@ -468,12 +442,12 @@
# the table
# * use this url for the notification e-mail
url = "https://archive.softwareheritage.org/api/1/vault/{}/{}/" "raw".format(
- bundle_type, hex_id
+ bundle_type, swhid
)
if status == "done":
text = NOTIF_EMAIL_BODY_SUCCESS.strip()
- text = text.format(bundle_type=bundle_type, hex_id=hex_id, url=url)
+ text = text.format(bundle_type=bundle_type, swhid=swhid, url=url)
msg = MIMEText(text)
msg["Subject"] = NOTIF_EMAIL_SUBJECT_SUCCESS.format(
bundle_type=bundle_type, short_id=short_id
@@ -481,7 +455,7 @@
elif status == "failed":
text = NOTIF_EMAIL_BODY_FAILURE.strip()
text = text.format(
- bundle_type=bundle_type, hex_id=hex_id, progress_msg=progress_msg
+ bundle_type=bundle_type, swhid=swhid, progress_msg=progress_msg
)
msg = MIMEText(text)
msg["Subject"] = NOTIF_EMAIL_SUBJECT_FAILURE.format(
@@ -530,7 +504,7 @@
WHERE sticky = false
{}
)
- RETURNING type, object_id
+ RETURNING type, swhid
""".format(
cond
),
@@ -538,7 +512,7 @@
)
for d in cur:
- self.cache.delete(d["type"], bytes(d["object_id"]))
+ self.cache.delete(d["type"], CoreSWHID.from_string(d["swhid"]))
@db_transaction()
def cache_expire_oldest(self, n=1, by="last_access", db=None, cur=None) -> None:
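
For illustration, here is how send_notification now derives its short identifier and fetch URL from a CoreSWHID, matching the formatting code above (the SWHID is the one used in the tests below; the resulting URL is only an example).

    from swh.model.identifiers import CoreSWHID

    swhid = CoreSWHID.from_string("swh:1:rev:4a4b9771542143cf070386f86b4b92d42966bdbc")
    short_id = swhid.object_id.hex()[:7]  # "4a4b977", used in the e-mail subject
    url = "https://archive.softwareheritage.org/api/1/vault/{}/{}/raw".format(
        "gitfast", swhid
    )
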
diff --git a/swh/vault/cache.py b/swh/vault/cache.py
--- a/swh/vault/cache.py
+++ b/swh/vault/cache.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from swh.model import hashutil
+from swh.model.identifiers import CoreSWHID
from swh.objstorage.factory import get_objstorage
from swh.objstorage.objstorage import compute_hash
@@ -11,37 +12,36 @@
class VaultCache:
"""The Vault cache is an object storage that stores Vault bundles.
- This implementation computes sha1('<bundle_type>:<object_id>') as the
+ This implementation computes sha1('<bundle_type>:<swhid>') as the
internal identifiers used in the underlying objstorage.
"""
def __init__(self, **objstorage):
self.objstorage = get_objstorage(**objstorage)
- def add(self, bundle_type, obj_id, content):
- sid = self._get_internal_id(bundle_type, obj_id)
+ def add(self, bundle_type, swhid: CoreSWHID, content):
+ sid = self._get_internal_id(bundle_type, swhid)
return self.objstorage.add(content, sid)
- def get(self, bundle_type, obj_id):
- sid = self._get_internal_id(bundle_type, obj_id)
+ def get(self, bundle_type, swhid: CoreSWHID):
+ sid = self._get_internal_id(bundle_type, swhid)
return self.objstorage.get(hashutil.hash_to_bytes(sid))
- def delete(self, bundle_type, obj_id):
- sid = self._get_internal_id(bundle_type, obj_id)
+ def delete(self, bundle_type, swhid: CoreSWHID):
+ sid = self._get_internal_id(bundle_type, swhid)
return self.objstorage.delete(hashutil.hash_to_bytes(sid))
- def add_stream(self, bundle_type, obj_id, content_iter):
- sid = self._get_internal_id(bundle_type, obj_id)
+ def add_stream(self, bundle_type, swhid: CoreSWHID, content_iter):
+ sid = self._get_internal_id(bundle_type, swhid)
return self.objstorage.add_stream(content_iter, sid)
- def get_stream(self, bundle_type, obj_id):
- sid = self._get_internal_id(bundle_type, obj_id)
+ def get_stream(self, bundle_type, swhid: CoreSWHID):
+ sid = self._get_internal_id(bundle_type, swhid)
return self.objstorage.get_stream(hashutil.hash_to_bytes(sid))
- def is_cached(self, bundle_type, obj_id):
- sid = self._get_internal_id(bundle_type, obj_id)
+ def is_cached(self, bundle_type, swhid: CoreSWHID):
+ sid = self._get_internal_id(bundle_type, swhid)
return hashutil.hash_to_bytes(sid) in self.objstorage
- def _get_internal_id(self, bundle_type, obj_id):
- obj_id = hashutil.hash_to_hex(obj_id)
- return compute_hash("{}:{}".format(bundle_type, obj_id).encode())
+ def _get_internal_id(self, bundle_type, swhid: CoreSWHID):
+ return compute_hash("{}:{}".format(bundle_type, swhid).encode())
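
As the updated docstring says, the internal objstorage identifier is sha1('<bundle_type>:<swhid>'). A hypothetical stand-in for compute_hash, assuming its default algorithm is sha1 as the class docstring indicates:

    import hashlib

    from swh.model.identifiers import CoreSWHID

    swhid = CoreSWHID.from_string("swh:1:rev:4a4b9771542143cf070386f86b4b92d42966bdbc")
    # same computation as _get_internal_id, under the sha1 assumption
    sid = hashlib.sha1("{}:{}".format("gitfast", swhid).encode()).hexdigest()
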
diff --git a/swh/vault/cli.py b/swh/vault/cli.py
--- a/swh/vault/cli.py
+++ b/swh/vault/cli.py
@@ -52,7 +52,7 @@
@click.argument("swhid", type=SwhidParamType())
@click.argument("outfile", type=click.File("wb"))
@click.option(
- "--cooker-type",
+ "--bundle-type",
type=click.Choice(["flat", "gitfast", "git_bare"]),
help="Selects which cooker to use, when there is more than one available "
"for the given object type.",
@@ -63,44 +63,23 @@
config_file: str,
swhid: CoreSWHID,
outfile: io.RawIOBase,
- cooker_type: Optional[str],
+ bundle_type: Optional[str],
):
"""
Runs a vault cooker for a single object (identified by a SWHID),
and outputs it to the given file.
"""
from swh.core import config
+ from swh.model.identifiers import ObjectType
from swh.objstorage.exc import ObjNotFoundError
from swh.objstorage.factory import get_objstorage
from swh.storage import get_storage
- from .cookers import COOKER_TYPES, get_cooker_cls
+ from .cookers import get_cooker_cls
from .in_memory_backend import InMemoryVaultBackend
conf = config.read(config_file)
- supported_object_types = {name.split("_")[0] for name in COOKER_TYPES}
- if swhid.object_type.name.lower() not in supported_object_types:
- raise click.ClickException(
- f"No cooker available for {swhid.object_type.name} objects."
- )
-
- cooker_name = swhid.object_type.name.lower()
-
- if cooker_type:
- cooker_name = f"{cooker_name}_{cooker_type}"
- if cooker_name not in COOKER_TYPES:
- raise click.ClickException(
- f"{swhid.object_type.name.lower()} objects do not have "
- f"a {cooker_type} cooker."
- )
- else:
- if cooker_name not in COOKER_TYPES:
- raise click.ClickException(
- f"{swhid.object_type.name.lower()} objects need "
- f"an explicit --cooker-type."
- )
-
try:
from swh.graph.client import RemoteGraphClient # optional dependency
@@ -114,12 +93,27 @@
graph = None
backend = InMemoryVaultBackend()
+
+ if bundle_type is None:
+ if swhid.object_type in (ObjectType.RELEASE, ObjectType.SNAPSHOT,):
+ bundle_type = "git_bare"
+ elif swhid.object_type in (ObjectType.DIRECTORY,):
+ bundle_type = "flat"
+ else:
+ raise click.ClickException(
+ "No default bundle type for this kind of object, "
+ "use --bundle-type to choose one"
+ )
+
+ try:
+ cooker_cls = get_cooker_cls(bundle_type, swhid.object_type)
+ except ValueError as e:
+ raise click.ClickException(*e.args)
+
storage = get_storage(**conf["storage"])
objstorage = get_objstorage(**conf["objstorage"]) if "objstorage" in conf else None
- cooker_cls = get_cooker_cls(cooker_name)
cooker = cooker_cls(
- bundle_type=cooker_name,
- obj_id=swhid.object_id,
+ swhid=swhid,
backend=backend,
storage=storage,
graph=graph,
@@ -129,7 +123,7 @@
cooker.cook()
try:
- bundle = backend.fetch(cooker_name, swhid.object_id)
+ bundle = backend.fetch(cooker_cls.BUNDLE_TYPE, swhid)
except ObjNotFoundError:
bundle = None
if bundle is None:
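
With the option renamed, a cooking run now looks like this (hypothetical SWHID and configuration file, and assuming the vault group is exposed through the standard swh CLI entry point):

    $ swh vault cook -C vault.yml swh:1:dir:17a3e48bce37be5226490e750202ad3a9a1a3fe9 out.tar.gz --bundle-type flat
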
diff --git a/swh/vault/cookers/__init__.py b/swh/vault/cookers/__init__.py
--- a/swh/vault/cookers/__init__.py
+++ b/swh/vault/cookers/__init__.py
@@ -6,30 +6,49 @@
from __future__ import annotations
import os
-from typing import Any, Dict
+from typing import Any, Dict, List, Type
from swh.core.config import load_named_config
from swh.core.config import read as read_config
+from swh.model.identifiers import CoreSWHID, ObjectType
from swh.storage import get_storage
from swh.vault import get_vault
-from swh.vault.cookers.base import DEFAULT_CONFIG, DEFAULT_CONFIG_PATH
+from swh.vault.cookers.base import DEFAULT_CONFIG, DEFAULT_CONFIG_PATH, BaseVaultCooker
from swh.vault.cookers.directory import DirectoryCooker
from swh.vault.cookers.git_bare import GitBareCooker
from swh.vault.cookers.revision_flat import RevisionFlatCooker
from swh.vault.cookers.revision_gitfast import RevisionGitfastCooker
-COOKER_TYPES = {
- "directory": DirectoryCooker,
- "revision_flat": RevisionFlatCooker,
- "revision_gitfast": RevisionGitfastCooker,
- "snapshot_git_bare": GitBareCooker,
- "revision_git_bare": GitBareCooker,
- "directory_git_bare": GitBareCooker,
-}
+_COOKER_CLS: List[Type[BaseVaultCooker]] = [
+ DirectoryCooker,
+ RevisionFlatCooker,
+ RevisionGitfastCooker,
+ GitBareCooker,
+]
+COOKER_TYPES: Dict[str, List[Type[BaseVaultCooker]]] = {}
-def get_cooker_cls(bundle_type):
- return COOKER_TYPES[bundle_type]
+for _cooker_cls in _COOKER_CLS:
+ COOKER_TYPES.setdefault(_cooker_cls.BUNDLE_TYPE, []).append(_cooker_cls)
+
+
+def get_cooker_cls(bundle_type: str, object_type: ObjectType):
+ cookers = COOKER_TYPES.get(bundle_type)
+
+ if not cookers:
+ raise ValueError(f"{bundle_type} is not a valid bundle type.")
+
+ for cooker in cookers:
+ try:
+ cooker.check_object_type(object_type)
+ except ValueError:
+ pass
+ else:
+ return cooker
+
+ raise ValueError(
+ f"{object_type.name.lower()} objects do not have a {bundle_type} cooker"
+ )
def check_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
@@ -67,11 +86,11 @@
return cfg
-def get_cooker(bundle_type: str, obj_id: str):
+def get_cooker(bundle_type: str, swhid: CoreSWHID):
"""Instantiate a cooker class of type bundle_type.
Returns:
- Cooker class in charge of cooking the bundle_type with id obj_id.
+ Cooker class in charge of cooking the bundle_type with id swhid.
Raises:
ValueError in case of a missing top-level vault key configuration or a storage
@@ -83,7 +102,7 @@
cfg = read_config(os.environ["SWH_CONFIG_FILENAME"], DEFAULT_CONFIG)
else:
cfg = load_named_config(DEFAULT_CONFIG_PATH, DEFAULT_CONFIG)
- cooker_cls = get_cooker_cls(bundle_type)
+ cooker_cls = get_cooker_cls(bundle_type, swhid.object_type)
cfg = check_config(cfg)
vcfg = cfg["vault"]
@@ -104,8 +123,7 @@
graph = None
return cooker_cls(
- bundle_type,
- obj_id,
+ swhid,
backend=backend,
storage=storage,
graph=graph,
diff --git a/swh/vault/cookers/base.py b/swh/vault/cookers/base.py
--- a/swh/vault/cookers/base.py
+++ b/swh/vault/cookers/base.py
@@ -6,12 +6,11 @@
import abc
import io
import logging
-from typing import Optional
+from typing import ClassVar, Set
from psycopg2.extensions import QueryCanceledError
-from swh.model import hashutil
-from swh.model.model import Sha1Git
+from swh.model.identifiers import CoreSWHID, ObjectType
from swh.storage.interface import StorageInterface
MAX_BUNDLE_SIZE = 2 ** 29 # 512 MiB
@@ -60,12 +59,12 @@
- def cook(): cook the object into a bundle
"""
- CACHE_TYPE_KEY = None # type: Optional[str]
+ SUPPORTED_OBJECT_TYPES: ClassVar[Set[ObjectType]]
+ BUNDLE_TYPE: ClassVar[str]
def __init__(
self,
- bundle_type: str,
- obj_id: Sha1Git,
+ swhid: CoreSWHID,
backend,
storage: StorageInterface,
graph=None,
@@ -79,20 +78,23 @@
own cooker class.
Args:
- bundle_type: type of the object to be cooked into a bundle (directory,
- revision_flat or revision_gitfast; see
- swh.vault.cooker.COOKER_TYPES).
- obj_id: id of the object to be cooked into a bundle.
+ swhid: id of the object to be cooked into a bundle.
backend: the vault backend (swh.vault.backend.VaultBackend).
"""
- self.bundle_type = bundle_type
- self.obj_id = hashutil.hash_to_bytes(obj_id)
+ self.check_object_type(swhid.object_type)
+ self.swhid = swhid
+ self.obj_id = swhid.object_id
self.backend = backend
self.storage = storage
self.objstorage = objstorage
self.graph = graph
self.max_bundle_size = max_bundle_size
+ @classmethod
+ def check_object_type(cls, object_type: ObjectType) -> None:
+ if object_type not in cls.SUPPORTED_OBJECT_TYPES:
+ raise ValueError(f"{cls.__name__} does not support {object_type} objects.")
+
@abc.abstractmethod
def check_exists(self):
"""Checks that the requested object exists and can be cooked.
@@ -110,8 +112,8 @@
raise NotImplementedError
def cache_type_key(self) -> str:
- assert self.CACHE_TYPE_KEY
- return self.CACHE_TYPE_KEY
+ assert self.BUNDLE_TYPE
+ return self.BUNDLE_TYPE
def write(self, chunk):
self.fileobj.write(chunk)
@@ -119,8 +121,8 @@
def cook(self):
"""Cook the requested object into a bundle
"""
- self.backend.set_status(self.bundle_type, self.obj_id, "pending")
- self.backend.set_progress(self.bundle_type, self.obj_id, "Processing...")
+ self.backend.set_status(self.BUNDLE_TYPE, self.swhid, "pending")
+ self.backend.set_progress(self.BUNDLE_TYPE, self.swhid, "Processing...")
self.fileobj = BytesIOBundleSizeLimit(size_limit=self.max_bundle_size)
try:
@@ -132,21 +134,21 @@
)
bundle = self.fileobj.getvalue()
# TODO: use proper content streaming instead of put_bundle()
- self.backend.put_bundle(self.cache_type_key(), self.obj_id, bundle)
+ self.backend.put_bundle(self.cache_type_key(), self.swhid, bundle)
except PolicyError as e:
logging.info("Bundle cooking violated policy: %s", e)
- self.backend.set_status(self.bundle_type, self.obj_id, "failed")
- self.backend.set_progress(self.bundle_type, self.obj_id, str(e))
+ self.backend.set_status(self.BUNDLE_TYPE, self.swhid, "failed")
+ self.backend.set_progress(self.BUNDLE_TYPE, self.swhid, str(e))
except Exception:
- self.backend.set_status(self.bundle_type, self.obj_id, "failed")
+ self.backend.set_status(self.BUNDLE_TYPE, self.swhid, "failed")
self.backend.set_progress(
- self.bundle_type,
- self.obj_id,
+ self.BUNDLE_TYPE,
+ self.swhid,
"Internal Server Error. This incident will be reported.",
)
logging.exception("Bundle cooking failed.")
else:
- self.backend.set_status(self.bundle_type, self.obj_id, "done")
- self.backend.set_progress(self.bundle_type, self.obj_id, None)
+ self.backend.set_status(self.BUNDLE_TYPE, self.swhid, "done")
+ self.backend.set_progress(self.BUNDLE_TYPE, self.swhid, None)
finally:
- self.backend.send_notif(self.bundle_type, self.obj_id)
+ self.backend.send_notif(self.BUNDLE_TYPE, self.swhid)
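
To illustrate the new class contract, a hypothetical minimal cooker (not part of this diff): BUNDLE_TYPE and SUPPORTED_OBJECT_TYPES are class variables, check_object_type is called from __init__, and check_exists/prepare_bundle remain the abstract hooks.

    from swh.model.identifiers import ObjectType
    from swh.vault.cookers.base import BaseVaultCooker

    class DummyDirectoryCooker(BaseVaultCooker):
        """Hypothetical example cooker, for illustration only."""

        BUNDLE_TYPE = "dummy"
        SUPPORTED_OBJECT_TYPES = {ObjectType.DIRECTORY}

        def check_exists(self):
            return True  # a real cooker queries storage here

        def prepare_bundle(self):
            self.write(b"bundle content")  # written to the size-limited fileobj
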
diff --git a/swh/vault/cookers/directory.py b/swh/vault/cookers/directory.py
--- a/swh/vault/cookers/directory.py
+++ b/swh/vault/cookers/directory.py
@@ -6,7 +6,7 @@
import tarfile
import tempfile
-from swh.model import hashutil
+from swh.model.identifiers import ObjectType
from swh.vault.cookers.base import BaseVaultCooker
from swh.vault.to_disk import DirectoryBuilder
@@ -14,7 +14,8 @@
class DirectoryCooker(BaseVaultCooker):
"""Cooker to create a directory bundle """
- CACHE_TYPE_KEY = "directory"
+ BUNDLE_TYPE = "flat"
+ SUPPORTED_OBJECT_TYPES = {ObjectType.DIRECTORY}
def check_exists(self):
return not list(self.storage.directory_missing([self.obj_id]))
@@ -24,4 +25,4 @@
directory_builder = DirectoryBuilder(self.storage, td.encode(), self.obj_id)
directory_builder.build()
with tarfile.open(fileobj=self.fileobj, mode="w:gz") as tar:
- tar.add(td, arcname=hashutil.hash_to_hex(self.obj_id))
+ tar.add(td, arcname=str(self.swhid))
diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -76,16 +76,18 @@
class GitBareCooker(BaseVaultCooker):
+ BUNDLE_TYPE = "git_bare"
+ SUPPORTED_OBJECT_TYPES = {
+ identifiers.ObjectType[obj_type.name] for obj_type in RootObjectType
+ }
+
use_fsck = True
obj_type: RootObjectType
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.obj_type = RootObjectType(self.bundle_type.split("_")[0])
-
- def cache_type_key(self) -> str:
- return self.bundle_type
+ self.obj_type = RootObjectType[self.swhid.object_type.name]
def check_exists(self) -> bool:
if self.obj_type is RootObjectType.REVISION:
@@ -97,12 +99,6 @@
else:
assert_never(self.obj_type, f"Unexpected root object type: {self.obj_type}")
- def obj_swhid(self) -> identifiers.CoreSWHID:
- return identifiers.CoreSWHID(
- object_type=identifiers.ObjectType[self.obj_type.name],
- object_id=self.obj_id,
- )
-
def _push(self, stack: List[Sha1Git], obj_ids: Iterable[Sha1Git]) -> None:
assert not isinstance(obj_ids, bytes)
revision_ids = [id_ for id_ in obj_ids if id_ not in self._seen]
@@ -250,7 +246,7 @@
def write_archive(self):
with tarfile.TarFile(mode="w", fileobj=self.fileobj) as tf:
- tf.add(self.gitdir, arcname=f"{self.obj_swhid()}.git", recursive=True)
+ tf.add(self.gitdir, arcname=f"{self.swhid}.git", recursive=True)
def _obj_path(self, obj_id: Sha1Git):
return os.path.join(self.gitdir, self._obj_relative_path(obj_id))
diff --git a/swh/vault/cookers/revision_flat.py b/swh/vault/cookers/revision_flat.py
--- a/swh/vault/cookers/revision_flat.py
+++ b/swh/vault/cookers/revision_flat.py
@@ -8,6 +8,7 @@
import tempfile
from swh.model import hashutil
+from swh.model.identifiers import ObjectType
from swh.vault.cookers.base import BaseVaultCooker
from swh.vault.cookers.utils import revision_log
from swh.vault.to_disk import DirectoryBuilder
@@ -16,15 +17,16 @@
class RevisionFlatCooker(BaseVaultCooker):
"""Cooker to create a revision_flat bundle """
- CACHE_TYPE_KEY = "revision_flat"
+ BUNDLE_TYPE = "flat"
+ SUPPORTED_OBJECT_TYPES = {ObjectType.REVISION}
def check_exists(self):
- return not list(self.storage.revision_missing([self.obj_id]))
+ return not list(self.storage.revision_missing([self.swhid.object_id]))
def prepare_bundle(self):
with tempfile.TemporaryDirectory(prefix="tmp-vault-revision-") as td:
root = Path(td)
- for revision in revision_log(self.storage, self.obj_id):
+ for revision in revision_log(self.storage, self.swhid.object_id):
revdir = root / hashutil.hash_to_hex(revision["id"])
revdir.mkdir()
directory_builder = DirectoryBuilder(
@@ -32,4 +34,4 @@
)
directory_builder.build()
with tarfile.open(fileobj=self.fileobj, mode="w:gz") as tar:
- tar.add(td, arcname=hashutil.hash_to_hex(self.obj_id))
+ tar.add(td, arcname=str(self.swhid))
diff --git a/swh/vault/cookers/revision_gitfast.py b/swh/vault/cookers/revision_gitfast.py
--- a/swh/vault/cookers/revision_gitfast.py
+++ b/swh/vault/cookers/revision_gitfast.py
@@ -18,6 +18,7 @@
from swh.model import hashutil
from swh.model.from_disk import DentryPerms, mode_to_perms
+from swh.model.identifiers import ObjectType
from swh.model.toposort import toposort
from swh.vault.cookers.base import BaseVaultCooker
from swh.vault.cookers.utils import revision_log
@@ -27,7 +28,8 @@
class RevisionGitfastCooker(BaseVaultCooker):
"""Cooker to create a git fast-import bundle """
- CACHE_TYPE_KEY = "revision_gitfast"
+ BUNDLE_TYPE = "gitfast"
+ SUPPORTED_OBJECT_TYPES = {ObjectType.REVISION}
def check_exists(self):
return not list(self.storage.revision_missing([self.obj_id]))
@@ -58,7 +60,7 @@
if last_progress_report is None or last_progress_report + 2 <= ct:
last_progress_report = ct
pg = "Computing revision {}/{}".format(i, len(self.log))
- self.backend.set_progress(self.bundle_type, self.obj_id, pg)
+ self.backend.set_progress(self.BUNDLE_TYPE, self.swhid, pg)
# Compute the current commit
self._compute_commit_command(rev)
diff --git a/swh/vault/cooking_tasks.py b/swh/vault/cooking_tasks.py
--- a/swh/vault/cooking_tasks.py
+++ b/swh/vault/cooking_tasks.py
@@ -1,21 +1,22 @@
-# Copyright (C) 2016-2017 The Software Heritage developers
+# Copyright (C) 2016-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from celery import current_app as app
+from swh.model.identifiers import CoreSWHID
from swh.vault.cookers import get_cooker
@app.task(name=__name__ + ".SWHCookingTask")
-def cook_bundle(bundle_type, obj_id):
+def cook_bundle(bundle_type, swhid):
"""Main task to cook a bundle."""
- get_cooker(bundle_type, obj_id).cook()
+ get_cooker(bundle_type, CoreSWHID.from_string(swhid)).cook()
# TODO: remove once the scheduler handles priority tasks
@app.task(name=__name__ + ".SWHBatchCookingTask")
-def batch_cook_bundle(bundle_type, obj_id):
+def batch_cook_bundle(bundle_type, swhid):
"""Temporary task for the batch queue."""
- get_cooker(bundle_type, obj_id).cook()
+ get_cooker(bundle_type, CoreSWHID.from_string(swhid)).cook()
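
Task arguments stay plain strings on the wire: the backend serializes the SWHID with str() when scheduling (see _send_task above), and the task re-parses it, e.g.:

    from swh.model.identifiers import CoreSWHID

    swhid = CoreSWHID.from_string("swh:1:rev:4a4b9771542143cf070386f86b4b92d42966bdbc")
    task_arg = str(swhid)                            # what the scheduler stores
    assert CoreSWHID.from_string(task_arg) == swhid  # what cook_bundle re-parses
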
diff --git a/swh/vault/in_memory_backend.py b/swh/vault/in_memory_backend.py
--- a/swh/vault/in_memory_backend.py
+++ b/swh/vault/in_memory_backend.py
@@ -3,14 +3,12 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple
-from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import CoreSWHID
from .cache import VaultCache
-ObjectId = Union[str, bytes]
-
class InMemoryVaultBackend:
"""Stub vault backend, for use in the CLI."""
@@ -18,30 +16,30 @@
def __init__(self):
self._cache = VaultCache(cls="memory")
- def fetch(self, bundle_type: str, obj_id: ObjectId) -> Optional[bytes]:
- return self._cache.get(bundle_type, hash_to_bytes(obj_id))
+ def fetch(self, bundle_type: str, swhid: CoreSWHID) -> Optional[bytes]:
+ return self._cache.get(bundle_type, swhid)
def cook(
- self, bundle_type: str, obj_id: ObjectId, email: Optional[str] = None
+ self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None
) -> Dict[str, Any]:
raise NotImplementedError("InMemoryVaultBackend.cook()")
- def progress(self, bundle_type: str, obj_id: ObjectId):
+ def progress(self, bundle_type: str, swhid: CoreSWHID):
raise NotImplementedError("InMemoryVaultBackend.progress()")
# Cookers endpoints
- def set_progress(self, bundle_type: str, obj_id: ObjectId, progress: str) -> None:
+ def set_progress(self, bundle_type: str, swhid: CoreSWHID, progress: str) -> None:
pass
- def set_status(self, bundle_type: str, obj_id: ObjectId, status: str) -> None:
+ def set_status(self, bundle_type: str, swhid: CoreSWHID, status: str) -> None:
pass
- def put_bundle(self, bundle_type: str, obj_id: ObjectId, bundle) -> bool:
- self._cache.add(bundle_type, hash_to_bytes(obj_id), bundle)
+ def put_bundle(self, bundle_type: str, swhid: CoreSWHID, bundle) -> bool:
+ self._cache.add(bundle_type, swhid, bundle)
return True
- def send_notif(self, bundle_type: str, obj_id: ObjectId):
+ def send_notif(self, bundle_type: str, swhid: CoreSWHID):
pass
# Batch endpoints
diff --git a/swh/vault/interface.py b/swh/vault/interface.py
--- a/swh/vault/interface.py
+++ b/swh/vault/interface.py
@@ -3,13 +3,12 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple
from typing_extensions import Protocol, runtime_checkable
from swh.core.api import remote_api_endpoint
-
-ObjectId = Union[str, bytes]
+from swh.model.identifiers import CoreSWHID
@runtime_checkable
@@ -19,41 +18,41 @@
"""
@remote_api_endpoint("fetch")
- def fetch(self, bundle_type: str, obj_id: ObjectId) -> Optional[bytes]:
+ def fetch(self, bundle_type: str, swhid: CoreSWHID) -> Optional[bytes]:
"""Fetch information from a bundle"""
...
@remote_api_endpoint("cook")
def cook(
- self, bundle_type: str, obj_id: ObjectId, email: Optional[str] = None
+ self, bundle_type: str, swhid: CoreSWHID, email: Optional[str] = None
) -> Dict[str, Any]:
"""Main entry point for cooking requests. This starts a cooking task if
needed, and adds the given e-mail to the notification list"""
...
@remote_api_endpoint("progress")
- def progress(self, bundle_type: str, obj_id: ObjectId):
+ def progress(self, bundle_type: str, swhid: CoreSWHID):
...
# Cookers endpoints
@remote_api_endpoint("set_progress")
- def set_progress(self, bundle_type: str, obj_id: ObjectId, progress: str) -> None:
+ def set_progress(self, bundle_type: str, swhid: CoreSWHID, progress: str) -> None:
"""Set the cooking progress of a bundle"""
...
@remote_api_endpoint("set_status")
- def set_status(self, bundle_type: str, obj_id: ObjectId, status: str) -> bool:
+ def set_status(self, bundle_type: str, swhid: CoreSWHID, status: str) -> bool:
"""Set the cooking status of a bundle"""
...
@remote_api_endpoint("put_bundle")
- def put_bundle(self, bundle_type: str, obj_id: ObjectId, bundle):
+ def put_bundle(self, bundle_type: str, swhid: CoreSWHID, bundle):
"""Store bundle in vault cache"""
...
@remote_api_endpoint("send_notif")
- def send_notif(self, bundle_type: str, obj_id: ObjectId):
+ def send_notif(self, bundle_type: str, swhid: CoreSWHID):
"""Send all the e-mails in the notification list of a bundle"""
...
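
Putting the interface and serializers together, a client-side sketch (hypothetical server URL; the method names are the remote API endpoints declared above):

    from swh.model.identifiers import CoreSWHID
    from swh.vault.api.client import RemoteVaultClient

    client = RemoteVaultClient("http://localhost:5005/")  # hypothetical URL
    swhid = CoreSWHID.from_string("swh:1:dir:17a3e48bce37be5226490e750202ad3a9a1a3fe9")
    client.cook("flat", swhid, email="user@example.com")
    client.progress("flat", swhid)
    bundle = client.fetch("flat", swhid)  # bytes of the bundle once cooked
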
diff --git a/swh/vault/sql/30-schema.sql b/swh/vault/sql/30-schema.sql
--- a/swh/vault/sql/30-schema.sql
+++ b/swh/vault/sql/30-schema.sql
@@ -6,12 +6,12 @@
);
comment on table dbversion is 'Schema update tracking';
insert into dbversion (version, release, description)
- values (1, now(), 'Initial version');
+ values (4, now(), 'Initial version');
create domain obj_hash as bytea;
-create type cook_type as enum ('directory', 'revision_gitfast');
-comment on type cook_type is 'Type of the requested bundle';
+create type bundle_type as enum ('flat', 'gitfast', 'git_bare');
+comment on type bundle_type is 'Type of the requested bundle';
create type cook_status as enum ('new', 'pending', 'done', 'failed');
comment on type cook_status is 'Status of the cooking';
@@ -19,8 +19,8 @@
create table vault_bundle (
id bigserial primary key,
- type cook_type not null, -- requested cooking type
- object_id obj_hash not null, -- requested object ID
+ type bundle_type not null,
+ swhid text not null, -- requested object SWHID
task_id integer, -- scheduler task id
task_status cook_status not null default 'new', -- status of the task
@@ -32,8 +32,8 @@
progress_msg text -- progress message
);
-create unique index concurrently vault_bundle_type_object
- on vault_bundle (type, object_id);
+create unique index concurrently vault_bundle_type_swhid
+ on vault_bundle (type, swhid);
create index concurrently vault_bundle_task_id
on vault_bundle (task_id);
diff --git a/swh/vault/tests/test_backend.py b/swh/vault/tests/test_backend.py
--- a/swh/vault/tests/test_backend.py
+++ b/swh/vault/tests/test_backend.py
@@ -11,7 +11,8 @@
import psycopg2
import pytest
-from swh.model import hashutil
+from swh.model.identifiers import CoreSWHID
+from swh.model.model import Content
from swh.vault.exc import NotFoundExc
from swh.vault.tests.vault_testing import hash_content
@@ -42,26 +43,26 @@
def fake_cook(backend, bundle_type, result_content, sticky=False):
+ swhid = Content.from_data(result_content).swhid()
content, obj_id = hash_content(result_content)
with mock_cooking(backend):
- backend.create_task(bundle_type, obj_id, sticky)
- backend.cache.add(bundle_type, obj_id, b"content")
- backend.set_status(bundle_type, obj_id, "done")
- return obj_id, content
+ backend.create_task(bundle_type, swhid, sticky)
+ backend.cache.add(bundle_type, swhid, b"content")
+ backend.set_status(bundle_type, swhid, "done")
+ return swhid, content
-def fail_cook(backend, bundle_type, obj_id, failure_reason):
+def fail_cook(backend, bundle_type, swhid, failure_reason):
with mock_cooking(backend):
- backend.create_task(bundle_type, obj_id)
- backend.set_status(bundle_type, obj_id, "failed")
- backend.set_progress(bundle_type, obj_id, failure_reason)
+ backend.create_task(bundle_type, swhid)
+ backend.set_status(bundle_type, swhid, "failed")
+ backend.set_progress(bundle_type, swhid, failure_reason)
-TEST_TYPE = "revision_gitfast"
-TEST_HEX_ID = "4a4b9771542143cf070386f86b4b92d42966bdbc"
-TEST_OBJ_ID = hashutil.hash_to_bytes(TEST_HEX_ID)
+TEST_TYPE = "gitfast"
+TEST_SWHID = CoreSWHID.from_string("swh:1:rev:4a4b9771542143cf070386f86b4b92d42966bdbc")
TEST_PROGRESS = (
- "Mr. White, You're telling me you're cooking again?" " \N{ASTONISHED FACE} "
+ "Mr. White, You're telling me you're cooking again? \N{ASTONISHED FACE} "
)
TEST_EMAIL = "ouiche@lorraine.fr"
@@ -69,30 +70,29 @@
@pytest.fixture
def swh_vault(swh_vault, sample_data):
# make the vault's storage consistent with test data
- revision = attr.evolve(sample_data.revision, id=TEST_OBJ_ID)
+ revision = attr.evolve(sample_data.revision, id=TEST_SWHID.object_id)
swh_vault.storage.revision_add([revision])
return swh_vault
def test_create_task_simple(swh_vault):
with mock_cooking(swh_vault) as m:
- swh_vault.create_task(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.create_task(TEST_TYPE, TEST_SWHID)
- m["get_cooker_cls"].assert_called_once_with(TEST_TYPE)
+ m["get_cooker_cls"].assert_called_once_with(TEST_TYPE, TEST_SWHID.object_type)
args = m["cooker_cls"].call_args[0]
- assert args[0] == TEST_TYPE
- assert args[1] == TEST_HEX_ID
+ assert args[0] == TEST_SWHID
assert m["cooker"].check_exists.call_count == 1
assert m["_send_task"].call_count == 1
args = m["_send_task"].call_args[0]
assert args[0] == TEST_TYPE
- assert args[1] == TEST_HEX_ID
+ assert args[1] == TEST_SWHID
- info = swh_vault.progress(TEST_TYPE, TEST_HEX_ID)
- assert info["object_id"] == TEST_HEX_ID
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
+ assert info["swhid"] == TEST_SWHID
assert info["type"] == TEST_TYPE
assert info["task_status"] == "new"
assert info["task_id"] == 42
@@ -105,63 +105,63 @@
def test_create_fail_duplicate_task(swh_vault):
with mock_cooking(swh_vault):
- swh_vault.create_task(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.create_task(TEST_TYPE, TEST_SWHID)
with pytest.raises(psycopg2.IntegrityError):
- swh_vault.create_task(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.create_task(TEST_TYPE, TEST_SWHID)
def test_create_fail_nonexisting_object(swh_vault):
with mock_cooking(swh_vault) as m:
m["cooker"].check_exists.side_effect = ValueError("Nothing here.")
with pytest.raises(ValueError):
- swh_vault.create_task(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.create_task(TEST_TYPE, TEST_SWHID)
def test_create_set_progress(swh_vault):
with mock_cooking(swh_vault):
- swh_vault.create_task(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.create_task(TEST_TYPE, TEST_SWHID)
- info = swh_vault.progress(TEST_TYPE, TEST_HEX_ID)
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
assert info["progress_msg"] is None
- swh_vault.set_progress(TEST_TYPE, TEST_HEX_ID, TEST_PROGRESS)
- info = swh_vault.progress(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.set_progress(TEST_TYPE, TEST_SWHID, TEST_PROGRESS)
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
assert info["progress_msg"] == TEST_PROGRESS
def test_create_set_status(swh_vault):
with mock_cooking(swh_vault):
- swh_vault.create_task(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.create_task(TEST_TYPE, TEST_SWHID)
- info = swh_vault.progress(TEST_TYPE, TEST_HEX_ID)
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
assert info["task_status"] == "new"
assert info["ts_done"] is None
- swh_vault.set_status(TEST_TYPE, TEST_HEX_ID, "pending")
- info = swh_vault.progress(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.set_status(TEST_TYPE, TEST_SWHID, "pending")
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
assert info["task_status"] == "pending"
assert info["ts_done"] is None
- swh_vault.set_status(TEST_TYPE, TEST_HEX_ID, "done")
- info = swh_vault.progress(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.set_status(TEST_TYPE, TEST_SWHID, "done")
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
assert info["task_status"] == "done"
assertTimestampAlmostNow(info["ts_done"])
def test_create_update_access_ts(swh_vault):
with mock_cooking(swh_vault):
- swh_vault.create_task(TEST_TYPE, TEST_OBJ_ID)
+ swh_vault.create_task(TEST_TYPE, TEST_SWHID)
- info = swh_vault.progress(TEST_TYPE, TEST_OBJ_ID)
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
access_ts_1 = info["ts_last_access"]
assertTimestampAlmostNow(access_ts_1)
- swh_vault.update_access_ts(TEST_TYPE, TEST_OBJ_ID)
- info = swh_vault.progress(TEST_TYPE, TEST_OBJ_ID)
+ swh_vault.update_access_ts(TEST_TYPE, TEST_SWHID)
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
access_ts_2 = info["ts_last_access"]
assertTimestampAlmostNow(access_ts_2)
- swh_vault.update_access_ts(TEST_TYPE, TEST_OBJ_ID)
- info = swh_vault.progress(TEST_TYPE, TEST_OBJ_ID)
+ swh_vault.update_access_ts(TEST_TYPE, TEST_SWHID)
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
access_ts_3 = info["ts_last_access"]
assertTimestampAlmostNow(access_ts_3)
@@ -172,9 +172,9 @@
def test_cook_idempotent(swh_vault, sample_data):
with mock_cooking(swh_vault):
- info1 = swh_vault.cook(TEST_TYPE, TEST_HEX_ID)
- info2 = swh_vault.cook(TEST_TYPE, TEST_HEX_ID)
- info3 = swh_vault.cook(TEST_TYPE, TEST_HEX_ID)
+ info1 = swh_vault.cook(TEST_TYPE, TEST_SWHID)
+ info2 = swh_vault.cook(TEST_TYPE, TEST_SWHID)
+ info3 = swh_vault.cook(TEST_TYPE, TEST_SWHID)
assert info1 == info2
assert info1 == info3
@@ -184,23 +184,23 @@
swh_vault, "add_notif_email"
) as madd, patch.object(swh_vault, "send_notification") as msend:
- swh_vault.cook(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.cook(TEST_TYPE, TEST_SWHID)
madd.assert_not_called()
msend.assert_not_called()
madd.reset_mock()
msend.reset_mock()
- swh_vault.cook(TEST_TYPE, TEST_HEX_ID, email=TEST_EMAIL)
- madd.assert_called_once_with(TEST_TYPE, TEST_OBJ_ID, TEST_EMAIL)
+ swh_vault.cook(TEST_TYPE, TEST_SWHID, email=TEST_EMAIL)
+ madd.assert_called_once_with(TEST_TYPE, TEST_SWHID, TEST_EMAIL)
msend.assert_not_called()
madd.reset_mock()
msend.reset_mock()
- swh_vault.set_status(TEST_TYPE, TEST_HEX_ID, "done")
- swh_vault.cook(TEST_TYPE, TEST_HEX_ID, email=TEST_EMAIL)
- msend.assert_called_once_with(None, TEST_EMAIL, TEST_TYPE, TEST_HEX_ID, "done")
+ swh_vault.set_status(TEST_TYPE, TEST_SWHID, "done")
+ swh_vault.cook(TEST_TYPE, TEST_SWHID, email=TEST_EMAIL)
+ msend.assert_called_once_with(None, TEST_EMAIL, TEST_TYPE, TEST_SWHID, "done")
madd.assert_not_called()
@@ -208,12 +208,12 @@
with mock_cooking(swh_vault):
emails = ("a@example.com", "billg@example.com", "test+42@example.org")
for email in emails:
- swh_vault.cook(TEST_TYPE, TEST_HEX_ID, email=email)
+ swh_vault.cook(TEST_TYPE, TEST_SWHID, email=email)
- swh_vault.set_status(TEST_TYPE, TEST_HEX_ID, "done")
+ swh_vault.set_status(TEST_TYPE, TEST_SWHID, "done")
with patch.object(swh_vault, "smtp_server") as m:
- swh_vault.send_notif(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.send_notif(TEST_TYPE, TEST_SWHID)
sent_emails = {k[0][0] for k in m.send_message.call_args_list}
assert {k["To"] for k in sent_emails} == set(emails)
@@ -221,49 +221,49 @@
for e in sent_emails:
assert "bot@softwareheritage.org" in e["From"]
assert TEST_TYPE in e["Subject"]
- assert TEST_HEX_ID[:5] in e["Subject"]
+ assert TEST_SWHID.object_id.hex()[:5] in e["Subject"]
assert TEST_TYPE in str(e)
assert "https://archive.softwareheritage.org/" in str(e)
- assert TEST_HEX_ID[:5] in str(e)
+ assert TEST_SWHID.object_id.hex()[:5] in str(e)
assert "--\x20\n" in str(e) # Well-formated signature!!!
# Check that the entries have been deleted and recalling the
# function does not re-send the e-mails
m.reset_mock()
- swh_vault.send_notif(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.send_notif(TEST_TYPE, TEST_SWHID)
m.assert_not_called()
def test_available(swh_vault):
- assert not swh_vault.is_available(TEST_TYPE, TEST_HEX_ID)
+ assert not swh_vault.is_available(TEST_TYPE, TEST_SWHID)
with mock_cooking(swh_vault):
- swh_vault.create_task(TEST_TYPE, TEST_HEX_ID)
- assert not swh_vault.is_available(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.create_task(TEST_TYPE, TEST_SWHID)
+ assert not swh_vault.is_available(TEST_TYPE, TEST_SWHID)
- swh_vault.cache.add(TEST_TYPE, TEST_HEX_ID, b"content")
- assert not swh_vault.is_available(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.cache.add(TEST_TYPE, TEST_SWHID, b"content")
+ assert not swh_vault.is_available(TEST_TYPE, TEST_SWHID)
- swh_vault.set_status(TEST_TYPE, TEST_HEX_ID, "done")
- assert swh_vault.is_available(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.set_status(TEST_TYPE, TEST_SWHID, "done")
+ assert swh_vault.is_available(TEST_TYPE, TEST_SWHID)
def test_fetch(swh_vault):
- assert swh_vault.fetch(TEST_TYPE, TEST_HEX_ID, raise_notfound=False) is None
+ assert swh_vault.fetch(TEST_TYPE, TEST_SWHID, raise_notfound=False) is None
with pytest.raises(
- NotFoundExc, match=f"{TEST_TYPE} {TEST_HEX_ID} is not available."
+ NotFoundExc, match=f"{TEST_TYPE} {TEST_SWHID} is not available."
):
- swh_vault.fetch(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.fetch(TEST_TYPE, TEST_SWHID)
- obj_id, content = fake_cook(swh_vault, TEST_TYPE, b"content")
+ swhid, content = fake_cook(swh_vault, TEST_TYPE, b"content")
- info = swh_vault.progress(TEST_TYPE, obj_id)
+ info = swh_vault.progress(TEST_TYPE, swhid)
access_ts_before = info["ts_last_access"]
- assert swh_vault.fetch(TEST_TYPE, obj_id) == b"content"
+ assert swh_vault.fetch(TEST_TYPE, swhid) == b"content"
- info = swh_vault.progress(TEST_TYPE, obj_id)
+ info = swh_vault.progress(TEST_TYPE, swhid)
access_ts_after = info["ts_last_access"]
assertTimestampAlmostNow(access_ts_after)
@@ -276,8 +276,8 @@
for i in r:
sticky = i == 5
content = b"content%s" % str(i).encode()
- obj_id, content = fake_cook(swh_vault, TEST_TYPE, content, sticky)
- inserted[i] = (obj_id, content)
+ swhid, content = fake_cook(swh_vault, TEST_TYPE, content, sticky)
+ inserted[i] = (swhid, content)
swh_vault.update_access_ts(TEST_TYPE, inserted[2][0])
swh_vault.update_access_ts(TEST_TYPE, inserted[3][0])
@@ -296,8 +296,8 @@
for i in r:
sticky = i == 5
content = b"content%s" % str(i).encode()
- obj_id, content = fake_cook(swh_vault, TEST_TYPE, content, sticky)
- inserted[i] = (obj_id, content)
+ swhid, content = fake_cook(swh_vault, TEST_TYPE, content, sticky)
+ inserted[i] = (swhid, content)
if i == 7:
cutoff_date = datetime.datetime.now()
@@ -314,40 +314,40 @@
def test_fail_cook_simple(swh_vault):
- fail_cook(swh_vault, TEST_TYPE, TEST_HEX_ID, "error42")
- assert not swh_vault.is_available(TEST_TYPE, TEST_HEX_ID)
- info = swh_vault.progress(TEST_TYPE, TEST_HEX_ID)
+ fail_cook(swh_vault, TEST_TYPE, TEST_SWHID, "error42")
+ assert not swh_vault.is_available(TEST_TYPE, TEST_SWHID)
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
assert info["progress_msg"] == "error42"
def test_send_failure_email(swh_vault):
with mock_cooking(swh_vault):
- swh_vault.cook(TEST_TYPE, TEST_HEX_ID, email="a@example.com")
+ swh_vault.cook(TEST_TYPE, TEST_SWHID, email="a@example.com")
- swh_vault.set_status(TEST_TYPE, TEST_HEX_ID, "failed")
- swh_vault.set_progress(TEST_TYPE, TEST_HEX_ID, "test error")
+ swh_vault.set_status(TEST_TYPE, TEST_SWHID, "failed")
+ swh_vault.set_progress(TEST_TYPE, TEST_SWHID, "test error")
with patch.object(swh_vault, "smtp_server") as m:
- swh_vault.send_notif(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.send_notif(TEST_TYPE, TEST_SWHID)
e = [k[0][0] for k in m.send_message.call_args_list][0]
assert e["To"] == "a@example.com"
assert "bot@softwareheritage.org" in e["From"]
assert TEST_TYPE in e["Subject"]
- assert TEST_HEX_ID[:5] in e["Subject"]
+ assert TEST_SWHID.object_id.hex()[:5] in e["Subject"]
assert "fail" in e["Subject"]
assert TEST_TYPE in str(e)
- assert TEST_HEX_ID[:5] in str(e)
+ assert TEST_SWHID.object_id.hex()[:5] in str(e)
assert "test error" in str(e)
assert "--\x20\n" in str(e) # Well-formated signature
def test_retry_failed_bundle(swh_vault):
- fail_cook(swh_vault, TEST_TYPE, TEST_HEX_ID, "error42")
- info = swh_vault.progress(TEST_TYPE, TEST_HEX_ID)
+ fail_cook(swh_vault, TEST_TYPE, TEST_SWHID, "error42")
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
assert info["task_status"] == "failed"
with mock_cooking(swh_vault):
- swh_vault.cook(TEST_TYPE, TEST_HEX_ID)
- info = swh_vault.progress(TEST_TYPE, TEST_HEX_ID)
+ swh_vault.cook(TEST_TYPE, TEST_SWHID)
+ info = swh_vault.progress(TEST_TYPE, TEST_SWHID)
assert info["task_status"] == "new"
diff --git a/swh/vault/tests/test_cache.py b/swh/vault/tests/test_cache.py
--- a/swh/vault/tests/test_cache.py
+++ b/swh/vault/tests/test_cache.py
@@ -5,15 +5,17 @@
from swh.model import hashutil
+from swh.model.identifiers import CoreSWHID
TEST_TYPE_1 = "revision_gitfast"
TEST_TYPE_2 = "directory"
-TEST_HEX_ID_1 = "4a4b9771542143cf070386f86b4b92d42966bdbc"
-TEST_HEX_ID_2 = "17a3e48bce37be5226490e750202ad3a9a1a3fe9"
-
-TEST_OBJ_ID_1 = hashutil.hash_to_bytes(TEST_HEX_ID_1)
-TEST_OBJ_ID_2 = hashutil.hash_to_bytes(TEST_HEX_ID_2)
+TEST_SWHID_1 = CoreSWHID.from_string(
+ "swh:1:rev:4a4b9771542143cf070386f86b4b92d42966bdbc"
+)
+TEST_SWHID_2 = CoreSWHID.from_string(
+ "swh:1:dir:17a3e48bce37be5226490e750202ad3a9a1a3fe9"
+)
TEST_CONTENT_1 = b"test content 1"
TEST_CONTENT_2 = b"test content 2"
@@ -25,38 +27,38 @@
def test_internal_id(swh_vault):
- sid = swh_vault.cache._get_internal_id(TEST_TYPE_1, TEST_OBJ_ID_1)
- assert hashutil.hash_to_hex(sid) == "6829cda55b54c295aa043a611a4e0320239988d9"
+ sid = swh_vault.cache._get_internal_id(TEST_TYPE_1, TEST_SWHID_1)
+ assert hashutil.hash_to_hex(sid) == "ec2a99d6b21a68648a9d0c99c5d7c35f69268564"
def test_simple_add_get(swh_vault):
- swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1)
- assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1) == TEST_CONTENT_1
- assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)
+ swh_vault.cache.add(TEST_TYPE_1, TEST_SWHID_1, TEST_CONTENT_1)
+ assert swh_vault.cache.get(TEST_TYPE_1, TEST_SWHID_1) == TEST_CONTENT_1
+ assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_SWHID_1)
def test_different_type_same_id(swh_vault):
- swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1)
- swh_vault.cache.add(TEST_TYPE_2, TEST_OBJ_ID_1, TEST_CONTENT_2)
- assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1) == TEST_CONTENT_1
- assert swh_vault.cache.get(TEST_TYPE_2, TEST_OBJ_ID_1) == TEST_CONTENT_2
- assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)
- assert swh_vault.cache.is_cached(TEST_TYPE_2, TEST_OBJ_ID_1)
+ swh_vault.cache.add(TEST_TYPE_1, TEST_SWHID_1, TEST_CONTENT_1)
+ swh_vault.cache.add(TEST_TYPE_2, TEST_SWHID_1, TEST_CONTENT_2)
+ assert swh_vault.cache.get(TEST_TYPE_1, TEST_SWHID_1) == TEST_CONTENT_1
+ assert swh_vault.cache.get(TEST_TYPE_2, TEST_SWHID_1) == TEST_CONTENT_2
+ assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_SWHID_1)
+ assert swh_vault.cache.is_cached(TEST_TYPE_2, TEST_SWHID_1)
def test_different_type_same_content(swh_vault):
- swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1)
- swh_vault.cache.add(TEST_TYPE_2, TEST_OBJ_ID_1, TEST_CONTENT_1)
- assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1) == TEST_CONTENT_1
- assert swh_vault.cache.get(TEST_TYPE_2, TEST_OBJ_ID_1) == TEST_CONTENT_1
- assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)
- assert swh_vault.cache.is_cached(TEST_TYPE_2, TEST_OBJ_ID_1)
+ swh_vault.cache.add(TEST_TYPE_1, TEST_SWHID_1, TEST_CONTENT_1)
+ swh_vault.cache.add(TEST_TYPE_2, TEST_SWHID_1, TEST_CONTENT_1)
+ assert swh_vault.cache.get(TEST_TYPE_1, TEST_SWHID_1) == TEST_CONTENT_1
+ assert swh_vault.cache.get(TEST_TYPE_2, TEST_SWHID_1) == TEST_CONTENT_1
+ assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_SWHID_1)
+ assert swh_vault.cache.is_cached(TEST_TYPE_2, TEST_SWHID_1)
def test_different_id_same_type(swh_vault):
- swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1)
- swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_2, TEST_CONTENT_2)
- assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1) == TEST_CONTENT_1
- assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_2) == TEST_CONTENT_2
- assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)
- assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_2)
+ swh_vault.cache.add(TEST_TYPE_1, TEST_SWHID_1, TEST_CONTENT_1)
+ swh_vault.cache.add(TEST_TYPE_1, TEST_SWHID_2, TEST_CONTENT_2)
+ assert swh_vault.cache.get(TEST_TYPE_1, TEST_SWHID_1) == TEST_CONTENT_1
+ assert swh_vault.cache.get(TEST_TYPE_1, TEST_SWHID_2) == TEST_CONTENT_2
+ assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_SWHID_1)
+ assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_SWHID_2)
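
test_internal_id above pins a new digest because the cache key is now derived from the textual SWHID rather than from raw object-id bytes. The actual derivation lives in swh/vault/cache.py; the sketch below only illustrates, under the assumption that the key is a SHA1 over the bundle type plus identifier, why every pre-existing cache key changes:

    import hashlib

    from swh.model.identifiers import CoreSWHID

    swhid = CoreSWHID.from_string(
        "swh:1:rev:4a4b9771542143cf070386f86b4b92d42966bdbc"
    )

    # Hypothetical key derivations, old vs. new input material:
    old_style = hashlib.sha1(b"revision_gitfast" + swhid.object_id).hexdigest()
    new_style = hashlib.sha1(f"revision_gitfast {swhid}".encode()).hexdigest()
    assert old_style != new_style  # cached bundles are effectively re-keyed
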
diff --git a/swh/vault/tests/test_cli.py b/swh/vault/tests/test_cli.py
--- a/swh/vault/tests/test_cli.py
+++ b/swh/vault/tests/test_cli.py
@@ -10,6 +10,7 @@
import click.testing
import pytest
+from swh.model.identifiers import CoreSWHID
from swh.vault.cli import vault as vault_cli_group
from swh.vault.cookers.base import BaseVaultCooker
from swh.vault.in_memory_backend import InMemoryVaultBackend
@@ -26,24 +27,20 @@
assert isinstance(result.exception, SystemExit)
assert "expected core SWHID" in result.stdout
- result = runner.invoke(vault_cli_group, ["cook", "swh:1:cnt:" + "0" * 40, "-"])
- assert isinstance(result.exception, SystemExit)
- assert "No cooker available for CONTENT" in result.stdout
-
def test_cook_unknown_cooker():
runner = click.testing.CliRunner()
result = runner.invoke(
vault_cli_group,
- ["cook", "swh:1:dir:" + "0" * 40, "-", "--cooker-type", "gitfast"],
+ ["cook", "swh:1:dir:" + "0" * 40, "-", "--bundle-type", "gitfast"],
)
assert isinstance(result.exception, SystemExit)
assert "do not have a gitfast cooker" in result.stdout
result = runner.invoke(vault_cli_group, ["cook", "swh:1:rev:" + "0" * 40, "-"])
assert isinstance(result.exception, SystemExit)
- assert "explicit --cooker-type" in result.stdout
+ assert "use --bundle-type" in result.stdout
@pytest.mark.parametrize(
@@ -66,6 +63,8 @@
runner = click.testing.CliRunner()
+ swhid = CoreSWHID.from_string(f"swh:1:{swhid_type}:{'0'*40}")
+
with tempfile.NamedTemporaryFile("a", suffix=".yml") as config_fd:
config_fd.write('{"storage": {}}')
config_fd.seek(0)
@@ -78,24 +77,20 @@
"-",
"-C",
config_fd.name,
- "--cooker-type",
+ "--bundle-type",
cooker_name_suffix,
],
)
else:
result = runner.invoke(
- vault_cli_group,
- ["cook", f"swh:1:{swhid_type}:{'0'*40}", "-", "-C", config_fd.name],
+ vault_cli_group, ["cook", str(swhid), "-", "-C", config_fd.name],
)
if result.exception is not None:
raise result.exception
cooker_cls.assert_called_once_with(
- bundle_type=f"{bundle_type}_{cooker_name_suffix}"
- if cooker_name_suffix
- else bundle_type,
- obj_id=b"\x00" * 20,
+ swhid=swhid,
backend=backend,
storage=storage,
graph=None,
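
The CLI now selects a bundle flavour with --bundle-type (formerly --cooker-type). A sketch of the invocation shape through click's test runner, mirroring the tests above (without a -C config file the command exits with an error, which is what these tests assert on):

    import click.testing

    from swh.vault.cli import vault as vault_cli_group

    runner = click.testing.CliRunner()
    result = runner.invoke(
        vault_cli_group,
        ["cook", "swh:1:rev:" + "0" * 40, "-", "--bundle-type", "gitfast"],
    )
    print(result.output)
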
diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py
--- a/swh/vault/tests/test_cookers.py
+++ b/swh/vault/tests/test_cookers.py
@@ -25,7 +25,8 @@
import pytest
from swh.loader.git.from_disk import GitLoaderFromDisk
-from swh.model import from_disk, hashutil, identifiers
+from swh.model import from_disk, hashutil
+from swh.model.identifiers import CoreSWHID, ObjectType
from swh.model.model import (
Directory,
DirectoryEntry,
@@ -172,11 +173,11 @@
@contextlib.contextmanager
-def cook_extract_directory_dircooker(storage, obj_id, fsck=True):
+def cook_extract_directory_dircooker(storage, swhid, fsck=True):
"""Context manager that cooks a directory and extract it."""
backend = unittest.mock.MagicMock()
backend.storage = storage
- cooker = DirectoryCooker("directory", obj_id, backend=backend, storage=storage)
+ cooker = DirectoryCooker(swhid, backend=backend, storage=storage)
cooker.fileobj = io.BytesIO()
assert cooker.check_exists()
cooker.prepare_bundle()
@@ -184,12 +185,12 @@
with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td:
with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar:
tar.extractall(td)
- yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id)
+ yield pathlib.Path(td) / str(swhid)
cooker.storage = None
@contextlib.contextmanager
-def cook_extract_directory_gitfast(storage, obj_id, fsck=True):
+def cook_extract_directory_gitfast(storage, swhid, fsck=True):
"""Context manager that cooks a revision containing a directory and extract it,
using RevisionGitfastCooker"""
test_repo = TestRepo()
@@ -198,7 +199,7 @@
datetime.datetime.now(datetime.timezone.utc)
)
revision = Revision(
- directory=obj_id,
+ directory=swhid.object_id,
message=b"dummy message",
author=Person.from_fullname(b"someone"),
committer=Person.from_fullname(b"someone"),
@@ -209,7 +210,9 @@
)
storage.revision_add([revision])
- with cook_stream_revision_gitfast(storage, revision.id) as stream, test_repo as p:
+ with cook_stream_revision_gitfast(
+ storage, revision.swhid()
+ ) as stream, test_repo as p:
processor = dulwich.fastexport.GitImportProcessor(test_repo.repo)
processor.import_stream(stream)
test_repo.checkout(b"HEAD")
@@ -218,9 +221,7 @@
@contextlib.contextmanager
-def cook_extract_directory_git_bare(
- storage, obj_id, fsck=True, direct_objstorage=False
-):
+def cook_extract_directory_git_bare(storage, swhid, fsck=True, direct_objstorage=False):
"""Context manager that cooks a revision and extract it,
using GitBareCooker"""
backend = unittest.mock.MagicMock()
@@ -228,8 +229,7 @@
# Cook the object
cooker = GitBareCooker(
- "directory",
- obj_id,
+ swhid,
backend=backend,
storage=storage,
objstorage=storage.objstorage if direct_objstorage else None,
@@ -249,12 +249,7 @@
with tempfile.TemporaryDirectory(prefix="tmp-vault-clone-") as clone_dir:
clone_dir = pathlib.Path(clone_dir)
subprocess.check_call(
- [
- "git",
- "clone",
- os.path.join(td, f"swh:1:dir:{obj_id.hex()}.git"),
- clone_dir,
- ]
+ ["git", "clone", os.path.join(td, f"{swhid}.git"), clone_dir,]
)
shutil.rmtree(clone_dir / ".git")
yield clone_dir
@@ -275,13 +270,11 @@
@contextlib.contextmanager
-def cook_stream_revision_gitfast(storage, obj_id):
+def cook_stream_revision_gitfast(storage, swhid):
"""Context manager that cooks a revision and stream its fastexport."""
backend = unittest.mock.MagicMock()
backend.storage = storage
- cooker = RevisionGitfastCooker(
- "revision_gitfast", obj_id, backend=backend, storage=storage
- )
+ cooker = RevisionGitfastCooker(swhid, backend=backend, storage=storage)
cooker.fileobj = io.BytesIO()
assert cooker.check_exists()
cooker.prepare_bundle()
@@ -292,11 +285,11 @@
@contextlib.contextmanager
-def cook_extract_revision_gitfast(storage, obj_id, fsck=True):
+def cook_extract_revision_gitfast(storage, swhid, fsck=True):
"""Context manager that cooks a revision and extract it,
using RevisionGitfastCooker"""
test_repo = TestRepo()
- with cook_stream_revision_gitfast(storage, obj_id) as stream, test_repo as p:
+ with cook_stream_revision_gitfast(storage, swhid) as stream, test_repo as p:
processor = dulwich.fastexport.GitImportProcessor(test_repo.repo)
processor.import_stream(stream)
yield test_repo, p
@@ -310,12 +303,7 @@
backend.storage = storage
# Cook the object
- cooker = GitBareCooker(
- swhid.object_type.name.lower(),
- swhid.object_id,
- backend=backend,
- storage=storage,
- )
+ cooker = GitBareCooker(swhid, backend=backend, storage=storage)
cooker.use_fsck = fsck # Some tests try edge-cases that git-fsck rejects
cooker.fileobj = io.BytesIO()
assert cooker.check_exists()
@@ -339,14 +327,8 @@
@contextlib.contextmanager
-def cook_extract_revision_git_bare(storage, obj_id, fsck=True):
- with cook_extract_git_bare(
- storage,
- identifiers.CoreSWHID(
- object_type=identifiers.ObjectType.REVISION, object_id=obj_id
- ),
- fsck=fsck,
- ) as res:
+def cook_extract_revision_git_bare(storage, swhid, fsck=True):
+ with cook_extract_git_bare(storage, swhid, fsck=fsck,) as res:
yield res
@@ -361,14 +343,8 @@
@contextlib.contextmanager
-def cook_extract_snapshot_git_bare(storage, obj_id, fsck=True):
- with cook_extract_git_bare(
- storage,
- identifiers.CoreSWHID(
- object_type=identifiers.ObjectType.SNAPSHOT, object_id=obj_id
- ),
- fsck=fsck,
- ) as res:
+def cook_extract_snapshot_git_bare(storage, swhid, fsck=True):
+ with cook_extract_git_bare(storage, swhid, fsck=fsck,) as res:
yield res
@@ -403,8 +379,9 @@
obj_id_hex = repo.repo[c].tree.decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=obj_id)
- with cook_extract_directory(loader.storage, obj_id) as p:
+ with cook_extract_directory(loader.storage, swhid) as p:
assert (p / "file").stat().st_mode == 0o100644
assert (p / "file").read_text() == TEST_CONTENT
assert (p / "executable").stat().st_mode == 0o100755
@@ -434,6 +411,7 @@
obj_id_hex = repo.repo[c].tree.decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=obj_id)
# FIXME: storage.content_update() should be changed to allow things
# like that
@@ -462,7 +440,7 @@
cur.execute("delete from content where sha1 = %s", (id_3,))
- with cook_extract_directory(loader.storage, obj_id) as p:
+ with cook_extract_directory(loader.storage, swhid) as p:
assert (p / "file").read_bytes() == b"test1"
assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE
assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE
@@ -508,8 +486,9 @@
obj_id_hex = repo.repo[c].tree.decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=obj_id)
- with cook_extract_directory(loader.storage, obj_id) as p:
+ with cook_extract_directory(loader.storage, swhid) as p:
assert (p / "file").stat().st_mode == 0o100644
assert (p / "executable").stat().st_mode == 0o100755
assert (p / "wat").stat().st_mode == 0o100644
@@ -534,6 +513,7 @@
obj_id_hex = repo.repo[c].tree.decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=obj_id)
# Set-up spies
storage_content_get_data = mocker.patch.object(
@@ -544,7 +524,7 @@
)
with cook_extract_directory_git_bare(
- loader.storage, obj_id, direct_objstorage=direct_objstorage
+ loader.storage, swhid, direct_objstorage=direct_objstorage
) as p:
assert (p / "file").stat().st_mode == 0o100644
assert (p / "file").read_text() == TEST_CONTENT
@@ -580,7 +560,9 @@
)
swh_storage.directory_add([dir])
- with cook_extract_directory_dircooker(swh_storage, dir.id, fsck=False) as p:
+ with cook_extract_directory_dircooker(
+ swh_storage, dir.swhid(), fsck=False
+ ) as p:
assert (p / "submodule").is_symlink()
assert os.readlink(str(p / "submodule")) == target_rev
@@ -601,7 +583,7 @@
repo.commit("add file2")
(rp / "dir1/dir2").mkdir(parents=True)
(rp / "dir1/dir2/file").write_text(TEST_CONTENT)
- repo.commit("add dir1/dir2/file")
+
(rp / "bin1").write_bytes(TEST_EXECUTABLE)
(rp / "bin1").chmod(0o755)
repo.commit("add bin1")
@@ -615,9 +597,10 @@
loader.load()
obj_id_hex = repo.repo.refs[b"HEAD"].decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
- return (loader, obj_id)
+ swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id)
+ return (loader, swhid)
- def check_revision_simple(self, ert, p, obj_id):
+ def check_revision_simple(self, ert, p, swhid):
ert.checkout(b"HEAD")
assert (p / "file1").stat().st_mode == 0o100644
assert (p / "file1").read_text() == TEST_CONTENT
@@ -627,7 +610,7 @@
assert (p / "bin").read_bytes() == TEST_EXECUTABLE
assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT
assert (p / "dir1/dir2/file").stat().st_mode == 0o100644
- assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex()
+ assert ert.repo.refs[b"HEAD"].decode() == swhid.object_id.hex()
def load_repo_two_roots(self, git_loader):
#
@@ -647,14 +630,15 @@
repo.commit("add file3")
obj_id_hex = repo.repo.refs[b"HEAD"].decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id)
loader = git_loader(str(rp))
loader.load()
- return (loader, obj_id)
+ return (loader, swhid)
- def check_revision_two_roots(self, ert, p, obj_id):
- assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex()
+ def check_revision_two_roots(self, ert, p, swhid):
+ assert ert.repo.refs[b"HEAD"].decode() == swhid.object_id.hex()
- (c3,) = ert.repo[hashutil.hash_to_bytehex(obj_id)].parents
+ (c3,) = ert.repo[hashutil.hash_to_bytehex(swhid.object_id)].parents
assert len(ert.repo[c3].parents) == 2
def load_repo_two_heads(self, git_loader):
@@ -682,13 +666,14 @@
obj_id_hex = repo.repo.refs[b"HEAD"].decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id)
loader = git_loader(str(rp))
loader.load()
- return (loader, obj_id)
+ return (loader, swhid)
- def check_snapshot_two_heads(self, ert, p, obj_id):
+ def check_snapshot_two_heads(self, ert, p, swhid):
assert (
- hashutil.hash_to_bytehex(obj_id)
+ hashutil.hash_to_bytehex(swhid.object_id)
== ert.repo.refs[b"HEAD"]
== ert.repo.refs[b"refs/heads/master"]
== ert.repo.refs[b"refs/remotes/origin/HEAD"]
@@ -696,7 +681,7 @@
== ert.repo.refs[b"refs/remotes/origin/b1"]
)
- c4_id = hashutil.hash_to_bytehex(obj_id)
+ c4_id = hashutil.hash_to_bytehex(swhid.object_id)
c3_id = ert.repo.refs[b"refs/remotes/origin/b2"]
assert ert.repo[c3_id].parents == ert.repo[c4_id].parents
@@ -729,23 +714,24 @@
obj_id_hex = repo.repo.refs[b"HEAD"].decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id)
loader = git_loader(str(rp))
loader.load()
- return (loader, obj_id)
+ return (loader, swhid)
- def check_revision_two_double_fork_merge(self, ert, p, obj_id):
- assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex()
+ def check_revision_two_double_fork_merge(self, ert, p, swhid):
+ assert ert.repo.refs[b"HEAD"].decode() == swhid.object_id.hex()
- def check_snapshot_two_double_fork_merge(self, ert, p, obj_id):
+ def check_snapshot_two_double_fork_merge(self, ert, p, swhid):
assert (
- hashutil.hash_to_bytehex(obj_id)
+ hashutil.hash_to_bytehex(swhid.object_id)
== ert.repo.refs[b"HEAD"]
== ert.repo.refs[b"refs/heads/master"]
== ert.repo.refs[b"refs/remotes/origin/HEAD"]
== ert.repo.refs[b"refs/remotes/origin/master"]
)
- (c4_id, c5_id) = ert.repo[obj_id.hex().encode()].parents
+ (c4_id, c5_id) = ert.repo[swhid.object_id.hex().encode()].parents
assert c5_id == ert.repo.refs[b"refs/remotes/origin/c3"]
(c2_id, c3_id) = ert.repo[c4_id].parents
@@ -773,23 +759,24 @@
obj_id_hex = repo.repo.refs[b"HEAD"].decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id)
loader = git_loader(str(rp))
loader.load()
- return (loader, obj_id)
+ return (loader, swhid)
- def check_revision_triple_merge(self, ert, p, obj_id):
- assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex()
+ def check_revision_triple_merge(self, ert, p, swhid):
+ assert ert.repo.refs[b"HEAD"].decode() == swhid.object_id.hex()
- def check_snapshot_triple_merge(self, ert, p, obj_id):
+ def check_snapshot_triple_merge(self, ert, p, swhid):
assert (
- hashutil.hash_to_bytehex(obj_id)
+ hashutil.hash_to_bytehex(swhid.object_id)
== ert.repo.refs[b"HEAD"]
== ert.repo.refs[b"refs/heads/master"]
== ert.repo.refs[b"refs/remotes/origin/HEAD"]
== ert.repo.refs[b"refs/remotes/origin/master"]
)
- (c2_id, c3_id, c4_id) = ert.repo[obj_id.hex().encode()].parents
+ (c2_id, c3_id, c4_id) = ert.repo[swhid.object_id.hex().encode()].parents
assert c3_id == ert.repo.refs[b"refs/remotes/origin/b1"]
assert c4_id == ert.repo.refs[b"refs/remotes/origin/b2"]
@@ -813,6 +800,7 @@
repo.commit()
obj_id_hex = repo.repo.refs[b"HEAD"].decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id)
loader = git_loader(str(rp))
loader.load()
@@ -842,9 +830,9 @@
)
cur.execute("delete from content where sha1 = %s", (id_3,))
- return (loader, obj_id)
+ return (loader, swhid)
- def check_revision_filtered_objects(self, ert, p, obj_id):
+ def check_revision_filtered_objects(self, ert, p, swhid):
ert.checkout(b"HEAD")
assert (p / "file").read_bytes() == b"test1"
assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE
@@ -878,9 +866,9 @@
storage = loader.storage
storage.revision_add([test_revision])
- return (loader, test_revision.id)
+ return (loader, test_revision.swhid())
- def check_revision_null_fields(self, ert, p, obj_id):
+ def check_revision_null_fields(self, ert, p, swhid):
ert.checkout(b"HEAD")
assert (p / "file").stat().st_mode == 0o100644
@@ -920,13 +908,14 @@
obj_id_hex = repo.repo.refs[b"HEAD"].decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=obj_id)
loader = git_loader(str(rp))
loader.load()
- return (loader, obj_id)
+ return (loader, swhid)
- def check_snapshot_tags(self, ert, p, obj_id):
+ def check_snapshot_tags(self, ert, p, swhid):
assert (
- hashutil.hash_to_bytehex(obj_id)
+ hashutil.hash_to_bytehex(swhid.object_id)
== ert.repo.refs[b"HEAD"]
== ert.repo.refs[b"refs/heads/master"]
== ert.repo.refs[b"refs/remotes/origin/HEAD"]
@@ -935,7 +924,7 @@
)
c2_id = ert.repo.refs[b"refs/tags/t2"]
- c5_id = hashutil.hash_to_bytehex(obj_id)
+ c5_id = hashutil.hash_to_bytehex(swhid.object_id)
assert ert.repo[c5_id].parents == [c2_id]
@@ -953,34 +942,34 @@
class TestRevisionCooker(RepoFixtures):
def test_revision_simple(self, git_loader, cook_extract_revision):
- (loader, obj_id) = self.load_repo_simple(git_loader)
- with cook_extract_revision(loader.storage, obj_id) as (ert, p):
- self.check_revision_simple(ert, p, obj_id)
+ (loader, swhid) = self.load_repo_simple(git_loader)
+ with cook_extract_revision(loader.storage, swhid) as (ert, p):
+ self.check_revision_simple(ert, p, swhid)
def test_revision_two_roots(self, git_loader, cook_extract_revision):
- (loader, obj_id) = self.load_repo_two_roots(git_loader)
- with cook_extract_revision(loader.storage, obj_id) as (ert, p):
- self.check_revision_two_roots(ert, p, obj_id)
+ (loader, swhid) = self.load_repo_two_roots(git_loader)
+ with cook_extract_revision(loader.storage, swhid) as (ert, p):
+ self.check_revision_two_roots(ert, p, swhid)
def test_revision_two_double_fork_merge(self, git_loader, cook_extract_revision):
- (loader, obj_id) = self.load_repo_two_double_fork_merge(git_loader)
- with cook_extract_revision(loader.storage, obj_id) as (ert, p):
- self.check_revision_two_double_fork_merge(ert, p, obj_id)
+ (loader, swhid) = self.load_repo_two_double_fork_merge(git_loader)
+ with cook_extract_revision(loader.storage, swhid) as (ert, p):
+ self.check_revision_two_double_fork_merge(ert, p, swhid)
def test_revision_triple_merge(self, git_loader, cook_extract_revision):
- (loader, obj_id) = self.load_repo_triple_merge(git_loader)
- with cook_extract_revision(loader.storage, obj_id) as (ert, p):
- self.check_revision_triple_merge(ert, p, obj_id)
+ (loader, swhid) = self.load_repo_triple_merge(git_loader)
+ with cook_extract_revision(loader.storage, swhid) as (ert, p):
+ self.check_revision_triple_merge(ert, p, swhid)
def test_revision_filtered_objects(self, git_loader, cook_extract_revision):
- (loader, obj_id) = self.load_repo_filtered_objects(git_loader)
- with cook_extract_revision(loader.storage, obj_id) as (ert, p):
- self.check_revision_filtered_objects(ert, p, obj_id)
+ (loader, swhid) = self.load_repo_filtered_objects(git_loader)
+ with cook_extract_revision(loader.storage, swhid) as (ert, p):
+ self.check_revision_filtered_objects(ert, p, swhid)
def test_revision_null_fields(self, git_loader, cook_extract_revision):
- (loader, obj_id) = self.load_repo_null_fields(git_loader)
- with cook_extract_revision(loader.storage, obj_id, fsck=False) as (ert, p):
- self.check_revision_null_fields(ert, p, obj_id)
+ (loader, swhid) = self.load_repo_null_fields(git_loader)
+ with cook_extract_revision(loader.storage, swhid, fsck=False) as (ert, p):
+ self.check_revision_null_fields(ert, p, swhid)
def test_revision_revision_data(self, swh_storage):
target_rev = "0e8a3ad980ec179856012b7eecf4327e99cd44cd"
@@ -1011,7 +1000,7 @@
)
swh_storage.revision_add([rev])
- with cook_stream_revision_gitfast(swh_storage, rev.id) as stream:
+ with cook_stream_revision_gitfast(swh_storage, rev.swhid()) as stream:
pattern = "M 160000 {} submodule".format(target_rev).encode()
assert pattern in stream.read()
@@ -1020,43 +1009,50 @@
def test_snapshot_simple(self, git_loader, cook_extract_snapshot):
(loader, main_rev_id) = self.load_repo_simple(git_loader)
snp_id = loader.loaded_snapshot_id
- with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id)
+ with cook_extract_snapshot(loader.storage, swhid) as (ert, p):
self.check_revision_simple(ert, p, main_rev_id)
def test_snapshot_two_roots(self, git_loader, cook_extract_snapshot):
(loader, main_rev_id) = self.load_repo_two_roots(git_loader)
snp_id = loader.loaded_snapshot_id
- with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id)
+ with cook_extract_snapshot(loader.storage, swhid) as (ert, p):
self.check_revision_two_roots(ert, p, main_rev_id)
def test_snapshot_two_heads(self, git_loader, cook_extract_snapshot):
(loader, main_rev_id) = self.load_repo_two_heads(git_loader)
snp_id = loader.loaded_snapshot_id
- with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id)
+ with cook_extract_snapshot(loader.storage, swhid) as (ert, p):
self.check_snapshot_two_heads(ert, p, main_rev_id)
def test_snapshot_two_double_fork_merge(self, git_loader, cook_extract_snapshot):
(loader, main_rev_id) = self.load_repo_two_double_fork_merge(git_loader)
snp_id = loader.loaded_snapshot_id
- with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id)
+ with cook_extract_snapshot(loader.storage, swhid) as (ert, p):
self.check_revision_two_double_fork_merge(ert, p, main_rev_id)
self.check_snapshot_two_double_fork_merge(ert, p, main_rev_id)
def test_snapshot_triple_merge(self, git_loader, cook_extract_snapshot):
(loader, main_rev_id) = self.load_repo_triple_merge(git_loader)
snp_id = loader.loaded_snapshot_id
- with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id)
+ with cook_extract_snapshot(loader.storage, swhid) as (ert, p):
self.check_revision_triple_merge(ert, p, main_rev_id)
self.check_snapshot_triple_merge(ert, p, main_rev_id)
def test_snapshot_filtered_objects(self, git_loader, cook_extract_snapshot):
(loader, main_rev_id) = self.load_repo_filtered_objects(git_loader)
snp_id = loader.loaded_snapshot_id
- with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id)
+ with cook_extract_snapshot(loader.storage, swhid) as (ert, p):
self.check_revision_filtered_objects(ert, p, main_rev_id)
def test_snapshot_tags(self, git_loader, cook_extract_snapshot):
(loader, main_rev_id) = self.load_repo_tags(git_loader)
snp_id = loader.loaded_snapshot_id
- with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ swhid = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id)
+ with cook_extract_snapshot(loader.storage, swhid) as (ert, p):
self.check_snapshot_tags(ert, p, main_rev_id)
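
Across test_cookers.py the cookers are now constructed from the SWHID alone; the object type formerly passed as a separate string is carried by the SWHID itself. A minimal sketch with a mocked backend, mirroring cook_extract_directory_dircooker above and assuming the usual module layout (swh/vault/cookers/directory.py):

    import unittest.mock

    from swh.model.identifiers import CoreSWHID
    from swh.vault.cookers.directory import DirectoryCooker

    swhid = CoreSWHID.from_string(
        "swh:1:dir:17a3e48bce37be5226490e750202ad3a9a1a3fe9"
    )
    backend = unittest.mock.MagicMock()

    # old: DirectoryCooker("directory", swhid.object_id, backend=..., storage=...)
    cooker = DirectoryCooker(swhid, backend=backend, storage=backend.storage)
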
diff --git a/swh/vault/tests/test_cookers_base.py b/swh/vault/tests/test_cookers_base.py
--- a/swh/vault/tests/test_cookers_base.py
+++ b/swh/vault/tests/test_cookers_base.py
@@ -5,18 +5,17 @@
from unittest.mock import MagicMock
-from swh.model import hashutil
+from swh.model.identifiers import CoreSWHID
from swh.vault.cookers.base import BaseVaultCooker
TEST_BUNDLE_CHUNKS = [b"test content 1\n", b"test content 2\n", b"test content 3\n"]
TEST_BUNDLE_CONTENT = b"".join(TEST_BUNDLE_CHUNKS)
-TEST_OBJ_TYPE = "test_type"
-TEST_HEX_ID = "17a3e48bce37be5226490e750202ad3a9a1a3fe9"
-TEST_OBJ_ID = hashutil.hash_to_bytes(TEST_HEX_ID)
+TEST_BUNDLE_TYPE = "test_type"
+TEST_SWHID = CoreSWHID.from_string("swh:1:cnt:17a3e48bce37be5226490e750202ad3a9a1a3fe9")
class BaseVaultCookerMock(BaseVaultCooker):
- CACHE_TYPE_KEY = TEST_OBJ_TYPE
+ BUNDLE_TYPE = TEST_BUNDLE_TYPE
def __init__(self):
# we do not call super() here to bypass the building of db objects from
@@ -24,8 +23,8 @@
self.config = {}
self.storage = MagicMock()
self.backend = MagicMock()
- self.bundle_type = self.CACHE_TYPE_KEY
- self.obj_id = hashutil.hash_to_bytes(TEST_OBJ_ID)
+ self.swhid = TEST_SWHID
+ self.obj_id = TEST_SWHID.object_id
self.max_bundle_size = 1024
def check_exists(self):
@@ -40,11 +39,11 @@
cooker = BaseVaultCookerMock()
cooker.cook()
cooker.backend.put_bundle.assert_called_once_with(
- TEST_OBJ_TYPE, TEST_OBJ_ID, TEST_BUNDLE_CONTENT
+ TEST_BUNDLE_TYPE, TEST_SWHID, TEST_BUNDLE_CONTENT
)
- cooker.backend.set_status.assert_called_with(TEST_OBJ_TYPE, TEST_OBJ_ID, "done")
- cooker.backend.set_progress.assert_called_with(TEST_OBJ_TYPE, TEST_OBJ_ID, None)
- cooker.backend.send_notif.assert_called_with(TEST_OBJ_TYPE, TEST_OBJ_ID)
+ cooker.backend.set_status.assert_called_with(TEST_BUNDLE_TYPE, TEST_SWHID, "done")
+ cooker.backend.set_progress.assert_called_with(TEST_BUNDLE_TYPE, TEST_SWHID, None)
+ cooker.backend.send_notif.assert_called_with(TEST_BUNDLE_TYPE, TEST_SWHID)
def test_code_exception_cook():
@@ -56,9 +55,9 @@
# Potentially remove this when we have objstorage streaming
cooker.backend.put_bundle.assert_not_called()
- cooker.backend.set_status.assert_called_with(TEST_OBJ_TYPE, TEST_OBJ_ID, "failed")
+ cooker.backend.set_status.assert_called_with(TEST_BUNDLE_TYPE, TEST_SWHID, "failed")
assert "Nope" not in cooker.backend.set_progress.call_args[0][2]
- cooker.backend.send_notif.assert_called_with(TEST_OBJ_TYPE, TEST_OBJ_ID)
+ cooker.backend.send_notif.assert_called_with(TEST_BUNDLE_TYPE, TEST_SWHID)
def test_policy_exception_cook():
@@ -69,6 +68,6 @@
# Potentially remove this when we have objstorage streaming
cooker.backend.put_bundle.assert_not_called()
- cooker.backend.set_status.assert_called_with(TEST_OBJ_TYPE, TEST_OBJ_ID, "failed")
+ cooker.backend.set_status.assert_called_with(TEST_BUNDLE_TYPE, TEST_SWHID, "failed")
assert "exceeds" in cooker.backend.set_progress.call_args[0][2]
- cooker.backend.send_notif.assert_called_with(TEST_OBJ_TYPE, TEST_OBJ_ID)
+ cooker.backend.send_notif.assert_called_with(TEST_BUNDLE_TYPE, TEST_SWHID)
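
The mock above encodes the new cooker contract: a BUNDLE_TYPE class attribute (formerly CACHE_TYPE_KEY), plus self.swhid with self.obj_id kept as an alias for its raw bytes. A hedged sketch of a concrete subclass under that contract, assuming BaseVaultCooker still exposes write() for streaming bundle bytes:

    from swh.vault.cookers.base import BaseVaultCooker

    class EchoCooker(BaseVaultCooker):  # hypothetical example cooker
        BUNDLE_TYPE = "test_type"

        def check_exists(self):
            return True  # a real cooker would query self.storage

        def prepare_bundle(self):
            # self.swhid is a CoreSWHID; self.obj_id == self.swhid.object_id
            self.write(str(self.swhid).encode() + b"\n")
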
diff --git a/swh/vault/tests/test_git_bare_cooker.py b/swh/vault/tests/test_git_bare_cooker.py
--- a/swh/vault/tests/test_git_bare_cooker.py
+++ b/swh/vault/tests/test_git_bare_cooker.py
@@ -10,7 +10,6 @@
"""
import datetime
-import glob
import io
import subprocess
import tarfile
@@ -273,13 +272,11 @@
# Cook
backend = InMemoryVaultBackend()
if snapshot:
- cooker_name = "snapshot_gitbare"
- cooked_id = snp.id
+ cooked_swhid = snp.swhid()
else:
- cooker_name = "revision_gitbare"
- cooked_id = rev2.id
+ cooked_swhid = rev2.swhid()
cooker = GitBareCooker(
- cooker_name, cooked_id, backend=backend, storage=swh_storage, graph=swh_graph,
+ cooked_swhid, backend=backend, storage=swh_storage, graph=swh_graph,
)
if weird_branches:
@@ -290,7 +287,7 @@
cooker.cook()
# Get bundle
- bundle = backend.fetch(cooker_name, cooked_id)
+ bundle = backend.fetch("git_bare", cooked_swhid)
# Extract bundle and make sure both revisions are in it
with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir:
@@ -301,7 +298,7 @@
[
"git",
"-C",
- glob.glob(f"{tempdir}/*{cooked_id.hex()}.git")[0],
+ f"{tempdir}/{cooked_swhid}.git",
"log",
"--format=oneline",
"--decorate=",
diff --git a/swh/vault/tests/test_init_cookers.py b/swh/vault/tests/test_init_cookers.py
--- a/swh/vault/tests/test_init_cookers.py
+++ b/swh/vault/tests/test_init_cookers.py
@@ -10,7 +10,7 @@
import yaml
from swh.vault.cookers import COOKER_TYPES, get_cooker
-from swh.vault.tests.test_backend import TEST_HEX_ID
+from swh.vault.tests.test_backend import TEST_SWHID
@pytest.fixture
@@ -74,7 +74,7 @@
write_config_to_env(config_ko, tmp_path, monkeypatch)
with pytest.raises(exception_class, match=exception_msg):
- get_cooker("directory", TEST_HEX_ID)
+ get_cooker("flat", TEST_SWHID)
@pytest.mark.parametrize(
@@ -106,7 +106,7 @@
for cooker_type in COOKER_TYPES.keys():
write_config_to_env(config_ok, tmp_path, monkeypatch)
- cooker = get_cooker(cooker_type, TEST_HEX_ID)
+ cooker = get_cooker(cooker_type, TEST_SWHID)
assert cooker is not None
- assert isinstance(cooker, COOKER_TYPES[cooker_type])
+ assert isinstance(cooker, tuple(COOKER_TYPES[cooker_type]))
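
The last assertion above implies that COOKER_TYPES now maps each bundle type to a sequence of cooker classes rather than a single class, hence the tuple() wrapper required by isinstance(). A small sanity-check sketch under that assumption:

    from swh.vault.cookers import COOKER_TYPES

    for bundle_type, cooker_classes in COOKER_TYPES.items():
        # every value is assumed to be a non-empty sequence of classes
        assert cooker_classes
        assert all(isinstance(cls, type) for cls in cooker_classes)
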
diff --git a/swh/vault/tests/test_server.py b/swh/vault/tests/test_server.py
--- a/swh/vault/tests/test_server.py
+++ b/swh/vault/tests/test_server.py
@@ -11,13 +11,14 @@
import yaml
from swh.core.api.serializers import json_dumps, msgpack_dumps, msgpack_loads
+from swh.vault.api.serializers import ENCODERS
from swh.vault.api.server import (
VaultServerApp,
check_config,
make_app,
make_app_from_configfile,
)
-from swh.vault.tests.test_backend import TEST_HEX_ID
+from swh.vault.tests.test_backend import TEST_SWHID
def test_make_app_from_file_missing():
@@ -78,25 +79,29 @@
async def test_client_cook_notfound(cli):
resp = await cli.post(
"/cook",
- data=json_dumps({"bundle_type": "directory", "obj_id": TEST_HEX_ID}),
+ data=json_dumps(
+ {"bundle_type": "flat", "swhid": TEST_SWHID}, extra_encoders=ENCODERS
+ ),
headers=[("Content-Type", "application/json")],
)
assert resp.status == 400
content = msgpack_loads(await resp.content.read())
assert content["type"] == "NotFoundExc"
- assert content["args"] == [f"directory {TEST_HEX_ID} was not found."]
+ assert content["args"] == [f"flat {TEST_SWHID} was not found."]
async def test_client_progress_notfound(cli):
resp = await cli.post(
"/progress",
- data=json_dumps({"bundle_type": "directory", "obj_id": TEST_HEX_ID}),
+ data=json_dumps(
+ {"bundle_type": "flat", "swhid": TEST_SWHID}, extra_encoders=ENCODERS
+ ),
headers=[("Content-Type", "application/json")],
)
assert resp.status == 400
content = msgpack_loads(await resp.content.read())
assert content["type"] == "NotFoundExc"
- assert content["args"] == [f"directory {TEST_HEX_ID} was not found."]
+ assert content["args"] == [f"flat {TEST_SWHID} was not found."]
async def test_client_batch_cook_invalid_type(cli):
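
The server tests serialize their request bodies with the vault's extra encoders so that CoreSWHID values survive the RPC boundary. A round-trip sketch, assuming swh.core's json_loads accepts a matching extra_decoders argument:

    from swh.core.api.serializers import json_dumps, json_loads
    from swh.model.identifiers import CoreSWHID
    from swh.vault.api.serializers import DECODERS, ENCODERS

    swhid = CoreSWHID.from_string("swh:1:dir:" + "0" * 40)

    payload = json_dumps(
        {"bundle_type": "flat", "swhid": swhid}, extra_encoders=ENCODERS
    )
    decoded = json_loads(payload, extra_decoders=DECODERS)
    assert decoded["swhid"] == swhid
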
