Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/migrate_extrinsic_metadata.py
Show First 20 Lines • Show All 420 Lines • ▼ Show 20 Lines | for deposit_request_row in deposit_cur: | ||||
metadata = deposit_request["deposit_request.metadata"] | metadata = deposit_request["deposit_request.metadata"] | ||||
if metadata is not None: | if metadata is not None: | ||||
json.dumps(metadata).encode() # check it's valid | json.dumps(metadata).encode() # check it's valid | ||||
if "@xmlns" in metadata: | if "@xmlns" in metadata: | ||||
assert metadata["@xmlns"] == ATOM_NS | assert metadata["@xmlns"] == ATOM_NS | ||||
assert metadata["@xmlns:codemeta"] in (CODEMETA_NS, [CODEMETA_NS]) | assert metadata["@xmlns:codemeta"] in (CODEMETA_NS, [CODEMETA_NS]) | ||||
format = NEW_DEPOSIT_FORMAT | format = NEW_DEPOSIT_FORMAT | ||||
else: | elif "{http://www.w3.org/2005/Atom}id" in metadata: | ||||
assert "{http://www.w3.org/2005/Atom}id" in metadata | |||||
assert ( | assert ( | ||||
"{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author" in metadata | "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author" in metadata | ||||
or "{http://www.w3.org/2005/Atom}author" in metadata | or "{http://www.w3.org/2005/Atom}author" in metadata | ||||
) | ) | ||||
format = OLD_DEPOSIT_FORMAT | format = OLD_DEPOSIT_FORMAT | ||||
else: | |||||
# new format introduced in | |||||
# https://forge.softwareheritage.org/D4065 | |||||
# it's the same as the first case, but with the @xmlns | |||||
# declarations stripped | |||||
assert "id" in metadata | |||||
assert "codemeta:author" in metadata | |||||
format = NEW_DEPOSIT_FORMAT | |||||
metadata_entries.append((date, format, metadata)) | metadata_entries.append((date, format, metadata)) | ||||
if discovery_date is None: | if discovery_date is None: | ||||
discovery_date = max(dates) | discovery_date = max(dates) | ||||
# Sanity checks to make sure deposit requests are consistent with each other | # Sanity checks to make sure deposit requests are consistent with each other | ||||
assert len(metadata_entries) >= 1, deposit_id | assert len(metadata_entries) >= 1, deposit_id | ||||
assert len(provider_urls) == 1, f"expected 1 provider url, got {provider_urls}" | assert len(provider_urls) == 1, f"expected 1 provider url, got {provider_urls}" | ||||
▲ Show 20 Lines • Show All 273 Lines • ▼ Show 20 Lines | elif type_ == "tar": | ||||
if "client" in metadata: | if "client" in metadata: | ||||
del metadata["client"] | del metadata["client"] | ||||
del metadata["extrinsic"] | del metadata["extrinsic"] | ||||
else: | else: | ||||
# deposit loader format 2 | # deposit loader format 2 | ||||
actual_metadata = metadata["extrinsic"]["raw"]["origin_metadata"][ | actual_metadata = metadata["extrinsic"]["raw"]["origin_metadata"][ | ||||
"metadata" | "metadata" | ||||
] | ] | ||||
if isinstance(actual_metadata, str): | |||||
# new format introduced in | |||||
# https://forge.softwareheritage.org/D4105 | |||||
actual_metadata = json.loads(actual_metadata) | |||||
if "@xmlns" in actual_metadata: | if "@xmlns" in actual_metadata: | ||||
assert actual_metadata["@xmlns"] == ATOM_NS | assert actual_metadata["@xmlns"] == ATOM_NS | ||||
assert actual_metadata["@xmlns:codemeta"] in ( | assert actual_metadata["@xmlns:codemeta"] in ( | ||||
CODEMETA_NS, | CODEMETA_NS, | ||||
[CODEMETA_NS], | [CODEMETA_NS], | ||||
) | ) | ||||
else: | elif "{http://www.w3.org/2005/Atom}id" in actual_metadata: | ||||
assert "{http://www.w3.org/2005/Atom}id" in actual_metadata | |||||
assert ( | assert ( | ||||
"{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author" | "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author" | ||||
in actual_metadata | in actual_metadata | ||||
) | ) | ||||
else: | |||||
# new format introduced in | |||||
# https://forge.softwareheritage.org/D4065 | |||||
# it's the same as the first case, but with the @xmlns | |||||
# declarations stripped | |||||
assert "id" in actual_metadata | |||||
assert "codemeta:author" in actual_metadata | |||||
(origin, discovery_date) = handle_deposit_row( | (origin, discovery_date) = handle_deposit_row( | ||||
row, discovery_date, origin, storage, deposit_cur, dry_run | row, discovery_date, origin, storage, deposit_cur, dry_run | ||||
) | ) | ||||
del metadata["extrinsic"] | del metadata["extrinsic"] | ||||
else: | else: | ||||
assert False, f"unknown provider {provider}" | assert False, f"unknown provider {provider}" | ||||
▲ Show 20 Lines • Show All 369 Lines • Show Last 20 Lines |