diff --git a/swh/storage/migrate_extrinsic_metadata.py b/swh/storage/migrate_extrinsic_metadata.py --- a/swh/storage/migrate_extrinsic_metadata.py +++ b/swh/storage/migrate_extrinsic_metadata.py @@ -58,7 +58,15 @@ ATOM_KEYS = ["id", "author", "external_identifier", "title"] # columns of the revision table (of the storage DB) -REVISION_COLS = ["id", "date", "committer_date", "type", "message", "metadata"] +REVISION_COLS = [ + "id", + "directory", + "date", + "committer_date", + "type", + "message", + "metadata", +] # columns of the tables of the deposit DB DEPOSIT_COLS = [ @@ -391,6 +399,7 @@ def load_metadata( storage, revision_id, + directory_id, discovery_date: datetime.datetime, metadata: Dict[str, Any], format: str, @@ -399,16 +408,20 @@ dry_run: bool, ): """Does the actual loading to swh-storage.""" + directory_swhid = SWHID( + object_type="directory", object_id=hash_to_hex(directory_id) + ) revision_swhid = SWHID(object_type="revision", object_id=hash_to_hex(revision_id)) obj = RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=revision_swhid, + type=MetadataTargetType.DIRECTORY, + id=directory_swhid, discovery_date=discovery_date, authority=authority, fetcher=FETCHER, format=format, metadata=json.dumps(metadata).encode(), origin=origin, + revision=revision_swhid, ) if not dry_run: storage.raw_extrinsic_metadata_add([obj]) @@ -582,6 +595,7 @@ load_metadata( storage, row["id"], + row["directory"], date, metadata, format, @@ -642,6 +656,7 @@ load_metadata( storage, row["id"], + row["directory"], discovery_date, metadata["extrinsic"]["raw"], NPM_FORMAT, @@ -665,6 +680,7 @@ load_metadata( storage, row["id"], + row["directory"], discovery_date, metadata["extrinsic"]["raw"], PYPI_FORMAT, @@ -733,6 +749,7 @@ load_metadata( storage, row["id"], + row["directory"], discovery_date, metadata["extrinsic"]["raw"], NIXGUIX_FORMAT, @@ -837,6 +854,7 @@ load_metadata( storage, row["id"], + row["directory"], discovery_date, metadata["package"], NPM_FORMAT, @@ -953,6 +971,7 @@ load_metadata( storage, row["id"], + row["directory"], discovery_date, metadata["project"], PYPI_FORMAT, @@ -1048,6 +1067,7 @@ load_metadata( storage, row["id"], + row["directory"], discovery_date, metadata["original_artifact"], ORIGINAL_ARTIFACT_FORMAT, diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py @@ -31,6 +31,9 @@ metadata={}, ) +DIRECTORY_ID = b"a" * 20 +DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) + def test_cran_package_from_url(): files = [ @@ -67,6 +70,7 @@ row = { "id": b"\x00\x03a\xaa3\x84,\xbd\xea_\xa6\xe7}\xb6\x96\xb97\xeb\xd2i", + "directory": DIRECTORY_ID, "date": datetime.datetime(2020, 5, 5, 0, 0, tzinfo=datetime.timezone.utc,), "committer_date": datetime.datetime( 2020, 5, 5, 0, 0, tzinfo=datetime.timezone.utc, @@ -114,10 +118,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:000361aa33842cbdea5fa6e77db696b937ebd269" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 5, 7, 15, 27, 38, 652281, tzinfo=datetime.timezone.utc, ), @@ -126,6 +128,9 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:000361aa33842cbdea5fa6e77db696b937ebd269" + ), ), ] ), @@ -158,6 +163,7 @@ row = { "id": b'\x00\x00\xd4\xef^\x16a"\xae\xe6\x86*\xd3\x8a\x18\xceS\x86\xcc>', + "directory": DIRECTORY_ID, "date": None, "committer_date": None, "type": "tar", @@ -201,10 +207,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:0000d4ef5e166122aee6862ad38a18ce5386cc3e" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 4, 30, 11, 1, 57, 832481, tzinfo=datetime.timezone.utc, ), @@ -213,6 +217,9 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:0000d4ef5e166122aee6862ad38a18ce5386cc3e" + ), ), ] ), @@ -234,6 +241,7 @@ row = { "id": b'."7\x82\xeeK\xa1R\xe4\xc8\x86\xf7\x97\x97bA\xc3\x9a\x9a\xab', + "directory": DIRECTORY_ID, "date": None, "committer_date": None, "type": "tar", @@ -277,10 +285,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:2e223782ee4ba152e4c886f797976241c39a9aab" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 9, 25, 14, 4, 20, 926667, tzinfo=datetime.timezone.utc, ), @@ -289,6 +295,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:2e223782ee4ba152e4c886f797976241c39a9aab" + ), ), ] ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py @@ -47,6 +47,9 @@ metadata={}, ) +DIRECTORY_ID = b"a" * 20 +DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) + def now(): return datetime.datetime.now(tz=datetime.timezone.utc) @@ -112,6 +115,7 @@ revision_row = { "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", + "directory": DIRECTORY_ID, "metadata": { # ... "original_artifact": [ @@ -141,6 +145,7 @@ revision_row = { "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", + "directory": DIRECTORY_ID, "metadata": {"original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc",},]}, } @@ -243,6 +248,7 @@ revision_row = { "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", + "directory": DIRECTORY_ID, "metadata": {"original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc",},]}, } @@ -358,6 +364,7 @@ row = { "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc, ), @@ -430,10 +437,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc, ), @@ -442,6 +447,9 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee" + ), ), ] ), @@ -505,6 +513,7 @@ row = { "id": b"\x00\x00\x01\xc2\x8c\x8f\xca\x01\xb9\x04\xde\x92\xa2d\n\x86l\xe0<\xb7", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc ), @@ -545,10 +554,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc ), @@ -557,6 +564,9 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7" + ), ), ] ) diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py @@ -50,6 +50,9 @@ metadata={}, ) +DIRECTORY_ID = b"a" * 20 +DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) + def get_mock_deposit_cur(row_dicts): rows = [tuple(d[key] for key in DEPOSIT_COLS) for d in row_dicts] @@ -91,6 +94,7 @@ row = { "id": b"\x02#\x10\xdf\x16\xfd\x9eMO\x81\xfe6\xa1B\xe8-\xb9w\xc0\x1d", + "directory": DIRECTORY_ID, "date": datetime.datetime(2018, 1, 5, 0, 0, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2018, 1, 5, 0, 0, tzinfo=datetime.timezone.utc @@ -182,10 +186,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 3, 11, 11, 7, 18, 688410, tzinfo=datetime.timezone.utc ), @@ -194,16 +196,17 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 3, 11, 11, 11, 36, 336283, tzinfo=datetime.timezone.utc ), @@ -212,6 +215,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" + ), ), ] ), @@ -251,6 +257,7 @@ row = { "id": b"\x01\x16\xca\xb7\x19d\xd5\x9c\x85p\xb4\xc5r\x9b(\xbd\xd6<\x9bF", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2018, 1, 17, 12, 54, 0, 723882, tzinfo=datetime.timezone.utc ), @@ -345,10 +352,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2018, 1, 17, 12, 54, 0, 413748, tzinfo=datetime.timezone.utc ), @@ -357,16 +362,17 @@ format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 5, 15, 14, 27, 21, 462270, tzinfo=datetime.timezone.utc ), @@ -375,6 +381,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" + ), ), ] ), @@ -414,6 +423,7 @@ row = { "id": b'\x01"\x96nP\x93\x17\xae\xcejA\xd0\xf0\x88\xdas<\xc0\x9d\x0f', + "directory": DIRECTORY_ID, "date": datetime.datetime(2018, 1, 5, 0, 0, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2018, 1, 5, 0, 0, tzinfo=datetime.timezone.utc @@ -507,10 +517,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 26, 13, 50, 8, 216113, tzinfo=datetime.timezone.utc ), @@ -519,16 +527,17 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 26, 13, 50, 22, 640625, tzinfo=datetime.timezone.utc ), @@ -537,6 +546,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" + ), ), ] ), @@ -577,6 +589,7 @@ row = { "id": b"J\x9dc{\xa5\x07\xa2\xb93e%\x04(\xe6\xe3\xf0!\xf1\x94\xd0", + "directory": DIRECTORY_ID, "date": datetime.datetime(2016, 1, 29, 0, 0, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2020, 10, 8, 0, 0, tzinfo=datetime.timezone.utc @@ -668,10 +681,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 10, 9, 13, 38, 7, 394544, tzinfo=datetime.timezone.utc ), @@ -680,16 +691,17 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 10, 9, 13, 38, 25, 888646, tzinfo=datetime.timezone.utc ), @@ -698,6 +710,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" + ), ), ] ), @@ -746,6 +761,7 @@ row = { "id": b"\t5`S\xc4\x9a\xd0\xf9\xe6.Q\xc2\x9d>a|y\x11@\xdf", + "directory": DIRECTORY_ID, "date": datetime.datetime(2019, 5, 14, 0, 0, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2019, 5, 14, 0, 0, tzinfo=datetime.timezone.utc @@ -840,10 +856,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc ), @@ -852,16 +866,17 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 5, 14, 7, 28, 33, 210100, tzinfo=datetime.timezone.utc ), @@ -870,16 +885,17 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc ), @@ -888,6 +904,9 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" + ), ), ] ), @@ -914,6 +933,7 @@ row = { "id": b"\x91\xe5\xca\x8b'K\xf1\xa8cFd2\xd7Q\xf7A\xbc\x94\xba&", + "directory": DIRECTORY_ID, "date": datetime.datetime(2017, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2019, 11, 6, 14, 47, 30, tzinfo=datetime.timezone.utc @@ -946,6 +966,7 @@ row = { "id": b"\x8e\x9c\xee\x14\xa6\xad9\xbc\xa44pw\xb8\x7f\xb5\xbb\xd8\x95;\xb1", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2018, 7, 23, 12, 25, 45, 907132, tzinfo=datetime.timezone.utc ), @@ -986,6 +1007,7 @@ row = { "id": b"\x03\x98\x7f\x05n\xafE\x96\xcd \xd7\xb2\xee\x01\xc9\xb8L\xed\xdf\xa8", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2018, 1, 17, 12, 49, 30, 902891, tzinfo=datetime.timezone.utc ), @@ -1053,10 +1075,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2018, 1, 17, 12, 49, 30, 645576, tzinfo=datetime.timezone.utc ), @@ -1065,6 +1085,9 @@ format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8" + ), ), ] ), @@ -1092,6 +1115,7 @@ row = { "id": b"-{\xcec\x1f\xc7\x91\x08\x03\x11\xeb\x83\\GB\x8eXjn\xa4", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2018, 1, 10, 13, 14, 51, 77033, tzinfo=datetime.timezone.utc ), @@ -1159,10 +1183,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2018, 1, 10, 13, 14, 50, 555143, tzinfo=datetime.timezone.utc ), @@ -1171,6 +1193,9 @@ format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4" + ), ), ] ), @@ -1221,6 +1246,7 @@ row = { "id": b"\x03@v\xf3\xf4\x1e\xe1 N\xb9\xf6@\x82\xcb\xe6\xe9P\xd7\xbb\x8a", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2019, 2, 25, 15, 49, 16, 594536, tzinfo=datetime.timezone.utc ), @@ -1290,10 +1316,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 2, 25, 15, 49, 12, 302745, tzinfo=datetime.timezone.utc ), @@ -1302,16 +1326,17 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 2, 25, 15, 54, 30, 102072, tzinfo=datetime.timezone.utc ), @@ -1320,6 +1345,9 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" + ), ), ] ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py @@ -31,6 +31,9 @@ metadata={}, ) +DIRECTORY_ID = b"a" * 20 +DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) + def test_gnu(): original_artifacts = [ @@ -48,6 +51,7 @@ row = { "id": b"\x00\x1cqE\x8e@[%\xba\xcc\xc8\x0b\x99\xf6cM\xff\x9d+\x18", + "directory": DIRECTORY_ID, "date": datetime.datetime(2003, 6, 13, 0, 11, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2003, 6, 13, 0, 11, tzinfo=datetime.timezone.utc @@ -88,10 +92,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:001c71458e405b25baccc80b99f6634dff9d2b18" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 11, 27, 11, 17, 38, 318997, tzinfo=datetime.timezone.utc ), @@ -100,6 +102,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:001c71458e405b25baccc80b99f6634dff9d2b18" + ), ), ] ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py @@ -36,6 +36,9 @@ metadata={}, ) +DIRECTORY_ID = b"a" * 20 +DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) + def test_nixguix(): extrinsic_metadata = { @@ -55,6 +58,7 @@ row = { "id": b"\x00\x01\xbaM\xd0S\x94\x85\x02\x11\xd7\xb3\x85M\x99\x13\xd2:\xe3y", + "directory": DIRECTORY_ID, "date": None, "committer_date": None, "type": "tar", @@ -86,10 +90,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc ), @@ -98,16 +100,17 @@ format="nixguix-sources-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc ), @@ -116,6 +119,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379" + ), ), ] ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py @@ -37,6 +37,9 @@ metadata={}, ) +DIRECTORY_ID = b"a" * 20 +DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) + def test_npm_package_from_source_url(): package_urls = [ @@ -101,6 +104,7 @@ row = { "id": b"\x00\x00\x02\xa4\x9b\xba\x17\xca\x8c\xf3\x7f_=\x16\xaa\xac\xf9S`\xfc", + "directory": DIRECTORY_ID, "date": datetime.datetime(2020, 2, 22, 6, 5, 45, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2020, 2, 22, 6, 5, 45, tzinfo=datetime.timezone.utc @@ -138,10 +142,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc, ), @@ -150,16 +152,17 @@ format="replicate-npm-package-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc, ), @@ -168,6 +171,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc" + ), ), ] ), @@ -212,6 +218,7 @@ row = { "id": b"\x00\x00\x04\xae\xed\t\xee\x08\x9cx\x12d\xc0M%d\xfdX\xfe\xb5", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc ), @@ -240,10 +247,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc, ), @@ -252,16 +257,17 @@ format="replicate-npm-package-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc, ), @@ -270,6 +276,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5" + ), ), ] ), @@ -312,6 +321,7 @@ row = { "id": b"\x00\x00 \x19\xc5wXt\xbc\xed\x00zR\x9b\xd3\xb7\x8b\xf6\x04W", + "directory": DIRECTORY_ID, "date": datetime.datetime(2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc @@ -338,10 +348,8 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:00002019c5775874bced007a529bd3b78bf60457" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc, ), @@ -350,16 +358,17 @@ format="replicate-npm-package-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:00002019c5775874bced007a529bd3b78bf60457" + ), ), ] ), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=parse_swhid( - "swh:1:rev:00002019c5775874bced007a529bd3b78bf60457" - ), + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc, ), @@ -368,6 +377,9 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=parse_swhid( + "swh:1:rev:00002019c5775874bced007a529bd3b78bf60457" + ), ), ] ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py @@ -45,6 +45,9 @@ type=MetadataAuthorityType.REGISTRY, url="https://softwareheritage.org/", ) +DIRECTORY_ID = b"a" * 20 +DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) + def now(): return datetime.datetime.now(tz=datetime.timezone.utc) @@ -231,6 +234,7 @@ row = { "id": b"\x00\x00\x07a{S\xe7\xb1E\x8fi]\xd0}\xe4\xceU\xaf\x15\x17", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2019, 11, 11, 6, 21, 20, tzinfo=datetime.timezone.utc, ), @@ -276,12 +280,12 @@ revision_swhid = parse_swhid("swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517") assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.REVISION, revision_swhid, authority=PYPI_AUTHORITY, + MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=revision_swhid, + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, ), @@ -290,17 +294,18 @@ format="pypi-project-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, + revision=revision_swhid, ), ], next_page_token=None, ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.REVISION, revision_swhid, authority=SWH_AUTHORITY, + MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=revision_swhid, + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, ), @@ -309,6 +314,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, + revision=revision_swhid, ), ], next_page_token=None, @@ -364,6 +370,7 @@ row = { "id": b"\x00\x00\x04\xd68,J\xd4\xc0Q\x92fbl6U\x1f\x0eQ\xca", + "directory": DIRECTORY_ID, "date": datetime.datetime( 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc ), @@ -396,12 +403,12 @@ revision_swhid = parse_swhid("swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca") assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.REVISION, revision_swhid, authority=PYPI_AUTHORITY, + MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=revision_swhid, + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, ), @@ -410,17 +417,18 @@ format="pypi-project-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=None, + revision=revision_swhid, ), ], next_page_token=None, ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.REVISION, revision_swhid, authority=SWH_AUTHORITY, + MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=revision_swhid, + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, ), @@ -429,6 +437,7 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=None, + revision=revision_swhid, ), ], next_page_token=None, @@ -473,6 +482,7 @@ row = { "id": b"N\xa9\x91|\xdfS\xcd\x13SJ\x04.N\xb3x{\x86\xc84\xd2", + "directory": DIRECTORY_ID, "date": datetime.datetime(2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc @@ -500,15 +510,15 @@ revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.REVISION, revision_swhid, authority=PYPI_AUTHORITY, + MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult(results=[], next_page_token=None,) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.REVISION, revision_swhid, authority=SWH_AUTHORITY, + MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=revision_swhid, + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, ), @@ -517,6 +527,7 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=None, + revision=revision_swhid, ), ], next_page_token=None, @@ -556,6 +567,7 @@ revision_id = b"N\xa9\x91|\xdfS\xcd\x13SJ\x04.N\xb3x{\x86\xc84\xd2" row = { "id": revision_id, + "directory": DIRECTORY_ID, "date": datetime.datetime(2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc), "committer_date": datetime.datetime( 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc @@ -610,15 +622,15 @@ revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.REVISION, revision_swhid, authority=PYPI_AUTHORITY, + MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult(results=[], next_page_token=None,) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.REVISION, revision_swhid, authority=SWH_AUTHORITY, + MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - id=revision_swhid, + type=MetadataTargetType.DIRECTORY, + id=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, ), @@ -627,6 +639,7 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, + revision=revision_swhid, ), ], next_page_token=None,