Page MenuHomeSoftware Heritage

D7941.id.diff
No OneTemporary

D7941.id.diff

diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -32,10 +32,13 @@
OriginIntrinsicMetadataRow,
)
from swh.model import hashutil
-from swh.model.model import Directory, Origin, Sha1Git
-from swh.model.swhids import ObjectType
+from swh.model.model import Directory
+from swh.model.model import ObjectType as ModelObjectType
+from swh.model.model import Origin, Sha1Git
+from swh.model.swhids import CoreSWHID, ObjectType
REVISION_GET_BATCH_SIZE = 10
+RELEASE_GET_BATCH_SIZE = 10
ORIGIN_GET_BATCH_SIZE = 10
@@ -329,7 +332,8 @@
self, origins: List[Origin], check_origin_known: bool = True, **kwargs
) -> List[Tuple[OriginIntrinsicMetadataRow, DirectoryIntrinsicMetadataRow]]:
head_rev_ids = []
- origins_with_head = []
+ head_rel_ids = []
+ origin_heads: Dict[Origin, CoreSWHID] = {}
# Filter out origins not in the storage
if check_origin_known:
@@ -348,25 +352,63 @@
continue
head_swhid = get_head_swhid(self.storage, origin.url)
if head_swhid:
- # TODO: add support for releases
- assert head_swhid.object_type == ObjectType.REVISION, head_swhid
- origins_with_head.append(origin)
- head_rev_ids.append(head_swhid.object_id)
-
- head_revs = list(
- call_with_batches(
- self.storage.revision_get, head_rev_ids, REVISION_GET_BATCH_SIZE
+ origin_heads[origin] = head_swhid
+ if head_swhid.object_type == ObjectType.REVISION:
+ head_rev_ids.append(head_swhid.object_id)
+ elif head_swhid.object_type == ObjectType.RELEASE:
+ head_rel_ids.append(head_swhid.object_id)
+ else:
+ assert False, head_swhid
+
+ head_revs = dict(
+ zip(
+ head_rev_ids,
+ call_with_batches(
+ self.storage.revision_get, head_rev_ids, REVISION_GET_BATCH_SIZE
+ ),
+ )
+ )
+ head_rels = dict(
+ zip(
+ head_rel_ids,
+ call_with_batches(
+ self.storage.release_get, head_rel_ids, RELEASE_GET_BATCH_SIZE
+ ),
)
)
- assert len(head_revs) == len(head_rev_ids)
results = []
- for (origin, rev) in zip(origins_with_head, head_revs):
- if not rev:
- self.log.warning("Missing head revision of origin %r", origin.url)
- continue
+ for (origin, head_swhid) in origin_heads.items():
+ if head_swhid.object_type == ObjectType.REVISION:
+ rev = head_revs[head_swhid.object_id]
+ if not rev:
+ self.log.warning(
+ "Missing head object %s of origin %r", head_swhid, origin.url
+ )
+ continue
+ directory_id = rev.directory
+ elif head_swhid.object_type == ObjectType.RELEASE:
+ rel = head_rels[head_swhid.object_id]
+ if not rel:
+ self.log.warning(
+ "Missing head object %s of origin %r", head_swhid, origin.url
+ )
+ continue
+ if rel.target_type != ModelObjectType.DIRECTORY:
+ # TODO
+ self.log.warning(
+ "Head release %s of %r has unexpected target type %s",
+ head_swhid,
+ origin.url,
+ rel.target_type,
+ )
+ continue
+ assert rel.target, rel
+ directory_id = rel.target
+ else:
+ assert False, head_swhid
- for dir_metadata in self.directory_metadata_indexer.index(rev.directory):
+ for dir_metadata in self.directory_metadata_indexer.index(directory_id):
# There is at most one dir_metadata
orig_metadata = OriginIntrinsicMetadataRow(
from_directory=dir_metadata.id,
diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py
--- a/swh/indexer/origin_head.py
+++ b/swh/indexer/origin_head.py
@@ -113,7 +113,7 @@
elif branch.target_type == TargetType.DIRECTORY:
return None # TODO
elif branch.target_type == TargetType.RELEASE:
- return None # TODO
+ return CoreSWHID(object_type=ObjectType.RELEASE, object_id=branch.target)
else:
assert False, branch
except KeyError:
diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py
--- a/swh/indexer/tests/test_origin_head.py
+++ b/swh/indexer/tests/test_origin_head.py
@@ -139,11 +139,16 @@
def test_pypi(storage):
- origin_url = "https://pypi.org/project/limnoria/"
+ origin_url = "https://old-pypi.example.org/project/limnoria/"
assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string(
"swh:1:rev:83b9b6c705b125d0fe6dd86b41109dc5fa32f874"
)
+ origin_url = "https://pypi.org/project/limnoria/"
+ assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string(
+ "swh:1:rel:83b9b6c705b125d0fe6dd86b41109dc5fa32f874"
+ )
+
def test_svn(storage):
origin_url = "http://0-512-md.googlecode.com/svn/"
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -29,7 +29,47 @@
return cfg
-def test_origin_metadata_indexer(
+def test_origin_metadata_indexer_release(
+ swh_indexer_config,
+ idx_storage: IndexerStorageInterface,
+ storage: StorageInterface,
+ obj_storage,
+) -> None:
+ indexer = OriginMetadataIndexer(config=swh_indexer_config)
+ origin = "https://npm.example.org/yarn-parser"
+ indexer.run([origin])
+
+ tool = swh_indexer_config["tools"]
+
+ dir_id = DIRECTORY2.id
+ dir_metadata = DirectoryIntrinsicMetadataRow(
+ id=dir_id,
+ tool=tool,
+ metadata=YARN_PARSER_METADATA,
+ mappings=["npm"],
+ )
+ origin_metadata = OriginIntrinsicMetadataRow(
+ id=origin,
+ tool=tool,
+ from_directory=dir_id,
+ metadata=YARN_PARSER_METADATA,
+ mappings=["npm"],
+ )
+
+ dir_results = list(idx_storage.directory_intrinsic_metadata_get([dir_id]))
+ for dir_result in dir_results:
+ assert dir_result.tool
+ del dir_result.tool["id"]
+ assert dir_results == [dir_metadata]
+
+ orig_results = list(idx_storage.origin_intrinsic_metadata_get([origin]))
+ for orig_result in orig_results:
+ assert orig_result.tool
+ del orig_result.tool["id"]
+ assert orig_results == [origin_metadata]
+
+
+def test_origin_metadata_indexer_revision(
swh_indexer_config,
idx_storage: IndexerStorageInterface,
storage: StorageInterface,
diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py
--- a/swh/indexer/tests/utils.py
+++ b/swh/indexer/tests/utils.py
@@ -19,10 +19,12 @@
Content,
Directory,
DirectoryEntry,
+ ObjectType,
Origin,
OriginVisit,
OriginVisitStatus,
Person,
+ Release,
Revision,
RevisionType,
Snapshot,
@@ -46,10 +48,15 @@
"type": "deposit",
"origin": "https://forge.softwareheritage.org/source/jesuisgpl/",
},
- {"type": "pypi", "origin": "https://pypi.org/project/limnoria/"},
+ {
+ "type": "pypi",
+ "origin": "https://old-pypi.example.org/project/limnoria/",
+ }, # with rev head
+ {"type": "pypi", "origin": "https://pypi.org/project/limnoria/"}, # with rel head
{"type": "svn", "origin": "http://0-512-md.googlecode.com/svn/"},
{"type": "git", "origin": "https://github.com/librariesio/yarn-parser"},
{"type": "git", "origin": "https://github.com/librariesio/yarn-parser.git"},
+ {"type": "git", "origin": "https://npm.example.org/yarn-parser"},
]
ORIGINS = [Origin(url=visit["origin"]) for visit in ORIGIN_VISITS]
@@ -120,7 +127,26 @@
REVISIONS = [REVISION]
+RELEASE = Release(
+ name=b"v0.0.0",
+ message=None,
+ author=Person(
+ name=b"Andrew Nesbitt",
+ fullname=b"Andrew Nesbitt <andrewnez@gmail.com>",
+ email=b"andrewnez@gmail.com",
+ ),
+ synthetic=False,
+ date=TimestampWithTimezone.from_datetime(
+ datetime.datetime(2017, 2, 20, 16, 14, 16, tzinfo=_utc_plus_2)
+ ),
+ target_type=ObjectType.DIRECTORY,
+ target=DIRECTORY2.id,
+)
+
+RELEASES = [RELEASE]
+
SNAPSHOTS = [
+ # https://github.com/SoftwareHeritage/swh-storage
Snapshot(
id=hash_to_bytes("a50fde72265343b7d28cecf6db20d98a81d21965"),
branches={
@@ -141,6 +167,7 @@
),
},
),
+ # rsync://ftp.gnu.org/gnu/3dldf
Snapshot(
id=hash_to_bytes("2c67f69a416bca4e1f3fcd848c588fab88ad0642"),
branches={
@@ -166,6 +193,7 @@
),
},
),
+    # https://forge.softwareheritage.org/source/jesuisgpl/
Snapshot(
id=hash_to_bytes("68c0d26104d47e278dd6be07ed61fafb561d0d20"),
branches={
@@ -175,6 +203,7 @@
)
},
),
+ # https://old-pypi.example.org/project/limnoria/
Snapshot(
id=hash_to_bytes("f255245269e15fc99d284affd79f766668de0b67"),
branches={
@@ -191,6 +220,23 @@
),
},
),
+ # https://pypi.org/project/limnoria/
+ Snapshot(
+ branches={
+ b"HEAD": SnapshotBranch(
+ target=b"releases/2018.09.09", target_type=TargetType.ALIAS
+ ),
+ b"releases/2018.09.01": SnapshotBranch(
+ target=b"<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d\xbb\xdfF\xfdw\xcf",
+ target_type=TargetType.RELEASE,
+ ),
+ b"releases/2018.09.09": SnapshotBranch(
+ target=b"\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8kA\x10\x9d\xc5\xfa2\xf8t", # noqa
+ target_type=TargetType.RELEASE,
+ ),
+ },
+ ),
+ # http://0-512-md.googlecode.com/svn/
Snapshot(
id=hash_to_bytes("a1a28c0ab387a8f9e0618cb705eab81fc448f473"),
branches={
@@ -200,6 +246,7 @@
)
},
),
+ # https://github.com/librariesio/yarn-parser
Snapshot(
id=hash_to_bytes("bb4fd3a836930ce629d912864319637040ff3040"),
branches={
@@ -209,6 +256,7 @@
)
},
),
+ # https://github.com/librariesio/yarn-parser.git
Snapshot(
id=hash_to_bytes("bb4fd3a836930ce629d912864319637040ff3040"),
branches={
@@ -218,8 +266,19 @@
)
},
),
+ # https://npm.example.org/yarn-parser
+ Snapshot(
+ branches={
+ b"HEAD": SnapshotBranch(
+ target=RELEASE.id,
+ target_type=TargetType.RELEASE,
+ )
+ },
+ ),
]
+assert len(SNAPSHOTS) == len(ORIGIN_VISITS)
+
SHA1_TO_LICENSES = {
"01c9379dfc33803963d07c1ccc748d3fe4c96bb5": ["GPL"],
@@ -562,6 +621,7 @@
storage.origin_add(ORIGINS)
storage.directory_add([DIRECTORY, DIRECTORY2])
storage.revision_add(REVISIONS)
+ storage.release_add(RELEASES)
storage.snapshot_add(SNAPSHOTS)
for visit, snapshot in zip(ORIGIN_VISITS, SNAPSHOTS):

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 8:32 AM (11 w, 18 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216465

Event Timeline