Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066426
D7941.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Subscribers
None
D7941.id.diff
View Options
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -32,10 +32,13 @@
OriginIntrinsicMetadataRow,
)
from swh.model import hashutil
-from swh.model.model import Directory, Origin, Sha1Git
-from swh.model.swhids import ObjectType
+from swh.model.model import Directory
+from swh.model.model import ObjectType as ModelObjectType
+from swh.model.model import Origin, Sha1Git
+from swh.model.swhids import CoreSWHID, ObjectType
REVISION_GET_BATCH_SIZE = 10
+RELEASE_GET_BATCH_SIZE = 10
ORIGIN_GET_BATCH_SIZE = 10
@@ -329,7 +332,8 @@
self, origins: List[Origin], check_origin_known: bool = True, **kwargs
) -> List[Tuple[OriginIntrinsicMetadataRow, DirectoryIntrinsicMetadataRow]]:
head_rev_ids = []
- origins_with_head = []
+ head_rel_ids = []
+ origin_heads: Dict[Origin, CoreSWHID] = {}
# Filter out origins not in the storage
if check_origin_known:
@@ -348,25 +352,63 @@
continue
head_swhid = get_head_swhid(self.storage, origin.url)
if head_swhid:
- # TODO: add support for releases
- assert head_swhid.object_type == ObjectType.REVISION, head_swhid
- origins_with_head.append(origin)
- head_rev_ids.append(head_swhid.object_id)
-
- head_revs = list(
- call_with_batches(
- self.storage.revision_get, head_rev_ids, REVISION_GET_BATCH_SIZE
+ origin_heads[origin] = head_swhid
+ if head_swhid.object_type == ObjectType.REVISION:
+ head_rev_ids.append(head_swhid.object_id)
+ elif head_swhid.object_type == ObjectType.RELEASE:
+ head_rel_ids.append(head_swhid.object_id)
+ else:
+ assert False, head_swhid
+
+ head_revs = dict(
+ zip(
+ head_rev_ids,
+ call_with_batches(
+ self.storage.revision_get, head_rev_ids, REVISION_GET_BATCH_SIZE
+ ),
+ )
+ )
+ head_rels = dict(
+ zip(
+ head_rel_ids,
+ call_with_batches(
+ self.storage.release_get, head_rel_ids, RELEASE_GET_BATCH_SIZE
+ ),
)
)
- assert len(head_revs) == len(head_rev_ids)
results = []
- for (origin, rev) in zip(origins_with_head, head_revs):
- if not rev:
- self.log.warning("Missing head revision of origin %r", origin.url)
- continue
+ for (origin, head_swhid) in origin_heads.items():
+ if head_swhid.object_type == ObjectType.REVISION:
+ rev = head_revs[head_swhid.object_id]
+ if not rev:
+ self.log.warning(
+ "Missing head object %s of origin %r", head_swhid, origin.url
+ )
+ continue
+ directory_id = rev.directory
+ elif head_swhid.object_type == ObjectType.RELEASE:
+ rel = head_rels[head_swhid.object_id]
+ if not rel:
+ self.log.warning(
+ "Missing head object %s of origin %r", head_swhid, origin.url
+ )
+ continue
+ if rel.target_type != ModelObjectType.DIRECTORY:
+ # TODO
+ self.log.warning(
+ "Head release %s of %r has unexpected target type %s",
+ head_swhid,
+ origin.url,
+ rel.target_type,
+ )
+ continue
+ assert rel.target, rel
+ directory_id = rel.target
+ else:
+ assert False, head_swhid
- for dir_metadata in self.directory_metadata_indexer.index(rev.directory):
+ for dir_metadata in self.directory_metadata_indexer.index(directory_id):
# There is at most one dir_metadata
orig_metadata = OriginIntrinsicMetadataRow(
from_directory=dir_metadata.id,
diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py
--- a/swh/indexer/origin_head.py
+++ b/swh/indexer/origin_head.py
@@ -113,7 +113,7 @@
elif branch.target_type == TargetType.DIRECTORY:
return None # TODO
elif branch.target_type == TargetType.RELEASE:
- return None # TODO
+ return CoreSWHID(object_type=ObjectType.RELEASE, object_id=branch.target)
else:
assert False, branch
except KeyError:
diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py
--- a/swh/indexer/tests/test_origin_head.py
+++ b/swh/indexer/tests/test_origin_head.py
@@ -139,11 +139,16 @@
def test_pypi(storage):
- origin_url = "https://pypi.org/project/limnoria/"
+ origin_url = "https://old-pypi.example.org/project/limnoria/"
assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string(
"swh:1:rev:83b9b6c705b125d0fe6dd86b41109dc5fa32f874"
)
+ origin_url = "https://pypi.org/project/limnoria/"
+ assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string(
+ "swh:1:rel:83b9b6c705b125d0fe6dd86b41109dc5fa32f874"
+ )
+
def test_svn(storage):
origin_url = "http://0-512-md.googlecode.com/svn/"
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -29,7 +29,47 @@
return cfg
-def test_origin_metadata_indexer(
+def test_origin_metadata_indexer_release(
+ swh_indexer_config,
+ idx_storage: IndexerStorageInterface,
+ storage: StorageInterface,
+ obj_storage,
+) -> None:
+ indexer = OriginMetadataIndexer(config=swh_indexer_config)
+ origin = "https://npm.example.org/yarn-parser"
+ indexer.run([origin])
+
+ tool = swh_indexer_config["tools"]
+
+ dir_id = DIRECTORY2.id
+ dir_metadata = DirectoryIntrinsicMetadataRow(
+ id=dir_id,
+ tool=tool,
+ metadata=YARN_PARSER_METADATA,
+ mappings=["npm"],
+ )
+ origin_metadata = OriginIntrinsicMetadataRow(
+ id=origin,
+ tool=tool,
+ from_directory=dir_id,
+ metadata=YARN_PARSER_METADATA,
+ mappings=["npm"],
+ )
+
+ dir_results = list(idx_storage.directory_intrinsic_metadata_get([dir_id]))
+ for dir_result in dir_results:
+ assert dir_result.tool
+ del dir_result.tool["id"]
+ assert dir_results == [dir_metadata]
+
+ orig_results = list(idx_storage.origin_intrinsic_metadata_get([origin]))
+ for orig_result in orig_results:
+ assert orig_result.tool
+ del orig_result.tool["id"]
+ assert orig_results == [origin_metadata]
+
+
+def test_origin_metadata_indexer_revision(
swh_indexer_config,
idx_storage: IndexerStorageInterface,
storage: StorageInterface,
diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py
--- a/swh/indexer/tests/utils.py
+++ b/swh/indexer/tests/utils.py
@@ -19,10 +19,12 @@
Content,
Directory,
DirectoryEntry,
+ ObjectType,
Origin,
OriginVisit,
OriginVisitStatus,
Person,
+ Release,
Revision,
RevisionType,
Snapshot,
@@ -46,10 +48,15 @@
"type": "deposit",
"origin": "https://forge.softwareheritage.org/source/jesuisgpl/",
},
- {"type": "pypi", "origin": "https://pypi.org/project/limnoria/"},
+ {
+ "type": "pypi",
+ "origin": "https://old-pypi.example.org/project/limnoria/",
+ }, # with rev head
+ {"type": "pypi", "origin": "https://pypi.org/project/limnoria/"}, # with rel head
{"type": "svn", "origin": "http://0-512-md.googlecode.com/svn/"},
{"type": "git", "origin": "https://github.com/librariesio/yarn-parser"},
{"type": "git", "origin": "https://github.com/librariesio/yarn-parser.git"},
+ {"type": "git", "origin": "https://npm.example.org/yarn-parser"},
]
ORIGINS = [Origin(url=visit["origin"]) for visit in ORIGIN_VISITS]
@@ -120,7 +127,26 @@
REVISIONS = [REVISION]
+RELEASE = Release(
+ name=b"v0.0.0",
+ message=None,
+ author=Person(
+ name=b"Andrew Nesbitt",
+ fullname=b"Andrew Nesbitt <andrewnez@gmail.com>",
+ email=b"andrewnez@gmail.com",
+ ),
+ synthetic=False,
+ date=TimestampWithTimezone.from_datetime(
+ datetime.datetime(2017, 2, 20, 16, 14, 16, tzinfo=_utc_plus_2)
+ ),
+ target_type=ObjectType.DIRECTORY,
+ target=DIRECTORY2.id,
+)
+
+RELEASES = [RELEASE]
+
SNAPSHOTS = [
+ # https://github.com/SoftwareHeritage/swh-storage
Snapshot(
id=hash_to_bytes("a50fde72265343b7d28cecf6db20d98a81d21965"),
branches={
@@ -141,6 +167,7 @@
),
},
),
+ # rsync://ftp.gnu.org/gnu/3dldf
Snapshot(
id=hash_to_bytes("2c67f69a416bca4e1f3fcd848c588fab88ad0642"),
branches={
@@ -166,6 +193,7 @@
),
},
),
+    # https://forge.softwareheritage.org/source/jesuisgpl/
Snapshot(
id=hash_to_bytes("68c0d26104d47e278dd6be07ed61fafb561d0d20"),
branches={
@@ -175,6 +203,7 @@
)
},
),
+ # https://old-pypi.example.org/project/limnoria/
Snapshot(
id=hash_to_bytes("f255245269e15fc99d284affd79f766668de0b67"),
branches={
@@ -191,6 +220,23 @@
),
},
),
+ # https://pypi.org/project/limnoria/
+ Snapshot(
+ branches={
+ b"HEAD": SnapshotBranch(
+ target=b"releases/2018.09.09", target_type=TargetType.ALIAS
+ ),
+ b"releases/2018.09.01": SnapshotBranch(
+ target=b"<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d\xbb\xdfF\xfdw\xcf",
+ target_type=TargetType.RELEASE,
+ ),
+ b"releases/2018.09.09": SnapshotBranch(
+ target=b"\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8kA\x10\x9d\xc5\xfa2\xf8t", # noqa
+ target_type=TargetType.RELEASE,
+ ),
+ },
+ ),
+ # http://0-512-md.googlecode.com/svn/
Snapshot(
id=hash_to_bytes("a1a28c0ab387a8f9e0618cb705eab81fc448f473"),
branches={
@@ -200,6 +246,7 @@
)
},
),
+ # https://github.com/librariesio/yarn-parser
Snapshot(
id=hash_to_bytes("bb4fd3a836930ce629d912864319637040ff3040"),
branches={
@@ -209,6 +256,7 @@
)
},
),
+ # https://github.com/librariesio/yarn-parser.git
Snapshot(
id=hash_to_bytes("bb4fd3a836930ce629d912864319637040ff3040"),
branches={
@@ -218,8 +266,19 @@
)
},
),
+ # https://npm.example.org/yarn-parser
+ Snapshot(
+ branches={
+ b"HEAD": SnapshotBranch(
+ target=RELEASE.id,
+ target_type=TargetType.RELEASE,
+ )
+ },
+ ),
]
+assert len(SNAPSHOTS) == len(ORIGIN_VISITS)
+
SHA1_TO_LICENSES = {
"01c9379dfc33803963d07c1ccc748d3fe4c96bb5": ["GPL"],
@@ -562,6 +621,7 @@
storage.origin_add(ORIGINS)
storage.directory_add([DIRECTORY, DIRECTORY2])
storage.revision_add(REVISIONS)
+ storage.release_add(RELEASES)
storage.snapshot_add(SNAPSHOTS)
for visit, snapshot in zip(ORIGIN_VISITS, SNAPSHOTS):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 8:32 AM (11 w, 18 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216465
Attached To
D7941: Add support for indexing from head releases
Event Timeline
Log In to Comment