Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066191
D7469.id27275.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D7469.id27275.diff
View Options
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -7,6 +7,7 @@
import json
import logging
import os
+import string
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
from urllib.parse import quote
@@ -16,11 +17,9 @@
from swh.loader.package.loader import (
BasePackageInfo,
PackageLoader,
- PartialExtID,
RawExtrinsicMetadataCore,
)
from swh.loader.package.utils import api_info, cached_method, release_name
-from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
@@ -38,18 +37,25 @@
EMPTY_PERSON = Person.from_fullname(b"")
-EXTID_TYPE = "npm-archive-sha1"
-EXTID_VERSION = 0
-
-
@attr.s
class NpmPackageInfo(BasePackageInfo):
raw_info = attr.ib(type=Dict[str, Any])
+ id_ = attr.ib(type=str)
+ """Unique id assigned by the registry for this version."""
+
date = attr.ib(type=Optional[str])
shasum = attr.ib(type=str)
"""sha1 checksum"""
+ # We cannot rely on $shasum alone: it is technically possible for two versions
+ # of the same package to have the exact same tarball.
+ # The release data (message and date), however, is extrinsic to the content of
+ # the package and differs between versions, so $id_ is included as well.
+ MANIFEST_FORMAT = string.Template("$id_ $shasum")
+ EXTID_TYPE = "npm-archive-url-and-sha1"
+ EXTID_VERSION = 0
+
@classmethod
def from_metadata(
cls, project_metadata: Dict[str, Any], version: str
@@ -70,6 +76,7 @@
date = None
return cls(
+ id_=package_metadata["_id"],
url=url,
filename=os.path.basename(url),
date=date,
@@ -84,9 +91,6 @@
],
)
- def extid(self) -> PartialExtID:
- return (EXTID_TYPE, EXTID_VERSION, hash_to_bytes(self.shasum))
-
class NpmLoader(PackageLoader[NpmPackageInfo]):
"""Load npm origin's artifact releases into swh archive.
diff --git a/swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.3-beta.tgz b/swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.3-beta.tgz
new file mode 120000
--- /dev/null
+++ b/swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.3-beta.tgz
@@ -0,0 +1 @@
+org_-_org-0.0.3.tgz
\ No newline at end of file
diff --git a/swh/loader/package/npm/tests/data/https_replicate.npmjs.com/org_version_mismatch b/swh/loader/package/npm/tests/data/https_replicate.npmjs.com/org_version_mismatch
new file mode 100644
--- /dev/null
+++ b/swh/loader/package/npm/tests/data/https_replicate.npmjs.com/org_version_mismatch
@@ -0,0 +1,141 @@
+{
+ "_id": "org",
+ "_rev": "4-22484cc537f12d3023241211ee34e39d",
+ "name": "org",
+ "description": "A parser and converter for org-mode notation",
+ "dist-tags": {
+ "latest": "0.0.3"
+ },
+ "versions": {
+ "0.0.3-beta": {
+ "name": "org",
+ "description": "A parser and converter for org-mode notation",
+ "homepage": "http://mooz.github.com/org-js",
+ "keywords": [
+ "org-mode",
+ "emacs",
+ "parser"
+ ],
+ "author": {
+ "name": "mooz",
+ "email": "stillpedant@gmail.com"
+ },
+ "main": "./lib/org.js",
+ "version": "0.0.3-beta",
+ "directories": {
+ "test": "./tests"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/mooz/org-js.git"
+ },
+ "bugs": {
+ "url": "https://github.com/mooz/org-js/issues"
+ },
+ "_id": "org@0.0.3-beta",
+ "dist": {
+ "shasum": "6a44220f88903a6dfc3b47d010238058f9faf3a0",
+ "tarball": "https://registry.npmjs.org/org/-/org-0.0.3-beta.tgz"
+ },
+ "_from": ".",
+ "_npmVersion": "1.2.25",
+ "_npmUser": {
+ "name": "mooz",
+ "email": "stillpedant@gmail.com"
+ },
+ "maintainers": [
+ {
+ "name": "mooz",
+ "email": "stillpedant@gmail.com"
+ }
+ ]
+ },
+ "0.0.3": {
+ "name": "org",
+ "description": "A parser and converter for org-mode notation",
+ "homepage": "http://mooz.github.com/org-js",
+ "bugs": {
+ "url": "http://github.com/mooz/org-s/issues"
+ },
+ "keywords": [
+ "org-mode",
+ "emacs",
+ "parser"
+ ],
+ "author": {
+ "name": "Masafumi Oyamada",
+ "email": "stillpedant@gmail.com",
+ "url": "http://mooz.github.io/"
+ },
+ "licenses": [
+ {
+ "type": "MIT"
+ }
+ ],
+ "main": "./lib/org.js",
+ "version": "0.0.3",
+ "directories": {
+ "test": "./tests"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/mooz/org-js.git"
+ },
+ "_id": "org@0.0.3",
+ "dist": {
+ "shasum": "6a44220f88903a6dfc3b47d010238058f9faf3a0",
+ "tarball": "https://registry.npmjs.org/org/-/org-0.0.3.tgz"
+ },
+ "_from": ".",
+ "_npmVersion": "1.2.25",
+ "_npmUser": {
+ "name": "mooz",
+ "email": "stillpedant@gmail.com"
+ },
+ "maintainers": [
+ {
+ "name": "mooz",
+ "email": "stillpedant@gmail.com"
+ }
+ ]
+ }
+ },
+ "readme": "org-js\n======\n\nParser and converter for org-mode (<http://orgmode.org/>) notation written in JavaScript.\n\nInteractive Editor\n------------------\n\nFor working example, see http://mooz.github.com/org-js/editor/.\n\nInstallation\n------------\n\n npm install org\n\nSimple example of org -> HTML conversion\n----------------------------------------\n\n```javascript\nvar org = require(\"org\");\n\nvar parser = new org.Parser();\nvar orgDocument = parser.parse(orgCode);\nvar orgHTMLDocument = orgDocument.convert(org.ConverterHTML, {\n headerOffset: 1,\n exportFromLineNumber: false,\n suppressSubScriptHandling: false,\n suppressAutoLink: false\n});\n\nconsole.dir(orgHTMLDocument); // => { title, contentHTML, tocHTML, toc }\nconsole.log(orgHTMLDocument.toString()) // => Rendered HTML\n```\n\nWriting yet another converter\n-----------------------------\n\nSee `lib/org/converter/html.js`.\n",
+ "maintainers": [
+ {
+ "name": "mooz",
+ "email": "stillpedant@gmail.com"
+ }
+ ],
+ "time": {
+ "modified": "2019-01-05T01:37:44Z",
+ "created": "2014-01-01T15:40:31Z",
+ "0.0.3-beta": "2014-01-01T15:40:33Z",
+ "0.0.3": "2014-01-01T15:55:45Z"
+ },
+ "author": {
+ "name": "Masafumi Oyamada",
+ "email": "stillpedant@gmail.com",
+ "url": "http://mooz.github.io/"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/mooz/org-js.git"
+ },
+ "users": {
+ "nak2k": true,
+ "bgschaid": true,
+ "422665vijay": true,
+ "nontau": true
+ },
+ "homepage": "http://mooz.github.com/org-js",
+ "keywords": [
+ "org-mode",
+ "emacs",
+ "parser"
+ ],
+ "bugs": {
+ "url": "http://github.com/mooz/org-s/issues"
+ },
+ "readmeFilename": "README.md"
+}
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -528,6 +528,94 @@
} == stats
+def test_npm_loader_duplicate_shasum(swh_storage, requests_mock_datadir):
+ """Test with two versions that have exactly the same tarball"""
+ package = "org_version_mismatch"
+ url = package_url(package)
+ loader = NpmLoader(swh_storage, url)
+
+ actual_load_status = loader.load()
+ expected_snapshot_id = hash_to_bytes("ac867a4c22ba4e22a022d319f309714477412a5a")
+ assert actual_load_status == {
+ "status": "eventful",
+ "snapshot_id": expected_snapshot_id.hex(),
+ }
+
+ assert_last_visit_matches(
+ swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
+ )
+
+ beta_release_id = "e6d5490a02ac2a8dcd49702f9ccd5a64c90a46f1"
+ release_id = "f6985f437e28db6eb1b7533230e05ed99f2c91f0"
+ versions = [
+ ("0.0.3-beta", beta_release_id),
+ ("0.0.3", release_id),
+ ]
+
+ expected_snapshot = Snapshot(
+ id=expected_snapshot_id,
+ branches={
+ b"HEAD": SnapshotBranch(
+ target=b"releases/0.0.3", target_type=TargetType.ALIAS
+ ),
+ **{
+ b"releases/"
+ + version_name.encode(): SnapshotBranch(
+ target=hash_to_bytes(version_id), target_type=TargetType.RELEASE,
+ )
+ for (version_name, version_id) in versions
+ },
+ },
+ )
+ check_snapshot(expected_snapshot, swh_storage)
+
+ assert swh_storage.release_get([hash_to_bytes(beta_release_id)])[0] == Release(
+ name=b"0.0.3-beta",
+ message=(
+ b"Synthetic release for NPM source package org_version_mismatch "
+ b"version 0.0.3-beta\n"
+ ),
+ target=hash_to_bytes("3370d20d6f96dc1c9e50f083e2134881db110f4f"),
+ target_type=ModelObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person.from_fullname(b"Masafumi Oyamada <stillpedant@gmail.com>"),
+ date=TimestampWithTimezone.from_datetime(
+ datetime.datetime(2014, 1, 1, 15, 40, 33, tzinfo=datetime.timezone.utc)
+ ),
+ id=hash_to_bytes(beta_release_id),
+ )
+
+ assert swh_storage.release_get([hash_to_bytes(release_id)])[0] == Release(
+ name=b"0.0.3",
+ message=(
+ b"Synthetic release for NPM source package org_version_mismatch "
+ b"version 0.0.3\n"
+ ),
+ target=hash_to_bytes("3370d20d6f96dc1c9e50f083e2134881db110f4f"),
+ target_type=ModelObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person.from_fullname(b"Masafumi Oyamada <stillpedant@gmail.com>"),
+ date=TimestampWithTimezone.from_datetime(
+ datetime.datetime(2014, 1, 1, 15, 55, 45, tzinfo=datetime.timezone.utc)
+ ),
+ id=hash_to_bytes(release_id),
+ )
+
+ # Check incremental re-load keeps it unchanged
+
+ loader = NpmLoader(swh_storage, url)
+
+ actual_load_status = loader.load()
+ assert actual_load_status == {
+ "status": "uneventful",
+ "snapshot_id": expected_snapshot_id.hex(),
+ }
+
+ assert_last_visit_matches(
+ swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
+ )
+
+
def test_npm_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir):
"""Skip artifact with no intrinsic metadata during ingestion
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 4 2024, 11:46 PM (11 w, 18 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219513
Attached To
D7469: npm: Include package version id in ExtID manifest
Event Timeline
Log In to Comment