diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
index 02b95d5..f00f963 100644
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -1,323 +1,344 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import logging
import os
from codecs import BOM_UTF8
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
import attr
import chardet
from urllib.parse import quote
from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
Person,
RevisionType,
Revision,
TimestampWithTimezone,
Sha1Git,
)
-from swh.loader.package.loader import BasePackageInfo, PackageLoader
+from swh.loader.package.loader import (
+ BasePackageInfo,
+ PackageLoader,
+ RawExtrinsicMetadataCore,
+)
from swh.loader.package.utils import api_info, release_name
logger = logging.getLogger(__name__)
EMPTY_PERSON = Person(fullname=b"", name=None, email=None)
@attr.s
class NpmPackageInfo(BasePackageInfo):
raw_info = attr.ib(type=Dict[str, Any])
date = attr.ib(type=Optional[str])
shasum = attr.ib(type=str)
"""sha1 checksum"""
version = attr.ib(type=str)
@classmethod
def from_metadata(
cls, project_metadata: Dict[str, Any], version: str
) -> "NpmPackageInfo":
package_metadata = project_metadata["versions"][version]
url = package_metadata["dist"]["tarball"]
# No date available in intrinsic metadata: retrieve it from the API
# metadata, using the version number that the API claims this package
# has.
extrinsic_version = package_metadata["version"]
if "time" in project_metadata:
date = project_metadata["time"][extrinsic_version]
elif "mtime" in package_metadata:
date = package_metadata["mtime"]
else:
date = None
return cls(
url=url,
filename=os.path.basename(url),
date=date,
shasum=package_metadata["dist"]["shasum"],
version=extrinsic_version,
raw_info=package_metadata, # FIXME: loses some of the project metadata
)
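# Illustration (hypothetical registry payload): with
# project_metadata["time"] == {"0.0.1": "2014-01-01T15:40:33.231Z"},
# the date for version "0.0.1" comes from the registry-level "time" map;
# mirrored packages lacking "time" fall back to the per-version "mtime",
# and the date stays None when neither field is present.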
class NpmLoader(PackageLoader[NpmPackageInfo]):
"""Load npm origin's artifact releases into swh archive.
"""
visit_type = "npm"
def __init__(self, url: str):
"""Constructor
Args
str: origin url (e.g. https://www.npmjs.com/package/<package-name>)
"""
super().__init__(url=url)
package_name = url.split("https://www.npmjs.com/package/")[1]
safe_name = quote(package_name, safe="")
self.provider_url = f"https://replicate.npmjs.com/{safe_name}/"
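# Note: quote(..., safe="") also percent-encodes "/", so a scoped package
# name (e.g. a hypothetical "@scope/name") yields a single path segment:
# "https://replicate.npmjs.com/%40scope%2Fname/"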
self._info: Dict[str, Any] = {}
+ self._raw_info: Optional[bytes] = None
self._versions = None
@property
def info(self) -> Dict[str, Any]:
"""Return the project metadata information (fetched from npm registry)
"""
if not self._info:
- self._info = json.loads(api_info(self.provider_url))
+ self._raw_info = api_info(self.provider_url)
+ self._info = json.loads(self._raw_info)
return self._info
def get_versions(self) -> Sequence[str]:
return sorted(list(self.info["versions"].keys()))
def get_default_version(self) -> str:
return self.info["dist-tags"].get("latest", "")
+ def get_metadata_authority(self):
+ return MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url="https://npmjs.com/", metadata={},
+ )
+
+ def get_extrinsic_snapshot_metadata(self):
+ return [
+ RawExtrinsicMetadataCore(
+ format="replicate-npm-package-json",
+ metadata=self._raw_info,
+ discovery_date=None,
+ ),
+ ]
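+ # The raw, undecoded registry response is recorded as snapshot-level
+ # extrinsic metadata; with discovery_date=None, the loader's visit date
+ # is used when the metadata is stored.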
+
def get_package_info(self, version: str) -> Iterator[Tuple[str, NpmPackageInfo]]:
p_info = NpmPackageInfo.from_metadata(
project_metadata=self.info, version=version
)
yield release_name(version), p_info
def resolve_revision_from(
self, known_artifacts: Dict, p_info: NpmPackageInfo
) -> Optional[bytes]:
return artifact_to_revision_id(known_artifacts, p_info)
def build_revision(
self, p_info: NpmPackageInfo, uncompressed_path: str, directory: Sha1Git
) -> Optional[Revision]:
i_metadata = extract_intrinsic_metadata(uncompressed_path)
if not i_metadata:
return None
author = extract_npm_package_author(i_metadata)
message = i_metadata["version"].encode("ascii")
if p_info.date is None:
url = p_info.url
artifact_name = os.path.basename(url)
raise ValueError(
"Origin %s: Cannot determine upload time for artifact %s."
% (p_info.url, artifact_name)
)
date = TimestampWithTimezone.from_iso8601(p_info.date)
# FIXME: this is to remain bug-compatible with earlier versions:
date = attr.evolve(date, timestamp=attr.evolve(date.timestamp, microseconds=0))
r = Revision(
type=RevisionType.TAR,
message=message,
author=author,
date=date,
committer=author,
committer_date=date,
parents=(),
directory=directory,
synthetic=True,
metadata={
"intrinsic": {"tool": "package.json", "raw": i_metadata,},
"extrinsic": {
"provider": self.provider_url,
"when": self.visit_date.isoformat(),
"raw": p_info.raw_info,
},
},
)
return r
def artifact_to_revision_id(
known_artifacts: Dict, p_info: NpmPackageInfo
) -> Optional[bytes]:
"""Given metadata artifact, solves the associated revision id.
The following code allows to deal with 2 metadata formats:
- old format sample::
{
'package_source': {
'sha1': '05181c12cd8c22035dd31155656826b85745da37',
}
}
- new format sample::
{
'original_artifact': [{
'checksums': {
'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa
...
},
}],
...
}
"""
shasum = p_info.shasum
for rev_id, known_artifact in known_artifacts.items():
known_original_artifact = known_artifact.get("original_artifact")
if not known_original_artifact:
# previous loader-npm version kept original artifact elsewhere
known_original_artifact = known_artifact.get("package_source")
if not known_original_artifact:
continue
original_hash = known_original_artifact["sha1"]
else:
assert isinstance(known_original_artifact, list)
original_hash = known_original_artifact[0]["checksums"]["sha1"]
if shasum == original_hash:
return rev_id
return None
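# Minimal usage sketch (hypothetical revision id and checksum):
#
#   known = {
#       b"rev-id": {"original_artifact": [{"checksums": {"sha1": "05181c12..."}}]},
#   }
#   artifact_to_revision_id(known, p_info)  # -> b"rev-id" iff p_info.shasum matches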
def _author_str(author_data: Union[Dict, List, str]) -> str:
"""Parse author from package.json author fields
"""
if isinstance(author_data, dict):
author_str = ""
name = author_data.get("name")
if name is not None:
if isinstance(name, str):
author_str += name
elif isinstance(name, list):
author_str += _author_str(name[0]) if len(name) > 0 else ""
email = author_data.get("email")
if email is not None:
author_str += f" <{email}>"
result = author_str
elif isinstance(author_data, list):
result = _author_str(author_data[0]) if len(author_data) > 0 else ""
else:
result = author_data
return result
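# For example (the same cases are exercised by test_npm_author_str):
#   _author_str({"name": "groot", "email": "groot@galaxy.org"})  # "groot <groot@galaxy.org>"
#   _author_str(["Al from quantum leap", "hal from 2001 space odyssey"])  # first entry wins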
def extract_npm_package_author(package_json: Dict[str, Any]) -> Person:
"""
Extract package author from a ``package.json`` file content and
return it in swh format.
Args:
package_json: Dict holding the content of parsed
``package.json`` file
Returns:
Person
"""
for author_key in ("author", "authors"):
if author_key in package_json:
author_data = package_json[author_key]
if author_data is None:
return EMPTY_PERSON
author_str = _author_str(author_data)
return Person.from_fullname(author_str.encode())
return EMPTY_PERSON
def _lstrip_bom(s, bom=BOM_UTF8):
if s.startswith(bom):
return s[len(bom) :]
else:
return s
def load_json(json_bytes):
"""
Try to load JSON from bytes and return a dictionary.
First try to decode from utf-8. If that fails, try to
detect the encoding and decode again with "replace"
error handling.
If JSON is malformed, an empty dictionary will be returned.
Args:
json_bytes (bytes): binary content of a JSON file
Returns:
dict: JSON data loaded in a dictionary
"""
json_data = {}
try:
json_str = _lstrip_bom(json_bytes).decode("utf-8")
except UnicodeDecodeError:
encoding = chardet.detect(json_bytes)["encoding"]
if not encoding:
# encoding could not be detected: give up and return an empty dict
return json_data
json_str = json_bytes.decode(encoding, "replace")
try:
json_data = json.loads(json_str)
except json.decoder.JSONDecodeError:
pass
return json_data
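# Behavior sketch (hypothetical inputs):
#   load_json(BOM_UTF8 + b'{"name": "foo"}')  # BOM stripped -> {"name": "foo"}
#   load_json(b"not json")  # malformed -> {}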
def extract_intrinsic_metadata(dir_path: str) -> Dict:
"""Given an uncompressed path holding the pkginfo file, returns a
pkginfo parsed structure as a dict.
The release artifact contains at their root one folder. For example:
$ tar tvf zprint-0.0.6.tar.gz
drwxr-xr-x root/root 0 2018-08-22 11:01 zprint-0.0.6/
...
Args:
dir_path (str): Path to the uncompressed directory
representing a release artifact from npm.
Returns:
the package.json content parsed as a dict, or an empty
dict if none was present.
"""
# Retrieve the root folder of the archive
if not os.path.exists(dir_path):
return {}
lst = os.listdir(dir_path)
if len(lst) == 0:
return {}
project_dirname = lst[0]
package_json_path = os.path.join(dir_path, project_dirname, "package.json")
if not os.path.exists(package_json_path):
return {}
with open(package_json_path, "rb") as package_json_file:
package_json_bytes = package_json_file.read()
return load_json(package_json_bytes)
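# End-to-end sketch (assumes a configured swh storage, as in the tests below):
#   loader = NpmLoader("https://www.npmjs.com/package/org")
#   status = loader.load()  # e.g. {"status": "eventful", "snapshot_id": "..."}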
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
index 1607bee..8d8a02a 100644
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -1,659 +1,705 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import os
import pytest
-from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Person, Snapshot, SnapshotBranch, TargetType
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.identifiers import SWHID
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ Person,
+ RawExtrinsicMetadata,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+)
+
+from swh.storage.interface import PagedResult
+from swh.loader.package import __version__
from swh.loader.package.npm.loader import (
_author_str,
NpmLoader,
extract_npm_package_author,
artifact_to_revision_id,
)
from swh.loader.package.tests.common import check_metadata_paths
from swh.loader.tests import (
assert_last_visit_matches,
check_snapshot,
get_stats,
)
+@pytest.fixture
+def org_api_info(datadir) -> bytes:
+ with open(os.path.join(datadir, "https_replicate.npmjs.com", "org"), "rb",) as f:
+ return f.read()
+
+
def test_npm_author_str():
for author, expected_author in [
("author", "author"),
(
["Al from quantum leap", "hal from 2001 space odyssey"],
"Al from quantum leap",
),
([], ""),
({"name": "groot", "email": "groot@galaxy.org",}, "groot <groot@galaxy.org>"),
({"name": "somebody",}, "somebody"),
({"email": "no@one.org"}, " <no@one.org>"), # note first elt is an extra blank
({"name": "no one", "email": None,}, "no one"),
({"email": None,}, ""),
({"name": None}, ""),
({"name": None, "email": None,}, ""),
({}, ""),
(None, None),
({"name": []}, "",),
(
{"name": ["Susan McSween", "William H. Bonney", "Doc Scurlock",]},
"Susan McSween",
),
]:
assert _author_str(author) == expected_author
def test_extract_npm_package_author(datadir):
package_metadata_filepath = os.path.join(
datadir, "https_replicate.npmjs.com", "org_visit1"
)
with open(package_metadata_filepath) as json_file:
package_metadata = json.load(json_file)
assert extract_npm_package_author(package_metadata["versions"]["0.0.2"]) == Person(
fullname=b"mooz <stillpedant@gmail.com>",
name=b"mooz",
email=b"stillpedant@gmail.com",
)
assert extract_npm_package_author(package_metadata["versions"]["0.0.3"]) == Person(
fullname=b"Masafumi Oyamada <stillpedant@gmail.com>",
name=b"Masafumi Oyamada",
email=b"stillpedant@gmail.com",
)
package_json = json.loads(
"""
{
"name": "highlightjs-line-numbers.js",
"version": "2.7.0",
"description": "Highlight.js line numbers plugin.",
"main": "src/highlightjs-line-numbers.js",
"dependencies": {},
"devDependencies": {
"gulp": "^4.0.0",
"gulp-rename": "^1.4.0",
"gulp-replace": "^0.6.1",
"gulp-uglify": "^1.2.0"
},
"repository": {
"type": "git",
"url": "https://github.com/wcoder/highlightjs-line-numbers.js.git"
},
"author": "Yauheni Pakala <evgeniy.pakalo@gmail.com>",
"license": "MIT",
"bugs": {
"url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues"
},
"homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/"
}"""
)
assert extract_npm_package_author(package_json) == Person(
fullname=b"Yauheni Pakala <evgeniy.pakalo@gmail.com>",
name=b"Yauheni Pakala",
email=b"evgeniy.pakalo@gmail.com",
)
package_json = json.loads(
"""
{
"name": "3-way-diff",
"version": "0.0.1",
"description": "3-way diffing of JavaScript objects",
"main": "index.js",
"authors": [
{
"name": "Shawn Walsh",
"url": "https://github.com/shawnpwalsh"
},
{
"name": "Markham F Rollins IV",
"url": "https://github.com/mrollinsiv"
}
],
"keywords": [
"3-way diff",
"3 way diff",
"three-way diff",
"three way diff"
],
"devDependencies": {
"babel-core": "^6.20.0",
"babel-preset-es2015": "^6.18.0",
"mocha": "^3.0.2"
},
"dependencies": {
"lodash": "^4.15.0"
}
}"""
)
assert extract_npm_package_author(package_json) == Person(
fullname=b"Shawn Walsh", name=b"Shawn Walsh", email=None
)
package_json = json.loads(
"""
{
"name": "yfe-ynpm",
"version": "1.0.0",
"homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm",
"repository": {
"type": "git",
"url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git"
},
"author": [
"fengmk2 <fengmk2@gmail.com> (https://fengmk2.com)",
"xufuzi <xufuzi@ywwl.com> (https://7993.org)"
],
"license": "MIT"
}"""
)
assert extract_npm_package_author(package_json) == Person(
fullname=b"fengmk2 <fengmk2@gmail.com> (https://fengmk2.com)",
name=b"fengmk2",
email=b"fengmk2@gmail.com",
)
package_json = json.loads(
"""
{
"name": "umi-plugin-whale",
"version": "0.0.8",
"description": "Internal contract component",
"authors": {
"name": "xiaohuoni",
"email": "448627663@qq.com"
},
"repository": "alitajs/whale",
"devDependencies": {
"np": "^3.0.4",
"umi-tools": "*"
},
"license": "MIT"
}"""
)
assert extract_npm_package_author(package_json) == Person(
fullname=b"xiaohuoni <448627663@qq.com>",
name=b"xiaohuoni",
email=b"448627663@qq.com",
)
package_json_no_authors = json.loads(
"""{
"authors": null,
"license": "MIT"
}"""
)
assert extract_npm_package_author(package_json_no_authors) == Person(
fullname=b"", name=None, email=None
)
def normalize_hashes(hashes):
if isinstance(hashes, str):
return hash_to_bytes(hashes)
if isinstance(hashes, list):
return [hash_to_bytes(x) for x in hashes]
return {hash_to_bytes(k): hash_to_bytes(v) for k, v in hashes.items()}
_expected_new_contents_first_visit = normalize_hashes(
[
"4ce3058e16ab3d7e077f65aabf855c34895bf17c",
"858c3ceee84c8311adc808f8cdb30d233ddc9d18",
"0fa33b4f5a4e0496da6843a38ff1af8b61541996",
"85a410f8ef8eb8920f2c384a9555566ad4a2e21b",
"9163ac8025923d5a45aaac482262893955c9b37b",
"692cf623b8dd2c5df2c2998fd95ae4ec99882fb4",
"18c03aac6d3e910efb20039c15d70ab5e0297101",
"41265c42446aac17ca769e67d1704f99e5a1394d",
"783ff33f5882813dca9239452c4a7cadd4dba778",
"b029cfb85107aee4590c2434a3329bfcf36f8fa1",
"112d1900b4c2e3e9351050d1b542c9744f9793f3",
"5439bbc4bd9a996f1a38244e6892b71850bc98fd",
"d83097a2f994b503185adf4e719d154123150159",
"d0939b4898e83090ee55fd9d8a60e312cfadfbaf",
"b3523a26f7147e4af40d9d462adaae6d49eda13e",
"cd065fb435d6fb204a8871bcd623d0d0e673088c",
"2854a40855ad839a54f4b08f5cff0cf52fca4399",
"b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe",
"0f73d56e1cf480bded8a1ecf20ec6fc53c574713",
"0d9882b2dfafdce31f4e77fe307d41a44a74cefe",
"585fc5caab9ead178a327d3660d35851db713df1",
"e8cd41a48d79101977e3036a87aeb1aac730686f",
"5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7",
"9c3cc2763bf9e9e37067d3607302c4776502df98",
"3649a68410e354c83cd4a38b66bd314de4c8f5c9",
"e96ed0c091de1ebdf587104eaf63400d1974a1fe",
"078ca03d2f99e4e6eab16f7b75fbb7afb699c86c",
"38de737da99514de6559ff163c988198bc91367a",
]
)
_expected_new_directories_first_visit = normalize_hashes(
[
"3370d20d6f96dc1c9e50f083e2134881db110f4f",
"42753c0c2ab00c4501b552ac4671c68f3cf5aece",
"d7895533ef5edbcffdea3f057d9fef3a1ef845ce",
"80579be563e2ef3e385226fe7a3f079b377f142c",
"3b0ddc6a9e58b4b53c222da4e27b280b6cda591c",
"bcad03ce58ac136f26f000990fc9064e559fe1c0",
"5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca",
"e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd",
"584b5b4b6cf7f038095e820b99386a9c232de931",
"184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a",
"bb5f4ee143c970367eb409f2e4c1104898048b9d",
"1b95491047add1103db0dfdfa84a9735dcb11e88",
"a00c6de13471a2d66e64aca140ddb21ef5521e62",
"5ce6c1cd5cda2d546db513aaad8c72a44c7771e2",
"c337091e349b6ac10d38a49cdf8c2401ef9bb0f2",
"202fafcd7c0f8230e89d5496ad7f44ab12b807bf",
"775cc516543be86c15c1dc172f49c0d4e6e78235",
"ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e",
]
)
_expected_new_revisions_first_visit = normalize_hashes(
{
"d8a1c7474d2956ac598a19f0f27d52f7015f117e": (
"42753c0c2ab00c4501b552ac4671c68f3cf5aece"
),
"5f9eb78af37ffd12949f235e86fac04898f9f72a": (
"3370d20d6f96dc1c9e50f083e2134881db110f4f"
),
"ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a": (
"d7895533ef5edbcffdea3f057d9fef3a1ef845ce"
),
}
)
def package_url(package):
return "https://www.npmjs.com/package/%s" % package
def package_metadata_url(package):
return "https://replicate.npmjs.com/%s/" % package
def test_revision_metadata_structure(swh_config, requests_mock_datadir):
package = "org"
loader = NpmLoader(package_url(package))
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] is not None
expected_revision_id = hash_to_bytes("d8a1c7474d2956ac598a19f0f27d52f7015f117e")
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(
revision["metadata"],
paths=[
("intrinsic.tool", str),
("intrinsic.raw", dict),
("extrinsic.provider", str),
("extrinsic.when", str),
("extrinsic.raw", dict),
("original_artifact", list),
],
)
for original_artifact in revision["metadata"]["original_artifact"]:
check_metadata_paths(
original_artifact,
paths=[("filename", str), ("length", int), ("checksums", dict),],
)
-def test_npm_loader_first_visit(swh_config, requests_mock_datadir):
+def test_npm_loader_first_visit(swh_config, requests_mock_datadir, org_api_info):
package = "org"
url = package_url(package)
loader = NpmLoader(url)
actual_load_status = loader.load()
expected_snapshot_id = hash_to_bytes("d0587e1195aed5a8800411a008f2f2d627f18e2d")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
}
assert_last_visit_matches(
loader.storage, url, status="full", type="npm", snapshot=expected_snapshot_id
)
stats = get_stats(loader.storage)
assert {
"content": len(_expected_new_contents_first_visit),
"directory": len(_expected_new_directories_first_visit),
"origin": 1,
"origin_visit": 1,
"person": 2,
"release": 0,
"revision": len(_expected_new_revisions_first_visit),
"skipped_content": 0,
"snapshot": 1,
} == stats
assert len(
list(loader.storage.content_get(_expected_new_contents_first_visit))
) == len(_expected_new_contents_first_visit)
assert (
list(loader.storage.directory_missing(_expected_new_directories_first_visit))
== []
)
assert (
list(loader.storage.revision_missing(_expected_new_revisions_first_visit)) == []
)
expected_snapshot = Snapshot(
id=expected_snapshot_id,
branches={
b"HEAD": SnapshotBranch(
target=b"releases/0.0.4", target_type=TargetType.ALIAS
),
b"releases/0.0.2": SnapshotBranch(
target=hash_to_bytes("d8a1c7474d2956ac598a19f0f27d52f7015f117e"),
target_type=TargetType.REVISION,
),
b"releases/0.0.3": SnapshotBranch(
target=hash_to_bytes("5f9eb78af37ffd12949f235e86fac04898f9f72a"),
target_type=TargetType.REVISION,
),
b"releases/0.0.4": SnapshotBranch(
target=hash_to_bytes("ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a"),
target_type=TargetType.REVISION,
),
},
)
check_snapshot(expected_snapshot, loader.storage)
+ snapshot_swhid = SWHID(
+ object_type="snapshot", object_id=hash_to_hex(expected_snapshot_id)
+ )
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url="https://npmjs.com/",
+ )
+ expected_metadata = [
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.SNAPSHOT,
+ id=snapshot_swhid,
+ authority=metadata_authority,
+ fetcher=MetadataFetcher(
+ name="swh.loader.package.npm.loader.NpmLoader", version=__version__,
+ ),
+ discovery_date=loader.visit_date,
+ format="replicate-npm-package-json",
+ metadata=org_api_info,
+ origin="https://www.npmjs.com/package/org",
+ )
+ ]
+ assert loader.storage.raw_extrinsic_metadata_get(
+ type=MetadataTargetType.SNAPSHOT,
+ id=snapshot_swhid,
+ authority=metadata_authority,
+ ) == PagedResult(next_page_token=None, results=expected_metadata,)
+
def test_npm_loader_incremental_visit(swh_config, requests_mock_datadir_visits):
package = "org"
url = package_url(package)
loader = NpmLoader(url)
expected_snapshot_id = hash_to_bytes("d0587e1195aed5a8800411a008f2f2d627f18e2d")
actual_load_status = loader.load()
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
}
assert_last_visit_matches(
loader.storage, url, status="full", type="npm", snapshot=expected_snapshot_id
)
stats = get_stats(loader.storage)
assert {
"content": len(_expected_new_contents_first_visit),
"directory": len(_expected_new_directories_first_visit),
"origin": 1,
"origin_visit": 1,
"person": 2,
"release": 0,
"revision": len(_expected_new_revisions_first_visit),
"skipped_content": 0,
"snapshot": 1,
} == stats
loader._info = None # reset loader internal state
actual_load_status2 = loader.load()
assert actual_load_status2["status"] == "eventful"
snap_id2 = actual_load_status2["snapshot_id"]
assert snap_id2 is not None
assert snap_id2 != actual_load_status["snapshot_id"]
assert_last_visit_matches(loader.storage, url, status="full", type="npm")
stats = get_stats(loader.storage)
assert {  # 3 new release artifacts
"content": len(_expected_new_contents_first_visit) + 14,
"directory": len(_expected_new_directories_first_visit) + 15,
"origin": 1,
"origin_visit": 2,
"person": 2,
"release": 0,
"revision": len(_expected_new_revisions_first_visit) + 3,
"skipped_content": 0,
"snapshot": 2,
} == stats
urls = [
m.url
for m in requests_mock_datadir_visits.request_history
if m.url.startswith("https://registry.npmjs.org")
]
assert len(urls) == len(set(urls))  # we visited each artifact only once across the two visits
@pytest.mark.usefixtures("requests_mock_datadir")
def test_npm_loader_version_divergence(swh_config):
package = "@aller_shared"
url = package_url(package)
loader = NpmLoader(url)
actual_load_status = loader.load()
expected_snapshot_id = hash_to_bytes("b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
}
assert_last_visit_matches(
loader.storage, url, status="full", type="npm", snapshot=expected_snapshot_id
)
stats = get_stats(loader.storage)
assert {  # 1 new release artifact
"content": 534,
"directory": 153,
"origin": 1,
"origin_visit": 1,
"person": 1,
"release": 0,
"revision": 2,
"skipped_content": 0,
"snapshot": 1,
} == stats
expected_snapshot = Snapshot(
id=expected_snapshot_id,
branches={
b"HEAD": SnapshotBranch(
target_type=TargetType.ALIAS, target=b"releases/0.1.0"
),
b"releases/0.1.0": SnapshotBranch(
target_type=TargetType.REVISION,
target=hash_to_bytes("845673bfe8cbd31b1eaf757745a964137e6f9116"),
),
b"releases/0.1.1-alpha.14": SnapshotBranch(
target_type=TargetType.REVISION,
target=hash_to_bytes("05181c12cd8c22035dd31155656826b85745da37"),
),
},
)
check_snapshot(expected_snapshot, loader.storage)
def test_npm_artifact_to_revision_id_none():
"""Current loader version should stop soon if nothing can be found
"""
class artifact_metadata:
shasum = "05181c12cd8c22035dd31155656826b85745da37"
known_artifacts = {
"b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92": {},
}
assert artifact_to_revision_id(known_artifacts, artifact_metadata) is None
def test_npm_artifact_to_revision_id_old_loader_version():
"""Current loader version should solve old metadata scheme
"""
class artifact_metadata:
shasum = "05181c12cd8c22035dd31155656826b85745da37"
known_artifacts = {
hash_to_bytes("b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92"): {
"package_source": {"sha1": "something-wrong"}
},
hash_to_bytes("845673bfe8cbd31b1eaf757745a964137e6f9116"): {
"package_source": {"sha1": "05181c12cd8c22035dd31155656826b85745da37",}
},
}
assert artifact_to_revision_id(known_artifacts, artifact_metadata) == hash_to_bytes(
"845673bfe8cbd31b1eaf757745a964137e6f9116"
)
def test_npm_artifact_to_revision_id_current_loader_version():
"""Current loader version should be able to solve current metadata scheme
"""
class artifact_metadata:
shasum = "05181c12cd8c22035dd31155656826b85745da37"
known_artifacts = {
hash_to_bytes("b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92"): {
"original_artifact": [
{"checksums": {"sha1": "05181c12cd8c22035dd31155656826b85745da37"},}
],
},
hash_to_bytes("845673bfe8cbd31b1eaf757745a964137e6f9116"): {
"original_artifact": [{"checksums": {"sha1": "something-wrong"},}],
},
}
assert artifact_to_revision_id(known_artifacts, artifact_metadata) == hash_to_bytes(
"b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92"
)
def test_npm_artifact_with_no_intrinsic_metadata(swh_config, requests_mock_datadir):
"""Skip artifact with no intrinsic metadata during ingestion
"""
package = "nativescript-telerik-analytics"
url = package_url(package)
loader = NpmLoader(url)
actual_load_status = loader.load()
# no branch, as the only artifact has no intrinsic metadata
expected_snapshot = Snapshot(
id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"), branches={},
)
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot.id.hex(),
}
check_snapshot(expected_snapshot, loader.storage)
assert_last_visit_matches(
loader.storage, url, status="full", type="npm", snapshot=expected_snapshot.id
)
def test_npm_artifact_with_no_upload_time(swh_config, requests_mock_datadir):
"""With no time upload, artifact is skipped
"""
package = "jammit-no-time"
url = package_url(package)
loader = NpmLoader(url)
actual_load_status = loader.load()
# no branch, as the only artifact has no upload time
expected_snapshot = Snapshot(
id=hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"), branches={},
)
assert actual_load_status == {
"status": "uneventful",
"snapshot_id": expected_snapshot.id.hex(),
}
check_snapshot(expected_snapshot, loader.storage)
assert_last_visit_matches(
loader.storage, url, status="partial", type="npm", snapshot=expected_snapshot.id
)
def test_npm_artifact_use_mtime_if_no_time(swh_config, requests_mock_datadir):
"""With no time upload, artifact is skipped
"""
package = "jammit-express"
url = package_url(package)
loader = NpmLoader(url)
actual_load_status = loader.load()
expected_snapshot_id = hash_to_bytes("d6e08e19159f77983242877c373c75222d5ae9dd")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
}
# artifact is used
expected_snapshot = Snapshot(
id=expected_snapshot_id,
branches={
b"HEAD": SnapshotBranch(
target_type=TargetType.ALIAS, target=b"releases/0.0.1"
),
b"releases/0.0.1": SnapshotBranch(
target_type=TargetType.REVISION,
target=hash_to_bytes("9e4dd2b40d1b46b70917c0949aa2195c823a648e"),
),
},
)
check_snapshot(expected_snapshot, loader.storage)
assert_last_visit_matches(
loader.storage, url, status="full", type="npm", snapshot=expected_snapshot.id
)
def test_npm_no_artifact(swh_config, requests_mock_datadir):
"""If no artifacts at all is found for origin, the visit fails completely
"""
package = "catify"
url = package_url(package)
loader = NpmLoader(url)
actual_load_status = loader.load()
assert actual_load_status == {
"status": "failed",
}
assert_last_visit_matches(loader.storage, url, status="partial", type="npm")