diff --git a/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.5.tgz b/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.5.tgz
new file mode 100644
index 0000000..8381a52
Binary files /dev/null and b/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.5.tgz differ
diff --git a/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.1.0.tgz b/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.1.0.tgz
new file mode 100644
index 0000000..738c28a
Binary files /dev/null and b/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.1.0.tgz differ
diff --git a/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.2.0.tgz b/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.2.0.tgz
new file mode 100644
index 0000000..544bf08
Binary files /dev/null and b/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.2.0.tgz differ
diff --git a/swh/loader/package/tests/resources/replicate.npmjs.com/org_metadata_visit1.json b/swh/loader/package/tests/resources/replicate.npmjs.com/org_metadata_visit1.json
deleted file mode 100644
index 3aba6b1..0000000
--- a/swh/loader/package/tests/resources/replicate.npmjs.com/org_metadata_visit1.json
+++ /dev/null
@@ -1,191 +0,0 @@
-{
- "_id": "org",
- "_rev": "4-22484cc537f12d3023241211ee34e39d",
- "name": "org",
- "description": "A parser and converter for org-mode notation",
- "dist-tags": {
- "latest": "0.0.4"
- },
- "versions": {
- "0.0.2": {
- "name": "org",
- "description": "A parser and converter for org-mode notation",
- "homepage": "http://mooz.github.com/org-js",
- "keywords": [
- "org-mode",
- "emacs",
- "parser"
- ],
- "author": {
- "name": "mooz",
- "email": "stillpedant@gmail.com"
- },
- "main": "./lib/org.js",
- "version": "0.0.2",
- "directories": {
- "test": "./tests"
- },
- "repository": {
- "type": "git",
- "url": "git://github.com/mooz/org-js.git"
- },
- "bugs": {
- "url": "https://github.com/mooz/org-js/issues"
- },
- "_id": "org@0.0.2",
- "dist": {
- "shasum": "12c58092e7de94456a43ef7823eef45e4d1d12fe",
- "tarball": "https://registry.npmjs.org/org/-/org-0.0.2.tgz"
- },
- "_from": ".",
- "_npmVersion": "1.2.25",
- "_npmUser": {
- "name": "mooz",
- "email": "stillpedant@gmail.com"
- },
- "maintainers": [
- {
- "name": "mooz",
- "email": "stillpedant@gmail.com"
- }
- ]
- },
- "0.0.3": {
- "name": "org",
- "description": "A parser and converter for org-mode notation",
- "homepage": "http://mooz.github.com/org-js",
- "bugs": {
- "url": "http://github.com/mooz/org-s/issues"
- },
- "keywords": [
- "org-mode",
- "emacs",
- "parser"
- ],
- "author": {
- "name": "Masafumi Oyamada",
- "email": "stillpedant@gmail.com",
- "url": "http://mooz.github.io/"
- },
- "licenses": [
- {
- "type": "MIT"
- }
- ],
- "main": "./lib/org.js",
- "version": "0.0.3",
- "directories": {
- "test": "./tests"
- },
- "repository": {
- "type": "git",
- "url": "git://github.com/mooz/org-js.git"
- },
- "_id": "org@0.0.3",
- "dist": {
- "shasum": "6a44220f88903a6dfc3b47d010238058f9faf3a0",
- "tarball": "https://registry.npmjs.org/org/-/org-0.0.3.tgz"
- },
- "_from": ".",
- "_npmVersion": "1.2.25",
- "_npmUser": {
- "name": "mooz",
- "email": "stillpedant@gmail.com"
- },
- "maintainers": [
- {
- "name": "mooz",
- "email": "stillpedant@gmail.com"
- }
- ]
- },
- "0.0.4": {
- "name": "org",
- "description": "A parser and converter for org-mode notation",
- "homepage": "http://mooz.github.com/org-js",
- "bugs": {
- "url": "http://github.com/mooz/org-s/issues"
- },
- "keywords": [
- "org-mode",
- "emacs",
- "parser"
- ],
- "author": {
- "name": "Masafumi Oyamada",
- "email": "stillpedant@gmail.com",
- "url": "http://mooz.github.io/"
- },
- "licenses": [
- {
- "type": "MIT"
- }
- ],
- "main": "./lib/org.js",
- "version": "0.0.4",
- "directories": {
- "test": "./tests"
- },
- "repository": {
- "type": "git",
- "url": "git://github.com/mooz/org-js.git"
- },
- "_id": "org@0.0.4",
- "dist": {
- "shasum": "788b3be1a50f7c94c1500ae4d922ec76c04e06ea",
- "tarball": "https://registry.npmjs.org/org/-/org-0.0.4.tgz"
- },
- "_from": ".",
- "_npmVersion": "1.2.25",
- "_npmUser": {
- "name": "mooz",
- "email": "stillpedant@gmail.com"
- },
- "maintainers": [
- {
- "name": "mooz",
- "email": "stillpedant@gmail.com"
- }
- ]
- }
- },
- "readme": "org-js\n======\n\nParser and converter for org-mode () notation written in JavaScript.\n\nInteractive Editor\n------------------\n\nFor working example, see http://mooz.github.com/org-js/editor/.\n\nInstallation\n------------\n\n npm install org\n\nSimple example of org -> HTML conversion\n----------------------------------------\n\n```javascript\nvar org = require(\"org\");\n\nvar parser = new org.Parser();\nvar orgDocument = parser.parse(orgCode);\nvar orgHTMLDocument = orgDocument.convert(org.ConverterHTML, {\n headerOffset: 1,\n exportFromLineNumber: false,\n suppressSubScriptHandling: false,\n suppressAutoLink: false\n});\n\nconsole.dir(orgHTMLDocument); // => { title, contentHTML, tocHTML, toc }\nconsole.log(orgHTMLDocument.toString()) // => Rendered HTML\n```\n\nWriting yet another converter\n-----------------------------\n\nSee `lib/org/converter/html.js`.\n",
- "maintainers": [
- {
- "name": "mooz",
- "email": "stillpedant@gmail.com"
- }
- ],
- "time": {
- "modified": "2019-01-05T01:37:44.220Z",
- "created": "2014-01-01T15:40:31.231Z",
- "0.0.2": "2014-01-01T15:40:33.020Z",
- "0.0.3": "2014-01-01T15:55:45.497Z",
- "0.0.4": "2014-01-02T06:10:26.485Z"
- },
- "author": {
- "name": "Masafumi Oyamada",
- "email": "stillpedant@gmail.com",
- "url": "http://mooz.github.io/"
- },
- "repository": {
- "type": "git",
- "url": "git://github.com/mooz/org-js.git"
- },
- "users": {
- "nak2k": true,
- "bgschaid": true,
- "422665vijay": true,
- "nontau": true
- },
- "homepage": "http://mooz.github.com/org-js",
- "keywords": [
- "org-mode",
- "emacs",
- "parser"
- ],
- "bugs": {
- "url": "http://github.com/mooz/org-s/issues"
- },
- "readmeFilename": "README.md"
-}
diff --git a/swh/loader/package/tests/resources/replicate.npmjs.com/org_metadata_visit2.json b/swh/loader/package/tests/resources/replicate.npmjs.com/org_visit1
similarity index 100%
rename from swh/loader/package/tests/resources/replicate.npmjs.com/org_metadata_visit2.json
rename to swh/loader/package/tests/resources/replicate.npmjs.com/org_visit1
diff --git a/swh/loader/package/tests/test_npm.py b/swh/loader/package/tests/test_npm.py
index 173e381..382a472 100644
--- a/swh/loader/package/tests/test_npm.py
+++ b/swh/loader/package/tests/test_npm.py
@@ -1,442 +1,488 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import os
from swh.model.hashutil import hash_to_bytes
from swh.loader.package.npm import (
parse_npm_package_author, extract_npm_package_author
)
from swh.loader.package.tests.common import DATADIR, check_snapshot
from swh.loader.package.npm import NpmLoader
def _parse_author_string_test(author_str, expected_result):
    """Assert that ``author_str`` parses to ``expected_result``.

    The string is checked as given, then with one leading space and then
    with one trailing space; every variant must produce the same result.
    """
    padded_variants = (
        author_str,
        ' %s' % author_str,
        '%s ' % author_str,
    )
    for candidate in padded_variants:
        assert parse_npm_package_author(candidate) == expected_result
def test_parse_npm_package_author():
    """Check ``parse_npm_package_author`` on npm "person" strings.

    Each case pairs an author string with the dict expected from the parser.
    The cases cover every combination of name, ``<email>`` and ``(url)``,
    with and without separating spaces, plus empty ``<>`` / ``()`` markers.

    NOTE(review): the ``<john.doe@foo.bar>`` segments had been stripped from
    the input strings, which left contradictory cases (``''`` expected to
    yield both an email and ``{}``). They are reconstructed here from the
    expected dicts - confirm against the upstream test file.
    """
    name_only = {'name': 'John Doe'}
    email_only = {'email': 'john.doe@foo.bar'}
    url_only = {'url': 'https://john.doe'}
    name_email = {'name': 'John Doe', 'email': 'john.doe@foo.bar'}
    name_url = {'name': 'John Doe', 'url': 'https://john.doe'}
    email_url = {'email': 'john.doe@foo.bar', 'url': 'https://john.doe'}
    name_email_url = {
        'name': 'John Doe',
        'email': 'john.doe@foo.bar',
        'url': 'https://john.doe',
    }

    cases = [
        # single field
        ('John Doe', name_only),
        ('<john.doe@foo.bar>', email_only),
        ('(https://john.doe)', url_only),
        # two fields, with and without a separating space
        ('John Doe <john.doe@foo.bar>', name_email),
        ('John Doe<john.doe@foo.bar>', name_email),
        ('John Doe (https://john.doe)', name_url),
        ('John Doe(https://john.doe)', name_url),
        ('<john.doe@foo.bar> (https://john.doe)', email_url),
        ('(https://john.doe) <john.doe@foo.bar>', email_url),
        # all three fields, in both orders and with varying spacing
        ('John Doe <john.doe@foo.bar> (https://john.doe)', name_email_url),
        ('John Doe (https://john.doe) <john.doe@foo.bar>', name_email_url),
        ('John Doe<john.doe@foo.bar> (https://john.doe)', name_email_url),
        ('John Doe<john.doe@foo.bar>(https://john.doe)', name_email_url),
        # empty string and empty markers yield nothing
        ('', {}),
        ('<>', {}),
        (' <>', {}),
        ('<>()', {}),
        ('<> ()', {}),
        ('()', {}),
        (' ()', {}),
        # empty markers next to a name are ignored
        ('John Doe <> ()', name_only),
        ('John Doe <>', name_only),
        ('John Doe ()', name_only),
    ]

    for author_str, expected_result in cases:
        _parse_author_string_test(author_str, expected_result)
def test_extract_npm_package_author():
package_metadata_filepath = os.path.join(
- DATADIR, 'replicate.npmjs.com', 'org_metadata_visit2.json')
+ DATADIR, 'replicate.npmjs.com', 'org_visit1')
with open(package_metadata_filepath) as json_file:
package_metadata = json.load(json_file)
extract_npm_package_author(package_metadata['versions']['0.0.2']) == \
{
'fullname': b'mooz ',
'name': b'mooz',
'email': b'stillpedant@gmail.com'
}
assert (
extract_npm_package_author(package_metadata['versions']['0.0.3']) ==
{
'fullname': b'Masafumi Oyamada ',
'name': b'Masafumi Oyamada',
'email': b'stillpedant@gmail.com'
}
)
package_json = json.loads('''
{
"name": "highlightjs-line-numbers.js",
"version": "2.7.0",
"description": "Highlight.js line numbers plugin.",
"main": "src/highlightjs-line-numbers.js",
"dependencies": {},
"devDependencies": {
"gulp": "^4.0.0",
"gulp-rename": "^1.4.0",
"gulp-replace": "^0.6.1",
"gulp-uglify": "^1.2.0"
},
"repository": {
"type": "git",
"url": "https://github.com/wcoder/highlightjs-line-numbers.js.git"
},
"author": "Yauheni Pakala ",
"license": "MIT",
"bugs": {
"url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues"
},
"homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/"
}''') # noqa
assert extract_npm_package_author(package_json) == \
{
'fullname': b'Yauheni Pakala ',
'name': b'Yauheni Pakala',
'email': b'evgeniy.pakalo@gmail.com'
}
package_json = json.loads('''
{
"name": "3-way-diff",
"version": "0.0.1",
"description": "3-way diffing of JavaScript objects",
"main": "index.js",
"authors": [
{
"name": "Shawn Walsh",
"url": "https://github.com/shawnpwalsh"
},
{
"name": "Markham F Rollins IV",
"url": "https://github.com/mrollinsiv"
}
],
"keywords": [
"3-way diff",
"3 way diff",
"three-way diff",
"three way diff"
],
"devDependencies": {
"babel-core": "^6.20.0",
"babel-preset-es2015": "^6.18.0",
"mocha": "^3.0.2"
},
"dependencies": {
"lodash": "^4.15.0"
}
}''')
assert extract_npm_package_author(package_json) == \
{
'fullname': b'Shawn Walsh',
'name': b'Shawn Walsh',
'email': None
}
package_json = json.loads('''
{
"name": "yfe-ynpm",
"version": "1.0.0",
"homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm",
"repository": {
"type": "git",
"url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git"
},
"author": [
"fengmk2 (https://fengmk2.com)",
"xufuzi (https://7993.org)"
],
"license": "MIT"
}''')
assert extract_npm_package_author(package_json) == \
{
'fullname': b'fengmk2 ',
'name': b'fengmk2',
'email': b'fengmk2@gmail.com'
}
package_json = json.loads('''
{
"name": "umi-plugin-whale",
"version": "0.0.8",
"description": "Internal contract component",
"authors": {
"name": "xiaohuoni",
"email": "448627663@qq.com"
},
"repository": "alitajs/whale",
"devDependencies": {
"np": "^3.0.4",
"umi-tools": "*"
},
"license": "MIT"
}''')
assert extract_npm_package_author(package_json) == \
{
'fullname': b'xiaohuoni <448627663@qq.com>',
'name': b'xiaohuoni',
'email': b'448627663@qq.com'
}
def normalize_hashes(hashes):
    """Run ``hash_to_bytes`` over a hash string or a collection of them.

    A ``str`` yields a single converted value and a ``list`` yields a list
    of converted values. Any other input is treated as a mapping: both its
    keys and its values are converted and returned as a new dict.
    """
    if isinstance(hashes, str):
        return hash_to_bytes(hashes)

    if isinstance(hashes, list):
        return list(map(hash_to_bytes, hashes))

    converted = {}
    for key, value in hashes.items():
        converted[hash_to_bytes(key)] = hash_to_bytes(value)
    return converted
# Content hashes expected in storage after the first visit of the 'org'
# package, converted to bytes by normalize_hashes. The loader tests pass this
# list to storage.content_get and use its length as the expected 'content'
# counter.
_expected_new_contents_first_visit = normalize_hashes([
'4ce3058e16ab3d7e077f65aabf855c34895bf17c',
'858c3ceee84c8311adc808f8cdb30d233ddc9d18',
'0fa33b4f5a4e0496da6843a38ff1af8b61541996',
'85a410f8ef8eb8920f2c384a9555566ad4a2e21b',
'9163ac8025923d5a45aaac482262893955c9b37b',
'692cf623b8dd2c5df2c2998fd95ae4ec99882fb4',
'18c03aac6d3e910efb20039c15d70ab5e0297101',
'41265c42446aac17ca769e67d1704f99e5a1394d',
'783ff33f5882813dca9239452c4a7cadd4dba778',
'b029cfb85107aee4590c2434a3329bfcf36f8fa1',
'112d1900b4c2e3e9351050d1b542c9744f9793f3',
'5439bbc4bd9a996f1a38244e6892b71850bc98fd',
'd83097a2f994b503185adf4e719d154123150159',
'd0939b4898e83090ee55fd9d8a60e312cfadfbaf',
'b3523a26f7147e4af40d9d462adaae6d49eda13e',
'cd065fb435d6fb204a8871bcd623d0d0e673088c',
'2854a40855ad839a54f4b08f5cff0cf52fca4399',
'b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe',
'0f73d56e1cf480bded8a1ecf20ec6fc53c574713',
'0d9882b2dfafdce31f4e77fe307d41a44a74cefe',
'585fc5caab9ead178a327d3660d35851db713df1',
'e8cd41a48d79101977e3036a87aeb1aac730686f',
'5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7',
'9c3cc2763bf9e9e37067d3607302c4776502df98',
'3649a68410e354c83cd4a38b66bd314de4c8f5c9',
'e96ed0c091de1ebdf587104eaf63400d1974a1fe',
'078ca03d2f99e4e6eab16f7b75fbb7afb699c86c',
'38de737da99514de6559ff163c988198bc91367a',
])
# Directory ids expected in storage after the first visit of the 'org'
# package, converted to bytes by normalize_hashes. The first-visit test passes
# this list to storage.directory_missing and expects nothing to be missing;
# its length is the expected 'directory' counter.
_expected_new_directories_first_visit = normalize_hashes([
'3370d20d6f96dc1c9e50f083e2134881db110f4f',
'42753c0c2ab00c4501b552ac4671c68f3cf5aece',
'd7895533ef5edbcffdea3f057d9fef3a1ef845ce',
'80579be563e2ef3e385226fe7a3f079b377f142c',
'3b0ddc6a9e58b4b53c222da4e27b280b6cda591c',
'bcad03ce58ac136f26f000990fc9064e559fe1c0',
'5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca',
'e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd',
'584b5b4b6cf7f038095e820b99386a9c232de931',
'184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a',
'bb5f4ee143c970367eb409f2e4c1104898048b9d',
'1b95491047add1103db0dfdfa84a9735dcb11e88',
'a00c6de13471a2d66e64aca140ddb21ef5521e62',
'5ce6c1cd5cda2d546db513aaad8c72a44c7771e2',
'c337091e349b6ac10d38a49cdf8c2401ef9bb0f2',
'202fafcd7c0f8230e89d5496ad7f44ab12b807bf',
'775cc516543be86c15c1dc172f49c0d4e6e78235',
'ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e',
])
# Revisions expected after the first visit, as a mapping converted to bytes
# (keys and values) by normalize_hashes. The keys are the ids targeted by the
# 'releases/*' branches in _expected_branches_first_visit; each value also
# appears in _expected_new_directories_first_visit.
# NOTE(review): presumably each value is the revision's root directory -
# confirm against the loader.
_expected_new_revisions_first_visit = normalize_hashes({
'd8a1c7474d2956ac598a19f0f27d52f7015f117e':
'42753c0c2ab00c4501b552ac4671c68f3cf5aece',
'5f9eb78af37ffd12949f235e86fac04898f9f72a':
'3370d20d6f96dc1c9e50f083e2134881db110f4f',
'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a':
'd7895533ef5edbcffdea3f057d9fef3a1ef845ce'}
)
# Id of the snapshot expected after the first visit, converted to bytes; used
# as the 'id' of the expected snapshot handed to check_snapshot.
_expected_new_snapshot_first_visit_id = normalize_hashes(
'd0587e1195aed5a8800411a008f2f2d627f18e2d')
# Branches of the snapshot expected after the first visit: one 'revision'
# branch per release (0.0.2, 0.0.3, 0.0.4) plus a 'HEAD' alias pointing at
# 'releases/0.0.4'. Targets are left as hex strings here (not passed through
# normalize_hashes).
_expected_branches_first_visit = {
'HEAD': {
'target': 'releases/0.0.4',
'target_type': 'alias'
},
'releases/0.0.2': {
'target': 'd8a1c7474d2956ac598a19f0f27d52f7015f117e',
'target_type': 'revision'
},
'releases/0.0.3': {
'target': '5f9eb78af37ffd12949f235e86fac04898f9f72a',
'target_type': 'revision'
},
'releases/0.0.4': {
'target': 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a',
'target_type': 'revision'
}
}
def package_url(package):
    """Return the www.npmjs.com package page URL for ``package``."""
    url_template = 'https://www.npmjs.com/package/%s'
    return url_template % package
def package_metadata_url(package):
    """Return the replicate.npmjs.com metadata URL for ``package``.

    The returned URL ends with a trailing slash.
    """
    url_template = 'https://replicate.npmjs.com/%s/'
    return url_template % package
-def test_npm_loader_2_first_visit(swh_config, local_get):
+def test_npm_loader_first_visit(swh_config, local_get):
package = 'org'
loader = NpmLoader(package,
package_url(package),
package_metadata_url(package))
actual_load_status = loader.load()
assert actual_load_status == {'status': 'eventful'}
stats = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit),
'directory': len(_expected_new_directories_first_visit),
'origin': 1,
'origin_visit': 1,
'person': 2,
'release': 0,
'revision': len(_expected_new_revisions_first_visit),
'skipped_content': 0,
'snapshot': 1,
} == stats
assert len(list(loader.storage.content_get(
_expected_new_contents_first_visit))) == len(
_expected_new_contents_first_visit)
assert list(loader.storage.directory_missing(
_expected_new_directories_first_visit)) == []
assert list(loader.storage.revision_missing(
_expected_new_revisions_first_visit)) == []
expected_snapshot = {
'id': _expected_new_snapshot_first_visit_id,
'branches': _expected_branches_first_visit,
}
check_snapshot(expected_snapshot, loader.storage)
+
+
+def test_npm_loader_incremental_visit(swh_config, local_get_visits):
+ package = 'org'
+ origin_url = package_url(package)
+ metadata_url = package_metadata_url(package)
+ loader = NpmLoader(package, origin_url, metadata_url)
+ print(origin_url)
+ print(metadata_url)
+
+ actual_load_status = loader.load()
+
+ assert actual_load_status == {'status': 'eventful'}
+
+ stats = loader.storage.stat_counters()
+
+ assert {
+ 'content': len(_expected_new_contents_first_visit),
+ 'directory': len(_expected_new_directories_first_visit),
+ 'origin': 1,
+ 'origin_visit': 1,
+ 'person': 2,
+ 'release': 0,
+ 'revision': len(_expected_new_revisions_first_visit),
+ 'skipped_content': 0,
+ 'snapshot': 1,
+ } == stats
+
+ loader._info = None # reset loader internal state
+ actual_load_status2 = loader.load()
+
+ assert actual_load_status2 == {'status': 'eventful'}
+
+ stats = loader.storage.stat_counters()
+
+ assert { # 3 new releases artifacts
+ 'content': len(_expected_new_contents_first_visit) + 14,
+ 'directory': len(_expected_new_directories_first_visit) + 15,
+ 'origin': 1,
+ 'origin_visit': 2,
+ 'person': 2,
+ 'release': 0,
+ 'revision': len(_expected_new_revisions_first_visit) + 3,
+ 'skipped_content': 0,
+ 'snapshot': 2,
+ } == stats