Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163512
D2657.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D2657.diff
View Options
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -39,13 +39,13 @@
# Full sample:
# https://forge.softwareheritage.org/diffusion/DCORE/browse/master/setup.py
setup(
- name='swh.<module-name>', # example: swh.loader.pypi
- description='Software Heritage <Module\'s intent>',
+ name='swh.scanner',
+ description='Software Heritage code scanner',
long_description=long_description,
long_description_content_type='text/markdown',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
- url='https://forge.softwareheritage.org/diffusion/<module-git-code>',
+ url='https://forge.softwareheritage.org/diffusion/DTSCN/',
packages=find_packages(), # packages's modules
install_requires=parse_requirements() + parse_requirements('swh'),
tests_require=parse_requirements('test'),
@@ -55,7 +55,7 @@
include_package_data=True,
entry_points='''
[swh.cli.subcommands]
- <cli-name>=swh.<module>.cli:cli
+ scanner=swh.scanner.cli:scanner
''',
classifiers=[
"Programming Language :: Python :: 3",
@@ -67,6 +67,6 @@
project_urls={
'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
'Funding': 'https://www.softwareheritage.org/donate',
- 'Source': 'https://forge.softwareheritage.org/source/swh-<module>',
+ 'Source': 'https://forge.softwareheritage.org/source/swh-scanner',
},
)
diff --git a/swh/foo/bar.py b/swh/foo/bar.py
deleted file mode 100644
--- a/swh/foo/bar.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (C) 2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
diff --git a/swh/foo/cli.py b/swh/foo/cli.py
deleted file mode 100644
--- a/swh/foo/cli.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import click
-
-from swh.core.cli import CONTEXT_SETTINGS
-
-
-@click.group(name='foo', context_settings=CONTEXT_SETTINGS)
-@click.pass_context
-def cli(ctx):
- """Foo main command.
- """
-
-
-@cli.command()
-@click.option('--bar', help='Something')
-@click.pass_context
-def bar(ctx, bar):
- '''Do something.'''
- click.echo('bar')
diff --git a/swh/foo/py.typed b/swh/foo/py.typed
deleted file mode 100644
--- a/swh/foo/py.typed
+++ /dev/null
@@ -1 +0,0 @@
-# Marker file for PEP 561.
diff --git a/swh/foo/__init__.py b/swh/scanner/__init__.py
rename from swh/foo/__init__.py
rename to swh/scanner/__init__.py
diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
new file mode 100644
--- /dev/null
+++ b/swh/scanner/cli.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import click
+
+from swh.core.cli import CONTEXT_SETTINGS
+from swh.scanner.scanner import run
+
+
+@click.group(name='scanner', context_settings=CONTEXT_SETTINGS)
+@click.pass_context
+def scanner(ctx):
+ '''Software Heritage Scanner tools.'''
+ pass
+
+
+@scanner.command(name='scan')
+@click.argument('path', required=True)
+@click.option('--host', '-h', default='localhost',
+ metavar='IP', show_default=True,
+ help="web api endpoint ip")
+@click.option('--port', '-p', default='5080',
+ metavar='PORT', show_default=True,
+ help="web api endpoint port")
+@click.pass_context
+def scan(ctx, path, host, port):
+ result = run(path, host, port)
+ print(result)
+
+
+def main():
+ return scanner(auto_envvar_prefix='SWH_SCANNER')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/swh/scanner/scanner.py b/swh/scanner/scanner.py
new file mode 100644
--- /dev/null
+++ b/swh/scanner/scanner.py
@@ -0,0 +1,108 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import requests
+import os
+import json
+import itertools
+from pathlib import PosixPath
+
+from swh.model.cli import pid_of_file, pid_of_dir
+from swh.model.identifiers import (
+ parse_persistent_identifier,
+ DIRECTORY, CONTENT
+)
+
+
+def pids_discovery(pids, host, port):
+ """
+ Args:
+ pids list(str): A list of persistent identifiers
+ Returns:
+ A dictionary with:
+ key(str): persistent identifier
+ value(dict):
+ value['known'] = True if pid is found
+ value['known'] = False if pid is not found
+ """
+ endpoint = 'http://%s:%s/api/1/known/' % (host, port)
+ req = requests.post(endpoint, json=pids)
+ resp = req.text
+ return json.loads(resp)
+
+
+def get_sub_paths(path):
+ """Find the persistent identifiers of the directories and files
+ directly under a given path.
+
+ Args:
+ path(PosixPath): the entry root
+
+ Yields:
+ tuple(path, pid): pairs of a path and its corresponding persistent
+ identifier
+ """
+ def pid_of(path):
+ if path.is_dir():
+ return pid_of_dir(bytes(path))
+ elif path.is_file():
+ return pid_of_file(bytes(path))
+
+ dirpath, dnames, fnames = next(os.walk(path))
+ for node in itertools.chain(dnames, fnames):
+ path = PosixPath(dirpath).joinpath(node)
+ yield (path, pid_of(path))
+
+
+def parse_path(path, host, port):
+ """Check if the sub paths of the given path are present in the
+ archive or not.
+ Args:
+ path(PosixPath): The source path
+ host(str): ip for the api request
+ port(str): port for the api request
+ Yields:
+ a tuple with the sub path, the persistent identifier
+ corresponding to that path and a boolean: True if the identifier
+ is known to the archive, False otherwise.
+ """
+ pid_map = dict(get_sub_paths(path))
+ parsed_pids = pids_discovery(list(pid_map.values()), host, port)
+
+ for sub_path, pid in pid_map.items():
+ yield (sub_path, pid, parsed_pids[pid]['known'])
+
+
+def run(root, host, port):
+ """Scan the given root
+ Args:
+ root: the path to scan
+ host(str): ip for the api request
+ port(str): port for the api request
+ Returns:
+ A set containing pairs of each path found in the archive and the
+ corresponding persistent identifier
+ """
+ def _scan(root, host, port, accum):
+ assert root not in accum
+
+ next_paths = []
+ for path, pid, found in parse_path(root, host, port):
+ obj_type = parse_persistent_identifier(pid).object_type
+
+ if obj_type == CONTENT and found:
+ accum.add((str(path), pid))
+ elif obj_type == DIRECTORY:
+ if found:
+ accum.add((str(path), pid))
+ else:
+ next_paths.append(path)
+
+ for new_path in next_paths:
+ accum = _scan(new_path, host, port, accum)
+
+ return accum
+
+ return _scan(root, host, port, set())
diff --git a/swh/foo/tests/__init__.py b/swh/scanner/tests/__init__.py
rename from swh/foo/tests/__init__.py
rename to swh/scanner/tests/__init__.py
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 30, 9:44 AM (3 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223231
Attached To
D2657: code scanner prototype
Event Timeline
Log In to Comment