Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9341327
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
View Options
diff --git a/swh/scanner/logger.py b/swh/scanner/logger.py
deleted file mode 100644
index b45e7e3..0000000
--- a/swh/scanner/logger.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright (C) 2020 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import logging
-
-
-logger = None
-
-
-def init(**kwargs):
- def decorate(func):
- for k in kwargs:
- setattr(func, k, kwargs[k])
- return func
- return decorate
-
-
-def setup_logger(verbose: bool) -> None:
- global logger
- console = logging.FileHandler('scan.log')
- console.setLevel(logging.DEBUG)
- formatter = logging.Formatter('%(asctime)s | %(levelname)s: %(message)s')
- console.setFormatter(formatter)
-
- logger = logging.getLogger('debug')
- logger.addHandler(console)
- if not verbose:
- logger.propagate = False
-
-
-@init(count=0)
-def log_queries(n: int) -> None:
- if logger is not None:
- log_queries.count += n
-
-
-def log_counters() -> None:
- if logger is not None:
- logger.info('number of queries: %s' % log_queries.count)
-
-
-def error(*args) -> None:
- if logger is not None:
- logger.error(args)
-
-
-def warning(*args) -> None:
- if logger is not None:
- logger.warning(args)
-
-
-def info(*args) -> None:
- if logger is not None:
- logger.info(args)
-
-
-def debug(*args):
- if logger is not None:
- logger.debug(args)
diff --git a/swh/scanner/model.py b/swh/scanner/model.py
index c46e618..c4a3d56 100644
--- a/swh/scanner/model.py
+++ b/swh/scanner/model.py
@@ -1,76 +1,84 @@
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import annotations
+import sys
from pathlib import PosixPath
from typing import Any, Dict
from enum import Enum
from swh.model.identifiers import (
DIRECTORY, CONTENT
)
class Color(Enum):
blue = '\033[94m'
green = '\033[92m'
red = '\033[91m'
end = '\033[0m'
def colorize(text: str, color: Color):
return color.value + text + Color.end.value
class Tree:
"""Representation of a file system structure
"""
- def __init__(self, father: Tree, path: PosixPath):
+ def __init__(self, path: PosixPath, father: Tree = None):
self.father = father
self.path = path
self.otype = DIRECTORY if path.is_dir() else CONTENT
self.pid = ''
self.children: Dict[PosixPath, Tree] = {}
def addNode(self, path: PosixPath, pid: str = None) -> None:
"""Recursively add a new node path
"""
relative_path = path.relative_to(self.path)
if relative_path == PosixPath('.'):
if pid is not None:
self.pid = pid
return
new_path = self.path.joinpath(relative_path.parts[0])
if new_path not in self.children:
- self.children[new_path] = Tree(self, new_path)
+ self.children[new_path] = Tree(new_path, self)
self.children[new_path].addNode(path, pid)
def show(self) -> None:
"""Print all the tree"""
- print(Color.blue.value+str(self.path)+Color.end.value)
- self.printChildren()
+ isatty = sys.stdout.isatty()
- def printChildren(self, inc: int = 0) -> None:
+ print(colorize(str(self.path), Color.blue) if isatty
+ else str(self.path))
+ self.printChildren(isatty)
+
+ def printChildren(self, isatty: bool, inc: int = 0) -> None:
for path, node in self.children.items():
- self.printNode(node, inc)
+ self.printNode(node, isatty, inc)
if node.children:
- node.printChildren(inc+1)
+ node.printChildren(isatty, inc+1)
- def printNode(self, node: Any, inc: int) -> None:
+ def printNode(self, node: Any, isatty: bool, inc: int) -> None:
rel_path = str(node.path.relative_to(self.path))
+ print('│ '*inc, end='')
if node.otype == DIRECTORY:
if node.pid:
- print('│ '*inc + colorize(rel_path, Color.blue) + '/')
+ print(colorize(rel_path, Color.blue) if isatty else rel_path,
+ end='')
else:
- print('│ '*inc + colorize(rel_path, Color.red) + '/')
+ print(colorize(rel_path, Color.red) if isatty else rel_path,
+ end='')
+ print('/')
- if node.otype == CONTENT:
+ elif node.otype == CONTENT:
if node.pid:
- print('│ '*inc + colorize(rel_path, Color.green))
+ print(colorize(rel_path, Color.green) if isatty else rel_path)
else:
- print('│ '*inc + colorize(rel_path, Color.red))
+ print(colorize(rel_path, Color.red) if isatty else rel_path)
diff --git a/swh/scanner/scanner.py b/swh/scanner/scanner.py
index 11065e2..a8b7270 100644
--- a/swh/scanner/scanner.py
+++ b/swh/scanner/scanner.py
@@ -1,146 +1,145 @@
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import itertools
import asyncio
import aiohttp
-from typing import List, Dict, Tuple, Generator, Iterator
+from typing import List, Dict, Tuple, Iterator
from pathlib import PosixPath
-from .logger import log_queries
from .exceptions import APIError
from .model import Tree
from swh.model.cli import pid_of_file, pid_of_dir
from swh.model.identifiers import (
parse_persistent_identifier,
DIRECTORY, CONTENT
)
async def pids_discovery(
- pids: List[str], session: aiohttp.ClientSession, url: str,
+ pids: List[str], session: aiohttp.ClientSession, api_url: str,
) -> Dict[str, Dict[str, bool]]:
"""API Request to get information about the persistent identifiers given in
input.
Args:
pids: a list of persistent identifier
+ api_url: url for the API request
Returns:
A dictionary with:
key: persistent identifier searched
value:
value['known'] = True if the pid is found
value['known'] = False if the pid is not found
"""
- endpoint = url + '/api/1/known/'
+ endpoint = api_url + 'known/'
chunk_size = 1000
requests = []
- log_queries(len(pids))
-
def get_chunk(pids):
for i in range(0, len(pids), chunk_size):
yield pids[i:i + chunk_size]
async def make_request(pids):
async with session.post(endpoint, json=pids) as resp:
if resp.status != 200:
error_message = '%s with given values %s' % (
resp.text, str(pids))
raise APIError(error_message)
+
return await resp.json()
if len(pids) > chunk_size:
for pids_chunk in get_chunk(pids):
requests.append(asyncio.create_task(
make_request(pids_chunk)))
res = await asyncio.gather(*requests)
# concatenate list of dictionaries
return dict(itertools.chain.from_iterable(e.items() for e in res))
else:
return await make_request(pids)
def get_subpaths(
- path: PosixPath) -> Generator[Tuple[PosixPath, str], None, None]:
+ path: PosixPath) -> Iterator[Tuple[PosixPath, str]]:
"""Find the persistent identifier of the directories and files under a
given path.
Args:
path: the root path
Yields:
pairs of: path, the relative persistent identifier
"""
def pid_of(path):
if path.is_dir():
return pid_of_dir(bytes(path))
elif path.is_file():
return pid_of_file(bytes(path))
dirpath, dnames, fnames = next(os.walk(path))
for node in itertools.chain(dnames, fnames):
sub_path = PosixPath(dirpath).joinpath(node)
yield (sub_path, pid_of(sub_path))
async def parse_path(
- path: PosixPath, session: aiohttp.ClientSession, url: str
+ path: PosixPath, session: aiohttp.ClientSession, api_url: str
) -> Iterator[Tuple[str, str, bool]]:
"""Check if the sub paths of the given path are present in the
archive or not.
Args:
path: the source path
- url: url for the API request
+ api_url: url for the API request
Returns:
a map containing tuples with: a subpath of the given path,
the pid of the subpath and the result of the api call
"""
parsed_paths = dict(get_subpaths(path))
parsed_pids = await pids_discovery(
- list(parsed_paths.values()), session, url)
+ list(parsed_paths.values()), session, api_url)
def unpack(tup):
subpath, pid = tup
return (subpath, pid, parsed_pids[pid]['known'])
return map(unpack, parsed_paths.items())
async def run(
- root: PosixPath, url: str, source_tree: Tree) -> None:
+ root: PosixPath, api_url: str, source_tree: Tree) -> None:
"""Start scanning from the given root.
- It fill the source tree with the path discovered.
+ It fills the source tree with the path discovered.
Args:
root: the root path to scan
- url: url for the API request
+ api_url: url for the API request
"""
- async def _scan(root, session, url, source_tree):
- for path, pid, found in await parse_path(root, session, url):
+ async def _scan(root, session, api_url, source_tree):
+ for path, pid, found in await parse_path(root, session, api_url):
obj_type = parse_persistent_identifier(pid).object_type
if obj_type == CONTENT:
source_tree.addNode(path, pid if found else None)
elif obj_type == DIRECTORY:
if found:
source_tree.addNode(path, pid)
else:
source_tree.addNode(path)
- await _scan(path, session, url, source_tree)
+ await _scan(path, session, api_url, source_tree)
async with aiohttp.ClientSession() as session:
- await _scan(root, session, url, source_tree)
+ await _scan(root, session, api_url, source_tree)
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, Jul 4, 11:56 AM (3 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3273930
Attached To
rDTSCN Code scanner
Event Timeline
Log In to Comment