diff --git a/swh/scanner/scanner.py b/swh/scanner/scanner.py --- a/swh/scanner/scanner.py +++ b/swh/scanner/scanner.py @@ -88,7 +88,11 @@ dirpath, dnames, fnames = next(os.walk(path)) for node in itertools.chain(dnames, fnames): sub_path = PosixPath(dirpath).joinpath(node) - yield (sub_path, pid_of(sub_path)) + sub_path_pid = pid_of(sub_path) + # ensure the persistent identifier exists + # for example, a broken link won't return a pid + if sub_path_pid: + yield (sub_path, sub_path_pid) async def parse_path(