Page Menu | Home | Software Heritage

D6435.id23373.diff
No One | Temporary

D6435.id23373.diff

diff --git a/swh/scanner/client.py b/swh/scanner/client.py
--- a/swh/scanner/client.py
+++ b/swh/scanner/client.py
@@ -16,7 +16,7 @@
import aiohttp
-from swh.model.identifiers import CoreSWHID
+from swh.model.swhids import CoreSWHID
from .exceptions import error_response
diff --git a/swh/scanner/data.py b/swh/scanner/data.py
--- a/swh/scanner/data.py
+++ b/swh/scanner/data.py
@@ -8,7 +8,7 @@
from swh.model.exceptions import ValidationError
from swh.model.from_disk import Directory
-from swh.model.identifiers import CONTENT, DIRECTORY, CoreSWHID
+from swh.model.swhids import CoreSWHID
from .client import Client
@@ -60,11 +60,11 @@
node_ori = await client.get_origin(node.swhid())
if node_ori:
data[node.swhid()]["origin"] = node_ori
- if node.object_type == DIRECTORY:
+ if node.object_type == "directory":
for sub_node in node.iter_tree():
data[sub_node.swhid()]["origin"] = node_ori # type: ignore
else:
- if node.object_type == DIRECTORY:
+ if node.object_type == "directory":
children = [sub_node for sub_node in node.iter_tree()]
children.remove(node)
queue.extend(children) # type: ignore
@@ -88,7 +88,7 @@
):
directories = list(
filter(
- lambda n: n.object_type == DIRECTORY,
+ lambda n: n.object_type == "directory",
map(lambda n: n[1], source_tree.items()),
)
)
@@ -112,7 +112,7 @@
"""
known_cnt = 0
node_contents = list(
- filter(lambda n: n.object_type == CONTENT, map(lambda n: n[1], node.items()))
+ filter(lambda n: n.object_type == "content", map(lambda n: n[1], node.items()))
)
for sub_node in node_contents:
if nodes_data[sub_node.swhid()]["known"]:
@@ -137,7 +137,7 @@
directory = source_tree[node_path if node_path != source_tree.data["path"] else b""]
node_contents = list(
filter(
- lambda n: n.object_type == CONTENT, map(lambda n: n[1], directory.items())
+ lambda n: n.object_type == "content", map(lambda n: n[1], directory.items())
)
)
files_data = {}
diff --git a/swh/scanner/db.py b/swh/scanner/db.py
--- a/swh/scanner/db.py
+++ b/swh/scanner/db.py
@@ -13,18 +13,14 @@
from io import TextIOWrapper
import logging
from pathlib import Path
-import re
import sqlite3
from typing import Iterable
from swh.core.utils import grouper
+from swh.model.swhids import SWHID_RE
from .exceptions import DBError
-# XXX copied and simplified from swh.model.identifiers (WIP), replace this in favor of
-# swh.model.identifiers.SWHID_RE when it is landed there
-SWHID_RE = re.compile("^swh:1:(ori|snp|rel|rev|dir|cnt):[0-9a-f]{40}$")
-
class Db:
"""Local database interface"""
diff --git a/swh/scanner/policy.py b/swh/scanner/policy.py
--- a/swh/scanner/policy.py
+++ b/swh/scanner/policy.py
@@ -8,7 +8,6 @@
from swh.core.utils import grouper
from swh.model.from_disk import Directory
-from swh.model.identifiers import CONTENT, DIRECTORY
from .client import QUERY_LIMIT, Client
from .data import MerkleNodeInfo
@@ -56,7 +55,7 @@
self.data[node.swhid()]["known"] = swhids_res[str(node.swhid())][
"known"
]
- if node.object_type == DIRECTORY:
+ if node.object_type == "directory":
if not self.data[node.swhid()]["known"]:
children = [n[1] for n in list(node.items())]
queue.extend(children)
@@ -81,7 +80,7 @@
seen.append(node)
if len(seen) == ssize:
return
- if node.object_type == DIRECTORY and self.data[node.swhid()]["known"]:
+ if node.object_type == "directory" and self.data[node.swhid()]["known"]:
sub_nodes = [n for n in node.iter_tree(dedup=False)]
sub_nodes.remove(node) # remove root node
for sub_node in sub_nodes:
@@ -117,7 +116,7 @@
# get all the files
all_contents = list(
filter(
- lambda node: node.object_type == CONTENT, self.source_tree.iter_tree()
+ lambda node: node.object_type == "content", self.source_tree.iter_tree()
)
)
all_contents.reverse() # check deepest node first
@@ -139,7 +138,7 @@
# (update children directories accordingly)
unset_dirs = list(
filter(
- lambda node: node.object_type == DIRECTORY
+ lambda node: node.object_type == "directory"
and self.data[node.swhid()]["known"] is None,
self.source_tree.iter_tree(),
)
@@ -155,7 +154,7 @@
if dir_known:
sub_dirs = list(
filter(
- lambda n: n.object_type == DIRECTORY
+ lambda n: n.object_type == "directory"
and self.data[n.swhid()]["known"] is None,
dir_.iter_tree(),
)
@@ -177,7 +176,8 @@
# get all directory contents that have at least one file content
unknown_dirs = list(
filter(
- lambda dir_: dir_.object_type == DIRECTORY and self.has_contents(dir_),
+ lambda dir_: dir_.object_type == "directory"
+ and self.has_contents(dir_),
self.source_tree.iter_tree(),
)
)
@@ -202,7 +202,7 @@
# get remaining directories that have no file contents
empty_dirs = list(
filter(
- lambda n: n.object_type == DIRECTORY
+ lambda n: n.object_type == "directory"
and not self.has_contents(n)
and self.data[n.swhid()]["known"] is None,
self.source_tree.iter_tree(),
@@ -220,7 +220,7 @@
# check unknown file contents
unknown_cnts = list(
filter(
- lambda n: n.object_type == CONTENT
+ lambda n: n.object_type == "content"
and self.data[n.swhid()]["known"] is None,
self.source_tree.iter_tree(),
)
@@ -243,7 +243,7 @@
def get_contents(self, dir_: Directory):
"""Get all the contents of a given directory"""
for _, node in list(dir_.items()):
- if node.object_type == CONTENT:
+ if node.object_type == "content":
yield node
diff --git a/swh/scanner/tests/test_client.py b/swh/scanner/tests/test_client.py
--- a/swh/scanner/tests/test_client.py
+++ b/swh/scanner/tests/test_client.py
@@ -7,7 +7,7 @@
import pytest
-from swh.model.identifiers import CoreSWHID
+from swh.model.swhids import CoreSWHID
from swh.scanner.client import Client
from swh.scanner.exceptions import APIError
diff --git a/swh/scanner/tests/test_dashboard.py b/swh/scanner/tests/test_dashboard.py
--- a/swh/scanner/tests/test_dashboard.py
+++ b/swh/scanner/tests/test_dashboard.py
@@ -5,7 +5,7 @@
import dash_html_components as html
-from swh.model.identifiers import CoreSWHID, ObjectType
+from swh.model.swhids import CoreSWHID, ObjectType
from swh.scanner.dashboard.dashboard import generate_table_body
from swh.scanner.data import MerkleNodeInfo
diff --git a/swh/scanner/tests/test_policy.py b/swh/scanner/tests/test_policy.py
--- a/swh/scanner/tests/test_policy.py
+++ b/swh/scanner/tests/test_policy.py
@@ -7,7 +7,7 @@
from flask import url_for
import pytest
-from swh.model.identifiers import CONTENT, CoreSWHID, ObjectType
+from swh.model.swhids import CoreSWHID, ObjectType
from swh.scanner.client import Client
from swh.scanner.data import MerkleNodeInfo, init_merkle_node_info
from swh.scanner.policy import (
@@ -145,4 +145,4 @@
)
]
assert len(chunks) == 2
- assert chunks[1][-1].object_type == CONTENT
+ assert chunks[1][-1].object_type == "content"

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 9:56 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218744

Event Timeline