Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/loader.py
Show All 40 Lines | |||||
from .exception import SvnLoaderHistoryAltered, SvnLoaderUneventful | from .exception import SvnLoaderHistoryAltered, SvnLoaderUneventful | ||||
from .utils import ( | from .utils import ( | ||||
OutputStream, | OutputStream, | ||||
init_svn_repo_from_archive_dump, | init_svn_repo_from_archive_dump, | ||||
init_svn_repo_from_dump, | init_svn_repo_from_dump, | ||||
) | ) | ||||
DEFAULT_BRANCH = b"HEAD" | DEFAULT_BRANCH = b"HEAD" | ||||
TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.svn." | TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.svn." | ||||
SUBVERSION_ERROR = re.compile(r".*(E[0-9]{6}):.*") | |||||
SUBVERSION_NOT_FOUND = "E170013" | |||||
anlambert: I was rather thinking of capturing all error codes and messages, with the following regex. | |||||
class SvnLoader(BaseLoader): | class SvnLoader(BaseLoader): | ||||
"""Swh svn loader. | """Swh svn loader. | ||||
The repository is either remote or local. The loader deals with | The repository is either remote or local. The loader deals with | ||||
update on an already previously loaded repository. | update on an already previously loaded repository. | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 593 Lines • ▼ Show 20 Lines | def get_last_loaded_svn_rev(self, svn_url: str) -> int: | ||||
return svn_revision | return svn_revision | ||||
def dump_svn_revisions(self, svn_url, last_loaded_svn_rev=-1): | def dump_svn_revisions(self, svn_url, last_loaded_svn_rev=-1): | ||||
""" | """ | ||||
Generate a subversion dump file using the svnrdump tool. | Generate a subversion dump file using the svnrdump tool. | ||||
If the svnrdump command failed somehow, | If the svnrdump command failed somehow, | ||||
the produced dump file is analyzed to determine if a partial | the produced dump file is analyzed to determine if a partial | ||||
loading is still feasible. | loading is still feasible. | ||||
Raises: | |||||
NotFound when the repository is no longer found at url | |||||
""" | """ | ||||
# Build the svnrdump command line | # Build the svnrdump command line | ||||
svnrdump_cmd = ["svnrdump", "dump", svn_url] | svnrdump_cmd = ["svnrdump", "dump", svn_url] | ||||
Not Done Inline Actions We can declare lists of error codes and messages here: error_codes = [] error_messages = [] anlambert: We can declare lists of error codes and messages here:
```lang=python
error_codes = []… | |||||
# Launch the svnrdump command while capturing stderr as | # Launch the svnrdump command while capturing stderr as | ||||
# successfully dumped revision numbers are printed to it | # successfully dumped revision numbers are printed to it | ||||
dump_temp_dir = tempfile.mkdtemp(dir=self.temp_dir) | dump_temp_dir = tempfile.mkdtemp(dir=self.temp_dir) | ||||
dump_name = "".join(c for c in svn_url if c.isalnum()) | dump_name = "".join(c for c in svn_url if c.isalnum()) | ||||
dump_path = "%s/%s.svndump" % (dump_temp_dir, dump_name) | dump_path = "%s/%s.svndump" % (dump_temp_dir, dump_name) | ||||
stderr_lines = [] | stderr_lines = [] | ||||
self.log.debug("Executing %s" % " ".join(svnrdump_cmd)) | self.log.debug("Executing %s" % " ".join(svnrdump_cmd)) | ||||
with open(dump_path, "wb") as dump_file: | with open(dump_path, "wb") as dump_file: | ||||
stderr_r, stderr_w = pty.openpty() | stderr_r, stderr_w = pty.openpty() | ||||
svnrdump = Popen(svnrdump_cmd, stdout=dump_file, stderr=stderr_w) | svnrdump = Popen(svnrdump_cmd, stdout=dump_file, stderr=stderr_w) | ||||
os.close(stderr_w) | os.close(stderr_w) | ||||
stderr_stream = OutputStream(stderr_r) | stderr_stream = OutputStream(stderr_r) | ||||
readable = True | readable = True | ||||
error_codes: List[str] = [] | |||||
error_messages: List[str] = [] | |||||
while readable: | while readable: | ||||
lines, readable = stderr_stream.read_lines() | lines, readable = stderr_stream.read_lines() | ||||
stderr_lines += lines | stderr_lines += lines | ||||
for line in lines: | for line in lines: | ||||
self.log.debug(line) | self.log.debug(line) | ||||
match = SUBVERSION_ERROR.search(line) | |||||
Done Inline Actions Replace with: match = SUBVERSION_ERROR.search(line) if match: error_codes.append(match.group(1)) error_messages.append(lines) anlambert: Replace with:
```lang=python
match = SUBVERSION_ERROR.search(line)
if match:
error_codes. | |||||
if match: | |||||
error_codes.append(match.group(1)) | |||||
Done Inline Actionss/lines/line/ anlambert: s/lines/line/ | |||||
error_messages.append(line) | |||||
Done Inline Actions How about capturing the error codes with a regexp here instead? anlambert: How about capturing the error codes with a regexp here instead?
This would allow handling… | |||||
Done Inline Actionsdo you have an example in mind for both that suggestion and the next? I'm a bit hazy on how to do it. ardumont: do you have an example in mind for both that suggestion and the next?
I'm a bit hazy on how to… | |||||
svnrdump.wait() | svnrdump.wait() | ||||
os.close(stderr_r) | os.close(stderr_r) | ||||
if svnrdump.returncode == 0: | if svnrdump.returncode == 0: | ||||
return dump_path | return dump_path | ||||
# There was an error but it does not mean that no revisions | # There was an error but it does not mean that no revisions | ||||
# can be loaded. | # can be loaded. | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | def dump_svn_revisions(self, svn_url, last_loaded_svn_rev=-1): | ||||
raise Exception( | raise Exception( | ||||
( | ( | ||||
"Last dumped subversion revision (%s) is " | "Last dumped subversion revision (%s) is " | ||||
"lesser than the last one loaded into the " | "lesser than the last one loaded into the " | ||||
"archive (%s)." | "archive (%s)." | ||||
) | ) | ||||
% (last_dumped_rev, last_loaded_svn_rev) | % (last_dumped_rev, last_loaded_svn_rev) | ||||
) | ) | ||||
if SUBVERSION_NOT_FOUND in error_codes: | |||||
raise NotFound( | |||||
f"{SUBVERSION_NOT_FOUND}: Repository never existed or disappeared" | |||||
Done Inline ActionsInstead of processing stderr lines again, I would rather capture error codes in a list and check its content. anlambert: Instead of processing stderr lines again, I would rather capture error codes in a list and… | |||||
) | |||||
Done Inline ActionsRaise NotFound here: if "E170013" in error_codes: .... anlambert: Raise NotFound here:
```lang=python
if "E170013" in error_codes:
....
``` | |||||
raise Exception( | raise Exception( | ||||
"An error occurred when running svnrdump and " | "An error occurred when running svnrdump and " | ||||
"no exploitable dump file has been generated." | "no exploitable dump file has been generated.\n" + "\n".join(error_messages) | ||||
) | ) | ||||
Done Inline ActionsPut error messages in exception text: raise Exception( "An error occurred when running svnrdump and " "no exploitable dump file has been generated.\n" + "\n".join(error_messages) ) anlambert: Put error messages in exception text:
```lang=python
raise Exception(
"An error occurred… | |||||
def prepare(self): | def prepare(self): | ||||
# First, check if previous revisions have been loaded for the | # First, check if previous revisions have been loaded for the | ||||
# subversion origin and get the number of the last one | # subversion origin and get the number of the last one | ||||
last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.svn_url) | last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.svn_url) | ||||
# Then try to generate a dump file containing relevant svn revisions | # Then try to generate a dump file containing relevant svn revisions | ||||
# to load, an exception will be thrown if something wrong happened | # to load, an exception will be thrown if something wrong happened | ||||
Show All 23 Lines |
I was rather thinking of capturing all error codes and messages, with the following regex.