Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/cvs/rlog.py
Show All 39 Lines | ||||||||||||||||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||||||||||||||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||||||||||||||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||||||||||||||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||||||||||||||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||||||||||||||||
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||||||||||||||||
import calendar | import calendar | |||||||||||||||||
import re | import re | |||||||||||||||||
vlorentz: Could you `import string` and use `string.ascii_lowercase` etc. below?
This avoids lowercased… | ||||||||||||||||||
import string | ||||||||||||||||||
import time | import time | |||||||||||||||||
from typing import BinaryIO, Dict, List, NamedTuple, Optional, Tuple | from typing import BinaryIO, Dict, List, NamedTuple, Optional, Tuple | |||||||||||||||||
from swh.loader.cvs.cvs2gitdump.cvs2gitdump import ChangeSetKey | from swh.loader.cvs.cvs2gitdump.cvs2gitdump import ChangeSetKey | |||||||||||||||||
# There is no known encoding of path names in CVS. The actual encoding used | # There is no known encoding of path names in CVS. The actual encoding used | |||||||||||||||||
# will depend on the CVS server's operating system and perhaps even the | # will depend on the CVS server's operating system and perhaps even the | |||||||||||||||||
# underlying filesystem used to host a CVS repository. | # underlying filesystem used to host a CVS repository. | |||||||||||||||||
Show All 12 Lines | ||||||||||||||||||
class revtuple(NamedTuple):
    """Revision record built by the rlog parser for one log entry.

    The parser describes this tuple as compatible with 'rcsparse'.
    """

    # Revision number string, decoded as ASCII.
    number: str
    # Commit time as produced by calendar.timegm().
    date: int
    # Author name, kept as raw bytes because its encoding is arbitrary.
    author: bytes
    # Revision state, usually "Exp" or "dead".
    state: str
    # Branches of this revision; not populated yet (always None).
    branches: None
    # Revision number string of the previous revision; not populated yet
    # (always None).
    revnumstr: None
    # Commit ID taken from the log entry, or None when the entry has none.
    commitid: Optional[str]
class RlogConv: | class RlogConv: | |||||||||||||||||
def __init__(self, cvsroot_path: str, fuzzsec: int) -> None: | def __init__(self, cvsroot_path: str, fuzzsec: int) -> None: | |||||||||||||||||
self.cvsroot_path = cvsroot_path | self.cvsroot_path = cvsroot_path | |||||||||||||||||
self.fuzzsec = fuzzsec | self.fuzzsec = fuzzsec | |||||||||||||||||
self.changesets: Dict[ChangeSetKey, ChangeSetKey] = dict() | self.changesets: Dict[ChangeSetKey, ChangeSetKey] = dict() | |||||||||||||||||
self.tags: Dict[str, ChangeSetKey] = dict() | self.tags: Dict[str, ChangeSetKey] = dict() | |||||||||||||||||
▲ Show 20 Lines • Show All 322 Lines • ▼ Show 20 Lines | ||||||||||||||||||
def cvs_strptime(timestr):
    """Parse a CVS log timestamp into a UTC time tuple.

    Two formats are accepted: ``YYYY/MM/DD HH:MM:SS`` (taken as UTC) and
    ``YYYY-MM-DD HH:MM:SS +ZZZZ`` (with an explicit UTC offset).

    Args:
        timestr: the timestamp text.

    Returns:
        A 9-item tuple laid out like ``time.struct_time``, expressed in UTC,
        with the DST flag forced to 0 so it can be passed to
        ``calendar.timegm``.

    Raises:
        ValueError: if ``timestr`` matches neither format.
    """
    try:
        parsed = time.strptime(timestr, "%Y/%m/%d %H:%M:%S")
    except ValueError:
        parsed = time.strptime(timestr, "%Y-%m-%d %H:%M:%S %z")
        # Slicing a struct_time drops tm_gmtoff, so a non-zero offset must be
        # folded into the fields here; otherwise the local wall-clock time
        # would later be misread as UTC.
        offset = getattr(parsed, "tm_gmtoff", None)
        if offset:
            parsed = time.gmtime(calendar.timegm(parsed) - offset)
    return parsed[:-1] + (0,)
def _parse_commitid(commitid: bytes) -> Optional[str]: | ||||||||||||||||||
s = commitid.decode("ascii").strip() | ||||||||||||||||||
# Strip "commitid: " tag and the trailing semicolon. | ||||||||||||||||||
s = s[len("commitid: ") : -len(";")] | ||||||||||||||||||
# The commitid itself contains digit and ASCII letters only: | ||||||||||||||||||
for c in s: | ||||||||||||||||||
if ( | ||||||||||||||||||
c not in string.digits | ||||||||||||||||||
and c not in string.ascii_lowercase | ||||||||||||||||||
and c not in string.ascii_uppercase | ||||||||||||||||||
): | ||||||||||||||||||
Not Done Inline Actions
avoids building a string vlorentz: avoids building a string | ||||||||||||||||||
raise ValueError("invalid commitid") | ||||||||||||||||||
return s | ||||||||||||||||||
def _parse_log_entry(fp) -> Tuple[Optional[revtuple], Optional[bytes], Optional[bytes]]: | def _parse_log_entry(fp) -> Tuple[Optional[revtuple], Optional[bytes], Optional[bytes]]: | |||||||||||||||||
"""Parse a single log entry. | """Parse a single log entry. | |||||||||||||||||
On entry, fp should point to the first line of the entry (the "revision" | On entry, fp should point to the first line of the entry (the "revision" | |||||||||||||||||
line). | line). | |||||||||||||||||
On exit, fp will have consumed the log separator line (dashes) or the | On exit, fp will have consumed the log separator line (dashes) or the | |||||||||||||||||
end-of-file marker (equals). | end-of-file marker (equals). | |||||||||||||||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | """ | |||||||||||||||||
if tm[0] < EPOCH: | if tm[0] < EPOCH: | |||||||||||||||||
tm = list(tm) | tm = list(tm) | |||||||||||||||||
if (tm[0] - 1900) < 70: | if (tm[0] - 1900) < 70: | |||||||||||||||||
tm[0] = tm[0] + 100 | tm[0] = tm[0] + 100 | |||||||||||||||||
if tm[0] < EPOCH: | if tm[0] < EPOCH: | |||||||||||||||||
raise ValueError("invalid year") | raise ValueError("invalid year") | |||||||||||||||||
date = calendar.timegm(tm) | date = calendar.timegm(tm) | |||||||||||||||||
commitid = match.group(6) or None | ||||||||||||||||||
if commitid: | ||||||||||||||||||
parsed_commitid = _parse_commitid(commitid) | ||||||||||||||||||
else: | ||||||||||||||||||
parsed_commitid = None | ||||||||||||||||||
# return a revision tuple compatible with 'rcsparse', the log message, | # return a revision tuple compatible with 'rcsparse', the log message, | |||||||||||||||||
# and the EOF marker | # and the EOF marker | |||||||||||||||||
return ( | return ( | |||||||||||||||||
revtuple( | revtuple( | |||||||||||||||||
rev.decode("ascii"), # revision number string | rev.decode("ascii"), # revision number string | |||||||||||||||||
date, | date, | |||||||||||||||||
match.group(2), # author (encoding is arbitrary; don't attempt to decode) | match.group(2), # author (encoding is arbitrary; don't attempt to decode) | |||||||||||||||||
match.group(3).decode( | match.group(3).decode( | |||||||||||||||||
"ascii" | "ascii" | |||||||||||||||||
), # state, usually "Exp" or "dead"; non-ASCII data here would be weird | ), # state, usually "Exp" or "dead"; non-ASCII data here would be weird | |||||||||||||||||
None, # TODO: branches of this rev | None, # TODO: branches of this rev | |||||||||||||||||
None, # TODO: revnumstr of previous rev | None, # TODO: revnumstr of previous rev | |||||||||||||||||
None, # TODO: commitid | parsed_commitid, | |||||||||||||||||
), | ), | |||||||||||||||||
log, | log, | |||||||||||||||||
eof, | eof, | |||||||||||||||||
) | ) |
Could you `import string` and use `string.ascii_lowercase` etc. below?
This avoids lowercase constants in the global namespace.