diff --git a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py --- a/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py +++ b/swh/loader/cvs/cvs2gitdump/cvs2gitdump.py @@ -30,6 +30,7 @@ # % git --git-dir /git/openbsd.git fast-import < openbsd2.dump # +from collections import defaultdict import copy import getopt import os @@ -381,18 +382,18 @@ c.tags.append(t) def parse_file(self, path: str) -> None: - rtags: Dict[str, List[str]] = dict() + rtags: Dict[str, List[str]] = defaultdict(list) rcsfile = rcsparse.rcsfile(path) + branches = {'1': 'HEAD', '1.1.1': 'VENDOR'} + for k, v_ in list(rcsfile.symbols.items()): r = v_.split('.') if len(r) == 3: branches[v_] = 'VENDOR' elif len(r) >= 3 and r[-2] == '0': branches['.'.join(r[:-2] + r[-1:])] = k - if len(r) == 2 and branches[r[0]] == 'HEAD': - if v_ not in rtags: - rtags[v_] = list() + elif len(r) == 2 and branches.get(r[0]) == 'HEAD': rtags[v_].append(k) revs: List[Tuple[str, Tuple[str, int, str, str, List[str], str, str]]] = list(rcsfile.revs.items()) @@ -418,6 +419,8 @@ continue last_vendor_status = v[3] elif len(r) == 2: + # ensure revision targets head branch + branches[r[0]] = 'HEAD' if r[0] == '1' and r[1] == '1': if have_initial_revision: continue diff --git a/swh/loader/cvs/rlog.py b/swh/loader/cvs/rlog.py --- a/swh/loader/cvs/rlog.py +++ b/swh/loader/cvs/rlog.py @@ -45,6 +45,7 @@ # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. import calendar +from collections import defaultdict import re import string import time @@ -79,7 +80,7 @@ logmsgs: Dict[str, Optional[bytes]], ) -> None: """Convert RCS revision history of a file into self.changesets items""" - rtags: Dict[str, List[str]] = dict() + rtags: Dict[str, List[str]] = defaultdict(list) # RCS and CVS represent branches by adding digits to revision numbers. # And CVS assigns special meaning to certain revision number ranges. # @@ -121,23 +122,21 @@ # This allows CVS to store information about a branch's existence # before any files on this branch have been modified. # Even-numbered branch revisions appear once the file is modified. + branches = {"1": "HEAD", "1.1.1": "VENDOR"} - k: str - v_: str - for k, v_ in list(taginfo.items()): # type: ignore # FIXME, inconsistent types - r = v_.split(".") + for k_, v_ in taginfo.items(): + v_str = v_.decode() + r = v_str.split(".") if len(r) == 3: # vendor branch number - branches[v_] = "VENDOR" + branches[v_str] = "VENDOR" elif len(r) >= 3 and r[-2] == "0": # magic branch number - branches[".".join(r[:-2] + r[-1:])] = k - if len(r) == 2 and branches[r[0]] == "HEAD": + branches[".".join(r[:-2] + r[-1:])] = k_.decode() + elif len(r) == 2 and branches.get(r[0]) == "HEAD": # main branch number - if v_ not in rtags: - rtags[v_] = list() - rtags[v_].append(k) + rtags[v_str].append(k_.decode()) revs: List[Tuple[str, revtuple]] = list(revisions.items()) # sort by revision descending to priorize 1.1.1.1 than 1.1 @@ -167,6 +166,8 @@ continue last_vendor_status = v[3] elif len(r) == 2: + # ensure revision targets head branch + branches[r[0]] = "HEAD" if r[0] == "1" and r[1] == "1": if have_initial_revision: continue @@ -308,7 +309,7 @@ break if state == 1: - if line[0] == b"\t": + if line.startswith(b"\t"): [tag, rev] = [x.strip() for x in line.split(b":")] taginfo[tag] = rev else: @@ -316,7 +317,7 @@ state = 0 if state == 2: - if line[0] == b"\t": + if line.startswith(b"\t"): [locker, rev] = [x.strip() for x in line.split(b":")] lockinfo[rev] = locker else: diff --git a/swh/loader/cvs/tests/data/cpmixin.tgz b/swh/loader/cvs/tests/data/cpmixin.tgz new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@