diff --git a/swh/model/collections.py b/swh/model/collections.py --- a/swh/model/collections.py +++ b/swh/model/collections.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +"""Utility data structures.""" + from collections.abc import Mapping from typing import Dict, Generic, Iterable, Optional, Tuple, TypeVar, Union @@ -11,6 +13,11 @@ class ImmutableDict(Mapping, Generic[KT, VT]): + """A frozen dictionary. + + This class behaves like a dictionary, but internally stores objects in a tuple, + so it is both immutable and hashable.""" + data: Tuple[Tuple[KT, VT], ...] def __init__( diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py --- a/swh/model/from_disk.py +++ b/swh/model/from_disk.py @@ -3,6 +3,13 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +"""Conversion from filesystem tree to SWH objects. + +This module allows reading a tree of directories and files from a local +filesystem, and converting them to in-memory data structures, which can then +be exported to SWH data model objects, as defined in :mod:`swh.model.model`. +""" + import datetime import enum import fnmatch diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py --- a/swh/model/git_objects.py +++ b/swh/model/git_objects.py @@ -3,6 +3,30 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +""" +Converts SWH model objects to git(-like) objects + +Most of the functions in this module take as argument an object from +:mod:`swh.model.model`, and format it like a git object. + +They are the inverse functions of those in :mod:`swh.loader.git.converters`, +but with extensions, as SWH's model is a superset of Git's: + +* extensions of existing types (e.g. 
revision/commit and release/tag dates + can be expressed with precision up to milliseconds, to support formatting + Mercurial objects) +* new types, for SWH's specific needs (:class:`swh.model.model.RawExtrinsicMetadata` + and :class:`swh.model.model.ExtID`) +* support for somewhat corrupted git objects that we need to reproduce + +This is used for two purposes: + +* Format manifests that can be hashed to produce :ref:`intrinsic identifiers + <persistent-identifiers>` +* Write git objects to reproduce git repositories that were ingested in the archive. +""" + + from __future__ import annotations import datetime diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -3,6 +3,18 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +""" +Implementation of Software Heritage's data model + +See :ref:`data-model` for an overview of the data model. + +The classes defined in this module are immutable +`attrs objects <https://attrs.org/>`__ and enums. + +All classes define a ``from_dict`` class method and a ``to_dict`` +method to convert between them and msgpack-serializable objects. +""" + from abc import ABCMeta, abstractmethod import datetime from enum import Enum diff --git a/swh/model/swhids.py b/swh/model/swhids.py --- a/swh/model/swhids.py +++ b/swh/model/swhids.py @@ -3,6 +3,15 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +""" +Classes to represent :ref:`SWH persistent IDentifiers <persistent-identifiers>`. + +:class:`CoreSWHID` represents a SWHID with no qualifier, and :class:`QualifiedSWHID` +represents a SWHID that may have qualifiers. +:class:`ExtendedSWHID` extends the definition of SWHID to other object types, +and is used internally in Software Heritage; it does not support qualifiers. +""" + from __future__ import annotations import enum