Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/sourceforge/lister.py
Show All 37 Lines | ||||||||||
VCS_NAMES = set(v.value for v in VcsNames.__members__.values()) | VCS_NAMES = set(v.value for v in VcsNames.__members__.values()) | |||||||||
@dataclass | @dataclass | |||||||||
class SourceForgeListerEntry: | class SourceForgeListerEntry: | |||||||||
vcs: VcsNames | vcs: VcsNames | |||||||||
url: str | url: str | |||||||||
last_modified: datetime.date | last_modified: datetime.date | |||||||||
ardumont: mypy is right, last_modified is a date and iso8601.parse_date takes a date as string... [1]… | ||||||||||
SubSitemapNameT = str | SubSitemapNameT = str | |||||||||
ProjectNameT = str | ProjectNameT = str | |||||||||
# SourceForge only offers day-level granularity, which is good enough for our purposes | # SourceForge only offers day-level granularity, which is good enough for our purposes | |||||||||
LastModifiedT = datetime.date | LastModifiedT = datetime.date | |||||||||
▲ Show 20 Lines • Show All 187 Lines • ▼ Show 20 Lines | def get_pages(self) -> Iterator[SourceForgeListerPage]: | |||||||||
subtree = ElementTree.fromstring(subsitemap_contents) | subtree = ElementTree.fromstring(subsitemap_contents) | |||||||||
yield from self._get_pages_from_subsitemap(subtree) | yield from self._get_pages_from_subsitemap(subtree) | |||||||||
def get_origins_from_page( | def get_origins_from_page( | |||||||||
self, page: SourceForgeListerPage | self, page: SourceForgeListerPage | |||||||||
) -> Iterator[ListedOrigin]: | ) -> Iterator[ListedOrigin]: | |||||||||
assert self.lister_obj.id is not None | assert self.lister_obj.id is not None | |||||||||
for hit in page: | for hit in page: | |||||||||
last_modified: str = str(hit.last_modified) | ||||||||||
Not Done Inline Actions
Does this work? It spares a serialization/deserialization round-trip vlorentz: Does this work? It spares a serialization/deserialization round-trip | ||||||||||
Done Inline Actions Interesting, I did not know about datetime.combine [1]. I've opened D6668 with that implementation so we have some clue. [1] https://docs.python.org/3.7/library/datetime.html#datetime.datetime.combine ardumont: Interesting, I did not know about datetime.combine [1].
I've opened D6668 with that… | ||||||||||
Done Inline Actions
D6668's build says no. ardumont: > Does this work?
D6668's build says no. | ||||||||||
last_update: datetime.datetime = iso8601.parse_date(last_modified) | ||||||||||
yield ListedOrigin( | yield ListedOrigin( | |||||||||
lister_id=self.lister_obj.id, | lister_id=self.lister_obj.id, | |||||||||
visit_type=hit.vcs.value, | visit_type=hit.vcs.value, | |||||||||
url=hit.url, | url=hit.url, | |||||||||
last_update=iso8601.parse_date(hit.last_modified), | last_update=last_update, | |||||||||
) | ) | |||||||||
def _get_pages_from_subsitemap( | def _get_pages_from_subsitemap( | |||||||||
self, subtree: ElementTree.Element | self, subtree: ElementTree.Element | |||||||||
) -> Iterator[SourceForgeListerPage]: | ) -> Iterator[SourceForgeListerPage]: | |||||||||
projects: Set[ProjectNameT] = set() | projects: Set[ProjectNameT] = set() | |||||||||
for project_block in subtree.iterfind(f"{SITEMAP_XML_NAMESPACE}url"): | for project_block in subtree.iterfind(f"{SITEMAP_XML_NAMESPACE}url"): | |||||||||
last_modified_block = project_block.find(f"{SITEMAP_XML_NAMESPACE}lastmod") | last_modified_block = project_block.find(f"{SITEMAP_XML_NAMESPACE}lastmod") | |||||||||
▲ Show 20 Lines • Show All 105 Lines • Show Last 20 Lines |
mypy is right: last_modified is a datetime.date, whereas iso8601.parse_date expects the date as a string [1].
Hence the change below.
[1] https://github.com/micktwomey/pyiso8601/blob/main/iso8601/iso8601.py#L102-L104