Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/parsers.py
# Copyright (C) 2017-2020 The Software Heritage developers | # Copyright (C) 2017-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
"""Module in charge of defining parsers with SWORD 2.0 supported mediatypes. | """Module in charge of defining parsers with SWORD 2.0 supported mediatypes. | ||||
""" | """ | ||||
import logging | import logging | ||||
from typing import Dict, Optional, Union | from typing import Dict, Optional, Union | ||||
from xml.parsers.expat import ExpatError | from xml.parsers.expat import ExpatError | ||||
from django.conf import settings | from django.conf import settings | ||||
from rest_framework.parsers import BaseParser, FileUploadParser, MultiPartParser | from rest_framework.parsers import BaseParser, FileUploadParser, MultiPartParser | ||||
import xmltodict | |||||
from swh.deposit.errors import ParserError | from swh.deposit.errors import ParserError | ||||
from swh.deposit.utils import parse_xml as _parse_xml | |||||
from swh.model.exceptions import ValidationError | from swh.model.exceptions import ValidationError | ||||
from swh.model.identifiers import ( | from swh.model.identifiers import ( | ||||
DIRECTORY, | DIRECTORY, | ||||
RELEASE, | RELEASE, | ||||
REVISION, | REVISION, | ||||
SNAPSHOT, | SNAPSHOT, | ||||
SWHID, | SWHID, | ||||
parse_swhid, | parse_swhid, | ||||
Show All 26 Lines | class SWHXMLParser(BaseParser): | ||||
media_type = "application/xml" | media_type = "application/xml" | ||||
def parse(self, stream, media_type=None, parser_context=None): | def parse(self, stream, media_type=None, parser_context=None): | ||||
""" | """ | ||||
Parses the incoming bytestream as XML and returns the resulting data. | Parses the incoming bytestream as XML and returns the resulting data. | ||||
""" | """ | ||||
parser_context = parser_context or {} | parser_context = parser_context or {} | ||||
encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET) | encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET) | ||||
namespaces = { | return _parse_xml(stream, encoding=encoding) | ||||
"http://www.w3.org/2005/Atom": None, | |||||
"http://purl.org/dc/terms/": None, | |||||
"https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta", | |||||
"http://purl.org/net/sword/": "sword", | |||||
"https://www.softwareheritage.org/schema/2018/deposit": "swh", | |||||
} | |||||
data = xmltodict.parse( | |||||
stream, encoding=encoding, namespaces=namespaces, process_namespaces=True | |||||
) | |||||
if "entry" in data: | |||||
data = data["entry"] | |||||
return data | |||||
class SWHAtomEntryParser(SWHXMLParser): | class SWHAtomEntryParser(SWHXMLParser): | ||||
"""Atom entry parser limited to specific mediatype | """Atom entry parser limited to specific mediatype | ||||
""" | """ | ||||
media_type = "application/atom+xml;type=entry" | media_type = "application/atom+xml;type=entry" | ||||
▲ Show 20 Lines • Show All 124 Lines • Show Last 20 Lines |