Changeset View
Changeset View
Standalone View
Standalone View
swh/clearlydefined/mapping_utils.py
Show First 20 Lines • Show All 137 Lines • ▼ Show 20 Lines | if sha1: | ||||
format=format, | format=format, | ||||
) | ) | ||||
) | ) | ||||
else: | else: | ||||
mapping_status = False | mapping_status = False | ||||
return mapping_status | return mapping_status | ||||
def map_scancode( | def list_scancode_files(metadata_string: str) -> List[Tuple[str, Dict]]: | ||||
storage, metadata_string: str, date: datetime | |||||
) -> Tuple[bool, List[RawExtrinsicMetadata]]: | |||||
""" | """ | ||||
Take metadata_string and storage as input and try to | Returns (sha1, file dict) pairs for each ScanCode metadata file | ||||
map the sha1 of files with content, return mapping | referenced in the metadata_string. | ||||
status of harvest (True if able to map every sha1, | |||||
False if not able to map every sha1) and | |||||
data to be written in storage | |||||
""" | """ | ||||
metadata = json.loads(metadata_string) | metadata = json.loads(metadata_string) | ||||
content = metadata.get("content") or {} | content = metadata.get("content") or {} | ||||
files = content.get("files") or {} | files = content.get("files") or {} | ||||
mapping_status = True | files_with_sha1 = [] | ||||
format = "clearlydefined-harvest-scancode-json" | |||||
data: List[RawExtrinsicMetadata] = [] | |||||
for file in files: | for file in files: | ||||
sha1 = file.get("sha1") | sha1 = file.get("sha1") | ||||
mapping_status = ( | files_with_sha1.append((sha1, file)) | ||||
map_sha1_and_add_in_data(storage, sha1, data, file, date, format) | return files_with_sha1 | ||||
and mapping_status | |||||
) | |||||
return mapping_status, data | |||||
def map_licensee( | def list_licensee_files(metadata_string: str) -> List[Tuple[str, Dict]]: | ||||
storage, metadata_string: str, date: datetime | |||||
) -> Tuple[bool, List[RawExtrinsicMetadata]]: | |||||
""" | """ | ||||
Take metadata_string and storage as input and try to | Returns (sha1, file dict) pairs for each Licensee metadata file | ||||
map the sha1 of files with content, return mapping | referenced in the metadata_string. | ||||
status of harvest (True if able to map every sha1, | |||||
False if not able to map every sha1) and | |||||
data to be written in storage | |||||
""" | """ | ||||
metadata = json.loads(metadata_string) | metadata = json.loads(metadata_string) | ||||
licensee = metadata.get("licensee") or {} | licensee = metadata.get("licensee") or {} | ||||
output = licensee.get("output") or {} | output = licensee.get("output") or {} | ||||
content = output.get("content") or {} | content = output.get("content") or {} | ||||
files = content.get("matched_files") or [] | files = content.get("matched_files") or [] | ||||
mapping_status = True | files_with_sha1 = [] | ||||
format = "clearlydefined-harvest-licensee-json" | |||||
data: List[RawExtrinsicMetadata] = [] | |||||
for file in files: | for file in files: | ||||
sha1 = file.get("content_hash") | sha1 = file.get("content_hash") | ||||
mapping_status = ( | files_with_sha1.append((sha1, file)) | ||||
map_sha1_and_add_in_data(storage, sha1, data, file, date, format) | return files_with_sha1 | ||||
and mapping_status | |||||
) | |||||
return mapping_status, data | |||||
def map_clearlydefined( | def list_clearlydefined_files(metadata_string: str) -> List[Tuple[str, Dict]]: | ||||
storage, metadata_string: str, date: datetime | |||||
) -> Tuple[bool, List[RawExtrinsicMetadata]]: | |||||
""" | """ | ||||
Take metadata_string and storage as input and try to | Returns (sha1, file dict) pairs for each ClearlyDefined metadata file | ||||
map the sha1 of files with content, return mapping | referenced in the metadata_string. | ||||
status of harvest (True if able to map every sha1, | |||||
False if not able to map every sha1) and | |||||
data to be written in storage | |||||
""" | """ | ||||
metadata = json.loads(metadata_string) | metadata = json.loads(metadata_string) | ||||
files = metadata.get("files") or [] | files = metadata.get("files") or [] | ||||
mapping_status = True | files_with_sha1 = [] | ||||
format = "clearlydefined-harvest-clearlydefined-json" | |||||
data: List[RawExtrinsicMetadata] = [] | |||||
for file in files: | for file in files: | ||||
hashes = file.get("hashes") or {} | hashes = file.get("hashes") or {} | ||||
sha1 = hashes.get("sha1") | sha1 = hashes.get("sha1") | ||||
mapping_status = ( | assert sha1 | ||||
map_sha1_and_add_in_data(storage, sha1, data, file, date, format) | files_with_sha1.append((sha1, file)) | ||||
and mapping_status | return files_with_sha1 | ||||
) | |||||
return mapping_status, data | |||||
def map_harvest( | def map_harvest( | ||||
storage, tool: str, metadata_string: str, date: datetime | storage, tool: str, metadata_string: str, date: datetime | ||||
) -> Tuple[bool, List[RawExtrinsicMetadata]]: | ) -> Tuple[bool, List[RawExtrinsicMetadata]]: | ||||
""" | """ | ||||
Take tool, metadata_string and storage as input and try to | Take tool, metadata_string and storage as input and try to | ||||
map the sha1 of files with content, return status of | map the sha1 of files with content, return status of | ||||
harvest and data to be written in storage | harvest and data to be written in storage | ||||
""" | """ | ||||
tools = { | tools = { | ||||
"scancode": map_scancode, | "scancode": list_scancode_files, | ||||
"licensee": map_licensee, | "licensee": list_licensee_files, | ||||
"clearlydefined": map_clearlydefined, | "clearlydefined": list_clearlydefined_files, | ||||
} | } | ||||
formats = { | |||||
"scancode": "clearlydefined-harvest-scancode-json", | |||||
"licensee": "clearlydefined-harvest-licensee-json", | |||||
"clearlydefined": "clearlydefined-harvest-clearlydefined-json", | |||||
} | |||||
format_ = formats[tool] | |||||
return tools[tool](storage=storage, metadata_string=metadata_string, date=date) | mapping_status = True | ||||
data: List[RawExtrinsicMetadata] = [] | |||||
for (sha1, file) in tools[tool](metadata_string): | |||||
mapping_status = ( | |||||
map_sha1_and_add_in_data(storage, sha1, data, file, date, format_) | |||||
and mapping_status | |||||
) | |||||
return mapping_status, data | |||||
def map_definition( | def map_definition( | ||||
storage, metadata_string: str, date: datetime | storage, metadata_string: str, date: datetime | ||||
) -> Optional[Tuple[bool, List[RawExtrinsicMetadata]]]: | ) -> Optional[Tuple[bool, List[RawExtrinsicMetadata]]]: | ||||
""" | """ | ||||
Take metadata_string and storage as input and try to | Take metadata_string and storage as input and try to | ||||
map the sha1 of definition with content/ gitSha in revision | map the sha1 of definition with content/ gitSha in revision | ||||
▲ Show 20 Lines • Show All 116 Lines • Show Last 20 Lines |