diff --git a/PKG-INFO b/PKG-INFO index 1f8c5d2..680b02c 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.lister.github -Version: 0.0.1 +Version: 0.0.2 Summary: Software Heritage GitHub lister Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/PKG-INFO b/swh.lister.github.egg-info/PKG-INFO similarity index 94% copy from PKG-INFO copy to swh.lister.github.egg-info/PKG-INFO index 1f8c5d2..680b02c 100644 --- a/PKG-INFO +++ b/swh.lister.github.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.lister.github -Version: 0.0.1 +Version: 0.0.2 Summary: Software Heritage GitHub lister Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.lister.github.egg-info/SOURCES.txt b/swh.lister.github.egg-info/SOURCES.txt new file mode 100644 index 0000000..0e6c754 --- /dev/null +++ b/swh.lister.github.egg-info/SOURCES.txt @@ -0,0 +1,30 @@ +.gitignore +ACKNOWLEDGEMENTS +LICENSE +MANIFEST.in +Makefile +README +TODO +requirements.txt +setup.py +version.txt +debian/changelog +debian/compat +debian/control +debian/copyright +debian/rules +debian/source/format +swh.lister.github.egg-info/PKG-INFO +swh.lister.github.egg-info/SOURCES.txt +swh.lister.github.egg-info/dependency_links.txt +swh.lister.github.egg-info/requires.txt +swh.lister.github.egg-info/top_level.txt +swh/lister/github/__init__.py +swh/lister/github/cache.py +swh/lister/github/constants.py +swh/lister/github/converters.py +swh/lister/github/github_api.py +swh/lister/github/lister.py +swh/lister/github/processors.py +swh/lister/github/req_queue.py +swh/lister/github/storage_utils.py \ No newline at end of file diff --git a/swh.lister.github.egg-info/dependency_links.txt b/swh.lister.github.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/swh.lister.github.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/swh.lister.github.egg-info/requires.txt b/swh.lister.github.egg-info/requires.txt new file mode 100644 index 0000000..7184a44 --- /dev/null +++ b/swh.lister.github.egg-info/requires.txt @@ -0,0 +1,6 @@ +python-dateutil +qless-py +redis +requests +swh.core +swh.storage diff --git a/swh.lister.github.egg-info/top_level.txt b/swh.lister.github.egg-info/top_level.txt new file mode 100644 index 0000000..0cb0f8f --- /dev/null +++ b/swh.lister.github.egg-info/top_level.txt @@ -0,0 +1 @@ +swh diff --git a/swh/lister/github/constants.py b/swh/lister/github/constants.py new file mode 100644 index 0000000..08dcc89 --- /dev/null +++ b/swh/lister/github/constants.py @@ -0,0 +1,7 @@ +# Copyright © 2016 The Software Heritage Developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +GITHUB_ORGS_UUID = '9f7b34d9-aa98-44d4-8907-b332c1036bc3' +GITHUB_USERS_UUID = 'ad6df473-c1d2-4f40-bc58-2b091d4a750e' +GITHUB_LISTER_UUID = '34bd6b1b-463f-43e5-a697-785107f598e4' diff --git a/swh/lister/github/converters.py b/swh/lister/github/converters.py index e630a64..4846f85 100644 --- a/swh/lister/github/converters.py +++ b/swh/lister/github/converters.py @@ -1,94 +1,95 @@ # Copyright © 2016 The Software Heritage Developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy import datetime from email.utils import format_datetime from dateutil.parser import parse as parse_datetime -from . import cache, storage_utils +from . import cache, constants def utcnow(): return datetime.datetime.now(tz=datetime.timezone.utc) def updated_at_to_last_modified(updated_at): if not updated_at: return None dt = parse_datetime(updated_at).astimezone(datetime.timezone.utc) return format_datetime(dt, usegmt=True) def repository_to_entity(orig_entity, repo): """Convert a repository to an entity""" entity = copy.deepcopy(orig_entity) owner_uuid = cache.get_user(repo['owner']['id']) if not owner_uuid: raise ValueError("Owner %s (id=%d) not in cache" % ( repo['owner']['login'], repo['owner']['id'])) entity['parent'] = owner_uuid entity['name'] = repo['full_name'] entity['type'] = 'project' entity['description'] = repo['description'] if 'homepage' in repo: entity['homepage'] = repo['homepage'] entity['active'] = True entity['generated'] = True - entity['lister_metadata']['lister'] = storage_utils.GITHUB_LISTER_UUID + entity['lister_metadata']['lister'] = constants.GITHUB_LISTER_UUID entity['lister_metadata']['type'] = 'repository' entity['lister_metadata']['id'] = repo['id'] entity['lister_metadata']['fork'] = repo['fork'] if 'updated_at' in repo: entity['lister_metadata']['updated_at'] = repo['updated_at'] entity['validity'] = [utcnow()] return entity def user_to_entity(orig_entity, user): """Convert a GitHub user toan entity""" entity = copy.deepcopy(orig_entity) if user['type'] == 'User': - parent = storage_utils.GITHUB_USERS_UUID + parent = constants.GITHUB_USERS_UUID + type = 'person' elif user['type'] == 'Organization': - parent = storage_utils.GITHUB_ORGS_UUID + parent = constants.GITHUB_ORGS_UUID type = 'group_of_persons' else: raise ValueError("Unknown GitHub user type %s" % user['type']) entity['parent'] = parent if 'name' in user: entity['name'] = user['name'] if not entity.get('name'): entity['name'] = user['login'] entity['type'] = type entity['active'] = True entity['generated'] = True - entity['lister_metadata']['lister'] = storage_utils.GITHUB_LISTER_UUID + entity['lister_metadata']['lister'] = constants.GITHUB_LISTER_UUID entity['lister_metadata']['type'] = 'user' entity['lister_metadata']['id'] = user['id'] entity['lister_metadata']['login'] = user['login'] if 'updated_at' in user: entity['lister_metadata']['updated_at'] = user['updated_at'] entity['validity'] = [datetime.datetime.now()] return entity diff --git a/swh/lister/github/storage_utils.py b/swh/lister/github/storage_utils.py index e08b1a0..f625fab 100644 --- a/swh/lister/github/storage_utils.py +++ b/swh/lister/github/storage_utils.py @@ -1,122 +1,117 @@ # Copyright © 2016 The Software Heritage Developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import uuid -from . import cache, converters - - -GITHUB_ORGS_UUID = '9f7b34d9-aa98-44d4-8907-b332c1036bc3' -GITHUB_USERS_UUID = 'ad6df473-c1d2-4f40-bc58-2b091d4a750e' -GITHUB_LISTER_UUID = '34bd6b1b-463f-43e5-a697-785107f598e4' +from . import cache, constants, converters def update_user_entities(storage, users): """Update entities for several users in storage. Returns the new entities. """ users = list(sorted(users, key=lambda u: u['id'])) query = [{ - 'lister': GITHUB_LISTER_UUID, + 'lister': constants.GITHUB_LISTER_UUID, 'type': 'user', 'id': user['id'], } for user in users] entities = list(storage.entity_get_from_lister_metadata(query)) new_entities = [] for user, entity in zip(users, entities): if not entity['uuid']: entity = { 'uuid': uuid.uuid4(), 'doap': {}, 'lister_metadata': {}, } new_entity = converters.user_to_entity(entity, user) cache.set_user(user['id'], new_entity['uuid']) new_entities.append(new_entity) storage.entity_add(new_entities) return new_entities def update_repo_entities(storage, repos): """Update entities for several repositories in storage. Returns the new entities.""" repos = list(sorted(repos, key=lambda r: r['id'])) users = {} for repo in repos: if not cache.get_user(repo['owner']['id']): users[repo['owner']['id']] = repo['owner'] if users: update_user_entities(storage, users.values()) query = [{ - 'lister': GITHUB_LISTER_UUID, + 'lister': constants.GITHUB_LISTER_UUID, 'type': 'repository', 'id': repo['id'], } for repo in repos] entities = list(storage.entity_get_from_lister_metadata(query)) new_entities = [] for repo, entity in zip(repos, entities): if not entity['uuid']: entity = { 'uuid': uuid.uuid4(), 'doap': {}, 'lister_metadata': {}, } new_entities.append(converters.repository_to_entity(entity, repo)) storage.entity_add(new_entities) return new_entities def repo_last_modified(storage, id): entity_id = cache.get_repo(id) if entity_id: entity = storage.entity_get_one(entity_id) else: entity = list(storage.entity_get_from_lister_metadata([{ - 'lister': GITHUB_LISTER_UUID, + 'lister': constants.GITHUB_LISTER_UUID, 'type': 'repository', 'id': id, }]))[0] if entity['uuid']: cache.set_repo(id, entity['uuid']) updated_at = entity.get('lister_metadata', {}).get('updated_at') return converters.updated_at_to_last_modified(updated_at) def user_last_modified(storage, id): entity_id = cache.get_user(id) if entity_id: entity = storage.entity_get_one(entity_id) else: entity = list(storage.entity_get_from_lister_metadata([{ - 'lister': GITHUB_LISTER_UUID, + 'lister': constants.GITHUB_LISTER_UUID, 'type': 'user', 'id': id, }]))[0] if entity['uuid']: cache.set_user(id, entity['uuid']) updated_at = entity.get('lister_metadata', {}).get('updated_at') return converters.updated_at_to_last_modified(updated_at) diff --git a/version.txt b/version.txt new file mode 100644 index 0000000..990a0fe --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ +v0.0.2-0-g9550ca3 \ No newline at end of file