diff --git a/PKG-INFO b/PKG-INFO index 8a0e03d..22f5e93 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,71 +1,71 @@ Metadata-Version: 2.1 Name: swh.indexer -Version: 2.0.1 +Version: 2.0.2 Summary: Software Heritage Content Indexer Home-page: https://forge.softwareheritage.org/diffusion/78/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-indexer Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-indexer/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-indexer ============ Tools to compute multiple indexes on SWH's raw contents: - content: - mimetype - ctags - language - fossology-license - metadata - revision: - metadata An indexer is in charge of: - looking up objects - extracting information from those objects - store those information in the swh-indexer db There are multiple indexers working on different object types: - content indexer: works with content sha1 hashes - revision indexer: works with revision sha1 hashes - origin indexer: works with origin identifiers Indexation procedure: - receive batch of ids - retrieve the associated data depending on object type - compute for that object some index - store the result to swh's storage Current content indexers: - mimetype (queue swh_indexer_content_mimetype): detect the encoding and mimetype - language (queue swh_indexer_content_language): detect the programming language - ctags (queue swh_indexer_content_ctags): compute tags information - fossology-license (queue swh_indexer_fossology_license): compute the license - metadata: translate file into translated_metadata dict Current revision indexers: - metadata: detects files containing metadata and retrieves translated_metadata in content_metadata table in storage or run content indexer to translate files. diff --git a/debian/changelog b/debian/changelog index 414a52b..f3584a6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,1410 +1,1415 @@ -swh-indexer (2.0.1-1~swh1~bpo10+1) buster-swh; urgency=medium +swh-indexer (2.0.2-1~swh1) unstable-swh; urgency=medium - * Rebuild for buster-swh + * New upstream release 2.0.2 - (tagged by Valentin Lorentz + on 2022-06-22 12:32:41 +0200) + * Upstream changes: - v2.0.2 - * Fix mypy issue with swh- + journal>=1.1.0 - * cff: Ignore invalid yaml files - * npm: + Add workaround for mangled package descriptions - * npm: Fix + crash when npm description is not a string - -- Software Heritage autobuilder (on jenkins-debian1) Fri, 10 Jun 2022 09:24:56 +0000 + -- Software Heritage autobuilder (on jenkins-debian1) Wed, 22 Jun 2022 10:40:25 +0000 swh-indexer (2.0.1-1~swh1) unstable-swh; urgency=medium * New upstream release 2.0.1 - (tagged by Antoine R. Dumont (@ardumont) on 2022-06-10 10:35:15 +0200) * Upstream changes: - v2.0.1 - upgrades/134: Add missing index creation -- Software Heritage autobuilder (on jenkins-debian1) Fri, 10 Jun 2022 09:17:44 +0000 swh-indexer (2.0.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.0.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2022-06-03 15:40:32 +0200) * Upstream changes: - v2.0.0 - Set current_version attribute to postgresql datastore - Add support for indexing from head releases - Replace RevisionMetadataIndexer with DirectoryMetadataIndexer - Add support for running the server with 'postgresql' storage cls - tests: Shorten definition of REVISION - tests: Simplify definition of ORIGINS list - tests: use stock pytest_postgresql factory function - Rewrite origin_head.py as a normal function instead of an indexer - Convert test_origin_head from unittest to pytest -- Software Heritage autobuilder (on jenkins-debian1) Fri, 03 Jun 2022 13:59:59 +0000 swh-indexer (1.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.2.0 - (tagged by Valentin Lorentz on 2022-06-01 16:44:30 +0200) * Upstream changes: - v1.2.0 - * cli: Add support for running "all" indexers in the journal client -- Software Heritage autobuilder (on jenkins-debian1) Wed, 01 Jun 2022 15:08:39 +0000 swh-indexer (1.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.1.0 - (tagged by Valentin Lorentz on 2022-05-30 15:56:19 +0200) * Upstream changes: - v1.1.0 - * Add support for indexing directly from the journal client - * cff: Do not change yaml.SafeLoader globally - * add missing sentry captures - * Change misleading documentation in swh-indexer/cli.py - * test and typing maintenance -- Software Heritage autobuilder (on jenkins-debian1) Mon, 30 May 2022 14:03:54 +0000 swh-indexer (1.0.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.0.0 - (tagged by David Douard on 2022-02-24 17:35:56 +0100) * Upstream changes: - v1.0.0 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Feb 2022 16:42:39 +0000 swh-indexer (0.8.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.2 - (tagged by Valentin Lorentz on 2022-01-12 13:53:22 +0100) * Upstream changes: - v0.8.2 - * tests: Use TimestampWithTimezone.from_datetime() instead of the constructor - * docs: Use reference instead of absolute link -- Software Heritage autobuilder (on jenkins-debian1) Wed, 12 Jan 2022 12:56:56 +0000 swh-indexer (0.8.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.1 - (tagged by Vincent SELLIER on 2021-12-21 16:23:37 +0100) * Upstream changes: - v0.8.1 - Changelog: - tag frozendict version to avoid segfaults on the ci -- Software Heritage autobuilder (on jenkins-debian1) Tue, 21 Dec 2021 15:28:27 +0000 swh-indexer (0.8.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-05-28 16:57:47 +0200) * Upstream changes: - v0.8.0 - metadata_dictionary: Add mapping for CITATION.cff - metadata/maven: Ignore ill-formed xml instead of failing - metadata: Fix UnboundLocalError in edge case - data/codemeta: sync with official codemeta repo - Fix SingleFileMapping case sensitivity - Use swh.core 0.14 - tox: Add sphinx environments to check sane doc build -- Software Heritage autobuilder (on jenkins-debian1) Fri, 28 May 2021 15:05:39 +0000 swh-indexer (0.7.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.7.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-02-03 14:10:16 +0100) * Upstream changes: - v0.7.0 - Adapt origin_get_latest_visit_status according to latest api change -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Feb 2021 13:15:37 +0000 swh-indexer (0.6.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.4 - (tagged by Antoine R. 
Dumont (@ardumont) on 2021-02-01 15:06:04 +0100) * Upstream changes: - v0.6.4 - indexer: Remove pagination logic using stream_results() instead. - ContentPartitionIndexer: Do not index the same content multiple times at once. - Add a cli section in the doc - test_journal_client_cli: Send production objects to journal - test_journal_client: Migrate away from mocks - tests: Use production backends within the indexer tests -- Software Heritage autobuilder (on jenkins-debian1) Mon, 01 Feb 2021 14:10:18 +0000 swh-indexer (0.6.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.3 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-27 14:42:30 +0100) * Upstream changes: - v0.6.3 - storage.writer: Fix journal writer sanitizer function -- Software Heritage autobuilder (on jenkins-debian1) Fri, 27 Nov 2020 13:46:03 +0000 swh-indexer (0.6.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-27 13:55:53 +0100) * Upstream changes: - v0.6.2 - BaseRow.unique_key: Don't crash when indexer_configuration_id is None. - idx.storage.JournalWriter: pass value_sanitizer to get_journal_writer. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 27 Nov 2020 13:00:28 +0000 swh-indexer (0.6.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-27 10:43:14 +0100) * Upstream changes: - v0.6.1 - Fix test within the debian package builds - refactor tests to pytest -- Software Heritage autobuilder (on jenkins-debian1) Fri, 27 Nov 2020 09:49:35 +0000 swh-indexer (0.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-26 17:08:03 +0100) * Upstream changes: - v0.6.0 - indexer.journal_client: Subscribe to OriginVisitStatus topic - swh.indexer.cli.journal_client: ensure the minimal configuration exists - Drop all deprecated uses of `args` in component factories - Drop vcversioner from requirements - Make the indexer storage write to the journal. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 26 Nov 2020 16:39:45 +0000 swh-indexer (0.5.0-2~swh1) unstable-swh; urgency=medium * Move distutils package from python3-swh.indexer to python3-swh.indexer.storage. -- Nicolas Dandrimont Wed, 18 Nov 2020 20:04:23 +0100 swh-indexer (0.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.0 - (tagged by Valentin Lorentz on 2020-11-06 15:25:04 +0100) * Upstream changes: - v0.5.0 - * Remove metadata deletion endpoints and algorithms - * Remove conflict_update/policy_update option from BaseIndexer.run() - * Remove conflict_update option from _add() endpoints. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 06 Nov 2020 14:28:05 +0000 swh-indexer (0.4.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-30 17:22:22 +0100) * Upstream changes: - v0.4.2 - tests.conftest: Fix the indexer scheduler initialization - indexer.cli: Fix missing retries_left parameter - Rename sql files according to new conventions -- Software Heritage autobuilder (on jenkins-debian1) Fri, 30 Oct 2020 16:24:14 +0000 swh-indexer (0.4.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.1 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-10-16 10:48:51 +0200) * Upstream changes: - v0.4.1 - test_cli: Remove unneeded config args parameter - api.server: Align configuration structure with clients configuration - storage.api.server: Add types to module and refactor tests -- Software Heritage autobuilder (on jenkins-debian1) Fri, 16 Oct 2020 08:59:09 +0000 swh-indexer (0.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-15 18:17:59 +0200) * Upstream changes: - v0.4.0 - swh.indexer.storage: Unify get_indexer_storage function with others -- Software Heritage autobuilder (on jenkins-debian1) Thu, 15 Oct 2020 16:19:01 +0000 swh-indexer (0.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.0 - (tagged by Valentin Lorentz on 2020-10-08 13:33:02 +0200) * Upstream changes: - v0.3.0 - * Make indexer-storage endpoints use attr-based classes instead of dicts - * Add more typing to indexers and their tests -- Software Heritage autobuilder (on jenkins-debian1) Thu, 08 Oct 2020 11:35:50 +0000 swh-indexer (0.2.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.4 - (tagged by David Douard on 2020-09-25 12:49:04 +0200) * Upstream changes: - v0.2.4 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 25 Sep 2020 10:51:28 +0000 swh-indexer (0.2.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.3 - (tagged by David Douard on 2020-09-11 15:12:01 +0200) * Upstream changes: - v0.2.3 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 11 Sep 2020 13:15:41 +0000 swh-indexer (0.2.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-09-04 13:21:19 +0200) * Upstream changes: - v0.2.2 - metadata: Adapt to latest storage revision_get change - Tell pytest not to recurse in dotdirs. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 04 Sep 2020 11:33:41 +0000 swh-indexer (0.2.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.1 - (tagged by Valentin Lorentz on 2020-08-20 12:59:53 +0200) * Upstream changes: - v0.2.1 - * indexer.rehash: Adapt content_get_metadata call to content_get - * origin_head: Use snapshot_get_all_branches instead of snapshot_get. - * Import SortedList, db_transaction_generator, and db_transaction from swh- core instead of swh-storage. - * tests: remove invalid assertion -- Software Heritage autobuilder (on jenkins-debian1) Thu, 20 Aug 2020 11:03:58 +0000 swh-indexer (0.2.0-1~swh2) unstable-swh; urgency=medium * Bump dependencies -- Antoine R. Dumont (@ardumont) Wed, 06 Aug 2020 13:28:00 +0200 swh-indexer (0.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-08-06 15:12:44 +0200) * Upstream changes: - v0.2.0 - Make content indexer work on partition of ids -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 Aug 2020 13:14:35 +0000 swh-indexer (0.1.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.1 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-07-28 12:42:19 +0200) * Upstream changes: - v0.1.1 - setup.py: Migrate from vcversioner to setuptools-scm - MANIFEST: Include missing conftest.py requirement - metadata: Update swh.storage.origin_get call to latest api change - Drop unsupported "validate" proxy - tests: Drop deprecated storage.origin_add_one use - Drop useless use of pifpaf - Clean up the swh.scheduler and swh.storage pytest plugin imports - tests: Drop obsolete origin visit fields -- Software Heritage autobuilder (on jenkins-debian1) Tue, 28 Jul 2020 10:44:54 +0000 swh-indexer (0.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-23 15:44:15 +0200) * Upstream changes: - v0.1.0 - origin_head: Retrieve snapshot out of the last visit status - Fix tests according to latest internal api changes -- Software Heritage autobuilder (on jenkins-debian1) Tue, 23 Jun 2020 13:46:23 +0000 swh-indexer (0.0.171-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.171 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-23 16:46:52 +0200) * Upstream changes: - v0.0.171 - cli: Adapt journal client instantiation according to latest change - codemeta: Add compatibility with PyLD >= 2.0.0. - setup: Update the minimum required runtime python3 version - Add a pyproject.toml file to target py37 for black - Enable black - test: make test data properly typed - indexer.cli.journal_client: Simplify the journal client call - Remove type from origin_add calls - Rename --max-messages to --stop-after-objects. - tests: Migrate to latest swh-storage api change -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Apr 2020 14:49:17 +0000 swh-indexer (0.0.170-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.170 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-08 09:57:39 +0100) * Upstream changes: - v0.0.170 - indexer.metadata: Make compatible old task format -- Software Heritage autobuilder (on jenkins-debian1) Sun, 08 Mar 2020 09:03:59 +0000 swh-indexer (0.0.169-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.169 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-06 15:19:21 +0100) * Upstream changes: - v0.0.169 - storage: Add @timed metrics on remaining indexer storage endpoints - indexer.storage: Use the correct metrics module - idx.storage: Add time and counter metric to idx_configuration_add - indexer.storage: Remove redundant calls to send_metric - indexer: Fix mypy issues -- Software Heritage autobuilder (on jenkins-debian1) Fri, 06 Mar 2020 14:24:50 +0000 swh-indexer (0.0.168-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.168 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-05 15:48:32 +0100) * Upstream changes: - v0.0.168 - mimetype: Make the parsing more resilient - storage.fossology_license_add: Fix one insert query too many - tests: Migrate some tests to pytest -- Software Heritage autobuilder (on jenkins-debian1) Thu, 05 Mar 2020 14:52:27 +0000 swh-indexer (0.0.167-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.167 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-03-04 16:33:20 +0100) * Upstream changes: - v0.0.167 - indexer (revision, origin): Fix indexer summary to output a status -- Software Heritage autobuilder (on jenkins-debian1) Wed, 04 Mar 2020 15:37:59 +0000 swh-indexer (0.0.166-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.166 - (tagged by Valentin Lorentz on 2020-03-04 15:46:37 +0100) * Upstream changes: - v0.0.166 - * Fix merging documents with @list elements. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 04 Mar 2020 14:50:54 +0000 swh-indexer (0.0.165-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.165 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-04 15:29:52 +0100) * Upstream changes: - v0.0.165 - indexers: Fix summary computation for range indexers - tests: Use assertEqual instead of deprecated assertEquals -- Software Heritage autobuilder (on jenkins-debian1) Wed, 04 Mar 2020 14:33:09 +0000 swh-indexer (0.0.164-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.164 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-04 13:52:15 +0100) * Upstream changes: - v0.0.164 - range-indexers: Fix hard- coded summary key value - indexers: Improve persist_index_computations type - indexer.metadata: Fix wrong update -- Software Heritage autobuilder (on jenkins-debian1) Wed, 04 Mar 2020 13:00:18 +0000 swh-indexer (0.0.163-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.163 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-04 11:26:56 +0100) * Upstream changes: - v0.0.163 - Make indexers return a summary of their actions - swh.indexer.storage: Add metrics to add/del endpoints - indexer.storage: Make add/del endpoints sum up added objects count - indexer: Remove unused next_step pattern -- Software Heritage autobuilder (on jenkins-debian1) Wed, 04 Mar 2020 10:31:03 +0000 swh-indexer (0.0.162-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.162 - (tagged by Antoine R. Dumont (@ardumont) on 2020-02-27 11:01:29 +0100) * Upstream changes: - v0.0.162 - fossology_license: Improve add query endpoint - pgstorage: Empty temp tables instead of dropping them - indexer.metadata: Fix edge case on unknown origin -- Software Heritage autobuilder (on jenkins-debian1) Thu, 27 Feb 2020 10:09:36 +0000 swh-indexer (0.0.161-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.161 - (tagged by Antoine R. Dumont (@ardumont) on 2020-02-25 12:07:39 +0100) * Upstream changes: - v0.0.161 - sql/128: Add content_mimetype index - storage.db: Improve content range queries to actually finish - Add a new IndexerStorageArgumentException class, for exceptions caused by the client. - Use swh-storage validation proxy. - Fix type errors with hypothesis 5.5 - Add type annotations to indexer classes -- Software Heritage autobuilder (on jenkins-debian1) Tue, 25 Feb 2020 11:20:51 +0000 swh-indexer (0.0.160-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.160 - (tagged by Antoine R. Dumont (@ardumont) on 2020-02-05 18:13:16 +0100) * Upstream changes: - v0.0.160 - Fix missing import -- Software Heritage autobuilder (on jenkins-debian1) Wed, 05 Feb 2020 17:28:18 +0000 swh-indexer (0.0.159-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.159 - (tagged by Antoine R. Dumont (@ardumont) on 2020-02-05 16:01:03 +0100) * Upstream changes: - v0.0.159 - Monkey-patch backend classes instead of 'get_storage' functions. - Fix DeprecationWarning about get_storage args. 
- Move IndexerStorage documentation and endpoint paths to a new IndexerStorageInterface class. - conftest: Use module's `get_` to instantiate backend - docs: Fix sphinx warnings - Fix merge_documents to work with input document with an @id. - Fix support of VCSs whose HEAD branch is an alias. - Fix type of 'author' in gemspec mapping output. - Fix test_origin_metadata mistakenly broken by e50660efca - Fix several typos reported by pre-commit hooks - Add a pre-commit config file - Remove unused property-based test environment - Migrate tox.ini to extras = xxx instead of deps = .[testing] - Merge tox test environments - Drop version constraint on pytest - Include all requirements in MANIFEST.in -- Software Heritage autobuilder (on jenkins-debian1) Wed, 05 Feb 2020 15:09:42 +0000 swh-indexer (0.0.158-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.158 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-20 10:26:59 +0100) * Upstream changes: - v0.0.158 - Re-enable tests for the in- memory storage. - Truncate result list instead of doing a copy. - journal client: add support for new origin_visit schema. - Fix alter table rename column syntax on 126->127 upgrade script -- Software Heritage autobuilder (on jenkins-debian1) Wed, 20 Nov 2019 09:30:37 +0000 swh-indexer (0.0.157-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.157 - (tagged by Valentin Lorentz on 2019-11-08 16:33:36 +0100) * Upstream changes: - v0.0.157 - * migrate storage tests to pytest - * proper pagination for IndexerStorage.origin_intrinsic_metadata_search_by_producer -- Software Heritage autobuilder (on jenkins-debian1) Fri, 08 Nov 2019 15:36:48 +0000 swh-indexer (0.0.156-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.156 - (tagged by Stefano Zacchiroli on 2019-11-05 17:36:11 +0100) * Upstream changes: - v0.0.156 - * update indexer for storage 0.0.156 - * cli: fix max-message handling in the journal-client command - * tests: fix test_metadata.py for frozen entities in swh.model.model - * tests: update tests for storage>=0.0.155 - * test_metadata typing: use type-specific mappings instead of cast - * storage/db.py: drop unused format arg regconfig from query - * typing: minimal changes to make a no-op mypy run pass -- Software Heritage autobuilder (on jenkins-debian1) Tue, 05 Nov 2019 16:45:10 +0000 swh-indexer (0.0.155-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.155 - (tagged by Valentin Lorentz on 2019-10-15 14:51:28 +0200) * Upstream changes: - v0.0.155 - * Avoid spamming logs with processed %d messages every message - * tox.ini: Fix py3 environment to use packaged tests - * Remove indirection swh.indexer.storage.api.wsgi to start server - * Add a command- line tool to run metadata translation. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 15 Oct 2019 12:55:33 +0000 swh-indexer (0.0.154-1~swh2) unstable-swh; urgency=medium * Force pg_ctl path -- Nicolas Dandrimont Mon, 07 Oct 2019 16:42:08 +0200 swh-indexer (0.0.154-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.154 - (tagged by Nicolas Dandrimont on 2019-10-07 16:34:20 +0200) * Upstream changes: - Release swh.indexer v0.0.154 - Remove old scheduler compat code - Clean up CLI aliases - Port to python-magic instead of file_magic -- Software Heritage autobuilder (on jenkins-debian1) Mon, 07 Oct 2019 14:38:47 +0000 swh-indexer (0.0.153-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.153 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-09-11 11:46:41 +0200) * Upstream changes: - v0.0.153 - indexer-storage: Send smaller batches to origin_get - Update origin_url/from_revision/metadata_tsvector when conflict_update=True - Remove concept of 'minimal set' of metadata - npm: Fix crash on invalid 'author' field - api/client: use RPCClient instead of deprecated SWHRemoteAPI - api/server: use RPCServerApp instead of deprecated SWHServerAPIApp - tests/utils: Fix various test data model issues failing validation -- Software Heritage autobuilder (on jenkins-debian1) Wed, 11 Sep 2019 09:50:58 +0000 swh-indexer (0.0.152-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.152 - (tagged by Valentin Lorentz on 2019-07-19 11:15:41 +0200) * Upstream changes: - Send smaller batches to revision_get -- Software Heritage autobuilder (on jenkins-debian1) Fri, 19 Jul 2019 09:20:34 +0000 swh-indexer (0.0.151-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.151 - (tagged by Valentin Lorentz on 2019-07-03 17:58:32 +0200) * Upstream changes: - v0.0.151 - Fix key names in the journal client; it crashed in prod. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Jul 2019 16:03:07 +0000 swh-indexer (0.0.150-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.150 - (tagged by Antoine R. Dumont (@ardumont) on 2019-07-03 12:09:43 +0200) * Upstream changes: - v0.0.150 - indexer.cli: Drop unused extra alias `--consumer-id` flag -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Jul 2019 10:20:46 +0000 swh-indexer (0.0.149-1~swh2) unstable-swh; urgency=medium * No-change: Bump dependency version -- Antoine R. Dumont (@ardumont) Wed, 03 Jul 2019 10:44:12 +0200 swh-indexer (0.0.149-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.149 - (tagged by Antoine R. Dumont (@ardumont) on 2019-07-02 18:11:12 +0200) * Upstream changes: - v0.0.149 - swh.indexer.cli: Fix get_journal_client api call - sql/upgrades/125: Fix migration script -- Software Heritage autobuilder (on jenkins-debian1) Tue, 02 Jul 2019 16:26:50 +0000 swh-indexer (0.0.148-1~swh3) unstable-swh; urgency=medium * Upstream release 0.0.148: Update version dependency -- Antoine Romain Dumont (@ardumont) Mon, 01 Jul 2019 01:50:29 +0100 swh-indexer (0.0.148-1~swh2) unstable-swh; urgency=medium * Upstream release 0.0.148 -- Antoine Romain Dumont (@ardumont) Mon, 01 Jul 2019 01:50:29 +0100 swh-indexer (0.0.148-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.148 - (tagged by Antoine R. Dumont (@ardumont) on 2019-07-01 12:21:32 +0200) * Upstream changes: - v0.0.148 - Manipulate origin URLs instead of origin ids - journal: create tasks for multiple origins - Tests: Improvments -- Software Heritage autobuilder (on jenkins-debian1) Mon, 01 Jul 2019 10:34:26 +0000 swh-indexer (0.0.147-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.147 - (tagged by Antoine Lambert on 2019-05-23 11:03:02 +0200) * Upstream changes: - version 0.0.147 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 May 2019 09:11:05 +0000 swh-indexer (0.0.146-1~swh2) unstable-swh; urgency=medium * Remove hypothesis directory -- Nicolas Dandrimont Thu, 18 Apr 2019 18:29:09 +0200 swh-indexer (0.0.146-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.146 - (tagged by Valentin Lorentz on 2019-04-11 11:08:29 +0200) * Upstream changes: - Better explain what the 'string fields' are. 
-- Software Heritage autobuilder (on jenkins-debian1) Thu, 11 Apr 2019 09:47:24 +0000 swh-indexer (0.0.145-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.145 - (tagged by Valentin Lorentz on 2019-03-15 11:18:25 +0100) * Upstream changes: - Add support for keywords in PKG-INFO. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 Mar 2019 11:34:53 +0000 swh-indexer (0.0.144-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.144 - (tagged by Thibault Allançon on 2019-03-07 08:16:49 +0100) * Upstream changes: - Fix heterogeneity of names in metadata tables -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Mar 2019 13:30:44 +0000 swh-indexer (0.0.143-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.143 - (tagged by Thibault Allançon on 2019-03-12 10:18:37 +0100) * Upstream changes: - Use hashutil.MultiHash in swh.indexer.tests.test_utils.fill_storage - Summary: Closes T1448 - Reviewers: #reviewers - Subscribers: swh-public-ci - Maniphest Tasks: T1448 - Differential Revision: https://forge.softwareheritage.org/D1235 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 13 Mar 2019 10:24:37 +0000 swh-indexer (0.0.142-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.142 - (tagged by Valentin Lorentz on 2019-03-01 14:19:05 +0100) * Upstream changes: - Skip useless requests. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 01 Mar 2019 13:26:06 +0000 swh-indexer (0.0.141-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.141 - (tagged by Valentin Lorentz on 2019-03-01 10:59:54 +0100) * Upstream changes: - Prevent origin metadata indexer from writing empty records -- Software Heritage autobuilder (on jenkins-debian1) Fri, 01 Mar 2019 10:10:56 +0000 swh-indexer (0.0.140-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.140 - (tagged by Valentin Lorentz on 2019-02-25 10:38:52 +0100) * Upstream changes: - Drop the 'context' and 'type' config of metadata indexers. - They are both ignored already. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 25 Feb 2019 10:40:10 +0000 swh-indexer (0.0.139-1~swh2) unstable-swh; urgency=low * New release fixing debian build -- Antoine Romain Dumont (@ardumont) Fri, 22 Feb 2019 16:27:47 +0100 swh-indexer (0.0.139-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.139 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-22 15:53:22 +0100) * Upstream changes: - v0.0.139 - Clean up no longer used tasks -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Feb 2019 14:59:40 +0000 swh-indexer (0.0.138-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.138 - (tagged by Valentin Lorentz on 2019-02-22 15:30:30 +0100) * Upstream changes: - Make the 'config' argument of OriginMetadaIndexer optional again. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Feb 2019 14:37:35 +0000 swh-indexer (0.0.137-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.137 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-22 10:59:53 +0100) * Upstream changes: - v0.0.137 - swh.indexer.storage.api.wsgi: Open production wsgi entrypoint - swh.indexer.cli: Move dev app entrypoint in dedicated cli - indexer.storage: Make server load explicit configuration and check - config: use already loaded swh config, if any, when instantiating an Indexer - api: Add support for filtering by tool_id to origin_intrinsic_metadata_search_by_producer. - api: Add storage endpoint to search metadata by mapping. 
- runtime: Remove implicit configuration from the metadata indexers. - debian: Remove debian packaging from master branch - docs: Update missing documentation -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Feb 2019 10:11:29 +0000 swh-indexer (0.0.136-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.136 - (tagged by Valentin Lorentz on 2019-02-14 17:09:00 +0100) * Upstream changes: - Don't send 'None' as a revision id to storage.revision_get. - This error wasn't caught before because the in-mem storage - accepts None values, but the pg storage doesn't. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Feb 2019 16:22:41 +0000 swh-indexer (0.0.135-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.135 - (tagged by Valentin Lorentz on 2019-02-14 14:45:24 +0100) * Upstream changes: - Fix deduplication of origins when persisting origin intrinsic metadata. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Feb 2019 14:32:55 +0000 swh-indexer (0.0.134-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.134 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-13 23:46:44 +0100) * Upstream changes: - v0.0.134 - package: Break dependency of swh.indexer.storage on swh.indexer. - api/server: Do not read configuration at each request - metadata: Fix gemspec test - metadata: Prevent OriginMetadataIndexer from sending duplicate - revisions to revision_metadata_add. - test: Fix bugs found by hypothesis. - test: Use hypothesis to generate adversarial inputs. - Add more type checks in metadata dictionary. - Add checks in the idx_storage that the same content/rev/orig is not - present twice in the new data. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Feb 2019 09:16:15 +0000 swh-indexer (0.0.133-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.133 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-12 10:28:01 +0100) * Upstream changes: - v0.0.133 - Migrate BaseDB api calls from core to storage - Improve storage api calls using latest storage api - OriginIndexer: Refactoring - tests: Refactoring - metadata search: Use index - indexer metadata: Provide stats per origin - indexer metadata: Update mapping column - indexer metadata: Improve and fix issues -- Software Heritage autobuilder (on jenkins-debian1) Tue, 12 Feb 2019 09:34:43 +0000 swh-indexer (0.0.132-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.132 - (tagged by Antoine R. Dumont (@ardumont) on 2019-01-30 15:03:14 +0100) * Upstream changes: - v0.0.132 - swh/indexer/tasks: Fix range indexer tasks - Maven: Add support for empty XML nodes. - Add support for alternative call format for Gem::Specification.new. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 30 Jan 2019 14:09:48 +0000 swh-indexer (0.0.131-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.131 - (tagged by Antoine R. Dumont (@ardumont) on 2019-01-30 10:56:43 +0100) * Upstream changes: - v0.0.131 - fix pep8 violations - fix misspellings -- Software Heritage autobuilder (on jenkins-debian1) Wed, 30 Jan 2019 10:01:47 +0000 swh-indexer (0.0.129-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.129 - (tagged by Valentin Lorentz on 2019-01-29 14:11:22 +0100) * Upstream changes: - Fix missing config file name change. 
-- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jan 2019 13:34:17 +0000 swh-indexer (0.0.128-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.128 - (tagged by Valentin Lorentz on 2019-01-25 15:22:52 +0100) * Upstream changes: - Make metadata indexers store the mappings used to translate metadata. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jan 2019 12:18:16 +0000 swh-indexer (0.0.127-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.127 - (tagged by Valentin Lorentz on 2019-01-15 15:56:49 +0100) * Upstream changes: - Prevent repository normalization from crashing on malformed input. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 15 Jan 2019 16:20:32 +0000 swh-indexer (0.0.126-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.126 - (tagged by Valentin Lorentz on 2019-01-14 11:42:52 +0100) * Upstream changes: - Don't call OriginHeadIndexer.next_step when there is no revision. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 14 Jan 2019 10:57:34 +0000 swh-indexer (0.0.125-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.125 - (tagged by Antoine R. Dumont (@ardumont) on 2019-01-11 12:01:42 +0100) * Upstream changes: - v0.0.125 - Add journal client that listens for origin visits and schedules - OriginHead - Fix tests to work with the new version of swh.storage -- Software Heritage autobuilder (on jenkins-debian1) Fri, 11 Jan 2019 11:08:51 +0000 swh-indexer (0.0.124-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.124 - (tagged by Antoine R. Dumont (@ardumont) on 2019-01-08 14:09:32 +0100) * Upstream changes: - v0.0.124 - indexer: Fix type check on indexing result -- Software Heritage autobuilder (on jenkins-debian1) Thu, 10 Jan 2019 17:12:07 +0000 swh-indexer (0.0.118-1~swh1) unstable-swh; urgency=medium * v0.0.118 * metadata-indexer: Fix setup initialization * tests: Refactoring -- Antoine R. Dumont (@ardumont) Fri, 30 Nov 2018 14:50:52 +0100 swh-indexer (0.0.67-1~swh1) unstable-swh; urgency=medium * v0.0.67 * mimetype: Migrate to indexed data as text -- Antoine R. Dumont (@ardumont) Wed, 28 Nov 2018 11:35:37 +0100 swh-indexer (0.0.66-1~swh1) unstable-swh; urgency=medium * v0.0.66 * range-indexer: Stream indexing range computations -- Antoine R. Dumont (@ardumont) Tue, 27 Nov 2018 11:48:24 +0100 swh-indexer (0.0.65-1~swh1) unstable-swh; urgency=medium * v0.0.65 * Fix revision metadata indexer -- Antoine R. Dumont (@ardumont) Mon, 26 Nov 2018 19:30:48 +0100 swh-indexer (0.0.64-1~swh1) unstable-swh; urgency=medium * v0.0.64 * indexer: Fix mixed identifier encodings issues * Add missing config filename for origin intrinsic metadata indexer. -- Antoine R. Dumont (@ardumont) Mon, 26 Nov 2018 12:20:01 +0100 swh-indexer (0.0.63-1~swh1) unstable-swh; urgency=medium * v0.0.63 * Make the OriginMetadataIndexer fetch rev metadata from the storage * instead of getting them via the scheduler. * Make the 'result_name' key of 'next_step' optional. * Add missing return. * doc: update index to match new swh-doc format -- Antoine R. Dumont (@ardumont) Fri, 23 Nov 2018 17:56:10 +0100 swh-indexer (0.0.62-1~swh1) unstable-swh; urgency=medium * v0.0.62 * metadata indexer: Add empty tool configuration * Add fulltext search on origin intrinsic metadata -- Antoine R. Dumont (@ardumont) Fri, 23 Nov 2018 14:25:55 +0100 swh-indexer (0.0.61-1~swh1) unstable-swh; urgency=medium * v0.0.61 * indexer: Fix origin indexer's default arguments -- Antoine R. 
Dumont (@ardumont) Wed, 21 Nov 2018 16:01:50 +0100 swh-indexer (0.0.60-1~swh1) unstable-swh; urgency=medium * v0.0.60 * origin_head: Make next step optional * tests: Increase coverage -- Antoine R. Dumont (@ardumont) Wed, 21 Nov 2018 12:33:13 +0100 swh-indexer (0.0.59-1~swh1) unstable-swh; urgency=medium * v0.0.59 * fossology license: Fix issue on license computation * Improve docstrings * Fix pep8 violations * Increase coverage on content indexers -- Antoine R. Dumont (@ardumont) Tue, 20 Nov 2018 14:27:20 +0100 swh-indexer (0.0.58-1~swh1) unstable-swh; urgency=medium * v0.0.58 * Add missing default configuration for fossology license indexer * tests: Remove dead code -- Antoine R. Dumont (@ardumont) Tue, 20 Nov 2018 12:06:56 +0100 swh-indexer (0.0.57-1~swh1) unstable-swh; urgency=medium * v0.0.57 * storage: Open new endpoint on fossology license range retrieval * indexer: Open new fossology license range indexer -- Antoine R. Dumont (@ardumont) Tue, 20 Nov 2018 11:44:57 +0100 swh-indexer (0.0.56-1~swh1) unstable-swh; urgency=medium * v0.0.56 * storage.api: Open new endpoints (mimetype range, fossology range) * content indexers: Open mimetype and fossology range indexers * Remove orchestrator modules * tests: Improve coverage -- Antoine R. Dumont (@ardumont) Mon, 19 Nov 2018 11:56:06 +0100 swh-indexer (0.0.55-1~swh1) unstable-swh; urgency=medium * v0.0.55 * swh.indexer: Let task reschedule itself through the scheduler * Use swh.scheduler instead of celery leaking all around * swh.indexer.orchestrator: Fix orchestrator initialization step * swh.indexer.tasks: Fix type error when no result or list result -- Antoine R. Dumont (@ardumont) Mon, 29 Oct 2018 10:41:54 +0100 swh-indexer (0.0.54-1~swh1) unstable-swh; urgency=medium * v0.0.54 * swh.indexer.tasks: Fix task to use the scheduler's -- Antoine R. Dumont (@ardumont) Thu, 25 Oct 2018 20:13:51 +0200 swh-indexer (0.0.53-1~swh1) unstable-swh; urgency=medium * v0.0.53 * swh.indexer.rehash: Migrate to latest swh.model.hashutil.MultiHash * indexer: Add the origin intrinsic metadata indexer * indexer: Add OriginIndexer and OriginHeadIndexer. * indexer.storage: Add the origin intrinsic metadata storage database * indexer.storage: Autogenerate the Indexer Storage HTTP API. * setup: prepare for pypi upload * tests: Add a tox file * tests: migrate to pytest * tests: Add tests around celery stack * docs: Improve documentation and reuse README in generated documentation -- Antoine R. Dumont (@ardumont) Thu, 25 Oct 2018 19:03:56 +0200 swh-indexer (0.0.52-1~swh1) unstable-swh; urgency=medium * v0.0.52 * swh.indexer.storage: Refactor fossology license get (first external * contribution, cf. /CONTRIBUTORS) * swh.indexer.storage: Fix typo in invariable name metadata * swh.indexer.storage: No longer use temp table when reading data * swh.indexer.storage: Clean up unused import * swh.indexer.storage: Remove dead entry points origin_metadata* * swh.indexer.storage: Update docstrings information and format -- Antoine R. Dumont (@ardumont) Wed, 13 Jun 2018 11:20:40 +0200 swh-indexer (0.0.51-1~swh1) unstable-swh; urgency=medium * Release swh.indexer v0.0.51 * Update for new db_transaction{,_generator} -- Nicolas Dandrimont Tue, 05 Jun 2018 14:10:39 +0200 swh-indexer (0.0.50-1~swh1) unstable-swh; urgency=medium * v0.0.50 * swh.indexer.api.client: Permit to specify the query timeout option -- Antoine R. 
Dumont (@ardumont) Thu, 24 May 2018 12:19:06 +0200 swh-indexer (0.0.49-1~swh1) unstable-swh; urgency=medium * v0.0.49 * test_storage: Instantiate the tools during tests' setUp phase * test_storage: Deallocate storage during teardown step * test_storage: Make storage test fixture connect to postgres itself * storage.api.server: Only instantiate storage backend once per import * Use thread-aware psycopg2 connection pooling for database access -- Antoine R. Dumont (@ardumont) Mon, 14 May 2018 11:09:30 +0200 swh-indexer (0.0.48-1~swh1) unstable-swh; urgency=medium * Release swh.indexer v0.0.48 * Update for new swh.storage -- Nicolas Dandrimont Sat, 12 May 2018 18:30:10 +0200 swh-indexer (0.0.47-1~swh1) unstable-swh; urgency=medium * v0.0.47 * d/control: Fix runtime typo in packaging dependency -- Antoine R. Dumont (@ardumont) Thu, 07 Dec 2017 16:54:49 +0100 swh-indexer (0.0.46-1~swh1) unstable-swh; urgency=medium * v0.0.46 * Split swh-indexer packages in 2 python3-swh.indexer.storage and * python3-swh.indexer -- Antoine R. Dumont (@ardumont) Thu, 07 Dec 2017 16:18:04 +0100 swh-indexer (0.0.45-1~swh1) unstable-swh; urgency=medium * v0.0.45 * Fix usual error raised when deploying -- Antoine R. Dumont (@ardumont) Thu, 07 Dec 2017 15:01:01 +0100 swh-indexer (0.0.44-1~swh1) unstable-swh; urgency=medium * v0.0.44 * swh.indexer: Make indexer use their own storage -- Antoine R. Dumont (@ardumont) Thu, 07 Dec 2017 13:20:44 +0100 swh-indexer (0.0.43-1~swh1) unstable-swh; urgency=medium * v0.0.43 * swh.indexer.mimetype: Work around problem in detection -- Antoine R. Dumont (@ardumont) Wed, 29 Nov 2017 10:26:11 +0100 swh-indexer (0.0.42-1~swh1) unstable-swh; urgency=medium * v0.0.42 * swh.indexer: Make indexers register tools in prepare method -- Antoine R. Dumont (@ardumont) Fri, 24 Nov 2017 11:26:03 +0100 swh-indexer (0.0.41-1~swh1) unstable-swh; urgency=medium * v0.0.41 * mimetype: Use magic library api instead of parsing `file` cli output -- Antoine R. Dumont (@ardumont) Mon, 20 Nov 2017 13:05:29 +0100 swh-indexer (0.0.39-1~swh1) unstable-swh; urgency=medium * v0.0.39 * swh.indexer.producer: Fix argument to match the abstract definition -- Antoine R. Dumont (@ardumont) Thu, 19 Oct 2017 10:03:44 +0200 swh-indexer (0.0.38-1~swh1) unstable-swh; urgency=medium * v0.0.38 * swh.indexer.indexer: Fix argument to match the abstract definition -- Antoine R. Dumont (@ardumont) Wed, 18 Oct 2017 19:57:47 +0200 swh-indexer (0.0.37-1~swh1) unstable-swh; urgency=medium * v0.0.37 * swh.indexer.indexer: Fix argument to match the abstract definition -- Antoine R. Dumont (@ardumont) Wed, 18 Oct 2017 18:59:42 +0200 swh-indexer (0.0.36-1~swh1) unstable-swh; urgency=medium * v0.0.36 * packaging: Cleanup * codemeta: Adding codemeta.json file to document metadata * swh.indexer.mimetype: Fix edge case regarding empty raw content * docs: sanitize docstrings for sphinx documentation generation * swh.indexer.metadata: Add RevisionMetadataIndexer * swh.indexer.metadata: Add ContentMetadataIndexer * swh.indexer: Refactor base class to improve inheritance * swh.indexer.metadata: First draft of the metadata content indexer * for npm (package.json) * swh.indexer.tests: Added tests for language indexer -- Antoine R. 
Dumont (@ardumont) Wed, 18 Oct 2017 16:24:24 +0200 swh-indexer (0.0.35-1~swh1) unstable-swh; urgency=medium * Release swh.indexer 0.0.35 * Update tasks to new swh.scheduler API -- Nicolas Dandrimont Mon, 12 Jun 2017 18:02:04 +0200 swh-indexer (0.0.34-1~swh1) unstable-swh; urgency=medium * v0.0.34 * Fix unbound local error on edge case -- Antoine R. Dumont (@ardumont) Wed, 07 Jun 2017 11:23:29 +0200 swh-indexer (0.0.33-1~swh1) unstable-swh; urgency=medium * v0.0.33 * language indexer: Improve edge case policy -- Antoine R. Dumont (@ardumont) Wed, 07 Jun 2017 11:02:47 +0200 swh-indexer (0.0.32-1~swh1) unstable-swh; urgency=medium * v0.0.32 * Update fossology license to use the latest swh-storage * Improve language indexer to deal with potential error on bad * chunking -- Antoine R. Dumont (@ardumont) Tue, 06 Jun 2017 18:13:40 +0200 swh-indexer (0.0.31-1~swh1) unstable-swh; urgency=medium * v0.0.31 * Reduce log verbosity on language indexer -- Antoine R. Dumont (@ardumont) Fri, 02 Jun 2017 19:08:52 +0200 swh-indexer (0.0.30-1~swh1) unstable-swh; urgency=medium * v0.0.30 * Fix wrong default configuration -- Antoine R. Dumont (@ardumont) Fri, 02 Jun 2017 18:01:27 +0200 swh-indexer (0.0.29-1~swh1) unstable-swh; urgency=medium * v0.0.29 * Update indexer to resolve indexer configuration identifier * Adapt language indexer to use partial raw content -- Antoine R. Dumont (@ardumont) Fri, 02 Jun 2017 16:21:27 +0200 swh-indexer (0.0.28-1~swh1) unstable-swh; urgency=medium * v0.0.28 * Add error resilience to fossology indexer -- Antoine R. Dumont (@ardumont) Mon, 22 May 2017 12:57:55 +0200 swh-indexer (0.0.27-1~swh1) unstable-swh; urgency=medium * v0.0.27 * swh.indexer.language: Incremental encoding detection -- Antoine R. Dumont (@ardumont) Wed, 17 May 2017 18:04:27 +0200 swh-indexer (0.0.26-1~swh1) unstable-swh; urgency=medium * v0.0.26 * swh.indexer.orchestrator: Add batch size option per indexer * Log caught exception in a unified manner * Add rescheduling option (not by default) on rehash + indexers -- Antoine R. Dumont (@ardumont) Wed, 17 May 2017 14:08:07 +0200 swh-indexer (0.0.25-1~swh1) unstable-swh; urgency=medium * v0.0.25 * Add reschedule on error parameter for indexers -- Antoine R. Dumont (@ardumont) Fri, 12 May 2017 12:13:15 +0200 swh-indexer (0.0.24-1~swh1) unstable-swh; urgency=medium * v0.0.24 * Make rehash indexer more resilient to errors by rescheduling contents * in error (be it reading or updating problems) -- Antoine R. Dumont (@ardumont) Thu, 04 May 2017 14:22:43 +0200 swh-indexer (0.0.23-1~swh1) unstable-swh; urgency=medium * v0.0.23 * Improve producer to optionally make it synchroneous -- Antoine R. Dumont (@ardumont) Wed, 03 May 2017 15:29:44 +0200 swh-indexer (0.0.22-1~swh1) unstable-swh; urgency=medium * v0.0.22 * Improve mimetype indexer implementation * Make the chaining option in the mimetype indexer -- Antoine R. Dumont (@ardumont) Tue, 02 May 2017 16:31:14 +0200 swh-indexer (0.0.21-1~swh1) unstable-swh; urgency=medium * v0.0.21 * swh.indexer.rehash: Actually make the worker log -- Antoine R. Dumont (@ardumont) Tue, 02 May 2017 14:28:55 +0200 swh-indexer (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * swh.indexer.rehash: * Improve reading from objstorage only when needed * Fix empty file use case (which was skipped) * Add logging -- Antoine R. Dumont (@ardumont) Fri, 28 Apr 2017 09:39:09 +0200 swh-indexer (0.0.19-1~swh1) unstable-swh; urgency=medium * v0.0.19 * Fix rehash indexer's default configuration file -- Antoine R. 
Dumont (@ardumont) Thu, 27 Apr 2017 19:17:20 +0200 swh-indexer (0.0.18-1~swh1) unstable-swh; urgency=medium * v0.0.18 * Add new rehash indexer -- Antoine R. Dumont (@ardumont) Wed, 26 Apr 2017 15:23:02 +0200 swh-indexer (0.0.17-1~swh1) unstable-swh; urgency=medium * v0.0.17 * Add information on indexer tools (T610) -- Antoine R. Dumont (@ardumont) Fri, 02 Dec 2016 18:32:54 +0100 swh-indexer (0.0.16-1~swh1) unstable-swh; urgency=medium * v0.0.16 * bug fixes -- Antoine R. Dumont (@ardumont) Tue, 15 Nov 2016 19:31:52 +0100 swh-indexer (0.0.15-1~swh1) unstable-swh; urgency=medium * v0.0.15 * Improve message producer -- Antoine R. Dumont (@ardumont) Tue, 15 Nov 2016 18:16:42 +0100 swh-indexer (0.0.14-1~swh1) unstable-swh; urgency=medium * v0.0.14 * Update package dependency on fossology-nomossa -- Antoine R. Dumont (@ardumont) Tue, 15 Nov 2016 14:13:41 +0100 swh-indexer (0.0.13-1~swh1) unstable-swh; urgency=medium * v0.0.13 * Add new license indexer * ctags indexer: align behavior with other indexers regarding the * conflict update policy -- Antoine R. Dumont (@ardumont) Mon, 14 Nov 2016 14:13:34 +0100 swh-indexer (0.0.12-1~swh1) unstable-swh; urgency=medium * v0.0.12 * Add runtime dependency on universal-ctags -- Antoine R. Dumont (@ardumont) Fri, 04 Nov 2016 13:59:59 +0100 swh-indexer (0.0.11-1~swh1) unstable-swh; urgency=medium * v0.0.11 * Remove dependency on exuberant-ctags -- Antoine R. Dumont (@ardumont) Thu, 03 Nov 2016 16:13:26 +0100 swh-indexer (0.0.10-1~swh1) unstable-swh; urgency=medium * v0.0.10 * Add ctags indexer -- Antoine R. Dumont (@ardumont) Thu, 20 Oct 2016 16:12:42 +0200 swh-indexer (0.0.9-1~swh1) unstable-swh; urgency=medium * v0.0.9 * d/control: Bump dependency to latest python3-swh.storage api * mimetype: Use the charset to filter out data * orchestrator: Separate 2 distincts orchestrators (one for all * contents, one for text contents) * mimetype: once index computed, send text contents to text orchestrator -- Antoine R. Dumont (@ardumont) Thu, 13 Oct 2016 15:28:17 +0200 swh-indexer (0.0.8-1~swh1) unstable-swh; urgency=medium * v0.0.8 * Separate configuration file per indexer (no need for language) * Rename module file_properties to mimetype consistently with other * layers -- Antoine R. Dumont (@ardumont) Sat, 08 Oct 2016 11:46:29 +0200 swh-indexer (0.0.7-1~swh1) unstable-swh; urgency=medium * v0.0.7 * Adapt indexer language and mimetype to store result in storage. * Clean up obsolete code -- Antoine R. Dumont (@ardumont) Sat, 08 Oct 2016 10:26:08 +0200 swh-indexer (0.0.6-1~swh1) unstable-swh; urgency=medium * v0.0.6 * Fix multiple issues on production -- Antoine R. Dumont (@ardumont) Fri, 30 Sep 2016 17:00:11 +0200 swh-indexer (0.0.5-1~swh1) unstable-swh; urgency=medium * v0.0.5 * Fix debian/control dependency issue -- Antoine R. Dumont (@ardumont) Fri, 30 Sep 2016 16:06:20 +0200 swh-indexer (0.0.4-1~swh1) unstable-swh; urgency=medium * v0.0.4 * Upgrade dependencies issues -- Antoine R. Dumont (@ardumont) Fri, 30 Sep 2016 16:01:52 +0200 swh-indexer (0.0.3-1~swh1) unstable-swh; urgency=medium * v0.0.3 * Add encoding detection * Use encoding to improve language detection * bypass language detection for binary files * bypass ctags for binary files or decoding failure file -- Antoine R. Dumont (@ardumont) Fri, 30 Sep 2016 12:30:11 +0200 swh-indexer (0.0.2-1~swh1) unstable-swh; urgency=medium * v0.0.2 * Provide one possible sha1's name for the multiple tools to ease * information extrapolation * Fix debian package dependency issue -- Antoine R. 
Dumont (@ardumont) Thu, 29 Sep 2016 21:45:44 +0200 swh-indexer (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * v0.0.1 * First implementation on poc -- Antoine R. Dumont (@ardumont) Wed, 28 Sep 2016 23:40:13 +0200 diff --git a/swh.indexer.egg-info/PKG-INFO b/swh.indexer.egg-info/PKG-INFO index 8a0e03d..22f5e93 100644 --- a/swh.indexer.egg-info/PKG-INFO +++ b/swh.indexer.egg-info/PKG-INFO @@ -1,71 +1,71 @@ Metadata-Version: 2.1 Name: swh.indexer -Version: 2.0.1 +Version: 2.0.2 Summary: Software Heritage Content Indexer Home-page: https://forge.softwareheritage.org/diffusion/78/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-indexer Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-indexer/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS swh-indexer ============ Tools to compute multiple indexes on SWH's raw contents: - content: - mimetype - ctags - language - fossology-license - metadata - revision: - metadata An indexer is in charge of: - looking up objects - extracting information from those objects - store those information in the swh-indexer db There are multiple indexers working on different object types: - content indexer: works with content sha1 hashes - revision indexer: works with revision sha1 hashes - origin indexer: works with origin identifiers Indexation procedure: - receive batch of ids - retrieve the associated data depending on object type - compute for that object some index - store the result to swh's storage Current content indexers: - mimetype (queue swh_indexer_content_mimetype): detect the encoding and mimetype - language (queue swh_indexer_content_language): detect the programming language - ctags (queue swh_indexer_content_ctags): compute tags information - fossology-license (queue swh_indexer_fossology_license): compute the license - metadata: translate file into translated_metadata dict Current revision indexers: - metadata: detects files containing metadata and retrieves translated_metadata in content_metadata table in storage or run content indexer to translate files. 
diff --git a/swh/indexer/metadata_dictionary/cff.py b/swh/indexer/metadata_dictionary/cff.py index 0e66f21..c5aa5a0 100644 --- a/swh/indexer/metadata_dictionary/cff.py +++ b/swh/indexer/metadata_dictionary/cff.py @@ -1,71 +1,76 @@ from typing import Dict, List, Optional, Union import yaml from swh.indexer.codemeta import CODEMETA_CONTEXT_URL, CROSSWALK_TABLE, SCHEMA_URI from .base import DictMapping, SingleFileMapping class SafeLoader(yaml.SafeLoader): yaml_implicit_resolvers = { k: [r for r in v if r[0] != "tag:yaml.org,2002:timestamp"] for k, v in yaml.SafeLoader.yaml_implicit_resolvers.items() } class CffMapping(DictMapping, SingleFileMapping): """Dedicated class for Citation (CITATION.cff) mapping and translation""" name = "cff" filename = b"CITATION.cff" mapping = CROSSWALK_TABLE["Citation File Format Core (CFF-Core) 1.0.2"] string_fields = ["keywords", "license", "abstract", "version", "doi"] - def translate(self, raw_content: bytes) -> Dict[str, str]: + def translate(self, raw_content: bytes) -> Optional[Dict[str, str]]: raw_content_string: str = raw_content.decode() - content_dict = yaml.load(raw_content_string, Loader=SafeLoader) - metadata = self._translate_dict(content_dict) + try: + content_dict = yaml.load(raw_content_string, Loader=SafeLoader) + except yaml.scanner.ScannerError: + return None - metadata["@context"] = CODEMETA_CONTEXT_URL + if isinstance(content_dict, dict): + metadata = self._translate_dict(content_dict) + metadata["@context"] = CODEMETA_CONTEXT_URL + return metadata - return metadata + return None def normalize_authors(self, d: List[dict]) -> Dict[str, list]: result = [] for author in d: author_data: Dict[str, Optional[Union[str, Dict]]] = { "@type": SCHEMA_URI + "Person" } if "orcid" in author: author_data["@id"] = author["orcid"] if "affiliation" in author: author_data[SCHEMA_URI + "affiliation"] = { "@type": SCHEMA_URI + "Organization", SCHEMA_URI + "name": author["affiliation"], } if "family-names" in author: author_data[SCHEMA_URI + "familyName"] = author["family-names"] if "given-names" in author: author_data[SCHEMA_URI + "givenName"] = author["given-names"] result.append(author_data) result_final = {"@list": result} return result_final def normalize_doi(self, s: str) -> Dict[str, str]: if isinstance(s, str): return {"@id": "https://doi.org/" + s} def normalize_license(self, s: str) -> Dict[str, str]: if isinstance(s, str): return {"@id": "https://spdx.org/licenses/" + s} def normalize_repository_code(self, s: str) -> Dict[str, str]: if isinstance(s, str): return {"@id": s} def normalize_date_released(self, s: str) -> Dict[str, str]: if isinstance(s, str): return {"@value": s, "@type": SCHEMA_URI + "Date"} diff --git a/swh/indexer/metadata_dictionary/npm.py b/swh/indexer/metadata_dictionary/npm.py index d8bb6bb..467866d 100644 --- a/swh/indexer/metadata_dictionary/npm.py +++ b/swh/indexer/metadata_dictionary/npm.py @@ -1,161 +1,228 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI from .base import JsonMapping class NpmMapping(JsonMapping): """ dedicated class for NPM (package.json) mapping and translation """ name = "npm" mapping = CROSSWALK_TABLE["NodeJS"] filename = b"package.json" string_fields = ["name", "version", "homepage", "description", "email"] _schema_shortcuts = { 
"github": "git+https://github.com/%s.git", "gist": "git+https://gist.github.com/%s.git", "gitlab": "git+https://gitlab.com/%s.git", # Bitbucket supports both hg and git, and the shortcut does not # tell which one to use. # 'bitbucket': 'https://bitbucket.org/', } def normalize_repository(self, d): """https://docs.npmjs.com/files/package.json#repository >>> NpmMapping().normalize_repository({ ... 'type': 'git', ... 'url': 'https://example.org/foo.git' ... }) {'@id': 'git+https://example.org/foo.git'} >>> NpmMapping().normalize_repository( ... 'gitlab:foo/bar') {'@id': 'git+https://gitlab.com/foo/bar.git'} >>> NpmMapping().normalize_repository( ... 'foo/bar') {'@id': 'git+https://github.com/foo/bar.git'} """ if ( isinstance(d, dict) and isinstance(d.get("type"), str) and isinstance(d.get("url"), str) ): url = "{type}+{url}".format(**d) elif isinstance(d, str): if "://" in d: url = d elif ":" in d: (schema, rest) = d.split(":", 1) if schema in self._schema_shortcuts: url = self._schema_shortcuts[schema] % rest else: return None else: url = self._schema_shortcuts["github"] % d else: return None return {"@id": url} def normalize_bugs(self, d): """https://docs.npmjs.com/files/package.json#bugs >>> NpmMapping().normalize_bugs({ ... 'url': 'https://example.org/bugs/', ... 'email': 'bugs@example.org' ... }) {'@id': 'https://example.org/bugs/'} >>> NpmMapping().normalize_bugs( ... 'https://example.org/bugs/') {'@id': 'https://example.org/bugs/'} """ if isinstance(d, dict) and isinstance(d.get("url"), str): return {"@id": d["url"]} elif isinstance(d, str): return {"@id": d} else: return None _parse_author = re.compile( r"^ *" r"(?P.*?)" r"( +<(?P.*)>)?" r"( +\((?P.*)\))?" r" *$" ) def normalize_author(self, d): """https://docs.npmjs.com/files/package.json#people-fields-author-contributors' >>> from pprint import pprint >>> pprint(NpmMapping().normalize_author({ ... 'name': 'John Doe', ... 'email': 'john.doe@example.org', ... 'url': 'https://example.org/~john.doe', ... })) {'@list': [{'@type': 'http://schema.org/Person', 'http://schema.org/email': 'john.doe@example.org', 'http://schema.org/name': 'John Doe', 'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]} >>> pprint(NpmMapping().normalize_author( ... 'John Doe (https://example.org/~john.doe)' ... )) {'@list': [{'@type': 'http://schema.org/Person', 'http://schema.org/email': 'john.doe@example.org', 'http://schema.org/name': 'John Doe', 'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]} """ # noqa author = {"@type": SCHEMA_URI + "Person"} if isinstance(d, dict): name = d.get("name", None) email = d.get("email", None) url = d.get("url", None) elif isinstance(d, str): match = self._parse_author.match(d) if not match: return None name = match.group("name") email = match.group("email") url = match.group("url") else: return None if name and isinstance(name, str): author[SCHEMA_URI + "name"] = name if email and isinstance(email, str): author[SCHEMA_URI + "email"] = email if url and isinstance(url, str): author[SCHEMA_URI + "url"] = {"@id": url} return {"@list": [author]} + def normalize_description(self, description): + r"""Try to re-decode ``description`` as UTF-16, as this is a somewhat common + mistake that causes issues in the database because of null bytes in JSON. + + >>> NpmMapping().normalize_description("foo bar") + 'foo bar' + >>> NpmMapping().normalize_description( + ... "\ufffd\ufffd#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 \x00" + ... ) + 'foo bar' + >>> NpmMapping().normalize_description( + ... 
"\ufffd\ufffd\x00#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 " + ... ) + 'foo bar' + >>> NpmMapping().normalize_description( + ... # invalid UTF-16 and meaningless UTF-8: + ... "\ufffd\ufffd\x00#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00" + ... ) is None + True + >>> NpmMapping().normalize_description( + ... # ditto (ut looks like little-endian at first) + ... "\ufffd\ufffd#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00\x00" + ... ) is None + True + >>> NpmMapping().normalize_description(None) is None + True + """ + if not isinstance(description, str): + return None + # XXX: if this function ever need to support more cases, consider + # switching to https://pypi.org/project/ftfy/ instead of adding more hacks + if description.startswith("\ufffd\ufffd") and "\x00" in description: + # 2 unicode replacement characters followed by '# ' encoded as UTF-16 + # is a common mistake, which indicates a README.md was saved as UTF-16, + # and some NPM tool opened it as UTF-8 and used the first line as + # description. + + description_bytes = description.encode() + + # Strip the the two unicode replacement characters + assert description_bytes.startswith(b"\xef\xbf\xbd\xef\xbf\xbd") + description_bytes = description_bytes[6:] + + # If the following attempts fail to recover the description, discard it + # entirely because the current indexer storage backend (postgresql) cannot + # store zero bytes in JSON columns. + description = None + + if not description_bytes.startswith(b"\x00"): + # try UTF-16 little-endian (the most common) first + try: + description = description_bytes.decode("utf-16le") + except UnicodeDecodeError: + pass + if description is None: + # if it fails, try UTF-16 big-endian + try: + description = description_bytes.decode("utf-16be") + except UnicodeDecodeError: + pass + + if description: + if description.startswith("# "): + description = description[2:] + return description.rstrip() + return description + def normalize_license(self, s): """https://docs.npmjs.com/files/package.json#license >>> NpmMapping().normalize_license('MIT') {'@id': 'https://spdx.org/licenses/MIT'} """ if isinstance(s, str): return {"@id": "https://spdx.org/licenses/" + s} def normalize_homepage(self, s): """https://docs.npmjs.com/files/package.json#homepage >>> NpmMapping().normalize_homepage('https://example.org/~john.doe') {'@id': 'https://example.org/~john.doe'} """ if isinstance(s, str): return {"@id": s} def normalize_keywords(self, lst): """https://docs.npmjs.com/files/package.json#homepage >>> NpmMapping().normalize_keywords(['foo', 'bar']) ['foo', 'bar'] """ if isinstance(lst, list): return [x for x in lst if isinstance(x, str)] diff --git a/swh/indexer/storage/writer.py b/swh/indexer/storage/writer.py index 2df372c..b4fa365 100644 --- a/swh/indexer/storage/writer.py +++ b/swh/indexer/storage/writer.py @@ -1,67 +1,69 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Any, Callable, Dict, Iterable +from typing import Any, Callable, Dict, Iterable, Optional import attr try: - from swh.journal.writer import get_journal_writer + from swh.journal.writer import JournalWriterInterface, get_journal_writer except ImportError: get_journal_writer = None # type: ignore # mypy limitation, see https://github.com/python/mypy/issues/1153 from .model import BaseRow class JournalWriter: """Journal 
writer storage collaborator. It's in charge of adding objects to the journal. """ + journal: Optional[JournalWriterInterface] + def __init__(self, tool_getter: Callable[[int], Dict[str, Any]], journal_writer): """ Args: tool_getter: a callable that takes a tool_id and return a dict representing a tool object journal_writer: configuration passed to `swh.journal.writer.get_journal_writer` """ self._tool_getter = tool_getter if journal_writer: if get_journal_writer is None: raise EnvironmentError( "You need the swh.journal package to use the " "journal_writer feature" ) self.journal = get_journal_writer( **journal_writer, value_sanitizer=lambda object_type, value_dict: value_dict, ) else: self.journal = None def write_additions(self, obj_type, entries: Iterable[BaseRow]) -> None: if not self.journal: return # usually, all the additions in a batch are from the same indexer, # so this cache allows doing a single query for all the entries. tool_cache = {} for entry in entries: assert entry.object_type == obj_type # type: ignore # get the tool used to generate this addition tool_id = entry.indexer_configuration_id assert tool_id if tool_id not in tool_cache: tool_cache[tool_id] = self._tool_getter(tool_id) entry = attr.evolve( entry, tool=tool_cache[tool_id], indexer_configuration_id=None ) # write to kafka self.journal.write_addition(obj_type, entry) diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py index 3fb726e..65199fd 100644 --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -1,1257 +1,1334 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import logging from hypothesis import HealthCheck, given, settings, strategies import pytest from swh.indexer.codemeta import CODEMETA_TERMS from swh.indexer.metadata import ContentMetadataIndexer, DirectoryMetadataIndexer from swh.indexer.metadata_detector import detect_metadata from swh.indexer.metadata_dictionary import MAPPINGS from swh.indexer.metadata_dictionary.maven import MavenMapping from swh.indexer.metadata_dictionary.npm import NpmMapping from swh.indexer.metadata_dictionary.ruby import GemspecMapping from swh.indexer.storage.model import ContentMetadataRow, DirectoryIntrinsicMetadataRow from swh.indexer.tests.utils import DIRECTORY2 from swh.model.hashutil import hash_to_bytes from swh.model.model import Directory, DirectoryEntry from .utils import ( BASE_TEST_CONFIG, YARN_PARSER_METADATA, fill_obj_storage, fill_storage, json_document_strategy, xml_document_strategy, ) TRANSLATOR_TOOL = { "name": "swh-metadata-translator", "version": "0.0.2", "configuration": {"type": "local", "context": "NpmMapping"}, } class ContentMetadataTestIndexer(ContentMetadataIndexer): """Specific Metadata whose configuration is enough to satisfy the indexing tests. """ def parse_config_file(self, *args, **kwargs): assert False, "should not be called; the dir indexer configures it." 
DIRECTORY_METADATA_CONFIG = { **BASE_TEST_CONFIG, "tools": TRANSLATOR_TOOL, } class TestMetadata: """ Tests metadata_mock_tool tool for Metadata detection """ def setup_method(self): self.npm_mapping = MAPPINGS["NpmMapping"]() self.codemeta_mapping = MAPPINGS["CodemetaMapping"]() self.maven_mapping = MAPPINGS["MavenMapping"]() self.pkginfo_mapping = MAPPINGS["PythonPkginfoMapping"]() self.gemspec_mapping = MAPPINGS["GemspecMapping"]() self.cff_mapping = MAPPINGS["CffMapping"]() def test_compute_metadata_none(self): """ testing content empty content is empty should return None """ # given content = b"" # None if no metadata was found or an error occurred declared_metadata = None # when result = self.npm_mapping.translate(content) # then assert declared_metadata == result def test_compute_metadata_cff(self): """ testing CITATION.cff translation """ # given content = """# YAML 1.2 --- abstract: "Command line program to convert from Citation File \ Format to various other formats such as BibTeX, EndNote, RIS, \ schema.org, CodeMeta, and .zenodo.json." authors: - affiliation: "Netherlands eScience Center" family-names: Klaver given-names: Tom - affiliation: "Humboldt-Universität zu Berlin" family-names: Druskat given-names: Stephan orcid: https://orcid.org/0000-0003-4925-7248 cff-version: "1.0.3" date-released: 2019-11-12 doi: 10.5281/zenodo.1162057 keywords: - "citation" - "bibliography" - "cff" - "CITATION.cff" license: Apache-2.0 message: "If you use this software, please cite it using these metadata." repository-code: "https://github.com/citation-file-format/cff-converter-python" title: cffconvert version: "1.4.0-alpha0" """.encode( "utf-8" ) expected = { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "author": [ { "type": "Person", "affiliation": { "type": "Organization", "name": "Netherlands eScience Center", }, "familyName": "Klaver", "givenName": "Tom", }, { "id": "https://orcid.org/0000-0003-4925-7248", "type": "Person", "affiliation": { "type": "Organization", "name": "Humboldt-Universität zu Berlin", }, "familyName": "Druskat", "givenName": "Stephan", }, ], "codeRepository": ( "https://github.com/citation-file-format/cff-converter-python" ), "datePublished": "2019-11-12", "description": """Command line program to convert from \ Citation File Format to various other formats such as BibTeX, EndNote, \ RIS, schema.org, CodeMeta, and .zenodo.json.""", "identifier": "https://doi.org/10.5281/zenodo.1162057", "keywords": ["citation", "bibliography", "cff", "CITATION.cff"], "license": "https://spdx.org/licenses/Apache-2.0", "version": "1.4.0-alpha0", } # when result = self.cff_mapping.translate(content) # then assert expected == result + def test_compute_metadata_cff_invalid_yaml(self): + """ + test yaml translation for invalid yaml file + """ + # given + content = """cff-version: 1.0.3 +message: To cite the SigMF specification, please include the following: +authors: + - name: The GNU Radio Foundation, Inc. 
+ """.encode( + "utf-8" + ) + + expected = None + + result = self.cff_mapping.translate(content) + # then + assert expected == result + + def test_compute_metadata_cff_empty(self): + """ + test yaml translation for empty yaml file + """ + # given + content = """ + """.encode( + "utf-8" + ) + + expected = None + + result = self.cff_mapping.translate(content) + # then + assert expected == result + + def test_compute_metadata_cff_list(self): + """ + test yaml translation for empty yaml file + """ + # given + content = """ +- Foo +- Bar + """.encode( + "utf-8" + ) + + expected = None + + result = self.cff_mapping.translate(content) + # then + assert expected == result + def test_compute_metadata_npm(self): """ testing only computation of metadata with hard_mapping_npm """ # given content = b""" { "name": "test_metadata", "version": "0.0.2", "description": "Simple package.json test for indexer", "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" }, "author": { "email": "moranegg@example.com", "name": "Morane G" } } """ declared_metadata = { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "test_metadata", "version": "0.0.2", "description": "Simple package.json test for indexer", "codeRepository": "git+https://github.com/moranegg/metadata_test", "author": [ { "type": "Person", "name": "Morane G", "email": "moranegg@example.com", } ], } # when result = self.npm_mapping.translate(content) # then assert declared_metadata == result + def test_compute_metadata_invalid_description_npm(self): + """ + testing only computation of metadata with hard_mapping_npm + """ + # given + content = b""" + { + "name": "test_metadata", + "version": "0.0.2", + "description": 1234 + } + """ + declared_metadata = { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "name": "test_metadata", + "version": "0.0.2", + } + + # when + result = self.npm_mapping.translate(content) + # then + assert declared_metadata == result + def test_index_content_metadata_npm(self): """ testing NPM with package.json - one sha1 uses a file that can't be translated to metadata and should return None in the translated metadata """ # given sha1s = [ hash_to_bytes("26a9f72a7c87cc9205725cfd879f514ff4f3d8d5"), hash_to_bytes("d4c647f0fc257591cc9ba1722484229780d1c607"), hash_to_bytes("02fb2c89e14f7fab46701478c83779c7beb7b069"), ] # this metadata indexer computes only metadata for package.json # in npm context with a hard mapping config = BASE_TEST_CONFIG.copy() config["tools"] = [TRANSLATOR_TOOL] metadata_indexer = ContentMetadataTestIndexer(config=config) fill_obj_storage(metadata_indexer.objstorage) fill_storage(metadata_indexer.storage) # when metadata_indexer.run(sha1s) results = list(metadata_indexer.idx_storage.content_metadata_get(sha1s)) expected_results = [ ContentMetadataRow( id=hash_to_bytes("26a9f72a7c87cc9205725cfd879f514ff4f3d8d5"), tool=TRANSLATOR_TOOL, metadata={ "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "codeRepository": "git+https://github.com/moranegg/metadata_test", "description": "Simple package.json test for indexer", "name": "test_metadata", "version": "0.0.1", }, ), ContentMetadataRow( id=hash_to_bytes("d4c647f0fc257591cc9ba1722484229780d1c607"), tool=TRANSLATOR_TOOL, metadata={ "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "issueTracker": "https://github.com/npm/npm/issues", "author": [ { "type": "Person", "name": 
"Isaac Z. Schlueter", "email": "i@izs.me", "url": "http://blog.izs.me", } ], "codeRepository": "git+https://github.com/npm/npm", "description": "a package manager for JavaScript", "license": "https://spdx.org/licenses/Artistic-2.0", "version": "5.0.3", "name": "npm", "keywords": [ "install", "modules", "package manager", "package.json", ], "url": "https://docs.npmjs.com/", }, ), ] for result in results: del result.tool["id"] # The assertion below returns False sometimes because of nested lists assert expected_results == results def test_npm_bugs_normalization(self): # valid dictionary package_json = b"""{ "name": "foo", "bugs": { "url": "https://github.com/owner/project/issues", "email": "foo@example.com" } }""" result = self.npm_mapping.translate(package_json) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "name": "foo", "issueTracker": "https://github.com/owner/project/issues", "type": "SoftwareSourceCode", } # "invalid" dictionary package_json = b"""{ "name": "foo", "bugs": { "email": "foo@example.com" } }""" result = self.npm_mapping.translate(package_json) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "name": "foo", "type": "SoftwareSourceCode", } # string package_json = b"""{ "name": "foo", "bugs": "https://github.com/owner/project/issues" }""" result = self.npm_mapping.translate(package_json) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "name": "foo", "issueTracker": "https://github.com/owner/project/issues", "type": "SoftwareSourceCode", } def test_npm_repository_normalization(self): # normal package_json = b"""{ "name": "foo", "repository": { "type" : "git", "url" : "https://github.com/npm/cli.git" } }""" result = self.npm_mapping.translate(package_json) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "name": "foo", "codeRepository": "git+https://github.com/npm/cli.git", "type": "SoftwareSourceCode", } # missing url package_json = b"""{ "name": "foo", "repository": { "type" : "git" } }""" result = self.npm_mapping.translate(package_json) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "name": "foo", "type": "SoftwareSourceCode", } # github shortcut package_json = b"""{ "name": "foo", "repository": "github:npm/cli" }""" result = self.npm_mapping.translate(package_json) expected_result = { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "name": "foo", "codeRepository": "git+https://github.com/npm/cli.git", "type": "SoftwareSourceCode", } assert result == expected_result # github shortshortcut package_json = b"""{ "name": "foo", "repository": "npm/cli" }""" result = self.npm_mapping.translate(package_json) assert result == expected_result # gitlab shortcut package_json = b"""{ "name": "foo", "repository": "gitlab:user/repo" }""" result = self.npm_mapping.translate(package_json) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "name": "foo", "codeRepository": "git+https://gitlab.com/user/repo.git", "type": "SoftwareSourceCode", } @pytest.mark.parametrize( "filename", [b"package.json", b"Package.json", b"PACKAGE.json", b"PACKAGE.JSON"] ) def test_detect_metadata_package_json(self, filename): # given df = [ { "sha1_git": b"abc", "name": b"index.js", "target": b"abc", "length": 897, "status": "visible", "type": "file", "perms": 33188, "dir_id": b"dir_a", "sha1": b"bcd", }, { "sha1_git": b"aab", "name": filename, "target": b"aab", "length": 712, "status": "visible", "type": "file", "perms": 
33188, "dir_id": b"dir_a", "sha1": b"cde", }, ] # when results = detect_metadata(df) expected_results = {"NpmMapping": [b"cde"]} # then assert expected_results == results def test_detect_metadata_codemeta_json_uppercase(self): # given df = [ { "sha1_git": b"abc", "name": b"index.html", "target": b"abc", "length": 897, "status": "visible", "type": "file", "perms": 33188, "dir_id": b"dir_a", "sha1": b"bcd", }, { "sha1_git": b"aab", "name": b"CODEMETA.json", "target": b"aab", "length": 712, "status": "visible", "type": "file", "perms": 33188, "dir_id": b"dir_a", "sha1": b"bcd", }, ] # when results = detect_metadata(df) expected_results = {"CodemetaMapping": [b"bcd"]} # then assert expected_results == results def test_compute_metadata_valid_codemeta(self): raw_content = b"""{ "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "@type": "SoftwareSourceCode", "identifier": "CodeMeta", "description": "CodeMeta is a concept vocabulary that can be used to standardize the exchange of software metadata across repositories and organizations.", "name": "CodeMeta: Minimal metadata schemas for science software and code, in JSON-LD", "codeRepository": "https://github.com/codemeta/codemeta", "issueTracker": "https://github.com/codemeta/codemeta/issues", "license": "https://spdx.org/licenses/Apache-2.0", "version": "2.0", "author": [ { "@type": "Person", "givenName": "Carl", "familyName": "Boettiger", "email": "cboettig@gmail.com", "@id": "http://orcid.org/0000-0002-1642-628X" }, { "@type": "Person", "givenName": "Matthew B.", "familyName": "Jones", "email": "jones@nceas.ucsb.edu", "@id": "http://orcid.org/0000-0003-0077-4738" } ], "maintainer": { "@type": "Person", "givenName": "Carl", "familyName": "Boettiger", "email": "cboettig@gmail.com", "@id": "http://orcid.org/0000-0002-1642-628X" }, "contIntegration": "https://travis-ci.org/codemeta/codemeta", "developmentStatus": "active", "downloadUrl": "https://github.com/codemeta/codemeta/archive/2.0.zip", "funder": { "@id": "https://doi.org/10.13039/100000001", "@type": "Organization", "name": "National Science Foundation" }, "funding":"1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software", "keywords": [ "metadata", "software" ], "version":"2.0", "dateCreated":"2017-06-05", "datePublished":"2017-06-05", "programmingLanguage": "JSON-LD" }""" # noqa expected_result = { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "identifier": "CodeMeta", "description": "CodeMeta is a concept vocabulary that can " "be used to standardize the exchange of software metadata " "across repositories and organizations.", "name": "CodeMeta: Minimal metadata schemas for science " "software and code, in JSON-LD", "codeRepository": "https://github.com/codemeta/codemeta", "issueTracker": "https://github.com/codemeta/codemeta/issues", "license": "https://spdx.org/licenses/Apache-2.0", "version": "2.0", "author": [ { "type": "Person", "givenName": "Carl", "familyName": "Boettiger", "email": "cboettig@gmail.com", "id": "http://orcid.org/0000-0002-1642-628X", }, { "type": "Person", "givenName": "Matthew B.", "familyName": "Jones", "email": "jones@nceas.ucsb.edu", "id": "http://orcid.org/0000-0003-0077-4738", }, ], "maintainer": { "type": "Person", "givenName": "Carl", "familyName": "Boettiger", "email": "cboettig@gmail.com", "id": "http://orcid.org/0000-0002-1642-628X", }, "contIntegration": "https://travis-ci.org/codemeta/codemeta", "developmentStatus": "active", "downloadUrl": 
"https://github.com/codemeta/codemeta/archive/2.0.zip", "funder": { "id": "https://doi.org/10.13039/100000001", "type": "Organization", "name": "National Science Foundation", }, "funding": "1549758; Codemeta: A Rosetta Stone for Metadata " "in Scientific Software", "keywords": ["metadata", "software"], "version": "2.0", "dateCreated": "2017-06-05", "datePublished": "2017-06-05", "programmingLanguage": "JSON-LD", } result = self.codemeta_mapping.translate(raw_content) assert result == expected_result def test_compute_metadata_codemeta_alternate_context(self): raw_content = b"""{ "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", "@type": "SoftwareSourceCode", "identifier": "CodeMeta" }""" # noqa expected_result = { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "identifier": "CodeMeta", } result = self.codemeta_mapping.translate(raw_content) assert result == expected_result def test_compute_metadata_maven(self): raw_content = b""" Maven Default Project 4.0.0 com.mycompany.app my-app 1.2.3 central Maven Repository Switchboard default http://repo1.maven.org/maven2 false Apache License, Version 2.0 https://www.apache.org/licenses/LICENSE-2.0.txt repo A business-friendly OSS license """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "Maven Default Project", "identifier": "com.mycompany.app", "version": "1.2.3", "license": "https://www.apache.org/licenses/LICENSE-2.0.txt", "codeRepository": ( "http://repo1.maven.org/maven2/com/mycompany/app/my-app" ), } def test_compute_metadata_maven_empty(self): raw_content = b""" """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", } def test_compute_metadata_maven_almost_empty(self): raw_content = b""" """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", } def test_compute_metadata_maven_invalid_xml(self, caplog): expected_warning = ( "swh.indexer.metadata_dictionary.maven.MavenMapping", logging.WARNING, "Error parsing XML from foo", ) caplog.at_level(logging.WARNING, logger="swh.indexer.metadata_dictionary") raw_content = b""" """ caplog.clear() result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) assert caplog.record_tuples == [expected_warning] assert result is None raw_content = b""" """ caplog.clear() result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) assert caplog.record_tuples == [expected_warning] assert result is None def test_compute_metadata_maven_unknown_encoding(self, caplog): expected_warning = ( "swh.indexer.metadata_dictionary.maven.MavenMapping", logging.WARNING, "Error detecting XML encoding from foo", ) caplog.at_level(logging.WARNING, logger="swh.indexer.metadata_dictionary") raw_content = b""" """ caplog.clear() result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) assert caplog.record_tuples == [expected_warning] assert result is None raw_content = b""" """ caplog.clear() result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) assert caplog.record_tuples == [expected_warning] assert result is None def test_compute_metadata_maven_invalid_encoding(self, caplog): expected_warning = [ # libexpat1 <= 2.2.10-2+deb11u1 [ ( "swh.indexer.metadata_dictionary.maven.MavenMapping", 
logging.WARNING, "Error unidecoding XML from foo", ) ], # libexpat1 >= 2.2.10-2+deb11u2 [ ( "swh.indexer.metadata_dictionary.maven.MavenMapping", logging.WARNING, "Error parsing XML from foo", ) ], ] caplog.at_level(logging.WARNING, logger="swh.indexer.metadata_dictionary") raw_content = b""" """ caplog.clear() result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) assert caplog.record_tuples in expected_warning assert result is None def test_compute_metadata_maven_minimal(self): raw_content = b""" Maven Default Project 4.0.0 com.mycompany.app my-app 1.2.3 """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "Maven Default Project", "identifier": "com.mycompany.app", "version": "1.2.3", "codeRepository": ( "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" ), } def test_compute_metadata_maven_empty_nodes(self): raw_content = b""" Maven Default Project 4.0.0 com.mycompany.app my-app 1.2.3 """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "Maven Default Project", "identifier": "com.mycompany.app", "version": "1.2.3", "codeRepository": ( "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" ), } raw_content = b""" Maven Default Project 4.0.0 com.mycompany.app my-app """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "Maven Default Project", "identifier": "com.mycompany.app", "codeRepository": ( "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" ), } raw_content = b""" 4.0.0 com.mycompany.app my-app 1.2.3 """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "identifier": "com.mycompany.app", "version": "1.2.3", "codeRepository": ( "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" ), } raw_content = b""" Maven Default Project 4.0.0 com.mycompany.app my-app 1.2.3 """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "Maven Default Project", "identifier": "com.mycompany.app", "version": "1.2.3", "codeRepository": ( "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" ), } raw_content = b""" 1.2.3 """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "version": "1.2.3", } def test_compute_metadata_maven_invalid_licenses(self): raw_content = b""" Maven Default Project 4.0.0 com.mycompany.app my-app 1.2.3 foo """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "Maven Default Project", "identifier": "com.mycompany.app", "version": "1.2.3", "codeRepository": ( "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" ), } def test_compute_metadata_maven_multiple(self): """Tests when there are multiple code repos and licenses.""" raw_content = b""" Maven Default Project 4.0.0 com.mycompany.app my-app 1.2.3 central Maven Repository Switchboard default http://repo1.maven.org/maven2 false example Example Maven Repo default 
http://example.org/maven2 Apache License, Version 2.0 https://www.apache.org/licenses/LICENSE-2.0.txt repo A business-friendly OSS license MIT license https://opensource.org/licenses/MIT """ result = self.maven_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "Maven Default Project", "identifier": "com.mycompany.app", "version": "1.2.3", "license": [ "https://www.apache.org/licenses/LICENSE-2.0.txt", "https://opensource.org/licenses/MIT", ], "codeRepository": [ "http://repo1.maven.org/maven2/com/mycompany/app/my-app", "http://example.org/maven2/com/mycompany/app/my-app", ], } def test_compute_metadata_pkginfo(self): raw_content = b"""\ Metadata-Version: 2.1 Name: swh.core Version: 0.0.49 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Description: swh-core ======== \x20 core library for swh's modules: - config parser - hash computations - serialization - logging mechanism \x20 Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing """ # noqa result = self.pkginfo_mapping.translate(raw_content) assert result["description"] == [ "Software Heritage core utilities", # note the comma here "swh-core\n" "========\n" "\n" "core library for swh's modules:\n" "- config parser\n" "- hash computations\n" "- serialization\n" "- logging mechanism\n" "", ], result del result["description"] assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "url": "https://forge.softwareheritage.org/diffusion/DCORE/", "name": "swh.core", "author": [ { "type": "Person", "name": "Software Heritage developers", "email": "swh-devel@inria.fr", } ], "version": "0.0.49", } def test_compute_metadata_pkginfo_utf8(self): raw_content = b"""\ Metadata-Version: 1.1 Name: snowpyt Description-Content-Type: UNKNOWN Description: foo Hydrology N\xc2\xb083 """ # noqa result = self.pkginfo_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "snowpyt", "description": "foo\nHydrology N°83", } def test_compute_metadata_pkginfo_keywords(self): raw_content = b"""\ Metadata-Version: 2.1 Name: foo Keywords: foo bar baz """ # noqa result = self.pkginfo_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "foo", "keywords": ["foo", "bar", "baz"], } def test_compute_metadata_pkginfo_license(self): raw_content = b"""\ Metadata-Version: 2.1 Name: foo License: MIT """ # noqa result = self.pkginfo_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "foo", "license": "MIT", } def test_gemspec_base(self): raw_content = b""" Gem::Specification.new do |s| s.name = 'example' s.version = '0.1.0' 
s.licenses = ['MIT'] s.summary = "This is an example!" s.description = "Much longer explanation of the example!" s.authors = ["Ruby Coder"] s.email = 'rubycoder@example.com' s.files = ["lib/example.rb"] s.homepage = 'https://rubygems.org/gems/example' s.metadata = { "source_code_uri" => "https://github.com/example/example" } end""" result = self.gemspec_mapping.translate(raw_content) assert set(result.pop("description")) == { "This is an example!", "Much longer explanation of the example!", } assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "author": [{"type": "Person", "name": "Ruby Coder"}], "name": "example", "license": "https://spdx.org/licenses/MIT", "codeRepository": "https://rubygems.org/gems/example", "email": "rubycoder@example.com", "version": "0.1.0", } def test_gemspec_two_author_fields(self): raw_content = b""" Gem::Specification.new do |s| s.authors = ["Ruby Coder1"] s.author = "Ruby Coder2" end""" result = self.gemspec_mapping.translate(raw_content) assert result.pop("author") in ( [ {"type": "Person", "name": "Ruby Coder1"}, {"type": "Person", "name": "Ruby Coder2"}, ], [ {"type": "Person", "name": "Ruby Coder2"}, {"type": "Person", "name": "Ruby Coder1"}, ], ) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", } def test_gemspec_invalid_author(self): raw_content = b""" Gem::Specification.new do |s| s.author = ["Ruby Coder"] end""" result = self.gemspec_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", } raw_content = b""" Gem::Specification.new do |s| s.author = "Ruby Coder1", end""" result = self.gemspec_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", } raw_content = b""" Gem::Specification.new do |s| s.authors = ["Ruby Coder1", ["Ruby Coder2"]] end""" result = self.gemspec_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "author": [{"type": "Person", "name": "Ruby Coder1"}], } def test_gemspec_alternative_header(self): raw_content = b""" require './lib/version' Gem::Specification.new { |s| s.name = 'rb-system-with-aliases' s.summary = 'execute system commands with aliases' } """ result = self.gemspec_mapping.translate(raw_content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "SoftwareSourceCode", "name": "rb-system-with-aliases", "description": "execute system commands with aliases", } @settings(suppress_health_check=[HealthCheck.too_slow]) @given(json_document_strategy(keys=list(NpmMapping.mapping))) def test_npm_adversarial(self, doc): raw = json.dumps(doc).encode() self.npm_mapping.translate(raw) @settings(suppress_health_check=[HealthCheck.too_slow]) @given(json_document_strategy(keys=CODEMETA_TERMS)) def test_codemeta_adversarial(self, doc): raw = json.dumps(doc).encode() self.codemeta_mapping.translate(raw) @settings(suppress_health_check=[HealthCheck.too_slow]) @given( xml_document_strategy( keys=list(MavenMapping.mapping), root="project", xmlns="http://maven.apache.org/POM/4.0.0", ) ) def test_maven_adversarial(self, doc): self.maven_mapping.translate(doc) @settings(suppress_health_check=[HealthCheck.too_slow]) @given( strategies.dictionaries( # keys strategies.one_of( strategies.text(), *map(strategies.just, GemspecMapping.mapping) ), # values 
strategies.recursive( strategies.characters(), lambda children: strategies.lists(children, min_size=1), ), ) ) def test_gemspec_adversarial(self, doc): parts = [b"Gem::Specification.new do |s|\n"] for (k, v) in doc.items(): parts.append(" s.{} = {}\n".format(k, repr(v)).encode()) parts.append(b"end\n") self.gemspec_mapping.translate(b"".join(parts)) def test_directory_metadata_indexer(self): metadata_indexer = DirectoryMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) fill_obj_storage(metadata_indexer.objstorage) fill_storage(metadata_indexer.storage) tool = metadata_indexer.idx_storage.indexer_configuration_get( {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} ) assert tool is not None dir_ = DIRECTORY2 metadata_indexer.idx_storage.content_metadata_add( [ ContentMetadataRow( id=DIRECTORY2.entries[0].target, indexer_configuration_id=tool["id"], metadata=YARN_PARSER_METADATA, ) ] ) metadata_indexer.run([dir_.id]) results = list( metadata_indexer.idx_storage.directory_intrinsic_metadata_get( [DIRECTORY2.id] ) ) expected_results = [ DirectoryIntrinsicMetadataRow( id=dir_.id, tool=TRANSLATOR_TOOL, metadata=YARN_PARSER_METADATA, mappings=["npm"], ) ] for result in results: del result.tool["id"] # then assert results == expected_results def test_directory_metadata_indexer_single_root_dir(self): metadata_indexer = DirectoryMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) fill_obj_storage(metadata_indexer.objstorage) fill_storage(metadata_indexer.storage) # Add a parent directory, that is the only directory at the root # of the directory dir_ = DIRECTORY2 new_dir = Directory( entries=( DirectoryEntry( name=b"foobar-1.0.0", type="dir", target=dir_.id, perms=16384, ), ), ) assert new_dir.id is not None metadata_indexer.storage.directory_add([new_dir]) tool = metadata_indexer.idx_storage.indexer_configuration_get( {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} ) assert tool is not None metadata_indexer.idx_storage.content_metadata_add( [ ContentMetadataRow( id=DIRECTORY2.entries[0].target, indexer_configuration_id=tool["id"], metadata=YARN_PARSER_METADATA, ) ] ) metadata_indexer.run([new_dir.id]) results = list( metadata_indexer.idx_storage.directory_intrinsic_metadata_get([new_dir.id]) ) expected_results = [ DirectoryIntrinsicMetadataRow( id=new_dir.id, tool=TRANSLATOR_TOOL, metadata=YARN_PARSER_METADATA, mappings=["npm"], ) ] for result in results: del result.tool["id"] # then assert results == expected_results
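As a closing illustration of the npm changes above, here is a minimal sketch of the new description workaround, assuming swh.indexer 2.0.2 is installed; the mangled string is modelled on the doctests in npm.py rather than taken from the test data:

from swh.indexer.metadata_dictionary.npm import NpmMapping

npm = NpmMapping()

# A description produced by saving a README as UTF-16 and re-reading it as
# UTF-8: two replacement characters (the misread BOM) followed by
# NUL-interleaved ASCII. translate() routes the package.json "description"
# field through normalize_description().
mangled = "\ufffd\ufffd#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00"
print(npm.normalize_description(mangled))  # -> 'foo bar'

# Values that cannot be recovered are dropped rather than stored with NUL
# bytes, and non-string descriptions (see
# test_compute_metadata_invalid_description_npm above) are ignored as well.
print(npm.normalize_description(1234))  # -> None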