diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -17,6 +17,53 @@
 from swh.scheduler.utils import get_task
 
 
+class DiskIndexer:
+    """Mixin intended to be used with other *Indexer classes.
+
+       Indexer* inheriting from this class are a category of indexers
+       which need the disk for their computations.
+
+       Expects:
+           self.working_directory variable defined at runtime.
+
+    """
+    def __init__(self):
+        super().__init__()
+
+    def write_to_temp(self, filename, data):
+        """Write data to a file created in a fresh temporary directory.
+
+        Args:
+            filename (str): name of the file to create; it is joined to
+            a new temporary directory made under self.working_directory
+            data (bytes): the content to write in the temporary
+            file
+
+        Returns:
+            The path to the temporary file created. That file is
+            filled in with the raw content's data.
+
+        """
+        os.makedirs(self.working_directory, exist_ok=True)
+        temp_dir = tempfile.mkdtemp(dir=self.working_directory)
+        content_path = os.path.join(temp_dir, filename)
+
+        with open(content_path, 'wb') as f:
+            f.write(data)
+
+        return content_path
+
+    def cleanup(self, content_path):
+        """Remove the temporary directory holding content_path.
+
+        Args:
+            content_path (str): path of a file inside the directory to remove
+
+        """
+        temp_dir = os.path.dirname(content_path)
+        shutil.rmtree(temp_dir)
+
+
 class BaseIndexer(SWHConfig,
                   metaclass=abc.ABCMeta):
     """Base class for indexers to inherit from.
@@ -257,50 +304,3 @@
             if self.rescheduling_task:
                 self.log.warn('Rescheduling batch')
                 self.rescheduling_task.delay(sha1s, policy_update)
-
-
-class DiskIndexer:
-    """Mixin intended to be used with other *Indexer classes.
-
-       Indexer* inheriting from this class are a category of indexers
-       which needs the disk for their computations.
-
-       Expects:
-           self.working_directory variable defined at runtime.
-
-    """
-    def __init__(self):
-        super().__init__()
-
-    def write_to_temp(self, filename, data):
-        """Write the sha1's content in a temporary file.
-
-        Args:
-            sha1 (str): the sha1 name
-            filename (str): one of sha1's many filenames
-            data (bytes): the sha1's content to write in temporary
-            file
-
-        Returns:
-            The path to the temporary file created. That file is
-            filled in with the raw content's data.
-
-        """
-        os.makedirs(self.working_directory, exist_ok=True)
-        temp_dir = tempfile.mkdtemp(dir=self.working_directory)
-        content_path = os.path.join(temp_dir, filename)
-
-        with open(content_path, 'wb') as f:
-            f.write(data)
-
-        return content_path
-
-    def cleanup(self, content_path):
-        """Remove content_path from working directory.
-
-        Args:
-            content_path (str): the file to remove
-
-        """
-        temp_dir = os.path.dirname(content_path)
-        shutil.rmtree(temp_dir)
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2016-2017  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+from .indexer import BaseIndexer
+from swh.indexer.metadata_dictionary import MetadataDict
+
+
+def compute_metadata(raw_content, context):
+    """Translate raw content into a metadata dict for the given context.
+
+    Decodes raw_content and hands it to MetadataDict.parse. Any exception
+    raised while decoding or parsing is printed and None is returned.
+
+        Args:
+            raw_content (bytes): content to translate; None yields None
+            context (text): name passed to MetadataDict.parse as context
+        Returns:
+            result (dict): value returned by MetadataDict.parse, or None
+    """
+    if raw_content is None:
+        return None
+
+    try:
+        content_text = raw_content.decode()
+        tool_for_complete_translation = MetadataDict()
+        return tool_for_complete_translation.parse(context, content_text)
+    except Exception as e:
+        print(e)
+    return None
+
+
+class ContentMetadataIndexer(BaseIndexer):
+    """Indexer in charge of:
+    - filtering out content already indexed
+    - reading content from objstorage with the content's id sha1
+    - computing translated_metadata by given context
+    - using the MetadataDict and a tool for each context
+    - storing the result in storage
+    """
+    CONFIG_BASE_FILENAME = 'indexer/metadata'
+
+    ADDITIONAL_CONFIG = {
+        'tools': ('dict', {
+            'name': 'hard_mapping_npm',
+            'version': '0.0.1',
+            'configuration': {
+                'type': 'test',
+                'debian-package': '',
+                'max_content_size': 10240,
+            },
+        }),
+    }
+
+    def prepare(self):
+        super().prepare()
+        tool_configuration = self.config['tools']['configuration']
+        self.max_content_size = tool_configuration['max_content_size']
+
+    def filter_contents(self, sha1s):
+        """Filter out known sha1s and return only missing ones.
+
+        """
+        yield from self.storage.content_metadata_missing((
+            {
+                'id': sha1,
+                'indexer_configuration_id': self.tools['id'],
+            } for sha1 in sha1s
+        ))
+
+    def index_content(self, sha1, raw_content):
+        """Translate the metadata of one content and return the result.
+
+        Args:
+            sha1 (bytes): content's identifier
+            raw_content (bytes): raw content in bytes
+
+        Returns:
+            A dict, representing a content_metadata, with keys id,
+            indexer_configuration_id and translated_metadata
+
+        """
+        result = {
+            'id': sha1,
+            'indexer_configuration_id': self.tools['id'],
+            'translated_metadata': None
+        }
+        # TODO a tool for each context; the name comes from class defaults
+        self.tool = self.ADDITIONAL_CONFIG['tools'][1]['name']
+
+        try:
+            result['translated_metadata'] = compute_metadata(
+                raw_content, self.tool)
+        except Exception:
+            # report through the indexer's logger rather than stdout
+            self.log.exception('Metadata translation failed for %s', sha1)
+
+        return result
+
+    def persist_index_computations(self, results, policy_update):
+        """Persist the results in storage.
+
+        Args:
+            results ([dict]): list of content_metadata, dict with the
+            following keys:
+              - id (bytes): content's identifier (sha1)
+              - translated_metadata (jsonb): detected metadata
+            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
+            respectively update duplicates or ignore them
+
+        """
+        self.storage.content_metadata_add(
+            results, conflict_update=(policy_update == 'update-dups'))
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/metadata_dictionary.py
@@ -0,0 +1,115 @@
+# Copyright (C) 2015-2017  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+
+npm_mapping = {  # npm package.json key -> codemeta term
+        'repository': 'codeRepository',
+        'os': 'operatingSystem',
+        'cpu': 'processorRequirements',
+        'engines': 'processorRequirements',  # same term as 'cpu'
+        'dependencies': 'softwareRequirements',
+        'bundledDependencies': 'softwareRequirements',  # same term again
+        'peerDependencies': 'softwareRequirements',  # same term again
+        'author': 'author',
+        'contributor': 'contributor',
+        'keywords': 'keywords',
+        'license': 'license',
+        'version': 'version',
+        'description': 'description',
+        'name': 'name',
+        'devDependencies': 'softwareSuggestions',
+        'optionalDependencies': 'softwareSuggestions',  # same term again
+        'bugs': 'issueTracker'
+}
+
+
+class MetadataDict():
+    """Dispatch decoded content to a context-specific translator.
+
+    """
+
+    def parse(self, context, content):
+        """
+        First landing method: a dispatcher that sends content
+        to the right function to carry out the real parsing of syntax
+        and translation of terms.
+        Args:
+            - context (text) : defines to which function/tool
+                the content is sent
+            - content (text): the string form of the raw_content
+
+        Returns:
+            - translated_metadata (dict): jsonb form needed for the indexer
+                to store in storage, or None when nothing is translated
+
+        """
+        # checks if decoding is needed?
+
+        # sends content to parser and/or translator
+        if context == "hard_mapping_npm":
+            return self.translate_npm(content)
+
+        elif context == "pom_xml":
+            # TODO
+            return self.translate_pom(self.parse_xml(content))
+
+        else:
+            return None
+
+    def parse_xml(self, content):
+        """
+        Parses content from xml to a python dict (stub: returns None for now)
+        Args:
+            - content (text): the string form of the raw_content ( in xml)
+
+        Returns:
+            - parsed_xml (dict): a python dict of the content after parsing
+        """
+        pass
+
+    def translate_npm(self, content):
+        """
+        Translates content by parsing content to a json object
+        and translating with the npm mapping (for now hard_coded mapping)
+        Args:
+            - content (text) : should be json
+
+        Returns:
+            - translated_metadata (dict): jsonb form needed for the indexer
+        """
+        # TODO: keep mapping not in code (maybe fetch crosswalk from storage?)
+        # if fetched from storage should be done once for batch of sha1s
+        mapping = npm_mapping
+        content_dict = json.loads(content)
+
+        # entries whose key has no known term are all kept under 'other'
+        default = 'other'
+        translated_metadata = {default: {}}
+
+        for k, v in content_dict.items():
+            term = mapping.get(k, default)
+            if term != default:
+                # NOTE: several npm keys share one term (for instance
+                # 'cpu' and 'engines'); the key seen last overwrites
+                # the value stored for an earlier one
+                translated_metadata[term] = v
+            else:
+                translated_metadata[default][k] = v
+
+        return translated_metadata
+
+    def translate_pom(self, content):
+        """TODO: translate a parsed pom.xml; currently returns None."""
+
+
+def main():
+    """Print the translation of a small hard-coded npm sample."""
+    sample = '{"name": "test_name", "unknown_term": "ut"}'
+    print(MetadataDict().translate_npm(sample))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
new file mode 100644
--- /dev/null
+++ b/swh/indexer/tests/test_metadata.py
@@ -0,0 +1,205 @@
+# Copyright (C) 2015-2017  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+import logging
+from nose.tools import istest
+
+from swh.indexer import metadata
+from swh.indexer.metadata import ContentMetadataIndexer
+from swh.indexer.tests.test_utils import MockObjStorage
+
+
+def ordered(obj):
+    """Return obj with dicts and lists recursively turned into sorted lists."""
+    if isinstance(obj, list):
+        return sorted(map(ordered, obj))
+    if not isinstance(obj, dict):
+        return obj
+    return sorted((key, ordered(val)) for key, val in obj.items())
+
+
+class MockStorage():
+    """Mock storage keeping what content_metadata_add receives, so that
+    tests can read it back from the state attribute."""
+    def content_metadata_add(self, metadata, conflict_update=None):
+        self.state = metadata  # read back by the indexing test
+        self.conflict_update = conflict_update
+
+    def indexer_configuration_get(self, tool):
+        return {
+            'id': 30,  # same value as in the test's expected results
+        }
+
+
+class TestMetadataIndexer(ContentMetadataIndexer):
+    """ContentMetadataIndexer whose prepare() sets a fixed configuration
+       and mock storages, which is enough to satisfy the indexing tests.
+    """
+    def prepare(self):
+        self.config = {
+            'rescheduling_task': None,
+            'tools':  {
+                'name': 'npm_mock_tool',  # not read by index_content
+                'version': '0.1',
+                'configuration': {
+                    'type': 'local',
+                    'debian-package': '',
+                    'max_content_size': 10240,
+                }
+            }
+        }
+        self.storage = MockStorage()  # keeps the persisted results
+        self.log = logging.getLogger('swh.indexer')
+        self.objstorage = MockObjStorage()
+        self.task_destination = None
+        self.rescheduling_task = self.config['rescheduling_task']
+        self.tool_config = self.config['tools']['configuration']
+        self.max_content_size = self.tool_config['max_content_size']
+        self.tools = self.retrieve_tools_information()
+
+
+class Metadata(unittest.TestCase):
+    """
+    Tests compute_metadata and the metadata indexer with the npm mapping
+    """
+    def setUp(self):
+        self.maxDiff = None
+
+    @istest
+    def test_compute_metadata_none(self):
+        """
+        testing that a None content has no metadata:
+        compute_metadata should return None
+        """
+        # given
+        content = None
+        tool = "hard_mapping_npm"
+
+        # should it be empty {} or None if no metadata was found ?
+        declared_metadata = None
+        # when
+        result = metadata.compute_metadata(content, tool)
+        # then
+        self.assertEqual(declared_metadata, result)
+
+    @istest
+    def test_compute_metadata_npm(self):
+        """
+        testing only computation of metadata with hard_mapping_npm
+        """
+        # given
+        content = b"""
+            {
+                "name": "test_metadata",
+                "version": "0.0.1",
+                "description": "Simple package.json test for indexer",
+
+                  "repository": {
+                    "type": "git",
+                    "url": "https://github.com/moranegg/metadata_test"
+                }
+            }
+        """
+        declared_metadata = {
+            'name': 'test_metadata',
+            'version': '0.0.1',
+            'description': 'Simple package.json test for indexer',
+            'codeRepository': {
+                'type': 'git',
+                'url': 'https://github.com/moranegg/metadata_test'
+              },
+            'other': {}
+        }
+
+        # when
+        result = metadata.compute_metadata(
+                                content, "hard_mapping_npm")
+        # then
+        self.assertEqual(declared_metadata, result)
+
+    @istest
+    def test_index_content_metadata_npm(self):
+        """
+        testing the indexer run on two package.json contents (NPM)
+        """
+        # given
+        sha1s = ['26a9f72a7c87cc9205725cfd879f514ff4f3d8d5',
+                 'd4c647f0fc257591cc9ba1722484229780d1c607']
+        # this metadata indexer computes only metadata for package.json
+        # in npm context with a hard mapping
+        metadata_indexer = TestMetadataIndexer()
+
+        # when
+        metadata_indexer.run(sha1s, policy_update='ignore-dups')
+        results = metadata_indexer.storage.state
+
+        expected_results = [
+          {
+            'id': '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5',
+            'translated_metadata': {
+              'name': 'test_metadata',
+              'version': '0.0.1',
+              'codeRepository': {
+                'url': 'https://github.com/moranegg/metadata_test',
+                'type': 'git'
+              },
+              'description': 'Simple package.json test for indexer',
+              'other': {
+
+              }
+            },
+            'indexer_configuration_id': 30
+          },
+          {
+            'id': 'd4c647f0fc257591cc9ba1722484229780d1c607',
+            'translated_metadata': {
+              'name': 'npm',
+              'version': '5.0.3',
+              'keywords': [
+                'install',
+                'modules',
+                'package manager',
+                'package.json'
+              ],
+              'softwareSuggestions': {
+                'tacks': '~1.2.6',
+                'tap': '~10.3.2'
+              },
+              'description': 'a package manager for JavaScript',
+              'author': 'Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)',
+              'issueTracker': {
+                'url': 'https://github.com/npm/npm/issues'
+              },
+              'license': 'Artistic-2.0',
+              'softwareRequirements': {
+                'abbrev': '~1.1.0',
+                'ansistyles': '~0.1.3',
+                'ansicolors': '~0.3.2',
+                'JSONStream': '~1.3.1',
+                'ansi-regex': '~2.1.1'
+              },
+              'codeRepository': {
+                'url': 'https://github.com/npm/npm',
+                'type': 'git'
+              },
+              'other': {
+                'bundleDependencies': [
+                  'abbrev',
+                  'ansi-regex'
+                ],
+                'preferGlobal': True,
+                'homepage': 'https://docs.npmjs.com/',
+                'config': {
+                  'publishtest': False
+                }
+              }
+            },
+            'indexer_configuration_id': 30
+          }
+        ]
+        # then
+        # the commented-out print below is a debugging aid
+        # print(ordered(results))
+        self.assertEqual(ordered(expected_results), ordered(results))
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -51,6 +51,60 @@
             '93666f74f1cf635c8c8ac118879da6ec5623c410': b"""
             (should 'pygments (recognize 'lisp 'easily))
 
+            """,
+            '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b"""
+            {
+                "name": "test_metadata",
+                "version": "0.0.1",
+                "description": "Simple package.json test for indexer",
+                "repository": {
+                  "type": "git",
+                  "url": "https://github.com/moranegg/metadata_test"
+              }
+            }
+            """,
+            'd4c647f0fc257591cc9ba1722484229780d1c607': b"""
+            {
+              "version": "5.0.3",
+              "name": "npm",
+              "description": "a package manager for JavaScript",
+              "keywords": [
+                "install",
+                "modules",
+                "package manager",
+                "package.json"
+              ],
+              "preferGlobal": true,
+              "config": {
+                "publishtest": false
+              },
+              "homepage": "https://docs.npmjs.com/",
+              "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
+              "repository": {
+                "type": "git",
+                "url": "https://github.com/npm/npm"
+              },
+              "bugs": {
+                "url": "https://github.com/npm/npm/issues"
+              },
+              "dependencies": {
+                "JSONStream": "~1.3.1",
+                "abbrev": "~1.1.0",
+                "ansi-regex": "~2.1.1",
+                "ansicolors": "~0.3.2",
+                "ansistyles": "~0.1.3"
+              },
+              "bundleDependencies": [
+                "abbrev",
+                "ansi-regex"
+              ],
+              "devDependencies": {
+                "tacks": "~1.2.6",
+                "tap": "~10.3.2"
+              },
+              "license": "Artistic-2.0"
+            }
+
             """
 
         }