diff --git a/README b/README
index 16ed593..10c6f57 100644
--- a/README
+++ b/README
@@ -1,57 +1,81 @@
swh-fetcher-googlecode
======================

This fetcher:

- parses a gs:// url and transforms it according to the email's rule (see the email and the sketch below)
- derives the url of the file's metadata (mediaLink, length, crc32c, md5Hash, etc.)
- writes that metadata file to disk
- derives the actual content from the mediaLink entry (exactly the url described above)
- writes that content to disk
- checks that the content file's crc32c, md5 and length match the ones described in the metadata file
- flags the file as corrupted if they do not

```
Date: Fri, 8 Apr 2016 13:25:41 -0700
From: Chris Smith
To: Roberto Di Cosmo
Cc: Stefano Zacchiroli
Subject: Re: Archiving the sources from Google Code into Software Heritage
Message-ID:

You can get the list of all files stored in Google Cloud Storage, which
powers the Google Code Archive, here:

https://storage.googleapis.com/google-code-archive/google-code-archive.txt.zip
https://storage.googleapis.com/google-code-archive/google-code-archive-source.txt.zip
https://storage.googleapis.com/google-code-archive/google-code-archive-downloads.txt.zip

Just download and unzip the files. They contain all the Google Cloud
Storage object names in each bucket. From there you will need to just
download the actual files via a basic conversion. For example, with the
Google Cloud Storage URL
gs://google-code-archive/v2/code.google.com/hg4j/project.json, you can
get the file's contents by URL-escaping the string and adding it to
googleapis.com, e.g.
https://www.googleapis.com/storage/v1/b/google-code-archive/o/v2%2Fcode.google.com%2Fhg4j%2Fproject.json?alt=media.
The "?alt=media" part gets the object's contents, not the metadata.

You probably only care about the google-code-archive-source bucket,
since that is where we keep tarballs of git, hg, and the new svn dumps.
But if you were interested in poking around the project metadata
(e.g. issues) the schema is here.

If you run into any trouble let me know. I'll be able to look into any
missing or corrupt repositories for the next couple months. After that
time we will shut down the Google Code DVCS backends, and only the
Google Code Archive snapshot will remain. (So you resurrecting these
projects might ferret out any problems with my data.)

Cheers,
-Chris
```

Note: this implements only what's described above, and only for the source archive.
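For reference, a minimal sketch of the conversion rule described in the email. The helper name here is hypothetical; the fetcher's actual `transform` helper lives in swh/fetcher/googlecode/utils.py and is not shown in this diff:

```
from urllib.parse import quote


def gs_to_urls(gs_url):
    """Map a gs://<bucket>/<name> url to the googleapis.com metadata url
    and the '?alt=media' content url described in the email above."""
    bucket, name = gs_url[len('gs://'):].split('/', 1)
    # The object name must be url-escaped, its '/' separators included.
    escaped_name = quote(name, safe='')
    url_meta = 'https://www.googleapis.com/storage/v1/b/%s/o/%s' % (
        bucket, escaped_name)
    return url_meta, url_meta + '?alt=media'


# >>> gs_to_urls('gs://google-code-archive/v2/code.google.com/hg4j/project.json')[1]
# 'https://www.googleapis.com/storage/v1/b/google-code-archive/o/v2%2Fcode.google.com%2Fhg4j%2Fproject.json?alt=media'
```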
+
+
+# Metadata sample:
+
+```
+{
+    "kind": "storage#object",
+    "id": "google-code-archive-source/v2/code.google.com/hg4j/source-archive.zip/1455746620701000",
+    "selfLink": "https://www.googleapis.com/storage/v1/b/google-code-archive-source/o/v2%2Fcode.google.com%2Fhg4j%2Fsource-archive.zip",
+    "name": "v2/code.google.com/hg4j/source-archive.zip",
+    "bucket": "google-code-archive-source",
+    "generation": "1455746620701000",
+    "metageneration": "1",
+    "contentType": "application/octet-stream",
+    "timeCreated": "2016-02-17T22:03:40.698Z",
+    "updated": "2016-02-17T22:03:40.698Z",
+    "storageClass": "NEARLINE",
+    "size": "4655405",
+    "md5Hash": "FaIRjuSDe4v51H1+sRuggQ==",
+    "mediaLink": "https://www.googleapis.com/download/storage/v1/b/google-code-archive-source/o/v2%2Fcode.google.com%2Fhg4j%2Fsource-archive.zip?generation=1455746620701000&alt=media",
+    "crc32c": "PNKIqA==",
+    "etag": "CMjy1uHm/8oCEAE="
+}
+```
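Note that `md5Hash` and `crc32c` in this metadata are base64-encoded digests, not hex strings. A minimal sketch of the decoding step, assuming hex digests are what gets compared; the real `md5_from_b64`/`crc32c_from_b64` helpers live in swh/fetcher/googlecode/hashutil.py, outside this diff:

```
import base64


def digest_from_b64(b64_digest):
    """Decode a base64-encoded digest (md5Hash, crc32c) to a hex string."""
    return base64.b64decode(b64_digest).hex()


# >>> digest_from_b64('FaIRjuSDe4v51H1+sRuggQ==')
# '15a2118ee4837b8bf9d47d7eb11ba081'
```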
diff --git a/swh/fetcher/googlecode/loader.py b/swh/fetcher/googlecode/loader.py
index 7314781..aef93c0 100644
--- a/swh/fetcher/googlecode/loader.py
+++ b/swh/fetcher/googlecode/loader.py
@@ -1,181 +1,160 @@
# Copyright (C) 2015-2016  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import json
import logging
import os

import requests

from swh.core import config, hashutil

from .utils import transform
from .hashutil import crc32c_hash, md5_hash, md5_from_b64, crc32c_from_b64

-# sample meta:
-# {
-#     "kind": "storage#object",  # noqa
-#     "id": "google-code-archive-source/v2/code.google.com/hg4j/source-archive.zip/1455746620701000",  # noqa
-#     "selfLink": "https://www.googleapis.com/storage/v1/b/google-code-archive-source/o/v2%2Fcode.google.com%2Fhg4j%2Fsource-archive.zip",  # noqa
-#     "name": "v2/code.google.com/hg4j/source-archive.zip",  # noqa
-#     "bucket": "google-code-archive-source",  # noqa
-#     "generation": "1455746620701000",  # noqa
-#     "metageneration": "1",  # noqa
-#     "contentType": "application/octet-stream",  # noqa
-#     "timeCreated": "2016-02-17T22:03:40.698Z",  # noqa
-#     "updated": "2016-02-17T22:03:40.698Z",  # noqa
-#     "storageClass": "NEARLINE",  # noqa
-#     "size": "4655405",  # noqa
-#     "md5Hash": "FaIRjuSDe4v51H1+sRuggQ==",  # noqa
-#     "mediaLink": "https://www.googleapis.com/download/storage/v1/b/google-code-archive-source/o/v2%2Fcode.google.com%2Fhg4j%2Fsource-archive.zip?generation=1455746620701000&alt=media",  # noqa
-#     "crc32c": "PNKIqA==",  # noqa
-#     "etag": "CMjy1uHm/8oCEAE="  # noqa
-# }
-
-

class SWHGoogleFetcher(config.SWHConfig):
    """A swh data fetcher loader.

    This fetcher will:
    - retrieve the archive's metadata and write it to disk.
    - download the archive and write it to disk.
    - check that the size and checksums (md5, crc32c) match those
      described in the metadata.

    """
    def __init__(self):
        self.log = logging.getLogger('swh.fetcher.google.SWHGoogleFetcher')
        l = logging.getLogger('requests.packages.urllib3.connectionpool')
        l.setLevel(logging.WARN)

    def retrieve_source_meta(self, url_meta, filepath_meta):
        # Reuse the metadata file if a previous run already wrote it.
        if os.path.exists(filepath_meta):
            with open(filepath_meta, 'r') as f:
                meta = json.loads(f.read())
        else:
            meta = {}
            try:
                r = requests.get(url_meta)
            except Exception as e:
                msg = 'Problem when fetching metadata %s.' % url_meta
                self.log.error(msg)
                raise ValueError(msg, e)
            else:
                if not r.ok:
                    msg = 'Problem when fetching metadata %s.' % url_meta
                    self.log.error(msg)
                    raise ValueError(msg)
                meta = r.json()
                with open(filepath_meta, 'w') as f:
                    f.write(r.text)

        return meta

    def retrieve_source(self, url, filepath):
        if not os.path.exists(filepath):
            self.log.debug('Fetching %s\' raw data.' % url)
            try:
                # Stream the download to avoid holding the whole archive
                # in memory.
                r = requests.get(url, stream=True)
            except Exception as e:
                msg = 'Problem when fetching file %s.' % url
                self.log.error(msg)
                raise ValueError(msg, e)
            else:
                if not r.ok:
                    msg = 'Problem when fetching file %s.' % url
                    self.log.error(msg)
                    raise ValueError(msg)
                else:
                    with open(filepath, 'wb') as f:
                        for chunk in r.iter_content(hashutil.HASH_BLOCK_SIZE):
                            f.write(chunk)

    def check_source(self, meta, filepath):
        """Check filepath's size, md5 and crc32c against meta.

        Returns True if at least one check failed, False otherwise.

        """
        self.log.debug('Checking %s\' raw data checksums and size.' %
                       filepath)

        expected = {
            'crc32c': crc32c_from_b64(meta['crc32c']),
            'md5': md5_from_b64(meta['md5Hash']),
            'size': int(meta['size'])
        }

        error = False

        actual_size = os.path.getsize(filepath)
        if actual_size != expected['size']:
            msg = 'Bad size. Expected: %s. Got: %s' % (
                expected['size'], actual_size)
            self.log.error(msg)
            error = True

        # Then check that the checksums match the metadata
        with open(filepath, 'rb') as f:
            md5_h = md5_hash(f)
            if md5_h != expected['md5']:
                msg = 'Bad md5 signature. Expected: %s. Got: %s' % (
                    expected['md5'], md5_h)
                self.log.error(msg)
                error = True

            f.seek(0)
            crc32c_h = crc32c_hash(f)
            if expected['crc32c'] != crc32c_h:
                msg = 'Bad crc32c signature. Expected: %s. Got: %s' % (
                    expected['crc32c'], crc32c_h)
                self.log.error(msg)
                error = True

        return error

    def process(self, archive_gs, destination_rootpath):
        self.log.info('Fetch %s\'s metadata' % archive_gs)

        # First, derive the paths and urls from the gs:// url and
        # retrieve the archive's metadata
        parent_dir, filename, url_meta, url_content = transform(archive_gs)

        parent_dir = os.path.join(destination_rootpath, parent_dir)
        os.makedirs(parent_dir, exist_ok=True)

        project_name = os.path.basename(parent_dir)
        filename = project_name + '-' + filename
        filename_meta = filename + '.json'

        filepath = os.path.join(parent_dir, filename)
        filepath_meta = os.path.join(parent_dir, filename_meta)

        meta = self.retrieve_source_meta(url_meta, filepath_meta)
        if not meta:
            raise ValueError('Failed to download metadata, stopping.')

        # Second, if the file already exists, check whether it is sound
        if os.path.exists(filepath):
            errors = self.check_source(meta, filepath)
            if errors:
                self.log.error('Clean corrupted file %s' % filepath)
                os.remove(filepath)
            else:
                # it's ok, we are done!
                self.log.info('Archive %s already fetched!' % archive_gs)
                return

        # The file does not exist (or was corrupted), retrieve it
        self.retrieve_source(meta['mediaLink'], filepath)

        # Third, check the retrieved source
        errors = self.check_source(meta, filepath)
        if errors:
            if os.path.exists(filepath):
                filepath_corrupted = filepath + '.corrupted'
                self.log.error('Rename corrupted file %s to %s' % (
                    os.path.basename(filepath),
                    os.path.basename(filepath_corrupted)))
                os.rename(filepath, filepath_corrupted)
        else:
            self.log.info('Archive %s fetched.' % archive_gs)
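For context, a hypothetical driver for the loader above, assuming the source bucket listing from the email has been downloaded and unzipped, with one gs:// url per line as expected by `transform`; the listing filename and destination directory are illustrative:

```
import logging

from swh.fetcher.googlecode.loader import SWHGoogleFetcher

fetcher = SWHGoogleFetcher()
with open('google-code-archive-source.txt') as listing:
    for line in listing:
        gs_url = line.strip()  # assumed: one gs:// url per line
        if not gs_url:
            continue
        try:
            fetcher.process(gs_url, '/srv/storage/google-code')
        except ValueError:
            # process() raises ValueError on fetch problems; keep going.
            logging.exception('Skipping %s' % gs_url)
```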