diff --git a/.gitignore b/.gitignore
index 397b4a7..06e4f88 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 *.log
+*.zip
diff --git a/compare.py b/compare.py
index 0fa3b1a..48cec99 100755
--- a/compare.py
+++ b/compare.py
@@ -1,98 +1,132 @@
 #!/usr/bin/env python
 
 import io
 import logging
 import os
 import psycopg2
 
 from swh.model.hashutil import hash_to_hex
 from swh.provenance import get_provenance
 
 
 conninfo1 = {
     "cls": "ps",
     "db":
     {
         "host": "/var/run/postgresql",
         "port": "5436",
         "dbname": "old"
     }
 }
 conninfo2 = {
     "cls": "ps",
     "db":
     {
         "host": "/var/run/postgresql",
         "port": "5436",
-        "dbname": "revisited"
+        "dbname": "test"
     }
 }
 
 
-# Print iterations progress
-def printProgressBar(iteration, total, prefix = 'Progress:', suffix = 'Complete', decimals = 1, length = 50, fill = '█', printEnd = "\r"):
+# Print iterations progress; total must be non-zero (it is used as a divisor).
+def printProgressBar(
+    iteration,
+    total,
+    prefix = 'Progress:',
+    suffix = 'Complete',
+    decimals = 1,
+    length = 50,
+    fill = '█',
+    printEnd = "\r"
+):
     """
     Call in a loop to create terminal progress bar
     @params:
         iteration   - Required  : current iteration (Int)
         total       - Required  : total iterations (Int)
         prefix      - Optional  : prefix string (Str)
         suffix      - Optional  : suffix string (Str)
         decimals    - Optional  : positive number of decimals in percent complete (Int)
         length      - Optional  : character length of bar (Int)
         fill        - Optional  : bar fill character (Str)
         printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)
     """
     percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
     filledLength = int(length * iteration // total)
     bar = fill * filledLength + '-' * (length - filledLength)
     print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
     # Print New Line on Complete
     if iteration == total: 
         print()
 
 
+# Build the log file path "<cwd>/<script name>-<suffix>.log".
+def outfilename(suffix):
+    stem = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0]
+    return os.path.join(os.getcwd(), ''.join((stem, '-', suffix, '.log')))
+
+
+# Write log file: append one line per occurrence row.
+def writeLogFile(filename, occurrences):
+    """Append one "blob, rev, date, path" line per occurrence row to filename."""
+    # Force UTF-8 so the U+FFFD replacement character is always encodable.
+    with io.open(filename, 'a', encoding='utf-8') as outfile:
+        for row in occurrences:
+            path = row[3]
+            if isinstance(path, (bytes, bytearray, memoryview)):
+                # Decode raw path bytes; invalid UTF-8 sequences become U+FFFD
+                # instead of raising, so every occurrence still gets logged.
+                path = bytes(path).decode('utf-8', 'replace')
+            outfile.write(
+                "{blob}, {rev}, {date}, {path}\n".format(
+                    blob=hash_to_hex(row[0]),
+                    rev=hash_to_hex(row[1]),
+                    date=row[2],
+                    path=path,
+                )
+            )
+
+
 if __name__ == "__main__":
+    # Remove log files left by previous runs; writeLogFile() appends to them.
+    outfile1 = outfilename(conninfo1['db']['dbname'])
+    outfile2 = outfilename(conninfo2['db']['dbname'])
+
+    if os.path.exists(outfile1): os.remove(outfile1)
+    if os.path.exists(outfile2): os.remove(outfile2)
+
+    # Get a provenance object for each database and query its list of content ids.
     provenance1 = get_provenance(**conninfo1)
     provenance2 = get_provenance(**conninfo2)
 
     provenance1.cursor.execute('''SELECT id FROM content ORDER BY id''')
     content1 = set(map(lambda row: row[0], provenance1.cursor.fetchall()))
 
     provenance2.cursor.execute('''SELECT id FROM content ORDER BY id''')
     content2 = set(map(lambda row: row[0], provenance2.cursor.fetchall()))
 
     if content1 == content2:
+        # If the lists of content match, check that their occurrences do as well.
         total = len(content1)
         printProgressBar(0, total)
 
+        # Iterate over all content, querying its occurrences in both databases.
         for i, blob in enumerate(content1):
-            occurrences1 = set(provenance1.content_find_all(blob))
-            occurrences2 = set(provenance2.content_find_all(blob))
-
-            if occurrences1 != occurrences2:
-                with io.open(conninfo1['db']['dbname'] + '.log', 'a') as outfile:
-                    for row in occurrences1:
-                        outfile.write(
-                            "{blob}, {rev}, {date}, {path}\n".format(
-                                blob=hash_to_hex(row[0]),
-                                rev=hash_to_hex(row[1]),
-                                date=row[2],
-                                path=os.fsdecode(row[3]),
-                            )
-                        )
-                with io.open(conninfo2['db']['dbname'] + '.log', 'a') as outfile:
-                    for row in occurrences2:
-                        outfile.write(
-                            "{blob}, {rev}, {date}, {path}\n".format(
-                                blob=hash_to_hex(row[0]),
-                                rev=hash_to_hex(row[1]),
-                                date=row[2],
-                                path=os.fsdecode(row[3]),
-                            )
-                        )
+            occurrences1 = list(provenance1.content_find_all(blob))
+            occurrences2 = list(provenance2.content_find_all(blob))
+
+            # On a mismatch (in count or in members), log both sides to file.
+            if (
+                len(occurrences1) != len(occurrences2) or
+                set(occurrences1) != set(occurrences2)
+            ):
+                writeLogFile(outfile1, occurrences1)
+                writeLogFile(outfile2, occurrences2)
 
             printProgressBar(i + 1, total)
 
     else:
+        # If the lists of content don't match, we are done.
+        # TODO: maybe log the difference?
         logging.warning("Content lists are different")