#!/usr/local/bin/python

import os
import md5

# Group every file found under /home by its size in bytes.
# dict_of_all_sizes maps size -> list of full paths that have that size.
dict_of_all_sizes = {}
for root, dirs, files in os.walk('/home'):
  # Removing 'CVS' from dirs in place keeps os.walk from descending into it.
  if 'CVS' in dirs:
    dirs.remove('CVS')
  for name in files:
    path = os.path.join(root, name)
    try:
      size = os.path.getsize(path)
    except OSError:
      # The size could not be determined; leave this file out of the scan.
      continue
    dict_of_all_sizes.setdefault(size, []).append(path)

# Keep only the sizes shared by two or more files: a file whose size is
# unique cannot be a duplicate of any other file.
dict_of_sizes = {}
for size, same_size_files in dict_of_all_sizes.items():
  if len(same_size_files) < 2:
    continue
  dict_of_sizes[size] = same_size_files

# Within each group of same-sized files, bucket the files by the MD5 digest
# of their first 1024 bytes and print every bucket that holds two or more
# paths.
# NOTE: only the leading 1024 bytes are hashed, so larger files that share a
# size and a 1024-byte prefix are reported even if they differ further on.
for size in dict_of_sizes:
  # hash_dict maps hex digest -> list of paths whose head has that digest.
  hash_dict = {}
  for path in dict_of_sizes[size]:
    try:
      # Binary mode: the digest must be taken over the raw bytes.
      handle = open(path, 'rb')
      try:
        # read(1024) returns the whole file when it is shorter than 1024
        # bytes, so no separate branch on the file size is needed.
        data = handle.read(1024)
      finally:
        # Always release the descriptor; a large tree would otherwise
        # accumulate open files.
        handle.close()
    except (IOError, OSError):
      # The file could not be opened or read (for example it was removed or
      # is not readable by this user). Skip it instead of aborting the scan,
      # matching how files that cannot be sized are skipped.
      continue
    digest = md5.new(data).hexdigest()
    hash_dict.setdefault(digest, []).append(path)
  for matches in hash_dict.values():
    if len(matches) > 1:
      # Parenthesised single-argument form prints exactly what the bare
      # print statement did.
      print(matches)
