#!/usr/bin/env python
"""Compare md5-based scan listings of two sets of directories.

Every scan file holds one record per line (md5, name, size, mtime, ...);
records of the "old" and the "new" set are matched through their md5 sums.
"""

import sys
import math
import os
import re
import urllib
import optparse
from decimal import Decimal, ROUND_HALF_UP

try:
    # Python 2 only (deprecated since 2.6, removed in 3.0).  The import is
    # kept for backward compatibility; show_size() no longer depends on it.
    import fpformat
except ImportError:
    fpformat = None

try:
    from urllib import unquote as _unquote  # Python 2
except ImportError:
    from urllib.parse import unquote as _unquote  # Python 3

# --------------------------------------------------------------------------


class Options(object):
    """Holder for the parsed command-line options."""

    def __init__(self):
        self.quick = False  # -q / --quick
        self.run = ""       # -r / --run: name of the function to execute


def read_options():
    """Parse the command line into the module globals ``opt`` and ``arg``."""
    parser = optparse.OptionParser()
    parser.add_option("-q", "--quick", dest="quick", action="store_true")
    parser.add_option("-r", "--run", dest="run", help="method name",
                      default="")
    global opt
    global arg
    opt = Options()
    (opt, arg) = parser.parse_args(values=opt)

# --------------------------------------------------------------------------


def decode_str(s):
    """Return *s* with URL %xx escapes replaced by the characters they encode."""
    return _unquote(s)


def decode_num(s):
    """Return *s* as an int; an empty string counts as 0.

    Raises ValueError when *s* is non-empty and not a valid integer.
    """
    return int(s) if s else 0


def decode_time(s):
    """Return a time stamp unchanged (time stamps are kept as text)."""
    return s

# --------------------------------------------------------------------------

# Values of FileInfo.kind / FileInfo.mode.  lookup() assigns Old, New, Equal
# and Duplicated; Normal is the initial value.
Normal = 0
Old = 1
New = 2
Modified = 3
Equal = 4
Duplicated = 5


class FileInfo(object):
    """One record of a scan file."""

    def __init__(self):
        self.kind = Normal
        self.name = ""
        self.size = 0
        self.mtime = ""
        self.md5 = ""
        # NOTE(review): readLine() stores the raw text of a "mode=" item
        # here, while lookup() overwrites it with one of the constants above.
        self.mode = Normal
        # Optional fields; readLine() fills them only when the scan line
        # carries the matching "key=" item.  They are initialised here so
        # that reading them never raises AttributeError.
        self.uid = 0
        self.gid = 0
        self.atime = ""
        self.ctime = ""
        self.e_model = ""
        self.e_number = ""
        self.e_width = 0
        self.e_height = 0
        self.e_time = ""

# --------------------------------------------------------------------------


def _decode_raw(s):
    """Return the item text as it is."""
    return s


def _decode_e_time(s):
    """Decode an ``e_time`` value: URL-unquote it, then treat it as a time."""
    return decode_time(decode_str(s))


# "key=value" items: the key is also the FileInfo attribute name.
_KEYED_FIELDS = {
    "md5": _decode_raw,
    "name": decode_str,
    "size": decode_num,
    "mode": _decode_raw,
    "uid": decode_num,
    "gid": decode_num,
    "atime": decode_time,
    "ctime": decode_time,
    "mtime": decode_time,
    "e_model": decode_str,
    "e_number": decode_str,
    "e_width": decode_num,
    "e_height": decode_num,
    "e_time": _decode_e_time,
}

# Items without a known "key=" prefix are positional, in this order.
_POSITIONAL_FIELDS = (
    ("md5", _decode_raw),
    ("name", decode_str),
    ("size", decode_num),
    ("mtime", decode_time),
)


def readLine(line):
    """Read one line from scan-file, return FileInfo.

    The line is split on whitespace.  Items of the form ``key=value`` with a
    known key set the matching field; the remaining items fill md5, name,
    size and mtime in that order.  Surplus positional items are ignored.
    """
    result = FileInfo()
    position = 0
    for item in line.split():
        key, sep, value = item.partition("=")
        decoder = _KEYED_FIELDS.get(key) if sep else None
        if decoder is not None:
            setattr(result, key, decoder(value))
        elif position < len(_POSITIONAL_FIELDS):
            attr, decoder = _POSITIONAL_FIELDS[position]
            setattr(result, attr, decoder(item))
            position += 1
    return result


def readFile(collection, fileName):
    """Append to *collection* every record of *fileName* with name and md5.

    The file is closed even when a line fails to parse.
    """
    with open(fileName, "r") as scan:
        for line in scan:
            data = readLine(line)
            if data.name != "" and data.md5 != "":
                collection.append(data)

# --------------------------------------------------------------------------


def lookup(a, b):
    """Lookup files from a in b.

    Sets ``mode`` on every record, matching by md5:

    * in *a*: Duplicated for a repeated md5, Equal when the md5 also occurs
      in *b*, otherwise Old;
    * in *b*: Duplicated for a repeated md5, Equal when the md5 also occurs
      in *a*, otherwise New.
    """
    first_seen = {}
    for f in a:
        if f.md5 in first_seen:
            f.mode = Duplicated
        else:
            first_seen[f.md5] = f
            f.mode = Old

    second_seen = {}
    for g in b:
        matched = g.md5 in first_seen
        if matched:
            original = first_seen[g.md5]
            if original.mode == Old:
                original.mode = Equal
        if g.md5 in second_seen:
            g.mode = Duplicated
        else:
            g.mode = Equal if matched else New
            second_seen[g.md5] = g

# --------------------------------------------------------------------------


def ignore_name(name):
    """Return True for files inside Windows system / recycle directories."""
    # return False
    return name.startswith(("System Volume Information\\", "Recycled\\"))


def with_suffix(name, suffixes):
    """Return True when *name* ends with any of *suffixes*."""
    return name.endswith(tuple(suffixes))


def with_mask(name, mask):
    """Return True when the regular expression *mask* matches inside *name*."""
    return re.search(mask, name) is not None


def compare_by_name(x, y):
    """cmp-style comparison of two records by name (returns -1, 0 or 1)."""
    return (x.name > y.name) - (x.name < y.name)


def show_size(size):
    """Return *size* (bytes) as megabytes with one decimal, e.g. "1.5MB".

    Decimal arithmetic with half-up rounding stands in for the removed
    ``fpformat.fix``, which rounded the decimal digits half-up as well.
    """
    megabytes = Decimal(str(size)) / Decimal(1000000)
    rounded = megabytes.quantize(Decimal("0.1"), rounding=ROUND_HALF_UP)
    return str(rounded) + "MB"


def fmt(num):
    """Return *num* as text with a space between groups of three digits."""
    txt = str(num)
    # Keep a leading minus sign out of the grouping ("-1 234", not "- 1 234").
    sign = "-" if txt.startswith("-") else ""
    digits = txt[len(sign):]
    head = len(digits) % 3 or 3
    groups = [digits[:head]]
    groups.extend(digits[i:i + 3] for i in range(head, len(digits), 3))
    return sign + " ".join(groups)
def _emit(*parts):
    """Write *parts* on one line separated by single spaces.

    Reproduces the output of the Python 2 ``print a, b, c`` statement while
    staying valid syntax on both Python 2 and Python 3.
    """
    print(" ".join(str(part) for part in parts))


def printSummary(txt, cnt, sum):
    """Print one summary row: right-aligned label, file count and byte total."""
    _emit(txt.rjust(32), fmt(cnt).rjust(7), "files",
          fmt(sum).rjust(15), "bytes")


def summary(a, txt):
    """Print number and total size of all records in *a*, labelled *txt*."""
    sizes = [f.size for f in a]
    total = 0
    for size in sizes:
        total += size
    printSummary(txt, len(sizes), total)


def summary2(a, txt, m):
    """Print number and total size of the records in *a* whose mode is *m*."""
    sizes = [f.size for f in a if f.mode == m]
    total = 0
    for size in sizes:
        total += size
    printSummary(txt, len(sizes), total)


def listing(aa, bb):
    """List the files found only in the old set *aa* or only in the new *bb*.

    The listings of duplicated and of equal files are switched off, as in
    the original code; flip the two flags below to enable them.
    """
    show_duplicated = False
    show_equal = False

    if show_duplicated:
        for f in aa:
            if f.mode == Duplicated:
                _emit("Duplicated file in old directory ", f.name)
        for f in bb:
            if f.mode == Duplicated:
                _emit("Duplicated file in new directory ", f.name)

    for f in aa:
        if f.mode == Old:
            _emit("File only in old directory ", f.name, show_size(f.size))
    for f in bb:
        if f.mode == New:
            _emit("File only in new directory ", f.name, show_size(f.size))

    if show_equal:
        for f in aa:
            if f.mode == Equal:
                _emit("Equal old file ", f.name)
        for f in bb:
            if f.mode == Equal:
                _emit("Equal new file ", f.name)


def _select(files, exclude, include, mask):
    """Return the records of *files* that pass all filters, sorted by name."""
    selected = []
    for f in files:
        if ignore_name(f.name) or with_suffix(f.name, exclude):
            continue
        # An empty include list / empty mask means "no restriction".
        if include and not with_suffix(f.name, include):
            continue
        if mask != "" and not with_mask(f.name, mask):
            continue
        selected.append(f)
    selected.sort(key=lambda f: f.name)
    return selected


def printLookup(a, b, exclude=(), include=(), mask=""):
    """Print the comparison report for old set *a* and new set *b*.

    Records are expected to carry the modes assigned by lookup().

    exclude -- suffixes of files to leave out
    include -- when non-empty, only files with one of these suffixes are kept
    mask    -- when non-empty, only names matching this regular expression
               are kept

    The per-file listing is skipped when the global option ``quick`` is set.
    The "Old files:" / "New files:" rows count the unfiltered sets; every
    other row counts the filtered ones.
    """
    aa = _select(a, exclude, include, mask)
    bb = _select(b, exclude, include, mask)

    if not opt.quick:
        listing(aa, bb)

    print("")
    summary(a, "Old files:")
    summary(aa, "Selected old files:")
    summary(b, "New files:")
    summary(bb, "Selected new files:")
    print("")
    summary2(aa, "Duplications in old directory:", Duplicated)
    summary2(bb, "Duplications in new directory:", Duplicated)
    summary2(aa, "Only in old directory:", Old)
    summary2(bb, "Only in new directory:", New)
    summary2(aa, "Equal:", Equal)
    summary2(bb, "Equal:", Equal)
# --------------------------------------------------------------------------


def _say(*parts):
    """Write *parts* on one line separated by single spaces.

    Reproduces the output of the Python 2 ``print a, b, c`` statement while
    staying valid syntax on both Python 2 and Python 3.
    """
    print(" ".join(str(part) for part in parts))


class Command(object):
    """State shared by the small command functions below."""

    def __init__(self):
        self.first_set = []                 # records of the "old" side
        self.second_set = []                # records of the "new" side
        self.current_set = self.first_set   # set that read() appends to
        self.include_set = []               # suffixes selected with only()
        self.exclude_set = []               # suffixes dropped with exclude()
        self.mask_set = ""                  # regular expression set by mask()


cmd = Command()


def only(ext):
    """Restrict the comparison to the suffixes in *ext* (plus upper case)."""
    for e in ext:
        cmd.include_set.append(e)
        cmd.include_set.append(e.upper())


def exclude(ext):
    """Leave files with the suffixes in *ext* (plus upper case) out."""
    for e in ext:
        cmd.exclude_set.append(e)
        cmd.exclude_set.append(e.upper())


def mask(m):
    """Restrict the comparison to names matching the regular expression *m*."""
    cmd.mask_set = m


def read(name):
    """Read scan file *name* into the currently selected set."""
    readFile(cmd.current_set, name)


def switch():
    """Make the following read() calls fill the second ("new") set."""
    cmd.current_set = cmd.second_set


def compare():
    """Match the two sets by md5 and print the report."""
    lookup(cmd.first_set, cmd.second_set)
    printLookup(cmd.first_set, cmd.second_set,
                cmd.exclude_set, cmd.include_set, cmd.mask_set)

# --------------------------------------------------------------------------

# Scan files loaded by read_all(), in reading order.  The "# !?" markers are
# carried over unchanged from the original source.
_ALL_SCANS = (
    "data/wd-foto-canon-powershot-a70.txt",
    "data/wd-foto-eos-foto-2008.txt",
    "data/wd-foto-fotoarchiv.txt",
    "data/wd-foto-raw.txt",
    "data/wd-foto-selection-archive.txt",
    "data/wd-video.txt",

    "data2/wd-foto-2009-02-01.txt",

    "data3/amd-diskf-2009-04-25.txt",
    "data3/amd-diskg-2009-04-25.txt",
    "data3/pentium4-diskf-2009-04-25.txt",
    "data3/pentium4-diskg-2009-04-25.txt",
    "data3/wd-foto-2009-04-25.txt",
    "data3/wd-video-2009-04-25.txt",
    "data3/wd-prev-2009-04-25.txt",

    "data4/pentium4-diskf-2009-06-27.txt",
    "data4/pentium4-diskg-2009-06-27.txt",
    "data4/wd-foto-2009-06-27.txt",
    "data4/wd-video-2009-06-27.txt",

    "data5/blue-fotky-2009-08-23.txt",
    "data5/duo-store-foto-2009-08-22.txt",  # !?
    "data5/duo-diskn-video-2009-08-22.txt",  # !?
    "data5/duo-store-transfer-disk_n-fotoarchiv-2009-08-22.txt",  # !?

    "data6/duo-diskn-foto-2009-10-04.txt",  # !?
    "data6/duo-diskn-foto-orig-2009-10-04.txt",  # !?

    "data7/duo-diskn-foto-2009-10-18.txt",  # !?
    "data7/duo-diskn-fotoarchiv-2009-10-18.txt",  # !?
    "data7/duo-diskn-video-2009-10-18.txt",  # !?
    "data7/duo-store-fotografie-diskf-2009-10-17.txt",  # !?
    "data7/duo-store-fotografie-diskg-2009-10-17.txt",  # !?
    "data7/red-diskf-2009-10-17.txt",
    "data7/red-diskg-2009-10-17.txt",
)


def read_all():
    """Read every scan file of _ALL_SCANS into the current set."""
    for path in _ALL_SCANS:
        read(path)


def compare_all():
    """Compare all earlier scans against the data8 scans."""
    read_all()

    switch()
    read("data8/wd-foto-2009-11-08.txt")
    read("data8/wd-fotoarchiv-2009-11-08.txt")
    read("data8/wd-video-2009-11-08.txt")

    exclude([".info", ".thm", ".scn", ".db", ".xls", ".exe", ".doc", ".pdf",
             ".zip"])
    # exclude ( [ ".avi", ".mpg" ] )
    # only ( [ ".avi", ".mpg" ] )
    # only ( [ ".jpg", ".cr2" ] )
    # only ( [ ".jpg" ] )
    compare()

# --------------------------------------------------------------------------


def compare_wd3():
    """Compare the data3 pentium4 scans against the data3 wd scans."""
    read("data3/pentium4-diskf-2009-04-25.txt")
    read("data3/pentium4-diskg-2009-04-25.txt")

    switch()
    read("data3/wd-foto-2009-04-25.txt")
    read("data3/wd-video-2009-04-25.txt")

    exclude([".info"])
    compare()


def compare_wd4():
    """Compare the data4 pentium4 scans against the data4 wd scans."""
    read("data4/pentium4-diskf-2009-06-27.txt")
    read("data4/pentium4-diskg-2009-06-27.txt")

    switch()
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")

    exclude([".info"])
    compare()


def compare_amd():
    """Compare the data3 amd scans against the data3 pentium4 scans."""
    read("data3/amd-diskf-2009-04-25.txt")
    read("data3/amd-diskg-2009-04-25.txt")

    switch()
    read("data3/pentium4-diskf-2009-04-25.txt")
    read("data3/pentium4-diskg-2009-04-25.txt")

    exclude([".info"])
    # only ( [ ".avi", ".mpg" ] )
    compare()


def compare_backup():
    """Compare the data3 wd-backup / wd-prev scans against the data4 wd scans."""
    read("data3/wd-backup-2009-04-25.txt")
    read("data3/wd-prev-2009-04-25.txt")

    switch()
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")

    exclude([".info"])
    # only ( [ ".jpg", ".avi" ] )
    # mask ( "/[a-zA-Z0-9]{3}_[0-9]{4}\\." )
    compare()

# --------------------------------------------------------------------------


def compare_blue():
    """Compare the duo scans against the data5 blue scan."""
    read("data4/duo-diskn-fotoarchiv-2009-06-28.txt")
    read("data5/duo-diskn-video-2009-08-22.txt")
    read("data5/duo-store-foto-2009-08-22.txt")

    switch()
    read("data5/blue-fotky-2009-08-23.txt")

    compare()


def compare_amd_red():
    """Compare the data3 amd scans against the data7 red scans."""
    read("data3/amd-diskf-2009-04-25.txt")
    read("data3/amd-diskg-2009-04-25.txt")

    switch()
    read("data7/red-diskf-2009-10-17.txt")
    read("data7/red-diskg-2009-10-17.txt")

    # only ( [ '.jpg', '.cr2' ] )
    compare()


def compare_wd_red():
    """Compare the data4 wd scans against the data7 red scans."""
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")

    switch()
    read("data7/red-diskf-2009-10-17.txt")
    read("data7/red-diskg-2009-10-17.txt")

    exclude([".info", ".thm", ".scn"])
    # only ( [ '.jpg' ] )
    # only ( [ '.cr2' ] )
    # only ( [ ".avi", ".mpg"] )
    compare()


def compare_blue_red():
    """Compare the data5 blue scan against the data7 red scans."""
    read("data5/blue-fotky-2009-08-23.txt")

    switch()
    read("data7/red-diskf-2009-10-17.txt")
    read("data7/red-diskg-2009-10-17.txt")

    # only ( [ '.jpg' ] )
    # only ( [ '.jpg', '.cr2' ] )
    compare()


def compare_pentium4_red():
    """Compare the data4 pentium4 scans against the data7 red scans."""
    read("data4/pentium4-diskf-2009-06-27.txt")
    read("data4/pentium4-diskg-2009-06-27.txt")

    switch()
    read("data7/red-diskf-2009-10-17.txt")
    read("data7/red-diskg-2009-10-17.txt")

    exclude([".info", ".thm", ".scn"])
    compare()


def compare_duo_red():
    """Compare the data7 duo-store scans against the data7 red scans."""
    read("data7/duo-store-fotografie-diskf-2009-10-17.txt")  # !?
    read("data7/duo-store-fotografie-diskg-2009-10-17.txt")  # !?

    switch()
    read("data7/red-diskf-2009-10-17.txt")
    read("data7/red-diskg-2009-10-17.txt")

    exclude([".info", ".thm", ".scn"])
    exclude([".avi", ".mpg"])
    # only ( [ '.jpg' ] )
    # only ( [ '.cr2' ] )
    # mask ( "/[a-zA-Z0-9]{3}_[0-9]{4}\\." )
    # mask ( "/[a-zA-Z0-9]{3}_[0-9]{4}" )
    # mask ( "/[iI][mM][gG]_[0-9]{4}" )
    compare()

# --------------------------------------------------------------------------


def compare_amd_wd4():
    """Compare .avi/.mpg files of the data3 amd scans and the data4 wd scans."""
    read("data3/amd-diskf-2009-04-25.txt")
    read("data3/amd-diskg-2009-04-25.txt")

    switch()
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")

    only([".avi", ".mpg"])
    # only ( [ '.jpg', '.cr2' ] )
    compare()


def compare_amd_wd8():
    """Compare .jpg/.cr2 files of the data3 amd scans and the data8 wd scans."""
    read("data3/amd-diskf-2009-04-25.txt")
    read("data3/amd-diskg-2009-04-25.txt")

    switch()
    read("data8/wd-foto-2009-11-08.txt")
    read("data8/wd-fotoarchiv-2009-11-08.txt")
    # read ("data8/wd-video-2009-11-08.txt")

    # only ( [ ".avi", ".mpg" ] )
    only(['.jpg', '.cr2'])
    compare()

# --------------------------------------------------------------------------


def compare_common():
    """Compare .jpg files of the data7 red-diskf scan and the red-diskg scan."""
    read("data7/red-diskf-2009-10-17.txt")

    switch()
    read("data7/red-diskg-2009-10-17.txt")

    only(['.jpg'])
    # only ( [ '.cr2' ] )
    # only ( [ ".avi", ".mpg" ] )
    # mask ( "/[a-zA-Z0-9]{3}_[0-9]{4}" )
    # mask ( "/[a-zA-Z0-9]{3}_[0-9]{4}\\." )
    compare()

# --------------------------------------------------------------------------
# --------------------------------------------------------------------------


def _e_key(f):
    """Sort key of a record: (e_model, e_number, e_time)."""
    return (f.e_model, f.e_number, f.e_time)


def compare_by_e(x, y):
    """cmp-style comparison by e_model, then e_number, then e_time.

    Returns -1, 0 or 1 without relying on the Python 2-only ``cmp``.
    """
    left = _e_key(x)
    right = _e_key(y)
    return (left > right) - (left < right)


def check_stamps(a, key, n1, n2):
    """Report records whose e_model contains *key* but whose name contains
    *n1* or *n2*, then print *key* with the number of matching records."""
    cnt = 0
    for f in a:
        if f.e_model.find(key) != -1:
            cnt = cnt + 1
            if f.name.find(n1) != -1 or f.name.find(n2) != -1:
                _say("Inconsitent name: ", f.e_model, f.e_number, f.name)
    _say(key, cnt)


def read_stamps():
    """Check that records sharing an md5 also share their e_* fields.

    The three extra reports at the end are switched off, as in the original
    code; flip the flags below to enable them.
    """
    show_sorted = False
    show_model_checks = False
    show_short_names = False

    a = []
    # readFile (a, "data7/exif-duo-store-diskf-2009-10-24.txt")
    # readFile (a, "data7/exif-duo-store-diskg-fotoarchiv-2009-10-24.txt")
    readFile(a, "data8/wd-foto-2009-11-08.txt")
    readFile(a, "data8/wd-fotoarchiv-2009-11-08.txt")

    cache = {}
    for f in a:
        if f.md5 not in cache:
            cache[f.md5] = f
            continue
        g = cache[f.md5]
        same = (f.e_model == g.e_model
                and f.e_number == g.e_number
                and f.e_width == g.e_width
                and f.e_height == g.e_height
                and f.e_time == g.e_time)
        if not same:
            _say("Inconsitent info: ", f.name, f.e_model, f.e_number,
                 f.e_width, f.e_height, f.e_time)

    if show_sorted:
        for f in sorted(a, key=_e_key):
            _say(f.e_number, f.e_width, f.e_height, f.e_model, f.e_time,
                 f.name)

    if show_model_checks:
        check_stamps(a, "A70", "A200", "EOS")
        check_stamps(a, "A2000", "EOS", "A70")
        check_stamps(a, "EOS", "A2000", "A70")

    if show_short_names:
        pattern = "/[a-zA-Z0-9]{3}_[0-9]{4}\\."
        for f in a:
            if f.e_model != "":
                if re.search(pattern, f.name):
                    _say("Short name", f.e_model, f.e_number, f.name)

# --------------------------------------------------------------------------


def main():
    """Run the function named by --run, or compare_all() when none is given."""
    read_options()
    # read_stamps ()
    if opt.run != "":
        # NOTE(review): eval() executes whatever text was passed to --run.
        # That is acceptable for a private command-line tool, but it must
        # never be fed untrusted input.
        eval(opt.run + "()")
        sys.exit()
    compare_all()
    # compare_common ()


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------