#!/usr/bin/env python
#
# Compare two groups of file inventories and report what differs.
#
# An inventory is loaded from a scan file (one file per line, see readLine),
# from an "ls -lR" dump (scan_lslR) or straight from a directory tree
# (scan_dir).  The first group is treated as the "old" side and the second
# group as the "new" side; files are matched by md5 (lookup) or, with
# --no-md5, by name (lookup_by_name).  The result can be printed as a
# listing, as summaries, as a directory map, or as DOS "copy" commands.
#
# NOTE(review): this text was re-indented from a whitespace-collapsed copy of
# the script.  Nesting was inferred from the statements themselves; spots
# where more than one nesting was plausible carry their own NOTE(review).

from __future__ import print_function

import sys
import math
import os
import re
import time
import stat
import urllib
import optparse
import unicodedata

try:
    import fpformat
except ImportError:
    # fpformat does not exist on Python 3; show_size() then uses "%.1f".
    fpformat = None

try:
    _unquote = urllib.unquote
except AttributeError:
    # Python 3 moved unquote() into urllib.parse.
    from urllib.parse import unquote as _unquote


# --------------------------------------------------------------------------

class Options:
    """Default values for every command-line option.

    An instance is handed to optparse as the ``values`` object, so each
    attribute name below matches the ``dest`` of an option declared in
    read_options().
    """

    def __init__(self):
        self.quick = False
        self.no_md5 = False
        self.run = ""

        self.first = []
        self.second = []

        self.foto = False
        self.video = False
        self.jpeg = False
        self.cr2 = False
        self.avi = False
        self.mpeg = False
        self.exclude = False

        self.ext = False
        self.size = False
        self.sum = False
        self.large = False

        self.source = ""
        self.target = ""
        self.use_map = ""
        self.create_map = False

        self.copy = False
        self.xcopy = False
        self.cp = False
        self.rsync = False
        self.tar = False
        self.scp = False
        self.mkdir = False

    def ensure_value(self, name, value):
        """Return attribute *name*, first storing *value* if it is unset.

        optparse's "append" action calls this and appends to the returned
        object, so a missing or None attribute has to be stored on the
        instance; otherwise the appended item would be lost.
        """
        if getattr(self, name, None) is None:
            setattr(self, name, value)
        return getattr(self, name)


# Defaults, so that helpers consulting ``opt`` also work when this module is
# imported and read_options() has not been called.
opt = Options()
arg = []


def read_options():
    """Parse the command line into the module-level ``opt`` and ``arg``."""
    parser = optparse.OptionParser()

    parser.add_option("-q", "--quick", dest="quick", action="store_true",
                      help="print only summary")
    parser.add_option("-n", "--no-md5", dest="no_md5", action="store_true",
                      help="do not compare md5 ")
    parser.add_option("-r", "--run", dest="run", help="method name")

    parser.add_option("-a", "--first", dest="first", action="append",
                      help="first file group")
    # Fixed: this help text used to repeat "first file group".
    parser.add_option("-b", "--second", dest="second", action="append",
                      help="second file group")

    parser.add_option("-f", "--foto", dest="foto", action="store_true",
                      help=".jpg and .cr2")
    parser.add_option("-v", "--video", dest="video", action="store_true",
                      help=".avi and .mpg")
    parser.add_option("-j", "--jpg", "--jpeg", dest="jpeg",
                      action="store_true")
    parser.add_option("-c", "--cr2", dest="cr2", action="store_true")
    parser.add_option("--avi", dest="avi", action="store_true")
    parser.add_option("--mpg", "--mpeg", dest="mpeg", action="store_true")
    parser.add_option("-e", "--exclude", dest="exclude", action="store_true",
                      help="exclude .info, .db, .thm and .scn")
    parser.add_option("-l", "--large", dest="large", action="store_true",
                      help="only files >= 1MB")

    parser.add_option("--ext", dest="ext", action="store_true",
                      help="extension summary")
    parser.add_option("--size", dest="size", action="store_true",
                      help="size summary")
    parser.add_option("--sum", dest="sum", action="store_true",
                      help="summary")

    parser.add_option("--from", "--source", dest="source",
                      help="source directory")
    parser.add_option("--to", "--target", dest="target",
                      help="target directory")
    parser.add_option("--use-map", dest="use_map")
    parser.add_option("--create-map", dest="create_map", action="store_true")

    parser.add_option("--copy", dest="copy", action="store_true")
    parser.add_option("--xcopy", dest="xcopy", action="store_true")
    parser.add_option("--cp", dest="cp", action="store_true")
    parser.add_option("--rsync", dest="rsync", action="store_true")
    parser.add_option("--tar", dest="tar", action="store_true")
    parser.add_option("--scp", dest="scp", action="store_true")
    parser.add_option("--mkdir", dest="mkdir", action="store_true")

    global opt
    global arg
    opt = Options()
    (opt, arg) = parser.parse_args(values=opt)


# --------------------------------------------------------------------------

def decode_str(s):
    """Undo the %XX escaping used for text fields in scan files."""
    return _unquote(s)


def decode_num(s):
    """Convert a numeric field to int; an empty field counts as 0."""
    if len(s) == 0:
        return 0
    return int(s)


def decode_time(s):
    """Return a time field unchanged (time stamps are kept as text)."""
    return s


# --------------------------------------------------------------------------

# Values of FileInfo.mode assigned by lookup() / lookup_by_name().
Normal = 0
Old = 1
New = 2
Modified = 3
Equal = 4
Duplicated = 5


class FileInfo:
    """One inventory entry."""

    def __init__(self):
        self.kind = Normal
        self.name = ""
        self.size = 0
        self.mtime = ""
        self.md5 = ""
        # readLine() stores the raw text of a "mode=" token here; the
        # lookup functions later overwrite it with one of the constants.
        self.mode = Normal

        # Only set by readLine() when the scan line carries these tokens.
        self.uid = 0
        self.gid = 0
        self.atime = ""
        self.ctime = ""

        self.e_model = ""
        self.e_number = ""
        self.e_width = 0
        self.e_height = 0
        self.e_time = ""


# --------------------------------------------------------------------------

def _verbatim(s):
    """Return *s* unchanged (for fields stored exactly as written)."""
    return s


# "key=value" tokens understood by readLine: (prefix, attribute, decoder).
# No prefix is a prefix of another one, so the order is irrelevant.
_KEYED_FIELDS = (
    ("md5=", "md5", _verbatim),
    ("name=", "name", decode_str),
    ("size=", "size", decode_num),
    ("mode=", "mode", _verbatim),
    ("uid=", "uid", decode_num),
    ("gid=", "gid", decode_num),
    ("atime=", "atime", decode_time),
    ("ctime=", "ctime", decode_time),
    ("mtime=", "mtime", decode_time),
    ("e_model=", "e_model", decode_str),
    ("e_number=", "e_number", decode_str),
    ("e_width=", "e_width", decode_num),
    ("e_height=", "e_height", decode_num),
    ("e_time=", "e_time", lambda s: decode_time(decode_str(s))),
)

# Tokens without a known prefix are taken in this order.
_POSITIONAL_FIELDS = (
    ("md5", _verbatim),
    ("name", decode_str),
    ("size", decode_num),
    ("mtime", decode_time),
)


def readLine(line):
    "Read one line from scan-file, return FileInfo"
    result = FileInfo()
    position = 0
    for item in line.split():
        for prefix, attribute, decode in _KEYED_FIELDS:
            if item.startswith(prefix):
                setattr(result, attribute, decode(item[len(prefix):]))
                break
        else:
            # Un-prefixed token: 1st md5, 2nd name, 3rd size, 4th mtime;
            # any further ones are ignored.
            if position < len(_POSITIONAL_FIELDS):
                attribute, decode = _POSITIONAL_FIELDS[position]
                setattr(result, attribute, decode(item))
                position += 1
    return result


def readFile(collection, fileName):
    """Append a FileInfo to *collection* for each usable line of a scan file.

    Lines that yield no name or no md5 are skipped.
    """
    # NOTE(review): on Python 3 the file is decoded with the default text
    # encoding -- confirm that matches how the scan files were written.
    with open(fileName, "r") as scan:  # closed even if a line fails to parse
        for line in scan:
            data = readLine(line)
            if data.name != "" and data.md5 != "":
                collection.append(data)


# --------------------------------------------------------------------------

def conv_time(t):
    """Format a time stamp in seconds as UTC "YYYY-MM-DD:HH:MM:SS"."""
    return time.strftime("%Y-%m-%d:%H:%M:%S", time.gmtime(t))


def scan_dir(collection, top_dir, rel_dir):
    """Recursively append the files below top_dir/rel_dir to *collection*.

    Names are stored relative to *top_dir*; entries are visited in sorted
    order.  No md5 is computed, so these entries have an empty md5.
    """
    for loc_file_name in sorted(os.listdir(os.path.join(top_dir, rel_dir))):
        rel_file_name = os.path.join(rel_dir, loc_file_name)
        file_name = os.path.join(top_dir, rel_file_name)
        if os.path.isdir(file_name):
            scan_dir(collection, top_dir, rel_file_name)
        else:
            info = os.stat(file_name)
            result = FileInfo()
            result.name = rel_file_name
            result.size = info[stat.ST_SIZE]
            result.mtime = decode_time(conv_time(info[stat.ST_MTIME]))
            collection.append(result)


# --------------------------------------------------------------------------

def scan_lslR(collection, file_name):
    """Append the files listed in an "ls -lR" style dump to *collection*.

    Expected columns of a long-format line:
        items[0] mode, items[1] link count, items[2] uid, items[3] gid,
        items[4] size, items[5] date, items[6] time, items[7] file name
    A line consisting of a single word is taken as a bare file name.
    Lines with 2-7 columns, "total" lines and directory entries are skipped.
    """
    with open(file_name, "r") as dump:  # closed even if a line fails to parse
        directory = ""
        for line in dump:
            line = line.strip()
            if line == "" or line.startswith("total "):
                # (a "total ...:" line is treated as a header below, exactly
                # as before, because the ":" test comes first)
                if not line.endswith(":"):
                    continue

            if line.endswith(":"):
                # "path:" header that precedes the entries of a directory.
                directory = line[0:-1]
                if directory == ".":
                    directory = ""
                elif directory.startswith("./"):
                    directory = directory[2:]
                continue

            items = line.split(None, 8)
            item_count = len(items)

            name = ""
            if item_count == 1 or item_count == 8:
                name = items[item_count - 1]
            elif item_count > 8:
                # Name containing blanks: the words are glued back together
                # with single spaces (marked "strange" by the author).
                name = " ".join(items[7:])

            # Drop the "/" or "*" type suffix that ls -F adds.
            if name.endswith('/') or name.endswith('*'):
                name = name[0:-1]

            is_dir = item_count >= 8 and items[0].startswith('d')

            if name != "" and not is_dir:
                result = FileInfo()
                result.name = os.path.join(directory, name)
                if item_count != 1:
                    result.size = decode_num(items[4])
                    result.mtime = decode_time(items[5] + ':' + items[6])
                collection.append(result)


# --------------------------------------------------------------------------

def lookup(a, b):
    "Lookup files from a in b"
    # First occurrence of each md5 in a is Old, later ones are Duplicated.
    cache = {}
    for f in a:
        if f.md5 in cache:
            f.mode = Duplicated
        else:
            cache[f.md5] = f
            f.mode = Old

    second_cache = {}
    for g in b:
        known = g.md5 in cache
        if known:
            f = cache[g.md5]
            if f.mode == Old:
                f.mode = Equal

        if g.md5 in second_cache:
            g.mode = Duplicated
        else:
            if known:
                g.mode = Equal
            else:
                g.mode = New
            second_cache[g.md5] = g


# --------------------------------------------------------------------------

def lookup_by_name(a, b):
    """Classify the files of a and b by name instead of by md5.

    Same-named files whose sizes differ are marked Modified.
    """
    cache = {}
    for f in a:
        if f.name in cache:
            f.mode = Duplicated
        else:
            cache[f.name] = f
            f.mode = Old

    second_cache = {}
    for g in b:
        if g.name in cache:
            f = cache[g.name]
            if f.mode == Old:
                f.mode = Equal
            if g.name in second_cache:
                g.mode = Duplicated
            else:
                second_cache[g.name] = g
                g.mode = Equal
                # NOTE(review): nesting reconstructed -- the size check is
                # assumed to apply only to the first file of that name in b.
                if f.size != g.size:
                    if f.mode == Equal:
                        f.mode = Modified
                    g.mode = Modified
        else:
            if g.name in second_cache:
                g.mode = Duplicated
            else:
                g.mode = New
                second_cache[g.name] = g


# --------------------------------------------------------------------------

def directory_name(s):
    """Return the directory part of *s*, treating "\\" like "/"."""
    return os.path.dirname(s.replace("\\", "/"))


def print_map(a, b):
    """Print, per source directory of a, where its files are found in b.

    Files are matched by md5; each line shows a target directory and the
    number of matching files.  The source directory is printed only on the
    first line of its group.
    """
    md5_cache = {}
    for g in b:
        if g.md5 != "":
            md5_cache[g.md5] = g

    dir_cache = {}  # source directory -> {target directory: file count}
    for f in a:
        if f.md5 in md5_cache:
            g = md5_cache[f.md5]
            targets = dir_cache.setdefault(directory_name(f.name), {})
            t = directory_name(g.name)
            targets[t] = targets.get(t, 0) + 1

    for s in sorted(dir_cache.keys()):
        targets = dir_cache[s]
        first = True
        for t in sorted(targets.keys()):
            if first:
                label = s
            else:
                label = " "
            first = False
            print(label, "->", t, "(", targets[t], ")")


# --------------------------------------------------------------------------

def ascii_file_name(n):
    """Return *n* as text with accents stripped and wildcards replaced.

    A byte string is decoded as cp1250.  Characters outside the range
    " ".."\\x7f" are replaced by the first character of their NFD form;
    "*" and "?" become "_".
    """
    if isinstance(n, bytes):
        text = n.decode("cp1250")
    else:
        text = n

    pieces = []
    for c in text:
        if c < ' ' or c > '\x7f':
            pieces.append(unicodedata.normalize("NFD", c)[0])
        elif c == '*' or c == '?':
            pieces.append('_')
        else:
            pieces.append(c)
    return u"".join(pieces)


def win_file_name(n):
    """Quote *n* and turn "/" into "\\" (name is otherwise left as is)."""
    # n = unicode (n, "cp1250")
    return "\"" + n.replace("/", "\\") + "\""


def target_file_name(n):
    """Like win_file_name(), but on the ascii_file_name() form of *n*."""
    n = ascii_file_name(n)
    return "\"" + n.replace("/", "\\") + "\""


def recursive_mkdir(cache, d):
    """Print "mkdir" for *d* below opt.target, parents first.

    *cache* records the directories already handled so that each one is
    printed once.
    """
    if d not in cache:
        sub_dir = directory_name(d)
        if sub_dir != "":
            recursive_mkdir(cache, sub_dir)
        cache[d] = 1
        print("mkdir", target_file_name(os.path.join(opt.target, d)))


def copy_commands(a):
    """Print mkdir/copy commands for every file of *a* marked Old."""
    if opt.mkdir:
        print("mkdir", target_file_name(opt.target))
    cache = {}
    for f in a:
        if f.mode == Old:
            d = directory_name(f.name)
            recursive_mkdir(cache, d)
            s = win_file_name(os.path.join(opt.source, f.name))
            t = target_file_name(os.path.join(opt.target, d))
            print("copy ", s, t)


# --------------------------------------------------------------------------

def show_size(size):
    """Return *size* (bytes) as megabytes with one decimal, e.g. "1.5MB"."""
    megabytes = float(size) / 1000000
    if fpformat is not None:
        return fpformat.fix(megabytes, 1) + "MB"
    return "%.1f" % megabytes + "MB"


def fmt(num):
    """Return str(num) split into groups of three characters, from the
    right, separated by blanks (1234567 -> "1 234 567")."""
    rest = str(num)
    groups = []
    while rest:
        groups.insert(0, rest[-3:])
        rest = rest[:-3]
    return " ".join(groups)


def print_summary(txt, cnt, sum):
    """Print one right-aligned summary line: label, file count, bytes."""
    print(txt.rjust(32), fmt(cnt).rjust(7), "files,",
          fmt(sum).rjust(17), "bytes")


def summary(a, txt):
    """Print count and total size of all files in *a*."""
    count = 0
    total = 0
    for f in a:
        count += 1
        total += f.size
    print_summary(txt, count, total)


def summary2(a, txt, m):
    """Print count and total size of the files in *a* whose mode is *m*."""
    count = 0
    total = 0
    for f in a:
        if f.mode == m:
            count += 1
            total += f.size
    print_summary(txt, count, total)


# --------------------------------------------------------------------------

def extension_summary(a, txt, m):
    """Print a summary line per lower-cased extension for mode *m* files."""
    counts = {}
    totals = {}
    for f in a:
        if f.mode == m:
            ext = os.path.splitext(f.name)[1].lower()
            counts[ext] = counts.get(ext, 0) + 1
            totals[ext] = totals.get(ext, 0) + f.size

    print()
    for ext in sorted(counts.keys()):
        label = ext
        if label == "":
            label = "no extension"
        print_summary(label + " (" + txt + ")", counts[ext], totals[ext])


# --------------------------------------------------------------------------

def length_summary(a):
    """Print every file size that occurs more than once in *a*."""
    counts = {}
    for f in a:
        counts[f.size] = counts.get(f.size, 0) + 1

    print()
    for grp in sorted(counts.keys()):
        if counts[grp] > 1:
            print(counts[grp], "files", grp, "bytes")


# --------------------------------------------------------------------------

def size_summary(a, txt, m):
    """Print a summary line per size class for the mode *m* files of *a*.

    The class width is 100 KB below 1 MB, 1 MB below 10 MB, 10 MB below
    100 MB and 100 MB above; a file is filed under the next class boundary
    above its size.
    """
    counts = {}
    totals = {}
    for f in a:
        if f.mode == m:
            if f.size < 1000 * 1000:
                n = 100 * 1000
            elif f.size < 10 * 1000 * 1000:
                n = 1000 * 1000
            elif f.size < 100 * 1000 * 1000:
                n = 10 * 1000 * 1000
            else:
                n = 100 * 1000 * 1000
            # "//" keeps the integer division the Python 2 "/" performed.
            grp = (f.size // n + 1) * n
            counts[grp] = counts.get(grp, 0) + 1
            totals[grp] = totals.get(grp, 0) + f.size

    print()
    for grp in sorted(counts.keys()):
        if grp < 1000 * 1000:
            label = "<= " + str(grp // 1000) + " KB"
        else:
            label = "<= " + str(grp // (1000 * 1000)) + " MB"
        print_summary(label + " (" + txt + ")", counts[grp], totals[grp])


# --------------------------------------------------------------------------

def ignore_name(name):
    """Tell whether *name* lies in one of the skipped system folders."""
    # return False
    return name.startswith(("System Volume Information\\", "Recycled\\"))


# --------------------------------------------------------------------------

def with_suffix(name, suffixes):
    """Tell whether *name* ends with any entry of *suffixes*."""
    return name.endswith(tuple(suffixes))


def with_mask(name, mask):
    """Tell whether the regular expression *mask* matches within *name*."""
    return re.search(mask, name) is not None


def _cmp(x, y):
    """Three-way comparison (the cmp() built-in is gone in Python 3)."""
    return (x > y) - (x < y)


def compare_by_name(x, y):
    """Old-style comparison function ordering entries by name."""
    return _cmp(x.name, y.name)


# --------------------------------------------------------------------------

def listing(aa, bb):
    """Print the files found only in aa (Old) and only in bb (New).

    The "if 0" sections are further listings that are switched off.
    """
    if 0:
        for f in aa:
            if f.mode == Duplicated:
                print("Duplicated file in old directory ", f.name)

    if 0:
        for f in bb:
            if f.mode == Duplicated:
                print("Duplicated file in new directory ", f.name)

    for f in aa:
        if f.mode == Old:
            print("File only in old directory ", f.name, show_size(f.size))

    for f in bb:
        if f.mode == New:
            print("File only in new directory ", f.name, show_size(f.size))

    if 0:
        for f in aa:
            if f.mode == Equal:
                print("Equal old file ", f.name)

    if 0:
        for f in bb:
            if f.mode == Equal:
                print("Equal new file ", f.name)


# --------------------------------------------------------------------------

def _select(files, exclude, include, mask, min_size):
    """Return the entries of *files* that pass all print_lookup filters."""
    chosen = []
    for f in files:
        if ignore_name(f.name) or with_suffix(f.name, exclude):
            continue
        if include and not with_suffix(f.name, include):
            continue
        if mask != "" and not with_mask(f.name, mask):
            continue
        if f.size >= min_size:
            chosen.append(f)
    return chosen


def print_lookup(a, b, exclude=None, include=None, mask="", min_size=0):
    """Filter both classified file groups and print the requested reports.

    a, b      -- old and new file lists, already classified by lookup()
                 or lookup_by_name()
    exclude   -- name suffixes to drop
    include   -- if non-empty, only names with one of these suffixes pass
    mask      -- if non-empty, a regular expression the name must match
    min_size  -- smallest file size (bytes) to keep

    Which reports are printed is controlled by the global ``opt``.
    """
    exclude = tuple(exclude or ())
    include = tuple(include or ())

    aa = _select(a, exclude, include, mask, min_size)
    bb = _select(b, exclude, include, mask, min_size)
    aa.sort(key=lambda f: f.name)
    bb.sort(key=lambda f: f.name)

    cmds = opt.copy or opt.xcopy or opt.cp or opt.rsync or opt.scp

    if not opt.quick and not cmds:
        listing(aa, bb)

    if opt.size:
        size_summary(aa, "old", Old)
        size_summary(bb, "new", New)
        size_summary(aa, "equal", Equal)

    if opt.ext:
        extension_summary(aa, "old", Old)
        extension_summary(bb, "new", New)
        extension_summary(aa, "equal", Equal)

    if opt.sum or not cmds:
        print()
        summary(a, "Old files:")
        summary(aa, "Selected old files:")
        summary(b, "New files:")
        summary(bb, "Selected new files:")
        print()
        summary2(aa, "Duplications in old directory:", Duplicated)
        summary2(bb, "Duplications in new directory:", Duplicated)
        summary2(aa, "Only in old directory:", Old)
        summary2(bb, "Only in new directory:", New)
        summary2(aa, "Equal:", Equal)
        summary2(bb, "Equal:", Equal)

    if opt.create_map:
        print_map(aa, bb)

    if opt.copy:
        copy_commands(aa)


# --------------------------------------------------------------------------

class Command:
    """State built up by the small command functions below."""

    def __init__(self):
        self.first_set = []
        self.second_set = []
        self.current_set = self.first_set  # where read*() currently appends
        self.include_set = []
        self.exclude_set = []
        self.mask_set = ""
        self.min_size = 0


cmd = Command()


def only(ext):
    """Restrict the comparison to the given suffixes (both letter cases)."""
    for e in ext:
        cmd.include_set.append(e)
        cmd.include_set.append(e.upper())


def exclude(ext):
    """Drop files with the given suffixes (both letter cases)."""
    for e in ext:
        cmd.exclude_set.append(e)
        cmd.exclude_set.append(e.upper())


def mask(m):
    """Set the regular expression file names have to match."""
    cmd.mask_set = m


def minimal_size(n):
    """Set the smallest file size (bytes) taken into account."""
    cmd.min_size = n


def read(name):
    """Load scan file *name* into the current set."""
    readFile(cmd.current_set, name)


def read_dir(name):
    """Load the directory tree *name* into the current set."""
    scan_dir(cmd.current_set, name, "")


def read_lslR(name):
    """Load the "ls -lR" dump *name* into the current set."""
    scan_lslR(cmd.current_set, name)


def switch():
    """Make the second set the target of the following read*() calls."""
    cmd.current_set = cmd.second_set


def compare():
    """Classify first set against second set and print the reports."""
    if opt.no_md5:
        lookup_by_name(cmd.first_set, cmd.second_set)  # without md5 sums
    else:
        lookup(cmd.first_set, cmd.second_set)  # with md5 sums

    print_lookup(cmd.first_set, cmd.second_set,
                 cmd.exclude_set, cmd.include_set,
                 cmd.mask_set, cmd.min_size)


# --------------------------------------------------------------------------
# Named groups of scan files.
#
# NOTE(review): "# !?" and "# NO" are the author's own markers.  Lines
# starting with "# NO" are read() calls that were commented out; "# !?" is
# taken to be a trailing remark on the line it follows -- confirm.

# Suffixes excluded by compare_all_8() and compare_all().
_NON_MEDIA_SUFFIXES = [".info", ".thm", ".scn", ".db", ".xls",
                       ".exe", ".doc", ".pdf", ".zip"]


def all7():
    read("data/duo-diskn-foto.txt")  # !?
    read("data/duo-diskn-video.txt")  # !?
    # NO read ("data/wd-backup.txt") # !?
    read("data/wd-foto-canon-powershot-a70.txt")
    read("data/wd-foto-eos-foto-2008.txt")
    read("data/wd-foto-fotoarchiv.txt")
    read("data/wd-foto-raw.txt")
    read("data/wd-foto-selection-archive.txt")
    read("data/wd-video.txt")

    read("data2/wd-foto-2009-02-01.txt")

    read("data3/amd-diskf-2009-04-25.txt")
    read("data3/amd-diskg-2009-04-25.txt")
    read("data3/pentium4-diskf-2009-04-25.txt")
    read("data3/pentium4-diskg-2009-04-25.txt")
    # NO read ("data3/wd-backup-2009-04-25.txt") # !?
    read("data3/wd-foto-2009-04-25.txt")
    read("data3/wd-video-2009-04-25.txt")
    read("data3/wd-prev-2009-04-25.txt")  # !?

    read("data4/pentium4-diskf-2009-06-27.txt")
    read("data4/pentium4-diskg-2009-06-27.txt")
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")
    read("data4/duo-diskn-dalsi-2009-06-28.txt")  # !?
    read("data4/duo-diskn-fotoarchiv-2009-06-28.txt")  # !?

    read("data5/blue-fotky-2009-08-23.txt")
    read("data5/duo-store-foto-2009-08-22.txt")  # !?
    read("data5/duo-diskn-video-2009-08-22.txt")  # !?
    read("data5/duo-store-transfer-disk_n-fotoarchiv-2009-08-22.txt")  # !?

    read("data6/duo-diskn-foto-2009-10-04.txt")  # !?
    read("data6/duo-diskn-foto-orig-2009-10-04.txt")  # !?

    read("data7/duo-diskn-foto-2009-10-18.txt")  # !?
    read("data7/duo-diskn-fotoarchiv-2009-10-18.txt")  # !?
    read("data7/duo-diskn-video-2009-10-18.txt")  # !?
    read("data7/duo-store-fotografie-diskf-2009-10-17.txt")  # !?
    read("data7/duo-store-fotografie-diskg-2009-10-17.txt")  # !?
    read("data7/red-diskf-2009-10-17.txt")
    read("data7/red-diskg-2009-10-17.txt")


def all8():
    all7()
    read("data8/duo-diskn-fotoarchiv-2009-11-07.txt")
    read("data8/duo-diskn-video-2009-11-07.txt")  # !?
    # NO read ("data8/wd-backup-2009-11-21.txt") # !?
    read("data8/wd-foto-2009-11-08.txt")
    read("data8/wd-fotoarchiv-2009-11-08.txt")
    read("data8/wd-video-2009-11-08.txt")


def all9():
    all8()
    read("data9/amd-diskf-2010-06-26.txt")
    read("data9/amd-diskg-2010-06-26.txt")
    # NO read ("data9/wd-backup-2010-06-27.txt") # !?
    read("data9/wd-doplnky-2010-06-27.txt")
    read("data9/wd-foto-2010-06-27.txt")
    read("data9/wd-fotoarchiv-2010-06-27.txt")
    read("data9/wd-grafika-2010-06-27.txt")
    read("data9/wd-video-2010-06-27.txt")
    read("data9/pentium4-diskf-2010-06-18.txt")
    read("data9/pentium4-diskg-2010-06-18.txt")


def all():
    all9()


def compare_all_8():
    all7()
    switch()
    read("data8/wd-foto-2009-11-08.txt")
    read("data8/wd-fotoarchiv-2009-11-08.txt")
    read("data8/wd-video-2009-11-08.txt")
    exclude(_NON_MEDIA_SUFFIXES)
    compare()


def compare_all():
    all8()
    switch()
    read("data9/wd-doplnky-2010-06-27.txt")
    read("data9/wd-foto-2010-06-27.txt")
    read("data9/wd-fotoarchiv-2010-06-27.txt")
    read("data9/wd-grafika-2010-06-27.txt")
    read("data9/wd-video-2010-06-27.txt")
    exclude(_NON_MEDIA_SUFFIXES)
    compare()


# --------------------------------------------------------------------------

def wd3():
    read("data3/wd-foto-2009-04-25.txt")
    read("data3/wd-video-2009-04-25.txt")


def wd4():
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")


def wd8():
    read("data8/wd-foto-2009-11-08.txt")
    read("data8/wd-fotoarchiv-2009-11-08.txt")
    read("data8/wd-video-2009-11-08.txt")


def wd9():
    read("data9/wd-foto-2010-06-27.txt")
    read("data9/wd-fotoarchiv-2010-06-27.txt")
    read("data9/wd-video-2010-06-27.txt")
    read("data9/wd-doplnky-2010-06-27.txt")


def amd():
    read("data3/amd-diskf-2009-04-25.txt")
    read("data3/amd-diskg-2009-04-25.txt")


def amd9():
    read("data9/amd-diskf-2010-06-26.txt")
    read("data9/amd-diskg-2010-06-26.txt")


def blue():
    read("data5/blue-fotky-2009-08-23.txt")


def red():
    read("data7/red-diskf-2009-10-17.txt")
    read("data7/red-diskg-2009-10-17.txt")


def pentium():
    read("data9/pentium4-diskf-2010-06-18.txt")
    read("data9/pentium4-diskg-2010-06-18.txt")


def duo():
    read("data8/duo-diskn-fotoarchiv-2009-11-07.txt")


# --------------------------------------------------------------------------

def wd8f():
    read("data8/wd-foto-2009-11-08.txt")


def wd8a():
    read("data8/wd-fotoarchiv-2009-11-08.txt")


def wd8v():
    read("data8/wd-video-2009-11-08.txt")


def wd8b():
    read("data8/wd-backup-2009-11-21.txt")


# --------------------------------------------------------------------------

def wd9f():
    read("data9/wd-foto-2010-06-27.txt")


def wd9a():
    read("data9/wd-fotoarchiv-2010-06-27.txt")


def wd9v():
    read("data9/wd-video-2010-06-27.txt")


def wd9d():
    read("data9/wd-doplnky-2010-06-27.txt")


def wd9b():
    read("data9/wd-backup-2010-06-27.txt")


# --------------------------------------------------------------------------

def pf():
    read("data9/pentium4-diskf-2010-06-18.txt")


def pa():
    read("data9/sub/pentium4-diskg-fotoarchiv-2010-06-18.txt")


def pv():
    read("data9/sub/pentium4-diskg-video-2010-06-18.txt")


# --------------------------------------------------------------------------

def s1a():
    read("data10/branik-store1-fotoarchiv-2010-10-28.txt")


def s1v():
    read("data10/branik-store1-video-2010-10-28.txt")


def s2f():
    read("data10/branik-store2-foto-2010-10-28.txt")


def s2v():
    read("data10/branik-store2-video-2010-10-28.txt")


# --------------------------------------------------------------------------

def compare_wd3():
    read("data3/pentium4-diskf-2009-04-25.txt")
    read("data3/pentium4-diskg-2009-04-25.txt")
    switch()
    read("data3/wd-foto-2009-04-25.txt")
    read("data3/wd-video-2009-04-25.txt")
    exclude([".info"])
    compare()


def compare_wd4():
    read("data4/pentium4-diskf-2009-06-27.txt")
    read("data4/pentium4-diskg-2009-06-27.txt")
    switch()
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")
    exclude([".info"])
    compare()


def compare_amd():
    read("data3/amd-diskf-2009-04-25.txt")
    read("data3/amd-diskg-2009-04-25.txt")
    switch()
    read("data3/pentium4-diskf-2009-04-25.txt")
    read("data3/pentium4-diskg-2009-04-25.txt")
    exclude([".info"])
    compare()


def compare_backup():
    read("data3/wd-backup-2009-04-25.txt")
    read("data3/wd-prev-2009-04-25.txt")
    switch()
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")
    exclude([".info"])
    compare()


# --------------------------------------------------------------------------

def compare_blue():
    read("data4/duo-diskn-fotoarchiv-2009-06-28.txt")
    read("data5/duo-diskn-video-2009-08-22.txt")
    read("data5/duo-store-foto-2009-08-22.txt")
    switch()
    read("data5/blue-fotky-2009-08-23.txt")
    compare()

    # mask ( "/[a-zA-Z0-9]{3}_[0-9]{4}\\." )
    # mask ( "/[a-zA-Z0-9]{3}_[0-9]{4}" )
    # mask ( "/[iI][mM][gG]_[0-9]{4}" )


# --------------------------------------------------------------------------

def compare_by_e(x, y):
    """Old-style comparison function: by e_model, e_number, then e_time."""
    return (_cmp(x.e_model, y.e_model)
            or _cmp(x.e_number, y.e_number)
            or _cmp(x.e_time, y.e_time))


def check_stamps(a, key, n1, n2):
    """Count the files whose e_model contains *key*, then print the count.

    Such a file is reported when its name contains *n1* or *n2*.
    """
    cnt = 0
    for f in a:
        if f.e_model.find(key) != -1:
            cnt = cnt + 1
            if f.name.find(n1) != -1 or f.name.find(n2) != -1:
                print("Inconsitent name: ", f.e_model, f.e_number, f.name)
    print(key, cnt)


def read_stamps():
    """Report files that share an md5 but differ in their e_* fields.

    The "if 0" sections are further checks that are switched off.
    """
    a = []
    # readFile (a, "data7/exif-duo-store-diskf-2009-10-24.txt")
    # readFile (a, "data7/exif-duo-store-diskg-fotoarchiv-2009-10-24.txt")
    readFile(a, "data8/wd-foto-2009-11-08.txt")
    readFile(a, "data8/wd-fotoarchiv-2009-11-08.txt")

    cache = {}
    for f in a:
        if f.md5 in cache:
            g = cache[f.md5]
            eq = (f.e_model == g.e_model
                  and f.e_number == g.e_number
                  and f.e_width == g.e_width
                  and f.e_height == g.e_height
                  and f.e_time == g.e_time)
            if not eq:
                print("Inconsitent info: ", f.name, f.e_model, f.e_number,
                      f.e_width, f.e_height, f.e_time)
        else:
            cache[f.md5] = f

    if 0:
        queue = list(a)
        queue.sort(key=lambda f: (f.e_model, f.e_number, f.e_time))
        for f in queue:
            print(f.e_number, f.e_width, f.e_height, f.e_model, f.e_time,
                  f.name)

    if 0:
        check_stamps(a, "A70", "A200", "EOS")
        check_stamps(a, "A2000", "EOS", "A70")
        check_stamps(a, "EOS", "A2000", "A70")

    if 0:
        mask = "/[a-zA-Z0-9]{3}_[0-9]{4}\\."
        for f in a:
            if f.e_model != "":
                if re.search(mask, f.name):
                    print("Short name", f.e_model, f.e_number, f.name)


# --------------------------------------------------------------------------

def load(n):
    """Load the input named by *n* into the current set.

    An explicit prefix selects the kind of input:
        proc:NAME   call the function NAME defined in this file
        file:PATH   scan file
        lslr:PATH   "ls -lR" dump
        dir:PATH    directory tree
    Without a prefix, a name ending in ".lslr"/".lslR" is an "ls -lR" dump,
    a bare word is a function name, and anything else is a scan file.
    """
    kind = ""
    if n.startswith("proc:"):
        kind = "proc"
        n = n[5:]
    elif n.startswith("file:"):
        # Fixed: this used to select "proc", so the path was evaluated as a
        # function call instead of being read.
        kind = "file"
        n = n[5:]
    elif n.startswith("lslr:"):
        kind = "lslr"
        n = n[5:]
    elif n.startswith("dir:"):
        kind = "dir"
        n = n[4:]
    elif n.endswith(".lslr") or n.endswith(".lslR"):
        kind = "lslr"
    # NOTE(review): the first character class admits "-", not "_" --
    # possibly a typo; left unchanged.
    elif re.match("^[a-zA-Z-][a-zA-Z0-9_]*$", n):
        kind = "proc"
    else:
        kind = "file"

    if kind == "proc":
        # NOTE(review): eval() runs whatever expression the command line
        # supplies; acceptable only while the arguments are trusted.
        eval(n + "()")
    elif kind == "file":
        read(n)
    elif kind == "lslr":
        read_lslR(n)
    elif kind == "dir":
        read_dir(n)
    else:
        print("Unknown format", kind, n)
        sys.exit()


# --------------------------------------------------------------------------

def command_line_options():
    """Translate the filter options in ``opt`` into command calls."""
    if opt.exclude:
        exclude([".info", ".db", ".thm", ".scn"])
    if opt.large:
        minimal_size(1000 * 1000)
    if opt.foto:
        only([".jpg", ".jpeg", ".cr2"])
    if opt.video:
        only([".avi", ".mpg", ".mpeg"])
    if opt.jpeg:
        only([".jpg", ".jpeg"])
    if opt.cr2:
        only([".cr2"])
    if opt.avi:
        only([".avi"])
    if opt.mpeg:
        only([".mpg", ".mpeg"])


# --------------------------------------------------------------------------

def main():
    """Entry point: compare --first against --second, or run --run NAME."""
    read_options()
    # read_stamps ()
    command_line_options()

    if opt.run == "":
        for n in opt.first:
            load(n)
        switch()
        for n in opt.second:
            load(n)
        compare()
    else:
        # NOTE(review): eval() of a command-line value, see load().
        eval(opt.run + "()")


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------