#! /usr/bin/env python
"""Print one metadata line per file of a directory tree or archive.

For every file below a top directory the script prints the MD5 checksum,
the URL-quoted relative path, the size and the modification time.  With
``--list`` the first argument is read as a saved ``ls -lR`` listing
instead of being scanned on disk.

Usage: script [-q] [-e] [-l] [-p DIR] TOP_DIR [REL_DIR]

The code runs unchanged on Python 2.7 and Python 3.
"""

import sys
import os
import time
from stat import *
import hashlib
import urllib
import tarfile
import zipfile
import logging
import optparse
# import ConfigParser

try:
    from urllib import quote as _url_quote  # Python 2
except ImportError:
    from urllib.parse import quote as _url_quote  # Python 3

# Placeholder printed instead of a checksum when none is calculated.
NO_SUM = '-' * 32

# --------------------------------------------------------------------------


class Options:
    """Command line settings; read_options() fills in an instance."""

    def __init__(self):
        self.quick = False     # True => skip md5 checksums
        self.exif = False      # True => read exif information
        self.dir_list = False  # True => read lslR file
        self.path = ""         # additional directory with Python modules


# Module-level defaults so the scan functions also work when this file is
# imported; main() replaces them via init_log() and read_options().
opt = Options()
arg = []
log = logging.getLogger(__name__)

# --------------------------------------------------------------------------


def _emit(*fields):
    """Write the fields to stdout separated by single spaces, plus newline.

    Produces the same text as the Python 2 statement ``print a, b, c``
    while staying valid syntax on Python 3.
    """
    sys.stdout.write(" ".join(str(field) for field in fields) + "\n")


def init_imports():
    """Extend sys.path with opt.path and import pyexiv2 if --exif is set."""
    if opt.path != "":
        sys.path.append(opt.path)

    if opt.exif:
        # Imported lazily so the module is only required with --exif.
        global pyexiv2
        import pyexiv2

# --------------------------------------------------------------------------


def init_log():
    """Configure logging: DEBUG and above to log.txt, INFO and above to stderr.

    Also binds the global ``log`` to a logger named after the script.
    """
    # set up logging to file
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s %(name)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%d %H:%M",
        filename="log.txt",
        filemode="a")
    # "%(filename)s %(module)s %(funcName)s %(lineno)s %(msecs)s"

    # define a Handler which writes INFO messages or higher to sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    # set a format which is simpler for console use
    console.setFormatter(
        logging.Formatter("%(name)s: %(levelname)s: %(message)s"))
    # add the handler to the root logger
    logging.getLogger("").addHandler(console)

    global log
    log = logging.getLogger(os.path.basename(sys.argv[0]))

# --------------------------------------------------------------------------


def read_options():
    """Parse the command line into the globals ``opt`` and ``arg``.

    Exits through parser.error() when no positional argument or more
    than two positional arguments are given.
    """
    parser = optparse.OptionParser()

    parser.add_option("-q", "--quick", dest="quick", action="store_true")
    parser.add_option("-e", "--exif", dest="exif", action="store_true")
    parser.add_option("-l", "--list", dest="dir_list", action="store_true",
                      help="file name with ls -lR output")
    parser.add_option("-p", "--path", dest="path",
                      help="add into PYTHONPATH", default="")

    global opt
    global arg

    # Passing our own values object keeps the defaults set in Options.
    opt = Options()
    (opt, arg) = parser.parse_args(values=opt)

    # if len (arg) > 1 :
    if len(arg) > 2:
        parser.error("too many arguments")

    if len(arg) == 0:
        parser.error("missing argument")

# --------------------------------------------------------------------------


def conv_time(t):
    """Format the timestamp ``t`` as UTC ``YYYY-MM-DD:HH:MM:SS``."""
    return time.strftime("%Y-%m-%d:%H:%M:%S", time.gmtime(t))


def conv_str(s):
    """URL-quote ``s``, leaving ``:`` and ``/`` unescaped."""
    return _url_quote(s, ":/")


def calc_sum(file_name):
    """Return the hexadecimal MD5 digest of the content of ``file_name``."""
    digest = hashlib.md5()
    # The with block closes the file even when a read fails.
    with open(file_name, "rb") as stream:
        for chunk in iter(lambda: stream.read(32 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def scan_file(top_dir, rel_dir, loc_file_name):
    """Print checksum, quoted relative name, size and mtime of one file.

    The checksum is replaced by dashes with --quick and for anything that
    is not a regular file.  A file that cannot be examined or read is
    reported as a warning and skipped, so one bad entry does not abort
    the whole scan.
    """
    rel_file_name = os.path.join(rel_dir, loc_file_name)
    file_name = os.path.join(top_dir, rel_file_name)
    out_rel_name = conv_str(rel_file_name)

    # log.info ("scanning " + out_rel_name)

    try:
        info = os.stat(file_name)
        if opt.quick:
            checksum = NO_SUM
        elif S_ISREG(info[ST_MODE]):
            checksum = calc_sum(file_name)
        else:
            checksum = NO_SUM
            log.warning("strange file " + out_rel_name)
    except (IOError, OSError) as err:
        # e.g. broken symbolic link or missing read permission
        log.warning("cannot read " + out_rel_name + ": " + str(err))
        return

    size = "size=" + str(info[ST_SIZE])
    m_time = "mtime=" + conv_time(info[ST_MTIME])

    exif_info = ""
    if opt.exif:
        exif_info = scan_exif(file_name)

    _emit(checksum, out_rel_name, size, m_time, exif_info)


def scan_dir(top_dir, rel_dir):
    """Recursively scan ``rel_dir`` below ``top_dir`` in sorted name order."""
    log.info("scanning " + conv_str(rel_dir))
    dir_name = os.path.join(top_dir, rel_dir)

    for loc_file_name in sorted(os.listdir(dir_name)):
        rel_file_name = os.path.join(rel_dir, loc_file_name)
        file_name = os.path.join(top_dir, rel_file_name)
        if os.path.isdir(file_name):
            scan_dir(top_dir, rel_file_name)
        else:
            scan_file(top_dir, rel_dir, loc_file_name)

# --------------------------------------------------------------------------


def scan_zip(zip_file_name):
    """Print checksum, name, size and mtime for every member of a zip file."""
    with zipfile.ZipFile(zip_file_name, mode="r") as zip_file:
        # zip_file.printdir ()
        for info in zip_file.infolist():
            size = "size=" + str(info.file_size)
            # date_time is a 6-tuple (year, month, day, hour, min, sec).
            # The key is "mtime=" like in the other scanners.
            m_time = "mtime=" + (
                "%4i-%02i-%02i:%02i:%02i:%02i" % info.date_time)
            checksum = hashlib.md5(zip_file.read(info)).hexdigest()
            _emit(checksum, info.filename, size, m_time)

# --------------------------------------------------------------------------


def scan_tar(tar_file_name):
    """Print checksum and metadata for every regular file in a tar archive."""
    with tarfile.open(tar_file_name, mode="r") as tar_file:
        # tar_file.list ()
        for info in tar_file.getmembers():
            if not info.isfile():
                continue

            size = "size=" + str(info.size)
            mode = "mode=" + ("%o" % info.mode)
            uid = "uid=" + str(info.uid)
            gid = "gid=" + str(info.gid)
            m_time = "mtime=" + conv_time(info.mtime)

            # Stays empty when the member content cannot be extracted.
            checksum = ""
            stream = tar_file.extractfile(info)
            if stream:
                digest = hashlib.md5()
                try:
                    for chunk in iter(lambda: stream.read(16 * 1024), b""):
                        digest.update(chunk)
                finally:
                    stream.close()
                checksum = digest.hexdigest()

            _emit(checksum, info.name, size, mode, uid, gid, m_time)

# --------------------------------------------------------------------------


def exif_key(name, img, key):
    """Return ``name=<quoted value> `` for an exif key, or "" if absent."""
    txt = ""
    if key in img.exifKeys():
        value = img.interpretedExifValue(key)
        txt = name + "=" + conv_str(value) + " "
    return txt


def scan_exif(file_name):
    """Return selected exif fields of ``file_name`` as ``key=value `` text.

    Reading exif data is best effort: any failure is logged at debug
    level and whatever was collected so far (possibly "") is returned.
    """
    txt = ""
    try:
        # import pyexiv2
        img = pyexiv2.Image(file_name)
        img.readMetadata()
        txt += exif_key("e_time", img, 'Exif.Image.DateTime')
        txt += exif_key("e_width", img, 'Exif.CanonPi.ImageWidth')
        txt += exif_key("e_height", img, 'Exif.CanonPi.ImageHeight')
        txt += exif_key("e_model", img, 'Exif.Image.Model')
        txt += exif_key("e_number", img, 'Exif.Canon.ImageNumber')
    except Exception as err:
        log.debug("no exif data for %s: %s", conv_str(file_name), err)
    return txt

# --------------------------------------------------------------------------


def scan_lslR(file_name):
    """Print one line per entry of a saved ``ls -lR`` listing.

    Lines ending in ":" set the current directory, empty lines and
    "total ..." lines are ignored.  A line with a single field is taken
    as a bare name; a line with eight or more fields is read as
    mode, link count, uid, gid, size, date, time, name.  Lines with two
    to seven fields are skipped.
    """
    directory = ""
    with open(file_name, "r") as listing:
        for line in listing:
            line = line.strip()
            # Diagnostic echo goes to the log, not into the result stream.
            log.debug("line  %s", line)

            if line == "":
                continue
            if line.endswith(":"):
                directory = line[0:-1]
                if directory == ".":
                    directory = ""
                elif directory.startswith("./"):
                    directory = directory[2:]
                continue
            if line.startswith("total "):
                continue

            # At most 8 fields: the last one is the complete file name,
            # so white space inside the name is kept exactly as listed.
            items = line.split(None, 7)
            item_count = len(items)
            if item_count != 1 and item_count != 8:
                continue

            name = items[-1]
            # Drop a trailing type indicator ("/" or "*").
            if name.endswith('/') or name.endswith('*'):
                name = name[0:-1]
            if name == "":
                continue

            name = "name=" + conv_str(os.path.join(directory, name))

            if item_count == 1:
                _emit(name)
            else:
                mode = "mode=" + items[0]
                # items [1] ... link count
                uid = "uid=" + items[2]
                gid = "gid=" + items[3]
                size = "size=" + items[4]
                m_time = "mtime=" + items[5] + ':' + items[6]
                _emit(name, size, mode, uid, gid, m_time)

# --------------------------------------------------------------------------


def main():
    """Set up logging and options, then scan the directory or listing."""
    init_log()
    read_options()
    init_imports()

    top_dir = os.path.expanduser(arg[0])

    top_rel = ""
    if len(arg) > 1:
        top_rel = arg[1]

    if opt.dir_list:
        # With --list the first argument names the listing file.
        scan_lslR(top_dir)
    else:
        scan_dir(top_dir, top_rel)


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------

# scan_zip ("/mnt/store/Transfer/disk_n/Work.zip")
# scan_tar ("/mnt/store/product/packages/gtk-2.8.16-mandrake-10.2.tgz")

# --------------------------------------------------------------------------