import os
import pickle
import sys
import shutil
BACKUP_DRIVE_PATH ="E:\\Guru\\backup"
def dumpfileset(top_dir):
    """
    Build the snapshot set for top_dir and persist it as ``top_dir/.set``.

    The set is produced by buildfileset(top_dir) and pickled into a file
    named ".set" directly inside top_dir, overwriting any snapshot left
    there by a previous run.

    Note: the snapshot is built before the file is written, and the ".set"
    file lives inside the tree that buildfileset walks, so a ".set" file
    left by an earlier run is itself part of the snapshot.

    :param top_dir: directory to snapshot; also where ".set" is written.
    :return: the set that was built and saved.
    :raises IOError/OSError: if the ".set" file cannot be opened for writing.
    """
    fset = buildfileset(top_dir)
    set_file_path = os.path.join(top_dir, ".set")
    print("Saving set file in "+set_file_path)
    # Use a context manager so the handle is flushed and closed even if
    # pickling fails; the bare open() used previously leaked the handle.
    with open(set_file_path, "wb") as set_file:
        pickle.dump(fset, set_file)
    return fset
def readfileset(top_dir):
    """
    Load the snapshot previously pickled to ``top_dir/.set``.

    If the ".set" file does not exist or cannot be opened, an empty set is
    returned. An empty, truncated or otherwise undecodable ".set" file
    (for example one left behind by an interrupted save) is also treated as
    "no previous snapshot", so every entry counts as changed on this run
    instead of the program aborting.

    :param top_dir: directory that holds the ".set" file.
    :return: the unpickled object (expected to be the set written by
             dumpfileset), or an empty set when no usable snapshot exists.
    """
    set_file_path = os.path.join(top_dir, ".set")
    try:
        # NOTE(review): pickle.load executes whatever the file describes;
        # only point this tool at directories whose ".set" file you trust.
        with open(set_file_path, "rb") as set_file:
            fset = pickle.load(set_file)
    except IOError:
        # No snapshot yet (or it is not readable): start from scratch.
        fset = set()
    except (EOFError, pickle.UnpicklingError):
        # Snapshot exists but is empty/corrupt: fall back to a full backup.
        print("Ignoring unreadable set file "+set_file_path)
        fset = set()
    return fset
def buildfileset(top_dir):
    """
    Walk top_dir recursively and return a set describing everything in it.

    Each directory and file found below top_dir (top_dir itself is not
    included) contributes one string of the form

        <full path> + "~*~" + str(<last modification time>)

    Using a set is inefficient but keeps the program simple: a plain
    ``new_set - old_set`` yields every entry that is new or modified since
    the previous snapshot. The cost is that every entry is compared
    individually; a tree structure could, for example, recognise that a
    newly created directory implies all 1000 files inside it are new
    without checking each one.

    Entries that cannot be stat'ed (a dangling symlink, or a file removed
    while the walk is in progress) are reported and left out of the set
    rather than aborting the whole scan.

    :param top_dir: root directory to scan.
    :return: set of "path~*~mtime" strings.
    """
    fset = set()
    for root, dirs, files in os.walk(top_dir):
        for name in dirs + files:
            full_path = os.path.join(root, name)
            try:
                mtime = os.stat(full_path).st_mtime
            except OSError:
                # os.stat follows symlinks, so broken links and entries that
                # vanished mid-walk land here; they could not be copied anyway.
                print("Skipping unreadable entry "+full_path)
                continue
            fset.add(full_path + "~*~" + str(mtime))
    return fset
def getbackuppath(file):
    """
    Return the path under BACKUP_DRIVE_PATH where `file` will be backed up.

    The drive part of `file` (as reported by os.path.splitdrive) and any
    leading path separators are removed, and the remainder is appended to
    BACKUP_DRIVE_PATH. Previously the first three characters were dropped
    unconditionally, which is only correct for paths of the form "X:\\..."
    and mangles relative paths or paths with other prefixes.

    :param file: source path of a file or directory.
    :return: corresponding path inside the backup directory.
    """
    relative_part = os.path.splitdrive(file)[1]
    # A leading separator would make os.path.join discard BACKUP_DRIVE_PATH.
    separators = os.sep + (os.altsep or "")
    return os.path.join(BACKUP_DRIVE_PATH, relative_part.lstrip(separators))
if __name__ == "__main__":
    # Script entry point: every command-line argument is a directory to back
    # up. For each one, the previous snapshot is compared with a fresh one
    # and only new/modified entries are copied under BACKUP_DRIVE_PATH.
    if not os.path.isdir(BACKUP_DRIVE_PATH):
        print("Backup directory is not accessible.Exiting........"+BACKUP_DRIVE_PATH)
        sys.exit(1)

    dirs_to_copy = []
    files_to_copy = []
    for source_dir in sys.argv[1:]:
        # NOTE(review): the new snapshot is saved before anything is copied,
        # so a failure during the copy phase is not retried on the next run.
        old_set = readfileset(source_dir)
        new_set = dumpfileset(source_dir)
        for entry in new_set - old_set:
            # Entries look like "<path>~*~<mtime>". Split on the LAST full
            # marker so that paths which themselves contain "~" stay intact.
            changed_path = entry.rsplit("~*~", 1)[0]
            if os.path.isdir(changed_path):
                dirs_to_copy.append(changed_path)
            else:
                files_to_copy.append(changed_path)

    # Sorted order puts parent directories ahead of their children.
    dirs_to_copy.sort()

    # Make sure the backup root for every requested directory exists.
    for source_dir in sys.argv[1:]:
        dir_backup_path = getbackuppath(source_dir)
        if not os.path.isdir(dir_backup_path):
            print("Mkdir "+dir_backup_path)
            os.makedirs(dir_backup_path)

    # Recreate new/changed directories; makedirs also creates any missing
    # intermediate directories instead of failing like mkdir would.
    for changed_dir in dirs_to_copy:
        dir_backup_path = getbackuppath(changed_dir)
        if not os.path.isdir(dir_backup_path):
            print("Mkdir "+dir_backup_path)
            os.makedirs(dir_backup_path)

    for changed_file in files_to_copy:
        file_backup_path = getbackuppath(changed_file)
        # The file's directory may be unchanged (so not in dirs_to_copy) yet
        # absent from the backup; create it so the copy cannot fail on that.
        parent_backup_path = os.path.dirname(file_backup_path)
        if parent_backup_path and not os.path.isdir(parent_backup_path):
            print("Mkdir "+parent_backup_path)
            os.makedirs(parent_backup_path)
        print("Copying "+file_backup_path)
        shutil.copy(changed_file, file_backup_path)