# FileArchiver.py
import sys
from time import *
from os import *
from zipfile import *
from string import *
from stat import *

#############################################################################
# This script scans a specified directory looking
# for files older than a specified number of days.
# Age is measured by the date at which the file was
# most recently modified.
#
# When such files are discovered, they are moved from
# their current directory into a specified archive
# directory and packaged into a compressed zip file.
#
# File type filtering is an optional runtime switch, whereby
# only files with the specified extension (case insensitive
# match) will be archived.
#
# COMMAND LINE ARGUMENTS :
#   workingDirectory archiveDirectory maxAgeInDays [fileExtension]
#
# Author:   Michael Fitzmaurice, December 2001
#############################################################################

# lookup table to convert months from integers (index 0 is January)
MONTHS_AS_STRINGS = ['Jan', 'Feb', 'Mar', 'Apr',
                     'May', 'Jun', 'Jul', 'Aug',
                     'Sep', 'Oct', 'Nov', 'Dec']

#############################################################
# Returns the date as a string in concise format
#
# param:    time - The date/time (integer) represented as
#           seconds since the epoch; it is broken down with
#           gmtime(), i.e. interpreted as UTC
#
# returns:  The date in the format ddmmmyyyy, e.g. 03Dec2001.
#           The day is always zero-padded to two digits so
#           that the result matches the documented format.
#############################################################
def formatTime(time):
    # gmtime() yields (year, month, day, ...) with month in 1 - 12
    year, month, day = gmtime(time)[:3]
    # the lookup table is zero-based, hence month - 1
    return '%02d%s%d' % (day, MONTHS_AS_STRINGS[month - 1], year)

#############################################################
# Builds the name of the archive (zip) file from the period
# that the archive covers
#
# param:    startTime - Start of the archive period, as an
#           integer number of seconds since the epoch
# param:    EndTime - End of the archive period, as an
#           integer number of seconds since the epoch
#
# returns:  Both dates as rendered by formatTime, joined by
#           a hyphen and given a .zip extension,
#           e.g. 15Dec2001-24Dec2001.zip
#############################################################
def makeArchiveFilename(startTime, EndTime):
    periodBounds = [formatTime(startTime), formatTime(EndTime)]
    return '-'.join(periodBounds) + '.zip'

#############################################################
# Filters files according to filetype, as determined by extension.
# Filetype matching is case-insensitive: both the filename and the
# desired extension are upper-cased before they are compared, so
# the caller does not need to normalise either argument.
#
# param:    filename - The name of the file to be tested
# param:    desiredExtension - The extension of files that should
#           be accepted. An empty extension matches every filename.
#
# returns:  True (which compares equal to 1) if the filename ends
#           with the desired extension, otherwise False (0)
#############################################################
def acceptFile(filename, desiredExtension):
    # str methods are used instead of string.upper() so the function
    # does not depend on the legacy string-module helpers
    return filename.upper().endswith(desiredExtension.upper())

# the three leading arguments are mandatory - stop with a usage message
# (written to stderr, exit status 1) instead of an IndexError traceback
if len(sys.argv) < 4:
    sys.exit('USAGE : FileArchiver.py workingDirectory archiveDirectory '
             'maxAgeInDays [fileExtension]')

# NOTE: print is always called with a single parenthesised argument, which
# produces identical output whether print is a statement or a function
print('File filtering on ' + str( len(sys.argv) > 4 ))

# the directories we will shift files between
workingDir  = sys.argv[1]
archiveDir  = sys.argv[2]
# maximum age of a file in days
daysOld     = float(sys.argv[3])
# check optional argument for file type (extension)
if len(sys.argv) > 4:
    useFilter   = 1
    # upper-cased for case-insensitive matching
    fileType    = sys.argv[4].upper()
else:
    useFilter = 0
# get time in seconds at start of program
timeNow     = time()
# the age (in secs) beyond which files are to be archived
maxAge      = ( (60 * 60 * 24) * daysOld )

# examine all files in the working directory - record which
# should be archived, as well as the oldest and newest of
# these files (to determine the boundaries of the archive
# period)
filesToArchive      = []
startOfPeriod       = timeNow
endOfPeriod         = 0
workingDirContents  = listdir(workingDir)

for x in workingDirContents:
    # 'path' is os.path (in scope through 'from os import *'); join()
    # uses the platform's own separator rather than a hard-coded '\\'
    fullPath        = path.join(workingDir, x)
    # stat function gets the 10 'most important integers'
    # providing info about the file - time modified (secs) is
    # one element in the resultant tuple
    fileDetails     = stat(fullPath)
    timeModified    = fileDetails[ST_MTIME]
    fileAge         = timeNow - timeModified

    if not (fileAge > maxAge):
        print(fullPath + ' IS WITHIN AGE LIMIT')
        continue

    # make sure we only try to archive files - leave directories alone
    if S_ISDIR(fileDetails[ST_MODE]):
        print(fullPath + ' IS A DIRECTORY - WILL NOT BE ARCHIVED')
        continue

    if useFilter == 1 and not acceptFile(x, fileType):
        print(fullPath + ' DOES NOT MATCH THE SUPPLIED FILE TYPE')
        continue

    print(fullPath + ' IS A FILE & WILL BE ARCHIVED')
    filesToArchive.append(fullPath)
    # adjust archive period boundaries if necessary
    startOfPeriod   = min(startOfPeriod, timeModified)
    endOfPeriod     = max(endOfPeriod, timeModified)

if filesToArchive:
    # create the (archive)zip file and write relevant (compressed)
    # files out to it
    arcFileName = makeArchiveFilename(startOfPeriod, endOfPeriod)
    archiveFile = ZipFile(path.join(archiveDir, arcFileName), 'w', ZIP_DEFLATED)
    print('CREATED ARCHIVE FILE : ' + arcFileName)

    try:
        for x in filesToArchive:
            print('WRITING ' + x + ' TO ARCHIVE FILE')
            archiveFile.write(x)
    finally:
        archiveFile.close()

    # delete the originals only once every file has been written and the
    # archive has been closed without error; if a write or the close
    # raises, this point is never reached and no original is lost
    for x in filesToArchive:
        remove(x)

# FileArchiver.py