#!/usr/bin/env python
import os,sys,time,string,shutil
from pyPgSQL import PgSQL

# requires file, gzip, bzip2, unzip, tar, wget, find, rm, chmod, sloccount
# and pyPgSQL

# Connection string passed verbatim to PgSQL.connect().
DSN = "localhost:5432:survey:nobody"
# Working directory: the script chdir()s here, and tarballs are fetched,
# unpacked and cleaned up inside it.
SCRATCH = "/tmp"
WGET = "/usr/local/bin/wget"
SLOCCOUNT = "/usr/local/bin/sloccount"
# Handed to sloccount as its --datadir.
SLOCDIR = SCRATCH + "/.sloccount"
# Names of autogenerated files that are deleted from the unpacked tree
# before counting.  Each entry is interpolated, unquoted, into a
# `find . -name ...` shell command, so glob characters carry a literal
# backslash to stop the shell from expanding them before find sees them.
# The backslash is always spelled "\\" so that no entry depends on Python
# passing an unrecognised escape sequence through unchanged.
MURDER = [
    "mkinstalldirs",
    "configure",
    "install-sh",
    "depcomp",
    "config.sub",
    "config.guess",
    "aclocal.m4",
    "config.rpath",
    "acinclude.m4",
    "missing",
    "ltmain.sh",
    "ltconfig.sh",
    "\\*~",
    "ltconfig",
    "libtool",
    "config.status",
    "debianrules",
    "old-ltconfig",
    "ylwrap",
    "old-ltcf\\*.sh",
    "am_edit",
    "am_edit.py",
    "ltcf\\*.sh",
    "conf_change.pl",
    "conf.change.pl",
    "cvs-clean.pl",
    "cvs.sh",
    "old-ltmain.sh",
    "new-ltmain.sh",
]  # Makefile.in is handled specially

def d(s):
    """Write the string *s*, followed by a newline, to standard error."""
    line = s + "\n"
    sys.stderr.write(line)

# sloccount runs other programs; export a PATH to child processes on which
# those dependencies can be found.
os.putenv("PATH", "/usr/bin:/usr/local/bin:/usr/sbin:/bin:.")

# Two arguments are mandatory: the project id (an integer) and the tarball,
# given either as a URL or as a file name.
if len(sys.argv) <= 2:
    raise TypeError("Usage: process_tarball.py <project id> <tarball>")

project, url = int(sys.argv[1]), sys.argv[2]

# Everything below works inside the scratch directory.
os.chdir(SCRATCH)

# Only a *leading* scheme marks the argument as remote; a scheme-like
# substring elsewhere in a local file name must not trigger a download.
is_remote = url.startswith("http://") or url.startswith("ftp://")
file = os.path.basename(url)

if is_remote:
    # Drop any copy left behind by an earlier run so that the existence
    # check below reflects this download.
    if os.path.exists(file):
        os.unlink(file)
    # Run wget directly instead of through a shell: the URL comes from the
    # command line and may contain quotes or shell metacharacters.
    os.spawnv(os.P_WAIT, WGET, [WGET, "-P", SCRATCH, url])
else:  # specified tarball on commandline
    # NOTE: we have already changed into SCRATCH, so a relative file name
    # is looked up there.
    if not os.path.exists(url):
        raise TypeError("File %s not found in %s" % (url, os.getcwd()))
    # Copy the tarball into the scratch directory unless a file of that
    # name is already present.
    if not os.path.exists(file):
        shutil.copyfile(url, file)

if not os.path.exists(file):
    raise TypeError("File %s not downloaded properly into %s" % (file, os.getcwd()))

print("-" * 72)

# Convert whatever we were given into a gzipped tar archive so that every
# project is measured in the same compression format.
# `file -b` prints only the description, not the file name, so a tarball
# whose own name contains "gzip", "bzip" or "Zip" is not mis-detected.
probe = os.popen("file -b %s" % file)
t = probe.read()
probe.close()
if t.find("gzip") != -1:
    pass
elif t.find("bzip") != -1:
    # convert to gzip so we have the same compression format to avoid
    # skewing tarball size
    # Work out the decompressed name from the actual suffix instead of
    # blindly chopping four characters; the mapping mirrors the names
    # bzip2 itself picks when decompressing.
    if file.endswith(".bz2"):
        stub = file[:-4]
    elif file.endswith(".tbz2"):
        stub = file[:-5] + ".tar"
    elif file.endswith(".tbz"):
        stub = file[:-4] + ".tar"
    elif file.endswith(".bz"):
        stub = file[:-3]
    else:
        stub = file + ".out"
    # gzip -f overwrites a stale output from an earlier run rather than
    # silently leaving it in place to be measured.
    os.system("bzip2 -dc %s > %s && rm -f %s && gzip -f %s" % (file, stub, file, stub))
    file = stub + ".gz"
elif t.find("Zip") != -1:
    # Unpack the zip into a directory named after the archive, fix up
    # permissions, and re-pack that directory as a .tar.gz.
    stub = file.split(".")[:-1]
    stub = ".".join(stub)
    os.system("unzip -o -d %s %s" % (stub, file))
    os.system("cd %s; find . -type d -exec chmod +x {} \\;" % stub)
    os.system("chmod -R u+w,a+r %s" % stub)
    os.system("tar cvfz %s.tar.gz %s" % (stub, stub))
    os.unlink(file)
    file = stub + ".tar.gz"
elif t.find("tar archive") != -1:
    # Matches both the older "GNU tar archive" wording and the
    # "POSIX tar archive" wording printed by newer file(1) releases.
    os.system("gzip -f %s" % file)
    file = file + ".gz"
else:
    raise TypeError("File %s must be a .tar.gz file" % file)

# get size of tarball
tarsize = os.path.getsize(file)

# Unpack the tarball and work out which top-level directory it created.
f = os.popen("tar xvfz %s" % file)
# Read the complete listing before closing the pipe: once the read end is
# closed tar can no longer write its verbose output and may abort with the
# archive only partly extracted.  Reading to EOF also cannot spin forever
# when tar prints nothing at all.
listing = f.readlines()
f.close()

# The first listed entry with a real leading path component names the
# top-level directory.  Empty components and "." are skipped so that
# entries such as "./", "./pkg/file" or "/pkg/file" never resolve to the
# filesystem root.
path = None
for entry in listing:
    parts = [p for p in entry.rstrip("\n").split(os.sep) if p not in ("", ".")]
    if parts:
        path = parts[0]
        break

# Refuse to continue unless that component is a directory inside the
# current directory: the later steps delete files below `path` and then
# remove `path` itself, so a wrong guess here would be destructive.
if path is None or path == ".." or not os.path.isdir(path):
    raise TypeError("Tarball %s does not unpack into a top-level directory" % file)
print("Using path %s" % path)

os.chdir(path)

# Delete every file matching an entry of the murder list (autogenerated
# stuff) anywhere below the current directory.
for pattern in MURDER:
    os.system("find . -type f -name %s -exec rm {} \\;" % pattern)

# Makefile.in is only removed when a Makefile.am exists somewhere in the
# tree.
automake_inputs = os.popen("find . -name Makefile.am").read()
if automake_inputs:
    os.system("find . -type f -name Makefile.in -exec rm {} \\;")

# Two more files are removed by fixed relative path, if they exist.
for leftover in ("admin/config.pl", "debian/rules"):
    os.system("rm -f %s" % leftover)

# Run sloccount over everything in the current directory and collect the
# (label, count) pairs from the "Totals grouped" section of its report.
if not os.path.exists(SLOCDIR):
    os.mkdir(SLOCDIR)
pipe = os.popen("%s --datadir %s *" % (SLOCCOUNT, SLOCDIR))
report = pipe.readlines()

sloc = []
in_totals = 0
for line in report:
    if not line.strip():
        # A blank line ends the section we are interested in.
        in_totals = 0
    elif line.find("Totals grouped") != -1:
        # Header line: the rows that follow are the ones to record.
        in_totals = 1
    elif in_totals:
        fields = line.split()
        # First column is "<label>:"; keep the text before the first
        # colon.  The second column is the integer count.
        label = fields[0].split(":")[0]
        sloc.append((label, int(fields[1])))

# Find the most recently modified regular file below the current directory.
filelist = [line.rstrip("\n") for line in os.popen("find . -type f").readlines()]
filelist = [name for name in filelist if name]
if not filelist:
    # Fail with an explanation rather than an IndexError further down.
    raise TypeError("No regular files found in %s" % os.getcwd())

# Single pass keeping the first file with the largest mtime; each file is
# stat'ed exactly once.
newest = None
mtime = None
for name in filelist:
    stamp = os.path.getmtime(name)
    if mtime is None or stamp > mtime:
        newest = name
        mtime = stamp
print("Newest file is %s" % newest)
# Calendar date (UTC) of that modification time, formatted as YYYYMMDD.
mdate = time.strftime("%Y%m%d", time.gmtime(mtime))

# Step back out of the unpacked tree and delete it.
os.chdir("..")
# `path` was derived from the tarball's own file listing, so it is removed
# with shutil rather than by interpolating it into a shell command.  Empty,
# ".", ".." and absolute values are skipped: deleting any of those would
# remove far more than the unpacked tree.  Errors are ignored, keeping the
# removal best-effort.
if path not in ("", ".", "..") and not os.path.isabs(path):
    shutil.rmtree(path, ignore_errors=True)

# Summary of the collected values: modification time of the newest file,
# tarball size in bytes, and the list of sloccount totals.
print(mtime)
print(tarsize)
print(sloc)
print("")

# Store the results: update the project's row in `surveys` and replace its
# rows in `slocs`.
db = PgSQL.connect(DSN)
try:
    st = db.cursor()
    # The date is passed as a quoted text literal; unquoted it would reach
    # to_timestamp() as an integer rather than as the text the
    # 'YYYYMMDD' template is meant to parse.
    qstr = "UPDATE surveys SET tartime = to_timestamp('%s', 'YYYYMMDD'), tarsize = %d WHERE project = %d" % (mdate, tarsize, project)
    d(qstr)
    st.execute(qstr)
    # Deleting when no rows exist is a no-op, so no SELECT is needed first.
    st.execute("DELETE FROM slocs WHERE project = %d" % project)
    for label, count in sloc:
        qstr = "INSERT INTO slocs VALUES ( %d, '%s', %d )" % (project, label, count)
        d(qstr)
        st.execute(qstr)
    st.close()
    # Commit through the connection (the DB-API way) instead of sending a
    # raw COMMIT statement through the cursor.
    db.commit()
finally:
    # Release the connection even when one of the statements fails.
    db.close()
print("Insertion OK")
print("-" * 72)
