WWW statistics using python (2/3)

Paul Sijben (sijben@cs.utwente.nl)
Fri, 1 Jul 1994 10:11:34 GMT

Of course, you will want to change the `services` and `sservices` lists and
some parts of the WWW page generation.

stat.py:
--------------
#!/usr/local/bin/python
# Author: Paul Sijben (sijben@pegasus.esprit.ec.org)
# Date: 1 july 1994
# version 1.0
import gzip
import os
import posix
import sys
from string import *
from time import *
from HTMLsupport import *
# Column labels of the plain-text report, one per request counter.
services = [
    'refdbms',
    'pgp',
    'campus',
    'weather',
    'badge',
    'finger',
    'people',
    'others',
    'default',
    'pegasus.gif',
]

# Substrings looked for in the requested page, in order; the first hit at
# index i is counted in column i, except '~' (index 7), which is counted in
# the 'people' column (6).  Requests matching nothing land in column 7.
sservices = [
    'refdbms',
    'pgp',
    'campus',
    'weather',
    'finger?ab.',
    'finger?',
    'people',
    '~',
    'default',
    'pegasus.gif',
]

def spc(i):
    """Return a string of ``i`` spaces (empty when ``i`` is zero or negative)."""
    # String repetition yields "" for non-positive counts, so callers may
    # pass a width difference that has gone negative.
    return ' ' * i

def swapfields(item):
    """Return ``item`` with its dot-separated fields in reverse order.

    ``"cs.utwente.nl"`` becomes ``"nl.utwente.cs"``.  Items that start with
    a digit (numeric addresses) are returned unchanged, as is the empty
    string.
    """
    if not item or item[0] in "0123456789":
        return item
    return ".".join(reversed(item.split(".")))

def revsort(list):
for i in range(len(list)):
list[i]=swapfields(list[i])
list.sort()
for i in range(len(list)):
list[i]=swapfields(list[i])

def checkdate():
    """Return the numeric day offset given as the second argument, else 0.

    The offset is read from ``sys.argv[2]`` when that argument exists and
    starts with a digit.  Anything else (missing, empty, another option, or
    text that is not a whole number) yields 0.
    """
    if len(sys.argv) > 2 and sys.argv[2][:1].isdigit():
        try:
            return int(sys.argv[2])
        except ValueError:
            # Starts with a digit but is not a number, e.g. "1abc".
            return 0
    return 0

def ranges(n, m, dayrange, monthrange):
    """Append the dates of an ``n``-day period to the two range lists.

    The period ends ``m`` days before now and is listed oldest day first.
    For every day, the day of the month (as a string without padding) is
    appended to ``dayrange`` and the month abbreviation to ``monthrange``,
    so both lists stay index-aligned.

    Returns the year of the last day of the period as a string.  When ``n``
    is zero or negative nothing is appended and the year is still returned.
    """
    day_seconds = 60 * 60 * 24
    last_day = time() - m * day_seconds
    # Days are stepped in fixed 24-hour increments from the current time.
    for offset in range(-n + 1, 1):
        # asctime() gives e.g. "Fri Jul  1 10:11:34 1994"; splitting on any
        # run of whitespace copes with the space-padded day of the month.
        fields = asctime(localtime(last_day + day_seconds * offset)).split()
        dayrange.append(fields[2])
        monthrange.append(fields[1])
    return asctime(localtime(last_day)).split()[-1]

def _print_usage():
    """Print the command-line help text to standard output."""
    print("Usage: -w|-d [<number>] [-g <pathname>]", "[-f {<filename>}]")
    print(" -w : present seven days ")
    print(" -d : one day only")
    print(" <number> : negative offset; -d 1 means present yesterday's connections")
    print(" -g : generate WWW pages for the report")
    print(" -f : input filenames;")
    print(" when -f is ommitted stdin is used")
    print(" .gz files are automatically uncpompressed.")


def parsecommandline(dayrange, monthrange):
    """Fill the day and month ranges according to the first command-line option.

    * no arguments: today only;
    * ``-w``: the seven days ending ``checkdate()`` days ago;
    * ``-d``: the single day ``checkdate()`` days ago;
    * ``-h``: print the usage text and exit with status 0.

    Any other first argument prints the usage text and exits with status 2,
    because the rest of the program cannot work with empty ranges.

    Returns the year string produced by ``ranges``.
    """
    args = sys.argv
    if len(args) == 1:
        return ranges(1, 0, dayrange, monthrange)
    option = args[1]
    if option == "-w":
        return ranges(7, checkdate(), dayrange, monthrange)
    if option == "-d":
        return ranges(1, checkdate(), dayrange, monthrange)
    _print_usage()
    sys.exit(0 if option == "-h" else 2)

def readandcheck(fd, regels, dayrange, monthrange):
    """Collect the log lines from ``fd`` that fall on one of the wanted dates.

    Each line is split on single spaces; field 2 is compared with the month
    names and field 3 with the day numbers.  ``dayrange`` and ``monthrange``
    are index-aligned, so a line is kept only when its (month, day) pair
    matches one of the listed dates.  Matching lines are appended to
    ``regels`` as their list of fields.

    Lines with fewer than four fields (blank or truncated) are skipped.
    """
    # NOTE(review): assumes the log puts the month in field 2 and an
    # unpadded day of the month in field 3 -- confirm against the server's
    # log format.
    wanted = set(zip(monthrange, dayrange))
    for regel in iter(fd.readline, ""):
        fields = regel.split(" ")
        if len(fields) < 4:
            continue
        if (fields[2], fields[3]) in wanted:
            regels.append(fields)

def readinput(regels, dayrange, monthrange):
    """Read the log lines for the wanted dates from files or standard input.

    The command line is scanned from the third argument on.  Every argument
    after ``-f`` is treated as a log file name; names ending in ``.gz`` are
    decompressed while reading.  When no ``-f`` is present, standard input
    is read instead.  Matching lines are appended to ``regels`` by
    ``readandcheck``.
    """
    reading_files = False
    for arg in sys.argv[2:]:
        if reading_files:
            # Decompress in-process rather than through a shell pipeline, so
            # file names containing shell metacharacters are harmless.
            opener = gzip.open if arg[-3:] == ".gz" else open
            with opener(arg, "rt") as fd:
                readandcheck(fd, regels, dayrange, monthrange)
        elif arg == "-f":
            reading_files = True
    if not reading_files:
        readandcheck(sys.stdin, regels, dayrange, monthrange)

def top(n, fromlist):
    """Return the ``n`` busiest entries of ``fromlist`` as (name, count) pairs.

    ``fromlist`` maps a name to a list of counters whose last element is the
    total for that name.  The result is ordered by descending total; names
    with equal totals keep the order in which they appear in ``fromlist``.
    Names with a total of zero are left out, and the list is padded with
    ``("", 0)`` so that it always holds exactly ``n`` pairs.
    """
    size = max(n, 0)
    counted = [(name, counts[-1])
               for name, counts in fromlist.items()
               if counts[-1] > 0]
    # sorted() is stable, also with reverse=True, so ties stay in dict order.
    counted.sort(key=lambda pair: pair[1], reverse=True)
    best = counted[:size]
    return best + [("", 0)] * (size - len(best))

def _report_line(label, count):
    """Format one "label: count" row: label padded to 31 columns, count to 4."""
    return "%-31s%4d\n" % (label + ":", count)


def NewWWWpage(aantallen, fromlist, dayrange, monthrange, year):
    """Write the HTML usage report for the given period.

    The page is written in the current directory to
    ``report.<day>.<month>.-.<day>.<month>.<year>.html`` (first and last day
    of the period).  It lists the number of requests per service type, the
    grand total, and the ten busiest domains from ``fromlist``.

    ``aantallen`` holds the per-service counters; its first seven entries
    are reported individually and its last three are summed under "Others".
    ``dayrange`` and ``monthrange`` are the index-aligned day and month
    strings of the period and ``year`` is the year as a string.
    """
    names = ['Refdbms service', 'PGP docuentation',
             'Campus information pages', 'Weather images',
             'Badge system', 'Fingers on persons directly',
             'Private pages of people', 'Others']
    fn = ("report." + dayrange[0] + "." + monthrange[0] + ".-." +
          dayrange[-1] + "." + monthrange[-1] + "." + year + ".html")

    heading = "Actions on the server "
    if len(dayrange) == 1:
        heading += "on " + dayrange[0] + " " + monthrange[0] + " " + year
    else:
        heading += "during the week from <P>" + dayrange[0] + " "
        if monthrange[0] != monthrange[-1]:
            heading += monthrange[0] + " "
        heading += "to " + dayrange[-1] + " " + monthrange[-1] + " " + year

    # One row per named service, then the remaining counters as "Others".
    rows = [(names[i], aantallen[i]) for i in range(len(names) - 1)]
    rows.append((names[-1], aantallen[-1] + aantallen[-2] + aantallen[-3]))
    total = sum(count for _, count in rows)
    usage = "".join(_report_line(label, count) for label, count in rows)

    domains = "".join(_report_line(domain, count)
                      for domain, count in top(10, fromlist)
                      if count != 0)

    # The with-block closes the file even when a helper raises.
    with open(fn, "w") as fd:
        TITLE(fd, "Pegasus WWWserver report")
        H1(fd, heading)
        PHR(fd)
        H2(fd, "This is the list of number of usages of the server per type:")
        P(fd)
        PRE(fd, usage)
        P(fd)
        fd.write("There have been <B>" + repr(total) +
                 "</B> accesses to the server.\n")
        P(fd)
        PHR(fd)
        H2(fd, "Of these accesses the top 10 using domains were:")
        P(fd)
        PRE(fd, domains)
        P(fd)
        A(fd, "HREF=\"http:../index.html\"", "Paul Sijben.")

def NewTopPage():
    """Rewrite ``index.html`` with links to every report in the current directory.

    Report files are recognised by a name whose first dot-separated field is
    ``report`` and are expected to look like
    ``report.<day>.<month>.-.<day>.<month>.<year>.html``.  They are listed
    in order of their end date (year, month, day), and the newest one is
    also linked as "the latest report".  Names that start with ``report``
    but lack a recognisable end date are ignored.
    """
    tab = {"Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04", "May": "05",
           "Jun": "06", "Jul": "07", "Aug": "08", "Sep": "09", "Oct": "10",
           "Nov": "11", "Dec": "12"}
    reports = {}
    for filename in os.listdir("."):
        parts = filename.split(".")
        if parts[0] != 'report':
            continue
        # Needs at least <day>.<month>.<year>.<ext> at the end of the name.
        if len(parts) < 4 or parts[-3] not in tab:
            continue
        day = parts[-4]
        if len(day) == 1:
            day = '0' + day
        # Sort key: year + two-digit month + two-digit day of the end date.
        digest = parts[-2] + tab[parts[-3]] + day
        title = ' '.join(parts[1:7])
        reports[digest] = (title, filename)
    ordered = sorted(reports)

    with open("index.html", "w") as fd:
        TITLE(fd, "Usage reports")
        IMG(fd, "../../../pegasus.gif")
        H1(fd, "Reports on the usage of the Pegasus WWW server")

        P(fd)
        fd.write("We keep track of usage of this WWW server.")
        fd.write(" The summarised reports are automatically placed here.")
        P(fd)
        if ordered:
            # Only offer a "latest" link when at least one report exists.
            fd.write("Click ")
            A(fd, "HREF=\"http:./" + reports[ordered[-1]][1] + "\"", "here")
            fd.write(" for the latest report.")
        PHR(fd)
        H2(fd, "Click on one of the date ranges to see the usage report of that week.")
        fd.write("<UL>")
        for digest in ordered:
            title, filename = reports[digest]
            fd.write("<LI>")
            A(fd, "HREF=\"http:./" + filename + "\"", title)
            fd.write("\n")
        fd.write("</UL>")
        PHR(fd)
        A(fd, "HREF=\"http:../index.html\"", "Paul Sijben.")

def MinG():
    """Handle the ``-g <pathname>`` option and return the chosen pathname.

    For every ``-g`` on the command line the process changes its working
    directory to the argument that follows.  Returns the last such
    pathname, or ``""`` when ``-g`` was not given.

    Prints an error message and exits with status 1 when ``-g`` is the last
    argument or the directory cannot be entered.
    """
    path = ""
    for position, arg in enumerate(sys.argv):
        if arg != "-g":
            continue
        if len(sys.argv) <= position + 1:
            print("please specify a valid pathname")
            sys.exit(1)
        path = sys.argv[position + 1]
        try:
            os.chdir(path)
        except OSError:
            print("please specify a valid pathname")
            sys.exit(1)
    return path

def GenReport(aantallen, fromlist, dayrange, monthrange, year):
    """Generate the WWW report pages when ``-g`` was given on the command line.

    ``MinG`` changes into the requested directory and returns its pathname;
    when that is non-empty, the report page for the period and the index
    page are (re)written there.  Without ``-g`` nothing is written.
    """
    if MinG() == "":
        return
    NewWWWpage(aantallen, fromlist, dayrange, monthrange, year)
    NewTopPage()

def printoutput(aantallen, fromlist, dayrange, monthrange):
    """Print the plain-text connection overview to standard output.

    The table has one row per entry of ``fromlist`` (sorted with
    ``revsort``) and one column per counter in ``aantallen``, followed by
    the row total.  Column titles come from ``services`` and are written
    vertically, seven characters deep.  A final row shows the totals per
    column and the grand total.
    """
    rule = "--------------------------------------------------------------------+-------"

    # Title: one line for a single day, two lines for a week.
    intro = "Overview of connections made to the pegasus WWW server made"
    if len(dayrange) == 1:
        print(" ".join([intro, "on", dayrange[0], monthrange[0]]))
    else:
        print(intro)
        span = ["during the week from", dayrange[0]]
        if monthrange[0] != monthrange[-1]:
            span.append(monthrange[0])
        span.extend(["to", dayrange[-1], monthrange[-1]])
        print(" ".join(span))

    # Vertical column headers: row r shows the r-th letter of every title.
    for row in range(7):
        cells = [spc(27)]
        for j in range(len(services) + 1):
            if j == len(services):
                label = "TOTALS"
                cells.append("| ")
            else:
                label = services[j]
            if len(label) > row:
                cells.extend([label[row], spc(1)])
            else:
                cells.append(spc(3))
        print(" ".join(cells))
    print(rule)

    lijst = list(fromlist)
    revsort(lijst)
    for host in lijst:
        counts = fromlist[host]
        cells = [host, spc(25 - len(host))]
        cells.extend(repr(counts[j]).center(3) for j in range(len(aantallen)))
        cells.extend([" |", repr(counts[-1]).rjust(3)])
        print(" ".join(cells))

    print(rule)
    cells = ["TOTALS", spc(19)]
    cells.extend(repr(count).center(3) for count in aantallen)
    cells.extend([" |", repr(sum(aantallen)).rjust(3)])
    print(" ".join(cells))

def aggregate(tolist, fromlist, aantallen, regels):
    """Tally the selected log lines per page, per origin and per service.

    ``regels`` is a list of log lines, each already split into fields;
    field 0 is the requesting host and field 7 the requested page.

    * ``tolist[page]`` counts the requests for each page.
    * ``fromlist[origin]`` is a list with one counter per service plus a
      final total.  The origin is the host name without its first label
      when the name has at least three labels and starts with a letter;
      other hosts (e.g. numeric addresses) are used as they are.
    * ``aantallen[k]`` counts the requests per service column ``k``.

    The column is the index of the first ``sservices`` entry contained in
    the page; a ``'~'`` hit (index 7) is counted under column 6, and pages
    matching nothing are counted under column 7.

    Lines with fewer than eight fields are skipped.
    """
    for fields in regels:
        if len(fields) < 8:
            # No page field; nothing to classify.
            continue
        van = fields[0]
        if van.count('.') >= 2 and van[0].isalpha():
            van = van[van.index('.') + 1:]
        if van not in fromlist:
            # One counter per service column plus the per-origin total.
            fromlist[van] = [0] * (len(aantallen) + 1)
        page = fields[7]
        tolist[page] = tolist.get(page, 0) + 1
        for j, marker in enumerate(sservices):
            if marker in page:
                k = 6 if j == 7 else j
                break
        else:
            k = 7
        aantallen[k] += 1
        fromlist[van][k] += 1
        fromlist[van][-1] += 1

def main():
    """Run the whole report: parse options, read logs, tally, and output.

    The date range comes from the command line, the matching log lines are
    read from the given files or standard input, the requests are
    aggregated, the WWW pages are written when ``-g`` was given, and the
    text overview is printed to standard output.
    """
    dayrange = []
    monthrange = []
    year = parsecommandline(dayrange, monthrange)

    regels = []
    readinput(regels, dayrange, monthrange)

    fromlist = {}
    tolist = {}
    aantallen = [0] * 10
    aggregate(tolist, fromlist, aantallen, regels)

    GenReport(aantallen, fromlist, dayrange, monthrange, year)
    printoutput(aantallen, fromlist, dayrange, monthrange)

# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()