Here is the problem:
A file (a map of a city) of about 6300 lines of the following form
33595 6233.1 -3250.2 43040 0 33609 0
has to be processed. The "ID" fields 0, 3, 4, 5 and 6 have to be
remapped into 1..6300; this needs a dictionary-type object,
since the ID values are spread over a large range.
The program has to produce a file of about 31500 lines, which involves
computing (floating-point) distances between the nodes the IDs refer to.
On my machine (an SGI R4000SC, not one of the slowest) the Python
program takes 258 seconds of CPU time for this problem.
The same problem is solved with MAWK in 9.48 seconds!
Or is my (first) Python program just terribly inefficient?
Here it is:
#! /usr/people/jarausch/bin/python
import sys,os
from string import atoi,atof,split
from math import sqrt
def dist(x1, y1, x2, y2):
    """Return the Euclidean distance between (x1, y1) and (x2, y2)."""
    dx = x2 - x1
    dy = y2 - y1
    return sqrt(dx * dx + dy * dy)
inp=open(sys.argv[1],'r')
STDOUT=sys.stdout
sys.stdout=sys.stderr
MinX=input("MinX= ")
MaxX=input("MaxX= ")
MinY=input("MinY= ")
MaxY=input("MaxY= ")
Line=inp.readline()
Node=1
NODE={}
NODE[0]=0
X={}
Y={}
while len(Line) > 0 :
Line=split(Line)
Id=atoi(Line[0])
X0=atof(Line[1])
Y0=atof(Line[2])
if X0 >= MinX and X0 <= MaxX and Y0 >= MinY and Y0 <= MaxY :
NODE[Id]= Node
X[Node]= X0
Y[Node]= Y0
Node=Node+1
else:
NODE[Id]= 0
Line=inp.readline()
inp.seek(0,0)
print "Number of Nodes=",Node-1
sys.stdout=STDOUT
Line=inp.readline()
try :
while len(Line) > 0 :
Line=split(Line)
Id=atoi(Line[0])
NN=NODE[Id]
if NN > 0 :
print NN, round(atof(Line[1])), round(atof(Line[2]))
NB= NODE[atoi(Line[3])]
if NB > 0 :
Dist= dist(X[NN],Y[NN],X[NB],Y[NB])
print NB, round(Dist)
else:
print "0 0"
NB= NODE[atoi(Line[4])]
if NB > 0 :
Dist= dist(X[NN],Y[NN],X[NB],Y[NB])
print NB, round(Dist)
else:
print "0 0"
NB= NODE[atoi(Line[5])]
if NB > 0 :
Dist= dist(X[NN],Y[NN],X[NB],Y[NB])
print NB, round(Dist)
else:
print "0 0"
NB= NODE[atoi(Line[6])]
if NB > 0 :
Dist= dist(X[NN],Y[NN],X[NB],Y[NB])
print NB, round(Dist)
else:
print "0 0"
#
Line=inp.readline()
#
finally :
print Line
inp.close()
---------------------------------------------------------------
And this is the MAWK program:
# First pass, done before normal record processing: read the bounding box,
# then scan the input file once to number the nodes that lie inside it.
BEGIN {
    Node = 1
    FILE = ARGV[1]

    # Only prompt when MinX has not been set already (e.g. with -v).
    # The bounds are read from "-", which mawk treats as standard input.
    if (MinX == "") {
        printf "MinX= " > "/dev/stderr"
        getline MinX < "-"
        printf "MaxX= " > "/dev/stderr"
        getline MaxX < "-"
        printf "MinY= " > "/dev/stderr"
        getline MinY < "-"
        printf "MaxY= " > "/dev/stderr"
        getline MaxY < "-"
    }

    # NODE maps an original ID to its new node number (0 = outside the box);
    # X and Y hold the coordinates of each numbered node.
    while ((OK = (getline < FILE)) == 1) {
        X0 = $2
        Y0 = $3
        if (X0 >= MinX && X0 <= MaxX && Y0 >= MinY && Y0 <= MaxY) {
            NODE[$1] = Node
            X[Node] = $2
            Y[Node] = $3
            Node++
        } else
            NODE[$1] = 0
    }

    # Close the file so the main rule reads it again from the start.
    close(FILE)
    print Node - 1
}
# Second pass, run for every input record: for a selected node print
#   <node> <rounded x> <rounded y>
# followed by one line per neighbour field ($4..$7):
#   <neighbour node> <rounded distance>   or   "0 0".
{
    if (NF < 7)
        print "*** too few fields: " $0

    NN = NODE[$1]
    if (NN > 0) {
        print NN, Round($2), Round($3)

        for (k = 4; k <= 7; k++) {
            # A non-zero neighbour ID that never appeared as a record ID.
            if ($k > 0 && !($k in NODE))
                print $k " illegal in " $0

            NB = NODE[$k]
            if (NB > 0) {
                Dist = sqrt((X[NN] - X[NB])^2 + (Y[NN] - Y[NB])^2)
                print NB, int(Dist + 0.5)
            } else
                print "0 0"
        }
    }
}
# Round x to the nearest integer, with halves rounded away from zero.
function Round(x)
{
    return (x >= 0) ? int(x + 0.5) : int(x - 0.5)
}