tarlib.py prototype

Steven D. Majewski (sdm7g@elvis.med.virginia.edu)
Fri, 17 Dec 1993 18:33:02 -0500

I *may* grab some time over the next couple of weeks to work on
finishing up some of my current python projects, OR I may just
manage to avoid computers entirely for a while. ( Ha! I admit
it - I'm an addict - even if I have to get my fix at 1200 baud!)

[ Or maybe I'll go all the way, and borrow a PC or a Mac from
the lab and work on porting some python modules to one of
those. ]

But, just in case I don't get a chance to do much with it, I'll
post a teaser now.

This is a read-only prototype of a tarlib, with a TarHdr class
( which parses a raw tar header from disk ) and a TarFile
class, which is a sequence-like representation of a tar file.

Right now, it's read-only: you can't do much besides list the
contents of a file or browse the file contents with:
'print TAR[n].file[:1000] ' for example, to print the first
1000 chars of file 'n' . Lots of changes and additions are
in the works: this is one end of the remote-ftp-server-directly
-to-tape backup program I mentioned a while ago. As with the
additions to ftplib, most of the effort has been in trying to
get it to handle variations of 'ustar', old-(bsd?)-tar, gnu-tar
( extended ustar ) and pax ( ustar ) formats, ( or in the ftp
case, trying to find the common denominator of several common
but not strictly conforming ftp servers. )

The next version should support writing tar files, extracting
tar files to disk, and a tar file browser ( with options for
the file itself to be cached in memory, or to be loaded on
reference by file + seek-offset ). [ And some mac-bin extensions
to handle Macintosh files ... I'm about to post to the Mac
news groups to see if anyone has already made some tar extensions,
before I invent my own. ]

This provides another example of a sequence-like class, and
an example of tuple-driven processing in the parse method.

Guido and I have discussed trying to provide several generically
filesystem-like class-wrapper views of ftp-servers, tar-files,
mail files, and imap and nntp servers, as well as native filesystems,
which would provide common methods: chdir(), open(), read(),
namelist(), etc. upon which to build a common browser.
( besides the half-finished ftp extensions, I have a very rudimentary
start on an IMAP server class, and some nntp extensions. )

I hope I can provide something a bit more polished sometime after
Python 1.0 gets posted! ( I expect to use a couple of the new
features that Guido has promised! )

- Steve Majewski (804-982-0831) <sdm7g@Virginia.EDU>
- UVA Department of Molecular Physiology and Biological Physics

-------------------------
#!/usr/local/bin/python
#
# Most of these 'defines' ( and their comments ) have been
# converted from gnutar's tar.h include file.
#
# Sizes and filler character used by the header-parsing code.
_NULL = '\000'      # NUL byte; stripped from header fields when parsing
_NAMSIZ = 100       # width of the 'name' and 'linkname' header fields
_USIZE = 32         # not referenced in this file; presumably the
                    # uname/gname field width (same value) -- TODO confirm
_RECSIZE = 512      # block size: member data is padded to a multiple of it

# Values of the one-character 'linkflag' header field.
_LF_OLDNORMAL = '\0'    # Normal disk file, Unix compat
_LF_NORMAL = '0'        # Normal disk file
_LF_LINK = '1'          # Link to previously dumped file
_LF_SYMLINK = '2'       # Symbolic link
_LF_CHR = '3'           # Character special file
_LF_BLK = '4'           # Block special file
_LF_DIR = '5'           # Directory
_LF_FIFO = '6'          # FIFO special file
_LF_CONTIG = '7'        # Contiguous file

# Further link types may be defined later.
#
# The standards committee reserves only capital 'A' through capital 'Z'
# for user-defined expansion, so defining a new type as, say, '8' is a
# *bad* idea.  The letters below are the extensions known to this module.

_LF_DUMPDIR = 'D'       # Directory entry that contains the names of the
                        # files that were in the directory at the time
                        # the dump was made
_LF_LONGLINK = 'K'      # The NEXT file on the tape has a long linkname
_LF_LONGNAME = 'L'      # The NEXT file on the tape has a long name
_LF_MULTIVOL = 'M'      # Continuation of a file that began on another
                        # volume
_LF_NAMES = 'N'         # Storage for filenames that didn't fit in 100
                        # characters
_LF_SPARSE = 'S'        # Sparse file
_LF_VOLHDR = 'V'        # This file is a tape/volume header

# Description of every known entry type, keyed by linkflag value.
# The first character of each description doubles as the type column of
# a verbose directory listing.
entry_types = {
    _LF_OLDNORMAL: ' File',
    _LF_NORMAL: '- normal file',
    _LF_LINK: '- link',
    _LF_SYMLINK: 'l symbolic link',
    _LF_CHR: 'character special file',
    _LF_BLK: 'block special file',
    _LF_DIR: 'directory',
    _LF_FIFO: 'pipe/FIFO special file',
    _LF_CONTIG: 'contiguous file',
    _LF_DUMPDIR: 'dumpdir listing',
    _LF_LONGLINK: '?Next file on tape has a long linkname',
    _LF_LONGNAME: '?Next file on the tape has a longname',
    _LF_MULTIVOL: '+Continuation of tape volume',
    _LF_NAMES: '?Long filename',
    _LF_SPARSE: '- Sparse File',
    _LF_VOLHDR: 'Volume/Tape Header',
}

import time
from string import ljust,rjust,strip

# Header layout table that drives TarHdr.parse(): one
# (attribute name, field width in bytes, kind) triple per field, in
# on-disk order.  Kind 's' is a string field, kind 'o' an octal-number
# field that parse() converts to an integer.  The trailing comments give
# each field's byte offsets, obtained by summing the widths above it.
_map = (
    ('name', _NAMSIZ, 's'),         #   0 -  99
    ('mode', 8, 'o'),               # 100 - 107
    ('uid', 8, 'o'),                # 108 - 115
    ('gid', 8, 'o'),                # 116 - 123
    ('size', 12, 'o'),              # 124 - 135
    ('mtime', 12, 'o'),             # 136 - 147
    ('chksum', 8, 'o'),             # 148 - 155
    ('linkflag', 1, 's'),           # 156
    ('linkname', _NAMSIZ, 's'),     # 157 - 256
    ('magic', 8, 's'),              # 257 - 264
    ('uname', 32, 's'),             # 265 - 296
    ('gname', 32, 's'),             # 297 - 328
    ('devmaj', 8, 'o'),             # 329 - 336
    ('devmin', 8, 'o'),             # 337 - 344
    ('atime', 12, 'o'),             # 345 - 356
    ('ctime', 12, 'o'),             # 357 - 368
    ('offset', 12, 'o'),            # 369 - 380
)

def _zerostrip(s):
    """Return a copy of the string *s* with every NUL character removed.

    All occurrences of _NULL are dropped, not only trailing padding, so
    an all-NUL string yields '' (which is how an end-of-archive block is
    recognised by the header parser).
    """
    # A single replace() call instead of rebuilding the string one
    # character at a time, which was quadratic in len(s).
    return s.replace(_NULL, '')

# This class defines a tar file header
class TarHdr:
    """One parsed tar header record.

    parse() stores every field listed in the module-level _map table as
    an attribute of the instance: string fields as strings, octal fields
    as integers.  The remaining methods format those attributes for
    directory listings.
    """

    def __init__(self, *hdrstr):
        """Create a header; if a raw record is passed, parse it at once."""
        if hdrstr:
            self.parse(hdrstr[0])

    def parse(self, h):
        """Set the header attributes from the raw header record *h*.

        *h* may be a string or, for files opened in binary mode on
        Python 3, a bytes object.

        Raises EOFError('Null Header') when *h* is empty or consists only
        of NUL characters, and ValueError when a numeric field does not
        hold an octal number.
        """
        if isinstance(h, bytes) and not isinstance(h, str):
            # latin-1 maps each byte to the character with the same code,
            # so no header byte is lost or rejected by the decoding.
            h = h.decode('latin-1')
        if not h or not _zerostrip(h):
            raise EOFError('Null Header')
        end = 0
        for name, width, kind in _map:
            start, end = end, end + width
            field = h[start:end]
            if kind == 'o':
                digits = _zerostrip(field).strip()
                # int(..., 8) rather than eval(): header contents come
                # from the archive and must never be executed as code.
                # An empty field counts as zero.
                val = int(digits or '0', 8)
            elif width == 1:
                # Single-character field (linkflag): keep it verbatim,
                # because a NUL is itself a meaningful value there.
                val = field
            else:
                val = _zerostrip(field)
            setattr(self, name, val)

    def filetype(self):
        """Return the entry_types description for this entry's linkflag.

        A linkflag that is not in entry_types yields a description
        starting with '?' instead of raising KeyError.
        """
        return entry_types.get(self.linkflag, '?unknown entry type')

    def _modestr(self):
        """Return the nine permission bits of self.mode as 'rwxr-xr-x'."""
        flags = 'rwxrwxrwx'
        top = len(flags) - 1
        result = ''
        for pos in range(len(flags)):
            # flags[0] corresponds to the highest of the nine bits.
            if self.mode & (1 << (top - pos)):
                result = result + flags[pos]
            else:
                result = result + '-'
        return result

    def modestr(self):
        """Return the type character followed by the permission string."""
        return self.filetype()[0] + self._modestr()

    def user(self):
        """Return the owner name, or repr() of the uid if there is none."""
        uname = getattr(self, 'uname', '')
        if uname:
            return uname
        return repr(self.uid)

    def group(self):
        """Return the group name, or repr() of the gid if there is none."""
        gname = getattr(self, 'gname', '')
        if gname:
            return gname
        return repr(self.gid)

    def skip(self):
        """Return self.size rounded up to a whole number of blocks."""
        # Floor division keeps the result an integer on every Python.
        blocks = (self.size + _RECSIZE - 1) // _RECSIZE
        return blocks * _RECSIZE

    def ugfmt(self, w):
        """Return 'user/group' laid out in a column about *w* wide.

        Each side gets (w - 1) // 2 characters; the user is
        right-justified and the group left-justified around the '/'.
        """
        half = (w - 1) // 2
        return self.user().rjust(half) + '/' + self.group().ljust(half)

    def string(self):
        """Return the one-line verbose listing for this entry."""
        stamp = time.ctime(self.mtime)
        # Keep 'Mon DD ' plus the last two digits of the year.
        stamp = stamp[4:11] + stamp[-2:]
        owner = ' [' + self.ugfmt(13) + ']'
        return (self.modestr() + ' ' + self.name.ljust(35) + stamp
                + owner + repr(self.size).rjust(6))

    def __repr__(self):
        return self.string()
#
# class TarFile is a sequence of tar headers.  Each header also carries
# the contents of its archive member (attribute 'file') and the position
# in the archive at which those contents start (attribute 'where').
#
class TarFile:
    """Read-only, sequence-like view of a tar archive.

    len() and indexing operate on self.list, the TarHdr objects in
    archive order.
    """

    def __init__(self, *args):
        """Create an empty TarFile, or load one from the file args[0]."""
        self.list = []
        # Defined up front so that repr() works before readfrom() runs.
        self.file = None
        if args:
            self.readfrom(args[0])

    def readfrom(self, file):
        """Read every entry from the open file object *file*.

        Reading stops at end of file or at the first all-NUL header.
        Each member's contents are read into memory and stored on its
        header.  Returns self.
        """
        self.file = file
        while True:
            hdr = file.read(_RECSIZE)
            if not hdr:
                return self
            try:
                HDR = TarHdr(hdr)
            except EOFError:
                # A null header marks the end of the archive.
                return self
            try:
                HDR.where = file.tell()
            except IOError:
                # The file refused to report a position.
                HDR.where = None
            # Empty value of the same type that file.read() returns, so
            # empty and non-empty members have contents of one type.
            HDR.file = hdr[:0]
            if HDR.size:
                HDR.file = file.read(HDR.size)
            # Discard the padding that fills up the last data block.
            excess = HDR.skip() - HDR.size
            if excess:
                file.read(excess)
            self.list.append(HDR)

    def __len__(self):
        return len(self.list)

    def __getitem__(self, i):
        return self.list[i]

    def dir(self):
        """Print repr(self), then one verbose line per entry."""
        print(repr(self))
        for H in self.list:
            print(H.string())

    def ls(self):
        """Print the name of every entry, one per line."""
        for H in self.list:
            print(H.name)

    def __repr__(self):
        desc = repr(self.file)
        # Drop the enclosing '<' and '>' of a file object's repr; leave
        # any other repr (e.g. None, before a file is attached) intact.
        if desc[:1] == '<' and desc[-1:] == '>':
            desc = desc[1:-1]
        return '< TarFile: ' + desc + ' >'

def list(file):
    """Print a verbose listing of a tar archive.

    *file* is either the name of an archive, which is opened here and
    closed again before returning, or an already open file object, which
    is read but left open for the caller.
    """
    opened_here = isinstance(file, str)
    if opened_here:
        # Tar archives are binary data; text mode can corrupt them.
        file = open(file, 'rb')
    try:
        TarFile(file).dir()
    finally:
        if opened_here:
            file.close()


import sys

# Command-line use: list every archive named on the command line, with
# '-' standing for standard input.  The __name__ test keeps this from
# running (against the importing program's arguments) when the module is
# imported rather than executed.
if __name__ == '__main__':
    for afile in sys.argv[1:]:
        if afile == '-':
            # Use the underlying binary stream where stdin has one, so
            # the archive bytes are not decoded as text.
            list(getattr(sys.stdin, 'buffer', sys.stdin))
        else:
            list(afile)