From: kajiyama at grad.sccs.chukyo-u.ac.jp (Tamito Kajiyama) Date: 29 Apr 1999 21:40:56 GMT Subject: Python implementation of tar In-Reply-To: frankmcgeough@my-dejanews.com's message of Sat, 24 Apr 1999 23:20:22 GMT References: <7ftjjl$tim$1@nnrp1.dejanews.com> Message-ID: Content-Length: 4607 X-UID: 170 frankmcgeough at my-dejanews.com writes: | | Does someone has a tar implementation written in Python? The attached is one that I wrote last year. It can extract files from archives, but cannot create an archive. Feel free to use, modify, and redistribute the module. Comments and improvement are appreciated. Regards, KAJIYAMA, Tamito ----------snip----------snip----------snip---------- #!/usr/bin/env python # # Tar.py - handles tape archive (tar) files # written by Tamito KAJIYAMA <18 November 1998> # import string BLOCKSIZE = 512 def trim_null(v): return v[:string.find(v, '\000')] def oct2int(v): return eval('0' + string.strip(v)) def int2int(v): return int(string.strip(v)) class SubfileHeader: def __init__(self, hblock, tar_name, base): self.tar_name = tar_name self.base = base self.name = trim_null(hblock[0:100]) self.mode = hblock[100:108-2] self.uid = oct2int(hblock[108:116-2]) self.gid = oct2int(hblock[116:124-2]) self.size = oct2int(hblock[124:136]) self.mtime = oct2int(hblock[136:148]) self.checksum = int2int(hblock[148:156-2]) self.linkflag = hblock[156] self.linkname = trim_null(hblock[157:256]) if self.size % BLOCKSIZE == 0: self.size_in_block = self.size / BLOCKSIZE else: self.size_in_block = self.size / BLOCKSIZE + 1 class Subfile: def __init__(self, header): self.base = header.base self.size = header.size self.file = open(header.tar_name, 'r') self.file.seek(header.base, 0) self.header = header def fileno(self): return self.file.fileno() def seek(self, offset, whence=0): if whence == 0: pass elif whence == 1: offset = self.tell() + offset elif whence == 2: offset = self.size - offset else: raise IOError, (22, 'Invalid argument') if offset < 0: offset = 0 elif offset > self.size: offset = self.size self.file.seek(self.base + offset, 0) def tell(self): return self.file.tell() - self.base def read(self, length=None): remain = self.size - self.tell() if remain <= 0: return '' elif length and length < remain: return self.file.read(length) else: return self.file.read(remain) def readline(self, length=None): remain = self.size - self.tell() if remain <= 0: return '' elif length and length < remain: return self.file.readline(length) else: return self.file.readline(remain) def readlines(self): lines = [] while 1: line = self.readline() if not line: break lines.append(line) return lines class Tar: def __init__(self, filename): file = open(filename, 'r') self.headers = [] while 1: # read subfile header hblock = file.read(BLOCKSIZE) if hblock[0] == '\000': break header = SubfileHeader(hblock, filename, file.tell()) self.headers.append(header) # skip subfile body file.read(BLOCKSIZE * header.size_in_block) file.close() def list(self): return self.headers def retrieve(self, name): for header in self.headers: if header.name == name: return Subfile(header) else: return None def test(): import os, sys, time if len(sys.argv) == 2: tar = Tar(sys.argv[1]) for header in tar.list(): print ' name:', header.name print ' size:', header.size, 'bytes' print 'mtime:', time.ctime(header.mtime) print elif len(sys.argv) > 2: tar = Tar(sys.argv[1]) for filename in sys.argv[2:]: file = tar.retrieve(filename) if file: outfile = open(file.header.name, 'w') outfile.write(file.read()) outfile.close() print 'wrote', file.header.name else: print filename, 'not found' else: print 'Usage: %s filename.tar [filename ...]' % \ os.path.basename(sys.argv[0]) if __name__ == '__main__': test() ----------snip----------snip----------snip---------- -- KAJIYAMA, Tamito