root/cdat/trunk/Packages/cdms/Lib/cdurllib.py

Revision 2109, 5.5 kB (checked in by dubois, 7 years ago)

Rearrange repository; enable separate tarballs.

cdat/Tars -> exsrc
cdat/Packages -> Packages

Line 
1 """Customized URLopener"""
2
3 import urllib, getpass, socket, string, sys
4
5 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
6
7 class CDURLopener(urllib.URLopener):
8
9     def __init__(self, proxies=None):
10         urllib.URLopener.__init__(self,proxies)
11         self._userObject = None
12
13     # Attach an object to be returned with callbacks
14     def setUserObject(self, userObject):
15         self._userObject = userObject
16
17     # Use FTP protocol
18     def open_ftp(self, url):
19         host, path = urllib.splithost(url)
20         if not host: raise IOError, ('ftp error', 'no host given')
21         host, port = urllib.splitport(host)
22         user, host = urllib.splituser(host)
23         # if user: user, passwd = splitpasswd(user)
24         if user: passwd = getpass.getpass()
25         else: passwd = None
26         host = urllib.unquote(host)
27         user = urllib.unquote(user or '')
28         passwd = urllib.unquote(passwd or '')
29         host = socket.gethostbyname(host)
30         if not port:
31             import ftplib
32             port = ftplib.FTP_PORT
33         else:
34             port = int(port)
35         path, attrs = urllib.splitattr(path)
36         path = urllib.unquote(path)
37         dirs = string.splitfields(path, '/')
38         dirs, file = dirs[:-1], dirs[-1]
39         if dirs and not dirs[0]: dirs = dirs[1:]
40         key = (user, host, port, string.joinfields(dirs, '/'))
41         # XXX thread unsafe!
42         if len(self.ftpcache) > MAXFTPCACHE:
43             # Prune the cache, rather arbitrarily
44             for k in self.ftpcache.keys():
45                 if k != key:
46                     v = self.ftpcache[k]
47                     del self.ftpcache[k]
48                     v.close()
49         try:
50             if not self.ftpcache.has_key(key):
51                 print 'Creating ftpwrapper: ',user,host,port,dirs
52                 self.ftpcache[key] = \
53                     urllib.ftpwrapper(user, passwd, host, port, dirs)
54             if not file: type = 'D'
55             else: type = 'I'
56             for attr in attrs:
57                 attr, value = urllib.splitvalue(attr)
58                 if string.lower(attr) == 'type' and \
59                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
60                     type = string.upper(value)
61             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
62             if retrlen is not None and retrlen >= 0:
63                 import mimetools, StringIO
64                 headers = mimetools.Message(StringIO.StringIO(
65                     'Content-Length: %d\n' % retrlen))
66             else:
67                 headers = noheaders()
68             return urllib.addinfourl(fp, headers, "ftp:" + url)
69         except urllib.ftperrors(), msg:
70             raise IOError, ('ftp error', msg), sys.exc_info()[2]
71
72     def retrieve(self, url, filename=None, reporthook=None, blocksize=262144):
73         url = urllib.unwrap(url)
74         if self.tempcache and self.tempcache.has_key(url):
75             return self.tempcache[url]
76         type, url1 = urllib.splittype(url)
77         if not filename and (not type or type == 'file'):
78             try:
79                 fp = self.open_local_file(url1)
80                 hdrs = fp.info()
81                 del fp
82                 return url2pathname(urllib.splithost(url1)[1]), hdrs
83             except IOError, msg:
84                 pass
85         fp = self.open(url)
86         headers = fp.info()
87         if not filename:
88             import tempfile
89             garbage, path = urllib.splittype(url)
90             garbage, path = urllib.splithost(path or "")
91             path, garbage = urllib.splitquery(path or "")
92             path, garbage = urllib.splitattr(path or "")
93             suffix = os.path.splitext(path)[1]
94             filename = tempfile.mktemp(suffix)
95             self.__tempfiles.append(filename)
96         result = filename, headers
97         if self.tempcache is not None:
98             self.tempcache[url] = result
99         tfp = open(filename, 'wb')
100         bs = blocksize
101         size = -1
102         blocknum = 1
103         if reporthook:
104             if headers.has_key("content-length"):
105                 size = int(headers["Content-Length"])
106             stayopen = reporthook(0, bs, size, self._userObject)
107             if stayopen==0:
108                 raise KeyboardInterrupt
109         bytesread = 0
110         block = fp.read(bs)
111         if reporthook:
112             stayopen = reporthook(1, bs, size, self._userObject)
113             if stayopen==0:
114                 raise KeyboardInterrupt
115         while block:
116             tfp.write(block)
117             bytesread = bytesread + len(block)
118 ##             print blocknum, bytesread, size,
119 ##             if blocknum*blocksize!=bytesread:
120 ##                 print ' (*)'
121 ##             else:
122 ##                 print
123             if block and reporthook:
124                 stayopen = reporthook(blocknum, bs, size, self._userObject)
125                 if stayopen==0:
126                     raise KeyboardInterrupt
127             blocknum = blocknum + 1
128             block = fp.read(bs)
129         # fp.close()
130         tfp.close()
131         del fp
132         del tfp
133         return result
134
def sampleReportHook(blocknum, blocksize, size, userObj):
    """Example report hook for CDURLopener.retrieve: print progress.

    blocknum  -- block counter supplied by the caller.
    blocksize -- bytes per block.
    size      -- total size in bytes; negative when it is not known.
    userObj   -- object attached to the opener; unused here.

    Always returns 1, which tells retrieve() to continue the transfer.
    """
    if size < 0:
        # retrieve() reports size=-1 when there is no Content-Length
        # header; a percentage cannot be computed in that case.
        print("Read: %dK (total size unknown)" % (blocknum * blocksize // 1024,))
        return 1
    sizekb = size // 1024
    if size == 0:
        # Nothing to transfer; avoid dividing by zero.
        percent = 100
    else:
        # blocknum*blocksize can overshoot on the last block; cap at 100.
        percent = min(100, int(100.0 * float(blocknum * blocksize) / float(size)))
    print("Read: %3d%% of %dK" % (percent, sizekb))
    return 1
140
if __name__ == '__main__':
    # Command-line driver: download URL into filename, reading blocksize
    # bytes at a time and printing progress through sampleReportHook.
    import sys

    args = sys.argv[1:]
    if len(args) != 3:
        print('Usage: cdurllib.py URL filename blocksize')
        sys.exit(1)

    url, filename = args[0], args[1]
    blocksize = int(args[2])

    urlopener = CDURLopener()
    fname, headers = urlopener.retrieve(
        url, filename, sampleReportHook, blocksize)
    print('%s written' % (fname,))
Note: See TracBrowser for help on using the browser.