root/cdat/trunk/cdat/Packages/cdms/cdurllib.py

Revision 845, 5.5 kB (checked in by drach, 9 years ago)

- separated PCMDI extensions and modules from the Python distribution tree

- added NGI extensions to CDMS: LDAP, caching, progress gui

Line 
1 #!/usr/bin/env python
2
3 # Customized URLopener
4
5 import urllib, getpass, socket, string, sys
6
7 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
8
9 class CDURLopener(urllib.URLopener):
10
11     def __init__(self, proxies=None):
12         urllib.URLopener.__init__(self,proxies)
13         self._userObject = None
14
15     # Attach an object to be returned with callbacks
16     def setUserObject(self, userObject):
17         self._userObject = userObject
18
19     # Use FTP protocol
20     def open_ftp(self, url):
21         host, path = urllib.splithost(url)
22         if not host: raise IOError, ('ftp error', 'no host given')
23         host, port = urllib.splitport(host)
24         user, host = urllib.splituser(host)
25         # if user: user, passwd = splitpasswd(user)
26         if user: passwd = getpass.getpass()
27         else: passwd = None
28         host = urllib.unquote(host)
29         user = urllib.unquote(user or '')
30         passwd = urllib.unquote(passwd or '')
31         host = socket.gethostbyname(host)
32         if not port:
33             import ftplib
34             port = ftplib.FTP_PORT
35         else:
36             port = int(port)
37         path, attrs = urllib.splitattr(path)
38         path = urllib.unquote(path)
39         dirs = string.splitfields(path, '/')
40         dirs, file = dirs[:-1], dirs[-1]
41         if dirs and not dirs[0]: dirs = dirs[1:]
42         key = (user, host, port, string.joinfields(dirs, '/'))
43         # XXX thread unsafe!
44         if len(self.ftpcache) > MAXFTPCACHE:
45             # Prune the cache, rather arbitrarily
46             for k in self.ftpcache.keys():
47                 if k != key:
48                     v = self.ftpcache[k]
49                     del self.ftpcache[k]
50                     v.close()
51         try:
52             if not self.ftpcache.has_key(key):
53                 print 'Creating ftpwrapper: ',user,host,port,dirs
54                 self.ftpcache[key] = \
55                     urllib.ftpwrapper(user, passwd, host, port, dirs)
56             if not file: type = 'D'
57             else: type = 'I'
58             for attr in attrs:
59                 attr, value = urllib.splitvalue(attr)
60                 if string.lower(attr) == 'type' and \
61                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
62                     type = string.upper(value)
63             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
64             if retrlen is not None and retrlen >= 0:
65                 import mimetools, StringIO
66                 headers = mimetools.Message(StringIO.StringIO(
67                     'Content-Length: %d\n' % retrlen))
68             else:
69                 headers = noheaders()
70             return urllib.addinfourl(fp, headers, "ftp:" + url)
71         except urllib.ftperrors(), msg:
72             raise IOError, ('ftp error', msg), sys.exc_info()[2]
73
74     def retrieve(self, url, filename=None, reporthook=None, blocksize=262144):
75         url = urllib.unwrap(url)
76         if self.tempcache and self.tempcache.has_key(url):
77             return self.tempcache[url]
78         type, url1 = urllib.splittype(url)
79         if not filename and (not type or type == 'file'):
80             try:
81                 fp = self.open_local_file(url1)
82                 hdrs = fp.info()
83                 del fp
84                 return url2pathname(urllib.splithost(url1)[1]), hdrs
85             except IOError, msg:
86                 pass
87         fp = self.open(url)
88         headers = fp.info()
89         if not filename:
90             import tempfile
91             garbage, path = urllib.splittype(url)
92             garbage, path = urllib.splithost(path or "")
93             path, garbage = urllib.splitquery(path or "")
94             path, garbage = urllib.splitattr(path or "")
95             suffix = os.path.splitext(path)[1]
96             filename = tempfile.mktemp(suffix)
97             self.__tempfiles.append(filename)
98         result = filename, headers
99         if self.tempcache is not None:
100             self.tempcache[url] = result
101         tfp = open(filename, 'wb')
102         bs = blocksize
103         size = -1
104         blocknum = 1
105         if reporthook:
106             if headers.has_key("content-length"):
107                 size = int(headers["Content-Length"])
108             stayopen = reporthook(0, bs, size, self._userObject)
109             if stayopen==0:
110                 raise KeyboardInterrupt
111         bytesread = 0
112         block = fp.read(bs)
113         if reporthook:
114             stayopen = reporthook(1, bs, size, self._userObject)
115             if stayopen==0:
116                 raise KeyboardInterrupt
117         while block:
118             tfp.write(block)
119             bytesread = bytesread + len(block)
120 ##             print blocknum, bytesread, size,
121 ##             if blocknum*blocksize!=bytesread:
122 ##                 print ' (*)'
123 ##             else:
124 ##                 print
125             if block and reporthook:
126                 stayopen = reporthook(blocknum, bs, size, self._userObject)
127                 if stayopen==0:
128                     raise KeyboardInterrupt
129             blocknum = blocknum + 1
130             block = fp.read(bs)
131         fp.close()
132         tfp.close()
133         del fp
134         del tfp
135         return result
136
def sampleReportHook(blocknum, blocksize, size, userObj):
    """Example report hook: print transfer progress and keep going.

    Arguments:
    blocknum  -- number of blocks transferred so far.
    blocksize -- size of one block in bytes.
    size      -- total size in bytes; zero or negative when unknown
                 (CDURLopener.retrieve passes -1 if there is no
                 Content-Length header).
    userObj   -- object attached with setUserObject; unused here.

    Returns 1, which tells the caller to continue the transfer.
    """
    if size > 0:
        percent = min(100, int(100.0 * float(blocknum * blocksize) / float(size)))
        print("Read: %3d%% of %dK" % (percent, size // 1024))
    else:
        # Total unknown (or empty): a percentage cannot be computed, so
        # report the amount requested so far instead of dividing by size.
        print("Read: %dK (total size unknown)" % (blocknum * blocksize // 1024))
    return 1
142
if __name__ == '__main__':

    # Command-line driver: fetch URL into filename, reading blocksize bytes
    # at a time and printing progress through sampleReportHook.
    import sys

    arguments = sys.argv[1:]
    if len(arguments) != 3:
        print('Usage: cdurllib.py URL filename blocksize')
        sys.exit(1)

    url, filename = arguments[0], arguments[1]
    blocksize = int(arguments[2])

    urlopener = CDURLopener()
    fname, headers = urlopener.retrieve(
        url, filename, sampleReportHook, blocksize)
    print('%s written' % (fname,))
Note: See TracBrowser for help on using the browser.