1 |
"""Customized URLopener""" |
---|
2 |
|
---|
3 |
import urllib, getpass, socket, string, sys |
---|
4 |
|
---|
5 |
# Threshold for the FTP connection cache: CDURLopener.open_ftp prunes the
# cache once it holds more than this many entries.
MAXFTPCACHE = 10
---|
6 |
|
---|
7 |
class CDURLopener(urllib.URLopener): |
---|
8 |
|
---|
9 |
def __init__(self, proxies=None): |
---|
10 |
urllib.URLopener.__init__(self,proxies) |
---|
11 |
self._userObject = None |
---|
12 |
|
---|
13 |
|
---|
14 |
def setUserObject(self, userObject): |
---|
15 |
self._userObject = userObject |
---|
16 |
|
---|
17 |
|
---|
18 |
def open_ftp(self, url): |
---|
19 |
host, path = urllib.splithost(url) |
---|
20 |
if not host: raise IOError, ('ftp error', 'no host given') |
---|
21 |
host, port = urllib.splitport(host) |
---|
22 |
user, host = urllib.splituser(host) |
---|
23 |
|
---|
24 |
if user: passwd = getpass.getpass() |
---|
25 |
else: passwd = None |
---|
26 |
host = urllib.unquote(host) |
---|
27 |
user = urllib.unquote(user or '') |
---|
28 |
passwd = urllib.unquote(passwd or '') |
---|
29 |
host = socket.gethostbyname(host) |
---|
30 |
if not port: |
---|
31 |
import ftplib |
---|
32 |
port = ftplib.FTP_PORT |
---|
33 |
else: |
---|
34 |
port = int(port) |
---|
35 |
path, attrs = urllib.splitattr(path) |
---|
36 |
path = urllib.unquote(path) |
---|
37 |
dirs = string.splitfields(path, '/') |
---|
38 |
dirs, file = dirs[:-1], dirs[-1] |
---|
39 |
if dirs and not dirs[0]: dirs = dirs[1:] |
---|
40 |
key = (user, host, port, string.joinfields(dirs, '/')) |
---|
41 |
|
---|
42 |
if len(self.ftpcache) > MAXFTPCACHE: |
---|
43 |
|
---|
44 |
for k in self.ftpcache.keys(): |
---|
45 |
if k != key: |
---|
46 |
v = self.ftpcache[k] |
---|
47 |
del self.ftpcache[k] |
---|
48 |
v.close() |
---|
49 |
try: |
---|
50 |
if not self.ftpcache.has_key(key): |
---|
51 |
print 'Creating ftpwrapper: ',user,host,port,dirs |
---|
52 |
self.ftpcache[key] = \ |
---|
53 |
urllib.ftpwrapper(user, passwd, host, port, dirs) |
---|
54 |
if not file: type = 'D' |
---|
55 |
else: type = 'I' |
---|
56 |
for attr in attrs: |
---|
57 |
attr, value = urllib.splitvalue(attr) |
---|
58 |
if string.lower(attr) == 'type' and \ |
---|
59 |
value in ('a', 'A', 'i', 'I', 'd', 'D'): |
---|
60 |
type = string.upper(value) |
---|
61 |
(fp, retrlen) = self.ftpcache[key].retrfile(file, type) |
---|
62 |
if retrlen is not None and retrlen >= 0: |
---|
63 |
import mimetools, StringIO |
---|
64 |
headers = mimetools.Message(StringIO.StringIO( |
---|
65 |
'Content-Length: %d\n' % retrlen)) |
---|
66 |
else: |
---|
67 |
headers = noheaders() |
---|
68 |
return urllib.addinfourl(fp, headers, "ftp:" + url) |
---|
69 |
except urllib.ftperrors(), msg: |
---|
70 |
raise IOError, ('ftp error', msg), sys.exc_info()[2] |
---|
71 |
|
---|
72 |
def retrieve(self, url, filename=None, reporthook=None, blocksize=262144): |
---|
73 |
url = urllib.unwrap(url) |
---|
74 |
if self.tempcache and self.tempcache.has_key(url): |
---|
75 |
return self.tempcache[url] |
---|
76 |
type, url1 = urllib.splittype(url) |
---|
77 |
if not filename and (not type or type == 'file'): |
---|
78 |
try: |
---|
79 |
fp = self.open_local_file(url1) |
---|
80 |
hdrs = fp.info() |
---|
81 |
del fp |
---|
82 |
return url2pathname(urllib.splithost(url1)[1]), hdrs |
---|
83 |
except IOError, msg: |
---|
84 |
pass |
---|
85 |
fp = self.open(url) |
---|
86 |
headers = fp.info() |
---|
87 |
if not filename: |
---|
88 |
import tempfile |
---|
89 |
garbage, path = urllib.splittype(url) |
---|
90 |
garbage, path = urllib.splithost(path or "") |
---|
91 |
path, garbage = urllib.splitquery(path or "") |
---|
92 |
path, garbage = urllib.splitattr(path or "") |
---|
93 |
suffix = os.path.splitext(path)[1] |
---|
94 |
filename = tempfile.mktemp(suffix) |
---|
95 |
self.__tempfiles.append(filename) |
---|
96 |
result = filename, headers |
---|
97 |
if self.tempcache is not None: |
---|
98 |
self.tempcache[url] = result |
---|
99 |
tfp = open(filename, 'wb') |
---|
100 |
bs = blocksize |
---|
101 |
size = -1 |
---|
102 |
blocknum = 1 |
---|
103 |
if reporthook: |
---|
104 |
if headers.has_key("content-length"): |
---|
105 |
size = int(headers["Content-Length"]) |
---|
106 |
stayopen = reporthook(0, bs, size, self._userObject) |
---|
107 |
if stayopen==0: |
---|
108 |
raise KeyboardInterrupt |
---|
109 |
bytesread = 0 |
---|
110 |
block = fp.read(bs) |
---|
111 |
if reporthook: |
---|
112 |
stayopen = reporthook(1, bs, size, self._userObject) |
---|
113 |
if stayopen==0: |
---|
114 |
raise KeyboardInterrupt |
---|
115 |
while block: |
---|
116 |
tfp.write(block) |
---|
117 |
bytesread = bytesread + len(block) |
---|
118 |
|
---|
119 |
|
---|
120 |
|
---|
121 |
|
---|
122 |
|
---|
123 |
if block and reporthook: |
---|
124 |
stayopen = reporthook(blocknum, bs, size, self._userObject) |
---|
125 |
if stayopen==0: |
---|
126 |
raise KeyboardInterrupt |
---|
127 |
blocknum = blocknum + 1 |
---|
128 |
block = fp.read(bs) |
---|
129 |
|
---|
130 |
tfp.close() |
---|
131 |
del fp |
---|
132 |
del tfp |
---|
133 |
return result |
---|
134 |
|
---|
135 |
def sampleReportHook(blocknum, blocksize, size, userObj):
    """Example reporthook for CDURLopener.retrieve: print download progress.

    Arguments:
      blocknum  -- number of blocks transferred so far.
      blocksize -- size of each block in bytes.
      size      -- total size in bytes; retrieve passes -1 when the server
                   sent no Content-Length.
      userObj   -- the opener's user object (unused here).

    Returns 1 so that the transfer continues (0 would abort it).
    """
    transferred = blocknum * blocksize
    if size > 0:
        percent = min(100, int(100.0 * float(transferred) / float(size)))
        print("Read: %3d%% of %dK" % (percent, size // 1024))
    else:
        # Unknown or zero total: a percentage cannot be computed (dividing
        # by size would fail or go negative), so report the amount read.
        print("Read: %dK (size unknown)" % (transferred // 1024))
    return 1
---|
140 |
|
---|
141 |
if __name__ == '__main__':
    # Command-line driver: fetch URL into FILENAME, reading BLOCKSIZE bytes
    # at a time and printing progress through sampleReportHook.
    import sys

    if len(sys.argv) != 4:
        print('Usage: cdurllib.py URL filename blocksize')
        sys.exit(1)

    url, filename = sys.argv[1], sys.argv[2]
    blocksize = int(sys.argv[3])

    urlopener = CDURLopener()
    fname, headers = urlopener.retrieve(
        url, filename, sampleReportHook, blocksize)
    print('%s written' % fname)
---|