101 lines
2.6 KiB
Plaintext
101 lines
2.6 KiB
Plaintext
From: jam at newimage.com (jam)
|
|
Date: Mon, 26 Apr 1999 06:19:59 -0400
|
|
Subject: timeout on urllib.urlopen?
|
|
In-Reply-To: <7g0r7c$gn9$1@nnrp1.dejanews.com>; from Kevin L on Mon, Apr 26, 1999 at 04:48:44AM +0000
|
|
References: <7g0r7c$gn9$1@nnrp1.dejanews.com>
|
|
Message-ID: <19990426061959.A18551@toast.internal>
|
|
Content-Length: 2298
|
|
X-UID: 123
|
|
|
|
On Mon, Apr 26, 1999 at 04:48:44AM +0000, Kevin L wrote:
|
|
>
|
|
> I'm trying to use urllib.urlopen() on a big list of urls, some of which are
|
|
> dead (they don't return a 404, just no response). And the function just waits.
|
|
> Is there any way to specify a timeout period for this function? thanks,
|
|
>
|
|
> Kevin
|
|
>
|
|
|
|
greetings,
|
|
|
|
attached, please find a short, lightly tested module that might do what you
|
|
are looking for. please let me know if this is what you need. it's a piece
|
|
of code I wrote for a larger application, and it seems to get the job done
|
|
nicely. suggestions for optimizations, etc, accepted.
|
|
|
|
regards,
|
|
J
|
|
--
|
|
|| visit gfd <http://quark.newimage.com:8080/>
|
|
|| psa member #293 <http://www.python.org/>
|
|
|| New Image Systems & Services, Inc. <http://www.newimage.com/>
|
|
-------------- next part --------------
|
|
import socket
|
|
import string
|
|
import select
|
|
|
|
from urlparse import urlparse, urlunparse
|
|
from httplib import HTTP, HTTP_PORT
|
|
|
|
from errno import EINPROGRESS, ETIMEDOUT
|
|
|
|
class localHTTP(HTTP):
    """HTTP client whose TCP connect phase is bounded by a timeout.

    Only the connect is bounded: the socket is switched back to blocking
    mode once connected, so waiting for a reply from a server that accepted
    the connection can still block indefinitely.

    NOTE(review): this relies on HTTP.__init__ calling self.connect() when a
    host is given -- confirm the httplib in use does so, otherwise this
    override (and the timeout) is never exercised.
    """

    def __init__(self, host='', port=0, timeout=10.0):
        """Remember the connect timeout, then defer to HTTP.__init__.

        host, port -- passed through to HTTP.__init__ unchanged.
        timeout    -- seconds to wait for the connection to be established.
        """
        # Set before HTTP.__init__ runs, presumably because that call may
        # invoke connect(), which reads this attribute.
        self.connect_timeout = timeout
        HTTP.__init__(self, host, port)

    def connect(self, host, port=0):
        """Connect to host:port, giving up after self.connect_timeout seconds.

        When port is not given, host may carry it as "name:port"; HTTP_PORT
        is used when neither supplies one.

        Raises socket.error for a nonnumeric port, for a connect that fails,
        and with ETIMEDOUT when the connection is not established in time.
        The socket is closed again whenever the connect does not succeed.
        """
        import sys

        if not port:
            i = string.find(host, ":")
            if i >= 0:
                host, port = host[:i], host[i+1:]
                try:
                    port = string.atoi(port)
                except string.atoi_error:
                    raise socket.error("nonnumeric port")
        if not port:
            port = HTTP_PORT

        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.debuglevel > 0:
            sys.stdout.write("connect: %s\n" % ((host, port),))

        connected = 0
        try:
            # Non-blocking so that connect() returns at once and select()
            # can enforce the timeout.
            self.sock.setblocking(0)
            try:
                # The address is passed as a tuple, the documented form.
                self.sock.connect((host, port))
            except socket.error:
                # Fetched via sys.exc_info() so the clause parses on every
                # Python version.
                why = sys.exc_info()[1]
                # EINPROGRESS just means "still connecting"; anything else
                # is a real failure.
                if why.args[0] != EINPROGRESS:
                    raise

            (r, w, e) = select.select([], [self.sock], [],
                                      self.connect_timeout)
            if not w:
                raise socket.error(ETIMEDOUT, "timeout during connect phase")

            # A socket becomes writable both when the connect succeeded and
            # when it failed (e.g. connection refused); SO_ERROR tells the
            # two apart.
            err = self.sock.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
            if err:
                raise socket.error(err, "error during connect phase")

            self.sock.setblocking(1)
            connected = 1
        finally:
            # Do not leak the descriptor when the connect did not complete.
            if not connected:
                self.sock.close()
|
|
|
|
def checkurl(url):
    """Send a HEAD request for url and return the server's reply.

    url -- absolute HTTP URL; its network location may include ":port".

    Returns None when url is None or empty; otherwise returns whatever
    HTTP.getreply() returns (per httplib, a (code, message, headers) tuple).

    Raises socket.error when the connection cannot be established,
    including a timeout during the connect phase.
    """
    if url is None or url == "":
        return None

    parts = urlparse(url)
    netloc = parts[1]

    # Rebuild the request target from path, params and query so that
    # "?query" is not dropped.  A bare "http://host" parses to an empty
    # path, which must be requested as "/".
    target = urlunparse(("", "", parts[2] or "/", parts[3], parts[4], ""))

    h = localHTTP(netloc)
    try:
        h.set_debuglevel(0)
        h.putrequest("HEAD", target)
        h.putheader("accept", "text/html")
        h.putheader("accept", "text/plain")
        h.endheaders()

        return h.getreply()
    finally:
        # Release the connection whether or not the request succeeded.
        h.close()
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: issue one HEAD request and show the reply.
    reply = checkurl("http://quark.newimage.com:8080/")
    print(reply)
|
|
|
|
|
|
|