prev | Version 1107 (Mon Nov 27 20:46:08 2006) | next |
stdin
, stdout
, and zero/nonzero exit codes
"24.153.22.195"
"www.third-bit.com"
nslookup
to talk to DNS directly
Figure 23.1: Sockets
import sys, socket buffer_size = 1024 # bytes host = '127.0.0.1' # local machine port = 19073 # hope nobody else is using it... message = 'ping!' # what to send # AF_INET means 'Internet socket'. # SOCK_STREAM means 'TCP'. sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((host, port)) # Send the message. sock.send(message) # Receive and display the reply. data = sock.recv(buffer_size) print 'client received', `data` # Tidy up. sock.close()
client received 'pong!'
import sys, socket buffer_size = 1024 # bytes host = '' # empty string means 'this machine' port = 19073 # must agree with client # Create and bind a socket. s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind((host, port)) # Wait for a connection request. s.listen(True) sock, addr = s.accept() print 'Connected by', addr # Receive and display a message. data = sock.recv(buffer_size) print 'server saw', str(data) # Replace vowels in reply. data = data.replace('i', 'o') sock.send(data) # Tidy up. sock.close()
Connected by ('127.0.0.1', 1297) server saw ping!
Figure 23.3: HTTP Request
"GET"
: to fetch information
"POST"
: to submit form data or upload files
/index.html
"HTTP/1.0"
"HTTP/1.1"
"Accept: text/html"
"Accept-Language: en, fr"
"If-Modified-Since: 16-May-2005"
"Content-Length"
header tells the server how many bytes to read
Figure 23.4: HTTP Response
Code | Name | Meaning |
---|---|---|
100 | Continue | Client should continue sending data |
200 | OK | The request has succeeded |
204 | No Content | The server has completed the request, but doesn't need to return any data |
301 | Moved Permanently | The requested resource has moved to a new permanent location |
307 | Temporary Redirect | The requested resource is temporarily at a different location |
400 | Bad Request | The request is badly formatted |
401 | Unauthorized | The request requires authentication |
404 | Not Found | The requested resource could not be found |
408 | Request Timeout | The server gave up waiting for the client |
500 | Internal Server Error | An error occurred in the server that prevented it fulfilling the request |
601 | Connection Timed Out | The server did not respond before the connection timed out (non-standard: HTTP itself only defines codes in the 1xx-5xx ranges) |
Table 23.1: HTTP Response Codes |
import sys, socket buffer_size = 1024 HttpRequest = '''GET /greeting.html HTTP/1.0 ''' sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('www.third-bit.com', 80)) sock.send(HttpRequest) response = '' while True: data = sock.recv(buffer_size) if not data: break response += data sock.close() print response
HTTP/1.1 200 OK Date: Fri, 03 Mar 2006 18:12:55 GMT Server: Apache/2.0.54 (Debian GNU/Linux) Last-Modified: Fri, 03 Mar 2006 18:12:23 GMT Content-Length: 92 Content-Type: text/html <html> <head><title>Greeting Page</title></head> <body> <h1>Greetings!</h1> </body> </html>
sock.connect
are deliberate
urllib
urllib.urlopen(URL)
does what your browser would do if you gave it the URLimport urllib instream = urllib.urlopen("http://www.third-bit.com/greeting.html") lines = instream.readlines() instream.close() for line in lines: print line,
readlines
wouldn't do the right thing if the thing being read was an image
read
to grab the bytes in that caseimport sys, urllib, re url = sys.argv[1] instream = urllib.urlopen(url) page = instream.read() instream.close() links = re.findall(r'href=\"[^\"]+\"', page) temp = set() for x in links: x = x[6:-1] # strip off 'href="' and '"' if x.startswith('http://'): temp.add(x) links = list(temp) links.sort() for x in links: print x
$ python spider.py http://www.google.ca
http://groups.google.ca/grphp?hl=en&tab=wg&ie=UTF-8
http://news.google.ca/nwshp?hl=en&tab=wn&ie=UTF-8
http://scholar.google.com/schhp?hl=en&tab=ws&ie=UTF-8
http://www.google.ca/fr
http://www.google.ca?q=Python
searches for pages related to Python
"?"
separates the parameters from the rest of the URL
"&"
http://www.google.ca/search?q=Python&client=firefox
"?"
or "&"
in a parameter?
"%"
followed by a 2-digit hexadecimal code
"+"
Character | Encoding |
---|---|
"#" | %23 |
"$" | %24 |
"%" | %25 |
"&" | %26 |
"+" | %2B |
"," | %2C |
"/" | %2F |
":" | %3A |
";" | %3B |
"=" | %3D |
"?" | %3F |
"@" | %40 |
Table 23.2: URL Encoding |
http://www.google.ca/search?q=grade+%3D+A%2B
urllib
has functions to make this easy
urllib.quote(str)
replaces special characters in str
with escape sequences
urllib.unquote(str)
replaces escape sequences with characters
urllib.urlencode(params)
takes a dictionary and constructs the entire query parameter stringimport urllib print urllib.urlencode({'surname' : 'Von Neumann', 'forename' : 'John'})
surname=Von+Neumann&forename=John
Figure 23.5: Web Services
PyAmazon
turns parameters into URL, and converts the XML reply into Python objectsimport sys, amazon # Format multiple authors' names nicely. def prettyName(arg): if type(arg) in (list, tuple): arg = ', '.join(arg[:-1]) + ' and ' + arg[-1] return arg if __name__ == '__main__': # Get information. key, asin = sys.argv[1], sys.argv[2] amazon.setLicense(key) items = amazon.searchByASIN(asin) # Handle errors. if not items: print 'Nothing found for', asin if len(items) > 1: print len(items), 'items found for', asin # Display information. item = items[0] productName = item.ProductName ourPrice = item.OurPrice authors = prettyName(item.Authors.Author) print '%s: %s (%s)' % (authors, productName, ourPrice)
$ python findbook.py 123ABCDEFGHIJKL4MN56 0974514071
Greg Wilson: Data Crunching : Solve Everyday Problems Using Java, Python, and more. ($18.87)
prev | Copyright © 2005-06 Python Software Foundation. | next |