urllib and urllib2

windard · windard · commit d23ea378c896 · 2020-05-04T18:44:04.000+08:00
Change-Id: I86cf915afa565735874e9a96b945ef672b8a6398
diff --git a/README.md b/README.md
@@ -295,6 +295,10 @@ python的强大之处有很大的一方面在于它有各种各样非常强大
 
 ## [abc](content/abc.md)
 
+## [urllib](content/urllib.md)
+
+## [urllib2](content/urllib2.md)
+
 ## [tools](content/tools.md)
 
 ## [Other_thing](content/other_thing.md)
diff --git a/code/urllib2_basic_auth.py b/code/urllib2_basic_auth.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+import urllib2
+
+# first try: no credentials registered yet (presumably rejected by the server)
+try:
+    resp = urllib2.urlopen("http://httpbin.org/basic-auth/admin/password")
+    print resp.read()
+except Exception as e:
+    print "error", e
+
+
+# with basic auth: register credentials for the protected URL
+basic_auth = urllib2.HTTPBasicAuthHandler()
+basic_auth.add_password(
+    realm="Fake Realm",  # authentication realm
+    uri="http://httpbin.org/basic-auth/admin/password",  # protected resource URL
+    user='admin',  # username
+    passwd='password'  # password
+)
+
+opener = urllib2.build_opener(basic_auth)
+urllib2.install_opener(opener)  # later urllib2.urlopen() calls go through this opener
+
+
+# second try: same URL, now issued through the opener carrying the auth handler
+try:
+    resp = urllib2.urlopen("http://httpbin.org/basic-auth/admin/password")
+    print resp.read()
+except Exception as e:
+    print "error", e
diff --git a/code/urllib2_cookies.py b/code/urllib2_cookies.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+import cookielib
+import urllib2
+
+
+def creat_cookie(name, value, **kwargs):  # build a cookielib.Cookie from name/value
+    result = {  # default value for every base Cookie field
+        'version': 0,
+        'name': name,
+        'value': value,
+        'port': None,
+        'domain': '',
+        'path': '/',
+        'secure': False,
+        'expires': None,
+        'discard': True,
+        'comment': None,
+        'comment_url': None,
+        'rest': {'HttpOnly': None},
+        'rfc2109': False,
+    }
+    result.update(kwargs)  # caller-supplied fields override the defaults above
+    result['port_specified'] = bool(result['port'])  # flags derived from final values
+    result['domain_specified'] = bool(result['domain'])
+    result['domain_initial_dot'] = result['domain'].startswith('.')
+    result['path_specified'] = bool(result['path'])
+
+    return cookielib.Cookie(**result)  # every dict key is passed as a keyword argument
+
+
+def header():  # demo: send a hand-written Cookie header through a cookie-aware opener
+    cookie_jar = cookielib.CookieJar()
+    cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar)
+    opener = urllib2.build_opener(cookie_handler)
+
+    request = urllib2.Request("http://httpbin.org/cookies")
+    request.add_header("Cookie", "name=windard")  # set on the request, not via the jar
+
+    resp = opener.open(request)
+    print resp.read()
+
+    for cookie in cookie_jar:  # print whatever cookies the jar holds after the request
+        print cookie.name, ":", cookie.value
+
+
+def main():  # demo: pre-load cookies into the jar, then request through the opener
+    cookie_jar = cookielib.CookieJar()
+    cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar)
+    opener = urllib2.build_opener(cookie_handler)
+    cookie_jar.set_cookie(creat_cookie("name", "Windard"))  # built by creat_cookie()
+    cookie_jar.set_cookie(creat_cookie("location", "Shanghai"))
+
+    resp = opener.open("http://httpbin.org/cookies")
+    print resp.read()
+
+    for cookie in cookie_jar:  # print the jar's contents after the request
+        print cookie.name, ":", cookie.value
+
+
+if __name__ == '__main__':
+    header()  # only the manual-header demo runs; main() is defined but not called here
diff --git a/code/urllib2_get.py b/code/urllib2_get.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+
+import urllib2
+
+
+# send the request
+resp = urllib2.urlopen("http://httpbin.org/get")
+
+# the return value is a file-like object; read() returns the response body
+print resp.read()
+
+print "HTTP code", resp.code
+print "HTTP msg", resp.msg
+print "HTTP Status Code:", resp.getcode()
+print "HTTP Request Url:", resp.geturl()
+print "HTTP Response Headers:"
+print resp.info()
+print "HTTP Content-Length:", resp.info().get("Content-Length")
diff --git a/code/urllib2_headers.py b/code/urllib2_headers.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+import urllib2
+
+
+headers = {  # custom request headers passed to the Request constructor
+    "From": "China",
+    "Year": "2020",
+}
+
+data = {  # form fields, urlencoded below and attached as the request body
+    "name": "windard",
+    "country": "china",
+}
+
+request = urllib2.Request("http://httpbin.org/post", headers=headers)
+request.add_data(urllib.urlencode(data))  # attach the urlencoded form as the body
+request.add_header("To", "USA")  # one more header, added after construction
+
+resp = urllib2.urlopen(request)
+print resp.read()
diff --git a/code/urllib2_proxy.py b/code/urllib2_proxy.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+import urllib2
+
+
+def urllib_proxy():  # demo: urllib takes the proxy mapping per call via `proxies`
+    resp = urllib.urlopen("http://httpbin.org/ip", proxies={
+        "http": "http://117.69.152.162:8691"
+    })
+    print resp.read()
+
+
+def urllib2_proxy():  # demo: urllib2 configures the proxy through a ProxyHandler
+    proxy_handler = urllib2.ProxyHandler({
+        "http": "http://117.69.152.162:8691"
+    })
+    opener = urllib2.build_opener(proxy_handler)
+    urllib2.install_opener(opener)  # module-wide: affects later urllib2.urlopen() calls
+
+    resp = urllib2.urlopen("http://httpbin.org/ip")
+    print resp.read()
+
+
+if __name__ == '__main__':  # run both proxy demos in sequence
+    urllib_proxy()
+    urllib2_proxy()
diff --git a/code/urllib_data.py b/code/urllib_data.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+
+data = {  # form fields to submit
+    "name": "windard",
+    "country": "china",
+}
+
+data_string = urllib.urlencode(data)
+
+# send the request, passing the urlencoded form as the second argument
+resp = urllib.urlopen("http://httpbin.org/post", data_string)
+
+# the return value is a file-like object; read() returns the response body
+print resp.read()
diff --git a/code/urllib_download.py b/code/urllib_download.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+
+url = "http://techslides.com/demos/sample-videos/small.mp4"
+filename = url.split('/')[-1]  # last path segment of the URL: "small.mp4"
+
+download_name, headers = urllib.urlretrieve(url, filename)  # method 1: urlretrieve
+
+print "filename: ", download_name
+print "headers : "
+print headers
+
+urllib.urlcleanup()  # clean up after urlretrieve
+
+
+resp = urllib.urlopen(url)  # method 2: stream the body to disk in 1 KiB chunks
+with open(filename, "wb") as f:  # binary mode: the mp4 bytes must be written unmodified
+    chunk = resp.read(1024)
+    while chunk:  # read() returns an empty string at end of stream
+        f.write(chunk)
+        chunk = resp.read(1024)
+
+print "filename: ", filename
diff --git a/code/urllib_get.py b/code/urllib_get.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+
+# send the request
+resp = urllib.urlopen("http://httpbin.org/get")
+
+# the return value is a file-like object; read() returns the response body
+print resp.read()
+
+print "HTTP Status Code:", resp.getcode()
+print "HTTP Request Url:", resp.geturl()
+print "HTTP Response Headers:"
+print resp.info()
+print "HTTP Content-Length:", resp.info().get("Content-Length")
diff --git a/code/urllib_param.py b/code/urllib_param.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+
+
+param = {  # query parameters to append to the URL
+    "name": "windard",
+    "country": "china",
+}
+
+query_string = urllib.urlencode(param)
+
+# send the request with the encoded parameters appended after "?"
+resp = urllib.urlopen("http://httpbin.org/get"+"?"+query_string)
+
+# the return value is a file-like object; read() returns the response body
+print resp.read()
diff --git a/code/urllib_quote.py b/code/urllib_quote.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+
+params = "https://windard.com"  # sample string to encode and decode
+
+print urllib.quote(params)  # encoded with quote()
+print urllib.quote_plus(params)  # encoded with quote_plus()
+print urllib.unquote(urllib.quote(params))  # quote() then unquote() round trip
+print urllib.unquote_plus(urllib.quote_plus(params))  # quote_plus() round trip
diff --git a/content/images/http_basic_auth_demo.png b/content/images/http_basic_auth_demo.png
diff --git a/content/urllib.md b/content/urllib.md
diff --git a/content/urllib2.md b/content/urllib2.md