1717ee22cdf77849e2e273566c877f95311e691b
[SubU] /
1 """
2 This module provides a pool manager that uses Google App Engine's
3 `URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
4
5 Example usage::
6
7     from pip._vendor.urllib3 import PoolManager
8     from pip._vendor.urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
9
10     if is_appengine_sandbox():
11         # AppEngineManager uses AppEngine's URLFetch API behind the scenes
12         http = AppEngineManager()
13     else:
14         # PoolManager uses a socket-level API behind the scenes
15         http = PoolManager()
16
17     r = http.request('GET', 'https://google.com/')
18
19 There are `limitations <https://cloud.google.com/appengine/docs/python/\
20 urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
21 the best choice for your application. There are three options for using
22 urllib3 on Google App Engine:
23
24 1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
25    cost-effective in many circumstances as long as your usage is within the
26    limitations.
27 2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
28    Sockets also have `limitations and restrictions
29    <https://cloud.google.com/appengine/docs/python/sockets/\
30    #limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
31    To use sockets, be sure to specify the following in your ``app.yaml``::
32
33         env_variables:
34             GAE_USE_SOCKETS_HTTPLIB : 'true'
35
3. If you are using `App Engine Flexible
   <https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
   :class:`PoolManager` without any configuration or special environment variables.
39 """
40
41 from __future__ import absolute_import
42
43 import io
44 import logging
45 import warnings
46
47 from ..exceptions import (
48     HTTPError,
49     HTTPWarning,
50     MaxRetryError,
51     ProtocolError,
52     SSLError,
53     TimeoutError,
54 )
55 from ..packages.six.moves.urllib.parse import urljoin
56 from ..request import RequestMethods
57 from ..response import HTTPResponse
58 from ..util.retry import Retry
59 from ..util.timeout import Timeout
60 from . import _appengine_environ
61
# The URLFetch API is an optional dependency. If it cannot be imported, bind
# the name to None so that importing this module still succeeds;
# AppEngineManager.__init__ checks for this and raises AppEnginePlatformError.
try:
    from google.appengine.api import urlfetch
except ImportError:
    urlfetch = None


# Module-level logger; AppEngineManager.urlopen uses it for redirect and retry
# debug messages.
log = logging.getLogger(__name__)
69
70
class AppEnginePlatformWarning(HTTPWarning):
    """Warning category emitted when URLFetch is used or ignores a setting."""
73
74
class AppEnginePlatformError(HTTPError):
    """Error raised when URLFetch is unavailable or rejects a request."""
77
78
class AppEngineManager(RequestMethods):
    """
    Connection manager for Google App Engine sandbox applications.

    This manager uses the URLFetch service directly instead of using the
    emulated httplib, and is subject to URLFetch limitations as described in
    the App Engine documentation `here
    <https://cloud.google.com/appengine/docs/python/urlfetch>`_.

    Notably it will raise an :class:`AppEnginePlatformError` if:

    * URLFetch is not available (``google.appengine.api.urlfetch`` could not
      be imported).
    * A request size is more than 10 megabytes.
    * A response size is more than 32 megabytes.
    * You use an unsupported request method such as OPTIONS.

    Beyond those cases, it will raise normal urllib3 errors.

    :param headers:
        Default headers, forwarded to :class:`RequestMethods`.
    :param retries:
        Default retry configuration used when a request does not supply its
        own. Falls back to ``Retry.DEFAULT``.
    :param validate_certificate:
        Forwarded to ``urlfetch.fetch(validate_certificate=...)``.
    :param urlfetch_retries:
        When true, URLFetch itself is allowed to follow redirects; a redirect
        response that is still returned is then treated as "too many
        redirects" if the retry configuration has ``raise_on_redirect`` set.
    """

    def __init__(
        self,
        headers=None,
        retries=None,
        validate_certificate=True,
        urlfetch_retries=True,
    ):
        # ``urlfetch`` is None when the App Engine API could not be imported.
        if not urlfetch:
            raise AppEnginePlatformError(
                "URLFetch is not available in this environment."
            )

        warnings.warn(
            "urllib3 is using URLFetch on Google App Engine sandbox instead "
            "of sockets. To use sockets directly instead of URLFetch see "
            "https://urllib3.readthedocs.io/en/1.26.x/reference/urllib3.contrib.html.",
            AppEnginePlatformWarning,
        )

        RequestMethods.__init__(self, headers)
        self.validate_certificate = validate_certificate
        self.urlfetch_retries = urlfetch_retries

        # NOTE(review): because of the ``or``, any falsy value (None, 0,
        # False) selects Retry.DEFAULT here.
        self.retries = retries or Retry.DEFAULT

    def __enter__(self):
        """Allow use as a context manager; there is nothing to acquire."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context without suppressing exceptions."""
        # Return False to re-raise any potential exceptions
        return False

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=None,
        redirect=True,
        timeout=Timeout.DEFAULT_TIMEOUT,
        **response_kw
    ):
        """
        Perform a request through ``urlfetch.fetch`` and return an
        :class:`HTTPResponse`.

        :param method: HTTP method name passed to URLFetch.
        :param url: Absolute URL to fetch.
        :param body: Request payload, or None.
        :param headers: Request headers; an empty dict is sent when falsy.
        :param retries: :class:`Retry` instance or a value accepted by
            ``Retry.from_int``; None selects this manager's default.
        :param redirect: Whether redirect responses should be followed.
        :param timeout: Number, :class:`Timeout`, or
            ``Timeout.DEFAULT_TIMEOUT`` to defer to URLFetch's default.
        :param response_kw: Extra keyword arguments forwarded to the
            :class:`HTTPResponse` constructor.
        :raises TimeoutError: URLFetch deadline exceeded.
        :raises AppEnginePlatformError: Request/response too large or method
            not supported by URLFetch.
        :raises ProtocolError: Invalid URL or download failure.
        :raises SSLError: Certificate validation failed.
        :raises MaxRetryError: Redirect or retry budget exhausted.
        """
        retries = self._get_retries(retries, redirect)

        try:
            # Only let URLFetch follow redirects when redirects are wanted and
            # the retry configuration still has redirect/total budget left.
            follow_redirects = redirect and retries.redirect != 0 and retries.total
            response = urlfetch.fetch(
                url,
                payload=body,
                method=method,
                headers=headers or {},
                allow_truncated=False,
                follow_redirects=self.urlfetch_retries and follow_redirects,
                deadline=self._get_absolute_timeout(timeout),
                validate_certificate=self.validate_certificate,
            )
        except urlfetch.DeadlineExceededError as e:
            raise TimeoutError(self, e)

        except urlfetch.InvalidURLError as e:
            # URLFetch reports oversized requests through InvalidURLError.
            if "too large" in str(e):
                raise AppEnginePlatformError(
                    "URLFetch request too large, URLFetch only "
                    "supports requests up to 10mb in size.",
                    e,
                )
            raise ProtocolError(e)

        except urlfetch.DownloadError as e:
            if "Too many redirects" in str(e):
                raise MaxRetryError(self, url, reason=e)
            raise ProtocolError(e)

        except urlfetch.ResponseTooLargeError as e:
            # Fixed: the two literals were previously joined without a space
            # ("supportsresponses").
            raise AppEnginePlatformError(
                "URLFetch response too large, URLFetch only supports "
                "responses up to 32mb in size.",
                e,
            )

        except urlfetch.SSLCertificateError as e:
            raise SSLError(e)

        except urlfetch.InvalidMethodError as e:
            raise AppEnginePlatformError(
                "URLFetch does not support method: %s" % method, e
            )

        http_response = self._urlfetch_response_to_http_response(
            response, retries=retries, **response_kw
        )

        # Handle redirect?
        redirect_location = redirect and http_response.get_redirect_location()
        if redirect_location:
            # Check for redirect response
            if self.urlfetch_retries and retries.raise_on_redirect:
                # URLFetch was responsible for following redirects, so still
                # seeing one here is reported as exhaustion.
                raise MaxRetryError(self, url, "too many redirects")
            else:
                # A 303 response switches the follow-up request to GET.
                if http_response.status == 303:
                    method = "GET"

                try:
                    retries = retries.increment(
                        method, url, response=http_response, _pool=self
                    )
                except MaxRetryError:
                    if retries.raise_on_redirect:
                        raise MaxRetryError(self, url, "too many redirects")
                    # Not raising on redirect: hand back the redirect response.
                    return http_response

                retries.sleep_for_retry(http_response)
                log.debug("Redirecting %s -> %s", url, redirect_location)
                redirect_url = urljoin(url, redirect_location)
                return self.urlopen(
                    method,
                    redirect_url,
                    body,
                    headers,
                    retries=retries,
                    redirect=redirect,
                    timeout=timeout,
                    **response_kw
                )

        # Check if we should retry the HTTP response.
        has_retry_after = bool(http_response.headers.get("Retry-After"))
        if retries.is_retry(method, http_response.status, has_retry_after):
            retries = retries.increment(method, url, response=http_response, _pool=self)
            log.debug("Retry: %s", url)
            retries.sleep(http_response)
            return self.urlopen(
                method,
                url,
                body=body,
                headers=headers,
                retries=retries,
                redirect=redirect,
                timeout=timeout,
                **response_kw
            )

        return http_response

    def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
        """
        Wrap a URLFetch response object in a urllib3 :class:`HTTPResponse`.

        The headers of ``urlfetch_resp`` are adjusted in place so that the
        resulting response does not claim encodings that no longer apply to
        the already-buffered content.

        :param urlfetch_resp: Response returned by ``urlfetch.fetch``.
        :param response_kw: Extra keyword arguments forwarded to both
            :class:`HTTPResponse` constructors.
        :returns: An :class:`HTTPResponse` whose ``original_response`` is a
            second :class:`HTTPResponse` built from the same content.
        """
        if is_prod_appengine():
            # Production GAE handles deflate encoding automatically, but does
            # not remove the encoding header.
            content_encoding = urlfetch_resp.headers.get("content-encoding")

            if content_encoding == "deflate":
                del urlfetch_resp.headers["content-encoding"]

        transfer_encoding = urlfetch_resp.headers.get("transfer-encoding")
        # We have a full response's content,
        # so let's make sure we don't report ourselves as chunked data.
        # The header is a comma-separated, case-insensitive list, so strip the
        # "chunked" token wherever it appears rather than only when it is the
        # sole value.
        if transfer_encoding:
            encodings = [enc.strip() for enc in transfer_encoding.split(",")]
            remaining = [enc for enc in encodings if enc.lower() != "chunked"]
            if len(remaining) != len(encodings):
                urlfetch_resp.headers["transfer-encoding"] = ",".join(remaining)

        original_response = HTTPResponse(
            # In order for decoding to work, we must present the content as
            # a file-like object.
            body=io.BytesIO(urlfetch_resp.content),
            msg=urlfetch_resp.header_msg,
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            **response_kw
        )

        return HTTPResponse(
            body=io.BytesIO(urlfetch_resp.content),
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            original_response=original_response,
            **response_kw
        )

    def _get_absolute_timeout(self, timeout):
        """
        Reduce a urllib3 timeout to the single deadline URLFetch accepts.

        :param timeout: ``Timeout.DEFAULT_TIMEOUT``, a :class:`Timeout`, or a
            plain value.
        :returns: None for the default sentinel, ``timeout.total`` for a
            :class:`Timeout`, otherwise ``timeout`` unchanged.
        """
        if timeout is Timeout.DEFAULT_TIMEOUT:
            return None  # Defer to URLFetch's default.
        if isinstance(timeout, Timeout):
            if timeout._read is not None or timeout._connect is not None:
                warnings.warn(
                    "URLFetch does not support granular timeout settings, "
                    "reverting to total or default URLFetch timeout.",
                    AppEnginePlatformWarning,
                )
            return timeout.total
        return timeout

    def _get_retries(self, retries, redirect):
        """
        Normalize ``retries`` to a :class:`Retry` instance.

        Non-``Retry`` values go through ``Retry.from_int`` with this manager's
        default. A warning is issued when connect, read or redirect counts
        are set.

        :param retries: :class:`Retry` instance or a value for
            ``Retry.from_int``.
        :param redirect: Redirect flag forwarded to ``Retry.from_int``.
        :returns: The resulting :class:`Retry` instance.
        """
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

        if retries.connect or retries.read or retries.redirect:
            warnings.warn(
                "URLFetch only supports total retries and does not "
                "recognize connect, read, or redirect retry parameters.",
                AppEnginePlatformWarning,
            )

        return retries
306
307
# Re-export the environment-detection functions from _appengine_environ so
# they stay importable from this module (public API compatibility).

# These names are bound at import time; is_prod_appengine is also called by
# AppEngineManager._urlfetch_response_to_http_response.
is_appengine = _appengine_environ.is_appengine
is_appengine_sandbox = _appengine_environ.is_appengine_sandbox
is_local_appengine = _appengine_environ.is_local_appengine
is_prod_appengine = _appengine_environ.is_prod_appengine
is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms