from typing import Any, Dict, Optional, Union

import cchardet
from requests import request, RequestException
from retrying import retry

from powerspider import logger
from powerspider.tools.Ua import ua
@retry(stop_max_attempt_number=3, retry_on_result=lambda x: x is None, wait_fixed=2000)
def downloader(
    url: str,
    method: Optional[str] = None,
    header: Optional[Dict[str, str]] = None,
    timeout: Optional[float] = None,
    binary: bool = False,
    **kwargs: Any
) -> Union[bytes, str, None]:
    """Fetch *url* and return its body, or ``None`` when the fetch fails.

    Returning ``None`` is what makes the ``retry`` decorator try again
    (``retry_on_result``); it stops after three attempts, using
    ``wait_fixed=2000`` between them.

    Args:
        url: Address to request.
        method: HTTP method; ``"GET"`` is used when this is falsy.
        header: Request headers; when falsy, a header dict holding only a
            ``User-Agent`` obtained from ``ua()`` is sent instead.
        timeout: Timeout forwarded to ``requests``; 5 is used when falsy.
        binary: When true, return the raw response bytes instead of text.
        **kwargs: Extra keyword arguments forwarded to ``requests.request``.

    Returns:
        For a 200 response, the body as ``bytes`` (``binary=True``) or as
        ``str`` decoded with the encoding detected by ``cchardet``. For any
        other status code, or when a ``RequestException`` is raised, the
        problem is logged and ``None`` is returned.
    """
    logger.info(f'Scraping {url}')
    request_timeout = timeout if timeout else 5
    request_headers = header if header else {'User-Agent': ua()}
    http_method = method if method else "GET"
    try:
        # The timeout has to be passed explicitly: requests applies none by
        # default, so without it a stalled server would block forever.
        response = request(method=http_method, url=url, headers=request_headers,
                           timeout=request_timeout, **kwargs)
        if response.status_code == 200:
            body = response.content
            if binary:
                return body
            # Detection yields None for empty or undetectable bodies; fall
            # back to UTF-8 rather than crashing in bytes.decode(None).
            encoding = cchardet.detect(body)['encoding'] or 'utf-8'
            return body.decode(encoding)
        if 200 < response.status_code < 400:
            logger.info(f"Redirect_URL: {response.url}")
        logger.error('Get invalid status code %s while scraping %s', response.status_code, url)
    except RequestException as e:
        logger.error(f'Error occurred while scraping {url}, Msg: {e}', exc_info=True)
    # Explicit None: this is the value the retry decorator keys on.
    return None
if __name__ == '__main__':
    # Manual smoke run: request the Baidu home page with an explicit GET
    # and print whatever downloader() hands back.
    page = downloader("https://www.baidu.com/", "GET")
    print(page)
Warning: get_headers(): SSL operation failed with code 1. OpenSSL Error messages:
error:14090086:SSL routines:ssl3_get_server_certificate:certificate verify failed in /mydata/web/wwwshanhubei/web/wp-content/themes/shanhuke/single.php on line 57
Warning: get_headers(): Failed to enable crypto in /mydata/web/wwwshanhubei/web/wp-content/themes/shanhuke/single.php on line 57
Warning: get_headers(https://static.shanhubei.com/qrcode/qrcode_viewid_11514.jpg): failed to open stream: operation failed in /mydata/web/wwwshanhubei/web/wp-content/themes/shanhuke/single.php on line 57