python + selenium + phantomjs cannot handle an https URL that reports `Adding certificate verification is strongly advised` -


I am trying to use python + selenium + phantomjs to handle an https request, but I fail to receive a response, and the server shows me "Adding certificate verification is strongly advised". Here is my get_request function.

import os
import random
import re

# NOTE(review): ``modulepath`` is referenced by this snippet but never defined
# in it. It is assumed to be a directory prefix ending in a path separator;
# the empty string resolves "dicts/..." against the current working directory.
modulepath = ""

# Fallback User-Agent values used when no usable dicts/user-agents.txt exists.
_DEFAULT_USER_AGENTS = [
    "mozilla/4.0 (mozilla/4.0; msie 7.0; windows nt 5.1; fdm; sv1)",
    "mozilla/4.0 (mozilla/4.0; msie 7.0; windows nt 5.1; fdm; sv1; .net clr 3.0.04506.30)",
    "mozilla/4.0 (windows; msie 7.0; windows nt 5.1; sv1; .net clr 2.0.50727)",
    "mozilla/4.0 (windows; u; windows nt 5.0; en-us) applewebkit/532.0 (khtml, gecko) chrome/3.0.195.33 safari/532.0",
    "mozilla/4.0 (windows; u; windows nt 5.1; en-us) applewebkit/525.19 (khtml, gecko) chrome/1.0.154.59 safari/525.19",
    "mozilla/4.0 (compatible; msie 6.0; linux i686 ; en) opera 9.70",
]


def get_random_x_forwarded_for():
    """Return a random dotted-quad string for use as an X-Forwarded-For value.

    Four distinct octets are drawn from 1..254; a draw whose first octet is
    10, 172 or 192 is rejected and retried.
    """
    octets = []
    while not octets or octets[0] in (10, 172, 192):
        octets = random.sample(range(1, 255), 4)
    return ".".join(str(octet) for octet in octets)


def get_random_ua():
    """Return a random User-Agent string.

    Candidates are read, one per line, from ``dicts/user-agents.txt`` under
    ``modulepath`` when that file exists and has at least one non-blank line;
    otherwise the built-in list is used. Trailing whitespace is removed.
    """
    # Check the same path that is opened (the original tested a differently
    # named file than the one it read).
    ua_path = modulepath + "dicts/user-agents.txt"
    candidates = []
    if os.path.exists(ua_path):
        with open(ua_path, "r") as ua_file:
            candidates = [line.rstrip() for line in ua_file if line.strip()]
    if not candidates:
        # Missing or empty file: fall back instead of failing on an empty list.
        candidates = _DEFAULT_USER_AGENTS
    return random.choice(candidates)


def _fetch_with_mechanicalsoup(url, cookie):
    """Fetch *url* with MechanicalSoup and return code, title and content."""
    import mechanicalsoup
    from bs4 import BeautifulSoup

    try:
        browser = mechanicalsoup.Browser(soup_config={"features": "lxml"})
        browser.session.headers.update({'user-agent': get_random_ua()})
        if cookie != "":
            browser.session.headers.update({'cookie': '%s' % cookie})
        browser.session.headers.update(
            {'x-forwarded-for': get_random_x_forwarded_for()})

        # verify=False skips certificate validation; this is what makes
        # urllib3 emit InsecureRequestWarning (a warning, not an error).
        result = browser.get(url, timeout=10, verify=False)
        code = result.status_code
        content = result.content
        import chardet
        # chardet may report no encoding at all; fall back to UTF-8 then.
        bytesencoding = chardet.detect(content)['encoding'] or "utf-8"
        content = content.decode(bytesencoding)
        title = BeautifulSoup(content, "lxml").title
        title_value = title.string if title is not None else None
    except Exception:
        # Best-effort fetch: any failure is reported through the result dict.
        code = 0
        title_value = "you may blocked or code doesn't handle ssl certificate well"
        content = 'can not html content time,may blocked server request'

    return {
        'code': code,
        'title': title_value,
        'content': content}


def _fetch_with_phantomjs(url, proxyurl, cookie):
    """Load *url* in PhantomJS through Selenium and summarise the page."""
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.desired_capabilities import (
        DesiredCapabilities,
    )

    code = None
    title = None
    content = None
    hasformaction = False
    formactionvalue = ""

    service_args_value = ['--ignore-ssl-errors=true', '--ssl-protocol=any']
    if proxyurl:
        # The text before the first ":" is the proxy type and the text after
        # the last "/" is the proxy address.
        proxytype = proxyurl.split(":")[0]
        proxyvaluewithtype = proxyurl.split("/")[-1]
        service_args_value += [
            '--proxy=%s' % proxyvaluewithtype,
            '--proxy-type=%s' % proxytype,
        ]

    # Stays None if PhantomJS fails to start, so cleanup below is safe.
    driver = None
    try:
        if cookie != "":
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.cookie"] = cookie
            dcap["phantomjs.page.settings.userAgent"] = get_random_ua()
            driver = webdriver.PhantomJS(
                service_args=service_args_value, desired_capabilities=dcap)
        else:
            driver = webdriver.PhantomJS(service_args=service_args_value)

        # Progress markers around the page load.
        print(111111111)
        driver.implicitly_wait(5)
        driver.set_page_load_timeout(5)

        driver.get(url)
        print(222222222222222)

        # No HTTP status is read from the driver here; a completed load is
        # reported as 200.
        code = 200
        title = driver.title
        content = driver.page_source

        submit_match = re.search(
            r'''(<.*type=('|")?submit('|")?.*>)''', content, re.I)
        if submit_match:
            hasformaction = True
            print(submit_match.group(1))

        print("len content :\n" + str(len(content)))
        print("title :\n" + title)

        # Page looks like a "not found" page: retry with the plain HTTP
        # client, keeping the caller's cookie.
        if re.search(r"(页面不存在)|(未找到页面)|(page not found)|(404)",
                     title + content, re.I):
            return _fetch_with_mechanicalsoup(url, cookie)
    except TimeoutException as e:
        print(e)
    finally:
        if driver is not None:
            driver.quit()

    return {
        'code': code,
        'title': title,
        'content': content,
        # True or False
        'hasformaction': hasformaction,
        # eg,https://www.baidu.com^a=1&b=2
        # eg,https://www.baidu.com/?a=1&b=2
        'formactionvalue': formactionvalue}


def get_request(url, by="mechanicalsoup", proxyurl="", cookie=""):
    """Fetch *url* and return a dict describing the response.

    Args:
        url: Address to request.
        by: ``"seleniumphantomjs"`` drives PhantomJS through Selenium; any
            other value uses MechanicalSoup.
        proxyurl: Proxy URL for the PhantomJS path; empty (or 0) for none.
        cookie: Cookie header value to send; empty for none.

    Returns:
        A dict with ``code``, ``title`` and ``content``. The PhantomJS path
        also includes ``hasformaction`` and ``formactionvalue`` unless it
        fell back to MechanicalSoup.
    """
    if by == "seleniumphantomjs":
        return _fetch_with_phantomjs(url, proxyurl, cookie)
    return _fetch_with_mechanicalsoup(url, cookie)


if __name__ == "__main__":
    b = get_request("https://www.zoomeye.org/search/advanced", by="seleniumphantomjs")
    # b = get_request("https://www.zoomeye.org/search/advanced", by="mechanicalsoup")
    print(b)

When I use b=get_request("https://www.zoomeye.org/search/advanced",by="seleniumphantomjs"), it prints 11111..1 and then gets stuck there without printing 2222.2. Then I tried b=get_request("https://www.zoomeye.org/search/advanced",by="mechanicalsoup"), and this time I got the debug output below:

/usr/local/lib/python3.6/site-packages/requests/packages/urllib3/connectionpool.py:852: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings InsecureRequestWarning)

I want to use python3 + selenium + phantomjs and get past this "Adding certificate verification is strongly advised" problem. Can anyone help me?

Later, I tried to use phantomjs directly, without selenium:

phantomjs --ignore-ssl-errors=true --ssl-protocol=any --web-security=false 1.js

1.js has the content below:

// Load one HTTPS page in PhantomJS and print its HTML to the console.
var webpage = require('webpage');
var page = webpage.create();
console.log('test666');
page.open('https://www.zoomeye.org/search/advanced', function (status) {
  // `status` is 'success' or 'fail'; report a failed load instead of
  // printing the content of a page that never loaded.
  if (status !== 'success') {
    console.log('failed to load page, status: ' + status);
    phantom.exit(1);
    return;
  }
  var content = page.content;
  console.log('content: ' + content);
  phantom.exit();
});

However, it doesn't work either; maybe this is a bug in phantomjs :(

Go through the URL below. It lists the command-line options you need in order to use HTTPS with phantomjs:

http://phantomjs.org/api/command-line.html

The main options required are below:

  • --web-security=[true|false] enables web security and forbids cross-domain XHR (default is true). Also accepted: [yes|no].
  • --ssl-protocol=[sslv3|sslv2|tlsv1|tlsv1.1|tlsv1.2|any] sets the SSL protocol for secure connections (default is SSLv3). Not all values may be supported, depending on the system OpenSSL library.
  • --ignore-ssl-errors=[true|false] ignores SSL errors, such as expired or self-signed certificate errors (default is false). Also accepted: [yes|no].

I have Java code that works fine for me:

/**
 * Builds the capabilities used to start PhantomJS with relaxed HTTPS
 * handling: web security off, any SSL protocol accepted, SSL errors ignored.
 *
 * @param os not used by this method
 * @return PhantomJS capabilities carrying the command-line arguments above
 */
private static Capabilities getphantomcapabilities(String os) {
    DesiredCapabilities capabilities = DesiredCapabilities.phantomjs();

    // Command-line switches handed to the phantomjs executable.
    ArrayList<String> cliArgsCap = new ArrayList<String>();
    cliArgsCap.add("--web-security=false");
    cliArgsCap.add("--ssl-protocol=any");
    cliArgsCap.add("--ignore-ssl-errors=true");

    capabilities.setCapability("takesScreenshot", true);
    capabilities.setJavascriptEnabled(true);
    capabilities.setCapability(
        PhantomJSDriverService.PHANTOMJS_CLI_ARGS, cliArgsCap);
    capabilities.setCapability(
        PhantomJSDriverService.PHANTOMJS_GHOSTDRIVER_CLI_ARGS,
        new String[] { "--loglevel=2" });
    return capabilities;
}

Convert this code to Python. It should work.

Hope it helps you :)


Comments

Popular posts from this blog

ubuntu - PHP script to find files of certain extensions in a directory, returns populated array when run in browser, but empty array when run from terminal -

php - How can i create a user dashboard -

javascript - How to detect toggling of the fullscreen-toolbar in jQuery Mobile? -