Selenium
install
apt-get install xvfb # yum install xorg-x11-server-Xvfb
apt-get install firefox # yum install gdk-pixbuf2, yum install firefox
# xvfb-run firefox http://google.com
sudo pip install selenium
sudo pip install pyvirtualdisplay
testing firefox
from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.common.by import By

# A virtual display is required on servers (machines without an X session).
# Uncomment the Display lines here and in the ``finally`` block to run headless.
# display = Display(visible=0, size=(1024, 768))
# display.start()
driver = webdriver.Firefox()
try:
    driver.get('http://www.google.com/search?ie=ISO-8859-1&hl=en&gbv=1&start=20&q=oracle')
    # find_element(By.<strategy>, value) works on old and new Selenium alike;
    # the find_element_by_* shortcuts no longer exist in current releases.
    news = driver.find_element(By.CLASS_NAME, '_K2')
    # The snippet this was copied from stored the text on ``self.search.news``;
    # ``self`` does not exist in a standalone script, so keep it in a local.
    news_text = news.text or None
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(driver.page_source)
finally:
    # quit() ends the WebDriver session and the browser process, even when a
    # step above raised; close() would only close the current window.
    driver.quit()
    # display.stop()
querying
from selenium.webdriver.common.by import By

# Each call returns the first matching element. The generic
# find_element(By.<strategy>, value) form replaces the legacy
# find_element_by_* shortcuts, which current Selenium releases no longer ship.
element = driver.find_element(By.ID, "passwd-id")
element = driver.find_element(By.NAME, "passwd")
element = driver.find_element(By.XPATH, "//input[@id='passwd-id']")
# Other locator strategies (legacy shortcut -> By constant):
#   find_element_by_link_text          -> By.LINK_TEXT
#   find_element_by_partial_link_text  -> By.PARTIAL_LINK_TEXT
#   find_element_by_tag_name           -> By.TAG_NAME
#   find_element_by_class_name         -> By.CLASS_NAME
#   find_element_by_css_selector       -> By.CSS_SELECTOR
# write some text
```python
# Write the given text into the element located above.
element.send_keys("some text")
```
# select
```python
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# Wrap a <select> element to get option-level helpers. ``index`` and ``value``
# below are placeholders for the caller's own values.
# find_element(By.<strategy>, value) replaces the legacy find_element_by_*
# shortcuts, which current Selenium releases no longer ship.
select = Select(driver.find_element(By.NAME, 'name'))
select.select_by_index(index)
select.select_by_visible_text("text")
select.select_by_value(value)

select = Select(driver.find_element(By.ID, 'id'))
# deselect_all() is only valid for a <select multiple>; Selenium raises
# NotImplementedError when the element does not support multiple selection.
select.deselect_all()
```
Extract after loading (implicit wait)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from pyvirtualdisplay import Display
def getElementValue(driver, search_type, css_class):
    """Return the text of the first element matching a locator, or None.

    Args:
        driver: WebDriver (or any object exposing ``find_element(by, value)``).
        search_type: Locator strategy such as ``By.CLASS_NAME`` or
            ``By.CSS_SELECTOR``. It is handed to the driver unchanged, so any
            strategy the driver understands can be used.
        css_class: Locator value (a class name, a CSS selector, ...).

    Returns:
        The element's text, or ``None`` when the text is empty or the lookup
        failed. A failed lookup also prints a "not found" message.
    """
    try:
        element = driver.find_element(search_type, css_class)
        # Read the text once inside the guard, so a failure while reading it
        # is reported the same way as a failed lookup.
        text = element.text
    except Exception:
        # Deliberately best-effort: a missing component must not abort the
        # scrape. ``Exception`` (rather than a bare ``except``) still lets
        # KeyboardInterrupt and SystemExit propagate.
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("not found component <" + css_class + ">")
        return None
    return text or None
# Run Firefox inside a virtual display so the script also works on servers.
display = Display(visible=0, size=(1024, 768))
display.start()
try:
    driver = webdriver.Firefox()
    try:
        # Implicit wait: element lookups keep retrying for up to 5 seconds.
        driver.implicitly_wait(5)
        driver.get('https://www.google.com/search?q=rowe+digital')
        twitter = getElementValue(driver, By.CLASS_NAME, '_ksh') #_ksh
        wikipedia = getElementValue(driver, By.CLASS_NAME, 'kno-ecr-pt')
        sitelinks = getElementValue(driver, By.CSS_SELECTOR, 'table.nrgt')
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("== All OK==")
    finally:
        # quit() ends the WebDriver session and the browser process, even when
        # a step above raised; close() would only close the current window.
        driver.quit()
finally:
    # Always release the virtual display, whatever happened above.
    display.stop()
Extract after loading (explicit wait)
```py
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from pyvirtualdisplay import Display
def getElementValue(driver, search_type, css_class):
    """Wait for an element to appear and return its text, or None.

    Args:
        driver: WebDriver instance to query.
        search_type: Locator strategy such as ``By.CLASS_NAME`` or
            ``By.CSS_SELECTOR``.
        css_class: Locator value (a class name, a CSS selector, ...).

    Returns:
        The element's text, or ``None`` when the text is empty or the element
        did not appear within 10 seconds. A timeout also prints a message.
    """
    try:
        # Explicit wait: poll for up to 10 seconds until the element is
        # present in the DOM. To also require that it is displayed, use
        # EC.visibility_of_element_located((search_type, css_class)) instead.
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((search_type, css_class))
        )
        text = element.text
    except TimeoutException:
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("Selenium timeout for <" + css_class + ">")
        return None
    return text or None
# Run Firefox inside a virtual display so the script also works on servers.
display = Display(visible=0, size=(1024, 768))
display.start()

# NOTE(review): no driver/display cleanup is visible in this part of the
# file; confirm that driver.quit() and display.stop() follow.
driver = webdriver.Firefox()
driver.get('https://www.google.com/search?q=rowe+digital')
twitter = getElementValue(driver, By.CLASS_NAME, '_ksh') #_ksh
wikipedia = getElementValue(driver, By.CLASS_NAME, 'kno-ecr-pt')
sitelinks = getElementValue(driver, By.CSS_SELECTOR, 'table.nrgt')