versiones guia español actualizar python selenium xpath selenium-webdriver web-scraping

python - guia - qgis español



Hacer clic en elementos que no están visibles (donde hay un control deslizante) (2)

No se puede hacer clic en un elemento cuando no está visible en Selenium. Deberá usar los botones de desplazamiento para hacer clic en todos los elementos.

¿Hay alguna manera de hacer clic en elementos que están fuera de la vista en Selenium? Intento hacer clic en todos los elementos del control deslizante para poder extraer los datos de cada página (scraping). Sin embargo, la ejecución falla con el siguiente error:

Traceback (most recent call last): File "C:/Users/Bain3/PycharmProjects/untitled4/TOPBETTA.py", line 1329, in <module> clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, ''("//*[@class="name"]//span")[%s]'' % str(index + 1)))) File "C:/Users/Bain3/Anaconda3/lib/site-packages/selenium/webdriver/support/wait.py", line 80, in until raise TimeoutException(message, screen, stacktrace) selenium.common.exceptions.TimeoutException: Message:

El href parece apuntar siempre a la página predeterminada (https://www.topbetta.com.au/sports/), como se ve en la imagen, por lo que no es posible extraer el href y navegar a cada página individualmente.

Pude extraer estos datos con WinAutomation. ¿Alguna idea sobre cómo puedo hacerlo con Selenium?

driver.execute_script(''document.getElementByxpath("//[@class="name"]//span").style.visibility = "visible";'')

Lamentablemente, el código anterior no ayudó y produjo el siguiente error:

Traceback (most recent call last): File "C:/Users/Bain3/PycharmProjects/untitled4/TOPBETTA.py", line 1329, in <module> driver.execute_script(''document.getElementByxpath("//*[@class="name"]//span").style.visibility = "visible";'') File "C:/Users/Bain3/Anaconda3/lib/site-packages/selenium/webdriver/remote/webdriver.py", line 532, in execute_script ''args'': converted_args})[''value''] File "C:/Users/Bain3/Anaconda3/lib/site-packages/selenium/webdriver/remote/webdriver.py", line 297, in execute self.error_handler.check_response(response) File "C:/Users/Bain3/Anaconda3/lib/site-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response raise exception_class(message, screen, stacktrace) selenium.common.exceptions.WebDriverException: Message: unknown error: Runtime.evaluate threw exception: SyntaxError: missing ) after argument list (Session info: chrome=61.0.3163.100) (Driver info: chromedriver=2.31.488763 (092de99f48a300323ecf8c2a4e2e7cab51de5ba8),platform=Windows NT 10.0.16299 x86_64)

Code: try: os.remove(''vtg121.csv'') except OSError: pass driver.get(''https://www.topbetta.com.au/sports/football/'') #SCROLL_PAUSE_TIME = 0.5 from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC #clickMe = wait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, (''//*[@id="TopPromotionBetNow"]'')))) #if driver.find_element_by_css_selector(''#TopPromotionBetNow''): #driver.find_element_by_css_selector(''#TopPromotionBetNow'').click() #last_height = driver.execute_script("return document.body.scrollHeight") #while True: #driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") #time.sleep(SCROLL_PAUSE_TIME) #new_height = driver.execute_script("return document.body.scrollHeight") #if new_height == last_height: #break #last_height = new_height time.sleep(1) #clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, (''//div[text()="Soccer"][contains(@class, "wn-Classification")]'')))) #clickMe.click() #time.sleep(0) options = driver.find_elements_by_xpath(''//*[@class="name"]//span'') indexes = [index for index in range(len(options))] shuffle(indexes) for index in indexes: time.sleep(0) clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, ''("//*[@class="name"]//span")[%s]'' % str(index + 1)))) clickMe.click() time.sleep(0) # Team clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,("#js_body-region > div > div:nth-child(1) > div.sports-body-region > div.js_event-list-region > div > div.js_events-container.events-container > div > div.js_event-region.event-region > div > a > div > div.team-container.home > div")))) langs3 = driver.find_elements_by_css_selector("#js_body-region > div > div:nth-child(1) > div.sports-body-region > div.js_event-list-region > div > div.js_events-container.events-container > div > div.js_event-region.event-region > div > a > div > div.team-container.home 
> div") langs3_text = [] for lang in langs3: #print(lang.text) langs3_text.append(lang.text) time.sleep(0) # Team ODDS langs = driver.find_elements_by_css_selector(" #js_body-region > div > div:nth-child(1) > div.sports-body-region > div.js_event-list-region > div > div.js_events-container.events-container > div > div.js_event-region.event-region > div > div > div > div.js_teams-container.market-items > div.head-to-head-item.home > div > div > button.js_price-button.price") langs_text = [] for lang in langs: #print(lang.text) langs_text.append(lang.text) time.sleep(0) # Draw odds #langs1 = driver.find_elements_by_xpath("//ul[@class=''runners'']//li[2]") langs1 = driver.find_elements_by_css_selector("#js_body-region > div > div:nth-child(1) > div.sports-body-region > div.js_event-list-region > div > div.js_events-container.events-container > div > div.js_event-region.event-region > div > a > div > div.team-container.away > div") langs1_text = [] for lang in langs1: #print(lang.text) langs1_text.append(lang.text) time.sleep(0) # HREF #langs2 = driver.find_elements_by_xpath("//ul[@class=''runners'']//li[1]") url1 = driver.current_url print(("NEW LINE BREAK")) import sys import io with open(''vtg121.csv'', ''a'', newline='''', encoding="utf-8") as outfile: writer = csv.writer(outfile) for row in zip(langs_text, langs1_text, langs3_text): writer.writerow(row + (url1,)) print(row + (url1,))


Pruebe la siguiente solución para extraer los datos de las páginas requeridas:

url = "https://www.topbetta.com.au/sports/football/" driver.get(url) counter = 0 for link in range(len(wait(driver, 15).until(EC.presence_of_all_elements_located((By.XPATH, ''//a[@href="/sports" and ./div[@class="name"]]''))))): wait(driver, 15).until_not(EC.visibility_of_element_located((By.CLASS_NAME, "mask"))) link = wait(driver, 15).until(EC.presence_of_all_elements_located((By.XPATH, ''//a[@href="/sports" and ./div[@class="name"]]'')))[counter] link.location_once_scrolled_into_view link = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, ''(//a[@href="/sports" and ./div[@class="name"]])[%s]'' % str(counter + 1)))) wait(driver, 15).until_not(EC.visibility_of_element_located((By.CLASS_NAME, "mask"))) link.click() print(driver.current_url) wait(driver, 10).until(EC.staleness_of(driver.find_element(By.XPATH, ''//div[@class="competition-events-module"]''))) counter += 1 driver.get(url)

Simplemente reemplace print(driver.current_url) por el código que desee aplicar a cada página.