FB Marketplace crawler

This commit is contained in:
Shariar Imtiaz 2024-02-02 10:08:37 +04:00
parent cccd42ae70
commit aaf84022e6
1 changed file with 5 additions and 1 deletion

View File

@@ -25,7 +25,7 @@ class FbMarketplaceProductInfo:
op.add_argument('--user-data-dir=/home/ec2-user/fb-crawler-binary/chrome/fb_marketplace') op.add_argument('--user-data-dir=/home/ec2-user/fb-crawler-binary/chrome/fb_marketplace')
self.version_main = 120 self.version_main = 120
self.driver = Chrome(options=op, version_main=self.version_main) self.driver = Chrome(options=op, version_main=self.version_main)
#time.sleep(3000) time.sleep(3000)
self.cur.execute("select id from " + self.config.get('crawler_schema') + "." + self.config.get( self.cur.execute("select id from " + self.config.get('crawler_schema') + "." + self.config.get(
'source_tab') + " where source_name='fb_marketplace'") 'source_tab') + " where source_name='fb_marketplace'")
@@ -126,6 +126,10 @@ class FbMarketplaceProductInfo:
pass pass
pass pass
if seller_name=="":
time.sleep(5)
return
try: try:
seller_link = self.driver.find_element(By.XPATH, "/html/body/div[1]/div/div[1]/div/div[3]/div/div/div[1]/div[1]/div[2]/div/div/div/div/div/div[1]/div[2]/div/div[2]/div/div[1]/div[1]/div[2]/div/div/div[2]/div[1]/div/div/div/div/div[2]/div/div/div/div/span/span/div/div/a").get_attribute("href") seller_link = self.driver.find_element(By.XPATH, "/html/body/div[1]/div/div[1]/div/div[3]/div/div/div[1]/div[1]/div[2]/div/div/div/div/div/div[1]/div[2]/div/div[2]/div/div[1]/div[1]/div[2]/div/div/div[2]/div[1]/div/div/div/div/div[2]/div/div/div/div/span/span/div/div/a").get_attribute("href")
except: except: