83 lines
2.3 KiB
Python
83 lines
2.3 KiB
Python
|
import hashlib
|
||
|
import logging
|
||
|
import sys
|
||
|
import string
|
||
|
import undetected_chromedriver as webdriver
|
||
|
from selenium.webdriver.common.by import By
|
||
|
from selenium.webdriver.chrome.service import Service
|
||
|
import psycopg2
|
||
|
import bs4
|
||
|
from webdriver_manager.chrome import ChromeDriverManager
|
||
|
import random
|
||
|
from bs4 import BeautifulSoup
|
||
|
import json
|
||
|
import time
|
||
|
import gzip
|
||
|
import re
|
||
|
import random
|
||
|
from amazon_db_writer import amazon_db_writer
|
||
|
|
||
|
import ssl
|
||
|
# NOTE(review): this swaps the ssl module's default HTTPS context factory for
# the unverified one, so HTTPS contexts built through that default no longer
# verify server certificates for the rest of the process. Presumably a
# workaround for certificate errors during driver download -- confirm it is
# still required before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context
|
||
|
|
||
|
|
||
|
def reseller_info(store_url):
    """Load a seller page in Chrome and print the text of its rating element.

    A fresh Chrome session is started for every call and is always shut
    down again before the function returns, whether or not scraping
    succeeded.

    Args:
        store_url: URL of the seller/store page to open.

    Returns:
        The text of the element matched by the rating selector, or ``None``
        when the element could not be read (the error is printed, not
        raised).

    Raises:
        Exception: whatever the driver raises while starting Chrome or
            navigating to ``store_url``; only the element lookup and the
            printing of its text are best-effort.
    """
    op = webdriver.ChromeOptions()
    op.add_argument('--no-sandbox')
    op.add_argument('--disable-notifications')
    op.add_argument("--lang=en-GB")
    # Headless mode is currently switched off:
    # op.headless = True
    driver = webdriver.Chrome(options=op)

    try:
        # The implicit wait applies to every later find_element call, so it
        # only needs to be set once per session.
        driver.implicitly_wait(5)
        driver.get(store_url)

        ##### reseller info
        try:
            # presumably the yearly rating summary on the seller page --
            # TODO confirm against the live page markup.
            avg_rating = driver.find_element(
                By.CSS_SELECTOR,
                '#effective-timeperiod-rating-year-description.ratings-reviews',
            ).text
            print(avg_rating)
        except Exception as e:
            # Best effort: a missing or changed element must not abort the run.
            print(e)
            return None
        return avg_rating
    finally:
        # Always release the browser and chromedriver processes; without
        # this every call leaves a Chrome instance running.
        driver.quit()
|
||
|
|
||
|
# Crawler settings shared by the database connection and the writer below.
config = {
    # Crawler identity and schema.
    "crawler_name": "raena_crawler_enginer_amazon",
    "crawler_schema": "raena_spider_management",

    # presumably table names inside the crawler schema -- confirm against
    # the amazon_db_writer module.
    "category_tab": "rce_category",
    "tracker_tab": "crawler_tracker",
    "product_tab": "rce_product",
    "variant_tab": "rce_product_variant",
    "brand_tab": "rce_brand",
    "reseller_tab": "rce_reseller",
    "reseller_store_tab": "rce_reseller_store",
    "review_tab": "rce_ratings_reviews",
    "review_productmodels_tab": "rce_ratings_reviews_productmodels",
    "review_producttags_tab": "rce_ratings_reviews_producttags",
    "review_tags": "rce_tags",
    "source_tab": "rce_source",

    # Crawl scope (kept as strings, exactly as the consumers receive them).
    "product_per_category": "1000",
    "source_category": "11043145",

    # PostgreSQL connection parameters.
    # NOTE(review): credentials are hard-coded in source.
    "db_user": "postgres",
    "db_pass": "postgres",
    "database": "postgres",
    "db_host": "localhost",
    "db_port": "5444",

    # presumably main/worker role flags -- confirm with the crawler runner.
    "crawler_main": "1",
    "crawler_slave_no": "",
}
|
||
|
# Connect to PostgreSQL using the settings held in `config`.
conn = psycopg2.connect(
    database=config.get('database'),
    user=config.get('db_user'),
    password=config.get('db_pass'),
    host=config.get('db_host'),
    port=config.get('db_port'),
)
# With autocommit on, statements take effect without an explicit commit().
conn.autocommit = True
cur = conn.cursor()

# Writer object built from the same settings.
# NOTE(review): conn, cur and db_writer are not referenced again in the
# visible code, and the connection is never closed -- confirm whether they
# are still needed here.
db_writer = amazon_db_writer(config)

# Run the scraper once, at import/execution time, for one fixed seller page.
reseller_info(
    'https://www.amazon.ae/sp?ie=UTF8&seller=A3TFGX22P341AN&isAmazonFulfilled=0&asin=B09BR31PF9&ref_=olp_merch_name_1'
)
|