raena-crawler-engine/amazon_crawler_engine/test1.py

83 lines
2.3 KiB
Python
Raw Normal View History

2024-01-24 13:05:07 +00:00
import hashlib
import logging
import sys
import string
import undetected_chromedriver as webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
import psycopg2
import bs4
from webdriver_manager.chrome import ChromeDriverManager
import random
from bs4 import BeautifulSoup
import json
import time
import gzip
import re
import random
from amazon_db_writer import amazon_db_writer
import ssl
# Replace the ssl module's default HTTPS context factory with the unverified
# one, so code in this process that relies on the default context does not
# validate server certificates.
# NOTE(review): presumably a workaround for certificate errors during driver
# download/startup — confirm it is still needed, since it weakens TLS globally.
ssl._create_default_https_context = ssl._create_unverified_context
def reseller_info(store_url):
    """Load a seller/store page in Chrome and print its rating description.

    A new Chrome session is started for every call and is always shut down
    before the function returns, whether or not the lookup succeeded.

    Args:
        store_url: URL of the page to open.

    Returns:
        None. The text of the element matching
        ``#effective-timeperiod-rating-year-description.ratings-reviews`` is
        printed to stdout; if the second page load or the element lookup
        fails, the exception is printed instead.

    Raises:
        Whatever the driver raises while starting Chrome or during the first
        page load; those errors are not caught here.
    """
    op = webdriver.ChromeOptions()
    op.add_argument('--no-sandbox')
    op.add_argument('--disable-notifications')
    op.add_argument("--lang=en-GB")
    # Headless mode is left disabled (op.headless is not set).
    driver = webdriver.Chrome(options=op)
    try:
        driver.get(store_url)
        driver.implicitly_wait(5)
        try:
            # NOTE(review): the page is loaded a second time here; it is
            # unclear whether this is deliberate (e.g. to get past an
            # interstitial) or a leftover — confirm before removing.
            driver.get(store_url)
            driver.implicitly_wait(5)
            ##### reseller info
            avg_rating = driver.find_element(
                By.CSS_SELECTOR,
                '#effective-timeperiod-rating-year-description.ratings-reviews',
            ).text
            print(avg_rating)
        except Exception as e:
            # Best-effort scrape: report the failure and carry on.
            print(e)
    finally:
        # Always release the browser; without this each call leaves a Chrome
        # process (and its chromedriver) running.
        driver.quit()
# Settings for this crawler run. The same dict is used below to open the
# PostgreSQL connection and is handed to amazon_db_writer.
# NOTE(review): database credentials are hard-coded here.
config = {
    # Crawler identity and schema.
    "crawler_name": "raena_crawler_enginer_amazon",
    "crawler_schema": "raena_spider_management",

    # "*_tab" entries — presumably table names inside crawler_schema;
    # confirm against amazon_db_writer.
    "category_tab": "rce_category",
    "tracker_tab": "crawler_tracker",
    "product_tab": "rce_product",
    "variant_tab": "rce_product_variant",
    "brand_tab": "rce_brand",
    "reseller_tab": "rce_reseller",
    "reseller_store_tab": "rce_reseller_store",
    "review_tab": "rce_ratings_reviews",
    "review_productmodels_tab": "rce_ratings_reviews_productmodels",
    "review_producttags_tab": "rce_ratings_reviews_producttags",
    "review_tags": "rce_tags",
    "source_tab": "rce_source",

    # Crawl parameters (stored as strings).
    "product_per_category": "1000",
    "source_category": "11043145",

    # PostgreSQL connection parameters.
    "db_user": "postgres",
    "db_pass": "postgres",
    "database": "postgres",
    "db_host": "localhost",
    "db_port": "5444",

    # Crawler role flags.
    "crawler_main": "1",
    "crawler_slave_no": "",
}
# Open a PostgreSQL connection from the settings in ``config`` and switch it
# to autocommit so each statement is committed immediately.
_connect_kwargs = {
    "database": config.get('database'),
    "user": config.get('db_user'),
    "password": config.get('db_pass'),
    "host": config.get('db_host'),
    "port": config.get('db_port'),
}
conn = psycopg2.connect(**_connect_kwargs)
conn.autocommit = True
cur = conn.cursor()

# Writer object built from the same configuration.
db_writer = amazon_db_writer(config)

# Run the scrape once against a fixed seller page.
_seller_url = 'https://www.amazon.ae/sp?ie=UTF8&seller=A3TFGX22P341AN&isAmazonFulfilled=0&asin=B09BR31PF9&ref_=olp_merch_name_1'
reseller_info(_seller_url)