added Hasaki crawler
This commit is contained in:
parent
54d49ea78f
commit
d239129739
|
@ -3,11 +3,22 @@ import logging
|
|||
import time
|
||||
import psycopg2
|
||||
import pandas as pd
|
||||
from pyvirtualdisplay import Display
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
from hasaki_db_writer import hasaki_db_writer
|
||||
from Util import translate_text_to_english
|
||||
|
||||
###### Logger ######
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class HasakiCategories:
|
||||
|
@ -31,9 +42,13 @@ class HasakiCategories:
|
|||
|
||||
self.db_writer = hasaki_db_writer(config)
|
||||
|
||||
self.display = Display(visible=0, size=(800, 600))
|
||||
self.display.start()
|
||||
|
||||
def __del__(self):
    """Release the resources held by this HasakiCategories instance.

    Closes ``self.conn`` and stops the virtual display started in
    ``__init__``. Each resource is released independently, so a failure
    while closing the connection can no longer leave the display running.
    Errors are logged instead of raised; exceptions escaping ``__del__``
    are ignored by the interpreter anyway.
    """
    print("Closing connection.....")

    # __del__ also runs on a half-built instance when __init__ raised
    # before these attributes were assigned, so look them up defensively.
    conn = getattr(self, "conn", None)
    display = getattr(self, "display", None)

    if conn is not None:
        try:
            conn.close()
        except Exception:
            logging.exception("Failed to close the database connection")

    if display is not None:
        try:
            display.stop()
        except Exception:
            logging.exception("Failed to stop the virtual display")
|
||||
|
||||
def start_processing(self):
|
||||
|
||||
|
|
|
@ -2,9 +2,19 @@ import logging
|
|||
import random
|
||||
import time
|
||||
import psycopg2
|
||||
from pyvirtualdisplay import Display
|
||||
from playwright.sync_api import sync_playwright
|
||||
from hasaki_db_writer import hasaki_db_writer
|
||||
from Util import translate_text_to_english
|
||||
|
||||
###### Logger ######
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
class HasakiCategoryProducts:
|
||||
def __init__(self, config):
|
||||
logging.info("Initializing HasakiCategoryProducts........")
|
||||
|
@ -26,9 +36,13 @@ class HasakiCategoryProducts:
|
|||
|
||||
self.db_writer = hasaki_db_writer(config)
|
||||
|
||||
self.display = Display(visible=0, size=(800, 600))
|
||||
self.display.start()
|
||||
|
||||
def __del__(self):
    """Release the resources held by this HasakiCategoryProducts instance.

    Closes ``self.conn`` and stops the virtual display started in
    ``__init__``. Each resource is released independently, so a failure
    while closing the connection can no longer leave the display running.
    Errors are logged instead of raised; exceptions escaping ``__del__``
    are ignored by the interpreter anyway.
    """
    print("Closing connection.....")

    # __del__ also runs on a half-built instance when __init__ raised
    # before these attributes were assigned, so look them up defensively.
    conn = getattr(self, "conn", None)
    display = getattr(self, "display", None)

    if conn is not None:
        try:
            conn.close()
        except Exception:
            logging.exception("Failed to close the database connection")

    if display is not None:
        try:
            display.stop()
        except Exception:
            logging.exception("Failed to stop the virtual display")
|
||||
|
||||
def start_processing(self):
|
||||
|
||||
|
|
|
@ -10,9 +10,13 @@ from hasaki_category_products import HasakiCategoryProducts
|
|||
from hasaki_product_info import HasakiProductInfo
|
||||
from email.message import EmailMessage
|
||||
|
||||
##### Logger ######
|
||||
format = "%(asctime)s: %(message)s"
|
||||
logging.basicConfig(format=format, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")
|
||||
###### Logger ######
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
config = {}
|
||||
|
||||
|
@ -26,6 +30,8 @@ def main(cur):
|
|||
hasaki_categories.start_processing()
|
||||
cur.execute(f"""update process_tracker set flag = 1 where process = 'category'""")
|
||||
|
||||
logging.info("Category collection completed........ Moving to collecting products ;ist of the categories.......")
|
||||
|
||||
#time.sleep(60)
|
||||
|
||||
cur.execute(f"""select flag from process_tracker where process = 'category_product'""")
|
||||
|
@ -35,6 +41,8 @@ def main(cur):
|
|||
hasaki_category_products.start_processing()
|
||||
cur.execute(f"""update process_tracker set flag = 1 where process = 'category_product'""")
|
||||
|
||||
logging.info("Category products collection completed........ Moving to collecting product info.......")
|
||||
|
||||
#time.sleep(60)
|
||||
|
||||
cur.execute(f"""select flag from process_tracker where process = 'product_info'""")
|
||||
|
@ -48,6 +56,8 @@ def main(cur):
|
|||
cur.execute(f"""update process_tracker set flag = 0 where process = 'category_product'""")
|
||||
cur.execute(f"""update process_tracker set flag = 0 where process = 'product_info'""")
|
||||
|
||||
logging.info("Product info collection done. Stopping........")
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -2,8 +2,12 @@ import logging
|
|||
import psycopg2
|
||||
|
||||
###### Logger ######
|
||||
format = "%(asctime)s: %(message)s"
|
||||
logging.basicConfig(format=format, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
class hasaki_db_writer:
|
||||
def __init__(self, config):
|
||||
|
|
|
@ -12,12 +12,21 @@ from Util import translate_text_to_english
|
|||
from fake_useragent import UserAgent
|
||||
import time
|
||||
import random
|
||||
from pyvirtualdisplay import Display
|
||||
from seleniumwire import webdriver
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
import brotli
|
||||
import json
|
||||
|
||||
###### Logger ######
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
class HasakiProductInfo:
|
||||
def __init__(self, config):
|
||||
logging.info("Initializing HasakiProductInfo")
|
||||
|
@ -40,9 +49,13 @@ class HasakiProductInfo:
|
|||
|
||||
self.db_writer = hasaki_db_writer(config)
|
||||
|
||||
self.display = Display(visible=0, size=(800, 600))
|
||||
self.display.start()
|
||||
|
||||
def __del__(self):
    """Release the resources held by this HasakiProductInfo instance.

    Closes ``self.conn`` and stops the virtual display started in
    ``__init__``. Each resource is released independently, so a failure
    while closing the connection can no longer leave the display running.
    Errors are logged instead of raised; exceptions escaping ``__del__``
    are ignored by the interpreter anyway.
    """
    print("Closing connection.....")

    # __del__ also runs on a half-built instance when __init__ raised
    # before these attributes were assigned, so look them up defensively.
    conn = getattr(self, "conn", None)
    display = getattr(self, "display", None)

    if conn is not None:
        try:
            conn.close()
        except Exception:
            logging.exception("Failed to close the database connection")

    if display is not None:
        try:
            display.stop()
        except Exception:
            logging.exception("Failed to stop the virtual display")
|
||||
|
||||
def start_processing(self):
|
||||
logging.info("Starting to collect product info from Hasaki........")
|
||||
|
|
Loading…
Reference in New Issue