From 1e7dcaa894665cc52f4e0eed915f4b3d2a02654c Mon Sep 17 00:00:00 2001
From: "shariar@raenabeauty.com"
Date: Mon, 1 Apr 2024 11:59:47 +0400
Subject: [PATCH] added Hasaki crawler

---
 hasaki_crawler_engine/hasaki_categories.py | 39 ++++++++++++----------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/hasaki_crawler_engine/hasaki_categories.py b/hasaki_crawler_engine/hasaki_categories.py
index 669b73d..d5e4ed2 100644
--- a/hasaki_crawler_engine/hasaki_categories.py
+++ b/hasaki_crawler_engine/hasaki_categories.py
@@ -20,27 +20,30 @@ logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
 
 class HasakiCategories:
     def __init__(self, config):
-        logging.info("Initializing HasakiSubCategories")
-        self.master_category = []
-        self.config = config
-        self.crawler_name = self.config.get("crawler_name")
-        self.product_limit = int(self.config.get("product_per_category"))
-        self.conn = psycopg2.connect(database=self.config.get('database'), user=self.config.get('db_user'),
-                                     password=self.config.get('db_pass'), host=self.config.get('db_host'),
-                                     port=self.config.get('db_port'))
-        self.conn.autocommit = True
-        self.cur = self.conn.cursor()
-        self.cur.execute(f"""select id from {self.config.get('crawler_schema')}.{self.config.get('source_tab')} where source_name='Hasaki'""")
         try:
-            self.rce_source_id = self.cur.fetchone()[0]
-        except:
-            logging.info("Source tab is empty. Please check. Exiting.....")
-            exit(1)
+            logging.info("Initializing HasakiSubCategories")
+            self.master_category = []
+            self.config = config
+            self.crawler_name = self.config.get("crawler_name")
+            self.product_limit = int(self.config.get("product_per_category"))
+            self.conn = psycopg2.connect(database=self.config.get('database'), user=self.config.get('db_user'),
+                                         password=self.config.get('db_pass'), host=self.config.get('db_host'),
+                                         port=self.config.get('db_port'))
+            self.conn.autocommit = True
+            self.cur = self.conn.cursor()
+            self.cur.execute(f"""select id from {self.config.get('crawler_schema')}.{self.config.get('source_tab')} where source_name='Hasaki'""")
+            try:
+                self.rce_source_id = self.cur.fetchone()[0]
+            except:
+                logging.info("Source tab is empty. Please check. Exiting.....")
+                exit(1)
 
-        self.db_writer = hasaki_db_writer(config)
+            self.db_writer = hasaki_db_writer(config)
 
-        self.display = Display(visible=0, size=(800, 600))
-        self.display.start()
+            self.display = Display(visible=0, size=(800, 600))
+            self.display.start()
+        except Exception as e:
+            logging.info(e)
 
     def __del__(self):
         print("Closing connection.....")