From 82a3d9d9b8bb919d171148db8e01b00774a85768 Mon Sep 17 00:00:00 2001 From: "shariar@raenabeauty.com" Date: Mon, 1 Apr 2024 11:59:12 +0400 Subject: [PATCH] added Hasaki crawler --- hasaki_crawler_engine/conf.json | 3 +-- hasaki_crawler_engine/hasaki_categories.py | 13 ++++++------- hasaki_crawler_engine/hasaki_category_products.py | 12 ++++++------ hasaki_crawler_engine/hasaki_crawler.py | 13 ++++++------- hasaki_crawler_engine/hasaki_db_writer.py | 12 ++++++------ hasaki_crawler_engine/hasaki_product_info.py | 12 ++++++------ 6 files changed, 31 insertions(+), 34 deletions(-) diff --git a/hasaki_crawler_engine/conf.json b/hasaki_crawler_engine/conf.json index e822066..bbceaf5 100755 --- a/hasaki_crawler_engine/conf.json +++ b/hasaki_crawler_engine/conf.json @@ -20,6 +20,5 @@ "db_pass": "5qCif6eyY3Kmg4z", "database": "analytics", "db_host": "redshift-cluster-1.cdqj58hfx4p7.ap-southeast-1.redshift.amazonaws.com", - "db_port": "5439", - "log_loc": "/home/ubuntu/logs/hasaki_crawler.log" + "db_port": "5439" } \ No newline at end of file diff --git a/hasaki_crawler_engine/hasaki_categories.py b/hasaki_crawler_engine/hasaki_categories.py index 1172906..669b73d 100644 --- a/hasaki_crawler_engine/hasaki_categories.py +++ b/hasaki_crawler_engine/hasaki_categories.py @@ -9,18 +9,17 @@ from playwright.sync_api import sync_playwright from hasaki_db_writer import hasaki_db_writer from Util import translate_text_to_english - +###### Looger ###### +logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log", + filemode='a', + format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', + datefmt="%Y-%m-%d %H:%M:%S", + level=logging.INFO) class HasakiCategories: def __init__(self, config): - ###### Looger ###### - logging.basicConfig(filename=config.get("log_loc"), - filemode='a', - format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', - datefmt="%Y-%m-%d %H:%M:%S", - level=logging.INFO) logging.info("Initializing HasakiSubCategories") self.master_category = [] self.config = config diff --git a/hasaki_crawler_engine/hasaki_category_products.py b/hasaki_crawler_engine/hasaki_category_products.py index b7883aa..0be34d4 100644 --- a/hasaki_crawler_engine/hasaki_category_products.py +++ b/hasaki_crawler_engine/hasaki_category_products.py @@ -7,15 +7,15 @@ from playwright.sync_api import sync_playwright from hasaki_db_writer import hasaki_db_writer from Util import translate_text_to_english +###### Looger ###### +logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log", + filemode='a', + format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', + datefmt="%Y-%m-%d %H:%M:%S", + level=logging.INFO) class HasakiCategoryProducts: def __init__(self, config): - ###### Looger ###### - logging.basicConfig(filename=config.get("log_loc"), - filemode='a', - format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', - datefmt="%Y-%m-%d %H:%M:%S", - level=logging.INFO) logging.info("Initializing HasakiCategoryProducts........") self.config = config self.crawler_name = self.config.get("crawler_name") diff --git a/hasaki_crawler_engine/hasaki_crawler.py b/hasaki_crawler_engine/hasaki_crawler.py index ecaf2fd..847836f 100644 --- a/hasaki_crawler_engine/hasaki_crawler.py +++ b/hasaki_crawler_engine/hasaki_crawler.py @@ -12,6 +12,12 @@ from email.message import EmailMessage config = {} +###### Looger ###### +logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log", + filemode='a', + format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', + datefmt="%Y-%m-%d %H:%M:%S", + level=logging.INFO) @@ -171,13 +177,6 @@ if __name__ == "__main__": logging.info("Config file loaded.......") logging.info(config) - ###### Looger ###### - logging.basicConfig(filename=config.get("log_loc"), - filemode='a', - format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', - datefmt="%Y-%m-%d %H:%M:%S", - level=logging.INFO) - conn = sqlite3.connect('process_tracker.db') conn.isolation_level = None diff --git a/hasaki_crawler_engine/hasaki_db_writer.py b/hasaki_crawler_engine/hasaki_db_writer.py index 0d37d8e..ff37553 100755 --- a/hasaki_crawler_engine/hasaki_db_writer.py +++ b/hasaki_crawler_engine/hasaki_db_writer.py @@ -1,15 +1,15 @@ import logging import psycopg2 +###### Looger ###### +logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log", + filemode='a', + format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', + datefmt="%Y-%m-%d %H:%M:%S", + level=logging.INFO) class hasaki_db_writer: def __init__(self, config): - ###### Looger ###### - logging.basicConfig(filename=config.get("log_loc"), - filemode='a', - format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', - datefmt="%Y-%m-%d %H:%M:%S", - level=logging.INFO) self.config = config self.conn = psycopg2.connect(database=self.config.get('database'), user=self.config.get('db_user'), password=self.config.get('db_pass'), host=self.config.get('db_host'), port=self.config.get('db_port')) self.conn.autocommit = True diff --git a/hasaki_crawler_engine/hasaki_product_info.py b/hasaki_crawler_engine/hasaki_product_info.py index 5abd1ef..d21bad9 100644 --- a/hasaki_crawler_engine/hasaki_product_info.py +++ b/hasaki_crawler_engine/hasaki_product_info.py @@ -19,15 +19,15 @@ from webdriver_manager.chrome import ChromeDriverManager import brotli import json +###### Looger ###### +logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log", + filemode='a', + format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', + datefmt="%Y-%m-%d %H:%M:%S", + level=logging.INFO) class HasakiProductInfo: def __init__(self, config): - ###### Looger ###### - logging.basicConfig(filename=config.get("log_loc"), - filemode='a', - format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s', - datefmt="%Y-%m-%d %H:%M:%S", - level=logging.INFO) logging.info("Initializing HasakiProductInfo") self.pattern = r'[' + string.punctuation + ']' self.config = config