added Hasaki crawler

This commit is contained in:
Shariar Imtiaz 2024-04-01 11:59:12 +04:00
parent 6ed8a649ae
commit 82a3d9d9b8
6 changed files with 31 additions and 34 deletions

View File

@@ -20,6 +20,5 @@
"db_pass": "5qCif6eyY3Kmg4z",
"database": "analytics",
"db_host": "redshift-cluster-1.cdqj58hfx4p7.ap-southeast-1.redshift.amazonaws.com",
"db_port": "5439",
"log_loc": "/home/ubuntu/logs/hasaki_crawler.log"
"db_port": "5439"
}

View File

@@ -9,18 +9,17 @@ from playwright.sync_api import sync_playwright
from hasaki_db_writer import hasaki_db_writer
from Util import translate_text_to_english
###### Logger ######
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
filemode='a',
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO)
class HasakiCategories:
def __init__(self, config):
###### Logger ######
logging.basicConfig(filename=config.get("log_loc"),
filemode='a',
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO)
logging.info("Initializing HasakiSubCategories")
self.master_category = []
self.config = config

View File

@@ -7,15 +7,15 @@ from playwright.sync_api import sync_playwright
from hasaki_db_writer import hasaki_db_writer
from Util import translate_text_to_english
class HasakiCategoryProducts:
def __init__(self, config):
###### Logger ######
logging.basicConfig(filename=config.get("log_loc"),
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
filemode='a',
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO)
class HasakiCategoryProducts:
def __init__(self, config):
logging.info("Initializing HasakiCategoryProducts........")
self.config = config
self.crawler_name = self.config.get("crawler_name")

View File

@@ -12,6 +12,12 @@ from email.message import EmailMessage
config = {}
###### Logger ######
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
filemode='a',
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO)
@@ -171,13 +177,6 @@ if __name__ == "__main__":
logging.info("Config file loaded.......")
logging.info(config)
###### Logger ######
logging.basicConfig(filename=config.get("log_loc"),
filemode='a',
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO)
conn = sqlite3.connect('process_tracker.db')
conn.isolation_level = None

View File

@@ -1,15 +1,15 @@
import logging
import psycopg2
class hasaki_db_writer:
def __init__(self, config):
###### Logger ######
logging.basicConfig(filename=config.get("log_loc"),
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
filemode='a',
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO)
class hasaki_db_writer:
def __init__(self, config):
self.config = config
self.conn = psycopg2.connect(database=self.config.get('database'), user=self.config.get('db_user'), password=self.config.get('db_pass'), host=self.config.get('db_host'), port=self.config.get('db_port'))
self.conn.autocommit = True

View File

@@ -19,15 +19,15 @@ from webdriver_manager.chrome import ChromeDriverManager
import brotli
import json
class HasakiProductInfo:
def __init__(self, config):
###### Logger ######
logging.basicConfig(filename=config.get("log_loc"),
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
filemode='a',
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO)
class HasakiProductInfo:
def __init__(self, config):
logging.info("Initializing HasakiProductInfo")
self.pattern = r'[' + string.punctuation + ']'
self.config = config