added Hasaki crawler
This commit is contained in:
parent
6ed8a649ae
commit
82a3d9d9b8
|
@ -20,6 +20,5 @@
|
|||
"db_pass": "5qCif6eyY3Kmg4z",
|
||||
"database": "analytics",
|
||||
"db_host": "redshift-cluster-1.cdqj58hfx4p7.ap-southeast-1.redshift.amazonaws.com",
|
||||
"db_port": "5439",
|
||||
"log_loc": "/home/ubuntu/logs/hasaki_crawler.log"
|
||||
"db_port": "5439"
|
||||
}
|
|
@ -9,18 +9,17 @@ from playwright.sync_api import sync_playwright
|
|||
from hasaki_db_writer import hasaki_db_writer
|
||||
from Util import translate_text_to_english
|
||||
|
||||
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
|
||||
|
||||
class HasakiCategories:
|
||||
def __init__(self, config):
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
logging.info("Initializing HasakiSubCategories")
|
||||
self.master_category = []
|
||||
self.config = config
|
||||
|
|
|
@ -7,15 +7,15 @@ from playwright.sync_api import sync_playwright
|
|||
from hasaki_db_writer import hasaki_db_writer
|
||||
from Util import translate_text_to_english
|
||||
|
||||
|
||||
class HasakiCategoryProducts:
|
||||
def __init__(self, config):
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
class HasakiCategoryProducts:
|
||||
def __init__(self, config):
|
||||
logging.info("Initializing HasakiCategoryProducts........")
|
||||
self.config = config
|
||||
self.crawler_name = self.config.get("crawler_name")
|
||||
|
|
|
@ -12,6 +12,12 @@ from email.message import EmailMessage
|
|||
|
||||
config = {}
|
||||
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
|
||||
|
||||
|
@ -171,13 +177,6 @@ if __name__ == "__main__":
|
|||
logging.info("Config file loaded.......")
|
||||
logging.info(config)
|
||||
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
conn = sqlite3.connect('process_tracker.db')
|
||||
conn.isolation_level = None
|
||||
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
import logging
|
||||
import psycopg2
|
||||
|
||||
|
||||
class hasaki_db_writer:
|
||||
def __init__(self, config):
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
class hasaki_db_writer:
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.conn = psycopg2.connect(database=self.config.get('database'), user=self.config.get('db_user'), password=self.config.get('db_pass'), host=self.config.get('db_host'), port=self.config.get('db_port'))
|
||||
self.conn.autocommit = True
|
||||
|
|
|
@ -19,15 +19,15 @@ from webdriver_manager.chrome import ChromeDriverManager
|
|||
import brotli
|
||||
import json
|
||||
|
||||
|
||||
class HasakiProductInfo:
|
||||
def __init__(self, config):
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
logging.basicConfig(filename="/home/ubuntu/logs/hasaki_crawler.log",
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
class HasakiProductInfo:
|
||||
def __init__(self, config):
|
||||
logging.info("Initializing HasakiProductInfo")
|
||||
self.pattern = r'[' + string.punctuation + ']'
|
||||
self.config = config
|
||||
|
|
Loading…
Reference in New Issue