added Hasaki crawler
This commit is contained in:
parent
959fd9a03e
commit
6ed8a649ae
|
@ -21,6 +21,5 @@
|
|||
"database": "analytics",
|
||||
"db_host": "redshift-cluster-1.cdqj58hfx4p7.ap-southeast-1.redshift.amazonaws.com",
|
||||
"db_port": "5439",
|
||||
"crawler_main": "1",
|
||||
"crawler_slave_no": ""
|
||||
"log_loc": "/home/ubuntu/logs/hasaki_crawler.log"
|
||||
}
|
|
@ -10,21 +10,17 @@ from hasaki_db_writer import hasaki_db_writer
|
|||
from Util import translate_text_to_english
|
||||
|
||||
|
||||
###### Logger ######
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
#logname = 'hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class HasakiCategories:
|
||||
def __init__(self, config):
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
logging.info("Initializing HasakiSubCategories")
|
||||
self.master_category = []
|
||||
self.config = config
|
||||
|
|
|
@ -7,17 +7,15 @@ from playwright.sync_api import sync_playwright
|
|||
from hasaki_db_writer import hasaki_db_writer
|
||||
from Util import translate_text_to_english
|
||||
|
||||
###### Logger ######
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
#logname = 'hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
class HasakiCategoryProducts:
|
||||
def __init__(self, config):
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
logging.info("Initializing HasakiCategoryProducts........")
|
||||
self.config = config
|
||||
self.crawler_name = self.config.get("crawler_name")
|
||||
|
|
|
@ -13,16 +13,6 @@ from email.message import EmailMessage
|
|||
config = {}
|
||||
|
||||
|
||||
###### Logger ######
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
#logname = 'hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
|
||||
|
||||
|
||||
def main(cur):
|
||||
|
@ -181,6 +171,13 @@ if __name__ == "__main__":
|
|||
logging.info("Config file loaded.......")
|
||||
logging.info(config)
|
||||
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
conn = sqlite3.connect('process_tracker.db')
|
||||
conn.isolation_level = None
|
||||
|
||||
|
|
|
@ -1,17 +1,15 @@
|
|||
import logging
|
||||
import psycopg2
|
||||
|
||||
###### Logger ######
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
#logname = 'hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
class hasaki_db_writer:
|
||||
def __init__(self, config):
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
self.config = config
|
||||
self.conn = psycopg2.connect(database=self.config.get('database'), user=self.config.get('db_user'), password=self.config.get('db_pass'), host=self.config.get('db_host'), port=self.config.get('db_port'))
|
||||
self.conn.autocommit = True
|
||||
|
|
|
@ -19,17 +19,15 @@ from webdriver_manager.chrome import ChromeDriverManager
|
|||
import brotli
|
||||
import json
|
||||
|
||||
###### Logger ######
|
||||
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
||||
#logname = 'hasaki_crawler.log'
|
||||
logging.basicConfig(filename=logname,
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
|
||||
class HasakiProductInfo:
|
||||
def __init__(self, config):
|
||||
###### Logger ######
|
||||
logging.basicConfig(filename=config.get("log_loc"),
|
||||
filemode='a',
|
||||
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
level=logging.INFO)
|
||||
logging.info("Initializing HasakiProductInfo")
|
||||
self.pattern = r'[' + string.punctuation + ']'
|
||||
self.config = config
|
||||
|
|
Loading…
Reference in New Issue