added Hasaki crawler
This commit is contained in:
parent
6d84e37c27
commit
4ec62a41ed
|
@ -2,6 +2,7 @@ import logging
|
|||
import json
|
||||
import time
|
||||
import smtplib
|
||||
import sqlite3
|
||||
|
||||
from hasaki_categories import HasakiCategories
|
||||
from hasaki_category_products import HasakiCategoryProducts
|
||||
|
@ -15,19 +16,38 @@ logging.basicConfig(format=format, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%
|
|||
config = {}
|
||||
|
||||
|
||||
def main():
|
||||
hasaki_categories = HasakiCategories(config)
|
||||
hasaki_categories.start_processing()
|
||||
def main(cur):
|
||||
|
||||
time.sleep(60)
|
||||
cur.execute(f"""select flag from process_tracker where process = 'category'""")
|
||||
cat_flags = cur.fetchone()
|
||||
if cat_flags[0]==0:
|
||||
hasaki_categories = HasakiCategories(config)
|
||||
hasaki_categories.start_processing()
|
||||
cur.execute(f"""update process_tracker set flag = 1 where process = 'category'""")
|
||||
|
||||
hasaki_category_products = HasakiCategoryProducts(config)
|
||||
hasaki_category_products.start_processing()
|
||||
#time.sleep(60)
|
||||
|
||||
cur.execute(f"""select flag from process_tracker where process = 'category_product'""")
|
||||
cat_pro_flags = cur.fetchone()
|
||||
if cat_pro_flags[0] == 0:
|
||||
hasaki_category_products = HasakiCategoryProducts(config)
|
||||
hasaki_category_products.start_processing()
|
||||
cur.execute(f"""update process_tracker set flag = 1 where process = 'category_product'""")
|
||||
|
||||
#time.sleep(60)
|
||||
|
||||
cur.execute(f"""select flag from process_tracker where process = 'product_info'""")
|
||||
prod_flag = cur.fetchone()
|
||||
if prod_flag[0] == 0:
|
||||
hasaki_products = HasakiProductInfo(config)
|
||||
hasaki_products.start_processing()
|
||||
cur.execute(f"""update process_tracker set flag = 1 where process = 'product_info'""")
|
||||
else:
|
||||
cur.execute(f"""update process_tracker set flag = 0 where process = 'category'""")
|
||||
cur.execute(f"""update process_tracker set flag = 0 where process = 'category_product'""")
|
||||
cur.execute(f"""update process_tracker set flag = 0 where process = 'product_info'""")
|
||||
|
||||
time.sleep(60)
|
||||
|
||||
hasaki_products = HasakiProductInfo(config)
|
||||
hasaki_products.start_processing()
|
||||
|
||||
|
||||
def send_mail(msg):
|
||||
|
@ -71,6 +91,33 @@ def send_mail(msg):
|
|||
except Exception as e:
|
||||
logging.info("Error while sending mail: {}".format(e))
|
||||
|
||||
def init_tracker_tab(cur):
    """Create the ``process_tracker`` table if needed and seed its rows.

    The table holds one row per crawler stage (``category``,
    ``category_product``, ``product_info``) with an integer ``flag``.
    A stage whose row is missing is inserted with ``flag = 0``; rows that
    already exist are left untouched, so calling this repeatedly is safe.
    The full table contents are logged before and after seeding.

    Args:
        cur: An open sqlite3 cursor (queries use the ``?`` placeholder
            style) on the database that holds the tracker table.
    """
    cur.execute(
        """CREATE TABLE IF NOT EXISTS process_tracker (
            process TEXT,
            flag int
        )"""
    )

    # Snapshot of the table before seeding.
    logging.info("++++++++++++++++++++++++++++++++++++++")
    cur.execute("select * from process_tracker")
    logging.info(cur.fetchall())

    # The table has no uniqueness constraint, so check for each row before
    # inserting to avoid duplicating it on later runs.
    for process in ("category", "category_product", "product_info"):
        cur.execute(
            "select 1 from process_tracker where process = ?", (process,)
        )
        if cur.fetchone() is None:
            cur.execute(
                "insert into process_tracker (process, flag) values (?, 0)",
                (process,),
            )

    # Snapshot of the table after seeding.
    logging.info("++++++++++++++++++++++++++++++++++++++")
    cur.execute("select * from process_tracker")
    logging.info(cur.fetchall())
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.info("Starting Hasaki Crawler.......")
|
||||
|
@ -79,13 +126,30 @@ if __name__ == "__main__":
|
|||
with open("conf.json", "r") as jsonfile:
|
||||
config = json.load(jsonfile)
|
||||
logging.info("Config file loaded.......")
|
||||
print(config)
|
||||
logging.info(config)
|
||||
|
||||
main()
|
||||
send_mail("Hasaki crawler run complete.")
|
||||
conn = sqlite3.connect('process_tracker.db')
|
||||
conn.isolation_level = None
|
||||
|
||||
cur = conn.cursor()
|
||||
|
||||
cur.execute(f"""update process_tracker set flag = 1 where process = 'category'""")
|
||||
cur.execute(f"""update process_tracker set flag = 1 where process = 'category_product'""")
|
||||
cur.execute(f"""update process_tracker set flag = 0 where process = 'product_info'""")
|
||||
|
||||
init_tracker_tab(cur)
|
||||
|
||||
|
||||
|
||||
main(cur)
|
||||
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
#send_mail("Hasaki crawler run complete.")
|
||||
|
||||
except Exception as e:
|
||||
logging.info("Error: ".format(e))
|
||||
logging.info("Cannot load config file. Please check. Exiting......")
|
||||
send_mail("Error occurred. Please check Hasaki Pipeline.")
|
||||
logging.info("Error occurred. Please check config file or the internal SQLLITE DB. Exiting......")
|
||||
#send_mail("Error occurred. Please check config file or the internal SQLLITE DB.")
|
||||
exit(1)
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue