"""Hasaki crawler entry point with a SQLite-backed stage tracker.

Reconstructed from a whitespace-collapsed patch to
hasaki_crawler_engine/hasaki_crawler.py.  Only the definitions the patch
touches are reproduced here; the crawler-class imports (HasakiCategories,
HasakiCategoryProducts, HasakiProductInfo), the logging.basicConfig() call
and send_mail() belong to the parts of the module that the patch leaves
unchanged and are therefore not repeated.
"""
import logging
import json
import time
import smtplib
import sqlite3

# Replaced with the parsed contents of conf.json when run as a script.
config = {}

# Pipeline stages in execution order; each owns one row in process_tracker.
_PROCESSES = ("category", "category_product", "product_info")


def _get_flag(cur, process):
    """Return the flag stored in process_tracker for *process*.

    Raises:
        LookupError: if the table holds no row for *process* (the original
            code failed here with an opaque TypeError on ``None[0]``).
    """
    cur.execute("select flag from process_tracker where process = ?", (process,))
    row = cur.fetchone()
    if row is None:
        raise LookupError(
            "process_tracker has no row for {!r}; "
            "call init_tracker_tab() first".format(process)
        )
    return row[0]


def _set_flag(cur, process, flag):
    """Store *flag* for *process* in process_tracker."""
    cur.execute(
        "update process_tracker set flag = ? where process = ?",
        (flag, process),
    )


def _log_tracker_rows(cur):
    """Log every row of process_tracker, preceded by a separator line."""
    logging.info("++++++++++++++++++++++++++++++++++++++")
    cur.execute("select * from process_tracker")
    logging.info(cur.fetchall())


def main(cur):
    """Run every crawler stage whose tracker flag is still 0.

    A stage's flag is set to 1 once its ``start_processing()`` returns, so a
    run that dies part-way can be restarted without repeating finished stages.

    If the last stage (``product_info``) is already flagged when this function
    reaches it, nothing is crawled for that stage and all three flags are
    reset to 0, so the following invocation starts a new cycle.
    NOTE(review): this means a fully successful run is followed by a run that
    only resets the flags - confirm that this is the intended schedule.

    Args:
        cur: sqlite3 cursor on the database holding ``process_tracker``.

    Raises:
        LookupError: if a stage has no row in ``process_tracker``.
    """
    if _get_flag(cur, "category") == 0:
        HasakiCategories(config).start_processing()
        _set_flag(cur, "category", 1)

    if _get_flag(cur, "category_product") == 0:
        HasakiCategoryProducts(config).start_processing()
        _set_flag(cur, "category_product", 1)

    if _get_flag(cur, "product_info") == 0:
        HasakiProductInfo(config).start_processing()
        _set_flag(cur, "product_info", 1)
    else:
        # Whole pipeline was already complete: re-arm every stage.
        for process in _PROCESSES:
            _set_flag(cur, process, 0)


def init_tracker_tab(cur):
    """Create ``process_tracker`` if needed and seed one row per stage.

    Existing rows are left untouched, so calling this repeatedly is safe.
    The table contents are logged before and after seeding.

    Args:
        cur: sqlite3 cursor on the tracker database.
    """
    cur.execute("""CREATE TABLE IF NOT EXISTS process_tracker (
                        process TEXT,
                        flag int
                    )""")

    _log_tracker_rows(cur)

    for process in _PROCESSES:
        cur.execute("select 1 from process_tracker where process = ?", (process,))
        if cur.fetchone() is None:
            cur.execute(
                "insert into process_tracker (process, flag) values (?, 0)",
                (process,),
            )

    _log_tracker_rows(cur)


if __name__ == "__main__":
    logging.info("Starting Hasaki Crawler.......")

    try:
        with open("conf.json", "r") as jsonfile:
            config = json.load(jsonfile)
        logging.info("Config file loaded.......")
        logging.info(config)

        # NOTE(review): the patch also commits process_tracker.db itself;
        # a runtime state file like this usually belongs in .gitignore.
        conn = sqlite3.connect('process_tracker.db')
        # Autocommit: every flag update is durable as soon as it executes.
        conn.isolation_level = None
        try:
            cur = conn.cursor()

            # The table and its rows must exist before any flag is touched;
            # the original issued the UPDATEs below first, which fails on a
            # fresh database and is a silent no-op on an empty table.
            init_tracker_tab(cur)

            # NOTE(review): these overrides force only the product_info stage
            # to run on every invocation, bypassing the resume logic in
            # main().  They look like debugging leftovers - confirm, then drop.
            _set_flag(cur, "category", 1)
            _set_flag(cur, "category_product", 1)
            _set_flag(cur, "product_info", 0)

            main(cur)

            cur.close()
        finally:
            # Release the database handle even when a stage raises.
            conn.close()

        # send_mail("Hasaki crawler run complete.")  # notification disabled by the patch
    except Exception as e:
        # The original format string had no placeholder, so the exception
        # text was silently dropped from the log.
        logging.exception("Error: {}".format(e))
        logging.info("Error occurred. Please check config file or the internal SQLLITE DB. Exiting......")
        # send_mail("Error occurred. Please check config file or the internal SQLLITE DB.")
        raise SystemExit(1)