From 4ec62a41ed67417f860c3ed29f25d96ed7c0e717 Mon Sep 17 00:00:00 2001 From: "shariar@raenabeauty.com" Date: Wed, 27 Mar 2024 11:01:53 +0400 Subject: [PATCH] added Hasaki crawler --- hasaki_crawler_engine/hasaki_crawler.py | 92 +++++++++++++++++++---- hasaki_crawler_engine/process_tracker.db | Bin 0 -> 8192 bytes 2 files changed, 78 insertions(+), 14 deletions(-) create mode 100644 hasaki_crawler_engine/process_tracker.db diff --git a/hasaki_crawler_engine/hasaki_crawler.py b/hasaki_crawler_engine/hasaki_crawler.py index 8fa98c3..a6822cb 100644 --- a/hasaki_crawler_engine/hasaki_crawler.py +++ b/hasaki_crawler_engine/hasaki_crawler.py @@ -2,6 +2,7 @@ import logging import json import time import smtplib +import sqlite3 from hasaki_categories import HasakiCategories from hasaki_category_products import HasakiCategoryProducts @@ -15,19 +16,38 @@ logging.basicConfig(format=format, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:% config = {} -def main(): - hasaki_categories = HasakiCategories(config) - hasaki_categories.start_processing() +def main(cur): - time.sleep(60) + cur.execute(f"""select flag from process_tracker where process = 'category'""") + cat_flags = cur.fetchone() + if cat_flags[0]==0: + hasaki_categories = HasakiCategories(config) + hasaki_categories.start_processing() + cur.execute(f"""update process_tracker set flag = 1 where process = 'category'""") - hasaki_category_products = HasakiCategoryProducts(config) - hasaki_category_products.start_processing() + #time.sleep(60) + + cur.execute(f"""select flag from process_tracker where process = 'category_product'""") + cat_pro_flags = cur.fetchone() + if cat_pro_flags[0] == 0: + hasaki_category_products = HasakiCategoryProducts(config) + hasaki_category_products.start_processing() + cur.execute(f"""update process_tracker set flag = 1 where process = 'category_product'""") + + #time.sleep(60) + + cur.execute(f"""select flag from process_tracker where process = 'product_info'""") + prod_flag = cur.fetchone() + if prod_flag[0] == 0: + hasaki_products = HasakiProductInfo(config) + hasaki_products.start_processing() + cur.execute(f"""update process_tracker set flag = 1 where process = 'product_info'""") + else: + cur.execute(f"""update process_tracker set flag = 0 where process = 'category'""") + cur.execute(f"""update process_tracker set flag = 0 where process = 'category_product'""") + cur.execute(f"""update process_tracker set flag = 0 where process = 'product_info'""") - time.sleep(60) - hasaki_products = HasakiProductInfo(config) - hasaki_products.start_processing() def send_mail(msg): @@ -71,6 +91,33 @@ def send_mail(msg): except Exception as e: logging.info("Error while sending mail: {}".format(e)) +def init_tracker_tab(cur): + cur.execute(f"""CREATE TABLE IF NOT EXISTS process_tracker ( + process TEXT, + flag int + )""") + + logging.info("++++++++++++++++++++++++++++++++++++++") + cur.execute(f"""select * from process_tracker""") + logging.info(cur.fetchall()) + + cur.execute(f"""select * from process_tracker where process = 'category'""") + if cur.fetchone() is None: + cur.execute(f"""insert into process_tracker (process, flag) values('category', 0)""") + + cur.execute(f"""select * from process_tracker where process = 'category_product'""") + if cur.fetchone() is None: + cur.execute(f"""insert into process_tracker (process, flag) values('category_product', 0)""") + + cur.execute(f"""select * from process_tracker where process = 'product_info'""") + if cur.fetchone() is None: + cur.execute(f"""insert into process_tracker (process, flag) values('product_info', 0)""") + + logging.info("++++++++++++++++++++++++++++++++++++++") + cur.execute(f"""select * from process_tracker""") + logging.info(cur.fetchall()) + + if __name__ == "__main__": logging.info("Starting Hasaki Crawler.......") @@ -79,13 +126,30 @@ if __name__ == "__main__": with open("conf.json", "r") as jsonfile: config = json.load(jsonfile) logging.info("Config file loaded.......") - print(config) + logging.info(config) - main() - send_mail("Hasaki crawler run complete.") + conn = sqlite3.connect('process_tracker.db') + conn.isolation_level = None + + cur = conn.cursor() + + cur.execute(f"""update process_tracker set flag = 1 where process = 'category'""") + cur.execute(f"""update process_tracker set flag = 1 where process = 'category_product'""") + cur.execute(f"""update process_tracker set flag = 0 where process = 'product_info'""") + + init_tracker_tab(cur) + + + + main(cur) + + cur.close() + conn.close() + + #send_mail("Hasaki crawler run complete.") except Exception as e: logging.info("Error: ".format(e)) - logging.info("Cannot load config file. Please check. Exiting......") - send_mail("Error occurred. Please check Hasaki Pipeline.") + logging.info("Error occurred. Please check config file or the internal SQLLITE DB. Exiting......") + #send_mail("Error occurred. Please check config file or the internal SQLLITE DB.") exit(1) diff --git a/hasaki_crawler_engine/process_tracker.db b/hasaki_crawler_engine/process_tracker.db new file mode 100644 index 0000000000000000000000000000000000000000..08379bc24d94e1078ab7d7cfaf874d36b992d689 GIT binary patch literal 8192 zcmeI$u};G<5C-6LjMM-^uEfBEbPG~ZDNA>tpln$H58WaYoGPMiD%T8*@P52PAE0Yr zppMiqbRa88|1Vq4*_Y+p?B~a{^p=at&5Y+Ebx8=iV)Eiqje z>yywW)GdX9KmY;|fB*y_009U<00Izz00jP{z~_zVob~(SYv|3()Gl0=+vPIz&g5^_ z?Um8PQ!>&C>(PCh@ZKT!j<_YeKUgPYebw4sOwEMLx&O8Cvc8*3^-XG{R$)OP009U< z00Izz00bZa0SG_<0ucD?0!qq@IJ^?PzUMwG=S6iI$-!Z6yq#3;Bio%E3wi#txdpkE BNp}DM literal 0 HcmV?d00001