added Hasaki crawler

This commit is contained in:
Shariar Imtiaz 2024-03-27 11:01:53 +04:00
parent 6d84e37c27
commit 4ec62a41ed
2 changed files with 78 additions and 14 deletions

View File

@ -2,6 +2,7 @@ import logging
import json import json
import time import time
import smtplib import smtplib
import sqlite3
from hasaki_categories import HasakiCategories from hasaki_categories import HasakiCategories
from hasaki_category_products import HasakiCategoryProducts from hasaki_category_products import HasakiCategoryProducts
@ -15,19 +16,38 @@ logging.basicConfig(format=format, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%
config = {} config = {}
def main(cur):
    """Run the crawler stages that are still pending in ``process_tracker``.

    The stages are checked in a fixed order: ``category``,
    ``category_product``, ``product_info``.  A stage whose flag is 0 is run
    (its crawler is built from the module-level ``config`` and
    ``start_processing()`` is called) and its flag is then set to 1.

    If the final ``product_info`` stage is found with a non-zero flag, no
    crawling is done for it; instead all three flags are reset to 0.

    Args:
        cur: A DB-API cursor on the database holding ``process_tracker``.

    Raises:
        LookupError: If ``process_tracker`` has no row for one of the stages.
    """
    # Crawlers are wrapped in lambdas so each one is only constructed when
    # its stage actually has to run.
    stages = (
        ("category", lambda: HasakiCategories(config)),
        ("category_product", lambda: HasakiCategoryProducts(config)),
        ("product_info", lambda: HasakiProductInfo(config)),
    )

    last_stage_ran = False
    for process, make_crawler in stages:
        last_stage_ran = _stage_pending(cur, process)
        if last_stage_ran:
            make_crawler().start_processing()
            _set_stage_flag(cur, process, 1)

    # After the loop ``last_stage_ran`` describes 'product_info': when that
    # stage was already marked done, reset every flag to 0.
    if not last_stage_ran:
        for process, _ in stages:
            _set_stage_flag(cur, process, 0)


def _stage_pending(cur, process):
    """Return True when the tracker flag stored for *process* is 0."""
    cur.execute("select flag from process_tracker where process = ?", (process,))
    row = cur.fetchone()
    if row is None:
        raise LookupError(
            "process_tracker has no row for process {!r}".format(process)
        )
    return row[0] == 0


def _set_stage_flag(cur, process, flag):
    """Store *flag* as the tracker flag of *process*."""
    cur.execute(
        "update process_tracker set flag = ? where process = ?", (flag, process)
    )
def send_mail(msg): def send_mail(msg):
@ -71,6 +91,33 @@ def send_mail(msg):
except Exception as e: except Exception as e:
logging.info("Error while sending mail: {}".format(e)) logging.info("Error while sending mail: {}".format(e))
def init_tracker_tab(cur):
    """Create the ``process_tracker`` table if needed and seed its rows.

    Ensures one row exists for each of the processes ``category``,
    ``category_product`` and ``product_info``.  Missing rows are inserted
    with flag 0; existing rows are left untouched.  The full table content
    is logged before and after seeding.

    Args:
        cur: A DB-API cursor on the tracker database.
    """
    cur.execute(
        """CREATE TABLE IF NOT EXISTS process_tracker (
            process TEXT,
            flag int
        )"""
    )
    _log_tracker_rows(cur)

    for process in ("category", "category_product", "product_info"):
        cur.execute("select 1 from process_tracker where process = ?", (process,))
        if cur.fetchone() is None:
            cur.execute(
                "insert into process_tracker (process, flag) values (?, 0)",
                (process,),
            )

    _log_tracker_rows(cur)


def _log_tracker_rows(cur):
    """Log a separator line followed by every row of ``process_tracker``."""
    logging.info("++++++++++++++++++++++++++++++++++++++")
    cur.execute("select * from process_tracker")
    logging.info(cur.fetchall())
if __name__ == "__main__": if __name__ == "__main__":
logging.info("Starting Hasaki Crawler.......") logging.info("Starting Hasaki Crawler.......")
@ -79,13 +126,30 @@ if __name__ == "__main__":
with open("conf.json", "r") as jsonfile: with open("conf.json", "r") as jsonfile:
config = json.load(jsonfile) config = json.load(jsonfile)
logging.info("Config file loaded.......") logging.info("Config file loaded.......")
print(config) logging.info(config)
main() conn = sqlite3.connect('process_tracker.db')
send_mail("Hasaki crawler run complete.") conn.isolation_level = None
cur = conn.cursor()
cur.execute(f"""update process_tracker set flag = 1 where process = 'category'""")
cur.execute(f"""update process_tracker set flag = 1 where process = 'category_product'""")
cur.execute(f"""update process_tracker set flag = 0 where process = 'product_info'""")
init_tracker_tab(cur)
main(cur)
cur.close()
conn.close()
#send_mail("Hasaki crawler run complete.")
except Exception as e: except Exception as e:
logging.info("Error: ".format(e)) logging.info("Error: ".format(e))
logging.info("Cannot load config file. Please check. Exiting......") logging.info("Error occurred. Please check config file or the internal SQLLITE DB. Exiting......")
send_mail("Error occurred. Please check Hasaki Pipeline.") #send_mail("Error occurred. Please check config file or the internal SQLLITE DB.")
exit(1) exit(1)

Binary file not shown.