added Hasaki crawler
This commit is contained in:
parent
6d84e37c27
commit
4ec62a41ed
|
@ -2,6 +2,7 @@ import logging
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import smtplib
|
import smtplib
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
from hasaki_categories import HasakiCategories
|
from hasaki_categories import HasakiCategories
|
||||||
from hasaki_category_products import HasakiCategoryProducts
|
from hasaki_category_products import HasakiCategoryProducts
|
||||||
|
@ -15,19 +16,38 @@ logging.basicConfig(format=format, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%
|
||||||
config = {}
|
config = {}
|
||||||
|
|
||||||
|
|
||||||
def _stage_pending(cur, process):
    """Return True when *process* still has flag 0 in ``process_tracker``.

    Args:
        cur: DB-API cursor on the database holding ``process_tracker``.
        process: Value of the ``process`` column to look up.

    Raises:
        LookupError: If the table has no row for *process*.
    """
    cur.execute("select flag from process_tracker where process = ?", (process,))
    row = cur.fetchone()
    if row is None:
        # Fail with a readable message instead of the TypeError that
        # subscripting None would produce.
        raise LookupError(f"process_tracker has no row for process {process!r}")
    return row[0] == 0


def _set_stage_flag(cur, process, flag):
    """Set the ``flag`` column of the ``process_tracker`` row for *process*."""
    cur.execute(
        "update process_tracker set flag = ? where process = ?",
        (flag, process),
    )


def main(cur):
    """Run every crawl stage whose ``process_tracker`` flag is still 0.

    The stages are checked in order: ``category``, ``category_product``,
    ``product_info``. A stage with flag 0 is run and then flagged 1. If the
    ``product_info`` flag is already 1 when it is checked, no product crawl
    happens and all three flags are reset to 0 instead.

    Args:
        cur: DB-API cursor on the database holding ``process_tracker``.

    Raises:
        LookupError: If ``process_tracker`` lacks a row for one of the stages.
    """
    if _stage_pending(cur, "category"):
        hasaki_categories = HasakiCategories(config)
        hasaki_categories.start_processing()
        _set_stage_flag(cur, "category", 1)

    if _stage_pending(cur, "category_product"):
        hasaki_category_products = HasakiCategoryProducts(config)
        hasaki_category_products.start_processing()
        _set_stage_flag(cur, "category_product", 1)

    if _stage_pending(cur, "product_info"):
        # NOTE(review): the import of HasakiProductInfo is not visible in this
        # view of the file - confirm it is imported at module level.
        hasaki_products = HasakiProductInfo(config)
        hasaki_products.start_processing()
        _set_stage_flag(cur, "product_info", 1)
    else:
        # The last stage was already flagged as done: clear every flag.
        for process in ("category", "category_product", "product_info"):
            _set_stage_flag(cur, process, 0)
|
|
||||||
|
|
||||||
|
|
||||||
def send_mail(msg):
|
def send_mail(msg):
|
||||||
|
@ -71,6 +91,33 @@ def send_mail(msg):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info("Error while sending mail: {}".format(e))
|
logging.info("Error while sending mail: {}".format(e))
|
||||||
|
|
||||||
|
def init_tracker_tab(cur):
    """Create ``process_tracker`` if needed and seed its three stage rows.

    A row with flag 0 is inserted for each of ``category``,
    ``category_product`` and ``product_info`` that has no row yet; existing
    rows are left untouched. The full table contents are logged before and
    after seeding.

    Args:
        cur: DB-API cursor on the database that holds (or will hold) the
            ``process_tracker`` table.
    """
    cur.execute(
        """CREATE TABLE IF NOT EXISTS process_tracker (
            process TEXT,
            flag int
        )"""
    )

    logging.info("++++++++++++++++++++++++++++++++++++++")
    cur.execute("select * from process_tracker")
    logging.info(cur.fetchall())

    for process in ("category", "category_product", "product_info"):
        cur.execute("select * from process_tracker where process = ?", (process,))
        if cur.fetchone() is None:
            cur.execute(
                "insert into process_tracker (process, flag) values (?, 0)",
                (process,),
            )

    logging.info("++++++++++++++++++++++++++++++++++++++")
    cur.execute("select * from process_tracker")
    logging.info(cur.fetchall())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
logging.info("Starting Hasaki Crawler.......")
|
logging.info("Starting Hasaki Crawler.......")
|
||||||
|
@ -79,13 +126,30 @@ if __name__ == "__main__":
|
||||||
with open("conf.json", "r") as jsonfile:
|
with open("conf.json", "r") as jsonfile:
|
||||||
config = json.load(jsonfile)
|
config = json.load(jsonfile)
|
||||||
logging.info("Config file loaded.......")
|
logging.info("Config file loaded.......")
|
||||||
print(config)
|
logging.info(config)
|
||||||
|
|
||||||
main()
|
conn = sqlite3.connect('process_tracker.db')
|
||||||
send_mail("Hasaki crawler run complete.")
|
conn.isolation_level = None
|
||||||
|
|
||||||
|
cur = conn.cursor()
|
||||||
|
|
||||||
|
cur.execute(f"""update process_tracker set flag = 1 where process = 'category'""")
|
||||||
|
cur.execute(f"""update process_tracker set flag = 1 where process = 'category_product'""")
|
||||||
|
cur.execute(f"""update process_tracker set flag = 0 where process = 'product_info'""")
|
||||||
|
|
||||||
|
init_tracker_tab(cur)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
main(cur)
|
||||||
|
|
||||||
|
cur.close()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
#send_mail("Hasaki crawler run complete.")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info("Error: ".format(e))
|
logging.info("Error: ".format(e))
|
||||||
logging.info("Cannot load config file. Please check. Exiting......")
|
logging.info("Error occurred. Please check config file or the internal SQLLITE DB. Exiting......")
|
||||||
send_mail("Error occurred. Please check Hasaki Pipeline.")
|
#send_mail("Error occurred. Please check config file or the internal SQLLITE DB.")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue