2024-03-14 05:16:59 +00:00
import logging
import json
import time
2024-03-14 09:32:49 +00:00
import smtplib
2024-03-27 07:01:53 +00:00
import sqlite3
2024-04-01 06:46:40 +00:00
import psycopg2
2024-03-14 05:16:59 +00:00
from hasaki_categories import HasakiCategories
from hasaki_category_products import HasakiCategoryProducts
from hasaki_product_info import HasakiProductInfo
2024-03-14 09:32:49 +00:00
from email . message import EmailMessage
2024-03-14 05:16:59 +00:00
2024-04-01 07:31:33 +00:00
###### Logger ######
# Root-logger setup: records at INFO level and above go to the file named by
# `logname`, using the line layout and timestamp pattern given below.
logname = ' /home/ubuntu/logs/hasaki_crawler.log '
logging.basicConfig(
    level=logging.INFO,
    filename=logname,
    filemode=' a ',
    datefmt=" % Y- % m- %d % H: % M: % S ",
    format=' %(asctime)s , %(msecs)d %(name)s %(levelname)s : %(message)s ',
)

# Settings read by main() and get_status(). Starts empty; the script entry
# point rebinds it to the parsed contents of the JSON config file.
config = dict()
2024-03-27 07:01:53 +00:00
# Stage keys exactly as they are stored in process_tracker.process
# (the surrounding spaces are part of the stored value).
_STAGE_CATEGORY = " category "
_STAGE_CATEGORY_PRODUCT = " category_product "
_STAGE_PRODUCT_INFO = " product_info "


def _get_stage_flag(cur, process):
    """Return the flag stored for *process*; raise LookupError if it has no row."""
    cur.execute("select flag from process_tracker where process = ?", (process,))
    row = cur.fetchone()
    if row is None:
        raise LookupError(
            "process_tracker has no row for process {!r}".format(process)
        )
    return row[0]


def _set_stage_flag(cur, process, flag):
    """Store *flag* for *process* in process_tracker."""
    cur.execute(
        "update process_tracker set flag = ? where process = ?",
        (flag, process),
    )


def main(cur):
    """Run the three crawl stages, skipping those already marked as done.

    Each stage (categories, category products, product info) is executed only
    when its flag in ``process_tracker`` is 0, and its flag is set to 1 once
    ``start_processing()`` returns. If the product-info flag is already
    non-zero on entry to the last step, no product-info crawl is run and all
    three flags are reset to 0 instead.

    Args:
        cur: sqlite3 cursor on the tracker database (statements use ``?``
            placeholders). The ``process_tracker`` table must already hold
            one row per stage.

    Raises:
        LookupError: a stage has no row in ``process_tracker``.
    """
    if _get_stage_flag(cur, _STAGE_CATEGORY) == 0:
        HasakiCategories(config).start_processing()
        _set_stage_flag(cur, _STAGE_CATEGORY, 1)
    logging.info(" Category collection completed........ Moving to collecting products ;ist of the categories....... ")

    if _get_stage_flag(cur, _STAGE_CATEGORY_PRODUCT) == 0:
        HasakiCategoryProducts(config).start_processing()
        _set_stage_flag(cur, _STAGE_CATEGORY_PRODUCT, 1)
    logging.info(" Category products collection completed........ Moving to collecting product info....... ")

    if _get_stage_flag(cur, _STAGE_PRODUCT_INFO) == 0:
        HasakiProductInfo(config).start_processing()
        _set_stage_flag(cur, _STAGE_PRODUCT_INFO, 1)
    else:
        # Product info was already marked done: clear every flag.
        for stage in (_STAGE_CATEGORY, _STAGE_CATEGORY_PRODUCT, _STAGE_PRODUCT_INFO):
            _set_stage_flag(cur, stage, 0)
    logging.info(" Product info collection done. Stopping........ ")
2024-03-14 05:16:59 +00:00
2024-03-14 09:32:49 +00:00
def send_mail(msg):
    """Send the "Hasaki Crawler Status" e-mail with *msg* as its body content.

    The message is sent as HTML over SMTP with STARTTLS to the relay host
    hard-coded below (port 587).

    Args:
        msg: Text or HTML fragment placed inside the mail template. It is
            interpolated as-is, without HTML escaping.

    Returns:
        None. Any exception raised while building or sending the mail is
        logged with its traceback and swallowed, so this never raises.
    """
    try:
        # SECURITY(review): SMTP credentials are committed in source. They
        # should be rotated and loaded from configuration or the environment;
        # left unchanged here so mail delivery behaves as before.
        smtp_user = " AKIAR2YL57QC6NITTJN5 "
        smtp_password = " BAs9W772KNxLL1xnMzYhdIkpflQ8H+KP0Zbl8dphQZWh "
        sender = ' data_reporting@raenabeauty.com '
        recipients = ' shariar@raenabeauty.com, data_reporting@raenabeauty.com '
        # Template lines stay at column 0 so the rendered body is unchanged.
        html = f'''
< ! DOCTYPE html >
< html >
< body >
< div style = " background-color:#eee;padding:10px 20px; " >
< h2 style = " font-family:Georgia, ' Times New Roman ' , Times, serif;color:#454349; " > Hasaki Crawler Status < / h2 >
< / div >
< div style = " padding:20px 0px " >
< div style = " height: 800px;width:800px " >
{msg}
< div style = " text-align:Left; " >
< p > This is system generated mail . Please do not reply . < / p >
< / div >
< / div >
< / div >
< / body >
< / html >
'''
        # Separate name so the `msg` argument is not shadowed.
        mail = EmailMessage()
        mail['Subject'] = ' Hasaki Crawler Status '
        mail['From'] = sender
        mail['To'] = recipients
        mail.set_content(html, subtype=' html ')
        with smtplib.SMTP(' email-smtp.ap-southeast-1.amazonaws.com ', 587) as smtp:
            smtp.ehlo()
            smtp.starttls()
            smtp.login(smtp_user, smtp_password)
            smtp.send_message(mail)
    except Exception as e:
        # Best-effort notification: record the failure, do not propagate it.
        logging.exception(" Error while sending mail: %s ", e)
2024-03-27 07:01:53 +00:00
def init_tracker_tab(cur):
    """Make sure process_tracker exists and has a row for every crawl stage.

    Creates the table when it is absent, inserts a flag-0 row for each of the
    three stages that does not have one yet, then logs the full table.

    Args:
        cur: Open DB-API cursor on the tracker database.
    """
    create_sql = (
        " CREATE TABLE IF NOT EXISTS process_tracker (\n"
        "process TEXT ,\n"
        "flag int\n"
        ") "
    )
    cur.execute(create_sql)

    # (existence check, seed insert) per stage, in execution order.
    seed_statements = (
        (
            """ select * from process_tracker where process = ' category ' """,
            """ insert into process_tracker (process, flag) values( ' category ' , 0) """,
        ),
        (
            """ select * from process_tracker where process = ' category_product ' """,
            """ insert into process_tracker (process, flag) values( ' category_product ' , 0) """,
        ),
        (
            """ select * from process_tracker where process = ' product_info ' """,
            """ insert into process_tracker (process, flag) values( ' product_info ' , 0) """,
        ),
    )
    for lookup_sql, insert_sql in seed_statements:
        cur.execute(lookup_sql)
        existing = cur.fetchone()
        if existing is None:
            cur.execute(insert_sql)

    logging.info(" ++++++++++++++++ process tracker tab status ++++++++++++++++++++++ ")
    cur.execute(""" select * from process_tracker """)
    snapshot = cur.fetchall()
    logging.info(snapshot)
2024-04-01 06:46:40 +00:00
def _fetch_count(cur, sql):
    """Execute *sql* and return the first column of its first row."""
    cur.execute(sql)
    return cur.fetchone()[0]


def get_status():
    """Build the HTML summary of the finished crawl from the reporting database.

    Connects with psycopg2 using the connection settings in ``config``, counts
    the collected categories and the tracked / successful / failed products,
    and renders them as an HTML fragment.

    Returns:
        str: HTML fragment with the four counts. The success count is styled
        green only when it equals the total; the failure count is styled red
        when it is greater than zero.

    Raises:
        Whatever psycopg2 raises on connection or query failure. The cursor
        and connection are closed before the error propagates.
    """
    conn = psycopg2.connect(
        database=config.get(' database '),
        user=config.get(' db_user '),
        password=config.get(' db_pass '),
        host=config.get(' db_host '),
        port=config.get(' db_port '),
    )
    try:
        conn.autocommit = True
        cur = conn.cursor()
        try:
            cat_count = _fetch_count(
                cur,
                """ select count(1) from raena_spider_management.rce_category where rce_source_id = (select id from raena_spider_management.rce_source where source_name = ' Hasaki ' ) """,
            )
            product_total = _fetch_count(
                cur,
                """ select count(1) from raena_spider_management.crawler_tracker_hasaki """,
            )
            product_successful = _fetch_count(
                cur,
                """ select count(1) from raena_spider_management.crawler_tracker_hasaki where flag = 1 """,
            )
            product_failed = _fetch_count(
                cur,
                """ select count(1) from raena_spider_management.crawler_tracker_hasaki where flag = 0 """,
            )
        finally:
            cur.close()
    finally:
        # Close even when a query fails so the connection is not leaked.
        conn.close()

    green = ' style= " color: green; " '
    red = ' style= " color: red; " '
    success_style = green if product_successful == product_total else red
    failure_style = red if product_failed > 0 else green

    msg = (
        '\n'
        '< p > < b > Hasaki Crawler run is completed . Please check the status below , < / b > < / p >\n'
        '< br >\n'
        '< ul style = " list-style-type:disc " >\n'
        f'< li > Total Collected categories : < b > {cat_count} < / b > < / li >\n'
        f'< li > Total Collected products for categories : < b > {product_total} < / b > < / li >\n'
        f'< li > Total successfully collected products : < b {success_style} > {product_successful} < / b > < / li >\n'
        f'< li > Total failed to collect products : < b {failure_style} > {product_failed} < / b > < / li >\n'
        '< / ul >\n'
    )
    return msg
2024-03-27 07:01:53 +00:00
2024-03-14 05:16:59 +00:00
# Script entry point: load the JSON config, run the crawl stages against the
# local tracker database, then mail a status report. Any failure is logged,
# reported by mail, and ends the process with exit status 1.
if __name__ == " __main__ ":
    logging.info(" Starting Hasaki Crawler....... ")
    try:
        logging.info(" Loading config file....... ")
        with open(" conf.json ", " r ") as jsonfile:
            config = json.load(jsonfile)
        logging.info(" Config file loaded....... ")
        # NOTE(review): this writes the whole config, including the database
        # password read by get_status(), to the log file. Consider redacting.
        logging.info(config)

        conn = sqlite3.connect(' process_tracker.db ')
        try:
            # isolation_level None puts sqlite3 in autocommit mode, so each
            # flag update is persisted as soon as it is executed.
            conn.isolation_level = None
            cur = conn.cursor()
            try:
                init_tracker_tab(cur)
                main(cur)
            finally:
                cur.close()
        finally:
            # Release the tracker database even when a stage fails.
            conn.close()

        msg = get_status()
        send_mail(msg)
    except Exception as e:
        # Include the exception and traceback; the old message dropped both.
        logging.exception(" Error: %s ", e)
        logging.info(" Error occurred. Please check config file or the internal SQLLITE DB. Exiting...... ")
        send_mail(" Error occurred. Please check config file or the internal SQLLITE DB. ")
        # Same effect as exit(1) without relying on the site-provided builtin.
        raise SystemExit(1)