raena-crawler-engine/shopee_crawler_engine/shopee_crawler.py

import json
import logging

import psycopg2

from shopee_sub_categories import shopee_sub_categories
from shopee_category_products import shopee_category_products
from shopee_products import shopee_products

###### Logger ######
log_format = "%(asctime)s: %(message)s"
logging.basicConfig(format=log_format, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")

config = {}
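# The crawler reads its settings from a conf.json file next to this script (loaded in
# the __main__ block below). The sketch here is only an illustration of the keys this
# module actually reads -- every value is a placeholder, not the real configuration:
#
# {
#     "crawler_name": "shopee_crawler",
#     "crawler_schema": "crawler_schema",
#     "tracker_tab": "crawler_tracker",
#     "crawler_main": "1",
#     "crawler_slave_no": "",
#     "database": "postgres",
#     "db_user": "postgres",
#     "db_pass": "postgres",
#     "db_host": "localhost",
#     "db_port": "5432"
# }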
def get_sub_category():
    sub_cat = shopee_sub_categories(config)
    sub_cat.get_sub_categories()
def get_category_products(cur, slave01, slave02):
    # Crawl the category pages, then set both slave flags to 1 so the
    # slave crawlers know they can start collecting product details.
    products = shopee_category_products(config)
    products.browse_category_page()
    if not slave01:
        sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave01',1)"
        cur.execute(sql)
    else:
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=1 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
        cur.execute(sql)
    if not slave02:
        sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave02',1)"
        cur.execute(sql)
    else:
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=1 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
        cur.execute(sql)
def get_products_info():
    product_info = shopee_products(config)
    product_info.get_shopee_products()
def main():
    crawler_main = int(config.get('crawler_main'))
    crawler_slave_no = int(config.get('crawler_slave_no')) if config.get('crawler_slave_no') else None
    if crawler_main:
        crawler_master()
    else:
        if crawler_slave_no == 1:
            crawler_slave1()
        elif crawler_slave_no == 2:
            crawler_slave2()
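# Flag protocol used in the tracker table (rows with crawler_name='flag'), as implemented
# by the functions below; the table only needs the crawler_name, keyword and flag columns:
#   <crawler_name>_master  : 0 = fresh run, 1 = sub-categories done,
#                            2 = category products done, 3 = product info done
#   <crawler_name>_slave01 / _slave02 : 0 = idle, 1 = may start, 2 = finished
# Once the master reaches 3 and both slaves report 2, the master resets its own flag
# to 0 and starts over; slave flags are reset to 0 at the beginning of the next run.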
def crawler_master():
    conn = psycopg2.connect(database=config.get('database'), user=config.get('db_user'), password=config.get('db_pass'), host=config.get('db_host'), port=config.get('db_port'))
    conn.autocommit = True
    cur = conn.cursor()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
    cur.execute(sql)
    res = cur.fetchone()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
    cur.execute(sql)
    slave01 = cur.fetchone()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
    cur.execute(sql)
    slave02 = cur.fetchone()
    if not res:
        # First run: create the master flag and make sure both slave flags exist and are 0.
        sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_master',0)"
        cur.execute(sql)
        if not slave01:
            sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave01',0)"
            cur.execute(sql)
        else:
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
            cur.execute(sql)
        if not slave02:
            sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave02',0)"
            cur.execute(sql)
        else:
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
            cur.execute(sql)
        get_sub_category()
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=1 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
        cur.execute(sql)
        # Both slave rows exist at this point, so pass truthy values to force the
        # update path in get_category_products and avoid inserting duplicate rows.
        get_category_products(cur, True, True)
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
        cur.execute(sql)
        get_products_info()
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=3 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
        cur.execute(sql)
    else:
        if res[2] == 0:
            # Fresh run: reset both slave flags, then go through all three stages.
            if not slave01:
                sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave01',0)"
                cur.execute(sql)
            else:
                sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
                cur.execute(sql)
            if not slave02:
                sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave02',0)"
                cur.execute(sql)
            else:
                sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
                cur.execute(sql)
            get_sub_category()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=1 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
            # As above, the slave rows are guaranteed to exist here.
            get_category_products(cur, True, True)
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=3 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
        elif res[2] == 1:
            # Sub-categories are already done: resume from the category product stage.
            get_category_products(cur, slave01, slave02)
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=3 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
        elif res[2] == 2:
            # Resume from the product info stage.
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=3 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
        elif res[2] == 3:
            # Previous run finished: restart only once both slaves report flag 2.
            if slave01[2] == 2 and slave02[2] == 2:
                sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
                cur.execute(sql)
                main()
            else:
                logging.info("Slaves are working.....")
                conn.close()
    conn.close()
def crawler_slave1():
    conn = psycopg2.connect(database=config.get('database'), user=config.get('db_user'), password=config.get('db_pass'), host=config.get('db_host'), port=config.get('db_port'))
    conn.autocommit = True
    cur = conn.cursor()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
    cur.execute(sql)
    res = cur.fetchone()
    if res:
        if res[2] == 1:
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
            cur.execute(sql)
        else:
            logging.info("Slave02 or Master is working.....")
    conn.close()
def crawler_slave2():
    conn = psycopg2.connect(database=config.get('database'), user=config.get('db_user'), password=config.get('db_pass'), host=config.get('db_host'), port=config.get('db_port'))
    conn.autocommit = True
    cur = conn.cursor()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
    cur.execute(sql)
    res = cur.fetchone()
    if res:
        if res[2] == 1:
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
            cur.execute(sql)
        else:
            logging.info("Slave01 or Master is working.....")
    conn.close()
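# The tracker statements above build SQL by string concatenation, which is only safe as
# long as the config values are trusted. A hardened variant could bind values with
# psycopg2 placeholders and compose identifiers via psycopg2.sql. The commented-out
# helper below is only a sketch of that approach (set_tracker_flag is hypothetical and
# not used anywhere in this module), not part of the original crawler:
#
# from psycopg2 import sql
#
# def set_tracker_flag(cur, flag_value, keyword_suffix):
#     query = sql.SQL(
#         "update {}.{} set flag=%s where crawler_name='flag' and keyword=%s"
#     ).format(
#         sql.Identifier(config.get('crawler_schema')),
#         sql.Identifier(config.get('tracker_tab')),
#     )
#     cur.execute(query, (flag_value, config.get('crawler_name') + keyword_suffix))
#
# # usage: set_tracker_flag(cur, 2, '_slave01')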
if __name__ == "__main__":
    logging.info("Starting Shopee Crawler.......")
    try:
        logging.info("Loading config file.......")
        with open("conf.json", "r") as jsonfile:
            config = json.load(jsonfile)
        logging.info("Config file loaded.......")
        main()
    except Exception as e:
        logging.error("Error: {}".format(e))
        logging.info("Cannot load config file or crawler run failed. Please check. Exiting......")
        exit(1)