214 lines
9.7 KiB
Python
214 lines
9.7 KiB
Python
|
from shopee_sub_categories import shopee_sub_categories
|
||
|
from shopee_category_products import shopee_category_products
|
||
|
from shopee_products import shopee_products
|
||
|
import logging
|
||
|
import psycopg2
|
||
|
import json
|
||
|
|
||
|
###### Logger ######
# Timestamp + message records at INFO level. The format string lives in its
# own constant so the builtin ``format`` is not shadowed at module scope.
LOG_FORMAT = "%(asctime)s: %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")

# Crawler settings shared by every function in this module. Starts empty and
# is rebound to the parsed conf.json when the module is run as a script.
config = {}
|
||
|
|
||
|
def get_sub_category():
    """Build a ``shopee_sub_categories`` from ``config`` and run ``get_sub_categories()``."""
    # The crawler object is not needed afterwards, so it is used in place.
    shopee_sub_categories(config).get_sub_categories()
|
||
|
|
||
|
|
||
|
def get_category_products(cur, slave01, slave02):
    """Browse the category pages, then set both slave tracker flags to 1.

    Args:
        cur: Open database cursor used to write the tracker rows.
        slave01: Tracker row the caller read earlier for ``<crawler_name>_slave01``
            (or ``None``). Accepted for backward compatibility but no longer
            consulted; see the note below.
        slave02: Same as ``slave01`` for ``<crawler_name>_slave02``.

    Note:
        The caller's snapshots can be stale: the row may have been inserted
        after the snapshot was read. Choosing INSERT vs UPDATE from them could
        therefore create a duplicate tracker row. The decision is now made from
        the database itself: UPDATE first, INSERT only when no row matched.
    """
    shopee_category_products(config).browse_category_page()

    table = config.get('crawler_schema') + "." + config.get('tracker_tab')
    for suffix in ("_slave01", "_slave02"):
        keyword = config.get('crawler_name') + suffix
        cur.execute(
            "update " + table + " set flag=1 where crawler_name='flag' and keyword=%s",
            (keyword,),
        )
        # rowcount is the number of rows the UPDATE touched; zero means the
        # slave has no tracker row yet and one must be created.
        if cur.rowcount == 0:
            cur.execute(
                "insert into " + table + " (crawler_name,keyword,flag) values('flag',%s,1)",
                (keyword,),
            )
|
||
|
|
||
|
def get_products_info():
    """Build a ``shopee_products`` from ``config`` and run ``get_shopee_products()``."""
    # The crawler object is not needed afterwards, so it is used in place.
    shopee_products(config).get_shopee_products()
|
||
|
|
||
|
|
||
|
def main():
    """Run the master crawler or one of the slave crawlers, as selected by ``config``.

    Reads ``crawler_main`` (required, converted with ``int``) and the optional
    ``crawler_slave_no``. A non-zero ``crawler_main`` runs the master; otherwise
    slave number 1 or 2 runs the matching slave.

    Raises:
        TypeError / ValueError: if ``crawler_main`` is missing or not numeric,
            or ``crawler_slave_no`` is set but not numeric.
    """
    crawler_main = int(config.get('crawler_main'))
    raw_slave_no = config.get('crawler_slave_no')
    crawler_slave_no = int(raw_slave_no) if raw_slave_no else None

    if crawler_main:
        crawler_master()
    elif crawler_slave_no == 1:
        crawler_slave1()
    elif crawler_slave_no == 2:
        crawler_slave2()
    else:
        # Previously this case fell through silently, which made a
        # misconfigured node look like a successful no-op run.
        logging.warning(
            "crawler_main is 0 and crawler_slave_no=%r is not 1 or 2; nothing to run.",
            raw_slave_no,
        )
|
||
|
|
||
|
def crawler_master():
    """Drive the master crawl, resuming from the stage stored in the tracker table.

    The master's tracker row (keyword ``<crawler_name>_master``) holds a flag:

    * no row / 0 -- reset both slave flags to 0, run ``get_sub_category()``,
      then continue as for 1;
    * 1 -- run ``get_category_products()``, then continue as for 2;
    * 2 -- run ``get_products_info()`` and store flag 3;
    * 3 -- if both slave flags are 2, reset the master flag to 0 and start a
      new cycle through ``main()``; otherwise log that the slaves are working.

    The flag is written after each completed stage. Any other flag value
    results in no work.

    Changes from the earlier implementation:

    * the connection is always closed, even when a crawl step raises;
    * slave rows are re-read after being reset, so ``get_category_products``
      never receives a stale "row missing" snapshot;
    * a missing slave row at flag 3 counts as "slaves not finished" instead of
      raising ``TypeError``;
    * the restart through ``main()`` happens after this run's connection has
      been closed, so two connections are not held open at once.
    """
    conn = psycopg2.connect(
        database=config.get('database'),
        user=config.get('db_user'),
        password=config.get('db_pass'),
        host=config.get('db_host'),
        port=config.get('db_port'),
    )
    restart = False
    try:
        conn.autocommit = True
        cur = conn.cursor()

        res = _master_read_flag_row(cur, "master")
        slave01 = _master_read_flag_row(cur, "slave01")
        slave02 = _master_read_flag_row(cur, "slave02")

        if not res:
            # First run ever: create the master row and start from stage 0.
            _master_write_flag(cur, "master", 0)
            stage = 0
        else:
            stage = res[2]

        if stage == 3:
            slaves_done = all(row and row[2] == 2 for row in (slave01, slave02))
            if slaves_done:
                _master_write_flag(cur, "master", 0)
                restart = True
            else:
                logging.info("Slaves are working.....")
        else:
            # The stages fall through on purpose: finishing one stage moves
            # straight on to the next within the same run.
            if stage == 0:
                _master_write_flag(cur, "slave01", 0)
                _master_write_flag(cur, "slave02", 0)
                # Refresh the snapshots so the rows created above are visible
                # to get_category_products().
                slave01 = _master_read_flag_row(cur, "slave01")
                slave02 = _master_read_flag_row(cur, "slave02")
                get_sub_category()
                _master_write_flag(cur, "master", 1)
                stage = 1

            if stage == 1:
                get_category_products(cur, slave01, slave02)
                _master_write_flag(cur, "master", 2)
                stage = 2

            if stage == 2:
                get_products_info()
                _master_write_flag(cur, "master", 3)
    finally:
        conn.close()

    if restart:
        main()


def _master_table():
    """Return the schema-qualified tracker table name taken from ``config``."""
    return config.get('crawler_schema') + "." + config.get('tracker_tab')


def _master_read_flag_row(cur, role):
    """Fetch the (crawler_name, keyword, flag) tracker row for *role*, or None."""
    cur.execute(
        "select crawler_name,keyword,flag from " + _master_table()
        + " where crawler_name='flag' and keyword=%s",
        (config.get('crawler_name') + "_" + role,),
    )
    return cur.fetchone()


def _master_write_flag(cur, role, flag):
    """Store *flag* on *role*'s tracker row, inserting the row if none exists."""
    keyword = config.get('crawler_name') + "_" + role
    cur.execute(
        "update " + _master_table()
        + " set flag=%s where crawler_name='flag' and keyword=%s",
        (flag, keyword),
    )
    if cur.rowcount == 0:
        cur.execute(
            "insert into " + _master_table()
            + " (crawler_name,keyword,flag) values('flag',%s,%s)",
            (keyword, flag),
        )
|
||
|
|
||
|
def crawler_slave1():
    """Run slave 1: crawl product info when its tracker flag is 1, then store flag 2.

    Reads the tracker row with keyword ``<crawler_name>_slave01``. When the
    flag is 1 it calls ``get_products_info()`` and then writes flag 2. When
    there is no row yet, or the flag has any other value, it only logs that
    the other crawlers are working.

    The connection is closed in a ``finally`` so a failing crawl no longer
    leaks it.
    """
    conn = psycopg2.connect(
        database=config.get('database'),
        user=config.get('db_user'),
        password=config.get('db_pass'),
        host=config.get('db_host'),
        port=config.get('db_port'),
    )
    try:
        conn.autocommit = True
        cur = conn.cursor()

        table = config.get('crawler_schema') + "." + config.get('tracker_tab')
        keyword = config.get('crawler_name') + "_slave01"

        cur.execute(
            "select crawler_name,keyword,flag from " + table
            + " where crawler_name='flag' and keyword=%s",
            (keyword,),
        )
        res = cur.fetchone()

        if res and res[2] == 1:
            get_products_info()
            cur.execute(
                "update " + table + " set flag=2 where crawler_name='flag' and keyword=%s",
                (keyword,),
            )
        else:
            logging.info("Slave02 or Master are working.....")
    finally:
        conn.close()
|
||
|
|
||
|
def crawler_slave2():
    """Run slave 2: crawl product info when its tracker flag is 1, then store flag 2.

    Reads the tracker row with keyword ``<crawler_name>_slave02``. When the
    flag is 1 it calls ``get_products_info()`` and then writes flag 2. When
    there is no row yet, or the flag has any other value, it only logs that
    the other crawlers are working.

    The connection is closed in a ``finally`` so a failing crawl no longer
    leaks it.
    """
    conn = psycopg2.connect(
        database=config.get('database'),
        user=config.get('db_user'),
        password=config.get('db_pass'),
        host=config.get('db_host'),
        port=config.get('db_port'),
    )
    try:
        conn.autocommit = True
        cur = conn.cursor()

        table = config.get('crawler_schema') + "." + config.get('tracker_tab')
        keyword = config.get('crawler_name') + "_slave02"

        cur.execute(
            "select crawler_name,keyword,flag from " + table
            + " where crawler_name='flag' and keyword=%s",
            (keyword,),
        )
        res = cur.fetchone()

        if res and res[2] == 1:
            get_products_info()
            cur.execute(
                "update " + table + " set flag=2 where crawler_name='flag' and keyword=%s",
                (keyword,),
            )
        else:
            logging.info("Slave01 or Master are working.....")
    finally:
        conn.close()
|
||
|
|
||
|
if __name__ == "__main__":
    # Script entry point: load conf.json into the module-level ``config`` and
    # hand over to main(). Config errors and crawl errors are reported
    # separately so a crawl failure is not mislabelled as a bad config file.
    logging.info("Starting Shopee Crawler.......")

    try:
        logging.info("Loading config file.......")
        with open("conf.json", "r") as jsonfile:
            config = json.load(jsonfile)
        logging.info("Config file loaded.......")
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError: invalid JSON
        # (json.JSONDecodeError is a subclass of ValueError).
        logging.exception("Cannot load config file. Please check. Exiting......")
        raise SystemExit(1)

    try:
        main()
    except Exception:
        # Top-level boundary: record the traceback, then exit non-zero.
        logging.exception("Crawler failed. Exiting......")
        raise SystemExit(1)
|
||
|
|
||
|
|
||
|
|
||
|
|