import logging

import psycopg2

###### Logger ######
LOG_FORMAT = "%(asctime)s: %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")

# Timestamp columns copied into every audit ("aud_") table alongside the data columns.
_AUDIT_TIMESTAMPS = ("createdat", "updatedat")


def _to_param(value):
    """Convert a value from a ``data`` dict into a psycopg2 query parameter.

    Values are sent as strings so that PostgreSQL coerces them to the column
    type, which mirrors the textual interpolation this module used before it
    switched to bound parameters.  ``None`` becomes SQL NULL.

    NOTE(review): the bare string "NULL" is also mapped to SQL NULL, on the
    assumption that callers may have relied on unquoted interpolation to store
    NULL in numeric columns -- confirm against the callers.
    """
    if value is None or value == "NULL":
        return None
    return str(value)


class shopee_db_writer:
    """Write crawled records into the crawler schema and its audit tables.

    Every public ``rce_*`` method follows the same insert-or-update flow,
    implemented once in :meth:`_upsert`:

    * no row matches the lookup key -> insert the row, then copy it into the
      matching ``aud_<table>`` audit table;
    * a row matches and the compared columns are unchanged -> only refresh
      ``updatedat`` on the row and on its audit rows;
    * a row matches but differs -> update it, then copy the new state into the
      audit table.

    Schema and table names are read from ``config`` (``crawler_schema`` plus
    one ``*_tab`` entry per method).  All values taken from ``data`` are passed
    as bound parameters; ``data`` itself is never modified.
    """

    def __init__(self, config):
        """Open the database connection described by ``config``.

        ``config`` must provide ``.get()`` for ``database``, ``db_user``,
        ``db_pass``, ``db_host`` and ``db_port``.  The connection runs in
        autocommit mode, so each statement is committed on its own.
        """
        self.config = config
        self.conn = psycopg2.connect(database=self.config.get('database'),
                                     user=self.config.get('db_user'),
                                     password=self.config.get('db_pass'),
                                     host=self.config.get('db_host'),
                                     port=self.config.get('db_port'))
        self.conn.autocommit = True
        self.cur = self.conn.cursor()

    def __del__(self):
        """Close the connection when the writer is garbage collected."""
        # __init__ may have failed before self.conn was assigned.
        conn = getattr(self, 'conn', None)
        if conn is None:
            return
        logging.info("Closing connection.....")
        conn.close()

    def _run(self, statement, params):
        """Execute one parameterized statement on the shared cursor."""
        logging.debug("%s | params=%s", statement, params)
        self.cur.execute(statement, params)

    def _upsert(self, table_key, data, key_columns, insert_columns,
                compare_columns, update_columns):
        """Insert, touch or update one row and keep its audit table in step.

        :param table_key: name of the config entry holding the table name.
        :param data: mapping of column name to value for the record.
        :param key_columns: columns (read from ``data``) that identify the row.
        :param insert_columns: columns written when the row does not exist;
            also the columns copied into the audit table.
        :param compare_columns: columns whose stored value is compared (as
            strings) with ``data`` to decide whether the row changed.
        :param update_columns: columns rewritten when the row changed.
        :raises KeyError: if ``data`` lacks a column that is needed.
        """
        schema = self.config.get('crawler_schema')
        table_name = self.config.get(table_key)
        table = schema + "." + table_name
        audit_table = schema + ".aud_" + table_name

        where = " and ".join(col + " = %s" for col in key_columns)
        key_params = [_to_param(data[col]) for col in key_columns]

        # The audit table mirrors the inserted columns plus id and timestamps.
        audit_columns = list(insert_columns)
        if "id" not in audit_columns:
            audit_columns.insert(0, "id")
        audit_columns.extend(_AUDIT_TIMESTAMPS)
        audit_list = ", ".join(audit_columns)
        snapshot_sql = "insert into {} ({}) select {} from {} where {}".format(
            audit_table, audit_list, audit_list, table, where)

        # Select the compared columns by name so the result does not depend on
        # the physical column order of the table.
        selected = ", ".join(["id"] + list(compare_columns))
        self._run("select {} from {} where {}".format(selected, table, where),
                  key_params)
        row = self.cur.fetchone()

        if not row:
            placeholders = ", ".join(["%s"] * len(insert_columns))
            self._run(
                "insert into {} ({}) values ({})".format(
                    table, ", ".join(insert_columns), placeholders),
                [_to_param(data[col]) for col in insert_columns])
            self._run(snapshot_sql, key_params)
            return

        unchanged = all(str(data[col]) == str(stored)
                        for col, stored in zip(compare_columns, row[1:]))
        if unchanged:
            # Nothing changed: only refresh the timestamps.
            self._run(
                "update {} set updatedat=now() where {}".format(table, where),
                key_params)
            self._run(
                "update {} a set updatedat=b.updatedat from {} b "
                "where a.id=b.id and b.id = %s".format(audit_table, table),
                [row[0]])
            return

        assignments = ", ".join(col + " = %s" for col in update_columns)
        self._run(
            "update {} set {}, updatedat=now() where {}".format(
                table, assignments, where),
            [_to_param(data[col]) for col in update_columns] + key_params)
        self._run(snapshot_sql, key_params)

    def rce_category(self, data):
        """Upsert a category row, keyed by ``rce_source_category_id``."""
        self._upsert(
            'category_tab', data,
            key_columns=('rce_source_category_id',),
            insert_columns=('parent_category_id', 'rce_source_id',
                            'rce_source_category_id', 'rce_source_status',
                            'category_page_url', 'category_page_url_hash',
                            'category_name'),
            compare_columns=('parent_category_id', 'rce_source_category_id',
                             'category_name', 'category_page_url'),
            # rce_source_id and rce_source_status are only set on insert.
            update_columns=('parent_category_id', 'rce_source_category_id',
                            'category_name', 'category_page_url',
                            'category_page_url_hash'))

    def rce_product(self, data):
        """Upsert a product row, keyed by ``rce_source_product_id``."""
        columns = ('rce_source_product_id', 'rce_source_product_status',
                   'product_page_url', 'product_page_url_hash',
                   'rce_category_id', 'rce_brand_id', 'rce_store_id',
                   'rce_source_product_name', 'product_images',
                   'product_description', 'product_sold_total', 'product_sold',
                   'product_price_min', 'product_price_min_before_discount',
                   'product_price_max', 'product_price_max_before_discount',
                   'ratings', 'ships_from')
        self._upsert(
            'product_tab', data,
            key_columns=('rce_source_product_id',),
            insert_columns=columns,
            # product_description has never taken part in change detection;
            # that is kept as-is.
            compare_columns=tuple(col for col in columns
                                  if col != 'product_description'),
            update_columns=columns)

    def rce_product_variant(self, data):
        """Upsert a product variant row, keyed by ``rce_source_variant_id``."""
        columns = ('rce_source_variant_id', 'rce_product_id',
                   'product_variant_name', 'product_variant_price',
                   'product_variant_price_before_discount',
                   'product_variant_stock')
        self._upsert(
            'variant_tab', data,
            key_columns=('rce_source_variant_id',),
            insert_columns=columns,
            compare_columns=columns,
            update_columns=columns)

    def rce_brand(self, data):
        """Upsert a brand row, keyed by ``rce_source_brand_id``."""
        columns = ('rce_source_id', 'rce_source_brand_id',
                   'rce_source_brand_status', 'brand_page_url',
                   'brand_page_url_hash', 'brand_name')
        self._upsert(
            'brand_tab', data,
            key_columns=('rce_source_brand_id',),
            insert_columns=columns,
            # rce_source_id is written but not used for change detection.
            compare_columns=columns[1:],
            update_columns=columns)

    def rce_reseller(self, data):
        """Upsert a reseller row, keyed by ``rce_source_reseller_id``."""
        columns = ('rce_source_id', 'rce_source_reseller_id',
                   'rce_source_reseller_status', 'reseller_name',
                   'reseller_average_rating', 'reseller_follower_count',
                   'reseller_response_rate')
        self._upsert(
            'reseller_tab', data,
            key_columns=('rce_source_reseller_id',),
            insert_columns=columns,
            # rce_source_id is written but not used for change detection.
            compare_columns=columns[1:],
            update_columns=columns)

    def rce_reseller_store(self, data):
        """Upsert a reseller store row, keyed by ``rce_source_store_id``."""
        columns = ('rce_source_store_id', 'rce_source_store_status',
                   'store_page_url', 'store_page_url_hash', 'store_location',
                   'rce_reseller_id')
        self._upsert(
            'reseller_store_tab', data,
            key_columns=('rce_source_store_id',),
            insert_columns=columns,
            compare_columns=columns,
            update_columns=columns)

    def rce_ratings_reviews(self, data):
        """Upsert a review row, keyed by ``rce_product_id`` and ``username``."""
        columns = ('rce_product_id', 'username', 'review', 'img_url',
                   'review_like_count', 'user_tier', 'shop_id', 'video_url',
                   'rating')
        self._upsert(
            'review_tab', data,
            key_columns=('rce_product_id', 'username'),
            # The review id is supplied by the caller on insert only.
            insert_columns=('id',) + columns,
            compare_columns=columns,
            update_columns=columns)

    def rce_ratings_reviews_productmodels(self, data):
        """Upsert a review/product-model link, keyed by ``rce_rating_id``."""
        columns = ('rce_rating_id', 'model_id')
        self._upsert(
            'review_productmodels_tab', data,
            key_columns=('rce_rating_id',),
            insert_columns=columns,
            compare_columns=columns,
            update_columns=('model_id',))

    def rce_tags(self, data):
        """Upsert a review tag row, keyed by ``description``."""
        self._upsert(
            'review_tags_tab', data,
            key_columns=('description',),
            insert_columns=('id', 'description'),
            compare_columns=('description',),
            update_columns=('description',))

    def rce_ratings_reviews_producttags(self, data):
        """Upsert a review/tag link row, keyed by ``rce_rating_id``."""
        columns = ('rce_rating_id', 'tag_ids')
        self._upsert(
            'review_producttags_tab', data,
            key_columns=('rce_rating_id',),
            insert_columns=columns,
            # tag_ids is compared and rewritten so that changed tags reach the
            # table.  NOTE(review): assumes the stored tag_ids stringifies the
            # same way as data['tag_ids'] -- confirm the column type.
            compare_columns=columns,
            update_columns=('tag_ids',))