2024-03-14 05:16:59 +00:00
import logging
import psycopg2
2024-04-01 07:59:12 +00:00
###### Looger ######
logging . basicConfig ( filename = " /home/ubuntu/logs/hasaki_crawler.log " ,
filemode = ' a ' ,
format = ' %(asctime)s , %(msecs)d %(name)s %(levelname)s : %(message)s ' ,
datefmt = " % Y- % m- %d % H: % M: % S " ,
level = logging . INFO )
2024-03-14 05:16:59 +00:00
class hasaki_db_writer :
def __init__ ( self , config ) :
self . config = config
self . conn = psycopg2 . connect ( database = self . config . get ( ' database ' ) , user = self . config . get ( ' db_user ' ) , password = self . config . get ( ' db_pass ' ) , host = self . config . get ( ' db_host ' ) , port = self . config . get ( ' db_port ' ) )
self . conn . autocommit = True
self . cur = self . conn . cursor ( )
def __del__ ( self ) :
logging . info ( " Closing connection..... " )
self . conn . close ( )
def get_id ( self , schema , table ) :
sql = f """
select max ( id ) from { schema } . { table }
"""
self . cur . execute ( sql )
res = self . cur . fetchone ( )
if res [ 0 ] != None :
id = res [ 0 ] + 1
else :
id = 1
return id
def get_aud_id ( self , schema , table ) :
sql = f """
select max ( auditid ) from { schema } . { table }
"""
self . cur . execute ( sql )
res = self . cur . fetchone ( )
if res [ 0 ] != None :
id = res [ 0 ] + 1
else :
id = 1
return id
def rce_category ( self , data ) :
sql = f """
select * from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' category_tab ' ) } where category_name = ' {data['category_name']} ' and rce_source_id = { data [ ' rce_source_id ' ] }
"""
self . cur . execute ( sql )
res = self . cur . fetchone ( )
id_main = self . get_id ( self . config . get ( ' crawler_schema ' ) , self . config . get ( ' category_tab ' ) )
id_aud = self . get_aud_id ( self . config . get ( ' crawler_schema ' ) , " aud_ " + self . config . get ( ' category_tab ' ) )
if not res :
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' category_tab ' ) } ( id , parent_category_id , rce_source_id , rce_source_category_id , rce_source_status , category_page_url , category_page_url_hash , category_name , category_parent_name )
values ( { id_main } , { data [ ' parent_category_id ' ] } , { data [ ' rce_source_id ' ] } , { data [ ' rce_source_category_id ' ] } , { data [ ' rce_source_status ' ] } , ' {data['category_page_url']} ' , ' {data['category_page_url_hash']} ' , ' {data['category_name']} ' , ' {data['category_parent_name']} ' )
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' category_tab ' ) } ( auditid , id , parent_category_id , rce_source_id , rce_source_category_id , rce_source_status , category_page_url , category_page_url_hash , category_name , createdat , updatedat , category_parent_name )
select { id_aud } , id , parent_category_id , rce_source_id , rce_source_category_id , rce_source_status , category_page_url , category_page_url_hash , category_name , createdat , updatedat , category_parent_name from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' category_tab ' ) }
where category_name = ' {data['category_name']} ' and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
else :
if str ( data [ ' parent_category_id ' ] ) == str ( res [ 1 ] ) and str ( data [ ' rce_source_category_id ' ] ) == str ( res [ 3 ] ) and str ( data [ ' category_name ' ] ) == str ( res [ 7 ] ) and \
str ( data [ ' category_page_url ' ] ) == str ( res [ 5 ] ) and str ( data [ ' category_parent_name ' ] ) == str ( res [ 12 ] ) :
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' category_tab ' ) } set updatedat = GETDATE ( )
where category_name = ' {data['category_name']} ' and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = " update " + self . config . get ( ' crawler_schema ' ) + " .aud_ " + self . config . get ( ' category_tab ' ) + " a set updatedat=b.updatedat " \
" from " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' category_tab ' ) + " b where a.id=b.id and b.id = " + str ( res [ 0 ] )
logging . info ( sql )
self . cur . execute ( sql )
else :
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' category_tab ' ) } set parent_category_id = { data [ ' parent_category_id ' ] } , rce_source_category_id = { data [ ' rce_source_category_id ' ] } ,
category_name = ' {data['category_name']} ' , category_page_url = ' {data['category_page_url']} ' , category_page_url_hash = ' {data['category_page_url_hash']} ' , category_parent_name = ' {data['category_parent_name']} ' ,
updatedat = GETDATE ( ) where category_name = ' {data['category_name']} ' and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = " insert into " + self . config . get ( ' crawler_schema ' ) + " .aud_ " + self . config . get ( ' category_tab ' ) + " (auditid,id,parent_category_id,rce_source_id, " \
" rce_source_category_id,rce_source_status,category_page_url,category_page_url_hash,category_name,createdat,updatedat,category_parent_name) " \
" select " + str ( id_aud ) + " , id,parent_category_id,rce_source_id,rce_source_category_id,rce_source_status,category_page_url,category_page_url_hash, " \
" category_name,createdat,updatedat,category_parent_name from " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' category_tab ' ) + " " \
" where category_name = ' " + str ( res [ 7 ] ) + " ' "
logging . info ( sql )
self . cur . execute ( sql )
def rce_product ( self , data ) :
sql = f """
select * from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' product_tab ' ) }
where rce_source_product_id = { data [ ' rce_source_product_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
self . cur . execute ( sql )
res = self . cur . fetchone ( )
id_main = self . get_id ( self . config . get ( ' crawler_schema ' ) , self . config . get ( ' product_tab ' ) )
id_aud = self . get_aud_id ( self . config . get ( ' crawler_schema ' ) , " aud_ " + self . config . get ( ' product_tab ' ) )
if not res :
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' product_tab ' ) } ( id , rce_source_product_id , rce_source_product_status , product_page_url ,
product_page_url_hash , rce_category_id , rce_brand_id , rce_store_id , rce_source_product_name , product_images , product_description , product_sold_total , product_sold ,
product_price_min , product_price_min_before_discount , product_price_max , product_price_max_before_discount , ratings , product_section ,
rce_source_id , countryoforigin , rank , ships_from ) values ( { id_main } , { data [ ' rce_source_product_id ' ] } , { data [ ' rce_source_product_status ' ] } , ' {data['product_page_url']} ' ,
' {data['product_page_url_hash']} ' , { data [ ' rce_category_id ' ] } , { data [ ' rce_brand_id ' ] } , { data [ ' rce_store_id ' ] } , ' {data['rce_source_product_name']} ' , ' {data['product_images']} ' , ' {data['product_description']} ' , { data [ ' product_sold_total ' ] } , { data [ ' product_sold ' ] } ,
{ data [ ' product_price_min ' ] } , { data [ ' product_price_min_before_discount ' ] } , { data [ ' product_price_max ' ] } , { data [ ' product_price_max_before_discount ' ] } , { data [ ' ratings ' ] } , ' {data['product_section']} ' ,
{ data [ ' rce_source_id ' ] } , ' {data['countryoforigin']} ' , { data [ ' rank ' ] } , ' {data['ships_from']} ' )
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' product_tab ' ) } ( auditid , id , rce_source_product_id , rce_source_product_status , product_page_url , product_page_url_hash ,
rce_category_id , rce_brand_id , rce_store_id , rce_source_product_name , product_images , product_description , product_sold_total , product_sold , product_price_min , product_price_min_before_discount ,
product_price_max , product_price_max_before_discount , ratings , ships_from , product_section , createdat , updatedat , rce_source_id , countryoforigin , rank )
select { id_aud } , id , rce_source_product_id , rce_source_product_status , product_page_url , product_page_url_hash ,
rce_category_id , rce_brand_id , rce_store_id , rce_source_product_name , product_images , product_description , product_sold_total , product_sold , product_price_min , product_price_min_before_discount ,
product_price_max , product_price_max_before_discount , ratings , ships_from , product_section , createdat , updatedat , rce_source_id , countryoforigin , rank from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' product_tab ' ) }
where rce_source_product_id = { data [ ' rce_source_product_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
else :
if str ( data [ ' rce_source_product_id ' ] ) == str ( res [ 1 ] ) and str ( data [ ' rce_source_product_status ' ] ) == str ( res [ 2 ] ) and \
str ( data [ ' product_page_url ' ] ) == str ( res [ 3 ] ) and str ( data [ ' product_page_url_hash ' ] ) == str ( res [ 4 ] ) and str ( data [ ' rce_category_id ' ] ) == str ( res [ 5 ] ) and \
str ( data [ ' rce_brand_id ' ] ) == str ( res [ 6 ] ) and str ( data [ ' rce_store_id ' ] ) == str ( res [ 7 ] ) and str ( data [ ' rce_source_product_name ' ] ) == str ( res [ 8 ] ) and \
str ( data [ ' product_images ' ] ) == str ( res [ 9 ] ) and str ( data [ ' product_sold_total ' ] ) == str ( res [ 11 ] ) and \
str ( data [ ' product_sold ' ] ) == str ( res [ 12 ] ) and str ( data [ ' product_price_min ' ] ) == str ( res [ 13 ] ) and str ( data [ ' product_price_min_before_discount ' ] ) == str ( res [ 14 ] ) and \
str ( data [ ' product_price_max ' ] ) == str ( res [ 15 ] ) and str ( data [ ' product_price_max_before_discount ' ] ) == str ( res [ 16 ] ) and str ( data [ ' ratings ' ] ) == str ( res [ 17 ] ) \
and str ( data [ ' ships_from ' ] ) == str ( res [ 18 ] ) and str ( data [ ' rce_source_id ' ] ) == str ( res [ 21 ] ) \
and str ( data [ ' product_section ' ] ) == str ( res [ 22 ] ) and str ( data [ ' countryoforigin ' ] ) == str ( res [ 23 ] ) \
and str ( data [ ' rank ' ] ) == str ( res [ 24 ] ) :
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' product_tab ' ) } set updatedat = GETDATE ( )
where rce_source_product_id = { data [ ' rce_source_product_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = " update " + self . config . get ( ' crawler_schema ' ) + " .aud_ " + self . config . get ( ' product_tab ' ) + " a set updatedat=b.updatedat " \
" from " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' product_tab ' ) + " b where a.id=b.id and b.id = " + str ( res [ 0 ] )
logging . info ( sql )
self . cur . execute ( sql )
else :
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' product_tab ' ) } set rce_source_product_id = { data [ ' rce_source_product_id ' ] } , rce_source_product_status = { data [ ' rce_source_product_status ' ] } , product_page_url = ' {data['product_page_url']} ' ,
product_page_url_hash = ' {data['product_page_url_hash']} ' , rce_category_id = { data [ ' rce_category_id ' ] } , rce_brand_id = { data [ ' rce_brand_id ' ] } , rce_store_id = { data [ ' rce_store_id ' ] } ,
rce_source_product_name = ' {data['rce_source_product_name']} ' , product_images = ' {data['product_images']} ' , product_description = ' {data['product_description']} ' , product_sold_total = { data [ ' product_sold_total ' ] } ,
product_sold = { data [ ' product_sold ' ] } , product_price_min = ' {data['product_price_min']} ' , product_price_min_before_discount = ' {data['product_price_min_before_discount']} ' ,
product_price_max = ' {data['product_price_max']} ' , product_price_max_before_discount = ' {data['product_price_max_before_discount']} ' , ratings = { data [ ' ratings ' ] } ,
ships_from = ' {data['ships_from']} ' , product_section = ' {data['product_section']} ' , countryoforigin = ' {data['countryoforigin']} ' , rank = { data [ ' rank ' ] } , updatedat = GETDATE ( )
where rce_source_product_id = { data [ ' rce_source_product_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' product_tab ' ) } ( auditid , id , rce_source_product_id , rce_source_product_status , product_page_url , product_page_url_hash ,
rce_category_id , rce_brand_id , rce_store_id , rce_source_product_name , product_images , product_description , product_sold_total , product_sold , product_price_min , product_price_min_before_discount ,
product_price_max , product_price_max_before_discount , ratings , ships_from , product_section , createdat , updatedat , rce_source_id , countryoforigin , rank )
select { id_aud } , id , rce_source_product_id , rce_source_product_status , product_page_url , product_page_url_hash ,
rce_category_id , rce_brand_id , rce_store_id , rce_source_product_name , product_images , product_description , product_sold_total , product_sold , product_price_min , product_price_min_before_discount ,
product_price_max , product_price_max_before_discount , ratings , ships_from , product_section , createdat , updatedat , rce_source_id , countryoforigin , rank from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' product_tab ' ) }
where rce_source_product_id = { data [ ' rce_source_product_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
def rce_product_variant ( self , data ) :
sql = f """
select * from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' variant_tab ' ) } where
rce_source_variant_id = { data [ ' rce_source_variant_id ' ] } and rce_product_id = { data [ ' rce_product_id ' ] }
"""
self . cur . execute ( sql )
res = self . cur . fetchone ( )
id_main = self . get_id ( self . config . get ( ' crawler_schema ' ) , self . config . get ( ' variant_tab ' ) )
id_aud = self . get_aud_id ( self . config . get ( ' crawler_schema ' ) , " aud_ " + self . config . get ( ' variant_tab ' ) )
if not res :
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' variant_tab ' ) } ( id , rce_source_variant_id , rce_product_id , product_variant_name , product_variant_price , product_variant_price_before_discount , product_variant_stock , product_variant_sku )
values ( { id_main } , { data [ ' rce_source_variant_id ' ] } , { data [ ' rce_product_id ' ] } , ' {data['product_variant_name']} ' , { data [ ' product_variant_price ' ] } , { data [ ' product_variant_price_before_discount ' ] } , { data [ ' product_variant_stock ' ] } , ' {data['product_variant_sku']} ' )
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' variant_tab ' ) } ( auditid , id , rce_source_variant_id , rce_product_id , product_variant_name , product_variant_price , product_variant_price_before_discount , product_variant_stock , product_variant_sku , createdat , updatedat )
select { id_aud } , id , rce_source_variant_id , rce_product_id , product_variant_name , product_variant_price , product_variant_price_before_discount , product_variant_stock , product_variant_sku , createdat , updatedat
from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' variant_tab ' ) } where rce_source_variant_id = { data [ ' rce_source_variant_id ' ] } and rce_product_id = { data [ ' rce_product_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
else :
if str ( data [ ' rce_source_variant_id ' ] ) == str ( res [ 1 ] ) and str ( data [ ' rce_product_id ' ] ) == str ( res [ 2 ] ) and str ( data [ ' product_variant_name ' ] ) == str ( res [ 3 ] ) and \
str ( data [ ' product_variant_price ' ] ) == str ( res [ 4 ] ) and str ( data [ ' product_variant_price_before_discount ' ] ) == str ( res [ 5 ] ) and str ( data [ ' product_variant_stock ' ] ) == str ( res [ 6 ] ) \
and str ( data [ ' product_variant_sku ' ] ) == str ( res [ 9 ] ) :
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' variant_tab ' ) } set updatedat = GETDATE ( )
where rce_source_variant_id = { data [ ' rce_source_variant_id ' ] } and rce_product_id = { data [ ' rce_product_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' variant_tab ' ) } a set updatedat = b . updatedat
from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' variant_tab ' ) } b where a . id = b . id and b . id = { res [ 0 ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
else :
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' variant_tab ' ) } set rce_source_variant_id = { data [ ' rce_source_variant_id ' ] } ,
rce_product_id = { data [ ' rce_product_id ' ] } , product_variant_name = ' {data['product_variant_name']} ' , product_variant_price = { data [ ' product_variant_price ' ] } ,
product_variant_price_before_discount = { data [ ' product_variant_price_before_discount ' ] } , product_variant_stock = { data [ ' product_variant_stock ' ] } ,
product_variant_sku = { data [ ' product_variant_sku ' ] } , updatedat = GETDATE ( )
where rce_source_variant_id = { data [ ' rce_source_variant_id ' ] } and rce_product_id = { data [ ' rce_product_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' variant_tab ' ) } ( auditid , id , rce_source_variant_id , rce_product_id , product_variant_name , product_variant_price , product_variant_price_before_discount , product_variant_stock , product_variant_sku , createdat , updatedat )
select { id_aud } , id , rce_source_variant_id , rce_product_id , product_variant_name , product_variant_price , product_variant_price_before_discount , product_variant_stock , product_variant_sku , createdat , updatedat
from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' variant_tab ' ) } where rce_source_variant_id = { data [ ' rce_source_variant_id ' ] } and rce_product_id = { data [ ' rce_product_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
def rce_brand ( self , data ) :
sql = f """
select * from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' brand_tab ' ) } where rce_source_brand_id = { data [ ' rce_source_brand_id ' ] }
and rce_source_id = { data [ ' rce_source_id ' ] }
"""
self . cur . execute ( sql )
res = self . cur . fetchone ( )
id_main = self . get_id ( self . config . get ( ' crawler_schema ' ) , self . config . get ( ' brand_tab ' ) )
id_aud = self . get_aud_id ( self . config . get ( ' crawler_schema ' ) , " aud_ " + self . config . get ( ' brand_tab ' ) )
if not res :
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' brand_tab ' ) } ( id , rce_source_id , rce_source_brand_id , rce_source_brand_status , brand_page_url , brand_page_url_hash , brand_name , brand_following , brand_rating )
values ( { id_main } , { data [ ' rce_source_id ' ] } , { data [ ' rce_source_brand_id ' ] } , { data [ ' rce_source_brand_status ' ] } , ' {data['brand_page_url']} ' , ' {data['brand_page_url_hash']} ' , ' {data['brand_name']} ' , { data [ ' brand_following ' ] } , { data [ ' brand_rating ' ] } )
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' brand_tab ' ) } ( auditid , id , rce_source_id , rce_source_brand_id , rce_source_brand_status , brand_page_url , brand_page_url_hash , brand_name , brand_following , brand_rating , createdat , updatedat )
select { id_aud } , id , rce_source_id , rce_source_brand_id , rce_source_brand_status , brand_page_url , brand_page_url_hash , brand_name , brand_following , brand_rating , createdat , updatedat from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' brand_tab ' ) }
where rce_source_brand_id = { data [ ' rce_source_brand_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
else :
if str ( data [ ' rce_source_id ' ] ) == str ( res [ 1 ] ) and str ( data [ ' rce_source_brand_status ' ] ) == str ( res [ 3 ] ) and str ( data [ ' brand_page_url ' ] ) == str ( res [ 4 ] ) and \
str ( data [ ' brand_page_url_hash ' ] ) == str ( res [ 5 ] ) and str ( data [ ' brand_name ' ] ) == str ( res [ 6 ] ) and str ( data [ ' rce_source_brand_id ' ] ) == str ( res [ 2 ] ) :
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' brand_tab ' ) } set updatedat = GETDATE ( ) where rce_source_brand_id = { data [ ' rce_source_brand_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' brand_tab ' ) } a set updatedat = b . updatedat
from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' brand_tab ' ) } b where a . id = b . id and b . id = { res [ 0 ] } and
b . rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
else :
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' brand_tab ' ) } set rce_source_id = { data [ ' rce_source_id ' ] } , rce_source_brand_id = { data [ ' rce_source_brand_id ' ] } ,
rce_source_brand_status = { data [ ' rce_source_brand_status ' ] } , brand_page_url = ' {data['brand_page_url']} ' , brand_page_url_hash = ' {data['brand_page_url_hash']} ' ,
brand_name = ' {data['brand_name']} ' , brand_following = { data [ ' brand_following ' ] } , brand_rating = { data [ ' brand_rating ' ] } , updatedat = GETDATE ( ) where rce_source_brand_id = { data [ ' rce_source_brand_id ' ] }
and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' brand_tab ' ) } ( auditid , id , rce_source_id , rce_source_brand_id , rce_source_brand_status , brand_page_url , brand_page_url_hash , brand_name , brand_following , brand_rating , createdat , updatedat )
select { id_aud } , id , rce_source_id , rce_source_brand_id , rce_source_brand_status , brand_page_url , brand_page_url_hash , brand_name , brand_following , brand_rating , createdat , updatedat from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' brand_tab ' ) }
where rce_source_brand_id = { data [ ' rce_source_brand_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
def rce_reseller ( self , data ) :
data [ ' reseller_name ' ] = data [ ' reseller_name ' ]
sql = " select * from " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_tab ' ) + " where reseller_name = ' " + str ( data [ ' reseller_name ' ] ) + " ' "
self . cur . execute ( sql )
res = self . cur . fetchone ( )
id_main = self . get_id ( self . config . get ( ' crawler_schema ' ) , self . config . get ( ' reseller_tab ' ) )
id_aud = self . get_aud_id ( self . config . get ( ' crawler_schema ' ) , " aud_ " + self . config . get ( ' reseller_tab ' ) )
if not res :
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' reseller_tab ' ) } ( id , rce_source_id , rce_source_reseller_status , reseller_name )
values ( { id_main } , ' {data['rce_source_id']} ' , ' {data['rce_source_reseller_status']} ' , ' {data['reseller_name']} ' )
"""
#logging.info(sql)
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' reseller_tab ' ) } ( auditid , id , rce_source_id , rce_source_reseller_status , reseller_name , createdat , updatedat )
select { id_aud } , id , rce_source_id , rce_source_reseller_status , reseller_name , createdat , updatedat from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' reseller_tab ' ) }
where reseller_name = ' {data['reseller_name']} '
"""
#logging.info(sql)
self . cur . execute ( sql )
else :
if str ( data [ ' rce_source_reseller_status ' ] ) == str ( res [ 3 ] ) and str ( data [ ' reseller_name ' ] ) == str ( res [ 4 ] ) :
sql = " update " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_tab ' ) + " set updatedat=GETDATE() " \
" where reseller_name = ' " + str ( res [ 4 ] ) + " ' "
#logging.info(sql)
self . cur . execute ( sql )
sql = " update " + self . config . get ( ' crawler_schema ' ) + " .aud_ " + self . config . get ( ' reseller_tab ' ) + " a set updatedat=b.updatedat " \
" from " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_tab ' ) + " b where a.id=b.id and b.id = " + str ( res [ 0 ] )
#logging.info(sql)
self . cur . execute ( sql )
else :
sql = " update " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_tab ' ) + " set rce_source_id= " + str ( data [ ' rce_source_id ' ] ) + " , " \
" rce_source_reseller_status= " + str ( data [ ' rce_source_reseller_status ' ] ) + " , reseller_name= ' " + str ( data [ ' reseller_name ' ] ) + " ' , reseller_average_rating= " \
" ' " + str ( data [ ' reseller_average_rating ' ] ) + " ' ,reseller_description= ' " + str ( data [ ' reseller_description ' ] ) + " ' , updatedat=GETDATE() where reseller_name = ' " + str ( res [ 4 ] ) + " ' "
#logging.info(sql)
self . cur . execute ( sql )
sql = " insert into " + self . config . get ( ' crawler_schema ' ) + " .aud_ " + self . config . get ( ' reseller_tab ' ) + " (auditid,id,rce_source_id,rce_source_reseller_status, " \
" reseller_name,reseller_average_rating,reseller_description,createdat,updatedat) select " + str ( id_aud ) + " , id,rce_source_id,rce_source_reseller_status, " \
" reseller_name,reseller_average_rating,reseller_description,createdat,updatedat from " \
" " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_tab ' ) + " where reseller_name= ' " + str ( res [ 4 ] ) + " ' "
#logging.info(sql)
self . cur . execute ( sql )
def rce_reseller_store ( self , data ) :
data [ ' store_page_url ' ] = data [ ' store_page_url ' ] . replace ( " ' " , " ' ' " )
sql = " select * from " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_store_tab ' ) + " where store_page_url = ' " + str ( data [ ' store_page_url ' ] ) + " ' "
self . cur . execute ( sql )
res = self . cur . fetchone ( )
id_main = self . get_id ( self . config . get ( ' crawler_schema ' ) , self . config . get ( ' reseller_store_tab ' ) )
id_aud = self . get_aud_id ( self . config . get ( ' crawler_schema ' ) , " aud_ " + self . config . get ( ' reseller_store_tab ' ) )
if not res :
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' reseller_store_tab ' ) } ( id , rce_source_store_status , store_page_url , store_page_url_hash , rce_reseller_id , rce_source_id )
values ( { id_main } , ' {data['rce_source_store_status']} ' , ' {data['store_page_url']} ' , ' {data['store_page_url_hash']} ' , { data [ ' rce_reseller_id ' ] } , { data [ ' rce_source_id ' ] } )
"""
#logging.info(sql)
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' reseller_store_tab ' ) } ( auditid , id , rce_source_store_status , store_page_url , store_page_url_hash , rce_reseller_id , createdat , updatedat , rce_source_id )
select { id_aud } , id , rce_source_store_status , store_page_url , store_page_url_hash , rce_reseller_id , createdat , updatedat , rce_source_id from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' reseller_store_tab ' ) }
where store_page_url = ' {data['store_page_url']} '
"""
#logging.info(sql)
self . cur . execute ( sql )
else :
if str ( data [ ' rce_source_store_status ' ] ) == str ( res [ 2 ] ) and str ( data [ ' store_page_url ' ] ) == str ( res [ 3 ] ) and \
str ( data [ ' store_page_url_hash ' ] ) == str ( res [ 4 ] ) and \
str ( data [ ' rce_reseller_id ' ] ) == str ( res [ 6 ] ) and str ( data [ ' rce_source_id ' ] ) == str ( res [ 9 ] ) :
sql = " update " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_store_tab ' ) + " set updatedat=GETDATE() " \
" where store_page_url = ' " + str ( res [ 3 ] ) + " ' "
#logging.info(sql)
self . cur . execute ( sql )
sql = " update " + self . config . get ( ' crawler_schema ' ) + " .aud_ " + self . config . get ( ' reseller_store_tab ' ) + " a set updatedat=b.updatedat " \
" from " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_store_tab ' ) + " b where a.id=b.id and b.id = " + str ( res [ 0 ] )
#logging.info(sql)
self . cur . execute ( sql )
else :
sql = " update " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_store_tab ' ) + " set " \
" rce_source_store_status= " + str ( data [ ' rce_source_store_status ' ] ) + " , store_page_url= ' " + str ( data [ ' store_page_url ' ] ) + " ' , store_page_url_hash= " \
" ' " + str ( data [ ' store_page_url_hash ' ] ) + " ' ,store_location= ' " + str ( data [ ' store_location ' ] ) + " ' , rce_reseller_id= " + str ( data [ ' rce_reseller_id ' ] ) + " , " \
" updatedat=GETDATE(), rce_source_id= " + str ( data [ ' rce_source_id ' ] ) + " where store_page_url = ' " + str ( res [ 3 ] ) + " ' "
#logging.info(sql)
self . cur . execute ( sql )
sql = " insert into " + self . config . get ( ' crawler_schema ' ) + " .aud_ " + self . config . get ( ' reseller_store_tab ' ) + " (auditid,id,rce_source_store_status, " \
" store_page_url,store_page_url_hash,store_location,rce_reseller_id,createdat,updatedat,rce_source_id) select " + id_aud + " , id,rce_source_store_status, " \
" store_page_url,store_page_url_hash,store_location,rce_reseller_id,createdat,updatedat,rce_source_id from " \
" " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' reseller_store_tab ' ) + " where store_page_url= ' " + str ( res [ 3 ] ) + " ' "
#logging.info(sql)
self . cur . execute ( sql )
def rce_ratings_reviews ( self , data ) :
sql = f """
select * from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' review_tab ' ) }
where rce_product_id = { data [ ' rce_product_id ' ] } and username = ' {data['username']} '
"""
self . cur . execute ( sql )
res = self . cur . fetchone ( )
data [ ' username ' ] = data [ ' username ' ] . replace ( " ' " , " " )
id_main = self . get_id ( self . config . get ( ' crawler_schema ' ) , self . config . get ( ' review_tab ' ) )
id_aud = self . get_aud_id ( self . config . get ( ' crawler_schema ' ) , " aud_ " + self . config . get ( ' review_tab ' ) )
if not res :
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' review_tab ' ) } ( id , rce_product_id , username , review , img_url , review_like_count , user_tier , shop_id , video_url , rating )
values ( { id_main } , { data [ ' rce_product_id ' ] } , ' {data['username']} ' , ' {data['review']} ' , ' {data['img_url']} ' , { data [ ' review_like_count ' ] } , ' {data['user_tier']} ' , { data [ ' shop_id ' ] } , ' {data['video_url']} ' , { data [ ' rating ' ] } )
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' review_tab ' ) } ( auditid , id , rce_product_id , username , review , img_url , review_like_count , user_tier , shop_id , video_url , rating , createdat , updatedat )
select { id_aud } , id , rce_product_id , username , review , img_url , review_like_count , user_tier , shop_id , video_url , rating , createdat , updatedat from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' review_tab ' ) }
where rce_product_id = { data [ ' rce_product_id ' ] } and username = ' {data['username']} '
"""
logging . info ( sql )
self . cur . execute ( sql )
else :
if str ( data [ ' rce_product_id ' ] ) == str ( res [ 1 ] ) and str ( data [ ' username ' ] ) == str ( res [ 2 ] ) and str ( data [ ' review ' ] ) == str ( res [ 3 ] ) and \
str ( data [ ' img_url ' ] ) == str ( res [ 4 ] ) and str ( data [ ' review_like_count ' ] ) == str ( res [ 5 ] ) and str ( data [ ' user_tier ' ] ) == str ( res [ 6 ] ) and \
str ( data [ ' shop_id ' ] ) == str ( res [ 7 ] ) and str ( data [ ' video_url ' ] ) == str ( res [ 8 ] ) and str ( data [ ' rating ' ] ) == str ( res [ 9 ] ) :
sql = " update " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' review_tab ' ) + " set updatedat=GETDATE() " \
" where rce_product_id = " + str ( res [ 1 ] ) + " and username = ' " + res [ 2 ] + " ' "
logging . info ( sql )
self . cur . execute ( sql )
sql = " update " + self . config . get ( ' crawler_schema ' ) + " .aud_ " + self . config . get ( ' review_tab ' ) + " a set updatedat=b.updatedat " \
" from " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' review_tab ' ) + " b where a.id=b.id and b.id = " + str ( res [ 0 ] )
logging . info ( sql )
self . cur . execute ( sql )
else :
sql = " update " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' review_tab ' ) + " set rce_product_id= " + str ( data [ ' rce_product_id ' ] ) + " , " \
" username= ' " + str ( data [ ' username ' ] ) + " ' , review= ' " + str ( data [ ' review ' ] ) + " ' , img_url= " \
" ' " + str ( data [ ' img_url ' ] ) + " ' ,review_like_count= " + str ( data [ ' review_like_count ' ] ) + " , user_tier= ' " + str ( data [ ' user_tier ' ] ) + " ' , " \
" shop_id= " + str ( data [ ' shop_id ' ] ) + " , video_url= ' " + str ( data [ ' video_url ' ] ) + " ' , rating= ' " + str ( data [ ' rating ' ] ) + " ' , updatedat=GETDATE() " \
" where rce_product_id = " + str ( res [ 1 ] ) + " and username = ' " + str ( data [ ' username ' ] ) + " ' "
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' review_tab ' ) } ( auditid , id , rce_product_id , username , review , img_url , review_like_count , user_tier , shop_id , video_url , rating , createdat , updatedat )
select { id_aud } , id , rce_product_id , username , review , img_url , review_like_count , user_tier , shop_id , video_url , rating , createdat , updatedat from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' review_tab ' ) }
where rce_product_id = { data [ ' rce_product_id ' ] } and username = ' {data['username']} '
"""
logging . info ( sql )
self . cur . execute ( sql )
def rce_seo ( self , data ) :
sql = f """
select * from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' seo_tab ' ) }
where rce_product_id = { data [ ' rce_product_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
self . cur . execute ( sql )
res = self . cur . fetchone ( )
id_main = self . get_id ( self . config . get ( ' crawler_schema ' ) , self . config . get ( ' seo_tab ' ) )
id_aud = self . get_aud_id ( self . config . get ( ' crawler_schema ' ) , " aud_ " + self . config . get ( ' seo_tab ' ) )
if not res :
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' seo_tab ' ) } ( id , rce_product_id , rce_source_id , seo_title , seo_description , seo_url , seo_url_hash , seo_image , seo_price_amount , seo_price_currency , seo_product_band , seo_product_availability , seo_product_category , seo_product_condition , seo_product_retailer_item_id , seo_product_robots )
values ( { id_main } , { data [ ' rce_product_id ' ] } , { data [ ' rce_source_id ' ] } , ' {data['seo_title']} ' , ' {data['seo_description']} ' , ' {data['seo_url']} ' , ' {data['seo_url_hash']} ' , ' {data['seo_image']} ' , { data [ ' seo_price_amount ' ] } , ' {data['seo_price_currency']} ' , ' {data['seo_product_band']} ' , ' {data['seo_product_availability']} ' , ' {data['seo_product_category']} ' ,
' {data['seo_product_condition']} ' , { data [ ' seo_product_retailer_item_id ' ] } , ' {data['seo_product_robots']} ' )
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' seo_tab ' ) } ( auditid , id , rce_product_id , rce_source_id , seo_title , seo_description , seo_url , seo_url_hash , seo_image , seo_price_amount , seo_price_currency , seo_product_band , seo_product_availability , seo_product_category , seo_product_condition , seo_product_retailer_item_id , seo_product_robots , createdat , updatedat )
select { id_aud } , id , rce_product_id , rce_source_id , seo_title , seo_description , seo_url , seo_url_hash , seo_image , seo_price_amount , seo_price_currency , seo_product_band , seo_product_availability , seo_product_category , seo_product_condition , seo_product_retailer_item_id , seo_product_robots , createdat , updatedat from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' seo_tab ' ) }
where rce_product_id = { data [ ' rce_product_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
else :
if ( str ( data [ ' rce_product_id ' ] ) == str ( res [ 1 ] ) and str ( data [ ' rce_source_id ' ] ) == str ( res [ 2 ] ) and str ( data [ ' seo_title ' ] ) == str ( res [ 3 ] ) and \
str ( data [ ' seo_description ' ] ) == str ( res [ 4 ] ) and str ( data [ ' seo_url ' ] ) == str ( res [ 5 ] ) and str ( data [ ' seo_url_hash ' ] ) == str ( res [ 6 ] ) and \
str ( data [ ' seo_image ' ] ) == str ( res [ 7 ] ) and str ( data [ ' seo_price_amount ' ] ) == str ( res [ 8 ] ) and str ( data [ ' seo_price_currency ' ] ) == str ( res [ 9 ] ) and \
str ( data [ ' seo_product_band ' ] ) == str ( res [ 10 ] ) ) and str ( data [ ' seo_product_availability ' ] ) == str ( res [ 11 ] ) and str ( data [ ' seo_product_category ' ] ) == str ( res [ 12 ] ) and \
str ( data [ ' seo_product_condition ' ] ) == str ( res [ 13 ] ) and str ( data [ ' seo_product_retailer_item_id ' ] ) == str ( res [ 14 ] ) and str ( data [ ' seo_product_robots ' ] ) == str ( res [ 15 ] ) :
sql = " update " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' seo_tab ' ) + " set updatedat=GETDATE() " \
" where rce_product_id = " + str ( res [ 1 ] ) + " and rce_source_id = " + str ( data [ ' rce_source_id ' ] )
logging . info ( sql )
self . cur . execute ( sql )
sql = " update " + self . config . get ( ' crawler_schema ' ) + " .aud_ " + self . config . get ( ' seo_tab ' ) + " a set updatedat=b.updatedat " \
" from " + self . config . get ( ' crawler_schema ' ) + " . " + self . config . get ( ' seo_tab ' ) + " b where a.id=b.id and b.id = " + str ( res [ 0 ] )
logging . info ( sql )
self . cur . execute ( sql )
else :
sql = f """
update { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' seo_tab ' ) } set rce_product_id = { data [ ' rce_product_id ' ] } , rce_source_id = { data [ ' rce_source_id ' ] } , seo_title = ' {data['seo_title']} ' , seo_description = ' {data['seo_description']} ' ,
seo_url = ' {data['seo_url']} ' , seo_url_hash = ' {data['seo_url_hash']} ' , seo_image = ' {data['seo_image']} ' , seo_price_amount = ' {data['seo_price_amount']} ' , seo_price_currency = ' {data['seo_price_currency']} ' , seo_product_band = ' {data['seo_product_band']} ' ,
seo_product_availability = ' {data['seo_product_availability']} ' , seo_product_category = ' {data['seo_product_category']} ' , seo_product_condition = ' {data['seo_product_condition']} ' , seo_product_retailer_item_id = { data [ ' seo_product_retailer_item_id ' ] } ,
seo_product_robots = ' {data['seo_product_robots']} ' where rce_product_id = { data [ ' rce_product_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
sql = f """
insert into { self . config . get ( ' crawler_schema ' ) } . aud_ { self . config . get ( ' seo_tab ' ) } ( auditid , id , rce_product_id , rce_source_id , seo_title , seo_description , seo_url , seo_url_hash , seo_image , seo_price_amount , seo_price_currency , seo_product_band , seo_product_availability , seo_product_category , seo_product_condition , seo_product_retailer_item_id , seo_product_robots , createdat , updatedat )
select { id_aud } , id , rce_product_id , rce_source_id , seo_title , seo_description , seo_url , seo_url_hash , seo_image , seo_price_amount , seo_price_currency , seo_product_band , seo_product_availability , seo_product_category , seo_product_condition , seo_product_retailer_item_id , seo_product_robots , createdat , updatedat from { self . config . get ( ' crawler_schema ' ) } . { self . config . get ( ' seo_tab ' ) }
where rce_product_id = { data [ ' rce_product_id ' ] } and rce_source_id = { data [ ' rce_source_id ' ] }
"""
logging . info ( sql )
self . cur . execute ( sql )
# def rce_ratings_reviews_productmodels(self,data):
#
# sql = "select * from "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" where rce_rating_id = "+str(data['rce_rating_id'])
# self.cur.execute(sql)
# res = self.cur.fetchone()
#
#
# if not res:
#
# sql = "insert into "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" (rce_rating_id,model_id) " \
# "values("+str(data['rce_rating_id'])+",'"+str(data['model_id'])+"')"
# #logging.info(sql)
#
# self.cur.execute(sql)
#
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_productmodels_tab')+" (id,rce_rating_id,model_id," \
# "createdat,updatedat) select id,rce_rating_id,model_id,createdat,updatedat from " \
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" where rce_rating_id="+str(data['rce_rating_id'])+""
# #logging.info(sql)
#
# self.cur.execute(sql)
#
# else:
#
# if str(data['rce_rating_id'])==str(res[1]) and str(data['model_id'])==str(res[2]):
#
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" set updatedat=GETDATE() " \
# "where rce_rating_id = "+ str(res[1])
# #logging.info(sql)
# self.cur.execute(sql)
#
# sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_productmodels_tab')+" a set updatedat=b.updatedat " \
# "from "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" b where a.id=b.id and b.id = "+str(res[0])
# #logging.info(sql)
# self.cur.execute(sql)
# else:
#
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" set model_id="+str(data['model_id'])+", " \
# "updatedat=GETDATE() where rce_source_store_id = "+ str(res[1])
# #logging.info(sql)
# self.cur.execute(sql)
#
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_productmodels_tab')+" (id,rce_rating_id,model_id," \
# "createdat,updatedat) select id,rce_rating_id,model_id,createdat,updatedat from " \
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" where rce_rating_id="+str(res[1])+""
# #logging.info(sql)
#
# self.cur.execute(sql)
#
#
# def rce_tags(self,data):
#
# sql = "select * from "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" where description = '"+str(data['description'])+"'"
# self.cur.execute(sql)
# res = self.cur.fetchone()
#
#
# if not res:
#
# sql = "insert into "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" (id,description) " \
# "values("+str(data['id'])+",'"+str(data['description'])+"')"
# #logging.info(sql)
#
# self.cur.execute(sql)
#
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_tags_tab')+" (id,description," \
# "createdat,updatedat) select id,description,createdat,updatedat from " \
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" where description='"+str(data['description'])+"'"
# #logging.info(sql)
#
# self.cur.execute(sql)
#
# else:
#
# if str(data['description'])==str(res[1]):
#
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" set updatedat=GETDATE() " \
# "where description = '"+ str(res[1])+"'"
# #logging.info(sql)
# self.cur.execute(sql)
#
# sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_tags_tab')+" a set updatedat=b.updatedat " \
# "from "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" b where a.id=b.id and b.id = "+str(res[0])
# #logging.info(sql)
# self.cur.execute(sql)
# else:
#
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" set description='"+str(data['description'])+"', " \
# "updatedat=GETDATE() where description = "+ str(res[1])
# #logging.info(sql)
# self.cur.execute(sql)
#
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_tags_tab')+" (id,description," \
# "createdat,updatedat) select id,description,createdat,updatedat from " \
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" where description='"+str(res[1])+"'"
# #logging.info(sql)
#
# self.cur.execute(sql)
#
#
# def rce_ratings_reviews_producttags(self,data):
#
# sql = "select * from "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" where rce_rating_id = '"+str(data['rce_rating_id'])+"'"
# self.cur.execute(sql)
# res = self.cur.fetchone()
#
#
# if not res:
#
# sql = "insert into "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" (rce_rating_id,tag_ids) " \
# "values("+str(data['rce_rating_id'])+",'"+str(data['tag_ids'])+"')"
# #logging.info(sql)
#
# self.cur.execute(sql)
#
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_producttags_tab')+" (id,rce_rating_id,tag_ids," \
# "createdat,updatedat) select id,rce_rating_id,tag_ids,createdat,updatedat from " \
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" where rce_rating_id='"+str(data['rce_rating_id'])+"'"
# #logging.info(sql)
#
# self.cur.execute(sql)
#
# else:
#
# if str(data['rce_rating_id'])==str(res[1]):
#
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" set updatedat=GETDATE() " \
# "where rce_rating_id = '"+ str(res[1])+"'"
# #logging.info(sql)
# self.cur.execute(sql)
#
# sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_producttags_tab')+" a set updatedat=b.updatedat " \
# "from "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" b where a.id=b.id and b.id = "+str(res[0])
# #logging.info(sql)
# self.cur.execute(sql)
# else:
#
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" set rce_rating_id='"+str(data['rce_rating_id'])+"', " \
# "updatedat=GETDATE() where rce_rating_id = "+ str(res[1])
# #logging.info(sql)
# self.cur.execute(sql)
#
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_producttags_tab')+" (id,rce_rating_id,tag_ids," \
# "createdat,updatedat) select id,description,createdat,updatedat from " \
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" where description='"+str(res[1])+"'"
# #logging.info(sql)
#
# self.cur.execute(sql)
#
#