# Source code for FBAdLibrarian.downloader

# -*- coding: utf-8 -*-
"""
Created on Sun Apr 12 15:16:29 2020
@author: rhs
"""

from selenium import webdriver
import requests
from datetime import datetime
#import chromedriver_autoinstaller

from webdriver_manager.chrome import ChromeDriverManager
import FBAdLibrarian.helpers as helpers


def _write_progress_log(adid_list_out, url_list_out):
    """Hand the remaining ad ids and urls to ``helpers.write_log``.

    Both lists are written with the same timestamp so the two files can be
    paired up again (prefixes ``temp/adid_list_`` and ``temp/url_list_``).

    Returns:
        The timestamp string (``%Y%m%d%H%M%S``) passed to ``helpers.write_log``.
    """
    ts = datetime.now().strftime("%Y%m%d%H%M%S")
    helpers.write_log(adid_list_out, "temp/adid_list_", ts)
    helpers.write_log(url_list_out, "temp/url_list_", ts)
    return ts


def adImageDownloader(url_filename, adid_filename, facebookAccesToken, outputDir):
    """Open every ad url in Chrome and save the ad image when one is present.

    The ad ids and urls are read with ``helpers.load_txt_to_list`` and the
    urls are passed through ``helpers.clean_url`` together with the access
    token. The two lists must have the same length; entry ``n`` of one is
    assumed to belong to entry ``n`` of the other. For every processed ad a
    line ``link,adid,content_type,`` is appended to ``metadata.txt`` in the
    current working directory.

    Args:
        url_filename: Path of the text file holding the ad urls.
        adid_filename: Path of the text file holding the ad ids.
        facebookAccesToken: Access token forwarded to ``helpers.clean_url``.
        outputDir: Directory forwarded to ``helpers.save_image``.

    Raises:
        helpers.CSSClassError: The page contains ``div._70g9``, which the
            original code treats as "session has been logged out".
        helpers.GeneralError: The page source contains the Ad Library
            "blocked" message.
        requests.ConnectionError: Re-raised unchanged after printing a hint.
        KeyboardInterrupt: Re-raised unchanged after printing a notice.

    Whenever an ad is not processed successfully, the still-pending ids and
    urls are written via ``helpers.write_log`` before the exception leaves
    this function.
    """
    # Kept as a function-level import (the original imported it inside the
    # loop) so the module's import block does not need to change.
    from selenium.common.exceptions import NoSuchElementException

    adlib_id_list = helpers.load_txt_to_list(str(adid_filename))
    url_list = helpers.load_txt_to_list(str(url_filename))
    url_list = helpers.clean_url(url_list, facebookAccesToken)

    api_block_string = "Blocked from Searching or Viewing the Ad Library"

    # Validate the input before starting a browser: there is no point in
    # launching Chrome when there is nothing (valid) to process.
    if not url_list:
        print("All images has been downloaded")
    if len(url_list) != len(adlib_id_list):
        print("Length of ad ID's is not equal to length of ad Url's")
        return
    if not url_list:
        return

    # NOTE(review): positional driver path and find_element_by_css_selector
    # are Selenium 3 style APIs; confirm against the pinned selenium version.
    wd = webdriver.Chrome(ChromeDriverManager().install())
    try:
        counter_max = len(url_list)

        # Working copies from which processed entries are removed, so that
        # a progress log only contains what is still left to do.
        url_list_out = list(url_list)
        adid_list_out = list(adlib_id_list)

        # Iterate back to front: removing index n from the copies then never
        # shifts the indices of entries that are still to be processed.
        for counter, n in enumerate(reversed(range(len(url_list))), start=1):
            content_type = "Unknown"
            success = False
            log_written = False
            print("Processing ad {} out of {}".format(counter, counter_max))
            try:
                wd.get(url_list[n])

                # Checking if session has been logged out.
                try:
                    wd.find_element_by_css_selector('div._70g9')
                except NoSuchElementException:
                    pass
                else:
                    ts = _write_progress_log(adid_list_out, url_list_out)
                    # Prevents the finally clause from writing a second log.
                    log_written = True
                    raise helpers.CSSClassError("Writing log at {}".format(ts))

                # Checking if API has been blocked. The exception is allowed
                # to propagate as-is so its message reaches the caller.
                if api_block_string in wd.page_source:
                    raise helpers.GeneralError("You have been locked out of the api")

                # Finding the actual image.
                try:
                    image_box = wd.find_element_by_css_selector('img._7jys')
                except NoSuchElementException:
                    image_box = None

                if image_box is not None:
                    image_url = image_box.get_attribute('src')
                    # Only download when the element really carries an http(s)
                    # source; otherwise the ad stays "Unknown" (or "video").
                    if image_url and 'http' in image_url:
                        helpers.save_image(output_path=str(outputDir),
                                           image_url=image_url,
                                           ad_id=adlib_id_list[n])
                        content_type = 'image'

                # Checking if video.
                try:
                    wd.find_element_by_css_selector("div._8o0a._8o05")
                except NoSuchElementException:
                    pass
                else:
                    content_type = 'video'

                print("Content type: %s" % content_type)

                # Deleting current entry from the copies of the lists.
                url_list_out, adid_list_out = helpers.delete_downloaded_element(
                    url_list_out=url_list_out,
                    adid_list_out=adid_list_out,
                    n=n)

                link = url_list[n]
                adid = adlib_id_list[n]

                # Writing metadata (format kept exactly as before, including
                # the leading newline and the trailing comma).
                with open("metadata.txt", 'a') as appender:
                    appender.write('\n')
                    for item in [link, adid, content_type]:
                        appender.write('%s,' % item)

                success = True

            except KeyboardInterrupt:
                print("Program has been interrupted")
                # Bare raise keeps the original exception and traceback.
                raise

            except requests.ConnectionError as e:
                print("Connection Error. Make sure you are connected to the internet")
                print(str(e))
                raise

            finally:
                if not success and not log_written:
                    ts = _write_progress_log(adid_list_out, url_list_out)
                    print("Writing log at {} \n".format(ts))
    finally:
        # Always shut the browser and chromedriver down, also on errors.
        wd.quit()



def adVideoDownloader():
    """Placeholder for downloading ad videos; not implemented yet.

    Raises:
        NotImplementedError: Always. The original body only named the
            exception class without raising it, so the call silently
            returned ``None``.
    """
    raise NotImplementedError("adVideoDownloader is not implemented yet")