# -*- coding: utf-8 -*-
"""
Created on Sun Apr 12 15:16:29 2020
@author: rhs
"""
from selenium import webdriver
import requests
from datetime import datetime
#import chromedriver_autoinstaller
from webdriver_manager.chrome import ChromeDriverManager
import FBAdLibrarian.helpers as helpers
def _write_progress_log(adid_list_out, url_list_out):
    """Write the still-pending ad ids and urls to timestamped logs.

    Returns the timestamp string that was passed to ``helpers.write_log`` so
    callers can mention it in their own messages.
    """
    ts = datetime.now().strftime("%Y%m%d%H%M%S")
    helpers.write_log(adid_list_out, "temp/adid_list_", ts)
    helpers.write_log(url_list_out, "temp/url_list_", ts)
    return ts


def adImageDownloader(url_filename, adid_filename, facebookAccesToken, outputDir):
    """Visit every ad url in a Chrome session and save the ad image, if any.

    For each url the page is opened, an image element (``img._7jys``) is
    looked up and handed to ``helpers.save_image``, and a line of the form
    ``link,adid,content_type,`` is appended to ``metadata.txt`` in the current
    working directory.  ``content_type`` is ``'image'``, ``'video'`` or
    ``"Unknown"``.  Whenever an entry does not complete, the entries that are
    still pending are written out through ``helpers.write_log``.

    Parameters
    ----------
    url_filename
        Passed (as ``str``) to ``helpers.load_txt_to_list`` to obtain the urls.
    adid_filename
        Passed (as ``str``) to ``helpers.load_txt_to_list`` to obtain the ad
        ids.  Must yield as many entries as ``url_filename``.
    facebookAccesToken
        Forwarded to ``helpers.clean_url`` together with the url list.
    outputDir
        Passed (as ``str``) to ``helpers.save_image`` as ``output_path``.

    Raises
    ------
    helpers.CSSClassError
        When the element ``div._70g9`` is present on a page (treated as
        "session has been logged out").
    helpers.GeneralError
        When the page source contains the Ad Library block notice.
    requests.ConnectionError
        Re-raised unchanged after printing a hint.
    KeyboardInterrupt
        Re-raised unchanged after printing a notice.
    """
    # Function-scope imports keep the module importable without selenium;
    # they are hoisted out of the loop, where the original re-imported each
    # iteration.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    adlib_id_list = helpers.load_txt_to_list(str(adid_filename))
    url_list = helpers.load_txt_to_list(str(url_filename))
    url_list = helpers.clean_url(url_list, facebookAccesToken)

    # Validate the inputs before paying for a browser launch.
    if not url_list:
        print("All images has been downloaded")
    if len(url_list) != len(adlib_id_list):
        print("Length of ad ID's is not equal to length of ad Url's")
        return
    if not url_list:
        return

    api_block_string = "Blocked from Searching or Viewing the Ad Library"
    counter_max = len(url_list)

    # Working copies from which finished entries are removed, so that a log
    # written after a failure only holds what is still left to do.
    url_list_out = list(url_list)
    adid_list_out = list(adlib_id_list)

    wd = webdriver.Chrome(ChromeDriverManager().install())
    try:
        # Walk the indices from the end towards the start.
        # NOTE(review): presumably delete_downloaded_element drops index n
        # from both copies, which is only safe in this order -- confirm.
        for counter, n in enumerate(reversed(range(len(url_list))), start=1):
            content_type = "Unknown"
            success = False
            print("Processing ad {} out of {}".format(counter, counter_max))
            try:
                wd.get(url_list[n])

                # Checking if the session has been logged out.
                try:
                    if wd.find_element(By.CSS_SELECTOR, 'div._70g9'):
                        ts = _write_progress_log(adid_list_out, url_list_out)
                        raise helpers.CSSClassError(
                            "Writing log at {}".format(ts))
                except NoSuchElementException:
                    pass

                # Checking if the API has been blocked.
                if api_block_string in wd.page_source:
                    raise helpers.GeneralError(
                        "You have been locked out of the api")

                # Finding the actual image.  Only download when the element
                # really carries an http(s) source; the original evaluated
                # this condition but never acted on it.
                try:
                    image_box = wd.find_element(By.CSS_SELECTOR, 'img._7jys')
                    image_url = image_box.get_attribute('src')
                    if image_url and 'http' in image_url:
                        helpers.save_image(output_path=str(outputDir),
                                           image_url=image_url,
                                           ad_id=adlib_id_list[n])
                        content_type = 'image'
                except NoSuchElementException:
                    pass

                # Checking if the ad is a video (takes precedence over image).
                try:
                    if wd.find_element(By.CSS_SELECTOR, "div._8o0a._8o05"):
                        content_type = 'video'
                except NoSuchElementException:
                    pass

                print("Content type: %s" % content_type)

                # Deleting the current entry from the copies of the lists.
                url_list_out, adid_list_out = helpers.delete_downloaded_element(
                    url_list_out=url_list_out,
                    adid_list_out=adid_list_out,
                    n=n)

                # Writing metadata: a newline, then each field followed by a
                # comma (the trailing comma is part of the existing format).
                link = url_list[n]
                adid = adlib_id_list[n]
                with open("metadata.txt", 'a') as appender:
                    appender.write('\n')
                    for item in (link, adid, content_type):
                        appender.write('%s,' % item)
                success = True
            except helpers.GeneralError:
                # Bare raise keeps the original message and traceback instead
                # of masking it behind a generic "Unknown error".
                raise
            except KeyboardInterrupt:
                print("Program has been interrupted")
                raise
            except requests.ConnectionError as e:
                print("Connection Error. Make sure you are connected to the internet")
                print(str(e))
                raise
            finally:
                if not success:
                    ts = _write_progress_log(adid_list_out, url_list_out)
                    print("Writing log at {} \n".format(ts))
    finally:
        # Always shut the browser down, including on errors and Ctrl-C.
        wd.quit()
def adVideoDownloader():
    """Placeholder for downloading ad videos; not implemented yet.

    Raises
    ------
    NotImplementedError
        Always.  The original body merely named the exception class without
        raising it, so calling the stub silently returned ``None``.
    """
    raise NotImplementedError("adVideoDownloader is not implemented yet")