# -*- coding: utf-8 -*-
"""Watch a Leboncoin search page and send an SMS describing unseen ads."""
__author__ = "yelfathi"

import argparse
import os
import re
from email.mime.text import MIMEText
from multiprocessing import Process

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:  # Python 2
    from urlparse import urljoin

import requests
from bs4 import BeautifulSoup


class Leboncoin(Process):
    """Process that checks one listing page and reports ads not seen before.

    Ids of ads that were already reported are stored, one per line, in
    ``<keyw>-db.txt`` in the current working directory.
    """

    URL = 'https://www.leboncoin.fr/locations/offres/aquitaine/?th=1&location=Toutes%20les%20communes%2040180&parrot=0'

    SMS_URL = 'https://smsapi.free-mobile.fr/sendmsg'
    # NOTE(review): these credentials are committed in source. They are kept
    # as fallbacks for backward compatibility only; rotate them and supply
    # FREE_MOBILE_USER / FREE_MOBILE_PASS through the environment instead.
    SMS_USER = os.environ.get('FREE_MOBILE_USER', '11117653')
    SMS_PASS = os.environ.get('FREE_MOBILE_PASS', 'poUbj5QQgU9Iwm')

    TIMEOUT = 5  # seconds, applied to every HTTP request
    DEFAULT_IMAGE = 'http://static.leboncoin.fr/img/logo_big_new.png'
    MISSING = 'Not specified'

    _AD_ID_RE = re.compile(r'([0-9]+)\.htm')

    def __init__(self, keyw="leboncoin", url=None):
        """Create the worker.

        keyw -- prefix of the archive file name (``<keyw>-db.txt``).
        url  -- listing page to scan; defaults to ``Leboncoin.URL``.
        """
        Process.__init__(self)
        self.keyw = keyw
        self.url = url or self.URL

    def run(self):
        """Scan the listing, notify about unseen ads, then archive their ids.

        Raises LookupError when any HTTP request cannot be completed.
        """
        listing = self._get(self.url)
        archive = self._load_archive()
        fresh = [(ad_id, ad_url)
                 for ad_id, ad_url in self._collect_ads(listing.content)
                 if ad_id not in archive]
        if not fresh:
            return

        message = ''.join([self._describe_ad(ad_url) for _, ad_url in fresh])
        subject = 'Leboncoin:'
        # Print the text only: the request URL carries the API credentials.
        print(subject + message)
        # Passing the text through ``params`` lets requests percent-encode it,
        # so '&', '#' or newlines in a description cannot corrupt the query.
        self._get(self.SMS_URL, params=[('user', self.SMS_USER),
                                        ('pass', self.SMS_PASS),
                                        ('msg', subject + message)])
        # Archive only after the notification went out, so a failure above
        # leaves the ads unseen and they are retried on the next run.
        self._record([ad_id for ad_id, _ in fresh])

    def _get(self, url, params=None):
        """GET *url*; map connection failures and timeouts to LookupError."""
        try:
            return requests.get(url, params=params, timeout=self.TIMEOUT)
        except (requests.ConnectionError, requests.Timeout):
            raise LookupError('Could not reach host')

    def _db_path(self):
        """Return the path of the file holding already-reported ad ids."""
        return self.keyw + '-db.txt'

    def _load_archive(self):
        """Return the set of archived ad ids, creating the file if missing."""
        with open(self._db_path(), 'a+') as db_file:
            # In append mode the position may start at end-of-file, in which
            # case read() would return nothing; rewind explicitly.
            db_file.seek(0)
            return set(db_file.read().splitlines())

    def _record(self, ad_ids):
        """Append *ad_ids* to the archive file, one per line."""
        with open(self._db_path(), 'a') as db_file:
            db_file.writelines([ad_id + "\n" for ad_id in ad_ids])

    def _collect_ads(self, html):
        """Return ``[(ad_id, ad_url), ...]`` for the ads found in *html*.

        An ad is an ``<a title=...>`` element that contains a
        ``<div class="date">`` and whose href matches ``<digits>.htm``.
        Each id is returned once, in page order.
        """
        soup = BeautifulSoup(html, 'html.parser')
        ads = []
        seen = set()
        for link in soup.find_all('a', {'title': True}):
            # The date text itself is not inspected: the original compared it
            # to "Aujourd'hui" but handled both outcomes identically.
            if link.find('div', {'class': 'date'}) is None:
                continue
            href = link.get('href')
            match = self._AD_ID_RE.search(href) if href else None
            if match is None:
                continue
            ad_id = match.group(1)
            if ad_id in seen:
                continue
            seen.add(ad_id)
            # Resolve relative or scheme-relative hrefs against the listing
            # URL; absolute URLs pass through unchanged.
            ads.append((ad_id, urljoin(self.url, href)))
        return ads

    def _describe_ad(self, ad_url):
        """Fetch the ad page at *ad_url* and return its message section."""
        soup = BeautifulSoup(self._get(ad_url).content, 'html.parser')

        price_tag = soup.find('span', {'class': 'price'})
        price = self.MISSING if price_tag is None else price_tag.text.strip()

        image = self.DEFAULT_IMAGE
        gallery = soup.find('div', {'class': 'print-lbcImages'})
        picture = None if gallery is None else gallery.find('img')
        if picture is not None and picture.get('src'):
            image = urljoin(ad_url, picture.get('src'))

        description = self.MISSING
        content = soup.find('div', 'content')
        if content is not None:
            for line_break in content.find_all('br'):
                line_break.extract()
            description = content.text.strip()

        # The link and image labels previously had no value next to them.
        return ''.join([
            '\n\nLink to LBC ad: ', ad_url,
            '\n\nDescription: ', description, '\n\n',
            '\n\nPrice: ', price,
            '\n\nMain Image: ', image, '\n\n',
        ])


def main():
    """Parse the command line and start the watcher process."""
    parser = argparse.ArgumentParser(
        description='Check on Leboncoin.fr for new ads.')
    # NOTE(review): --objects is accepted but not used by the watcher yet.
    parser.add_argument('--objects',
                        help='objects to look after seperated by "+"',
                        required=False)
    parser.parse_args()
    Leboncoin().start()


if __name__ == '__main__':
    main()