leboncoin/leboncoin.py

106 lines
4.3 KiB
Python
Raw Normal View History

2017-07-26 14:05:52 +02:00
# -*- coding: utf-8 -*-
__author__ = "yelfathi"
import argparse
import re
import requests
from bs4 import BeautifulSoup
from multiprocessing import Process
from email.mime.text import MIMEText
class Leboncoin(Process):
    """Process that checks a Leboncoin listing page and notifies about new ads.

    When run, the process downloads the listing page at ``URL``, collects the
    ads found on it, compares their ids with the ids stored in the archive
    file ``<keyw>-db.txt`` (in the current working directory), downloads the
    detail page of every ad not yet archived and sends one notification
    containing an HTML summary of all of them through the Free Mobile SMS
    HTTP endpoint.
    """

    URL = 'https://www.leboncoin.fr/locations/offres/aquitaine/?th=1&location=Toutes%20les%20communes%2040180&parrot=0'

    # Notification endpoint; the query string is built by ``_notify``.
    SMS_API_URL = 'https://smsapi.free-mobile.fr/sendmsg'
    # SECURITY: these credentials are committed in the source file. They
    # should be rotated and loaded from configuration or the environment.
    SMS_USER = '11117653'
    SMS_PASS = 'poUbj5QQgU9Iwm'

    # Image used when an ad page does not expose a usable picture.
    FALLBACK_IMAGE = 'http://static.leboncoin.fr/img/logo_big_new.png'
    # Timeout, in seconds, applied to every HTTP request.
    REQUEST_TIMEOUT = 5

    # The ad id is the run of digits right before ".htm" in the ad URL.
    _AD_ID_RE = re.compile(r'([0-9]+)\.htm')

    def __init__(self):
        Process.__init__(self)
        # Keyword-based searches are currently disabled: the keyword is fixed
        # and only serves as the prefix of the archive file name.
        self.keyw = "leboncoin"
        self.url = self.URL

    def run(self):
        """Scan the listing page and notify about ads that are not archived.

        Raises:
            LookupError: if the listing page or an ad page cannot be fetched.
        """
        listing = self._fetch(self.url)
        ads = self._collect_ads(listing.content)

        fragments = []
        with open(self.keyw + '-db.txt', 'a+') as my_file:
            # Append mode may leave the stream positioned at end-of-file (it
            # does on Python 3), so rewind before reading the archive.
            my_file.seek(0)
            archive = set(my_file.read().splitlines())
            for ad_id, ad_url in ads:
                if ad_id in archive:
                    continue
                fragments.append(self._build_ad_message(ad_url))
                # Archive the id only once its page was fetched successfully,
                # so a network failure does not make the ad silently vanish.
                my_file.seek(0, 2)  # For Microsoft Windows only
                my_file.write(ad_id + "\n")
                archive.add(ad_id)

        message = ''.join(fragments)
        if message:
            self._notify('Leboncoin:' + message)

    @staticmethod
    def _extract_ad_id(href):
        """Return the numeric ad id found in *href*, or None if there is none."""
        match = Leboncoin._AD_ID_RE.search(href or '')
        return match.group(1) if match else None

    @staticmethod
    def _render_ad(ad_url, description, price, image):
        """Return the HTML fragment describing a single ad."""
        return ('<html><head></head><body><p><a href="'
                + ad_url
                + '">Link to LBC ad</a></p><p>Description: '
                + description + '</p>'
                + '<p>Price: ' + price
                + '</p><p><img alt="Main Image" src="'
                + image
                + '"/></p><hr noshade width="50%" align="center">'
                + '</body></html>')

    def _fetch(self, url):
        """GET *url* and return the response.

        Raises:
            LookupError: on a connection failure or a timeout.
        """
        try:
            return requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except (requests.ConnectionError, requests.Timeout):
            # A read timeout is not a ConnectionError, so both are listed.
            raise LookupError('Could not reach host')

    def _collect_ads(self, html):
        """Return the ``(ad_id, ad_url)`` pairs found in the listing *html*.

        An anchor counts as an ad when it has a ``title`` attribute, contains
        at least one ``div`` of class ``date`` and its ``href`` embeds an ad
        id. Ads are kept whatever their date and each id is returned once, in
        page order.
        """
        soup = BeautifulSoup(html, 'html.parser')
        ads = []
        known = set()
        for link in soup.find_all('a', {'title': True}):
            if not link.find_all('div', {'class': 'date'}):
                continue
            ad_url = link.get('href')
            ad_id = self._extract_ad_id(ad_url)
            if ad_id is None or ad_id in known:
                continue
            known.add(ad_id)
            ads.append((ad_id, ad_url))
        return ads

    def _build_ad_message(self, ad_url):
        """Download the ad page at *ad_url* and return its HTML summary.

        Missing price or description is reported as ``'Not specified'`` and a
        missing picture is replaced by ``FALLBACK_IMAGE``.

        Raises:
            LookupError: if the ad page cannot be fetched.
        """
        page = BeautifulSoup(self._fetch(ad_url).content, 'html.parser')

        price_tag = page.find('span', {'class': 'price'})
        if price_tag is not None:
            price = price_tag.text.strip()
        else:
            price = 'Not specified'

        image = self.FALLBACK_IMAGE
        gallery = page.find('div', {'class': 'print-lbcImages'})
        first_img = gallery.find('img') if gallery is not None else None
        if first_img is not None and first_img.get('src'):
            image = str(first_img.get('src'))

        description_tag = page.find('div', 'content')
        if description_tag is None:
            description = 'Not specified'
        else:
            # Drop the line-break tags before taking the text content.
            for line_break in description_tag.find_all('br'):
                line_break.extract()
            description = description_tag.text.strip()

        return self._render_ad(ad_url, description, price, image)

    def _notify(self, text):
        """Print *text* and send it through the SMS HTTP endpoint."""
        # Only the message is printed; the credentials are kept out of stdout.
        print(text)
        # ``params`` lets requests URL-encode the message, which contains
        # characters such as '&', '"' and spaces.
        requests.get(self.SMS_API_URL,
                     params=[('user', self.SMS_USER),
                             ('pass', self.SMS_PASS),
                             ('msg', text)],
                     timeout=self.REQUEST_TIMEOUT)
def build_parser():
    """Return the command-line parser of the Leboncoin checker.

    The only option is ``--objects``; it is optional and its value is not
    used by the script.
    """
    parser = argparse.ArgumentParser(
        description='Check on Leboncoin.fr for new ads.')
    parser.add_argument('--objects',
                        help='objects to look after separated by "+" '
                             '(currently ignored)',
                        required=False)
    return parser


if __name__ == '__main__':
    # Parsing still provides --help and rejects unknown options, but the
    # result is not forwarded: Leboncoin() takes no arguments.
    build_parser().parse_args()
    Leboncoin().start()