106 lines
4.3 KiB
Python
106 lines
4.3 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
# Module metadata: handle of the script's author (not referenced by the code).
__author__ = "yelfathi"
|
||
|
|
||
|
|
||
|
import argparse
|
||
|
import re
|
||
|
import requests
|
||
|
from bs4 import BeautifulSoup
|
||
|
from multiprocessing import Process
|
||
|
from email.mime.text import MIMEText
|
||
|
|
||
|
|
||
|
class Leboncoin(Process):
    """Process that checks a Leboncoin search page for ads not seen before.

    One run downloads the search page at ``URL``, collects the ads listed on
    it, compares their ids with the ids stored in ``<keyw>-db.txt`` and sends
    a single notification through the Free Mobile SMS API describing every ad
    that was not in the archive yet.
    """

    URL = 'https://www.leboncoin.fr/locations/offres/aquitaine/?th=1&location=Toutes%20les%20communes%2040180&parrot=0'

    # Free Mobile SMS API endpoint and account.
    # NOTE(review): these credentials are committed in source. They are kept
    # here so the script keeps working, but they should be rotated and loaded
    # from configuration or the environment instead.
    SMS_URL = 'https://smsapi.free-mobile.fr/sendmsg'
    SMS_USER = '11117653'
    SMS_PASS = 'poUbj5QQgU9Iwm'

    # Seconds to wait for any HTTP request before giving up.
    TIMEOUT = 5
    # Picture used in the notification when an ad page shows no image.
    DEFAULT_IMAGE = 'http://static.leboncoin.fr/img/logo_big_new.png'
    # The ad id is the number right before ".htm" in an ad URL.
    AD_ID_RE = re.compile(r'([0-9]+)\.htm')

    def __init__(self):
        """Set up the process with the built-in search URL and archive name."""
        Process.__init__(self)
        # Only used to build the archive file name (<keyw>-db.txt).
        self.keyw = "leboncoin"
        self.url = self.URL

    def run(self):
        """Scrape the search page and notify about ads missing from the archive.

        Raises:
            LookupError: if the search page, an ad page or the SMS API cannot
                be reached (connection error or timeout).
        """
        listing = self._fetch(self.url)
        ads = self._collect_ads(listing.content)

        message = ''
        with open(self.keyw + '-db.txt', 'a+') as my_file:
            seen = self._read_seen_ids(my_file)
            for ad_id, ad_url in ads:
                if ad_id in seen:
                    continue
                # Fetch the details first: an ad whose page could not be
                # downloaded must not be archived, or it would never be
                # reported on a later run.
                message += self._describe_ad(ad_url)
                seen.add(ad_id)
                # Moving to the end before writing is required on Windows
                # when switching from reading to writing on the same handle.
                my_file.seek(0, 2)
                my_file.write(ad_id + "\n")

        if message:
            self._notify('Leboncoin:' + message)

    def _fetch(self, url, params=None):
        """GET *url* and return the response; network failures raise LookupError."""
        try:
            return requests.get(url, params=params, timeout=self.TIMEOUT)
        except (requests.ConnectionError, requests.Timeout):
            # A read timeout is not a ConnectionError in requests, so both
            # have to be listed to honour the timeout set above.
            raise LookupError('Could not reach host')

    @classmethod
    def _extract_ad_id(cls, ad_url):
        """Return the numeric id embedded in *ad_url*, or None if there is none."""
        match = cls.AD_ID_RE.search(ad_url)
        return match.group(1) if match else None

    @staticmethod
    def _read_seen_ids(db_file):
        """Return the set of ids stored, one per line, in the open archive file."""
        # In 'a+' mode the position may start at end-of-file (it always does
        # on Python 3), so rewind or the archive would look empty.
        db_file.seek(0)
        return set(db_file.read().splitlines())

    @classmethod
    def _collect_ads(cls, html):
        """Return ``(ad_id, ad_url)`` pairs for the ads in *html*, in page order.

        Only titled links that contain a ``div.date`` element are treated as
        ads. Each id is returned once; if an id appears several times the
        last URL seen for it is used.
        """
        soup = BeautifulSoup(html, 'html.parser')
        order = []
        urls = {}
        for ad in soup.find_all('a', {'title': True}):
            for date in ad.find_all('div', {'class': 'date'}):
                print(date)
                ad_url = ad.get('href')
                if not ad_url:
                    continue
                ad_id = cls._extract_ad_id(ad_url)
                if ad_id is None:
                    # Link without an ad id: nothing to archive or report.
                    continue
                if ad_id not in urls:
                    order.append(ad_id)
                urls[ad_id] = ad_url
        return [(ad_id, urls[ad_id]) for ad_id in order]

    def _describe_ad(self, ad_url):
        """Download the ad at *ad_url* and return its HTML notification snippet."""
        soup = BeautifulSoup(self._fetch(ad_url).content, 'html.parser')

        price_tag = soup.find('span', {'class': 'price'})
        if price_tag is not None:
            ad_price = price_tag.text.strip()
        else:
            ad_price = 'Not specified'

        ad_image = self.DEFAULT_IMAGE
        gallery = soup.find('div', {'class': 'print-lbcImages'})
        first_img = gallery.find('img') if gallery is not None else None
        if first_img is not None and first_img.get('src'):
            ad_image = str(first_img.get('src'))

        description = soup.find('div', 'content')
        if description is None:
            ad_description = ''
        else:
            # Drop the <br> tags before taking the text of the description.
            for tag in description.find_all('br'):
                tag.extract()
            ad_description = description.text.strip()

        return ('<html><head></head><body><p><a href="'
                + ad_url
                + '">Link to LBC ad</a></p><p>Description: '
                + ad_description + '</p>'
                + '<p>Price: ' + ad_price
                + '</p><p><img alt="Main Image" src="'
                + ad_image
                + '"/></p><hr noshade width="50%" align="center"></body></html>')

    def _notify(self, text):
        """Send *text* through the Free Mobile SMS API."""
        # Echo the text only: the request URL carries the account password
        # and must not end up on stdout.
        print(text)
        # Passing the values as params lets requests URL-encode them; the
        # text is HTML and contains characters such as '&', '"' and spaces
        # that would otherwise break the query string.
        self._fetch(self.SMS_URL, params=[('user', self.SMS_USER),
                                          ('pass', self.SMS_PASS),
                                          ('msg', text)])
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Command-line entry point: parse the options, then launch one scraper
    # process.
    cli = argparse.ArgumentParser(
        description='Check on Leboncoin.fr for new ads.')
    cli.add_argument(
        '--objects',
        required=False,
        help='objects to look after seperated by "+"')
    # NOTE: the parsed options are not forwarded to the scraper; parsing
    # still provides --help and rejects unknown arguments.
    options = cli.parse_args()

    Leboncoin().start()
|