leboncoin/leboncoin.py

# -- coding: utf-8 --
__author__ = "yelfathi"


import argparse
import re
import requests
from bs4 import BeautifulSoup
from multiprocessing import Process
from email.mime.text import MIMEText


class Leboncoin(Process):
    """Process that checks a Leboncoin listing page for ads not seen before.

    When run, it downloads the listing page at ``URL``, collects the ads it
    contains, compares their ids with the ids stored in ``<keyw>-db.txt``,
    downloads the page of every unseen ad and sends one notification holding
    an HTML summary of all of them through the Free Mobile SMS endpoint.
    """

    URL = 'https://www.leboncoin.fr/locations/offres/aquitaine/?th=1&location=Toutes%20les%20communes%2040180&parrot=0'

    # Image used in the summary when an ad page exposes no picture.
    DEFAULT_IMAGE = 'http://static.leboncoin.fr/img/logo_big_new.png'

    # Notification endpoint and its credentials.
    # NOTE(review): these credentials are hard-coded in the source file; they
    # should be rotated and loaded from the environment or a config file.
    SMS_API_URL = 'https://smsapi.free-mobile.fr/sendmsg'
    SMS_USER = '11117653'
    SMS_PASS = 'poUbj5QQgU9Iwm'

    # Timeout, in seconds, applied to every HTTP request.
    TIMEOUT = 5

    # The ad id is the run of digits right before ".htm" in the ad URL.
    _AD_ID_RE = re.compile(r'([0-9]+)\.htm')

    def __init__(self):
        Process.__init__(self)
        # Prefix of the archive file name ("<keyw>-db.txt").
        self.keyw = "leboncoin"
        self.url = self.URL

    @classmethod
    def _extract_ad_id(cls, href):
        """Return the numeric ad id found in *href*, or None if there is none."""
        if not href:
            return None
        match = cls._AD_ID_RE.search(href)
        return match.group(1) if match else None

    @staticmethod
    def _absolute_url(href):
        """Return *href* with an explicit scheme if it is protocol-relative."""
        if href.startswith('//'):
            return 'https:' + href
        return href

    @staticmethod
    def _read_archive(archive_file):
        """Return the set of ad ids stored one per line in *archive_file*."""
        # A file opened in 'a+' mode may start positioned at its end, in which
        # case read() would return nothing; always rewind first.
        archive_file.seek(0)
        return set(archive_file.read().splitlines())

    @staticmethod
    def _format_ad(url, description, price, image):
        """Return the HTML summary of a single ad."""
        return ''.join([
            '<html><head></head><body><p><a href="', url,
            '">Link to LBC ad</a></p><p>Description: ', description,
            '</p><p>Price: ', price,
            '</p><p><img alt="Main Image" src="', image,
            '"/></p><hr noshade width="50%" align="center"></body></html>',
        ])

    def _fetch(self, url):
        """GET *url* and return the response.

        Raises:
            LookupError: if the host cannot be reached or the request
                times out.
        """
        try:
            return requests.get(url, timeout=self.TIMEOUT)
        except (requests.ConnectionError, requests.Timeout):
            raise LookupError('Could not reach host')

    def _list_ads(self):
        """Return ``(ad_id, ad_url)`` pairs for the ads on the listing page.

        Pairs are returned in page order and each ad id appears only once.
        """
        soup = BeautifulSoup(self._fetch(self.url).content, 'html.parser')
        ads = []
        seen = set()
        for anchor in soup.find_all('a', {'title': True}):
            # Only anchors that contain a date block are treated as ads.
            # NOTE(review): the text of the date block is not used to filter;
            # ads are kept whatever their date.
            if anchor.find('div', {'class': 'date'}) is None:
                continue
            href = anchor.get('href')
            ad_id = self._extract_ad_id(href)
            if ad_id is None or ad_id in seen:
                continue
            seen.add(ad_id)
            ads.append((ad_id, self._absolute_url(href)))
        return ads

    def _build_ad_html(self, ad_url):
        """Download the ad page at *ad_url* and return its HTML summary.

        Missing price or description are reported as 'Not specified'; a
        missing picture is replaced by ``DEFAULT_IMAGE``.
        """
        soup = BeautifulSoup(self._fetch(ad_url).content, 'html.parser')

        price_tag = soup.find('span', {'class': 'price'})
        if price_tag is not None:
            price = price_tag.text.strip()
        else:
            price = 'Not specified'

        image = self.DEFAULT_IMAGE
        gallery = soup.find('div', {'class': 'print-lbcImages'})
        if gallery is not None:
            img = gallery.find('img')
            if img is not None and img.get('src'):
                image = img.get('src')

        description_tag = soup.find('div', 'content')
        if description_tag is not None:
            # Drop line-break tags before extracting the text.
            for br in description_tag.find_all('br'):
                br.extract()
            description = description_tag.text.strip()
        else:
            description = 'Not specified'

        return self._format_ad(ad_url, description, price, image)

    def _notify(self, text):
        """Send *text* through the SMS endpoint."""
        # Printed without the endpoint URL so credentials never reach stdout.
        print(text)
        # Passing the fields through ``params`` lets requests URL-encode them,
        # so '&', '#' or '+' inside the message cannot break the query string.
        requests.get(
            self.SMS_API_URL,
            params={'user': self.SMS_USER, 'pass': self.SMS_PASS, 'msg': text},
            timeout=self.TIMEOUT,
        )

    def run(self):
        """Notify about every listed ad whose id is not in the archive file.

        The ids of the notified ads are appended to ``<keyw>-db.txt`` only
        after the notification request has been issued, so an error raised
        while fetching or notifying leaves the archive untouched and the ads
        are picked up again on the next run.

        Raises:
            LookupError: if the listing page or an ad page cannot be reached.
        """
        ads = self._list_ads()

        with open(self.keyw + '-db.txt', 'a+') as my_file:
            archive = self._read_archive(my_file)

            new_ids = []
            summaries = []
            for ad_id, ad_url in ads:
                if ad_id in archive:
                    continue
                summaries.append(self._build_ad_html(ad_url))
                new_ids.append(ad_id)

            if not summaries:
                return

            self._notify('Leboncoin:' + ''.join(summaries))

            # Reposition at the end before switching from reading to writing.
            my_file.seek(0, 2)
            my_file.writelines(ad_id + "\n" for ad_id in new_ids)


if __name__ == '__main__':
    # Script entry point: parse the command line, then launch the checker in
    # its own process.
    cli_description = '                        Check on Leboncoin.fr for new ads.'
    parser = argparse.ArgumentParser(description=cli_description)
    # NOTE(review): --objects is accepted but its value is not used below.
    parser.add_argument(
        '--objects',
        required=False,
        help='objects to look after seperated by "+"',
    )
    args = parser.parse_args()

    worker = Leboncoin()
    worker.start()
init push 2017-07-26 14:05:52 +02:00			`# -- coding: utf-8 --`
			`__author__ = "yelfathi"`


			`import argparse`
			`import re`
			`import requests`
			`from bs4 import BeautifulSoup`
			`from multiprocessing import Process`
			`from email.mime.text import MIMEText`


			`class Leboncoin(Process):`
			`URL = 'https://www.leboncoin.fr/locations/offres/aquitaine/?th=1&location=Toutes%20les%20communes%2040180&parrot=0'`

			`def __init__(self):`
			`Process.__init__(self)`
			`#self.keyw = re.sub('\s+', '+', keyw)`
			`self.keyw = "leboncoin"`
			`self.url = self.URL #+ self.keyw`

			`def run(self):`
			`try:`
			`req = requests.get(self.url, timeout=5)`
			`except requests.ConnectionError:`
			`raise LookupError('Could not reach host')`

			`# List of ads present on the URL page`
			`ad_id_list = []`
			`# Dict: key as LBC id of the ads, value as URL of the ads`
			`url_dict = {}`
			`# Dict: key as LBC id of the ads, value as the Title of the ads`
			`title_dict = {}`

			`soup = BeautifulSoup(req.content, 'html.parser')`
			`for ad in soup.find_all('a', {'title': True}):`
			`ad_date = ad.find_all('div', {'class': 'date'})`
			`for date in ad_date:`
			`print date`
			`if date.findAll('div')[0].text == "Aujourd'hui":`
			`ad_url = ad['href']`
			`ad_id = re.findall(r'([0-9]+)\.htm', ad_url)`
			`ad_id_list.append(ad_id[0])`
			`url_dict[str(ad_id[0])] = str(ad_url)`
			`title_dict[str(ad_id[0])] = ad['title'].encode('utf-8')`
			`else:`
			`ad_url = ad['href']`
			`ad_id = re.findall(r'([0-9]+)\.htm', ad_url)`
			`ad_id_list.append(ad_id[0])`
			`url_dict[str(ad_id[0])] = str(ad_url)`
			`title_dict[str(ad_id[0])] = ad['title'].encode('utf-8')`

			`with open(self.keyw+'-db.txt', 'a+') as my_file:`
			`archive = my_file.read().splitlines()`
			`message = ''`
			`for ad_id in ad_id_list:`
			`if ad_id not in archive:`
			`my_file.seek(0, 2) # For Microsoft Windows only`
			`my_file.write(ad_id + "\n")`
			`try:`
			`req = requests.get(url_dict.get(ad_id), timeout=5)`
			`except requests.ConnectionError:`
			`raise LookupError('Could not reach host')`

			`soup = BeautifulSoup(req.content, 'html.parser')`
			`ad_price = soup.find('span', {'class': 'price'})`
			`if ad_price:`
			`ad_price = ad_price.text.strip()`
			`else:`
			`ad_price = 'Not specified'`
			`ad_image = soup.find('div', {'class': 'print-lbcImages'})`
			`if ad_image:`
			`ad_image = str(ad_image.find_all('img')[0].get('src'))`
			`else:`
			`ad_image =\`
			`'http://static.leboncoin.fr/img/logo_big_new.png'`
			`ad_description = soup.find('div', 'content')`
			`for tag in ad_description.findAll('br'):`
			`tag.extract()`
			`message += '<html><head></head><body><p><a href="'\`
			`+ url_dict.get(ad_id)\`
			`+ '">Link to LBC ad</a></p><p>Description: '\`
			`+ ad_description.text.strip()+'</p>'\`
			`+ '<p>Price: '+ad_price\`
			`+'</p><p><img alt="Main Image" src="'\`
			`+ ad_image\`
			`+ '"/></p><hr noshade width="50%" align=\`
			`"center"></body></html>'`

			`if message:`
			`subject = 'Leboncoin:'`
			`print("https://smsapi.free-mobile.fr/sendmsg?user=11117653&pass=poUbj5QQgU9Iwm&msg=%s" % subject + message)`
			`requests.get("https://smsapi.free-mobile.fr/sendmsg?user=11117653&pass=poUbj5QQgU9Iwm&msg=%s" % subject + message)`


			`if __name__ == '__main__':`

			`parser = argparse.ArgumentParser(description='\`
			`Check on Leboncoin.fr for new ads.')`
			`parser.add_argument('--objects',`
			`help='objects to look after seperated by "+"',`
			`required=False)`
			`args = parser.parse_args()`

			`Leboncoin().start()`