bachir
/
ola5doc


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
							#!/usr/bin/env python2.7.14
# -*- coding: utf-8 -*-

# libraries webscrapping
# from pattern.web import Newsfeed
# from pattern.web import Wikipedia
from pattern.web import *
import wikipedia
# libraries traitement langage
from pattern.en import tag
import random

# -------------------------- #
# function definitions
# -------------------------- #

# fetch the title of the first entry of an RSS feed
def get_rss(url):
    """Return the title of the first entry of the RSS feed at ``url``.

    The title is returned as a UTF-8 encoded byte string (a Python 2
    ``str``). ``None`` is returned when the feed yields no entry.
    """
    # only the first entry of the feed is needed
    for result in Newsfeed().search(url)[:1]:
        title = result.title
        # Encode instead of cleaning up repr(): repr() wraps the text in
        # u'...' and escapes non-ASCII characters, and stripping that wrapper
        # with str.replace() also removed every "u" and every apostrophe
        # that belonged to the headline itself.
        if not isinstance(title, bytes):
            title = title.encode('utf-8')
        return title
    # empty feed: nothing to return
    return None

# collect the words of a string that carry a given part-of-speech tag
def prc_word(str, postag):
    """Return the words of ``str`` whose part-of-speech tag equals ``postag``.

    The text is tagged with ``pattern.en.tag``; every word whose tag matches
    ``postag`` exactly is kept, in order of appearance. The result is a list
    and is empty when nothing matches.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept so
    # that existing callers are not affected.
    return [token for token, label in tag(str) if label == postag]

# fetch the text of a Wikipedia page
def get_wiki(str):
    """Return the text content of the Wikipedia page ``str`` as a list of lines.

    The page is looked up with ``wikipedia.page`` and its plain-text content
    is split on newline characters.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept so
    # that existing callers are not affected.
    return wikipedia.page(str).content.split('\n')

# write a string to a text file
def write_txt(str, nom_file):
    """Write ``str`` to the file ``nom_file``, replacing any previous content.

    A confirmation message is printed once the file has been written.
    """
    # the context manager closes the file even if the write fails
    with open(nom_file, 'w') as handle:
        handle.write(str)
    # parenthesised form prints the same text under Python 2 and Python 3
    print('fichier écrit')


# -------------------------- #
# function calls
# -------------------------- #

# fetch the latest headline of the Guardian world feed
str_rss = get_rss('https://www.theguardian.com/world/rss')
# collect the words of the headline tagged 'NN' (nouns);
# there are none when the feed returned no headline
wrd_tag = prc_word(str_rss, 'NN') if str_rss else []
# get_wiki expects a single page title, not the whole list of nouns, so one
# noun is drawn at random; without any noun there is nothing to look up
if wrd_tag:
    str_from_word = get_wiki(random.choice(wrd_tag))
else:
    str_from_word = []

# TODO: repeat the noun extraction / Wikipedia lookup on the fetched text and
# save the combined result with write_txt(..., 'test.txt')

print(str_rss)
print(wrd_tag)
print(str_from_word)