• 欢迎访问开心洋葱网站,在线教程,推荐使用最新版火狐浏览器和Chrome浏览器访问本网站,欢迎加入开心洋葱 QQ群
  • 为方便开心洋葱网用户,开心洋葱官网已经开启复制功能!
  • 欢迎访问开心洋葱网站,手机也能访问哦~欢迎加入开心洋葱多维思维学习平台 QQ群
  • 如果您觉得本站非常有看点,那么赶紧使用Ctrl+D 收藏开心洋葱吧~~~~~~~~~~~~~!
  • 由于近期流量激增,小站的ECS没能经得起亲们的访问,本站依然没有盈利,如果各位看官觉着文字不错,还请给小站打个赏~~~~~~~~~~~~~!

python实现自动登录人人网并采集信息的代码

python 水墨上仙 2722次浏览

python实现自动登录人人网并采集信息的代码

#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import re
import urllib2
import urllib
import cookielib
class Renren(object):
    """Small Renren (renren.com) client: log in and print friends' updates.

    Cookies live in an LWP cookie jar that is reloaded from ``COOKIE_FILE``
    on construction and written back to it after a login.
    """

    # NOTE: the misspelt file name is kept on purpose so that cookie files
    # written by earlier runs of this script are still picked up.
    COOKIE_FILE = './renren.coockie'
    LOGIN_URL = 'http://www.renren.com/PLogin.do'

    # Compiled once for the class; raw strings keep ``\s`` a regex escape.
    _ARTICLE_RE = re.compile(r'<article>(.*?)</article>')  # one feed entry
    _AUTHOR_RE = re.compile(r'<h3.+>(.+)</h3>:')  # author of an entry
    _TEXT_RE = re.compile(r'</a>(.+)\s')  # content of an entry

    def __init__(self):
        """Create the cookie jar, reload saved cookies and install the opener."""
        self.name = self.pwd = self.content = self.domain = self.origURL = ''
        self.file = ''  # raw body of the login response
        self.operate = ''  # response object returned by the login request
        self.cj = cookielib.LWPCookieJar()
        try:
            self.cj.revert(self.COOKIE_FILE)
        except IOError as e:
            # Missing or malformed cookie file (LoadError is an IOError
            # subclass): report it and carry on with an empty jar.
            print(e)

        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(self.opener)

    def setinfo(self, username, password, domain, origURL):
        """Store the login details that ``login`` will submit.

        username -- account name, sent as the ``email`` form field
        password -- account password
        domain   -- value of the ``domain`` form field
        origURL  -- value of the ``origURL`` form field
        """
        self.name = username
        self.pwd = password
        self.domain = domain
        self.origURL = origURL

    def login(self):
        """POST the login form, save the response and cookies, print the feed.

        The form is submitted exactly once. The response object is kept in
        ``self.operate`` and its body in ``self.file``; the body is also
        dumped to ``news.html`` on a best-effort basis.
        """
        params = {
            'domain': self.domain,
            'origURL': self.origURL,
            'email': self.name,
            'password': self.pwd,
        }
        print('login.......')
        req = urllib2.Request(self.LOGIN_URL, urllib.urlencode(params))

        # A single request is enough: re-opening ``req`` would submit the
        # credentials a second time just to obtain another response object.
        self.operate = self.opener.open(req)
        self.file = self.operate.read()

        # Saving the page is only a debugging aid, so a failure here must not
        # abort the login; ``with`` guarantees the handle is closed.
        try:
            with open('news.html', 'w') as newsfeed:
                newsfeed.write(self.file)
        except IOError as e:
            print('could not save news.html: %s' % e)

        final_url = self.operate.geturl()
        print(final_url)
        # NOTE(review): geturl() is non-empty for any completed response, so
        # this does not prove the credentials were accepted -- confirm what
        # URL a failed login ends on before tightening the check.
        if final_url:
            print('Logged on successfully!')
            self.cj.save(self.COOKIE_FILE)
            self.__viewnewinfo()
        else:
            print('Logged on error')

    def __viewnewinfo(self):
        """Show friends' latest status updates."""
        self.__caiinfo()

    @classmethod
    def _parse_updates(cls, html):
        """Return a list of ``(author, text)`` pairs found in *html*.

        Entries without a recognisable author are skipped; an entry whose
        content cannot be matched yields an empty ``text`` instead of
        raising.
        """
        updates = []
        # ``.`` never matches a newline here, so scanning the whole body finds
        # the same entries as scanning it line by line.
        for article in cls._ARTICLE_RE.findall(html):
            authors = cls._AUTHOR_RE.findall(article)
            if not authors:
                continue
            texts = cls._TEXT_RE.findall(article)
            updates.append((authors[0], texts[0] if texts else ''))
        return updates

    def __caiinfo(self):
        """Print every friend update found in the login response body."""
        print('friend newinfo:')
        for author, text in self._parse_updates(self.file):
            print('%s 说: %s' % (author, text))
            print('----------------------------------------------')
   
# Example run -- replace the placeholder credentials before executing.
username, password = 'username', 'password'  # Renren account name and password
domain = 'www.renren.com'  # Renren site address
origURL = 'http://www.renren.com/home'  # address reached after logging in

ren = Renren()
ren.setinfo(username, password, domain, origURL)
ren.login()


开心洋葱 , 版权所有丨如未注明 , 均为原创丨未经授权请勿修改 , 转载请注明python实现自动登录人人网并采集信息的代码
喜欢 (0)
加载中……