python读取新闻组内的信息_python

这段python可以用来读取新闻组内的新闻，并保存为纯文本文件
# Hello, this script is written in Python - http://www.75271.com
#
# newsarchiver 1.1p - Newsgroup archiver
#
# Purpose:
#    This script will download all available messages from the desired Usenet group
#    and save them as plain text files. Useful for bulk group archiving.
#
# Usage:
#
#    Syntax    : python newsarchiver.py <groupname> [firstArticle]
#
#      where   groupname is the group name (eg. comp.lang.python)
#              firstArticle is the article number to fetch from (optional)
#
#    Example 1 : python newsarchiver.py comp.lang.python
#                (fetch all article available from comp.lang.python)
#
#    Example 2 : python newsarchiver.py comp.lang.python 108224
#                (fetch all article available from comp.lang.python
#                 starting with article number 108224)
#
#    Password will be asked when the script is run.
#
#    Server name, login and destination directory are hardcoded.
#    Tweak the lines below the 'import' statement to suit your needs.
#    Variable names should be self-explanatory.
#
#    Then run this script and enter the password for the corresponding login.
#    This script will then connect to news server and start fetching messages.
#
#    You can stop this script at anytime (break with CTRL+C)
#    and re-run it later to continue to fetch messages.
#    This script will not fetch messages it has already fetched.
#
#    All messages will be saved as individual files in the form:
#        groupname_messageNumber
#    (with dots replaced by underscores)
#    ( eg : comp_lang_python_104208 )
#
#    Keep in mind that 'messageNumber' is server-dependent.
#    (If you change newsserver, the messageNumber will be different : you will
#    have to erase all files and fetch them all to have a coherent fileset)
#    The messageNumber matches the Xref reference number in each message.
#
#    Group must exist on server.
#
# Changes:
#    1.0p : - first version
#    1.1p : - added group name and first article number as command-line parameters.
#           - added help screen
#
# Author's comments:
#    Oh my, I wouldn't have believed this would be so easy to program... thanks to Python !
#
# Credits:
#    I created this script for 2 purposes:
#        - train Python programming (this is one of my first Python scripts)
#        - archive comp.lang.python and other interesting newsgroups.
#
#    The author of this script is Sebastien SAUVAGE <sebsauvage at sebsauvage dot net>
#                                  http://sebsauvage.net
#    Other quick & dirty Python stuff is likely to be available at http://sebsauvage.net/python/
#
# Legal:
#   This script is public domain. Feel free to re-use and tweak the code.
#
import os.path,nntplib,string,getpass,sys
destination = 'c:\\ngarchive\\'   # do not forget the trailing [back]slash !
newsserver  = '127.0.0.1'
loginname   = 'sebsauvage'
if len( sys.argv ) < 2:
    print '>>> newsArchiver 1.1p\n'
    print '    *** IMPORTANT ***'
    print '    See comments in code for more information before running this script !'
    print '    (News server address and login name are hardcoded :'
    print '     you need to tailor them before using this script.)'
    print '    News server',newsserver,"will be used with login '"+loginname+"'"
    print '    Destination path is',destination,'\n'
    print '    Syntax    : python newsarchiver.py <groupname> [firstArticle]\n'
    print '    Example 1 : python newsarchiver.py comp.lang.python'
    print '                (fetch all article available from comp.lang.python)\n'
    print '    Example 2 : python newsarchiver.py comp.lang.python 108224'
    print '                (fetch all article available from comp.lang.python'
    print '                 starting with article number 108224)\n'
    sys.exit()
groupName   = sys.argv[1]
firstArticle = 0
if len( sys.argv ) > 2:
    try:
        firstArticle = int(sys.argv[2])
    except:
    	print 'Error : firstArticle parameters must be numeric.'
	sys.exit()
loginpassword = getpass.getpass('>>> Please enter password for login '+loginname+'@'+newsserver+' : ')
print '>>> Connecting to news server',newsserver,'...'
try:
    ns = nntplib.NNTP(newsserver,119,loginname,loginpassword)
except:
    print '>>> Could not connect to news server.'
else:
    print '>>> News server welcomes us:'
    print ns.getwelcome()
    print '>>> Accessing group', groupName
    try:
        group = ns.group(groupName)
    except:
        print '>>> Could not open group',groupName
    else:
        count = group[1]  # nb of articles available on server
        first = group[2]  # ID of first available article
        last =  group[3]  # ID of last available article
        print '>>>    Article count :',count
        print '>>>    First         :',first
        print '>>>    Last          :',last
	if (firstArticle > int(first)) and (firstArticle <= int(last)):
	    first = str(firstArticle)
	    print '>>> Fetching from article',first
        for articleNumber in range(int(first),int(last)+1):
            fileName = destination+string.replace(groupName+'.'+str(articleNumber),'.','_')
            if not os.path.isfile( fileName ):
                print '>>> Fetching article',articleNumber,'out of',last,'from',groupName
                try:
                    article = ns.article(str(articleNumber))
                except:
                    print '>>> Could not fetch article',articleNumber
                else:
                    f=open(fileName, 'w+')
                    for line in article[3]:
                        f.write(line+'\n')
                    f.close()
    print '>>> Closing connection with news server...'
    ns.quit()