# This Python script reads the articles of a newsgroup and saves them as plain-text files.
# Hello, this script is written in Python - http://www.75271.com
#
# newsarchiver 1.1p - Newsgroup archiver
#
# Purpose:
#   This script will download all available messages from the desired Usenet group
#   and save them as plain text files. Useful for bulk group archiving.
#
# Usage:
#
#   Syntax : python newsarchiver.py <groupname> [firstArticle]
#
#   where groupname is the group name (eg. comp.lang.python)
#         firstArticle is the article number to fetch from (optional)
#
#   Example 1 : python newsarchiver.py comp.lang.python
#               (fetch all articles available from comp.lang.python)
#
#   Example 2 : python newsarchiver.py comp.lang.python 108224
#               (fetch all articles available from comp.lang.python
#                starting with article number 108224)
#
#   Password will be asked when the script is run.
#
#   Server name, login and destination directory are hardcoded.
#   Tweak the lines below the 'import' statement to suit your needs.
#   Variable names should be self-explanatory.
#
#   Then run this script and enter your password for the corresponding login.
#   This script will then connect to the news server and start fetching messages.
#
#   You can stop this script at any time (break with CTRL+C)
#   and re-run it later to continue to fetch messages.
#   This script will not fetch messages it has already fetched.
#
#   All messages will be saved as individual files in the form:
#       groupname_messageNumber
#   (with dots replaced by underscores)
#   ( eg : comp_lang_python_104208 )
#
#   Keep in mind that 'messageNumber' is server-dependent.
#   (If you change newsserver, the messageNumber will be different : you will
#   have to erase all files and fetch them all to have a coherent fileset)
#   The messageNumber matches the Xref reference number in each message.
#
#   Group must exist on server.
#
# Changes:
#   1.0p : - first version
#   1.1p : - added group name and first article number as command-line parameters.
#          - added help screen
#
# Author's comments:
#   Oh my, I wouldn't believe this would be so easy to program...
# thanks to Python !
#
# Credits:
#   I created this script for 2 purposes:
#     - train Python programming (this is one of my first Python scripts)
#     - archive comp.lang.python and other interesting newsgroups.
#
#   The author of this script is Sebastien SAUVAGE <sebsauvage at sebsauvage dot net>
#   http://sebsauvage.net
#   Other quick & dirty Python stuff is likely to be available at http://sebsauvage.net/python/
#
# Legal:
#   This script is public domain. Feel free to re-use and tweak the code.
#
# NOTE: this script relies on NNTP.article() returning a tuple whose item 3 is
# the list of article lines (the Python 2 nntplib API).

import getpass
import nntplib
import os
import os.path
import string
import sys

destination = 'c:\\ngarchive\\'  # archive directory (trailing [back]slash is optional)
newsserver = '127.0.0.1'
loginname = 'sebsauvage'

# Failures that are reported to the user and then skipped over.  Deliberately
# narrow: KeyboardInterrupt and SystemExit must NOT be swallowed, otherwise
# CTRL+C during a fetch would only print "Could not fetch article" and go on.
# IOError also covers socket errors; EOFError is raised by nntplib when the
# server drops the connection.
_NNTP_FAILURES = (nntplib.NNTPError, IOError, EOFError)


def _print_usage():
    """Print the help screen shown when no group name is given."""
    print('>>> newsArchiver 1.1p\n')
    print(' *** IMPORTANT ***')
    print(' See comments in code for more information before running this script !')
    print(' (News server address and login name are hardcoded :')
    print(' you need to tailor them before using this script.)')
    print(" News server %s will be used with login '%s'" % (newsserver, loginname))
    print(' Destination path is %s \n' % destination)
    print(' Syntax : python newsarchiver.py <groupname> [firstArticle]\n')
    print(' Example 1 : python newsarchiver.py comp.lang.python')
    print(' (fetch all article available from comp.lang.python)\n')
    print(' Example 2 : python newsarchiver.py comp.lang.python 108224')
    print(' (fetch all article available from comp.lang.python')
    print(' starting with article number 108224)\n')


def _parse_first_article(argv):
    """Return the optional firstArticle argument as an int (0 when absent).

    Exits with status 1 when the argument is present but not numeric.
    """
    if len(argv) <= 2:
        return 0
    try:
        return int(argv[2])
    except ValueError:
        print('Error : firstArticle parameters must be numeric.')
        sys.exit(1)


def _article_path(group_name, article_number):
    """Return the archive path for an article: dots become underscores."""
    name = '%s.%s' % (group_name, article_number)
    return os.path.join(destination, name.replace('.', '_'))


def _save_article(file_name, lines):
    """Write the article lines to file_name, one per line.

    The text goes to a temporary '.part' file that is renamed once complete,
    so an interrupted run never leaves a truncated file that a later run
    would mistake for an already-fetched article.
    """
    partial_name = file_name + '.part'
    with open(partial_name, 'w') as handle:
        for line in lines:
            handle.write(line + '\n')
    os.rename(partial_name, file_name)


def _fetch_articles(ns, group_name, first_article):
    """Download every not-yet-archived article of group_name.

    ns is the connected NNTP object.  first_article overrides the starting
    article number only when it lies inside the range the server offers.
    """
    print('>>> Accessing group %s' % group_name)
    try:
        group = ns.group(group_name)
    except _NNTP_FAILURES:
        print('>>> Could not open group %s' % group_name)
        return

    count = group[1]  # nb of articles available on server
    first = group[2]  # ID of first available article
    last = group[3]   # ID of last available article
    print('>>> Article count : %s' % count)
    print('>>> First : %s' % first)
    print('>>> Last : %s' % last)

    first = int(first)
    last = int(last)
    if first < first_article <= last:
        first = first_article
    print('>>> Fetching from article %s' % first)

    for number in range(first, last + 1):
        file_name = _article_path(group_name, number)
        if os.path.isfile(file_name):
            continue  # already archived by a previous run
        print('>>> Fetching article %s out of %s from %s' % (number, last, group_name))
        try:
            article = ns.article(str(number))
        except _NNTP_FAILURES:
            print('>>> Could not fetch article %s' % number)
            continue
        _save_article(file_name, article[3])


def main():
    """Parse the command line, log in to the news server and archive the group."""
    if len(sys.argv) < 2:
        _print_usage()
        sys.exit()

    group_name = sys.argv[1]
    first_article = _parse_first_article(sys.argv)

    password = getpass.getpass('>>> Please enter password for login ' + loginname + '@' + newsserver + ' : ')

    print('>>> Connecting to news server %s ...' % newsserver)
    try:
        ns = nntplib.NNTP(newsserver, 119, loginname, password)
    except _NNTP_FAILURES:
        print('>>> Could not connect to news server.')
        return

    try:
        print('>>> News server welcomes us:')
        print(ns.getwelcome())
        _fetch_articles(ns, group_name, first_article)
    except KeyboardInterrupt:
        # Documented way to stop the script; it can be re-run later to resume.
        print('>>> Interrupted by user.')
    finally:
        # Always say goodbye to the server, even after an interruption.
        print('>>> Closing connection with news server...')
        try:
            ns.quit()
        except _NNTP_FAILURES:
            pass  # connection already unusable; nothing left to release


if __name__ == '__main__':
    main()