python 下载指定网址的所有图片_python

python 下载指定网址的所有图片

#coding=utf-8
#download pictures of the url
#usage: python downpicture.py www.baidu.com
import os
import sys
from html.parser import HTMLParser
from urllib.request import urlopen
from urllib.parse import urlparse
def getpicname(path):
    '''Return the file name of the resource a URL points at.

    Only the path component of the URL is inspected, so a query string or
    fragment can neither hide a real extension nor fake one.

    Args:
        path: absolute, protocol-relative or relative URL of a resource.

    Returns:
        The last path segment (e.g. 'pic.png'), or None when that segment
        is empty or has no file extension.
    '''
    # Parse first: checking the raw URL would treat 'http://a.com' as a file
    # named 'a.com' and would reject 'http://a.com/pic.png?u=/x'.
    name = os.path.split(urlparse(path)[2])[1]
    if os.path.splitext(name)[1] == '':
        return None
    return name
def saveimgto(path, urls):
    '''Download every URL in urls into the existing directory path.

    Each file is stored under the name returned by getpicname(); URLs for
    which it returns None are skipped because there is no name to save
    them under. Errors raised while fetching or writing propagate to the
    caller.

    Args:
        path: local directory that must already exist.
        urls: iterable of URLs that urlopen() can open.

    If path is not a directory, 'path is invalid' is printed and the
    process exits through sys.exit().
    '''
    if not os.path.isdir(path):
        print('path is invalid')
        sys.exit()
    for url in urls:
        name = getpicname(url)
        if name is None:
            # os.path.join(path, None) would raise TypeError.
            continue
        # Fetch before opening the target so a failed download does not
        # leave an empty file behind; 'with' closes both handles on error.
        with urlopen(url) as response:
            data = response.read()
        with open(os.path.join(path, name), 'wb') as out:
            out.write(data)
class myhtmlparser(HTMLParser):
    '''Collect the src attribute of every <img> tag fed to the parser.'''

    def __init__(self):
        HTMLParser.__init__(self)
        # src values of the <img> tags seen so far, in document order
        self.urls = []
        # number of <img> start tags seen, with or without a usable src
        self.num = 0

    def handle_starttag(self, tag, attr):
        '''Record the src value(s) of an <img> tag; ignore all other tags.

        Args:
            tag: tag name as reported by HTMLParser.
            attr: list of (name, value) attribute pairs of the tag.
        '''
        if tag.lower() != 'img':
            return
        # A bare `src` attribute arrives with value None and `src=""` with
        # an empty string; neither names an image, and None would break
        # callers that apply string methods to the collected URLs.
        self.urls.extend(
            value for name, value in attr
            if name.lower() == 'src' and value
        )
        self.num += 1
if __name__ == '__main__':
    # Script entry point: fetch the page given on the command line, collect
    # the src of its <img> tags and download the pictures into a folder.
    # Local import: only this entry point needs urljoin.
    from urllib.parse import urljoin

    if len(sys.argv) < 2:
        print('usage: python downpicture.py <url> [target_dir]')
        sys.exit(1)
    url = sys.argv[1]
    # Only bare host names get a default scheme; an https:// URL must not
    # be turned into 'http://https://...'.
    if not url.lower().startswith(('http://', 'https://')):
        url = 'http://' + url
    # Optional second argument; defaults to the folder used originally.
    target = sys.argv[2] if len(sys.argv) > 2 else r'D:\python\song'

    with urlopen(url) as q:
        # Resolve image links against the final URL (after redirects).
        base = q.geturl()
        content = q.read().decode('utf-8', 'ignore')

    myparser = myhtmlparser()
    myparser.feed(content)

    # urljoin handles '//host/x', '/x' and plain relative 'dir/x' links and
    # keeps the page's scheme. Build a new list instead of rewriting
    # myparser.urls through index() while iterating over it.
    pictures = []
    for u in myparser.urls:
        if not u:
            continue
        full = urljoin(base, u)
        # Keep only links that have a file name to be saved under.
        if getpicname(full) is not None:
            pictures.append(full)

    saveimgto(target, pictures)
    # Report the pictures actually handed to saveimgto, not the number of
    # <img> tags found in the page.
    print('num of download pictures is {}'.format(len(pictures)))

　　result：
　　num of download pictures is 19

一	二	三	四	五	六	日
					1	2
3	4	5	6	7	8	9
10	11	12	13	14	15	16
17	18	19	20	21	22	23
24	25	26	27	28