芋の独り言

当ブログへのアクセスは当ブログのプライバシーポリシーに同意したものとみなします.

あるサイトからの画像ダウンロード

まんだらけ・メロンブックスの商品(同人誌とか)の画像をダウンロードするスクリプト. 気になる商品のメモとして使うなど.


# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup as bs
import os,requests

# 保存先フォルダを選択
def path():
    import wx
    app=wx.App()
    wx.MessageBox('保存先フォルダを選択してください','フォルダ選択',wx.STAY_ON_TOP)
    # フォルダ選択ダイアログを作成
    folda = wx.DirDialog(None,style=wx.DD_CHANGE_DIR | wx.OK | wx.STAY_ON_TOP,message="保存先フォルダ")
    # フォルダが選択されたとき
    if folda.ShowModal() == wx.ID_OK:
        folda_path = folda.GetPath()
        folda.Destroy()
        return folda_path

class pic_get:

    def make_folda(self,title):
        print(title)
        self.path=os.path.join(self.path,title)
        try:
            os.mkdir(self.path)
        except FileExistsError:
            pass
    
    def info_get(self,info):
        
        soup = bs(info,'html.parser')
        
        if "https://www.mandarake.co.jp/" in self.url:
            title=soup.find("h2").text

            self.make_folda(str(title))
            
            for i in soup.find_all("div",attrs={"class":"box"}):
                url = i.img.get("src")
                with open(os.path.join(self.path,url.split("/")[-1]),mode="wb") as f:
                    f.write(requests.get(url, timeout=(3.0, 7.5)).content)
                    
            with open(os.path.join(self.path,"info.txt"),mode="w",encoding="utf-8") as f:
                info = soup.find("div",attrs={"class":"post_item_caption"})
                f.write(str(info.h3.text))
                f.write(str(info.p.text))
                f.write(str(soup.find("table").get_text()))

        elif "https://www.melonbooks.co.jp/" in self.url:
            info = soup.find("div",attrs={"id":"description","class":"mb20"})
            try:
                title = soup.find("h1").text.replace("\n","").replace(" ","")
            except AttributeError:
                session = requests.session()
                url = bs(session.get(self.url).content,'html.parser').find("a",attrs={"class":"f_left yes"}).get("href")
                session.get("https://www.melonbooks.co.jp"+url)
                
                soup = bs(session.get(self.url, timeout=(3.0, 7.5)).content,'html.parser')
                info = soup.find("div",attrs={"id":"description","class":"mb20"})
                title = soup.find("h1").text.replace("\n","").replace(" ","")
            
            self.make_folda(str(title))

            if soup.find("a",attrs={"class":"opacity pop"}):
                url = "https:" + soup.find("a",attrs={"class":"opacity pop"}).get("href")
                with open(os.path.join(self.path,url.split("=")[-1]),mode="wb") as f:
                    f.write(requests.get(url, timeout=(3.0, 7.5)).content)

            if soup.find("a",attrs={"class":"opacity pop tag_sample1"}):
                url =  "https:" + soup.find("a",attrs={"class":"opacity pop tag_sample1"}).get("href")
                with open(os.path.join(self.path,"pop"+url.split("=")[-1]),mode="wb") as f:
                    f.write(requests.get(url, timeout=(3.0, 7.5)).content)

            if soup.find("div",attrs={"class":"thumb thumb_detail"}):
                for i in soup.find_all("div",attrs={"class":"thumb thumb_detail"}):
                    url = "https:" + i.a.get("href")
                    with open(os.path.join(self.path,url.split("=")[-1]),mode="wb") as f:
                        f.write(requests.get(url, timeout=(3.0, 7.5)).content)

            try:
                url = info.p.a.get("href")
                with open(os.path.join(self.path,url.split("/")[-1]),mode="wb") as f:
                    f.write(requests.get(url, timeout=(3.0, 7.5)).content)
            except AttributeError:
               pass

            with open(os.path.join(self.path,"info.txt"),mode="w",encoding="utf-8") as f:
                try:
                    f.write(str(info.p.text))
                except AttributeError:
                    f.write(str(info.get_text()))
                f.write(str(info.table.get_text()))
                if soup.find("span",attrs={"class":"digital"}):
                    f.write("立ち読み:https://www.melonbooks.co.jp/viewer/?product_id={}".format(self.url.replace("&adult_view=1","").split("=")[-1]))

    def main(self):                      
        self.info_get(requests.get(self.url, timeout=(3.0, 7.5)).content)
        print('finish')

    def __init__(self,d1,d2):
        self.url      = d1
        self.path     = d2

if __name__ == '__main__':
    path = path()
    
    while True:
        url = input('url:')
        pic_get(url,path).main()

        if input("continue?(y/n):")=="y":
            continue
        else:
            break

qiita.com