####### e-hentai crawler test #######
import os
import time
import urllib.request as req

import requests
from bs4 import BeautifulSoup
# First viewer page of the gallery to download.
url = 'https://e-hentai.org/s/67763a1e8c/1298046-1'
# Desktop-browser User-Agent so the site serves the normal HTML pages.
Header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'}
pagenum = 127 #total number of pages in the gallery
count = 1 #1-based index of the page currently being saved
def crawl(url, count):
    """Download gallery images starting at *url*, saving them as <count>.jpg.

    Fetches each viewer page, extracts the full-size image URL and the link
    to the next page from ``<div id="i3">``, saves the image, and continues
    until ``pagenum`` pages have been stored.

    Args:
        url: e-hentai viewer-page URL to start from.
        count: 1-based page index; also used as the image file name.
    """
    # Iterate instead of recursing: the original called itself once per
    # page, growing the call stack by `pagenum` frames for no benefit.
    while True:
        request = req.Request(url, headers=Header)
        with req.urlopen(request) as response:
            html = response.read().decode('utf-8')
        page = BeautifulSoup(html, "html.parser")
        viewer = page.find('div', id='i3')   # the image container <div id="i3">
        next_url = viewer.a['href']          # <a href> points at the next page
        img_src = viewer.a.img['src']        # <img src> is the full-size image
        # Destination path: <save dir>\<page number>.jpg
        fname = 'D:\\本子存放區\\My Showgirl Nina\\' + str(count) + '.jpg'
        # Send the same browser UA here too; the original used requests'
        # default UA, which image hosts commonly reject.
        download = requests.get(img_src, headers=Header)
        # `with` closes the file on exit — no explicit close() needed.
        with open(fname, 'wb') as f:
            f.write(download.content)
        print('downloaded: ' + fname.split('\\')[-1])
        if count >= pagenum:
            break
        count += 1
        url = next_url
#Start crawling from the first page
crawl(url,count)