专业编程基础技术教程

网站首页 > 基础教程 正文

站酷图片爬取(站酷怎么采集作品)

ccvgpt 2024-07-24 11:11:44 基础教程 26 ℃
import requests
import time
from bs4 import BeautifulSoup
from urllib.parse import urlencode
import asyncio
async def zhanku():
    """Download the preview images of ZCOOL search results for a keyword.

    Prompts for a keyword on stdin, requests search result pages 1 through
    100 from www.zcool.com.cn, selects every ``<img>`` matching
    ``.work-list-box .card-box .card-img a img`` and saves each one as a
    sequentially numbered ``.png`` file in the hard-coded output directory.

    The coroutine takes no arguments and returns ``None``. It never awaits:
    every ``requests`` call blocks, so the crawl runs strictly sequentially.

    Raises:
        requests.RequestException: if a search result page cannot be
            fetched. Failures of individual images are reported and skipped.
        OSError: if the output directory or a file cannot be written.
    """
    import os

    ss = 1  # running index used in the output file name
    shuru = input('请输入关键词:')

    # Create the target directory up front; open() would otherwise fail on
    # the first image when it does not exist yet.
    save_dir = 'e:/python/爬完数据存放地址/壁纸'
    os.makedirs(save_dir, exist_ok=True)

    # The headers do not depend on the page number, so build them once.
    # NOTE(review): the Cookie carries a hard-coded logged-in session; it has
    # presumably expired and should not live in source control.
    headers = {
        "Referer": "https://www.zcool.com.cn/search/content?&word=%E7%BE%8E%E5%A5%B3",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.20 Safari/537.36",
        "Cookie": "up_location_prompt=1; isnv=1; _uab_collina=154859717337696843763434; gr_user_id=1051849d-dd25-4746-95ee-9322b52115c6; JSESSIONID=aaaSvgFRBbZGh4Nw7pzIw; gr_cs1_a9067039-a1bd-4602-8dd4-1009e86d802a=uid%3A0; gr_session_id_bd647439edc1d679=d2abb64f-c622-43b4-bec1-5dedbd6051da; zcool_logon_new=17865262%7C%7C%7Cnull%7C%7Cnull%7C20190214183032%7CBD0D76E36396165A3786DF7AC4E582A8; zcool_logon_hw=17865262%7Cpython%25E7%2588%25AC%25E8%2599%25AB%7Chttps%3A%2F%2Fimg.zcool.cn%2Fcommunity%2F2noavatar.gif%7C15047803470%7C20190214183032%7C6D8074FA942AF9E090855F61A4A7FE82; gr_session_id_acec0eb2dafeaf05=06bd03ac-6498-4719-9cd8-235925117373; gr_session_id_acec0eb2dafeaf05_06bd03ac-6498-4719-9cd8-235925117373=false; gr_cs1_06bd03ac-6498-4719-9cd8-235925117373=uid%3A17865262; zui=%7B%22memberType%22:0,%22memberProfession%22:%22%E7%BD%91%E9%A1%B5%E8%AE%BE%E8%AE%A1%E5%B8%88%22,%22memberGender%22:%22%E5%A5%B3%22,%22pageUrl%22:%22https://www.zcool.com.cn/u/17865262%22,%22username%22:%22python%E7%88%AC%E8%99%AB%22,%22avatar%22:%22https://static.zcool.cn/git_z/z/images/girl.png%22,%22id%22:17865262%7D"
    }

    for t in range(1, 101):
        data = {
            "word": shuru,
            "p": t,
            "requestId": "requestId_1550140318080",
            "sort": "5",
            "recommend": "0",
            "other": "0",
            "field": "0",
            "type": "0"
        }
        url = 'https://www.zcool.com.cn/search/content?' + urlencode(data)
        html = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(html.text, 'html.parser')
        html_soup = soup.select('.work-list-box .card-box .card-img a img')
        for x in html_soup:
            # An <img> without a src attribute has nothing to download.
            a = x.get('src')
            if not a:
                continue
            print('正在下载', a)
            try:
                # Same timeout as the page request so one stalled image
                # cannot hang the whole crawl.
                d = requests.get(a, timeout=30)
                d.raise_for_status()
            except requests.RequestException as err:
                # Skip this image instead of saving an error page as .png.
                print('下载失败', a, err)
                continue
            # The context manager closes the file even if write() fails.
            with open('%s/觅知网%d.png' % (save_dir, ss), 'wb') as op:
                op.write(d.content)
            ss += 1
if __name__ == '__main__':
    # Run the crawler coroutine to completion and report the elapsed
    # wall-clock time in seconds.
    start = time.time()
    # asyncio.run creates a fresh event loop, runs the coroutine and closes
    # the loop afterwards. Calling asyncio.get_event_loop() with no running
    # loop is deprecated and fails on recent Python versions.
    asyncio.run(zhanku())
    print('[info]耗时:%s' % (time.time() - start))

最近发表
标签列表