# 仅供参考 (for reference only)
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import json
import os
import re
import shutil
import sys
import tempfile
import time

import img2pdf
import requests
from alive_progress import alive_bar
from PIL import Image
from requests.exceptions import SSLError
# HTTP headers passed to requests.get() when page images are downloaded:
# a desktop-browser User-Agent plus a Referer pointing at max.book118.com.
png_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
    'Referer': 'https://max.book118.com/',
}
def down_pngs(pageIndex):
    """Download one page image from view-cache.book118.com into ``temp_dir``.

    Reads two module-level names that the main download loop assigns:
    ``pngs`` (mapping from page index to the URL path of its image) and
    ``temp_dir`` (directory that receives ``<pageIndex>.jpeg``).

    The download is best effort: an HTTP error status or a failure to write
    the file is reported and the page is skipped. Connection-level errors
    from ``requests.get`` still propagate to the caller.

    Args:
        pageIndex: Key into ``pngs``; also used as the output file's stem.

    Returns:
        None.
    """
    print(pngs[pageIndex])
    down_url = 'https://view-cache.book118.com' + pngs[pageIndex]
    print(down_url)
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(url=down_url, timeout=30)
    try:
        # Refuse to store an HTTP error page under a .jpeg name.
        res.raise_for_status()
        with open(os.path.join(temp_dir, str(pageIndex) + '.jpeg'), 'wb') as f:
            f.write(res.content)
    except (requests.HTTPError, OSError) as err:
        print('第 {} 页下载失败: {}'.format(pageIndex, err))
        return
# ASCII-art banner printed once at start-up.
# Raw string: the art is full of backslashes that must be printed literally
# (a normal string would treat "\'" as an escape and drop the backslash, and
# would warn about the other, invalid escape sequences).
# NOTE(review): the horizontal spacing of the art and the characters after
# the final "/" look damaged in this copy; they are kept as found.
logo = r""" __ __
_ _ /_@)_@) \ /^^\ /^\ /^^\_
_/oo \____/~''. . . '~\ /'\'' ~ ''~~' -'\_
/ '.'. ~.~.~. .' ~ | /'\~~..''''.'' '' ~\_
('_'_'_'_'_'_'_'_ ' : ' \_/' '.'' . '. .'' '. ~\_
~V~V~V~V \ ~\ '' '~ ' '' ~ ` ~ '' ~\_
/\~/\~/\~/\~/|/ ' '' _ ' ~ '' ' ~ '' __ ' .. \_
<-- --- ---.---.--/' '' /' '\_ '' ': ~ ;;'' ' /''; \ ;'''''' '' ~\ _
\~ '. . : .:: ~. :. /_'''_'' \_' :'''_ : _ ''/''_' '_ \:_ '''' #''..\/\/\/~/\~ ''~~~~~O
~~ \-~ `---~~~---- \(_)(_)(_)/ ~ ~~' ~\(_)(_)(_)\_~_~_~_~_~/˜¤¹
---------------------------------------------------------------------------------------------by:bean0283
"""
print(logo)
while True: | |
url = input('请输入原创力文库url:') | |
url = url.split('?')[0] | |
print('下载地址:', url) | |
temp_dir = url.split('/')[-1] | |
# 删除老的临时文件夹并新建临时文件夹 | |
if os.path.exists(temp_dir): | |
shutil.rmtree(temp_dir) | |
os.mkdir(temp_dir) | |
print('开始下载 HTML...', end='') | |
try: | |
response = requests.get(url=url) | |
except(SSLError): | |
print("\n\033[31m不要使用代理软件-------\033[0m") | |
print("\033[31m请关闭代理软件后重新运行程序\033[0m") | |
print("\033[31m请关闭vpn软件后重新运行程序\033[0m") | |
sys.exit(0) | |
page = response.text | |
print('成功. \n开始解析 HTML...', end='') | |
title = re.search('title: (.*),', page).group(1).replace("'", "") | |
view_token = re.search('view_token: (.*)\'', page).group(1).replace("'", "") | |
filetype = re.search('format: (.*)\'', page).group(1).replace("'", "") | |
senddate = re.search('senddate: (.*),', page).group(1).replace("'", "") | |
aid = re.search(' aid: (.*), //解密后的id', page).group(1) | |
actual_page = int(re.search('actual_page: (.*),', page).group(1)) # 真实页数 | |
preview_page = int(re.search('preview_page: (.*),', page).group(1)) # 可预览页数 | |
if actual_page > preview_page: | |
print("\n\033[31m该文档为限制文档,无法下载全部内容,请用vip+rmb,该问题无需反馈给开发者,他也不会解决!!\033[0m\n") | |
output = title # 输出文件(夹) | |
print('解析成功. ') | |
print('文档标题: ', title) | |
# 接口每次会返回 6 个下载 page 地址 | |
list_pn = list(range(1, preview_page + 1, 6)) | |
# print(pngs) | |
if filetype == 'pdf': | |
pngs = {} | |
print('解析到pdf文档, 准备开始解析下载..', end='') | |
print('解析成功.\n正在获取pngs下载地址...') | |
print('受接口限制,2s访问一次,请耐心等待所有接口信息返回') | |
with alive_bar(len(list_pn), title='ing...') as bar: | |
for pn in list_pn: | |
bar() | |
down_page_url = 'https://openapi.book118.com/getPreview.html?project_id=1&aid={}&view_token={}&page={}&_={}'.format( | |
aid, view_token, pn, str(int(time.time()))) | |
jsonpReturn = requests.get(url=down_page_url) | |
page = re.search('jsonpReturn\((.*)\)', jsonpReturn.text).group(1) | |
data_temp = json.loads(page)['data'] | |
# print(data_temp) | |
pngs.update({x: data_temp[x] for x in data_temp}) # 这里有个 bug,若返回值的 url 为空时,这里不会报错,但会造成下载 png 时异常,暂时没有考虑处理 | |
if pn != list_pn[-1]: | |
time.sleep(2) | |
print('\n开始下载 jpg(s)...') | |
pagenums = list(range(1, len(pngs) + 1)) | |
with alive_bar(len(pagenums), title='ing...') as bar: | |
for i in range(len(pagenums)): | |
bar() | |
down_url = "https:" + pngs[str(i + 1)] | |
request = requests.get(url=down_url, headers=png_headers) | |
try: | |
page = request.content | |
with open(os.path.join(temp_dir, str(pagenums[i]) + '.jpeg'), 'wb') as f: | |
f.write(page) | |
except: | |
continue | |
print('\n开始合并图片成PDF...', end='') | |
file_imgs = [os.path.join(temp_dir, str(i) + '.jpeg') for i in pagenums] | |
# 不用以下代码会使 img2pdf 报错 | |
for img_path in file_imgs: | |
with open(img_path, 'rb') as data: | |
img = Image.open(data) | |
# 将 PNG 中 RGBA 属性变为 RGB,即可删掉 alpha 透明度通道 | |
img.convert('RGB').save(img_path) | |
with open(output + '.pdf', 'wb') as f: | |
f.write(img2pdf.convert(file_imgs)) | |
shutil.rmtree(temp_dir) | |
print('下载成功.') | |
print('保存到 ' + output + '.pdf') | |
elif filetype in ['docx', 'doc']: | |
pngs = {} | |
print('解析到{}文档, 准备开始解析下载..'.format(filetype), end='') | |
print('解析成功.\n正在获取pngs下载地址...') | |
print('受接口限制,2s访问一次,请耐心等待所有接口信息返回') | |
x = 0 | |
with alive_bar(len(list_pn), title='ing...') as bar: | |
for pn in list_pn: | |
down_page_url = 'https://openapi.book118.com/getPreview.html?&project_id=1&aid={}&t={}&view_token={}&page={}&_={}'.format( | |
aid, senddate, view_token, pn, str(int(time.time()))) | |
jsonpReturn = requests.get(url=down_page_url) | |
page = re.search('jsonpReturn\((.*)\)', jsonpReturn.text).group(1) | |
data_temp = json.loads(page)['data'] | |
# print(data_temp) | |
bar() | |
pngs.update({x: data_temp[x] for x in data_temp}) | |
if pn != list_pn[-1]: | |
time.sleep(2) | |
print('\n开始下载 jpg(s)...') | |
pagenums = list(range(1, len(pngs) + 1)) | |
with alive_bar(len(pagenums), title='ing...') as bar: | |
for i in range(len(pagenums)): | |
down_url = "https:" + pngs[str(i + 1)] | |
request = requests.get(url=down_url, headers=png_headers) | |
bar() | |
try: | |
page = request.content | |
with open(os.path.join(temp_dir, str(pagenums[i]) + '.jpeg'), 'wb') as f: | |
f.write(page) | |
except: | |
continue | |
print('\n开始合并图片成PDF...', end='') | |
file_imgs = [os.path.join(temp_dir, str(i) + '.jpeg') for i in pagenums] | |
for img_path in file_imgs: | |
with open(img_path, 'rb') as data: | |
img = Image.open(data) | |
# 将 PNG 中 RGBA 属性变为 RGB,即可删掉 alpha 透明度通道 | |
img.convert('RGB').save(img_path) | |
with open(output + '.pdf', 'wb') as f: | |
f.write(img2pdf.convert(file_imgs)) | |
# rea(temp_dir, output + '.pdf') | |
shutil.rmtree(temp_dir) | |
print('下载成功.') | |
print('保存到 ' + output + '.pdf') | |
else: | |
print('不支持的参数.文件类型:', filetype) | |
temp_ = os.path.realpath(sys.argv[0]) | |
os.startfile(os.path.dirname(temp_)) | |
print("执行完成,继续下载请黏贴url,结束请关闭窗口\n") |