91超碰碰碰碰久久久久久综合_超碰av人澡人澡人澡人澡人掠_国产黄大片在线观看画质优化_txt小说免费全本

溫馨提示×

溫馨提示×

您好,登錄后才能下訂單哦!

密碼登錄×
登錄注冊×
其他方式登錄
點擊 登錄注冊 即表示同意《億速云用戶服務條款》

python pdf

發布時間:2020-07-17 22:54:31 來源:網絡 閱讀:818 作者:cooperfang 欄目:編程語言
# 從pdf中讀取文本
# 寫pdf
# 加密解密pdf
# 合并pdf,加水印
# pip install PyPDF2
%cd D:\python全站\office
import PyPDF2
D:\python全站\office
# Open the sample PDF in binary mode. The handle is deliberately left open:
# later cells still fetch pages through `pdf`, and `pdf_obj.close()` is only
# called after the second page has been written out.
pdf_obj = open('coop.pdf', 'rb')
pdf = PyPDF2.PdfFileReader(pdf_obj)
pdf.numPages  # number of pages in the document
3
page = pdf.getPage(0)  # pages are addressed by zero-based index
page.extractText()  # extract the page's text (the Chinese text is missing from the output shown)
'\n\n \n \n1\\\n1\nN¥\n \nde8ug word\n \nde8ug word\n \nde8ug word\n \nde8ug word\n \n\n\n \n \n\n \nde8ug word\n \nde8ug word\n \nde8ug word\n \nde8ug word\n \n \n\n\n \n \n\n \nde8ug word\n \nde8ug word\n \nde8ug word\n \nde8ug word\n \n \n'
# 提取中文 pip install pdfminer3k  #支持中文
from pdfminer.pdfinterp import PDFResourceManager, process_pdf # 資源管理
from pdfminer.converter import TextConverter  # 文本轉換
from pdfminer.layout import LAParams #布局
from io import StringIO  # 生成臨時文件

def convert_pdf(path):
    """Extract the text of a PDF file with pdfminer and return it as one string.

    Args:
        path: Filesystem path of the PDF to read; opened in binary mode.

    Returns:
        The text written by ``TextConverter`` for the whole document.

    Raises:
        OSError: If ``path`` cannot be opened.
        Any exception raised by pdfminer while processing propagates
        unchanged; the file, the converter and the buffer are still released.
    """
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()
    # In-memory text buffer the converter writes into; closed on exit.
    with StringIO() as retstr:
        device = TextConverter(rsrcmgr, retstr, laparams=laparams)
        try:
            # `with` guarantees the PDF handle is closed even if parsing fails.
            with open(path, 'rb') as fp:
                process_pdf(rsrcmgr, device, fp)
        finally:
            device.close()
        # Read the buffer before it is closed by the enclosing `with`.
        return retstr.getvalue()
# Extract the whole document's text once and keep it in `s`.
s = convert_pdf('coop.pdf')
# print(s)
# convert_pdf('coop.pdf')
# Split on newline + form feed ('\x0c'); in the output shown this yields one
# entry per page plus a trailing empty string.
s.split('\n\x0c')
['測試語句 \n\n第 1 頁 \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\n測試語句 \n\n第一頁 \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\n測試語句 \n\n第一頁 \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\n \n \n \n \n ',
 '測試語句 \n\n第 2 頁 \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\n \n \n \n \n ',
 'de8ug word \n\n測試語句 \n\n第 3 頁 \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\nde8ug word \n\n \n \n ',
 '']
# Write a PDF: take the second page of the PDF opened earlier and write it
# out as a new PDF.
pdf_writer = PyPDF2.PdfFileWriter()
page = pdf.getPage(1)  # index 1 is the second page
pdf_writer.addPage(page)
with open('coop-1.pdf', 'wb') as f:
    pdf_writer.write(f)
# The source handle opened earlier is closed only after the write.
pdf_obj.close()
# Encrypt a PDF: copy every page of coop.pdf into a new writer, set a
# password on the writer, and save the result as coop-s.pdf.
with open('coop.pdf', 'rb') as f_in:
    pdf = PyPDF2.PdfFileReader(f_in)
    pdf_writer = PyPDF2.PdfFileWriter()
    for page_num in range(pdf.numPages):
        pdf_writer.addPage(pdf.getPage(page_num))
    pdf_writer.encrypt('hicoop')
    # The output is written while f_in is still open.
    with open('coop-s.pdf', 'wb') as f_out:
        pdf_writer.write(f_out)
# Decrypt: open the encrypted copy, report whether it is encrypted, then
# unlock it with the password used when encrypting.
with open('coop-s.pdf', 'rb') as f_in:
    pdf = PyPDF2.PdfFileReader(f_in)
    print(pdf.isEncrypted)
    # NOTE(review): the return value of decrypt() is not checked here.
    pdf.decrypt('hicoop')
    pdf.getPage(0)  # pages can only be used normally once the data is decrypted
True
# Merge PDFs: stamp a watermark page onto every page of coop.pdf.
#
# The watermark must be read from a different file than the one being
# written: opening the output with 'wb' truncates it, so reading the
# watermark from 'coop-watermarked.pdf' and writing to that same path leaves
# an empty file.
# NOTE(review): 'watermark.pdf' is an assumed name for the watermark source
# (its first page is used); point it at the actual watermark file.
with open('coop.pdf', 'rb') as f_in, open('watermark.pdf', 'rb') as f_w:
    pdf = PyPDF2.PdfFileReader(f_in)
    pdf_w = PyPDF2.PdfFileReader(f_w)
    # The same watermark page is used for every page, so fetch it once.
    watermark_page = pdf_w.getPage(0)

    pdf_write = PyPDF2.PdfFileWriter()
    for page_num in range(pdf.numPages):
        page = pdf.getPage(page_num)
        page.mergePage(watermark_page)
        pdf_write.addPage(page)
    # Write while both inputs are still open, as the original does.
    with open('coop-watermarked.pdf', 'wb') as f_out:
        pdf_write.write(f_out)
---------------------------------------------------------------------------

OSError                                   Traceback (most recent call last)

<ipython-input-39-b87325251ec9> in <module>()
      3     with open('coop-watermarked.pdf', 'rb') as f_w:
      4         pdf = PyPDF2.PdfFileReader(f_in)
----> 5         pdf_w = PyPDF2.PdfFileReader(f_w)
      6 
      7         pdf_write = PyPDF2.PdfFileWriter()

c:\users\coop\miniconda3\envs\coop\lib\site-packages\PyPDF2\pdf.py in __init__(self, stream, strict, warndest, overwriteWarnings)
   1082             stream = BytesIO(b_(fileobj.read()))
   1083             fileobj.close()
-> 1084         self.read(stream)
   1085         self.stream = stream
   1086 

c:\users\coop\miniconda3\envs\coop\lib\site-packages\PyPDF2\pdf.py in read(self, stream)
   1687         if debug: print(">>read", stream)
   1688         # start at the end:
-> 1689         stream.seek(-1, 2)
   1690         if not stream.tell():
   1691             raise utils.PdfReadError('Cannot read an empty file')

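OSError: [Errno 22] Invalid argument

(說明:錯誤發生在讀取水印文件 coop-watermarked.pdf 時。該文件是空的(0 字節),PyPDF2 對空文件執行 stream.seek(-1, 2) 就會報 Errno 22。水印文件和輸出文件不應使用同一個路徑,否則以 'wb' 打開輸出文件時會把水印文件清空。)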
向AI問一下細節

免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。

AI

河西区| 宝坻区| 甘谷县| 金寨县| 十堰市| 淮安市| 灌云县| 那曲县| 邵阳市| 阳泉市| 平江县| 抚远县| 喜德县| 石景山区| 临清市| 林周县| 阿拉尔市| 通河县| 象山县| 临泽县| 霍城县| 扎赉特旗| 惠安县| 太仓市| 深州市| 乌兰察布市| 丹东市| 涿鹿县| 宝坻区| 拉萨市| 常宁市| 胶州市| 涟水县| 甘泉县| 方城县| 阿拉善右旗| 广河县| 东台市| 中方县| 卢龙县| 普陀区|