-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPDF_to_Excel.py
More file actions
32 lines (25 loc) · 1.13 KB
/
PDF_to_Excel.py
File metadata and controls
32 lines (25 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#Transfer data (from an invoice, for example) from a PDF to Excel.
#Name the file 'invoice.pdf' and put it in the same folder as the script.
#Adapt the regular expressions to accommodate your needs.
#Run the script and it will write the extracted values to an Excel file named 'invoices.xlsx'.
from PyPDF2 import PdfReader
import re
import openpyxl
# Read 'invoice.pdf', pull the invoice number, date and total out of its text
# with regular expressions, and add them as one new row to 'invoices.xlsx'.


def _first_match(pattern, source, label):
    """Return the first captured group of *pattern* found in *source*.

    Args:
        pattern: Regular expression containing exactly one capture group.
        source: Text to search.
        label: Human-readable field name, used only in the error message.

    Returns:
        The text captured by group 1 of the first match.

    Raises:
        ValueError: If *pattern* does not match anywhere in *source*.
    """
    match = re.search(pattern, source)
    if match is None:
        raise ValueError(
            f"Could not find the {label} in the PDF text; "
            f"adjust the pattern {pattern!r}"
        )
    return match.group(1)


# Open the PDF invoice using PyPDF2
pdf = PdfReader('invoice.pdf')

# Extract text from every page of the PDF. The `or ''` guards against a page
# that yields no text, so the join never receives a non-string value.
text = ''.join(page.extract_text() or '' for page in pdf.pages)

# Use regular expressions to find the invoice number, date and amount.
# The capture groups hold the literal values of one sample invoice; adapt
# them to the documents being processed.
# NOTE(review): the hyphens in the date and total patterns appear to be the
# non-ASCII hyphen character, presumably as produced by the PDF's text layer
# -- keep them as-is when editing unless the extracted text differs.
invoice_number = _first_match(
    r'INVOICE NO. (XXX-23-24-E-060)', text, 'invoice number')
invoice_date = _first_match(
    r'INVOICE DATE (26‐Dec‐2023)', text, 'invoice date')
total_amount = _first_match(
    r'Total EX‐WORKS Price : (€ 1,500.00)', text, 'total amount')

# Write the extracted values to Excel using openpyxl. If the workbook does not
# exist yet, start a new one with a header row instead of failing.
try:
    excel_file = openpyxl.load_workbook('invoices.xlsx')
except FileNotFoundError:
    excel_file = openpyxl.Workbook()
    excel_file.active.append(['Invoice number', 'Invoice date', 'Total amount'])

sheet = excel_file.active
row = sheet.max_row + 1  # first row below the existing content

# One column per field: number -> 1, date -> 2, total -> 3. (Writing the
# number and the date to the same column would lose the invoice number.)
extracted_values = (invoice_number, invoice_date, total_amount)
for column, value in enumerate(extracted_values, start=1):
    sheet.cell(row=row, column=column).value = value

excel_file.save('invoices.xlsx')