orario-scolastico-itet/pdf2csv.py

25 lines
660 B
Python

import pdfplumber
import csv
import sys
def _convert_pdf_to_csv(input_pdf, output_csv):
    """Write every table pdfplumber extracts from *input_pdf* to *output_csv*.

    Pages are processed in document order. The rows of each extracted table
    are appended to the CSV one after another, with no separator between
    tables or pages.

    Args:
        input_pdf: Path of the PDF file to read.
        output_csv: Path of the CSV file to create (overwritten if present).

    Raises:
        OSError: If either file cannot be opened.
    """
    # The PDF is opened before the CSV so that an unreadable input does not
    # create or truncate the output file. Both handles (and the pdfplumber
    # document) are released by the context managers even if extraction or
    # writing fails part-way through.
    with open(input_pdf, 'rb') as f, pdfplumber.open(f) as pdf:
        # newline='' lets the csv module control line endings itself; an
        # explicit UTF-8 encoding keeps non-ASCII cell text from failing on
        # platforms whose locale encoding cannot represent it.
        with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            for page in pdf.pages:
                for table in page.extract_tables():
                    writer.writerows(table)


def main(argv=None):
    """Run the command-line interface and return the process exit status.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv`` when omitted.

    Returns:
        0 after a successful conversion, 1 when the argument count is wrong.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        # Usage errors go to stderr so they never mix with regular output.
        print("Usage: pdf2csv.py <input.pdf> <output.csv>", file=sys.stderr)
        return 1
    _convert_pdf_to_csv(argv[1], argv[2])
    return 0


if __name__ == '__main__':
    sys.exit(main())