- xls2xlsx version: 0.2.0
- Python version: 3.10
- Operating System: Azure Runbook
Description
Converting .xls files to .xlsx.
The script below works for .xls files of around 700 KB, both in an Azure Runbook and in a local Python session.
For a 10 MB .xls file it works in local Python, but fails in an Azure Runbook.
What I Did
#!/usr/bin/env python3
from azure.storage.blob import BlobServiceClient
import xlrd
import openpyxl
import io
from xls2xlsx import XLS2XLSX
# Placeholders: supply the real storage connection string and container name.
connection_string = "a"
container_name = "b"

blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

# Convert every legacy .xls blob whose name starts with "20" into an .xlsx
# blob (same name with an extra "x"), then remove the original.
for blob in container_client.list_blobs(name_starts_with="20"):
    # Match on the file extension only. A substring test would also pick up
    # names such as "report.xls.bak" and would skip "old.xlsx.d/report.xls".
    if not blob.name.endswith(".xls"):
        continue

    blob_client = container_client.get_blob_client(blob.name)
    excel_bytes = blob_client.download_blob().readall()
    excel_file = io.BytesIO(excel_bytes)

    x2x = XLS2XLSX(excel_file)
    wb = x2x.to_xlsx()

    xlsx_bytes = io.BytesIO()
    wb.save(xlsx_bytes)
    xlsx_bytes.seek(0)  # rewind so the upload reads from the beginning

    # NOTE(review): the source bytes, the parsed workbook and the serialized
    # .xlsx are all held in memory at once. Presumably this is what breaks
    # large files in a memory-constrained runbook sandbox -- confirm against
    # the job's error output. Dropping the intermediates here at least keeps
    # them from staying alive during the upload and the next iteration.
    del x2x, wb, excel_file, excel_bytes

    new_blob_name = blob.name + "x"
    new_blob_client = container_client.get_blob_client(new_blob_name)
    new_blob_client.upload_blob(xlsx_bytes, overwrite=True)

    # delete the original .xls (only reached if the upload did not raise)
    blob_client.delete_blob()
Description
Converting .xls files to .xlsx.
The script below works for .xls files of around 700 KB, both in an Azure Runbook and in a local Python session.
For a 10 MB .xls file it works in local Python, but fails in an Azure Runbook.
What I Did
#!/usr/bin/env python3
from azure.storage.blob import BlobServiceClient
import xlrd
import openpyxl
import io
from xls2xlsx import XLS2XLSX
# Placeholders: supply the real storage connection string and container name.
connection_string = "a"
container_name = "b"

blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

# Convert every legacy .xls blob whose name starts with "20" into an .xlsx
# blob (same name with an extra "x"), then remove the original.
for blob in container_client.list_blobs(name_starts_with="20"):
    # Match on the file extension only. A substring test would also pick up
    # names such as "report.xls.bak" and would skip "old.xlsx.d/report.xls".
    if not blob.name.endswith(".xls"):
        continue

    blob_client = container_client.get_blob_client(blob.name)
    excel_bytes = blob_client.download_blob().readall()
    excel_file = io.BytesIO(excel_bytes)

    x2x = XLS2XLSX(excel_file)
    wb = x2x.to_xlsx()

    xlsx_bytes = io.BytesIO()
    wb.save(xlsx_bytes)
    xlsx_bytes.seek(0)  # rewind so the upload reads from the beginning

    # NOTE(review): the source bytes, the parsed workbook and the serialized
    # .xlsx are all held in memory at once. Presumably this is what breaks
    # large files in a memory-constrained runbook sandbox -- confirm against
    # the job's error output. Dropping the intermediates here at least keeps
    # them from staying alive during the upload and the next iteration.
    del x2x, wb, excel_file, excel_bytes

    new_blob_name = blob.name + "x"
    new_blob_client = container_client.get_blob_client(new_blob_name)
    new_blob_client.upload_blob(xlsx_bytes, overwrite=True)

    # delete the original .xls (only reached if the upload did not raise)
    blob_client.delete_blob()