import base64
import json
import mimetypes
import os
import time
from pathlib import Path
from typing import Optional

import requests
# Endpoint of the Datalab form-filling API.
API_URL = "https://www.datalab.to/api/v1/fill"
# API key taken from the DATALAB_API_KEY environment variable (None when unset).
API_KEY = os.environ.get("DATALAB_API_KEY")
def fill_form(
    form_path: Path,
    field_data: dict,
    context: Optional[str] = None,
    *,
    max_polls: int = 300,
    poll_interval: float = 2.0,
    request_timeout: float = 60.0,
) -> Optional[Path]:
    """Submit a form to the fill API, wait for the result, and save it.

    The form file is uploaded together with the JSON-encoded ``field_data``
    (and ``context`` when given). The check URL returned by the API is then
    polled until the job reports ``complete`` or ``failed``, or until
    ``max_polls`` checks have been made. On success the base64-encoded output
    is decoded and written next to the input as ``<stem>.filled.pdf`` or
    ``<stem>.filled.png``.

    Args:
        form_path: Path of the form file to upload.
        field_data: JSON-serializable mapping describing the fields to fill.
        context: Optional free-text context sent along with the request.
        max_polls: Maximum number of status checks before giving up.
        poll_interval: Seconds to sleep between status checks.
        request_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        The path of the saved filled form, or ``None`` when the API reports a
        failure, returns no check URL, or polling times out.

    Raises:
        RuntimeError: If no API key is configured.
        OSError: If the form file cannot be read or the output not written.
        requests.HTTPError: If the API answers with an error status code.
    """
    if not API_KEY:
        raise RuntimeError("DATALAB_API_KEY environment variable is not set")

    headers = {"X-Api-Key": API_KEY}
    # Derive the upload MIME type from the file extension instead of always
    # claiming PDF; unknown extensions keep the previous PDF default.
    mime_type = mimetypes.guess_type(form_path.name)[0] or "application/pdf"

    #
    # Submit initial request
    #
    with open(form_path, "rb") as f:
        form_data = {
            "file": (form_path.name, f, mime_type),
            "field_data": (None, json.dumps(field_data)),
        }
        if context:
            form_data["context"] = (None, context)
        # The file must still be open while requests streams the upload.
        response = requests.post(
            API_URL, files=form_data, headers=headers, timeout=request_timeout
        )
    response.raise_for_status()
    data = response.json()

    check_url = data.get("request_check_url")
    if not check_url:
        # Without a check URL there is nothing to poll.
        print(f"Form filling failed: {data.get('error')}")
        return None

    #
    # Poll for completion
    #
    for attempt in range(max_polls):
        response = requests.get(
            check_url, headers=headers, timeout=request_timeout
        )
        response.raise_for_status()
        check_result = response.json()
        status = check_result.get("status")

        if status == "complete":
            if not check_result.get("success"):
                print(f"Form filling failed: {check_result.get('error')}")
                return None

            # Decode the filled form
            filled_form_bytes = base64.b64decode(check_result["output_base64"])

            # Save to file
            output_ext = (
                "pdf" if check_result.get("output_format") == "pdf" else "png"
            )
            output_path = form_path.with_suffix(f".filled.{output_ext}")
            output_path.write_bytes(filled_form_bytes)

            print("Form filled successfully!")
            print(f"Fields filled: {check_result.get('fields_filled')}")
            print(f"Fields not found: {check_result.get('fields_not_found')}")
            print(f"Saved to: {output_path}")
            return output_path

        if status == "failed":
            print("Failed to fill form")
            return None

        # Still processing: wait before the next check, but do not sleep
        # after the final attempt.
        if attempt < max_polls - 1:
            print(f"Waiting {poll_interval:g} more seconds to re-check status")
            time.sleep(poll_interval)

    print(f"Timed out waiting for form filling after {max_polls} status checks")
    return None
# Example usage
# Each entry maps a form field name to the value to write and a short
# description of that field.
field_data = {
    "name": {"value": "John Doe", "description": "Full name"},
    "email": {"value": "john.doe@example.com", "description": "Email address"},
    "date": {"value": "12/15/2024", "description": "Date"},
}

# Only run the example when executed as a script, so importing this module
# does not trigger file and network I/O.
if __name__ == "__main__":
    fill_form(
        Path("form.pdf"),
        field_data,
        context="General form filling",
    )