-
Notifications
You must be signed in to change notification settings - Fork 0
extract
gitpavleenbali edited this page Feb 17, 2026
·
2 revisions
The extract function extracts structured data from unstructured text.
from pyai import extract

# Extract with schema
data = extract(text, schema={"name": str, "email": str})
# Extract common entities
entities = extract.entities("John works at Microsoft")

| Parameter | Type | Default | Description |
|---|---|---|---|
| text | str | required | Source text |
| schema | dict | None | Expected data structure |
| type | str | None | Preset: "contact", "date", "product" |

from pyai import extract
text = """
Contact John Smith at john@example.com
Phone: 555-123-4567
"""
contact = extract(text, schema={
"name": str,
"email": str,
"phone": str
})
print(contact)
# {"name": "John Smith", "email": "john@example.com", "phone": "555-123-4567"}

text = "Apple announced iPhone 15 on September 12, 2023"
entities = extract.entities(text)
# {
# "organizations": ["Apple"],
# "products": ["iPhone 15"],
# "dates": ["September 12, 2023"]
# }

# Extract invoice data
invoice_data = extract(
invoice_text,
schema={
"invoice_number": str,
"date": str,
"total": float,
"items": list
}
)

# Extract product info
product = extract(
description,
schema={
"name": str,
"price": float,
"features": list,
"specifications": dict
}
)

import asyncio
from pyai import extract

async def main():
    data = await extract.async_(
        email_text,
        schema={"sender": str, "subject": str}
    )
    print(data)

asyncio.run(main())

| Type | Extracted Fields |
|---|---|
| contact | name, email, phone, address |
| date | dates, times, durations |
| product | name, price, description, features |
| receipt | items, totals, date, vendor |
| invoice | number, date, items, total, tax |

Intelligence, Embedded.