accounting-micro-automation / ai_transcriber.py
Adr740's picture
Upload 8 files
411ca77 verified
raw
history blame
2.18 kB
# Input: data_path — directory searched recursively for .png receipt images.
import os
from pdfparser_hq import encode_image
from config import openai_api
from openai import OpenAI
def transcribe_all(data_path,
                   name_of_raw_transcripts="transcript_raw.txt",
                   *,
                   model="gpt-4o"):
    """Transcribe every ``.png`` receipt found under *data_path*.

    The directory tree is walked recursively; each ``.png`` file is encoded
    with ``encode_image`` and sent to the OpenAI chat-completions endpoint
    together with a system prompt asking for a JSON transcription (supplier
    name, amount, currency, date). The collected replies are written to
    ``<data_path>/<name_of_raw_transcripts>``.

    Args:
        data_path: Root directory to search for images; also the directory
            the transcript file is written to.
        name_of_raw_transcripts: File name of the output transcript file.
        model: Name of the chat-completions model to query.

    Returns:
        None. The result is the file written to disk. It contains
        ``str()`` of a list of dicts (a Python literal, not JSON), each with
        the keys ``"path"`` (image path), ``"filename"`` (``P<n>.png`` where
        ``n`` is the 1-based position in discovery order) and ``"content"``
        (the model's reply text).
    """
    client = OpenAI(api_key=openai_api)
    system_prompt = """
You will be given a reciept that could be handwritten or properly formated. Your goal is to transcribe what is written in JSON following this format:
{
"name_of_supplier" : X,
"amount" : X,
"currency": X,
"date" : DD/MM/YYYY
}
Make sure you provide the total amount and the correct dates, handwritten ones might be tricky. This will be used to reconcile with banking transactions.
"""

    # Collect the image paths first so progress can be reported as i/total.
    # Order is whatever os.walk yields; the P<n>.png labels depend on it.
    image_paths = []
    for root, _dirs, files in os.walk(data_path):
        for file in files:
            if file.endswith('.png'):
                image_path = os.path.join(root, file)
                print(image_path)
                image_paths.append(image_path)

    transcripts = []
    total = len(image_paths)
    for i, filename in enumerate(image_paths):
        base64_image = encode_image(filename)
        # NOTE(review): the data URL is labelled image/jpeg although only
        # .png files are collected. Left unchanged because encode_image is
        # not visible here -- confirm what it returns before relabelling.
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": [
                        {
                            "type": "text",
                            "text": system_prompt
                        }
                    ]
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}",
                            }
                        }
                    ]
                }
            ],
            temperature=1,
            max_tokens=1877,
            top_p=1,
            response_format={"type": "json_object"},
            frequency_penalty=0,
            presence_penalty=0
        ).choices[0].message.content
        transcripts.append({"path": filename, "filename": f"P{i+1}.png", "content": response})
        print(f"done transcribing transcript: {i+1}/{total}")

    # Explicit UTF-8: the model output may contain non-ASCII characters
    # (currency symbols, accented supplier names) that a platform's default
    # encoding cannot represent, which would fail only after all API calls.
    output_path = os.path.join(data_path, name_of_raw_transcripts)
    with open(output_path, 'w', encoding="utf-8") as file:
        file.write(str(transcripts))