pvanand committed on
Commit
9e5e37a
·
verified ·
1 Parent(s): 338bb7c

Update file_conversion.py

Browse files
Files changed (1) hide show
  1. file_conversion.py +0 -14
file_conversion.py CHANGED
@@ -17,50 +17,36 @@ TEMP_DIR = "/.tempfiles"
def remove_file(path: str) -> None:
    """Delete the file at ``path`` and log the removal.

    A path that does not exist is silently ignored, so the function is safe
    to call more than once for the same file.

    Args:
        path: Filesystem path of the file to remove.

    Raises:
        OSError: If the path exists but cannot be unlinked (for example it
            is a directory, or permissions forbid the removal).
    """
    # Attempt the unlink directly rather than checking os.path.exists()
    # first: the file can disappear between the check and the unlink, which
    # would turn a harmless "already gone" into an unexpected exception.
    try:
        os.unlink(path)
    except (FileNotFoundError, NotADirectoryError):
        return
    logger.info("Removed temporary file: %s", path)
21
 
@router.post("/convert/pdf_to_docx")
async def convert_pdf_to_docx(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Convert an uploaded PDF to DOCX and return the result as a download.

    The upload is written to a uniquely named temporary file under
    ``TEMP_DIR``, converted with ``Converter``, and the resulting DOCX is
    returned as a ``FileResponse``. Both temporary files are queued for
    removal on ``background_tasks``; on failure they are removed immediately.

    Args:
        background_tasks: Task queue used to schedule temp-file cleanup.
        file: The uploaded file; its name must end in ``.pdf`` (any case).

    Returns:
        A ``FileResponse`` serving the converted DOCX, named after the upload.

    Raises:
        HTTPException: 400 if the upload is not named ``*.pdf``; 500 if
            saving or converting the file fails.
    """
    # The filename is client-supplied: it may be missing and may contain
    # directory components, so keep only the final path component.
    upload_name = os.path.basename(file.filename or "")
    if not upload_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="File must be a PDF")

    # Swap only the trailing extension; str.replace would also rewrite any
    # ".pdf" that happens to appear earlier in the name.
    download_name = upload_name[: -len(".pdf")] + ".docx"

    # Ensure the temp directory exists
    os.makedirs(TEMP_DIR, exist_ok=True)

    # A random token (instead of the upload name) keeps concurrent requests
    # for identically named files from overwriting each other's temp files
    # and keeps client-controlled text out of the on-disk path.
    token = os.urandom(16).hex()
    pdf_temp_path = os.path.join(TEMP_DIR, f"temp_{token}.pdf")
    docx_temp_path = os.path.join(TEMP_DIR, f"temp_{token}.docx")

    try:
        # Save the uploaded file
        with open(pdf_temp_path, "wb") as pdf_file:
            shutil.copyfileobj(file.file, pdf_file)

        logger.info("Starting conversion of %s", pdf_temp_path)

        # Convert PDF to DOCX.
        # NOTE(review): this is a synchronous call inside an async handler
        # and will block the event loop while it runs -- consider offloading
        # it to a worker thread.
        cv = Converter(pdf_temp_path)
        try:
            cv.convert(docx_temp_path)
        finally:
            # Always release the converter, even when convert() raises.
            cv.close()

        logger.info("Conversion completed. Output file: %s", docx_temp_path)

        # Check if the file exists
        if not os.path.exists(docx_temp_path):
            raise FileNotFoundError(f"Converted file not found: {docx_temp_path}")

        # Schedule file removal after response is sent
        background_tasks.add_task(remove_file, pdf_temp_path)
        background_tasks.add_task(remove_file, docx_temp_path)

        # Return the DOCX file
        return FileResponse(
            docx_temp_path,
            media_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            filename=download_name,
        )
    except Exception as e:
        logger.exception("Conversion failed: %s", e)
        # Clean up files in case of an error
        remove_file(pdf_temp_path)
        remove_file(docx_temp_path)
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
 
def remove_file(path: str) -> None:
    """Delete the file at ``path``, doing nothing if it is already gone.

    Safe to call more than once for the same path.

    Args:
        path: Filesystem path of the file to remove.

    Raises:
        OSError: If the path exists but cannot be unlinked (for example it
            is a directory, or permissions forbid the removal).
    """
    # Attempt the unlink directly rather than checking os.path.exists()
    # first: the file can disappear between the check and the unlink, which
    # would turn a harmless "already gone" into an unexpected exception.
    try:
        os.unlink(path)
    except (FileNotFoundError, NotADirectoryError):
        pass
 
20
 
@router.post("/convert/pdf_to_docx")
async def convert_pdf_to_docx(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Convert an uploaded PDF to DOCX and return the result as a download.

    The upload is written to a uniquely named temporary file under
    ``TEMP_DIR``, converted with ``Converter``, and the resulting DOCX is
    returned as a ``FileResponse``. Both temporary files are queued for
    removal on ``background_tasks``; on failure they are removed immediately.

    Args:
        background_tasks: Task queue used to schedule temp-file cleanup.
        file: The uploaded file; its name must end in ``.pdf`` (any case).

    Returns:
        A ``FileResponse`` serving the converted DOCX, named after the upload.

    Raises:
        HTTPException: 400 if the upload is not named ``*.pdf``; 500 if
            saving or converting the file fails.
    """
    # The filename is client-supplied: it may be missing and may contain
    # directory components, so keep only the final path component.
    upload_name = os.path.basename(file.filename or "")
    if not upload_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="File must be a PDF")

    # Swap only the trailing extension; str.replace would also rewrite any
    # ".pdf" that happens to appear earlier in the name.
    download_name = upload_name[: -len(".pdf")] + ".docx"

    os.makedirs(TEMP_DIR, exist_ok=True)

    # A random token (instead of the upload name) keeps concurrent requests
    # for identically named files from overwriting each other's temp files
    # and keeps client-controlled text out of the on-disk path.
    token = os.urandom(16).hex()
    pdf_temp_path = os.path.join(TEMP_DIR, f"temp_{token}.pdf")
    docx_temp_path = os.path.join(TEMP_DIR, f"temp_{token}.docx")

    try:
        with open(pdf_temp_path, "wb") as pdf_file:
            shutil.copyfileobj(file.file, pdf_file)

        # NOTE(review): this is a synchronous call inside an async handler
        # and will block the event loop while it runs -- consider offloading
        # it to a worker thread.
        cv = Converter(pdf_temp_path)
        try:
            cv.convert(docx_temp_path)
        finally:
            # Always release the converter, even when convert() raises.
            cv.close()

        # Guard against a conversion that returned without writing output.
        if not os.path.exists(docx_temp_path):
            raise FileNotFoundError(f"Converted file not found: {docx_temp_path}")

        # Cleanup is deferred to background tasks because the DOCX must
        # still exist on disk while the response is being served.
        background_tasks.add_task(remove_file, pdf_temp_path)
        background_tasks.add_task(remove_file, docx_temp_path)

        return FileResponse(
            docx_temp_path,
            media_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            filename=download_name,
        )
    except Exception as e:
        # No response will be served from these files, so remove them now.
        remove_file(pdf_temp_path)
        remove_file(docx_temp_path)
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e