nhathuy07 committed on
Commit
b3b29cf
·
verified ·
1 Parent(s): b6b3aea

Enable async document conversion

Browse files
Files changed (1) hide show
  1. main.py +20 -9
main.py CHANGED
@@ -130,24 +130,35 @@ async def __remove_temp(request):
130
 
131
  return PlainTextResponse("", 200)
132
 
133
- def __convert_text(input, type_in, type_out):
134
 
135
  # Create a subprocess
136
  process = await asyncio.create_subprocess_exec(
137
- 'pandoc', '-t', 'html', # command to execute
 
138
  stdout=asyncio.subprocess.PIPE, # redirect stdout
139
  stderr=asyncio.subprocess.STDOUT,
140
  stdin=asyncio.subprocess.PIPE,# redirect stderr
141
  )
142
  stdout, _ = await process.communicate(input=input.encode())
143
- print(stdout.decode())
144
 
145
- def __convert2md(inp):
 
 
 
 
 
 
 
 
 
 
146
  # Use gfm-raw_html to strip styling data from source file
147
- return pypandoc.convert_text(inp, "gfm-raw_html", "html")
148
 
149
- def __convert2plain(inp):
150
- return pypandoc.convert_text(inp, "plain", "html")
151
 
152
  def convert2md(req):
153
  pass
@@ -156,7 +167,7 @@ async def __parse_paragraphs (content: str, batching: bool = False):
156
  _p = ""
157
  _rp = content
158
 
159
- _rp = __convert2md(_rp).replace('\r','')
160
  # remove empty lines and headers
161
  _p = [_x.strip() for _x in _rp.split('\n\n') if len(_x)!=0 and _x.strip().count('#') != len(_x)]
162
 
@@ -521,7 +532,7 @@ async def convert2html(request):
521
 
522
  for file in files:
523
  try:
524
- output = pypandoc.convert_file(file, "html")
525
  print(output)
526
 
527
  except Exception as e:
 
130
 
131
  return PlainTextResponse("", 200)
132
 
133
async def __convert_text(input, type_out="plain", type_in="html"):
    """Convert a document held in memory by piping it through pandoc.

    The conversion runs in a child process so the event loop is not blocked.

    Args:
        input: Source document, either ``str`` or already UTF-8 encoded
            ``bytes``.
        type_out: Pandoc writer name, passed to ``-t``.
        type_in: Pandoc reader name, passed to ``-f``. It needs a default
            because it follows a defaulted parameter; ``"html"`` is what
            the callers in this file pass.

    Returns:
        Pandoc's standard output decoded as UTF-8.

    Raises:
        RuntimeError: If pandoc exits with a non-zero status.
    """
    # Callers hand over both str and pre-encoded bytes; encode only when needed.
    if isinstance(input, (bytes, bytearray)):
        payload = bytes(input)
    else:
        payload = input.encode("utf-8")

    process = await asyncio.create_subprocess_exec(
        'pandoc', '-f', type_in, '-t', type_out,
        stdin=asyncio.subprocess.PIPE,    # document is fed on stdin
        stdout=asyncio.subprocess.PIPE,   # converted document
        stderr=asyncio.subprocess.PIPE,   # kept separate so diagnostics never leak into the result
    )
    stdout, stderr = await process.communicate(input=payload)

    if process.returncode != 0:
        raise RuntimeError(
            f"pandoc exited with code {process.returncode}: "
            f"{stderr.decode('utf-8', errors='replace').strip()}"
        )
    return stdout.decode("utf-8")
145
 
146
async def __convert_file(fname_in, type_out="plain"):
    """Convert a file on disk with pandoc without blocking the event loop.

    Pandoc infers the input format from the file name.

    Args:
        fname_in: Path of the file to convert.
        type_out: Pandoc writer name, passed to ``-t``.

    Returns:
        Pandoc's standard output decoded as UTF-8.

    Raises:
        RuntimeError: If pandoc exits with a non-zero status.
    """
    proc = await asyncio.create_subprocess_exec(
        # Input files are positional arguments for pandoc; ``-i`` means
        # ``--incremental`` and is not an input option. ``--`` ends option
        # parsing so a file name starting with '-' is not read as a flag.
        'pandoc', '-t', type_out, '--', fname_in,
        stdin=asyncio.subprocess.DEVNULL,  # nothing is sent on stdin
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,    # kept separate so diagnostics never leak into the result
    )
    stdout, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise RuntimeError(
            f"pandoc exited with code {proc.returncode}: "
            f"{stderr.decode('utf-8', errors='replace').strip()}"
        )
    return stdout.decode("utf-8")
155
+
156
async def __convert2md(inp):
    """Convert an HTML string to GitHub-flavored Markdown.

    Args:
        inp: HTML source as ``str``.

    Returns:
        The Markdown produced by ``__convert_text``.
    """
    # Use gfm-raw_html to strip styling data from source file.
    # Pass the text through unencoded: __convert_text does the encoding
    # itself, and encoding here too would make it call .encode() on bytes.
    return await __convert_text(inp, "gfm-raw_html", "html")
159
 
160
async def __convert2plain(inp):
    """Convert an HTML string to plain text.

    Args:
        inp: HTML source as ``str``.

    Returns:
        The plain-text rendering produced by ``__convert_text``.
    """
    # The output format must be "plain"; "gfm-raw_html" would make this a
    # duplicate of __convert2md. The text is passed unencoded because
    # __convert_text does the encoding itself.
    return await __convert_text(inp, "plain", "html")
162
 
163
  def convert2md(req):
164
  pass
 
167
  _p = ""
168
  _rp = content
169
 
170
+ _rp = await __convert2md(_rp).replace('\r','')
171
  # remove empty lines and headers
172
  _p = [_x.strip() for _x in _rp.split('\n\n') if len(_x)!=0 and _x.strip().count('#') != len(_x)]
173
 
 
532
 
533
  for file in files:
534
  try:
535
+ output = await __convert_file(file, "html")
536
  print(output)
537
 
538
  except Exception as e: