Spaces:
Runtime error
Runtime error
Update Dockerfile
Browse files- Dockerfile +70 -157
Dockerfile
CHANGED
@@ -1,157 +1,70 @@
|
|
1 |
-
#
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
app
|
20 |
-
app
|
21 |
-
app
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
session['uploaded_files'] = uploaded_files
|
73 |
-
flash('Files successfully uploaded')
|
74 |
-
logging.info(f"Files successfully uploaded: {uploaded_files}")
|
75 |
-
return redirect(url_for('index'))
|
76 |
-
|
77 |
-
@app.route('/remove_file')
|
78 |
-
def remove_file():
|
79 |
-
uploaded_files = session.get('uploaded_files', [])
|
80 |
-
for filename in uploaded_files:
|
81 |
-
file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
|
82 |
-
if os.path.exists(file_path):
|
83 |
-
os.remove(file_path)
|
84 |
-
logging.info(f"Removed file: {filename}")
|
85 |
-
else:
|
86 |
-
logging.warning(f"File not found for removal: {file_path}") # More specific log
|
87 |
-
|
88 |
-
session.pop('uploaded_files', None)
|
89 |
-
flash('Files successfully removed')
|
90 |
-
logging.info("All uploaded files removed")
|
91 |
-
return redirect(url_for('index'))
|
92 |
-
|
93 |
-
@app.route('/process', methods=['POST'])
|
94 |
-
def process_file():
|
95 |
-
uploaded_files = session.get('uploaded_files', [])
|
96 |
-
if not uploaded_files:
|
97 |
-
flash('No files selected for processing')
|
98 |
-
logging.warning("No files selected for processing")
|
99 |
-
return redirect(url_for('index'))
|
100 |
-
|
101 |
-
file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
|
102 |
-
logging.info(f"Processing files: {file_paths}")
|
103 |
-
|
104 |
-
extracted_text = {}
|
105 |
-
processed_Img = {}
|
106 |
-
|
107 |
-
try:
|
108 |
-
extracted_text, processed_Img = extract_text_from_images(file_paths, RESULT_FOLDER)
|
109 |
-
logging.info(f"Extracted text: {extracted_text}")
|
110 |
-
logging.info(f"Processed images: {processed_Img}")
|
111 |
-
|
112 |
-
llmText = json_to_llm_str(extracted_text)
|
113 |
-
logging.info(f"LLM text: {llmText}")
|
114 |
-
|
115 |
-
LLMdata = Data_Extractor(llmText)
|
116 |
-
logging.info(f"LLM data: {LLMdata}")
|
117 |
-
|
118 |
-
except Exception as e:
|
119 |
-
logging.error(f"Error during LLM processing: {e}")
|
120 |
-
logging.info("Running backup model...")
|
121 |
-
|
122 |
-
LLMdata = {}
|
123 |
-
|
124 |
-
if extracted_text:
|
125 |
-
text = json_to_llm_str(extracted_text)
|
126 |
-
LLMdata = NER_Model(text)
|
127 |
-
logging.info(f"NER model data: {LLMdata}")
|
128 |
-
else:
|
129 |
-
logging.warning("No extracted text available for backup model")
|
130 |
-
|
131 |
-
cont_data = process_extracted_text(extracted_text)
|
132 |
-
logging.info(f"Contextual data: {cont_data}")
|
133 |
-
|
134 |
-
processed_data = process_resume_data(LLMdata, cont_data, extracted_text)
|
135 |
-
logging.info(f"Processed data: {processed_data}")
|
136 |
-
|
137 |
-
session['processed_data'] = processed_data
|
138 |
-
session['processed_Img'] = processed_Img
|
139 |
-
flash('Data processed and analyzed successfully')
|
140 |
-
logging.info("Data processed and analyzed successfully")
|
141 |
-
return redirect(url_for('result'))
|
142 |
-
|
143 |
-
@app.route('/result')
|
144 |
-
def result():
|
145 |
-
processed_data = session.get('processed_data', {})
|
146 |
-
processed_Img = session.get('processed_Img', {})
|
147 |
-
logging.info(f"Displaying results: Data - {processed_data}, Images - {processed_Img}")
|
148 |
-
return render_template('result.html', data=processed_data, Img=processed_Img)
|
149 |
-
|
150 |
-
@app.route('/uploads/<filename>')
|
151 |
-
def uploaded_file(filename):
|
152 |
-
logging.info(f"Serving file: {filename}")
|
153 |
-
return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
|
154 |
-
|
155 |
-
if __name__ == '__main__':
|
156 |
-
logging.info("Starting Flask app")
|
157 |
-
app.run(debug=True)
|
|
|
1 |
+
# Use an official Python runtime as a parent image
|
2 |
+
FROM python:3.9-slim
|
3 |
+
|
4 |
+
# Set environment variables for Python
|
5 |
+
ENV PYTHONDONTWRITEBYTECODE=1
|
6 |
+
ENV PYTHONUNBUFFERED=1
|
7 |
+
|
8 |
+
# Set environment variables for Hugging Face cache to /tmp (which is writable)
|
9 |
+
ENV TRANSFORMERS_CACHE=/tmp/huggingface
|
10 |
+
ENV HF_HOME=/tmp/huggingface
|
11 |
+
|
12 |
+
# Ensure permissions are correct for /tmp/huggingface (optional, but just in case)
|
13 |
+
RUN mkdir -p /tmp/huggingface && chmod -R 777 /tmp/huggingface
|
14 |
+
|
15 |
+
# Create cache directories for Hugging Face and Matplotlib
|
16 |
+
RUN mkdir -p /app/.cache/huggingface /app/.cache/matplotlib
|
17 |
+
|
18 |
+
# Set environment variables for Hugging Face and Matplotlib cache
|
19 |
+
ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
|
20 |
+
ENV HF_HOME=/app/.cache/huggingface
|
21 |
+
ENV MPLCONFIGDIR=/app/.cache/matplotlib
|
22 |
+
|
23 |
+
# Make the cache directories world-writable
|
24 |
+
RUN chmod -R 777 /app/.cache
|
25 |
+
|
26 |
+
# Give write permissions to the /app directory
|
27 |
+
RUN chmod -R 777 /app
|
28 |
+
|
29 |
+
# Create /app/logs directory and set permissions for logging
|
30 |
+
RUN mkdir -p /app/logs && chmod -R 777 /app/logs
|
31 |
+
|
32 |
+
# Set the working directory
|
33 |
+
WORKDIR /app
|
34 |
+
|
35 |
+
# Install system dependencies, including libgomp
|
36 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
37 |
+
libgl1 \
|
38 |
+
libgomp1 \
|
39 |
+
libglib2.0-0 \
|
40 |
+
&& rm -rf /var/lib/apt/lists/*
|
41 |
+
|
42 |
+
# Override the Matplotlib and Transformers cache directories set earlier so they point at locations under /tmp
|
43 |
+
ENV MPLCONFIGDIR=/tmp/matplotlib
|
44 |
+
ENV TRANSFORMERS_CACHE=/tmp/transformers_cache
|
45 |
+
|
46 |
+
# Copy the requirements file into the container at /app
|
47 |
+
COPY requirements.txt /app/
|
48 |
+
|
49 |
+
# Install any needed packages specified in requirements.txt
|
50 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
51 |
+
|
52 |
+
# Create directories for session storage, uploads, and cache
|
53 |
+
RUN mkdir -p /app/flask_sessions /app/uploads /tmp/matplotlib /tmp/transformers_cache && chmod -R 777 /app/flask_sessions /app/uploads /tmp/matplotlib /tmp/transformers_cache
|
54 |
+
|
55 |
+
# Point PADDLEOCR_MODEL_DIR at /tmp/.paddleocr and make that directory world-writable
|
56 |
+
ENV PADDLEOCR_MODEL_DIR=/tmp/.paddleocr
|
57 |
+
RUN mkdir -p /tmp/.paddleocr && chmod -R 777 /tmp/.paddleocr
|
58 |
+
|
59 |
+
# Copy the rest of the application code to /app
|
60 |
+
COPY . /app/
|
61 |
+
|
62 |
+
# Expose the port that the app runs on
|
63 |
+
EXPOSE 7860
|
64 |
+
|
65 |
+
# Set environment variables for Flask
|
66 |
+
ENV FLASK_APP=app.py
|
67 |
+
ENV FLASK_ENV=production
|
68 |
+
|
69 |
+
# Command to run the Flask app using Gunicorn with 1 worker
|
70 |
+
CMD ["gunicorn", "--workers=1", "--bind=0.0.0.0:7860", "--timeout=120", "app:app"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|