Spaces:
Runtime error
Runtime error
mariagrandury
committed on
Commit
·
7e34cb9
1
Parent(s):
112de32
move comments in create collection name function
Browse files
app.py
CHANGED
@@ -98,26 +98,28 @@ def initialize_llmchain(
|
|
98 |
# Generate collection name for vector database
|
99 |
# - Use filepath as input, ensuring unicode text
|
100 |
def create_collection_name(filepath):
|
101 |
-
# Extract filename without extension
|
102 |
-
|
103 |
# Fix potential issues from naming convention
|
104 |
-
|
105 |
-
collection_name =
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
if len(collection_name) < 3:
|
115 |
collection_name = collection_name + "xyz"
|
116 |
-
|
|
|
117 |
if not collection_name[0].isalnum():
|
118 |
collection_name = "A" + collection_name[1:]
|
119 |
if not collection_name[-1].isalnum():
|
120 |
collection_name = collection_name[:-1] + "Z"
|
|
|
121 |
print("Filepath: ", filepath)
|
122 |
print("Collection name: ", collection_name)
|
123 |
return collection_name
|
|
|
98 |
# Generate collection name for vector database
|
99 |
# - Use filepath as input, ensuring unicode text
|
100 |
def create_collection_name(filepath):
    """Build a collection name for the vector database from a file path.

    The name is derived from the filename (directory and final extension
    dropped), transliterated to ASCII, and reduced to letters, digits and
    single hyphens. The result is at most 50 characters long, at least
    3 characters long, and begins and ends with an alphanumeric character.
    NOTE(review): these limits presumably mirror the vector database's
    collection-naming rules -- confirm against its documentation.

    Args:
        filepath: Path of the source document; only its stem is used.

    Returns:
        The sanitized collection name as a string.
    """
    # Filename without its directory part and final extension.
    stem = Path(filepath).stem

    # Spaces become hyphens, then Unicode text is transliterated to ASCII.
    ascii_stem = unidecode(stem.replace(" ", "-"))

    # Every run of non-alphanumeric characters collapses into one hyphen.
    name = re.sub("[^A-Za-z0-9]+", "-", ascii_stem)

    # Upper bound: keep only the first 50 characters.
    name = name[:50]

    # Lower bound: pad short (including empty) names so that at least
    # 3 characters exist; this also makes the indexing below safe.
    if len(name) < 3:
        name += "xyz"

    # The first and last characters must be alphanumeric; a hyphen in either
    # position is overwritten with a fixed letter.
    if not name[0].isalnum():
        name = "A" + name[1:]
    if not name[-1].isalnum():
        name = name[:-1] + "Z"

    print("Filepath: ", filepath)
    print("Collection name: ", name)
    return name
|