mariagrandury committed on
Commit
7e34cb9
·
1 Parent(s): 112de32

Move comments inline in the create_collection_name function

Browse files
Files changed (1) hide show
  1. app.py +15 -13
app.py CHANGED
@@ -98,26 +98,28 @@ def initialize_llmchain(
98
  # Generate collection name for vector database
99
  # - Use filepath as input, ensuring unicode text
100
  def create_collection_name(filepath):
101
- # Extract filename without extension
102
- collection_name = Path(filepath).stem
103
  # Fix potential issues from naming convention
104
- ## Remove space
105
- collection_name = collection_name.replace(" ", "-")
106
- ## ASCII transliterations of Unicode text
107
- collection_name = unidecode(collection_name)
108
- ## Remove special characters
109
- # collection_name = re.findall("[\dA-Za-z]*", collection_name)[0]
110
- collection_name = re.sub("[^A-Za-z0-9]+", "-", collection_name)
111
- ## Limit length to 50 characters
112
- collection_name = collection_name[:50]
113
- ## Minimum length of 3 characters
114
  if len(collection_name) < 3:
115
  collection_name = collection_name + "xyz"
116
- ## Enforce start and end as alphanumeric character
 
117
  if not collection_name[0].isalnum():
118
  collection_name = "A" + collection_name[1:]
119
  if not collection_name[-1].isalnum():
120
  collection_name = collection_name[:-1] + "Z"
 
121
  print("Filepath: ", filepath)
122
  print("Collection name: ", collection_name)
123
  return collection_name
 
98
  # Generate collection name for vector database
99
  # - Use filepath as input, ensuring unicode text
100
  def create_collection_name(filepath):
101
+ collection_name = Path(filepath).stem # Extract filename without extension
102
+
103
  # Fix potential issues from naming convention
104
+ collection_name = collection_name.replace(" ", "-") # Remove space
105
+ collection_name = unidecode(
106
+ collection_name
107
+ ) # ASCII transliterations of Unicode text
108
+ collection_name = re.sub(
109
+ "[^A-Za-z0-9]+", "-", collection_name
110
+ ) # Remove special characters
111
+ collection_name = collection_name[:50] # Limit length to 50 characters
112
+
113
+ # Minimum length of 3 characters
114
  if len(collection_name) < 3:
115
  collection_name = collection_name + "xyz"
116
+
117
+ # Enforce start and end as alphanumeric character
118
  if not collection_name[0].isalnum():
119
  collection_name = "A" + collection_name[1:]
120
  if not collection_name[-1].isalnum():
121
  collection_name = collection_name[:-1] + "Z"
122
+
123
  print("Filepath: ", filepath)
124
  print("Collection name: ", collection_name)
125
  return collection_name