davidheineman committed on
Commit
2b1f01e
·
1 Parent(s): d2f9318

remove colbert dependency

Browse files
Files changed (1) hide show
  1. search.py +21 -18
search.py CHANGED
@@ -9,7 +9,6 @@ from colbert.search.index_storage import IndexScorer
9
  from colbert.search.strided_tensor import StridedTensor
10
  from colbert.indexing.codecs.residual_embeddings_strided import ResidualEmbeddingsStrided
11
  from colbert.indexing.codecs.residual import ResidualCodec
12
- from colbert.modeling.colbert import ColBERT
13
 
14
  load_dotenv()
15
 
@@ -67,19 +66,23 @@ def init_colbert(index_path=INDEX_PATH, load_index_with_mmap=False):
67
  offsets = embeddings_strided.codes_strided.offsets
68
 
69
 
70
- # def colbert_score_reduce(scores_padded, D_mask):
71
- # D_padding = ~D_mask.view(scores_padded.size(0), scores_padded.size(1)).bool()
72
- # scores_padded[D_padding] = -9999
73
- # scores = scores_padded.max(1).values
74
- # return scores.sum(-1)
75
 
 
76
 
77
- # def colbert_score(Q, D_padded, D_mask):
78
- # assert Q.dim() == 3, Q.size()
79
- # assert D_padded.dim() == 3, D_padded.size()
80
- # assert Q.size(0) in [1, D_padded.size(0)]
81
- # scores = D_padded @ Q.to(dtype=D_padded.dtype).permute(0, 2, 1)
82
- # return colbert_score_reduce(scores, D_mask)
 
 
 
 
83
 
84
 
85
  def colbert_score_packed(Q, D_packed, D_lengths):
@@ -91,8 +94,8 @@ def colbert_score_packed(Q, D_packed, D_lengths):
91
 
92
  scores = D_packed @ Q.T
93
 
94
- # C++ : Calculate maxsim operation
95
- scores = ColBERT.segmented_maxsim(scores, D_lengths)
96
 
97
  return scores
98
 
@@ -116,10 +119,10 @@ def score_pids(config, Q, pids, centroid_scores):
116
 
117
  if Q.size(0) == 1:
118
  scores = colbert_score_packed(Q, D_packed, D_mask)
119
- # else:
120
- # D_strided = StridedTensor(D_packed, D_mask, use_gpu=False)
121
- # D_padded, D_lengths = D_strided.as_padded_tensor()
122
- # scores = colbert_score(Q, D_padded, D_lengths, config)
123
 
124
  return scores, pids
125
 
 
9
  from colbert.search.strided_tensor import StridedTensor
10
  from colbert.indexing.codecs.residual_embeddings_strided import ResidualEmbeddingsStrided
11
  from colbert.indexing.codecs.residual import ResidualCodec
 
12
 
13
  load_dotenv()
14
 
 
66
  offsets = embeddings_strided.codes_strided.offsets
67
 
68
 
69
def colbert_score_reduce(scores_padded, D_mask):
    """Reduce padded per-token scores to a single score per row of dim 0.

    ``D_mask`` is reshaped to ``(scores_padded.size(0), scores_padded.size(1))``
    and cast to bool; every slot where it is False is treated as padding.
    Padding slots are overwritten with -9999 so they cannot win the max taken
    over dim 1, and the resulting maxima are then summed over the last dim.

    NOTE: ``scores_padded`` is modified in place (padding slots become -9999).

    Args:
        scores_padded: score tensor; presumably laid out as
            (documents, document tokens, query tokens) -- TODO confirm
            against callers.
        D_mask: mask with ``size(0) * size(1)`` elements, truthy for real
            (non-padding) slots.

    Returns:
        The max over dim 1 of the masked scores, summed over the last dim.
    """
    n_rows, n_slots = scores_padded.size(0), scores_padded.size(1)

    # True exactly where the mask says "this slot is padding".
    is_padding = ~(D_mask.view(n_rows, n_slots).bool())

    # In-place write: the sentinel keeps padding out of the max below.
    scores_padded[is_padding] = -9999

    best_per_column, _ = scores_padded.max(1)
    return best_per_column.sum(-1)
75
 
76
+
77
def colbert_score(Q, D_padded, D_mask):
    """Score padded document embeddings against query embeddings.

    ``Q`` is cast to ``D_padded``'s dtype, its last two axes are swapped, and
    it is batch-multiplied with ``D_padded``. The resulting score tensor is
    then collapsed by ``colbert_score_reduce`` using ``D_mask``.

    Args:
        Q: 3-D query tensor whose first dimension is either 1 or equal to
            ``D_padded.size(0)``.
        D_padded: 3-D padded document tensor; presumably
            (documents, document tokens, embedding dim) -- TODO confirm.
        D_mask: mask forwarded unchanged to ``colbert_score_reduce``.

    Returns:
        Whatever ``colbert_score_reduce`` returns for the computed scores.

    Raises:
        AssertionError: if either tensor is not 3-D, or the leading
            dimensions are incompatible.
    """
    assert Q.dim() == 3, Q.size()
    assert D_padded.dim() == 3, D_padded.size()
    assert Q.size(0) in (1, D_padded.size(0))

    # Match dtypes before the product, then put the embedding axis first so
    # the batched matmul contracts over it.
    Q_transposed = Q.to(dtype=D_padded.dtype).permute(0, 2, 1)
    token_scores = D_padded.matmul(Q_transposed)

    return colbert_score_reduce(token_scores, D_mask)
86
 
87
 
88
  def colbert_score_packed(Q, D_packed, D_lengths):
 
94
 
95
  scores = D_packed @ Q.T
96
 
97
+ scores_padded, scores_mask = StridedTensor(scores, D_lengths, use_gpu=False).as_padded_tensor()
98
+ scores = colbert_score_reduce(scores_padded, scores_mask)
99
 
100
  return scores
101
 
 
119
 
120
  if Q.size(0) == 1:
121
  scores = colbert_score_packed(Q, D_packed, D_mask)
122
+ else:
123
+ D_strided = StridedTensor(D_packed, D_mask, use_gpu=False)
124
+ D_padded, D_lengths = D_strided.as_padded_tensor()
125
+ scores = colbert_score(Q, D_padded, D_lengths)
126
 
127
  return scores, pids
128