Commit
·
2b1f01e
1
Parent(s):
d2f9318
remove colbert dependency
Browse files
search.py
CHANGED
@@ -9,7 +9,6 @@ from colbert.search.index_storage import IndexScorer
|
|
9 |
from colbert.search.strided_tensor import StridedTensor
|
10 |
from colbert.indexing.codecs.residual_embeddings_strided import ResidualEmbeddingsStrided
|
11 |
from colbert.indexing.codecs.residual import ResidualCodec
|
12 |
-
from colbert.modeling.colbert import ColBERT
|
13 |
|
14 |
load_dotenv()
|
15 |
|
@@ -67,19 +66,23 @@ def init_colbert(index_path=INDEX_PATH, load_index_with_mmap=False):
|
|
67 |
offsets = embeddings_strided.codes_strided.offsets
|
68 |
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
# return scores.sum(-1)
|
75 |
|
|
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
83 |
|
84 |
|
85 |
def colbert_score_packed(Q, D_packed, D_lengths):
|
@@ -91,8 +94,8 @@ def colbert_score_packed(Q, D_packed, D_lengths):
|
|
91 |
|
92 |
scores = D_packed @ Q.T
|
93 |
|
94 |
-
|
95 |
-
scores =
|
96 |
|
97 |
return scores
|
98 |
|
@@ -116,10 +119,10 @@ def score_pids(config, Q, pids, centroid_scores):
|
|
116 |
|
117 |
if Q.size(0) == 1:
|
118 |
scores = colbert_score_packed(Q, D_packed, D_mask)
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
|
124 |
return scores, pids
|
125 |
|
|
|
9 |
from colbert.search.strided_tensor import StridedTensor
|
10 |
from colbert.indexing.codecs.residual_embeddings_strided import ResidualEmbeddingsStrided
|
11 |
from colbert.indexing.codecs.residual import ResidualCodec
|
|
|
12 |
|
13 |
load_dotenv()
|
14 |
|
|
|
66 |
offsets = embeddings_strided.codes_strided.offsets
|
67 |
|
68 |
|
69 |
+
def colbert_score_reduce(scores_padded, D_mask):
    """Reduce padded per-position scores to a single score per leading row.

    ``D_mask`` is reshaped to the first two dimensions of ``scores_padded``
    and interpreted as booleans; positions where it is false are treated as
    padding. Presumably dim 0 indexes documents and dim 1 indexes document
    token positions -- confirm against the callers.

    Note: ``scores_padded`` is modified in place (padding positions are
    overwritten with -9999).

    Returns the maximum over dim 1, summed over the last dimension.
    """
    n_rows, n_cols = scores_padded.size(0), scores_padded.size(1)
    keep = D_mask.view(n_rows, n_cols).bool()

    # Push padding positions far below any real score so they cannot win
    # the max below. This write mutates the caller's tensor.
    scores_padded[~keep] = -9999

    best, _ = scores_padded.max(dim=1)
    return best.sum(dim=-1)
|
75 |
|
76 |
+
|
77 |
+
def colbert_score(Q, D_padded, D_mask):
    """Score padded inputs ``D_padded`` against ``Q``.

    Both ``Q`` and ``D_padded`` must be 3-dimensional, and ``Q`` must have a
    leading size of either 1 or the leading size of ``D_padded``; otherwise
    an ``AssertionError`` is raised.

    ``Q`` is cast to the dtype of ``D_padded``, its last two dimensions are
    swapped, and the batched matrix product ``D_padded @ Q^T`` is handed to
    ``colbert_score_reduce`` together with ``D_mask``.

    Returns whatever ``colbert_score_reduce`` produces for those scores.
    """
    assert Q.dim() == 3, Q.size()
    assert D_padded.dim() == 3, D_padded.size()
    assert Q.size(0) in (1, D_padded.size(0))

    # Match dtypes before multiplying, then swap the last two axes of Q.
    Q_swapped = Q.to(dtype=D_padded.dtype).transpose(1, 2)
    pairwise = D_padded.matmul(Q_swapped)

    return colbert_score_reduce(pairwise, D_mask)
|
86 |
|
87 |
|
88 |
def colbert_score_packed(Q, D_packed, D_lengths):
|
|
|
94 |
|
95 |
scores = D_packed @ Q.T
|
96 |
|
97 |
+
scores_padded, scores_mask = StridedTensor(scores, D_lengths, use_gpu=False).as_padded_tensor()
|
98 |
+
scores = colbert_score_reduce(scores_padded, scores_mask)
|
99 |
|
100 |
return scores
|
101 |
|
|
|
119 |
|
120 |
if Q.size(0) == 1:
|
121 |
scores = colbert_score_packed(Q, D_packed, D_mask)
|
122 |
+
else:
|
123 |
+
D_strided = StridedTensor(D_packed, D_mask, use_gpu=False)
|
124 |
+
D_padded, D_lengths = D_strided.as_padded_tensor()
|
125 |
+
scores = colbert_score(Q, D_padded, D_lengths)
|
126 |
|
127 |
return scores, pids
|
128 |
|