Fix prompt caching on llama.cpp endpoints (#920)
Explicitly enable prompt caching on llama.cpp endpoints
Co-authored-by: Nathan Sarrazin <[email protected]>
src/lib/server/endpoints/llamacpp/endpointLlamacpp.ts
@@ -41,6 +41,7 @@ export function endpointLlamacpp(
 				stop: model.parameters.stop,
 				repeat_penalty: model.parameters.repetition_penalty,
 				n_predict: model.parameters.max_new_tokens,
+				cache_prompt: true,
 			}),
 		});
 
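For context, llama.cpp's server exposes a /completion endpoint whose request body accepts a cache_prompt flag; when set, the server tries to reuse the KV cache from the previous request, so a prompt that shares a prefix with the last one (such as a growing chat history) is not re-evaluated from scratch. Below is a minimal sketch of such a request, independent of the chat-ui code; the URL, prompt, and n_predict value are illustrative placeholders, not taken from this PR.

```ts
// Minimal sketch (not the chat-ui implementation): a direct completion
// request to a llama.cpp server with prompt caching enabled.
async function complete(prompt: string): Promise<string> {
	const res = await fetch("http://localhost:8080/completion", {
		method: "POST",
		headers: { "Content-Type": "application/json" },
		body: JSON.stringify({
			prompt,
			n_predict: 128, // illustrative value
			// Reuse the server-side KV cache from the previous request when
			// the new prompt shares a common prefix with it.
			cache_prompt: true,
		}),
	});
	const json = await res.json();
	return json.content; // llama.cpp returns the generated text in `content`
}
```

In a multi-turn chat this means each request only pays to evaluate the tokens appended since the previous turn, which is why explicitly enabling the flag speeds up llama.cpp-backed conversations.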