Hansimov committed on
Commit
e2b245b
·
1 Parent(s): 395e196

:gem: [Feature] Add use_cache parameter, and set default temperature to 0.5

Browse files
Files changed (2) hide show
  1. apis/chat_api.py +6 -1
  2. networks/message_streamer.py +6 -1
apis/chat_api.py CHANGED
@@ -88,13 +88,17 @@ class ChatAPIApp:
88
  description="(list) Messages",
89
  )
90
  temperature: Union[float, None] = Field(
91
- default=0,
92
  description="(float) Temperature",
93
  )
94
  max_tokens: Union[int, None] = Field(
95
  default=-1,
96
  description="(int) Max tokens",
97
  )
 
 
 
 
98
  stream: bool = Field(
99
  default=True,
100
  description="(bool) Stream",
@@ -113,6 +117,7 @@ class ChatAPIApp:
113
  temperature=item.temperature,
114
  max_new_tokens=item.max_tokens,
115
  api_key=api_key,
 
116
  )
117
  if item.stream:
118
  event_source_response = EventSourceResponse(
 
88
  description="(list) Messages",
89
  )
90
  temperature: Union[float, None] = Field(
91
+ default=0.5,
92
  description="(float) Temperature",
93
  )
94
  max_tokens: Union[int, None] = Field(
95
  default=-1,
96
  description="(int) Max tokens",
97
  )
98
+ use_cache: bool = Field(
99
+ default=False,
100
+ description="(bool) Use cache",
101
+ )
102
  stream: bool = Field(
103
  default=True,
104
  description="(bool) Stream",
 
117
  temperature=item.temperature,
118
  max_new_tokens=item.max_tokens,
119
  api_key=api_key,
120
+ use_cache=item.use_cache,
121
  )
122
  if item.stream:
123
  event_source_response = EventSourceResponse(
networks/message_streamer.py CHANGED
@@ -61,9 +61,10 @@ class MessageStreamer:
61
  def chat_response(
62
  self,
63
  prompt: str = None,
64
- temperature: float = 0,
65
  max_new_tokens: int = None,
66
  api_key: str = None,
 
67
  ):
68
  # https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
69
  # curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
@@ -105,6 +106,7 @@ class MessageStreamer:
105
  # huggingface_hub/inference/_text_generation.py:
106
  # class TextGenerationRequest > param `stream`
107
  # https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
 
108
  self.request_body = {
109
  "inputs": prompt,
110
  "parameters": {
@@ -112,6 +114,9 @@ class MessageStreamer:
112
  "max_new_tokens": max_new_tokens,
113
  "return_full_text": False,
114
  },
 
 
 
115
  "stream": True,
116
  }
117
 
 
61
  def chat_response(
62
  self,
63
  prompt: str = None,
64
+ temperature: float = 0.5,
65
  max_new_tokens: int = None,
66
  api_key: str = None,
67
+ use_cache: bool = False,
68
  ):
69
  # https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
70
  # curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
 
106
  # huggingface_hub/inference/_text_generation.py:
107
  # class TextGenerationRequest > param `stream`
108
  # https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
109
+ # https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task
110
  self.request_body = {
111
  "inputs": prompt,
112
  "parameters": {
 
114
  "max_new_tokens": max_new_tokens,
115
  "return_full_text": False,
116
  },
117
+ "options": {
118
+ "use_cache": use_cache,
119
+ },
120
  "stream": True,
121
  }
122