# -*- coding: utf-8 -*-
# @Time    : 2023/3/23 1:02 p.m.
# @Author  : Jianing Wang
# @File    : gpt_response.py
import os
import torch
import openai
import time
"""
Call for GPT-style LLM.
The output format is the same as OpenAI (e.g., GPT-3.5 text-davinci-003)
"""

class GPTResponse:

    def __init__(self, model_type: str, data_path: str) -> None:
        assert model_type in ["gpt2", "gpt3"]
        self.model_type = model_type
        if self.model_type == "gpt3":
            # the OpenAI API key is read from the first line of <data_path>/openai_key.txt
            with open(os.path.join(data_path, 'openai_key.txt'), 'r') as f:
                key = f.readline().strip()
            openai.api_key = key
    def call_for_gpt3_response(self, prompt, l, model_name, temp=0, num_log_probs=None, echo=False, n=None):
        """
        Call the GPT-3 API until a result is returned, retrying on transient errors.
        """
        response = None
        received = False
        while not received:
            try:
                response = openai.Completion.create(engine=model_name, prompt=prompt, max_tokens=l, temperature=temp,
                                                    logprobs=num_log_probs, echo=echo, stop='\n', n=n)
                received = True
            except openai.error.InvalidRequestError:
                # something is wrong on our side (e.g. the prompt is too long), so retrying is pointless
                print(f"InvalidRequestError\nPrompt passed in:\n\n{prompt}\n\n")
                raise
            except Exception as error:
                # transient API error: wait a moment and retry
                print("API error:", error)
                time.sleep(1)
        return response
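
    # Hypothetical usage sketch for the GPT-3 path (the model name, prompt, and data_path
    # below are illustrative; data_path must contain an openai_key.txt holding a valid key):
    #   gpt = GPTResponse(model_type="gpt3", data_path="data/")
    #   resp = gpt.call_for_gpt3_response("Q: What is 2+2?\nA:", l=5,
    #                                     model_name="text-davinci-003", num_log_probs=5)
    #   print(resp['choices'][0]['text'])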

    def call_for_gpt2_response(self, gpt2_tokenizer, logits, total_sequences, l=10, num_log_probs=None, echo=False, n=None):
        """
        Obtain prediction logits from a local GPT-2 model and convert them into a response
        that matches the format returned by the OpenAI API.
        """
        if not echo:
            # get the top tokens and probs for the l generated tokens
            probs = torch.softmax(logits[:, -l - 1:], dim=2).cpu()
        else:
            # get the top tokens and probs for the context plus the l generated tokens
            probs = torch.softmax(logits, dim=2).cpu()
        top_probs, top_tokens = torch.topk(probs, k=num_log_probs)
        logprobs = torch.log(probs)
        top_log_probs = torch.log(top_probs)
        # build the return value to resemble the OpenAI response format
        return_json = {}
        choices = []
# print("="*50)
for batch_id in range(len(logits)):
curr_json = {}
# text is just the optional context and next l tokens
if not echo:
curr_json['text'] = gpt2_tokenizer.decode(total_sequences[batch_id][-l:], skip_special_tokens=True)
else:
curr_json['text'] = gpt2_tokenizer.decode(total_sequences[batch_id], skip_special_tokens=True)
# fill the return json with the top tokens and probs to match the OpenAI return value.
if num_log_probs is not None:
curr_json['logprobs'] = {}
curr_json['logprobs']['top_logprobs'] = []
curr_json['logprobs']['token_logprobs'] = []
curr_json['logprobs']['tokens'] = []
if not echo:
# cutoff the -1 here because the probs are shifted one over for LMs
for current_element_top_log_probs, current_element_top_tokens in zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1]):
# tokens is a list of the top token at each position
curr_json['logprobs']['tokens'].append(gpt2_tokenizer.decode([current_element_top_tokens[0]]))
# token_logprobs is a list of the logprob of the top token at each position
curr_json['logprobs']['token_logprobs'].append(current_element_top_log_probs[0].item())
# top_logprobs is a list of dicts for the top K tokens. with each entry being {'token_name': log_prob}
temp = {}
for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
temp[gpt2_tokenizer.decode(token.item())] = log_prob.item()
curr_json['logprobs']['top_logprobs'].append(temp)
else:
# same as not above but small tweaks
# we add null to the front because for the GPT models, they have null probability for the first token
# (for some reason they don't have an beginning of sentence token)
curr_json['logprobs']['top_logprobs'].append('null')
# cutoff the -1 here because the probs are shifted one over for LMs
for index, (current_element_top_log_probs, current_element_top_tokens) in enumerate(zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1])):
# skip padding tokens
if total_sequences[batch_id][index].item() == 50256:
continue
temp = {}
for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
temp[gpt2_tokenizer.decode(token.item())] = log_prob.item()
curr_json['logprobs']['top_logprobs'].append(temp)
for index in range(len(probs[batch_id])):
curr_json['logprobs']['tokens'].append(gpt2_tokenizer.decode([total_sequences[batch_id][index]]))
curr_json['logprobs']['token_logprobs'].append('null')
for index, log_probs_token_position_j in enumerate(logprobs[batch_id][:-1]):
# probs are left shifted for LMs
curr_json['logprobs']['token_logprobs'].append(log_probs_token_position_j[total_sequences[batch_id][index+1]])
choices.append(curr_json)
# print("curr_json=", curr_json)
        '''
        e.g.,
        num_tokens_to_predict=1
        curr_json = {
            'text': ' I',  # the top token generated at the current position
            'logprobs': {
                'top_logprobs': [{' I': -3.4267239570617676, '\n': -3.5073862075805664, ...}],  # top-k tokens and their scores
                'token_logprobs': [-3.4267239570617676],  # score of the current top token
                'tokens': [' I']
            }
        }
        num_tokens_to_predict=2
        curr_json = {
            'text': '\nThe',  # if two tokens are requested, two tokens are generated
            'logprobs': {
                'top_logprobs': [  # predicted scores for each of the two positions
                    {'\n': -3.186706304550171, '\xa0': -3.222092390060425, ' We': -6.781067848205566, ...},
                    {'The': -2.5251243114471436, '"': -2.857935667037964, ...}
                ],
                'token_logprobs': [-3.186706304550171, -2.5251243114471436],  # scores of the generated tokens
                'tokens': ['\n', 'The']
            }
        }
        '''
        return_json['choices'] = choices
        return return_json
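

# Minimal usage sketch for the local GPT-2 path (illustrative, not part of the original
# module). It assumes the Hugging Face `transformers` package and the public "gpt2"
# checkpoint; the prompt, l, and num_log_probs values below are arbitrary.
if __name__ == "__main__":
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    model.eval()

    l = 5  # number of tokens to predict
    input_ids = tokenizer("The capital of France is", return_tensors="pt").input_ids
    with torch.no_grad():
        # greedy decoding; total_sequences holds the context plus the l generated tokens
        total_sequences = model.generate(input_ids, max_new_tokens=l, do_sample=False,
                                         pad_token_id=tokenizer.eos_token_id)
        # re-run the model over the full sequence to get logits at every position
        logits = model(total_sequences).logits

    gpt = GPTResponse(model_type="gpt2", data_path=".")  # data_path is only used for gpt3
    response = gpt.call_for_gpt2_response(tokenizer, logits, total_sequences, l=l, num_log_probs=5)
    print(response['choices'][0]['text'])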