# -*- coding: utf-8 -*-
# @Time    : 2023/3/23 1:02 p.m.
# @Author  : Jianing Wang
# @File    : gpt_response.py
import os
import torch
import openai
import time
"""
Call for GPT-style LLM.
The output format is the same as OpenAI (e.g., GPT-3.5 text-davinci-003)
"""

class GPTResponse:

    def __init__(self, model_type: str, data_path: str) -> None:
        assert model_type in ["gpt2", "gpt3"]
        self.model_type = model_type
        if self.model_type == "gpt3":
            # the OpenAI API key is read from the first line of <data_path>/openai_key.txt
            with open(os.path.join(data_path, 'openai_key.txt'), 'r') as f:
                key = f.readline().strip()
            openai.api_key = key
    def call_for_gpt3_response(self, prompt, l, model_name, temp=0, num_log_probs=None, echo=False, n=None):
        """
        Call the GPT-3 API until a result is returned, retrying on transient errors.
        """
        response = None
        received = False
        while not received:
            try:
                response = openai.Completion.create(engine=model_name, prompt=prompt, max_tokens=l, temperature=temp,
                                                    logprobs=num_log_probs, echo=echo, stop='\n', n=n)
                received = True
            except openai.error.InvalidRequestError:
                # something is wrong on our side (e.g. the prompt is too long), so retrying is pointless
                print(f"InvalidRequestError\nPrompt passed in:\n\n{prompt}\n\n")
                raise
            except Exception as error:
                # transient API error: wait a moment and retry
                print("API error:", error)
                time.sleep(1)
        return response
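
    # Hypothetical usage sketch for the GPT-3 path (the model name, prompt, and data_path
    # below are illustrative; data_path must contain an openai_key.txt holding a valid key):
    #   gpt = GPTResponse(model_type="gpt3", data_path="data/")
    #   resp = gpt.call_for_gpt3_response("Q: What is 2+2?\nA:", l=5,
    #                                     model_name="text-davinci-003", num_log_probs=5)
    #   print(resp['choices'][0]['text'])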

    def call_for_gpt2_response(self, gpt2_tokenizer, logits, total_sequences, l=10, num_log_probs=None, echo=False, n=None):
        """
        Obtain prediction logits from a local GPT-2 model and convert them into a response
        that matches the format returned by the OpenAI API.
        """
        if not echo:
            # get the top tokens and probs for the l generated tokens
            probs = torch.softmax(logits[:, -l - 1:], dim=2).cpu()
        else:
            # get the top tokens and probs for the context plus the l generated tokens
            probs = torch.softmax(logits, dim=2).cpu()
        top_probs, top_tokens = torch.topk(probs, k=num_log_probs)
        logprobs = torch.log(probs)
        top_log_probs = torch.log(top_probs)
        # build the return value to resemble the OpenAI response format
        return_json = {}
        choices = []
# print("="*50)
for batch_id in range(len(logits)):
curr_json = {}
# text is just the optional context and next l tokens
if not echo:
curr_json['text'] = gpt2_tokenizer.decode(total_sequences[batch_id][-l:], skip_special_tokens=True)
else:
curr_json['text'] = gpt2_tokenizer.decode(total_sequences[batch_id], skip_special_tokens=True)
# fill the return json with the top tokens and probs to match the OpenAI return value.
if num_log_probs is not None:
curr_json['logprobs'] = {}
curr_json['logprobs']['top_logprobs'] = []
curr_json['logprobs']['token_logprobs'] = []
curr_json['logprobs']['tokens'] = []
if not echo:
# cutoff the -1 here because the probs are shifted one over for LMs
for current_element_top_log_probs, current_element_top_tokens in zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1]):
# tokens is a list of the top token at each position
curr_json['logprobs']['tokens'].append(gpt2_tokenizer.decode([current_element_top_tokens[0]]))
# token_logprobs is a list of the logprob of the top token at each position
curr_json['logprobs']['token_logprobs'].append(current_element_top_log_probs[0].item())
# top_logprobs is a list of dicts for the top K tokens. with each entry being {'token_name': log_prob}
temp = {}
for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
temp[gpt2_tokenizer.decode(token.item())] = log_prob.item()
curr_json['logprobs']['top_logprobs'].append(temp)
else:
# same as not above but small tweaks
# we add null to the front because for the GPT models, they have null probability for the first token
# (for some reason they don't have an beginning of sentence token)
curr_json['logprobs']['top_logprobs'].append('null')
# cutoff the -1 here because the probs are shifted one over for LMs
for index, (current_element_top_log_probs, current_element_top_tokens) in enumerate(zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1])):
# skip padding tokens
if total_sequences[batch_id][index].item() == 50256:
continue
temp = {}
for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
temp[gpt2_tokenizer.decode(token.item())] = log_prob.item()
curr_json['logprobs']['top_logprobs'].append(temp)
for index in range(len(probs[batch_id])):
curr_json['logprobs']['tokens'].append(gpt2_tokenizer.decode([total_sequences[batch_id][index]]))
curr_json['logprobs']['token_logprobs'].append('null')
for index, log_probs_token_position_j in enumerate(logprobs[batch_id][:-1]):
# probs are left shifted for LMs
curr_json['logprobs']['token_logprobs'].append(log_probs_token_position_j[total_sequences[batch_id][index+1]])
choices.append(curr_json)
# print("curr_json=", curr_json)
        '''
        e.g.,
        num_tokens_to_predict=1
        curr_json = {
            'text': ' I',  # the top token generated at the current position
            'logprobs': {
                'top_logprobs': [{' I': -3.4267239570617676, '\n': -3.5073862075805664, ...}],  # top-k tokens and their scores
                'token_logprobs': [-3.4267239570617676],  # score of the current top token
                'tokens': [' I']
            }
        }
        num_tokens_to_predict=2
        curr_json = {
            'text': '\nThe',  # if two tokens are requested, two tokens are generated
            'logprobs': {
                'top_logprobs': [  # predicted scores for each of the two positions
                    {'\n': -3.186706304550171, '\xa0': -3.222092390060425, ' We': -6.781067848205566, ...},
                    {'The': -2.5251243114471436, '"': -2.857935667037964, ...}
                ],
                'token_logprobs': [-3.186706304550171, -2.5251243114471436],  # scores of the generated tokens
                'tokens': ['\n', 'The']
            }
        }
        '''
        return_json['choices'] = choices
        return return_json
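

# Minimal usage sketch for the local GPT-2 path (illustrative, not part of the original
# module). It assumes the Hugging Face `transformers` package and the public "gpt2"
# checkpoint; the prompt, l, and num_log_probs values below are arbitrary.
if __name__ == "__main__":
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    model.eval()

    l = 5  # number of tokens to predict
    input_ids = tokenizer("The capital of France is", return_tensors="pt").input_ids
    with torch.no_grad():
        # greedy decoding; total_sequences holds the context plus the l generated tokens
        total_sequences = model.generate(input_ids, max_new_tokens=l, do_sample=False,
                                         pad_token_id=tokenizer.eos_token_id)
        # re-run the model over the full sequence to get logits at every position
        logits = model(total_sequences).logits

    gpt = GPTResponse(model_type="gpt2", data_path=".")  # data_path is only used for gpt3
    response = gpt.call_for_gpt2_response(tokenizer, logits, total_sequences, l=l, num_log_probs=5)
    print(response['choices'][0]['text'])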