import abc
import asyncio
from abc import abstractmethod
import math

import tiktoken
import openai
import backoff


class LLM(abc.ABC):
    prompt_percent = 0.9

    @abstractmethod
    def __init__(self):
        raise NotImplementedError("Subclasses should implement this!")

    @abstractmethod
    def infer(self, prompts):
        raise NotImplementedError("Subclasses should implement this!")

    @abstractmethod
    def split_input(
        self, fixed_instruction, few_shot_examples, splittable_input, input_header, output_header
    ):
        raise NotImplementedError("Subclasses should implement this!")


class GPT(LLM):
    prompt_percent = 0.8

    openai_cxn_dict = {
        "default": {
            "endpoint": "INSERT YOUR AZURE OPENAI ENDPOINT HERE",
            "api_key": "INSERT YOUR AZURE OPENAI API KEY HERE",
        },
    }

    deployment_max_length_dict = {
        "gpt-4": 8192,
        "gpt-4-0314": 8192,
        "gpt-4-32k": 32768,
        "gpt-35-turbo": 4096,
        "gpt-35-turbo-16k": 16385,
    }

    def __init__(self, model_id):
        self.temperature = 0.0
        self.top_k = 1
        # Map the Azure deployment name (e.g. "gpt-35-turbo-16k") to the base model
        # name (e.g. "gpt-3.5") that tiktoken recognizes.
        self.encoding = tiktoken.encoding_for_model(
            "-".join(model_id.split("-", 2)[:2]).replace("5", ".5")
        )
        self.openai_api = "default"
        self.model_id = model_id
        self.max_length = self.deployment_max_length_dict[model_id]
        self.client = openai.AsyncAzureOpenAI(
            api_key=self.openai_cxn_dict[self.openai_api]["api_key"],
            api_version="2023-12-01-preview",
            azure_endpoint=self.openai_cxn_dict[self.openai_api]["endpoint"],
        )

    def gen_messages(
        self, fixed_instruction, few_shot_examples, input, input_header, output_header
    ):
        messages = [
            {
                "role": "system",
                "content": fixed_instruction,
            },
        ]
        for example in few_shot_examples:
            messages.extend(
                [
                    {
                        "role": "user",
                        "content": input_header + "\n" + example["user"] + "\n\n" + output_header,
                    },
                    {
                        "role": "assistant",
                        "content": example["assistant"],
                    },
                ]
            )
        messages.extend(
            [
                {
                    "role": "user",
                    "content": input_header + "\n" + input + "\n\n" + output_header,
                },
            ]
        )
        return messages
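
    # Illustrative only: with one few-shot example, gen_messages returns a chat
    # prompt shaped roughly like this (the header and text values below are
    # placeholders, not strings defined in this module):
    #
    # [
    #     {"role": "system", "content": fixed_instruction},
    #     {"role": "user", "content": "<input_header>\n<example user text>\n\n<output_header>"},
    #     {"role": "assistant", "content": "<example assistant text>"},
    #     {"role": "user", "content": "<input_header>\n<new input>\n\n<output_header>"},
    # ]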

    # Coroutine for making a single chat-completion call to the Azure OpenAI deployment.
    # Retries with exponential backoff when the service rate-limits the request.
    @backoff.on_exception(backoff.expo, openai.RateLimitError)
    async def make_api_call_to_gpt(self, messages):
        response = await self.client.chat.completions.create(
            model=self.model_id,
            messages=messages,
            temperature=self.temperature,
        )
        return response.choices[0].message.content

    async def dispatch_openai_requests(
        self,
        messages_list,
    ):
        # Asynchronously call the function for each prompt
        tasks = [self.make_api_call_to_gpt(messages) for messages in messages_list]
        # Gather and run the tasks concurrently
        results = await asyncio.gather(*tasks)
        return results

    def infer(
        self,
        messages_list,
    ):
        return asyncio.run(self.dispatch_openai_requests(messages_list))

    def split_input(
        self, fixed_instruction, few_shot_examples, splittable_input, input_header, output_header
    ):
        # Tokenize the fixed part of the prompt (instruction plus few-shot examples).
        fixed_token_ids = self.encoding.encode(
            fixed_instruction
            + " ".join([x["user"] + " " + x["assistant"] for x in few_shot_examples])
        )
        # Calculate the token budget left for the splittable input.
        remaining_token_len = math.ceil(
            (self.prompt_percent * self.max_length) - len(fixed_token_ids)
        )
        # Tokenize splittable_input.
        split_token_ids = self.encoding.encode(splittable_input)
        # Split the tokenized input into chunks that fit the remaining budget, using
        # token counts for length; consecutive chunks overlap by 10 tokens.
        split_token_ids_list = [
            split_token_ids[i : i + remaining_token_len + 10]
            for i in range(0, len(split_token_ids), remaining_token_len)
        ]
        split_input_list = [
            self.encoding.decode(split_token_ids) for split_token_ids in split_token_ids_list
        ]
        # Combine the fixed instruction, few-shot examples, split inputs, and
        # input/output headers into a list of chat prompts.
        return [
            self.gen_messages(
                fixed_instruction, few_shot_examples, split_input, input_header, output_header
            )
            for split_input in split_input_list
        ]
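

# A minimal usage sketch, not part of the original module: it assumes the Azure
# endpoint/API-key placeholders in openai_cxn_dict have been filled in and that a
# deployment named "gpt-4" exists; the instruction, example, and input strings
# below are made up for illustration.
if __name__ == "__main__":
    model = GPT("gpt-4")
    prompts = model.split_input(
        fixed_instruction="Summarize the report excerpt.",
        few_shot_examples=[{"user": "Example report text.", "assistant": "Example summary."}],
        splittable_input="A long report that may not fit in a single prompt...",
        input_header="Report:",
        output_header="Summary:",
    )
    for output in model.infer(prompts):
        print(output)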