@@ -1,5 +1,7 @@
-from langchain.callbacks.manager import Callbacks
+from langchain.callbacks.manager import Callbacks, CallbackManagerForLLMRun
 from langchain.llms import AzureOpenAI
+from langchain.llms.openai import _streaming_response_template, completion_with_retry, _update_response, \
+    update_token_usage
 from langchain.schema import LLMResult
 from typing import Optional, List, Dict, Mapping, Any, Union, Tuple
 
@@ -67,3 +69,58 @@ class StreamableAzureOpenAI(AzureOpenAI):
     @classmethod
     def get_kwargs_from_model_params(cls, params: dict):
         return params
+
+    def _generate(
+        self,
+        prompts: List[str],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> LLMResult:
+        """Call out to OpenAI's endpoint with k unique prompts.
+
+        Args:
+            prompts: The prompts to pass into the model.
+            stop: Optional list of stop words to use when generating.
+
+        Returns:
+            The full LLM output.
+
+        Example:
+            .. code-block:: python
+
+                response = openai.generate(["Tell me a joke."])
+        """
+        params = self._invocation_params
+        params = {**params, **kwargs}
+        sub_prompts = self.get_sub_prompts(params, prompts, stop)
+        choices = []
+        token_usage: Dict[str, int] = {}
+        # Get the token usage from the response.
+        # Includes prompt, completion, and total tokens used.
+        _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
+        for _prompts in sub_prompts:
+            if self.streaming:
+                if len(_prompts) > 1:
+                    raise ValueError("Cannot stream results with multiple prompts.")
+                params["stream"] = True
+                response = _streaming_response_template()
+                for stream_resp in completion_with_retry(
+                    self, prompt=_prompts, **params
+                ):
+                    if len(stream_resp["choices"]) > 0:
+                        if run_manager:
+                            run_manager.on_llm_new_token(
+                                stream_resp["choices"][0]["text"],
+                                verbose=self.verbose,
+                                logprobs=stream_resp["choices"][0]["logprobs"],
+                            )
+                    _update_response(response, stream_resp)
+                choices.extend(response["choices"])
+            else:
+                response = completion_with_retry(self, prompt=_prompts, **params)
+                choices.extend(response["choices"])
+            if not self.streaming:
+                # Can't update token usage if streaming
+                update_token_usage(_keys, response, token_usage)
+        return self.create_llm_result(choices, prompts, token_usage)
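
A minimal usage sketch (not part of the patch above): with streaming=True and a callback handler attached, the overridden _generate pushes each token through on_llm_new_token while still returning a complete LLMResult. The deployment name and API version shown are placeholder assumptions; credentials are expected to come from the usual Azure OpenAI environment variables.

# Illustrative usage only; deployment_name and openai_api_version are placeholders,
# and Azure OpenAI credentials are assumed to be set via environment variables.
from langchain.callbacks.base import BaseCallbackHandler

class PrintTokenHandler(BaseCallbackHandler):
    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Print each streamed token as it arrives.
        print(token, end="", flush=True)

llm = StreamableAzureOpenAI(
    deployment_name="text-davinci-003",  # placeholder deployment
    openai_api_version="2023-05-15",     # placeholder API version
    streaming=True,
    callbacks=[PrintTokenHandler()],
)
result = llm.generate(["Tell me a joke."])  # complete LLMResult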