added hf inference api
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
- duckdb-nsql/eval/get_manifest.py +1 -1
- duckdb-nsql/eval/predict.py +2 -2
- duckdb-nsql/manifest/.flake8 +0 -11
- duckdb-nsql/manifest/.pre-commit-config.yaml +0 -23
- duckdb-nsql/manifest/CHANGELOG.rst +0 -93
- duckdb-nsql/manifest/LICENSE +0 -201
- duckdb-nsql/manifest/Makefile +0 -27
- duckdb-nsql/manifest/README.md +0 -304
- duckdb-nsql/manifest/examples/langchain_chatgpt.ipynb +0 -455
- duckdb-nsql/manifest/examples/manifest_async.py +0 -27
- duckdb-nsql/manifest/examples/manifest_azure.ipynb +0 -149
- duckdb-nsql/manifest/examples/manifest_chatgpt.ipynb +0 -101
- duckdb-nsql/manifest/examples/manifest_connection_pool.ipynb +0 -208
- duckdb-nsql/manifest/examples/manifest_diffusers.ipynb +0 -0
- duckdb-nsql/manifest/examples/manifest_embedding.ipynb +0 -156
- duckdb-nsql/manifest/examples/manifest_google.ipynb +0 -117
- duckdb-nsql/manifest/examples/manifest_openrouter.ipynb +0 -108
- duckdb-nsql/manifest/examples/manifest_streaming.ipynb +0 -105
- duckdb-nsql/manifest/examples/manifest_together.ipynb +0 -106
- duckdb-nsql/manifest/manifest/__init__.py +0 -6
- duckdb-nsql/manifest/manifest/api/__init__.py +0 -1
- duckdb-nsql/manifest/manifest/api/app.py +0 -301
- duckdb-nsql/manifest/manifest/api/models/__init__.py +0 -1
- duckdb-nsql/manifest/manifest/api/models/diffuser.py +0 -123
- duckdb-nsql/manifest/manifest/api/models/huggingface.py +0 -671
- duckdb-nsql/manifest/manifest/api/models/model.py +0 -91
- duckdb-nsql/manifest/manifest/api/models/sentence_transformer.py +0 -113
- duckdb-nsql/manifest/manifest/api/response.py +0 -55
- duckdb-nsql/manifest/manifest/caches/__init__.py +0 -1
- duckdb-nsql/manifest/manifest/caches/array_cache.py +0 -116
- duckdb-nsql/manifest/manifest/caches/cache.py +0 -135
- duckdb-nsql/manifest/manifest/caches/noop.py +0 -47
- duckdb-nsql/manifest/manifest/caches/postgres.py +0 -131
- duckdb-nsql/manifest/manifest/caches/redis.py +0 -64
- duckdb-nsql/manifest/manifest/caches/serializers.py +0 -204
- duckdb-nsql/manifest/manifest/caches/sqlite.py +0 -65
- duckdb-nsql/manifest/manifest/clients/__init__.py +0 -1
- duckdb-nsql/manifest/manifest/clients/ai21.py +0 -125
- duckdb-nsql/manifest/manifest/clients/azureendpoint.py +0 -139
- duckdb-nsql/manifest/manifest/clients/azureopenai.py +0 -113
- duckdb-nsql/manifest/manifest/clients/azureopenai_chat.py +0 -116
- duckdb-nsql/manifest/manifest/clients/client.py +0 -699
- duckdb-nsql/manifest/manifest/clients/cohere.py +0 -125
- duckdb-nsql/manifest/manifest/clients/diffuser.py +0 -112
- duckdb-nsql/manifest/manifest/clients/dummy.py +0 -251
- duckdb-nsql/manifest/manifest/clients/google.py +0 -197
- duckdb-nsql/manifest/manifest/clients/google_chat.py +0 -155
- duckdb-nsql/manifest/manifest/clients/huggingface.py +0 -137
- duckdb-nsql/manifest/manifest/clients/huggingface_embedding.py +0 -98
- duckdb-nsql/manifest/manifest/clients/openai.py +0 -162
duckdb-nsql/eval/get_manifest.py
CHANGED
@@ -9,7 +9,7 @@ def get_manifest(
     manifest_engine: str,
 ) -> Manifest:
     """Get manifest engine."""
-    if manifest_client in {"openai", "openaichat", "openai_mock", "openrouter", "azureendpoint"}:
+    if manifest_client in {"openai", "openaichat", "openai_mock", "openrouter", "azureendpoint", "inference_api"}:
         manifest = Manifest(
             client_name=manifest_client,
             engine=manifest_engine,
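For orientation, here is a minimal sketch of what the widened branch enables: the new `inference_api` client is configured the same way as the other hosted-API clients, by engine name. This assumes the repository's `manifest` package (with the `inference_api` client added in this commit) is installed and that any required Hugging Face credentials are set in the environment; the engine id below is a placeholder, not something taken from the repo.

```python
from manifest import Manifest

# Hypothetical example: the client name comes from this commit; the engine id is
# an assumed Hugging Face model identifier used only for illustration.
manifest_client = "inference_api"
manifest_engine = "bigcode/starcoderbase"

if manifest_client in {"openai", "openaichat", "openai_mock", "openrouter",
                       "azureendpoint", "inference_api"}:
    # Hosted-API clients are selected by engine name, mirroring get_manifest.py.
    manifest = Manifest(
        client_name=manifest_client,
        engine=manifest_engine,
    )
else:
    raise ValueError(f"Unhandled manifest client {manifest_client}")
```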
duckdb-nsql/eval/predict.py
CHANGED
@@ -213,7 +213,7 @@ def predict(
     console.print(f"Running with {manifest_params} manifest.")
     model_name = manifest_params.get("engine", manifest_params["model_name"])
 
-    if manifest_client in {"openai", "openaichat", "openrouter", "azureendpoint"}:
+    if manifest_client in {"openai", "openaichat", "openrouter", "azureendpoint", "inference_api"}:
         tokenizer = AutoTokenizer.from_pretrained("gpt2", trust_remote_code=True)
     else:
         tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
@@ -234,7 +234,7 @@ def predict(
         middleix = manifest_engine
     elif manifest_client in {"huggingface", "ray"}:
         middleix = Path(manifest_params.get("model_path", "")).name.replace("/", "-")
-    elif manifest_client in {"toma", "openrouter", "openaichat", "azureendpoint"}:
+    elif manifest_client in {"toma", "openrouter", "openaichat", "azureendpoint", "inference_api"}:
         middleix = manifest_engine.split("/")[-1]
     else:
         raise ValueError(f"Unknown manifest client {manifest_client}")
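A short sketch of what the second hunk does for an `inference_api` run: the output-file name component (`middleix`) is taken from the last segment of the engine id, just as for the other API-backed clients. The engine id below is a made-up example used only to show the string handling; it is not taken from the repository.

```python
from pathlib import Path

# Illustrative values; the engine id is hypothetical.
manifest_client = "inference_api"
manifest_engine = "defog/sqlcoder-7b-2"
manifest_params = {"engine": manifest_engine}

if manifest_client in {"huggingface", "ray"}:
    # Locally hosted models: derive the name from the model path.
    middleix = Path(manifest_params.get("model_path", "")).name.replace("/", "-")
elif manifest_client in {"toma", "openrouter", "openaichat", "azureendpoint", "inference_api"}:
    # Hosted APIs: keep only the final path segment of the engine id.
    middleix = manifest_engine.split("/")[-1]
else:
    raise ValueError(f"Unknown manifest client {manifest_client}")

print(middleix)  # -> "sqlcoder-7b-2"
```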
duckdb-nsql/manifest/.flake8
DELETED
@@ -1,11 +0,0 @@
-# This is our code-style check. We currently allow the following exceptions:
-# - E731: do not assign a lambda expression, use a def
-# - E402: module level import not at top of file
-# - W503: line break before binary operator
-# - E203: whitespace before :
-
-[flake8]
-exclude = .git
-max-line-length = 88
-ignore = E731, E402, W503, E203, PAI100, PAI101, PAI201, PAI202, PAI203
-per-file-ignores = __init__.py:F401, version.py:D100
duckdb-nsql/manifest/.pre-commit-config.yaml
DELETED
@@ -1,23 +0,0 @@
-repos:
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.2.0
-    hooks:
-      - id: trailing-whitespace
-      - id: end-of-file-fixer
-      - id: check-yaml
-      - id: check-toml
-      - id: check-merge-conflict
-      - id: check-added-large-files
-  - repo: https://github.com/timothycrosley/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
-  - repo: https://github.com/psf/black
-    rev: 22.3.0
-    hooks:
-      - id: black
-        language_version: python3
-  - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
-    hooks:
-      - id: flake8
duckdb-nsql/manifest/CHANGELOG.rst
DELETED
@@ -1,93 +0,0 @@
-0.1.10 - Unreleased
----------------------
-
-0.1.9 - 2024-01-22
----------------------
-Fixed
-^^^^^
-* Added trust code params HF models
-* Added LRU cache to HF model param calls to avoid extra calls
-* Fixed pydantic type issue HF model return
-* Support for Python 3.10-3.11
-
-0.1.8 - 2023-05-22
----------------------
-Added
-^^^^^
-* Azure model support (completion and chat)
-* Google Vertex API model support (completion and chat)
-* Streaming responses for LM Completions (set stream=True)
-
-Fixed
-^^^^^
-* `run` with batches now acts the same as async run except not async. We will batch requests into appropriate batchs sizes.
-* Refactored client so unified preprocess and postprocess of requests and responses to better support model variants in request/response format.
-
-0.1.7 - 2023-05-17
----------------------
-Fixed
-^^^^^
-* `_run_chat` fixed bug where not passing in kwargs
-
-0.1.6 - 2023-05-16
----------------------
-Fixed
-^^^^^
-* Unified `run` and `run_chat` methods so it's just `run` now.
-* LLama HF models for eval
-
-0.1.5 - 2023-05-03
----------------------
-Added
-^^^^^
-* Added chat input for chat models.
-
-0.1.4 - 2023-04-24
----------------------
-Added
-^^^^^
-* Connection pools to swap between clients
-* Chunksize param for async runs
-
-Fixed
-^^^^^
-* Determine cache and response by request type, not client name
-* Refactor Response to use Pydantic types for Request and Response
-
-0.1.1
----------------------
-Added
-^^^^^
-* Async support in arun_batch
-
-Fixed
-^^^^^
-* Batched runs now caches individual items
-* Score prompt does not truncate outside token
-
-Removed
-^^^^^
-* Deprecated chatGPT in favor of openaichat which uses OpenAI completions
-* Deprecated Sessions
-
-0.1.0 - 2022-01-31
----------------------
-Added
-^^^^^
-* Batched inference support in `manifest.run`. No more separate `manifest.run_batch` method.
-* Standard request base model for all language inputs.
-* ChatGPT client. Requires CHATGPT_SESSION_KEY to be passed in.
-* Diffusion model support
-* Together model support
-
-Removed
-^^^^^^^
-* `Prompt` class
-* `OPT` client - OPT is now available in HuggingFace
-
-0.0.1 - 2022-11-08
--------------------
-First major pip release of Manifest. Install via `pip install manifest-ml`.
-
-
-.. _@lorr1: https://github.com/lorr1
duckdb-nsql/manifest/LICENSE
DELETED
@@ -1,201 +0,0 @@
[The removed file is the unmodified standard Apache License, Version 2.0 (January 2004, http://www.apache.org/licenses/): 201 lines of boilerplate license text, including the terms and conditions and the appendix with the copyright-notice template, all deleted.]
duckdb-nsql/manifest/Makefile
DELETED
@@ -1,27 +0,0 @@
-dev:
-	pip install -e .[all]
-	pre-commit install
-
-test: dev check
-	pytest tests
-
-format:
-	isort --atomic manifest/ tests/ web_app/
-	black manifest/ tests/ web_app/
-
-check:
-	isort -c manifest/ tests/ web_app/
-	black manifest/ tests/ web_app/ --check
-	flake8 manifest/ tests/ web_app/
-	mypy manifest/ tests/ web_app/
-
-clean:
-	pip uninstall -y manifest
-	rm -rf src/manifest.egg-info
-	rm -rf build/ dist/
-
-prune:
-	@bash -c "git fetch -p";
-	@bash -c "for branch in $(git branch -vv | grep ': gone]' | awk '{print $1}'); do git branch -d $branch; done";
-
-.PHONY: dev test clean check prune
duckdb-nsql/manifest/README.md
DELETED
@@ -1,304 +0,0 @@
-# Manifest
-How to make prompt programming with Foundation Models a little easier.
-
-
-# Table of Contents
-- [Install](#install)
-- [Getting Started](#getting-started)
-- [Manifest](#manifest-components)
-- [Other Models Types](#other-models)
-  - [Local HuggingFace Models](#local-huggingface-models)
-  - [Chat Models](#chat-models)
-  - [Embedding Models](#embedding-models)
-- [Road Map](#road-map)
-- [Development](#development)
-- [Cite](#cite)
-
-
-# Install
-Install:
-```bash
-pip install manifest-ml
-```
-
-Install with diffusion support:
-```bash
-pip install manifest-ml[diffusers]
-```
-
-Install with HuggingFace local model support:
-```bash
-pip install manifest-ml[api]
-```
-
-Dev Install:
-```bash
-git clone git@github.com:HazyResearch/manifest.git
-cd manifest
-make dev
-```
-
-# Getting Started
-Running is simple to get started. If using OpenAI, set `export OPENAI_API_KEY=<OPENAIKEY>` (or pass key in through variable `client_connection`) then run
-
-```python
-from manifest import Manifest
-
-# Start a manifest session to OpenAI - default `engine=text-davinci-003`
-manifest = Manifest(
-    client_name = "openai",
-)
-manifest.run("Why is the grass green?")
-```
-
-## Examples
-We have example notebook and python scripts located at [examples](examples). These show how to use different models, model types (i.e. text, diffusers, or embedding models), and async running.
-
-# Manifest Components
-Manifest is meant to be a very light weight package to help with prompt design and iteration. Three key design decisions of Manifest are
-
-* All models are behind APIs
-* Supports caching of model inputs/outputs for iteration, reproducibility, and cost saving
-* Unified API to support generate, score, and embed
-
-## Models
-Manifest provides model clients for [OpenAI](https://openai.com/), [AI21](https://studio.ai21.com/), [Cohere](https://cohere.ai/), [Together](https://together.xyz/), and HuggingFace (see [below](#huggingface-models) for how to use locally hosted HuggingFace models). You can toggle between the models by changing `client_name` and `client_connection`. For example, if a HuggingFace model is loaded locally, run
-```python
-manifest = Manifest(
-    client_name = "huggingface",
-    client_connection = "http://127.0.0.1:5000",
-)
-```
-If you want to use Cohere, run
-```python
-manifest = Manifest(
-    client_name = "cohere",
-    client_connection = <COHERE_API_KEY>,
-)
-```
-You can also just set `export COHERE_API_KEY=<COHERE_API_KEY>` and not use `client_connection`.
-
-If you want to use AI21 Labs, run
-```python
-manifest = Manifest(
-    client_name = "ai21",
-    client_connection = <AI21_API_KEY>,
-)
-```
-
-You can see the model details and possible model inputs to `run()` via
-```python
-print(manifest.client_pool.get_current_client().get_model_params())
-print(manifest.client_pool.get_current_client().get_model_inputs())
-```
-
-## Global Cache
-We support having queries and results stored in a global cache that can be shared across users. We treat inputs and outputs as key value pairs and support SQLite or Redis backends. To start with global caching using SQLite, run
-
-```python
-manifest = Manifest(
-    client_name = "openai",
-    cache_name = "sqlite",
-    cache_connection = "mycache.sqlite",
-)
-```
-The cache will be saved in `mycache.sqlite`.
-
-We also support Redis backend.
-```python
-manifest = Manifest(
-    client_name = "openai",
-    cache_name = "redis",
-    cache_connection = "localhost:6379"
-)
-```
-As a hint, if you want to get Redis running, see the `docker run` command below under development.
-
-## Running Queries
-Once you have a session open, you can write and develop prompts.
-
-```python
-result = manifest.run("Hello, my name is Laurel")
-```
-
-You can also run over multiple examples if supported by the client.
-```python
-results = manifest.run(["Where are the cats?", "Where are the dogs?"])
-```
-
-We support async queries as well via
-```python
-import asyncio
-results = asyncio.run(manifest.arun_batch(["Where are the cats?", "Where are the dogs?"]))
-```
-
-If something doesn't go right, you can also ask to get a raw manifest Response.
-```python
-result_object = manifest.run(["Where are the cats?", "Where are the dogs?"], return_response=True)
-print(result_object.get_request_obj())
-print(result_object.is_cached())
-print(result_object.get_response_obj())
-```
-
-By default, we do not truncate results based on a stop token. You can change this by either passing a new stop token to a Manifest session or to a `run`.
-```python
-result = manifest.run(prompt, "Laurel", stop_token="and")
-```
-
-If you want to change default parameters to a model, we pass those as `kwargs` to the client.
-```python
-result = manifest.run(prompt, "Laurel", max_tokens=50)
-```
-
-## Streaming Queries
-Manifest also supports streaming the model response back, assuming it's supported by the underlying client. When calling `run`, pass `stream=True` to get a streaming iterator in response.
-
-```python
-result_iterator = manifest.run("Tell me a story. Once upon a time", max_tokens=100, stream=True)
-for res_text in result_iterator:
-    print(res_text)
-```
-Streaming responses are only supported for single string queries (not batch mode) for text completion models.
-
-## Model Pools
-Manifest supports querying multiple models with different schedulers. This is very much a work in progress effort, but Manifest will round robin select (or randomly select) the clients you want. You can use the same client multiple times with different connection strings (e.g. different API keys), or you can mix and match. The only requirement is that all clients are the same request type. I.e. you can't have a pool of generation models and embedding models.
-
-To query between a local model and OpenAI,
-```python
-from manifest.connections.client_pool import ClientConnection
-from manifest import Manifest
-
-client_connection1 = ClientConnection(
-    client_name="huggingface",
-    client_connection="http://127.0.0.1:5000",
-)
-client_connection2 = ClientConnection(client_name="openai", engine="text-ada-001")
-manifest = Manifest(
-    client_pool=[client_connection1, client_connection2],
-    cache_name="sqlite",
-    client_connection=sqlite_cache,
-)
-manifest.run(...)
-```
-
-The speed benefit comes in with async batched runs. When calling `arun_batch` with a list of prompts, Manifest supports a `chunk_size` param. This will break the prompts into `chunk_size` chunks to spread across the client pool. By default `chunk_size` is `-1` which means only one client will get all the prompts to run asynchronously. You must set `chunk_size > 1` to distribute across the pool. There is a further `batch_size` param which control the individual client `batch_size` to send to the model.
-
-```python
-responses = asyncio.run(manifest.arun_batch(prompts, max_tokens=30, chunk_size=20))
-```
-
-# Other Models
-
-## Local Huggingface Models
-To use a HuggingFace generative model, in `manifest/api` we have a Flask application that hosts the models for you.
-
-In a separate terminal or Tmux/Screen session, to load 6B parameters models, run
-```bash
-python3 -m manifest.api.app \
-    --model_type huggingface \
-    --model_name_or_path EleutherAI/gpt-j-6B \
-    --device 0
-```
-You will see the Flask session start and output a URL `http://127.0.0.1:5000`. Pass this in to Manifest. If you want to use a different port, set the `FLASK_PORT` environment variable.
-
-```python
-manifest = Manifest(
-    client_name = "huggingface",
-    client_connection = "http://127.0.0.1:5000",
-)
-```
-
-If you have a custom model you trained, pass the model path to `--model_name_or_path`.
-
-To help load larger models, we also support using `parallelize()` from HF, [accelerate](https://huggingface.co/docs/accelerate/index), [bitsandbytes](https://github.com/TimDettmers/bitsandbytes), and [deepspeed](https://github.com/microsoft/DeepSpeed). You will need to install these packages first via `pip install manifest-ml[api]`. We list the commands to load larger models below.
-
-* T0pp
-```bash
-python3 -m manifest.api.app \
-    --model_type huggingface \
-    --model_name_or_path bigscience/T0pp \
-    --use_hf_parallelize
-```
-
-* NeoX 20B (requires at least 60GB of GPU memory)
-```bash
-python3 -m manifest.api.app \
-    --model_type huggingface \
-    --model_name_or_path EleutherAI/gpt-neox-20b \
-    --use_accelerate_multigpu \
-    --percent_max_gpu_mem_reduction 0.75
-```
-* Bloom 175B (requires at least 240GB of GPU memory)
-```bash
-python3 -m manifest.api.app \
-    --model_type huggingface \
-    --model_name_or_path bigscience/bloom \
-    --use_bitsandbytes \
-    --percent_max_gpu_mem_reduction 0.85
-```
-
-## Chat Models
-Manifest has specific support for executing against chat models in the more standard "system" / "user" dialogue. To pass in a dialogue history to Manifest, use the `run` command with a list of dictionary inputs with `role` and `content` keys using an associated chat model such as `openaichat`.
-
-```python
-manifest = Manifest(client_name="openaichat")
-dialogue = [
-    {"role": "system", "content": "You are a helpful assistant who also responds in rhymes"},
-    {"role": "user", "content": "What is the date?"},
-]
-res = manifest.run(dialogue, max_tokens=100)
-```
-
-## Embedding Models
-Manifest also supports getting embeddings from models and available APIs. We do this all through changing the `client_name` argument. You still use `run` and `abatch_run`.
-
-To use OpenAI's embedding models, simply run
-```python
-manifest = Manifest(client_name="openaiembedding")
-embedding_as_np = manifest.run("Get me an embedding for a bunny")
-```
-
-As explained above, you can load local HuggingFace models that give you embeddings, too. If you want to use a standard generative model, load the model as above use use `client_name="huggingfaceembedding"`. If you want to use a standard embedding model, like those from SentenceTransformers, load your local model via
-```bash
-python3 -m manifest.api.app \
-    --model_type sentence_transformers \
-    --model_name_or_path all-mpnet-base-v2 \
-    --device 0
-```
-
-# Road Map
-Here's what's coming up next
-- [ ] Clients
-- [ ] HuggingFace Hub
-- [x] Azure OpenAI
-- [x] Google Vertex
-- [ ] Anthropic
-- [x] Streaming Support Completions
-- [ ] Streaming Support Chat Models
-- [ ] Data Types
-- [ ] Diffusion Models
-- [x] Orchestration
-- [x] Connection pools
-- [ ] Local Inference
-- [ ] FlexGen
-
-# Development
-Before submitting a PR, run
-```bash
-export REDIS_PORT="6379"  # or whatever PORT local redis is running for those tests
-cd <REDIS_PATH>
-docker run -d -p 127.0.0.1:${REDIS_PORT}:6379 -v `pwd`:`pwd` -w `pwd` --name manifest_redis_test redis
-make test
-```
-
-# Cite
-Please cite Manifest if you used it for any publications. Thanks!!
-```
-@misc{orr2022manifest,
-    author = {Orr, Laurel},
-    title = {Manifest},
-    year = {2022},
-    publisher = {GitHub},
-    howpublished = {\url{https://github.com/HazyResearch/manifest}},
-}
-```
duckdb-nsql/manifest/examples/langchain_chatgpt.ipynb
DELETED
@@ -1,455 +0,0 @@
[Removed example notebook (455 lines of Jupyter JSON): "ChatGPT Clone using TOMA GPT-JT-6B", adapted from LangChain's chatgpt_clone notebook. It sets TOMA_URL=https://staging.together.xyz/api, wraps a Manifest `toma` client (engine Together-gpt-JT-6B-v1, max_tokens=150, top_p=0.9, top_k=40, stop on newline) in LangChain's ManifestWrapper inside an LLMChain with ConversationalBufferWindowMemory(k=8), and runs a series of few-shot classification prompts (sentiment, fruits, colors), printing each model output.]
duckdb-nsql/manifest/examples/manifest_async.py
DELETED
@@ -1,27 +0,0 @@
-import asyncio
-import time
-
-from manifest import Manifest
-
-
-def main():
-
-    manifest = Manifest(
-        client_name="openaichat",
-    )
-
-    print("Running in serial")
-    prompts = [f"Tell me something interesting about {i}" for i in range(50)]
-    st = time.time()
-    for pmt in prompts:
-        _ = manifest.run(pmt)
-    print(f"For loop: {time.time() - st :.2f}")
-
-    print("Running with async")
-    st = time.time()
-    _ = asyncio.run(manifest.arun_batch(prompts, max_tokens=30))
-    print(f"Async loop: {time.time() - st :.2f}")
-
-
-if __name__ == "__main__":
-    main()
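
For reference, a minimal sketch (not part of the deleted file) that keeps the batched completions instead of discarding them. It assumes, as the example above does, that Manifest.arun_batch accepts a list of prompts and returns one result per prompt; the deleted script throws the return value away, so the exact return type is not shown there.

import asyncio

from manifest import Manifest

# Sketch only: same client as the deleted example, smaller batch.
manifest = Manifest(client_name="openaichat")
prompts = [f"Tell me something interesting about {i}" for i in range(10)]
# Assumption: arun_batch yields one completion per prompt, in order.
results = asyncio.run(manifest.arun_batch(prompts, max_tokens=30))
for prompt, completion in zip(prompts, results):
    print(prompt, "->", completion)
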
duckdb-nsql/manifest/examples/manifest_azure.ipynb
DELETED
@@ -1,149 +0,0 @@
-{
-"cells": [
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"%load_ext autoreload\n",
-"%autoreload 2"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"AZURE_KEY = \"API_KEY::URL\"\n",
-"OPENAI_KEY = \"sk-XXX\""
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Use Azure and OpenAI models"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"from manifest.connections.client_pool import ClientConnection\n",
-"from pathlib import Path\n",
-"\n",
-"cache_path = Path(\"manifest.db\")\n",
-"if cache_path.exists():\n",
-" cache_path.unlink()\n",
-"\n",
-"\n",
-"azure = ClientConnection(\n",
-" client_name=\"azureopenai\",\n",
-" client_connection=AZURE_KEY,\n",
-" engine=\"text-davinci-003\",\n",
-")\n",
-"\n",
-"manifest = Manifest(client_pool=[azure], \n",
-" cache_name=\"sqlite\",\n",
-" cache_connection=\"manifest.db\"\n",
-")\n",
-"\n",
-"\n",
-"openai = ClientConnection(\n",
-" client_name=\"openai\",\n",
-" client_connection=OPENAI_KEY,\n",
-" engine=\"text-davinci-003\",\n",
-")\n",
-"\n",
-"manifest_openai_nocache = Manifest(client_pool=[openai])\n",
-"\n",
-"manifest_openai = Manifest(client_pool=[openai], \n",
-" cache_name=\"sqlite\",\n",
-" cache_connection=\"manifest.db\"\n",
-")"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# Show caches are the same\n",
-"text = \"What is the meaning of life?\"\n",
-"res = manifest.run(text, max_tokens=100, temperature=0.7, return_response=True)\n",
-"print(res.get_response())\n",
-"print(res.is_cached())\n",
-"res2 = manifest_openai.run(text, max_tokens=100, temperature=0.7, return_response=True)\n",
-"print(res2.is_cached())\n",
-"\n",
-"assert res2.get_response() == res.get_response()"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"azure_chat = ClientConnection(\n",
-" client_name=\"azureopenaichat\",\n",
-" client_connection=AZURE_KEY,\n",
-" engine=\"gpt-3.5-turbo\",\n",
-")\n",
-"\n",
-"manifest = Manifest(client_pool=[azure_chat])"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"print(manifest.run(\"What do you think is the best food?\", max_tokens=100))\n",
-"\n",
-"chat_dict = [\n",
-" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
-" {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"},\n",
-" {\"role\": \"assistant\", \"content\": \"The Los Angeles Dodgers won the World Series in 2020.\"},\n",
-" {\"role\": \"user\", \"content\": \"Where was it played?\"}\n",
-"]\n",
-"print(manifest.run(chat_dict, max_tokens=100))"
-]
-}
-],
-"metadata": {
-"kernelspec": {
-"display_name": "manifest",
-"language": "python",
-"name": "python3"
-},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.10.4"
-},
-"orig_nbformat": 4,
-"vscode": {
-"interpreter": {
-"hash": "fddffe4ac3b9f00470127629076101c1b5f38ecb1e7358b567d19305425e9491"
-}
-}
-},
-"nbformat": 4,
-"nbformat_minor": 2
-}
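
A minimal script-form sketch of the cache-sharing idea in the deleted notebook (assuming, as the notebook's assertion does, that two Manifest instances pointed at the same SQLite file serve each other's cached responses; the key and engine strings below are placeholders):

from manifest import Manifest
from manifest.connections.client_pool import ClientConnection

# Placeholder credentials; both pools write to the same SQLite cache file.
azure = ClientConnection(client_name="azureopenai", client_connection="API_KEY::URL", engine="text-davinci-003")
openai = ClientConnection(client_name="openai", client_connection="sk-XXX", engine="text-davinci-003")

m_azure = Manifest(client_pool=[azure], cache_name="sqlite", cache_connection="manifest.db")
m_openai = Manifest(client_pool=[openai], cache_name="sqlite", cache_connection="manifest.db")

res = m_azure.run("What is the meaning of life?", max_tokens=100, temperature=0.7, return_response=True)
res2 = m_openai.run("What is the meaning of life?", max_tokens=100, temperature=0.7, return_response=True)
print(res.is_cached(), res2.is_cached())  # second call should be a cache hit
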
duckdb-nsql/manifest/examples/manifest_chatgpt.ipynb
DELETED
@@ -1,101 +0,0 @@
-{
-"cells": [
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"%load_ext autoreload\n",
-"%autoreload 2"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"OPENAI_KEY = \"sk-XXX\""
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Use ChatOpenAI\n",
-"\n",
-"Set you `OPENAI_API_KEY` environment variable."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"from manifest.connections.client_pool import ClientConnection\n",
-"\n",
-"openai_chat = ClientConnection(\n",
-" client_name=\"openaichat\",\n",
-" client_connection=OPENAI_KEY,\n",
-" engine=\"gpt-3.5-turbo\"\n",
-")\n",
-"\n",
-"manifest = Manifest(client_pool=[openai_chat])"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# Simple question\n",
-"chat_dict = [\n",
-" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
-" {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"},\n",
-" {\"role\": \"assistant\", \"content\": \"The Los Angeles Dodgers won the World Series in 2020.\"},\n",
-" {\"role\": \"user\", \"content\": \"Where was it played?\"}\n",
-"]\n",
-"print(manifest.run(chat_dict, max_tokens=100))"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-}
-],
-"metadata": {
-"kernelspec": {
-"display_name": "manifest",
-"language": "python",
-"name": "python3"
-},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.10.4"
-},
-"orig_nbformat": 4,
-"vscode": {
-"interpreter": {
-"hash": "fddffe4ac3b9f00470127629076101c1b5f38ecb1e7358b567d19305425e9491"
-}
-}
-},
-"nbformat": 4,
-"nbformat_minor": 2
-}
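
A small helper sketch (hypothetical, not from the deleted notebook) that builds the role/content message list in the exact format the cell above passes to manifest.run:

def build_chat(system: str, *turns: str) -> list:
    """Build the [{'role': ..., 'content': ...}] list of chat messages."""
    messages = [{"role": "system", "content": system}]
    roles = ["user", "assistant"]  # alternate user/assistant turns
    for i, content in enumerate(turns):
        messages.append({"role": roles[i % 2], "content": content})
    return messages

chat_dict = build_chat(
    "You are a helpful assistant.",
    "Who won the world series in 2020?",
    "The Los Angeles Dodgers won the World Series in 2020.",
    "Where was it played?",
)
# print(manifest.run(chat_dict, max_tokens=100))  # same call as in the deleted cell
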
duckdb-nsql/manifest/examples/manifest_connection_pool.ipynb
DELETED
@@ -1,208 +0,0 @@
-{
-"cells": [
-{
-"cell_type": "code",
-"execution_count": 2,
-"metadata": {},
-"outputs": [],
-"source": [
-"%load_ext autoreload\n",
-"%autoreload 2"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 1,
-"metadata": {},
-"outputs": [],
-"source": [
-"OPENAI_KEY1 = \"sk-XXX\"\n",
-"OPENAI_KEY2 = \"sk-XX\""
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Use OpenAI\n",
-"\n",
-"Set you `OPENAI_API_KEY` environment variable."
-]
-},
-{
-"cell_type": "code",
-"execution_count": 2,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"from manifest.connections.client_pool import ClientConnection\n",
-"\n",
-"openai_ada = ClientConnection(\n",
-" client_name=\"openai\",\n",
-" client_connection=OPENAI_KEY1,\n",
-" engine=\"text-ada-001\"\n",
-")\n",
-"\n",
-"openai_curie = ClientConnection(\n",
-" client_name=\"openai\",\n",
-" client_connection=OPENAI_KEY2,\n",
-" engine=\"text-curie-001\"\n",
-")\n",
-"\n",
-"manifest = Manifest(client_pool=[openai_ada, openai_curie], client_pool_schedule=\"round_robin\")"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 3,
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"0\n",
-"I am a model.\n",
-"1\n",
-"I am a MacBook Pro with a retina\n"
-]
-}
-],
-"source": [
-"res = manifest.run(\"What model are you?\", temperature=0.0)\n",
-"print(manifest.client_pool.current_client_id)\n",
-"print(res)\n",
-"res = manifest.run(\"What model are you?\", temperature=0.0)\n",
-"print(manifest.client_pool.current_client_id)\n",
-"print(res)"
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## With Async"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 4,
-"metadata": {},
-"outputs": [],
-"source": [
-"import nest_asyncio\n",
-"# This is required for asyncio.run(...) to work in Jupyter notebooks.\n",
-"nest_asyncio.apply()"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 5,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"from manifest.connections.client_pool import ClientConnection\n",
-"\n",
-"openai_ada = ClientConnection(\n",
-" client_name=\"openai\",\n",
-" client_connection=OPENAI_KEY1,\n",
-" engine=\"text-ada-001\"\n",
-")\n",
-"\n",
-"openai_babbage = ClientConnection(\n",
-" client_name=\"openai\",\n",
-" client_connection=OPENAI_KEY2,\n",
-" engine=\"text-babbage-001\"\n",
-")\n",
-"\n",
-"openai_curie = ClientConnection(\n",
-" client_name=\"openai\",\n",
-" client_connection=OPENAI_KEY2,\n",
-" engine=\"text-curie-001\"\n",
-")\n",
-"\n",
-"manifest = Manifest(client_pool=[openai_ada, openai_babbage, openai_curie], client_pool_schedule=\"round_robin\")\n",
-"manifest_single_client = Manifest(client_pool=[openai_babbage])"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 6,
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"For loop: 128.68\n",
-"Running with async single client\n",
-"Running 1 tasks across all clients.\n",
-"Async loop: 4.02\n",
-"Running with async two clients but not chunking\n",
-"Running 1 tasks across all clients.\n",
-"Async loop: 3.92\n",
-"Running with async two clients and chunk size\n",
-"Running 20 tasks across all clients.\n",
-"Async loop: 1.44\n"
-]
-}
-],
-"source": [
-"import time\n",
-"import asyncio\n",
-"\n",
-"prompts = [f\"Tell me something interesting about {i}\" for i in range(400)]\n",
-"st = time.time()\n",
-"for pmt in prompts:\n",
-" _ = manifest_single_client.run(pmt, max_tokens=30)\n",
-"print(f\"For loop: {time.time() - st :.2f}\")\n",
-"\n",
-"print(\"Running with async single client\")\n",
-"st = time.time()\n",
-"_ = asyncio.run(manifest_single_client.arun_batch(prompts, max_tokens=30, chunk_size=-1))\n",
-"print(f\"Async loop: {time.time() - st :.2f}\")\n",
-"\n",
-"print(\"Running with async two clients but not chunking\")\n",
-"st = time.time()\n",
-"_ = asyncio.run(manifest.arun_batch(prompts, max_tokens=30, chunk_size=-1))\n",
-"print(f\"Async loop: {time.time() - st :.2f}\")\n",
-"\n",
-"print(\"Running with async two clients and chunk size\")\n",
-"st = time.time()\n",
-"_ = asyncio.run(manifest.arun_batch(prompts, max_tokens=30, chunk_size=20))\n",
-"print(f\"Async loop: {time.time() - st :.2f}\")"
-]
-}
-],
-"metadata": {
-"kernelspec": {
-"display_name": "manifest",
-"language": "python",
-"name": "python3"
-},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.10.4"
-},
-"orig_nbformat": 4,
-"vscode": {
-"interpreter": {
-"hash": "fddffe4ac3b9f00470127629076101c1b5f38ecb1e7358b567d19305425e9491"
-}
-}
-},
-"nbformat": 4,
-"nbformat_minor": 2
-}
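
The timing cell above suggests that chunk_size controls how many sub-batches get scheduled across the pool (400 prompts with chunk_size=20 printed "Running 20 tasks"). A rough sketch of that relationship, inferred only from the printed output rather than from the library internals:

def num_tasks(n_prompts: int, chunk_size: int) -> int:
    """Approximate number of scheduled tasks for one arun_batch call."""
    if chunk_size == -1:  # -1 appears to mean one task for the whole batch
        return 1
    return (n_prompts + chunk_size - 1) // chunk_size  # ceiling division

print(num_tasks(400, -1))  # 1, matching the un-chunked runs above
print(num_tasks(400, 20))  # 20, matching the chunked run above
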
duckdb-nsql/manifest/examples/manifest_diffusers.ipynb
DELETED
The diff for this file is too large to render. See raw diff.
duckdb-nsql/manifest/examples/manifest_embedding.ipynb
DELETED
@@ -1,156 +0,0 @@
-{
-"cells": [
-{
-"cell_type": "code",
-"execution_count": 1,
-"metadata": {},
-"outputs": [],
-"source": [
-"%load_ext autoreload\n",
-"%autoreload 2"
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Use OpenAI\n",
-"\n",
-"Set you `OPENAI_API_KEY` environment variable."
-]
-},
-{
-"cell_type": "code",
-"execution_count": 2,
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"{'model_name': 'openaiembedding', 'engine': 'text-embedding-ada-002'}\n"
-]
-}
-],
-"source": [
-"from manifest import Manifest\n",
-"\n",
-"manifest = Manifest(client_name=\"openaiembedding\")\n",
-"print(manifest.client_pool.get_next_client().get_model_params())"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 3,
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"(1536,)\n"
-]
-}
-],
-"source": [
-"emb = manifest.run(\"Is this an embedding?\")\n",
-"print(emb.shape)"
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Using Locally Hosted Huggingface LM\n",
-"\n",
-"Run\n",
-"```\n",
-"python3 manifest/api/app.py --model_type huggingface --model_name_or_path EleutherAI/gpt-neo-125M --device 0\n",
-"```\n",
-"or\n",
-"```\n",
-"python3 manifest/api/app.py --model_type sentence_transformers --model_name_or_path all-mpnet-base-v2 --device 0\n",
-"```\n",
-"\n",
-"in a separate `screen` or `tmux`. Make sure to note the port. You can change this with `export FLASK_PORT=<port>`."
-]
-},
-{
-"cell_type": "code",
-"execution_count": 1,
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"{'model_name': 'all-mpnet-base-v2', 'model_path': 'all-mpnet-base-v2', 'client_name': 'huggingfaceembedding'}\n"
-]
-}
-],
-"source": [
-"from manifest import Manifest\n",
-"\n",
-"# Local hosted GPT Neo 125M\n",
-"manifest = Manifest(\n",
-" client_name=\"huggingfaceembedding\",\n",
-" client_connection=\"http://127.0.0.1:6000\",\n",
-" cache_name=\"sqlite\",\n",
-" cache_connection=\"my_sqlite_manifest.sqlite\"\n",
-")\n",
-"print(manifest.client_pool.get_next_client().get_model_params())"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 4,
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"(768,)\n",
-"(768,) (768,)\n"
-]
-}
-],
-"source": [
-"emb = manifest.run(\"Is this an embedding?\")\n",
-"print(emb.shape)\n",
-"\n",
-"emb = manifest.run([\"Is this an embedding?\", \"Bananas!!!\"])\n",
-"print(emb[0].shape, emb[1].shape)"
-]
-}
-],
-"metadata": {
-"kernelspec": {
-"display_name": "manifest",
-"language": "python",
-"name": "python3"
-},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.10.4"
-},
-"orig_nbformat": 4,
-"vscode": {
-"interpreter": {
-"hash": "fddffe4ac3b9f00470127629076101c1b5f38ecb1e7358b567d19305425e9491"
-}
-}
-},
-"nbformat": 4,
-"nbformat_minor": 2
-}
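
A short follow-on sketch (not part of the deleted notebook) showing one common next step with the returned vectors. It assumes, as the printed shapes above indicate, that manifest.run returns numpy arrays for embedding clients:

import numpy as np
from manifest import Manifest

manifest = Manifest(client_name="openaiembedding")  # same client as the deleted cell

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity between two embedding vectors."""
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

emb = manifest.run(["Is this an embedding?", "Bananas!!!"])
print(cosine_similarity(emb[0], emb[1]))
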
duckdb-nsql/manifest/examples/manifest_google.ipynb
DELETED
@@ -1,117 +0,0 @@
-{
-"cells": [
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"%load_ext autoreload\n",
-"%autoreload 2"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"GOOGLE_KEY = \"KEY::PROJECT_ID\""
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Use GoogleVertexAPI"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"from manifest.connections.client_pool import ClientConnection\n",
-"\n",
-"google_bison = ClientConnection(\n",
-" client_name=\"google\",\n",
-" client_connection=GOOGLE_KEY\n",
-")\n",
-"\n",
-"manifest = Manifest(client_pool=[google_bison])"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"# Simple question\n",
-"print(manifest.run(\"What is your name\", max_tokens=40))"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"from manifest.connections.client_pool import ClientConnection\n",
-"\n",
-"google_bison = ClientConnection(\n",
-" client_name=\"googlechat\",\n",
-" client_connection=GOOGLE_KEY\n",
-")\n",
-"\n",
-"manifest = Manifest(client_pool=[google_bison])"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"chat_dict = [\n",
-" # {\"author\": \"bot\", \"content\": \"You are a helpful assistant.\"},\n",
-" {\"author\": \"user\", \"content\": \"Who won the world series in 2020?\"},\n",
-" {\"author\": \"bot\", \"content\": \"The Los Angeles Dodgers won the World Series in 2020.\"},\n",
-" {\"author\": \"user\", \"content\": \"Where was it played?\"}\n",
-"]\n",
-"print(manifest.run(chat_dict, max_tokens=8))"
-]
-}
-],
-"metadata": {
-"kernelspec": {
-"display_name": "manifest",
-"language": "python",
-"name": "python3"
-},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.10.4"
-},
-"orig_nbformat": 4,
-"vscode": {
-"interpreter": {
-"hash": "fddffe4ac3b9f00470127629076101c1b5f38ecb1e7358b567d19305425e9491"
-}
-}
-},
-"nbformat": 4,
-"nbformat_minor": 2
-}
duckdb-nsql/manifest/examples/manifest_openrouter.ipynb
DELETED
@@ -1,108 +0,0 @@
-{
-"cells": [
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"%load_ext autoreload\n",
-"%autoreload 2"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 4,
-"metadata": {},
-"outputs": [],
-"source": [
-"OPENROUTER_API_KEY = \"sk-...\""
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Use ChatOpenAI\n",
-"\n",
-"Set you `OPENROUTER_API_KEY` environment variable."
-]
-},
-{
-"cell_type": "code",
-"execution_count": 5,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"from manifest.connections.client_pool import ClientConnection\n",
-"\n",
-"openai_chat = ClientConnection(\n",
-" client_name=\"openrouter\",\n",
-" client_connection=OPENROUTER_API_KEY,\n",
-" engine=\"meta-llama/codellama-70b-instruct\"\n",
-")\n",
-"\n",
-"manifest = Manifest(client_pool=[openai_chat])"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 6,
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"2020 World Series was played at the Globe Life Field in Arlington, Texas.\n"
-]
-}
-],
-"source": [
-"# Simple question\n",
-"chat_dict = [\n",
-" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
-" {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"},\n",
-" {\"role\": \"assistant\", \"content\": \"The Los Angeles Dodgers won the World Series in 2020.\"},\n",
-" {\"role\": \"user\", \"content\": \"Where was it played?\"}\n",
-"]\n",
-"print(manifest.run(chat_dict, max_tokens=100))"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-}
-],
-"metadata": {
-"kernelspec": {
-"display_name": "Python 3 (ipykernel)",
-"language": "python",
-"name": "python3"
-},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.11.5"
-},
-"vscode": {
-"interpreter": {
-"hash": "fddffe4ac3b9f00470127629076101c1b5f38ecb1e7358b567d19305425e9491"
-}
-}
-},
-"nbformat": 4,
-"nbformat_minor": 4
-}
duckdb-nsql/manifest/examples/manifest_streaming.ipynb
DELETED
@@ -1,105 +0,0 @@
-{
-"cells": [
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"%load_ext autoreload\n",
-"%autoreload 2"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"OPENAI_KEY = \"sk-XXX\""
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Use ChatOpenAI\n",
-"\n",
-"Set you `OPENAI_API_KEY` environment variable."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"from manifest.connections.client_pool import ClientConnection\n",
-"\n",
-"openai_chat = ClientConnection(\n",
-" client_name=\"openaichat\",\n",
-" client_connection=OPENAI_KEY,\n",
-" engine=\"gpt-3.5-turbo\"\n",
-")\n",
-"\n",
-"manifest = Manifest(client_pool=[openai_chat])"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"manifest_iterator = manifest.run(\"Tell me a story about a fat cat.\\n\\nOnce upon a time\", max_tokens=200, stream=True)"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"import sys\n",
-"\n",
-"cur_line_length = 0\n",
-"# Iterate over stream\n",
-"for res in manifest_iterator:\n",
-" sys.stdout.write(res)\n",
-" cur_line_length += len(res)\n",
-" if cur_line_length > 80:\n",
-" sys.stdout.write(\"\\n\")\n",
-" cur_line_length = 0"
-]
-}
-],
-"metadata": {
-"kernelspec": {
-"display_name": "manifest",
-"language": "python",
-"name": "python3"
-},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.10.4"
-},
-"orig_nbformat": 4,
-"vscode": {
-"interpreter": {
-"hash": "fddffe4ac3b9f00470127629076101c1b5f38ecb1e7358b567d19305425e9491"
-}
-}
-},
-"nbformat": 4,
-"nbformat_minor": 2
-}
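
A variant sketch of the streaming loop above that accumulates the full completion instead of wrapping lines on stdout. It assumes, as the deleted cells do, that stream=True returns an iterator of text fragments and that OPENAI_API_KEY is available in the environment:

from manifest import Manifest

manifest = Manifest(client_name="openaichat")  # assumption: key picked up from the environment
pieces = []
for piece in manifest.run("Tell me a story about a fat cat.", max_tokens=200, stream=True):
    pieces.append(piece)  # each piece is a text fragment from the stream
print("".join(pieces))
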
duckdb-nsql/manifest/examples/manifest_together.ipynb
DELETED
@@ -1,106 +0,0 @@
-{
-"cells": [
-{
-"cell_type": "code",
-"execution_count": 1,
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"env: TOMA_URL=<TOMA_URL>\n"
-]
-}
-],
-"source": [
-"%load_ext autoreload\n",
-"%autoreload 2\n",
-"\n",
-"%env TOMA_URL=<TOMA_URL>"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"\n",
-"# The responses are not fast\n",
-"manifest = Manifest(\n",
-" client_name=\"toma\",\n",
-")\n",
-"\n",
-"print(manifest.run(\"What is the color of an apple?\"))"
-]
-},
-{
-"attachments": {},
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"With a cache"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from manifest import Manifest\n",
-"\n",
-"# The responses are not fast\n",
-"manifest = Manifest(\n",
-" client_name=\"toma\",\n",
-" cache_name=\"sqlite\",\n",
-" cache_connection=\"my_manifest_cache.sqlite\",\n",
-")\n",
-"\n",
-"res = manifest.run(\"What is the color of an apple?\", return_response=True)\n",
-"print(res.get_response())\n",
-"print(\"Is Cached?\", res.is_cached())\n",
-"\n",
-"res = manifest.run(\"What is the color of an apple?\", return_response=True)\n",
-"print(res.get_response())\n",
-"print(\"Is Cached?\", res.is_cached())"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-}
-],
-"metadata": {
-"kernelspec": {
-"display_name": "manifest",
-"language": "python",
-"name": "python3"
-},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.10.4"
-},
-"orig_nbformat": 4,
-"vscode": {
-"interpreter": {
-"hash": "fddffe4ac3b9f00470127629076101c1b5f38ecb1e7358b567d19305425e9491"
-}
-}
-},
-"nbformat": 4,
-"nbformat_minor": 2
-}
duckdb-nsql/manifest/manifest/__init__.py
DELETED
@@ -1,6 +0,0 @@
-"""Manifest init."""
-from manifest.manifest import Manifest
-from manifest.request import Request
-from manifest.response import Response
-
-__all__ = ["Manifest", "Response", "Request"]
duckdb-nsql/manifest/manifest/api/__init__.py
DELETED
@@ -1 +0,0 @@
-"""Api init."""
duckdb-nsql/manifest/manifest/api/app.py
DELETED
@@ -1,301 +0,0 @@
-"""Flask app."""
-import argparse
-import io
-import json
-import logging
-import os
-import socket
-from typing import Dict
-
-import pkg_resources
-from flask import Flask, Response, request
-
-from manifest.api.models.diffuser import DiffuserModel
-from manifest.api.models.huggingface import (
-    MODEL_GENTYPE_REGISTRY,
-    CrossModalEncoderModel,
-    TextGenerationModel,
-)
-from manifest.api.models.sentence_transformer import SentenceTransformerModel
-from manifest.api.response import ModelResponse
-
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-logger = logging.getLogger(__name__)
-app = Flask(__name__)  # define app using Flask
-# Will be global
-model = None
-model_type = None
-PORT = int(os.environ.get("FLASK_PORT", 5000))
-MODEL_CONSTRUCTORS = {
-    "huggingface": TextGenerationModel,
-    "sentence_transformers": SentenceTransformerModel,
-    "huggingface_crossmodal": CrossModalEncoderModel,
-    "diffuser": DiffuserModel,
-}
-
-
-def parse_args() -> argparse.Namespace:
-    """Generate args."""
-    parser = argparse.ArgumentParser(description="Model args")
-    parser.add_argument(
-        "--model_type",
-        default=None,
-        type=str,
-        required=True,
-        help="Model type used for finding constructor.",
-        choices=MODEL_CONSTRUCTORS.keys(),
-    )
-    parser.add_argument(
-        "--model_generation_type",
-        default=None,
-        type=str,
-        help="Model generation type.",
-        choices=MODEL_GENTYPE_REGISTRY.keys(),
-    )
-    parser.add_argument(
-        "--model_name_or_path",
-        default=None,
-        type=str,
-        help="Name of model or path to model. Used in initialize of model class.",
-    )
-    parser.add_argument(
-        "--cache_dir", default=None, type=str, help="Cache directory for models."
-    )
-    parser.add_argument(
-        "--device", type=int, default=0, help="Model device. -1 for CPU."
-    )
-    parser.add_argument(
-        "--fp16", action="store_true", help="Force use fp16 for model params."
-    )
-    parser.add_argument(
-        "--percent_max_gpu_mem_reduction",
-        type=float,
-        default=0.85,
-        help="Used with accelerate multigpu. Scales down max memory.",
-    )
-    parser.add_argument(
-        "--use_bitsandbytes",
-        action="store_true",
-        help=("Use bits and bytes. " "This will override --device parameter."),
-    )
-    parser.add_argument(
-        "--use_accelerate_multigpu",
-        action="store_true",
-        help=(
-            "Use accelerate for multi gpu inference. "
-            "This will override --device parameter."
-        ),
-    )
-    parser.add_argument(
-        "--use_hf_parallelize",
-        action="store_true",
-        help=(
-            "Use HF parallelize for multi gpu inference. "
-            "This will override --device parameter."
-        ),
-    )
-    parser.add_argument(
-        "--use_deepspeed",
-        action="store_true",
-        help=("Use deepspeed. This will override --device parameter."),
-    )
-    args = parser.parse_args()
-    return args
-
-
-def is_port_in_use(port: int) -> bool:
-    """Check if port is in use."""
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        return s.connect_ex(("localhost", port)) == 0
-
-
-def main() -> None:
-    """Run main."""
-    kwargs = parse_args()
-    if is_port_in_use(PORT):
-        raise ValueError(f"Port {PORT} is already in use.")
-    global model_type
-    model_type = kwargs.model_type
-    model_gen_type = kwargs.model_generation_type
-    model_name_or_path = kwargs.model_name_or_path
-    if not model_name_or_path:
-        raise ValueError("Must provide model_name_or_path.")
-    if kwargs.use_accelerate_multigpu:
-        logger.info("Using accelerate. Overridding --device argument.")
-    if (
-        kwargs.percent_max_gpu_mem_reduction <= 0
-        or kwargs.percent_max_gpu_mem_reduction > 1
-    ):
-        raise ValueError("percent_max_gpu_mem_reduction must be in (0, 1].")
-    if (
-        sum(
-            [
-                kwargs.use_accelerate_multigpu,
-                kwargs.use_hf_parallelize,
-                kwargs.use_bitsandbytes,
-                kwargs.use_deepspeed,
-            ]
-        )
-        > 1
-    ):
-        raise ValueError(
-            "Only one of use_accelerate_multigpu, use_hf_parallelize, "
-            "use_bitsandbytes, and use_deepspeed can be set."
-        )
-    # Global model
-    global model
-    model = MODEL_CONSTRUCTORS[model_type](
-        model_name_or_path,
-        model_type=model_gen_type,
-        cache_dir=kwargs.cache_dir,
-        device=kwargs.device,
-        use_accelerate=kwargs.use_accelerate_multigpu,
-        use_parallelize=kwargs.use_hf_parallelize,
-        use_bitsandbytes=kwargs.use_bitsandbytes,
-        use_deepspeed=kwargs.use_deepspeed,
-        perc_max_gpu_mem_red=kwargs.percent_max_gpu_mem_reduction,
-        use_fp16=kwargs.fp16,
-    )
-    app.run(host="0.0.0.0", port=PORT)
-
-
-@app.route("/completions", methods=["POST"])
-def completions() -> Response:
-    """Get completions for generation."""
-    prompt = request.json["prompt"]
-    del request.json["prompt"]
-    generation_args = request.json
-
-    if not isinstance(prompt, (str, list)):
-        raise ValueError("Prompt must be a str or list of str")
-    try:
-        result_gens = []
-        for generations in model.generate(prompt, **generation_args):
-            result_gens.append(generations)
-        if model_type == "diffuser":
-            # Assign None logprob as it's not supported in diffusers
-            results = [
-                {"array": r[0], "logprob": None, "tokens": None, "token_logprobs": None}
-                for r in result_gens
-            ]
-            res_type = "image_generation"
-        else:
-            results = [
-                {"text": r[0], "logprob": r[1], "tokens": r[2], "token_logprobs": r[3]}
-                for r in result_gens
-            ]
-            res_type = "text_completion"
-        # transform the result into the openai format
-        return Response(
-            json.dumps(ModelResponse(results, response_type=res_type).__dict__()),
-            status=200,
-        )
-    except Exception as e:
-        logger.error(e)
-        return Response(
-            json.dumps({"message": str(e)}),
-            status=400,
-        )
-
-
-@app.route("/embed", methods=["POST"])
-def embed() -> Response:
-    """Get embed for generation."""
-    if "modality" in request.json:
-        modality = request.json["modality"]
-    else:
-        modality = "text"
-    if modality == "text":
-        prompts = request.json["prompt"]
-    elif modality == "image":
-        import base64
-
-        from PIL import Image
-
-        prompts = [
-            Image.open(io.BytesIO(base64.b64decode(data)))
-            for data in request.json["prompt"]
-        ]
-    else:
-        raise ValueError("modality must be text or image")
-
-    try:
-        results = []
-        embeddings = model.embed(prompts)
-        for embedding in embeddings:
-            results.append(
-                {
-                    "array": embedding,
-                    "logprob": None,
-                    "tokens": None,
-                    "token_logprobs": None,
-                }
-            )
-
-        return Response(
-            json.dumps(
-                ModelResponse(results, response_type="embedding_generation").__dict__()
-            ),
-            status=200,
-        )
-    except Exception as e:
-        logger.error(e)
-        return Response(
-            json.dumps({"message": str(e)}),
-            status=400,
-        )
-
-
-@app.route("/score_sequence", methods=["POST"])
-def score_sequence() -> Response:
-    """Get logprob of prompt."""
-    prompt = request.json["prompt"]
-    del request.json["prompt"]
-    generation_args = request.json
-
-    if not isinstance(prompt, (str, list)):
-        raise ValueError("Prompt must be a str or list of str")
-
-    try:
-        score_list = model.score_sequence(prompt, **generation_args)
-        results = [
-            {
-                "text": prompt if isinstance(prompt, str) else prompt[i],
-                "logprob": r[0],
-                "tokens": r[1],
-                "token_logprobs": r[2],
-            }
-            for i, r in enumerate(score_list)
-        ]
-        # transform the result into the openai format
-        return Response(
-            json.dumps(
-                ModelResponse(results, response_type="prompt_logit_score").__dict__()
-            ),
-            status=200,
-        )
-    except Exception as e:
-        logger.error(e)
-        return Response(
-            json.dumps({"message": str(e)}),
-            status=400,
-        )
-
-
-@app.route("/params", methods=["POST"])
-def params() -> Dict:
-    """Get model params."""
-    return model.get_init_params()
-
-
-@app.route("/")
-def index() -> str:
-    """Get index completion."""
-    fn = pkg_resources.resource_filename("metaseq", "service/index.html")
-    with open(fn) as f:
-        return f.read()
-
-
-if __name__ == "__main__":
-    main()
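
A minimal client-side sketch for the Flask routes above (hypothetical values throughout). It assumes the server is running locally on the default FLASK_PORT of 5000 and that the JSON body carries "prompt" plus generation kwargs, which is how completions() reads the request; the exact kwargs accepted downstream depend on the model class:

import requests

# Assumption: the server was started with `python3 manifest/api/app.py --model_type huggingface ...`
resp = requests.post(
    "http://127.0.0.1:5000/completions",
    json={"prompt": "What is the capital of France?", "max_new_tokens": 20},
)
resp.raise_for_status()
print(resp.json())  # OpenAI-style payload built by ModelResponse
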
duckdb-nsql/manifest/manifest/api/models/__init__.py
DELETED
@@ -1 +0,0 @@
-"""Models init."""
duckdb-nsql/manifest/manifest/api/models/diffuser.py
DELETED
@@ -1,123 +0,0 @@
-"""Diffuser model."""
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
-
-import numpy as np
-import torch
-from diffusers import StableDiffusionPipeline
-
-from manifest.api.models.model import Model
-
-
-class DiffuserModel(Model):
-    """Diffuser model."""
-
-    def __init__(
-        self,
-        model_name_or_path: str,
-        model_type: Optional[str] = None,
-        model_config: Optional[str] = None,
-        cache_dir: Optional[str] = None,
-        device: int = 0,
-        use_accelerate: bool = False,
-        use_parallelize: bool = False,
-        use_bitsandbytes: bool = False,
-        use_deepspeed: bool = False,
-        perc_max_gpu_mem_red: float = 1.0,
-        use_fp16: bool = False,
-    ):
-        """
-        Initialize model.
-
-        All arguments will be passed in the request from Manifest.
-
-        Args:
-            model_name_or_path: model name string.
-            model_config: model config string.
-            cache_dir: cache directory for model.
-            device: device to use for model.
-            use_accelerate: whether to use accelerate for multi-gpu inference.
-            use_parallelize: use HF default parallelize
-            use_bitsandbytes: use HF bits and bytes
-            use_deepspeed: use deepspeed
-            perc_max_gpu_mem_red: percent max memory reduction in accelerate
-            use_fp16: use fp16 for model weights.
-        """
-        if use_accelerate or use_parallelize or use_bitsandbytes or use_deepspeed:
-            raise ValueError(
-                "Cannot use accelerate or parallelize or "
-                "bitsandbytes or deepspeeed with diffusers"
-            )
-        # Check if providing path
-        self.model_path = model_name_or_path
-        if Path(self.model_path).exists() and Path(self.model_path).is_dir():
-            model_name_or_path = Path(self.model_path).name
-        self.model_name = model_name_or_path
-        print("Model Name:", self.model_name, "Model Path:", self.model_path)
-        dtype = torch.float16 if use_fp16 else None
-        torch_device = (
-            torch.device("cpu")
-            if (device == -1 or not torch.cuda.is_available())
-            else torch.device(f"cuda:{device}")
-        )
-        self.pipeline = StableDiffusionPipeline.from_pretrained(
-            self.model_path,
-            torch_dtype=dtype,
-            revision="fp16" if str(dtype) == "float16" else None,
-        )
-        self.pipeline.safety_checker = None
-        self.pipeline.to(torch_device)
-
-    def get_init_params(self) -> Dict:
-        """Return init params to determine what model is being used."""
-        return {"model_name": self.model_name, "model_path": self.model_path}
-
-    @torch.no_grad()
-    def generate(
-        self, prompt: Union[str, List[str]], **kwargs: Any
-    ) -> List[Tuple[Any, float, List[str], List[float]]]:
-        """
-        Generate the prompt from model.
-
-        Outputs must be generated text and score, not including prompt.
-
-        Args:
-            prompt: promt to generate from.
-
-        Returns:
-            list of generated text (list of length 1 for 1 generation).
-        """
-        # TODO: Is this correct for getting arguments in?
-        if isinstance(prompt, str):
-            prompt = [prompt]
-        result = self.pipeline(prompt, output_type="np.array", **kwargs)
-        # Return None for logprobs and token logprobs
-        return [(im, None, None, None) for im in result["images"]]
-
-    @torch.no_grad()
-    def embed(self, prompt: Union[str, List[str]], **kwargs: Any) -> np.ndarray:
-        """
-        Embed the prompt from model.
-
-        Args:
-            prompt: promt to embed from.
-
-        Returns:
-            list of embeddings (list of length 1 for 1 embedding).
-        """
-        raise NotImplementedError("Embed not supported for diffusers")
-
-    @torch.no_grad()
-    def score_sequence(
-        self, prompt: Union[str, List[str]], **kwargs: Any
-    ) -> List[Tuple[float, List[int], List[float]]]:
-        """
-        Score a sequence of choices.
-
-        Args:
-            prompt (:obj:`str` or :obj:`List[str]`):
-                The prompt to score the choices against.
-            **kwargs:
-                Additional keyword arguments passed along to the :obj:`__call__` method.
-        """
-        raise NotImplementedError("Score sequence not supported for diffusers")
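
A usage sketch for the class above (hypothetical model id and file name). It relies only on the constructor and generate() shown, which return numpy image arrays via output_type="np.array"; the uint8 conversion for saving is an assumption about the array scale, not something stated in the deleted file:

from PIL import Image  # assumption: Pillow is available for saving the array

from manifest.api.models.diffuser import DiffuserModel

model = DiffuserModel("runwayml/stable-diffusion-v1-5", device=0, use_fp16=True)
image_array, _, _, _ = model.generate("a watercolor painting of a duck")[0]
# Assumption: pipeline returns floats in [0, 1]; scale to 8-bit before saving.
Image.fromarray((image_array * 255).astype("uint8")).save("duck.png")
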
duckdb-nsql/manifest/manifest/api/models/huggingface.py
DELETED
@@ -1,671 +0,0 @@
-"""Huggingface model."""
-import json
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, cast
-
-import deepspeed
-import numpy as np
-import PIL
-import torch
-from accelerate import dispatch_model, infer_auto_device_map
-from accelerate.utils.modeling import get_max_memory as acc_get_max_memory
-from transformers import (
-    AutoModelForCausalLM,
-    AutoModelForSeq2SeqLM,
-    AutoTokenizer,
-    BloomForCausalLM,
-    CLIPModel,
-    CLIPProcessor,
-    GPT2LMHeadModel,
-    GPTJForCausalLM,
-    GPTNeoForCausalLM,
-    GPTNeoXForCausalLM,
-    LlamaForCausalLM,
-    LlamaTokenizer,
-    OPTForCausalLM,
-    PreTrainedModel,
-    PreTrainedTokenizer,
-)
-
-from manifest.api.models.model import Model
-
-MODEL_REGISTRY = {
-    "EleutherAI/gpt-neo-125M": GPTNeoForCausalLM,
-    "EleutherAI/gpt-neo-1.3B": GPTNeoForCausalLM,
-    "EleutherAI/gpt-neo-2.7B": GPTNeoForCausalLM,
-    "EleutherAI/gpt-j-6B": GPTJForCausalLM,
-    "EleutherAI/gpt-neox-20b": GPTNeoXForCausalLM,
-    "facebook/opt-125m": OPTForCausalLM,
-    "facebook/opt-350m": OPTForCausalLM,
-    "Salesforce/codegen-2B-mono": AutoModelForCausalLM,
-    "Salesforce/codegen-6B-mono": AutoModelForCausalLM,
-    "facebook/opt-1.3b": OPTForCausalLM,
-    "facebook/opt-2.7b": OPTForCausalLM,
-    "facebook/opt-6.7b": OPTForCausalLM,
-    "facebook/opt-13b": OPTForCausalLM,
-    "facebook/opt-30b": OPTForCausalLM,
-    "gpt2": GPT2LMHeadModel,
-    "openai/clip-vit-base-patch32": CLIPModel,
-    "bigscience/bloom-560m": BloomForCausalLM,
-    "bigscience/bloom-1b7": BloomForCausalLM,
-    "bigscience/bloom-3b": BloomForCausalLM,
-    "bigscience/bloom-7b1": BloomForCausalLM,
-    "chainyo/alpaca-lora-7b": LlamaForCausalLM,
-    "bigscience/bloom": AutoModelForCausalLM,
-    "bigscience/T0pp": AutoModelForSeq2SeqLM,
-    "bigscience/T0_3B": AutoModelForSeq2SeqLM,
-    "google/t5-small-lm-adapt": AutoModelForSeq2SeqLM,  # 220M
-    "google/t5-l-lm-adapt": AutoModelForSeq2SeqLM,  # 800M
-    "google/t5-xl-lm-adapt": AutoModelForSeq2SeqLM,  # 3B
-    "google/t5-xxl-lm-adapt": AutoModelForSeq2SeqLM,  # 11B
-    "google/t5-v1_1-l": AutoModelForSeq2SeqLM,  # 800M
-    "google/t5-v1_1-xl": AutoModelForSeq2SeqLM,  # 3B
-    "google/t5-v1_1-xxl": AutoModelForSeq2SeqLM,  # 11B
-    "google/flan-t5-l": AutoModelForSeq2SeqLM,  # 800M
-    "google/flan-t5-xl": AutoModelForSeq2SeqLM,  # 3B
-    "google/flan-t5-xxl": AutoModelForSeq2SeqLM,  # 11B
-}
-
-MODEL_GENTYPE_REGISTRY = {
-    "text-generation": AutoModelForCausalLM,
-    "llama-text-generation": LlamaForCausalLM,
-    "text2text-generation": AutoModelForSeq2SeqLM,
-}
-
-
-def get_max_memory(gpu_reduction: float) -> Dict[int, str]:
-    """Get max memory in GB times reduction."""
-    free_in_gb = int(torch.cuda.mem_get_info()[0] / 1024**3)  # type: ignore
-    max_mem = f"{int(gpu_reduction*free_in_gb)}GB"
-
-    n_gpus = torch.cuda.device_count()
-    max_mem_dict = {i: max_mem for i in range(n_gpus)}
-    return max_mem_dict
-
-
-class GenerationPipeline:
-    """
-    Custom Pipeline.
-
-    HF pipelines do not handle devices well in multi-gpu setting.
-    Create our own generation pipeline.
-    """
-
-    def __init__(
-        self,
-        model: Union[PreTrainedModel, deepspeed.InferenceEngine],
-        tokenizer: PreTrainedTokenizer,
-        device: int = None,
-        bitsandbytes: bool = False,
-        is_encdec: bool = False,
-    ):
-        """Initialize."""
-        # Use to turn off sampling
-        # https://github.com/TimDettmers/bitsandbytes/issues/42
-        self.bitsandbytes = bitsandbytes
-        self.model = model
-        self.is_encdec = is_encdec
-        config = model.config  # type: ignore
-        # Used for GPT
-        self.max_length = getattr(config, "max_position_embeddings", None)
-        if self.max_length is None:
-            # Used for Bloom
-            self.max_length = getattr(config, "seq_length", None)
-        if self.max_length is None:
-            # Used for T0
-            self.max_length = getattr(config, "d_model", None)
-        if self.max_length is None:
-            # Default
-            self.max_length = 2048
-
-        print(f"Usings max_length: {self.max_length}")
-
-        self.tokenizer = tokenizer
-        # self.device = device
-        # With bits and bytes, do not want to place inputs on any device
-        # if self.device:
-        self.device = (
-            torch.device("cpu")
-            if (device == -1 or not torch.cuda.is_available())
-            else torch.device(f"cuda:{device}")
-        )
-
-    def __call__(
-        self, text: Union[str, List[str]], **kwargs: Any
-    ) -> List[Dict[str, Union[str, List[float], List[str]]]]:
-        """Generate from text.
-
-        Args:
-            text: text to generate.
-
-        Returns:
-            generated text.
-        """
-        # If text is longer than max model length, we reduce max input length to ensure
-        # the user indicated generation tokens is preserved.
-        max_input_len = (
-            self.max_length - kwargs.get("max_new_tokens")
-            if not self.is_encdec
-            else self.max_length
-        )
-        encoded_prompt = self.tokenizer(
-            text,
-            max_length=max_input_len,
-            truncation=True,
-            padding=True,
-            return_tensors="pt",
-        )
-        encoded_prompt = encoded_prompt.to(self.device)
-        kwargs_to_pass = dict(
-            temperature=kwargs.get("temperature"),
-            top_k=kwargs.get("top_k"),
-            top_p=kwargs.get("top_p"),
-            repetition_penalty=kwargs.get("repetition_penalty"),
-            num_return_sequences=kwargs.get("num_return_sequences"),
-            do_sample=kwargs.get("do_sample"),
-        )
-        kwargs_to_pass = {k: v for k, v in kwargs_to_pass.items() if v is not None}
-        output_dict = self.model.generate(  # type: ignore
-            **encoded_prompt,
-            **kwargs_to_pass,
-            max_new_tokens=kwargs.get("max_new_tokens"),
-            eos_token_id=self.tokenizer.eos_token_id,
-            pad_token_id=self.tokenizer.pad_token_id,
-            output_scores=True,
-            return_dict_in_generate=True,
-        )
-        # logits/scores from the output always correspond to the generated tokens.
-        # shape (num_tokens, num_return_sequences, vocab_size)
-        logits = torch.stack(output_dict.scores)
"google/flan-t5-xxl": AutoModelForSeq2SeqLM, # 11B
|
67 |
-
}
|
68 |
-
|
69 |
-
MODEL_GENTYPE_REGISTRY = {
|
70 |
-
"text-generation": AutoModelForCausalLM,
|
71 |
-
"llama-text-generation": LlamaForCausalLM,
|
72 |
-
"text2text-generation": AutoModelForSeq2SeqLM,
|
73 |
-
}
|
74 |
-
|
75 |
-
|
76 |
-
def get_max_memory(gpu_reduction: float) -> Dict[int, str]:
|
77 |
-
"""Get max memory in GB times reduction."""
|
78 |
-
free_in_gb = int(torch.cuda.mem_get_info()[0] / 1024**3) # type: ignore
|
79 |
-
max_mem = f"{int(gpu_reduction*free_in_gb)}GB"
|
80 |
-
|
81 |
-
n_gpus = torch.cuda.device_count()
|
82 |
-
max_mem_dict = {i: max_mem for i in range(n_gpus)}
|
83 |
-
return max_mem_dict
|
84 |
-
|
85 |
-
|
86 |
-
class GenerationPipeline:
|
87 |
-
"""
|
88 |
-
Custom Pipeline.
|
89 |
-
|
90 |
-
HF pipelines do not handle devices well in multi-gpu setting.
|
91 |
-
Create our own generation pipeline.
|
92 |
-
"""
|
93 |
-
|
94 |
-
def __init__(
|
95 |
-
self,
|
96 |
-
model: Union[PreTrainedModel, deepspeed.InferenceEngine],
|
97 |
-
tokenizer: PreTrainedTokenizer,
|
98 |
-
device: int = None,
|
99 |
-
bitsandbytes: bool = False,
|
100 |
-
is_encdec: bool = False,
|
101 |
-
):
|
102 |
-
"""Initialize."""
|
103 |
-
# Use to turn off sampling
|
104 |
-
# https://github.com/TimDettmers/bitsandbytes/issues/42
|
105 |
-
self.bitsandbytes = bitsandbytes
|
106 |
-
self.model = model
|
107 |
-
self.is_encdec = is_encdec
|
108 |
-
config = model.config # type: ignore
|
109 |
-
# Used for GPT
|
110 |
-
self.max_length = getattr(config, "max_position_embeddings", None)
|
111 |
-
if self.max_length is None:
|
112 |
-
# Used for Bloom
|
113 |
-
self.max_length = getattr(config, "seq_length", None)
|
114 |
-
if self.max_length is None:
|
115 |
-
# Used for T0
|
116 |
-
self.max_length = getattr(config, "d_model", None)
|
117 |
-
if self.max_length is None:
|
118 |
-
# Default
|
119 |
-
self.max_length = 2048
|
120 |
-
|
121 |
-
print(f"Usings max_length: {self.max_length}")
|
122 |
-
|
123 |
-
self.tokenizer = tokenizer
|
124 |
-
# self.device = device
|
125 |
-
# With bits and bytes, do not want to place inputs on any device
|
126 |
-
# if self.device:
|
127 |
-
self.device = (
|
128 |
-
torch.device("cpu")
|
129 |
-
if (device == -1 or not torch.cuda.is_available())
|
130 |
-
else torch.device(f"cuda:{device}")
|
131 |
-
)
|
132 |
-
|
133 |
-
def __call__(
|
134 |
-
self, text: Union[str, List[str]], **kwargs: Any
|
135 |
-
) -> List[Dict[str, Union[str, List[float], List[str]]]]:
|
136 |
-
"""Generate from text.
|
137 |
-
|
138 |
-
Args:
|
139 |
-
text: text to generate.
|
140 |
-
|
141 |
-
Returns:
|
142 |
-
generated text.
|
143 |
-
"""
|
144 |
-
# If text is longer than max model length, we reduce max input length to ensure
|
145 |
-
# the user indicated generation tokens is preserved.
|
146 |
-
max_input_len = (
|
147 |
-
self.max_length - kwargs.get("max_new_tokens")
|
148 |
-
if not self.is_encdec
|
149 |
-
else self.max_length
|
150 |
-
)
|
151 |
-
encoded_prompt = self.tokenizer(
|
152 |
-
text,
|
153 |
-
max_length=max_input_len,
|
154 |
-
truncation=True,
|
155 |
-
padding=True,
|
156 |
-
return_tensors="pt",
|
157 |
-
)
|
158 |
-
encoded_prompt = encoded_prompt.to(self.device)
|
159 |
-
kwargs_to_pass = dict(
|
160 |
-
temperature=kwargs.get("temperature"),
|
161 |
-
top_k=kwargs.get("top_k"),
|
162 |
-
top_p=kwargs.get("top_p"),
|
163 |
-
repetition_penalty=kwargs.get("repetition_penalty"),
|
164 |
-
num_return_sequences=kwargs.get("num_return_sequences"),
|
165 |
-
do_sample=kwargs.get("do_sample"),
|
166 |
-
)
|
167 |
-
kwargs_to_pass = {k: v for k, v in kwargs_to_pass.items() if v is not None}
|
168 |
-
output_dict = self.model.generate( # type: ignore
|
169 |
-
**encoded_prompt,
|
170 |
-
**kwargs_to_pass,
|
171 |
-
max_new_tokens=kwargs.get("max_new_tokens"),
|
172 |
-
eos_token_id=self.tokenizer.eos_token_id,
|
173 |
-
pad_token_id=self.tokenizer.pad_token_id,
|
174 |
-
output_scores=True,
|
175 |
-
return_dict_in_generate=True,
|
176 |
-
)
|
177 |
-
# logits/scores from the output always correspond to the generated tokens.
|
178 |
-
# shape (num_tokens, num_return_sequences, vocab_size)
|
179 |
-
logits = torch.stack(output_dict.scores)
|
180 |
-
logits = torch.nn.functional.log_softmax(logits, dim=-1)
|
181 |
-
num_generated_tokens = logits.shape[0]
|
182 |
-
generated_sequences = [
|
183 |
-
{
|
184 |
-
"generated_text": self.tokenizer.decode(
|
185 |
-
output_seq[-num_generated_tokens:], skip_special_tokens=True
|
186 |
-
),
|
187 |
-
"logprobs": logits[
|
188 |
-
range(num_generated_tokens), i, output_seq[-num_generated_tokens:]
|
189 |
-
].tolist(),
|
190 |
-
"tokens": self.tokenizer.convert_ids_to_tokens(
|
191 |
-
output_seq[-num_generated_tokens:].tolist()
|
192 |
-
),
|
193 |
-
}
|
194 |
-
for i, output_seq in enumerate(output_dict.sequences)
|
195 |
-
]
|
196 |
-
return generated_sequences
|
197 |
-
|
198 |
-
|
199 |
-
class HuggingFaceModel(Model):
|
200 |
-
"""HuggingFace Model."""
|
201 |
-
|
202 |
-
def __init__(
|
203 |
-
self,
|
204 |
-
model_name_or_path: str,
|
205 |
-
model_type: Optional[str] = None,
|
206 |
-
model_config: Optional[str] = None,
|
207 |
-
cache_dir: Optional[str] = None,
|
208 |
-
device: int = 0,
|
209 |
-
use_accelerate: bool = False,
|
210 |
-
use_parallelize: bool = False,
|
211 |
-
use_bitsandbytes: bool = False,
|
212 |
-
use_deepspeed: bool = False,
|
213 |
-
perc_max_gpu_mem_red: float = 1.0,
|
214 |
-
use_fp16: bool = False,
|
215 |
-
):
|
216 |
-
"""
|
217 |
-
Initialize model.
|
218 |
-
|
219 |
-
All arguments will be passed in the request from Manifest.
|
220 |
-
|
221 |
-
Args:
|
222 |
-
model_name_or_path: model name string.
|
223 |
-
model_config: model config string.
|
224 |
-
cache_dir: cache directory for model.
|
225 |
-
device: device to use for model.
|
226 |
-
use_accelerate: whether to use accelerate for multi-gpu inference.
|
227 |
-
use_parallelize: use HF default parallelize
|
228 |
-
use_bitsandbytes: use HF bits and bytes
|
229 |
-
use_deepspeed: use deepspeed
|
230 |
-
perc_max_gpu_mem_red: percent max memory reduction in accelerate
|
231 |
-
use_fp16: use fp16 for model weights.
|
232 |
-
"""
|
233 |
-
if sum([use_accelerate, use_parallelize, use_bitsandbytes, use_deepspeed]) > 1:
|
234 |
-
raise ValueError(
|
235 |
-
"Only one of use_accelerate, use_parallelize, "
|
236 |
-
"use_bitsandbytes, use_deepspeed can be set to True"
|
237 |
-
)
|
238 |
-
# Check if providing path
|
239 |
-
self.model_path = model_name_or_path
|
240 |
-
if Path(self.model_path).exists() and Path(self.model_path).is_dir():
|
241 |
-
# Try to find config
|
242 |
-
if (Path(self.model_path) / "config.json").exists():
|
243 |
-
config = json.load(open(Path(self.model_path) / "config.json"))
|
244 |
-
model_name_or_path = config["_name_or_path"]
|
245 |
-
self.model_name = model_name_or_path
|
246 |
-
self.model_type = model_type
|
247 |
-
if self.model_name not in MODEL_REGISTRY and self.model_type is None:
|
248 |
-
raise ValueError(
|
249 |
-
f"{self.model_name} is not in our registry. Please specify "
|
250 |
-
"--model_generation_type as either text-generation (for Causal)"
|
251 |
-
" or text2text-generation (for Seq2Seq)"
|
252 |
-
)
|
253 |
-
print("Model Name:", self.model_name, "Model Path:", self.model_path)
|
254 |
-
|
255 |
-
def get_init_params(self) -> Dict:
|
256 |
-
"""Return init params to determine what model is being used."""
|
257 |
-
return {"model_name": self.model_name, "model_path": self.model_path}
|
258 |
-
|
259 |
-
def _dispatch_deepspeed_model(
|
260 |
-
self, model: PreTrainedModel
|
261 |
-
) -> deepspeed.InferenceEngine:
|
262 |
-
"""
|
263 |
-
Load model with deepspeed.
|
264 |
-
|
265 |
-
Adapted from https://www.deepspeed.ai/tutorials/inference-tutorial/
|
266 |
-
|
267 |
-
Args:
|
268 |
-
model: loaded hugging face model
|
269 |
-
"""
|
270 |
-
model = deepspeed.init_inference(
|
271 |
-
model=model,
|
272 |
-
mp_size=1,
|
273 |
-
dtype=model.dtype,
|
274 |
-
replace_method="auto",
|
275 |
-
replace_with_kernel_inject=True,
|
276 |
-
)
|
277 |
-
return model
|
278 |
-
|
279 |
-
def _dispatch_accelerate_model(
|
280 |
-
self, model: PreTrainedModel, perc_max_gpu_mem_red: float
|
281 |
-
) -> None:
|
282 |
-
"""
|
283 |
-
Load model with accelerate.
|
284 |
-
|
285 |
-
Adapted from https://colab.research.google.com/drive/14wnxMvD9zsiBQo2FtT
|
286 |
-
pxn6w2cpXCcb-7#scrollTo=y8Ne7jJdaF9F&uniqifier=1
|
287 |
-
|
288 |
-
Args:
|
289 |
-
model: loaded hugging face model
|
290 |
-
perc_max_gpu_mem_red: percent memory reduction
|
291 |
-
"""
|
292 |
-
model.tie_weights() # type: ignore
|
293 |
-
# Get the model where we can infer devices from
|
294 |
-
if hasattr(model, "model"):
|
295 |
-
# OPT
|
296 |
-
main_model = model.model # type: ignore
|
297 |
-
model_getter = "model."
|
298 |
-
else:
|
299 |
-
# Eleuther Neo and J
|
300 |
-
main_model = model
|
301 |
-
model_getter = ""
|
302 |
-
# Decrease max mem
|
303 |
-
max_memory = {
|
304 |
-
k: int(perc_max_gpu_mem_red * v) for k, v in acc_get_max_memory().items()
|
305 |
-
}
|
306 |
-
raw_device_map = infer_auto_device_map(
|
307 |
-
main_model,
|
308 |
-
max_memory=max_memory,
|
309 |
-
no_split_module_classes=[
|
310 |
-
"OPTDecoderLayer",
|
311 |
-
"GPTNeoBlock",
|
312 |
-
"GPTJBlock",
|
313 |
-
"GPTNeoXLayer",
|
314 |
-
"T5Block",
|
315 |
-
],
|
316 |
-
dtype=model.dtype, # type: ignore
|
317 |
-
)
|
318 |
-
# Hacky fix for Eleuther getting the "weight" of embeddings
|
319 |
-
device_map = {}
|
320 |
-
for k, v in raw_device_map.items():
|
321 |
-
if k in {"wte", "wpe"}:
|
322 |
-
device_map[f"{model_getter}{k}.weight"] = v
|
323 |
-
else:
|
324 |
-
device_map[f"{model_getter}{k}"] = v
|
325 |
-
# For OPT models
|
326 |
-
if "lm_head" not in device_map:
|
327 |
-
try:
|
328 |
-
device_map["lm_head"] = max(device_map.values())
|
329 |
-
except TypeError:
|
330 |
-
device_map["lm_head"] = "cpu"
|
331 |
-
print("Device Map", device_map)
|
332 |
-
dispatch_model(model, device_map=device_map)
|
333 |
-
return
|
334 |
-
|
335 |
-
|
336 |
-
class CrossModalEncoderModel(HuggingFaceModel):
|
337 |
-
"""CrossModalEncoderModel."""
|
338 |
-
|
339 |
-
def __init__(
|
340 |
-
self,
|
341 |
-
model_name_or_path: str,
|
342 |
-
model_type: Optional[str] = None,
|
343 |
-
model_config: Optional[str] = None,
|
344 |
-
cache_dir: Optional[str] = None,
|
345 |
-
device: int = 0,
|
346 |
-
use_accelerate: bool = False,
|
347 |
-
use_parallelize: bool = False,
|
348 |
-
use_bitsandbytes: bool = False,
|
349 |
-
use_deepspeed: bool = False,
|
350 |
-
perc_max_gpu_mem_red: float = 1.0,
|
351 |
-
use_fp16: bool = False,
|
352 |
-
):
|
353 |
-
"""
|
354 |
-
Initialize model.
|
355 |
-
|
356 |
-
All arguments will be passed in the request from Manifest.
|
357 |
-
|
358 |
-
Args:
|
359 |
-
model_name_or_path: model name string.
|
360 |
-
model_config: model config string.
|
361 |
-
cache_dir: cache directory for model.
|
362 |
-
device: device to use for model.
|
363 |
-
use_accelerate: whether to use accelerate for multi-gpu inference.
|
364 |
-
use_parallelize: use HF default parallelize
|
365 |
-
use_bitsandbytes: use HF bits and bytes
|
366 |
-
use_deepspeed: use deepspeed
|
367 |
-
perc_max_gpu_mem_red: percent max memory reduction in accelerate
|
368 |
-
use_fp16: use fp16 for model weights.
|
369 |
-
"""
|
370 |
-
super().__init__(
|
371 |
-
model_name_or_path,
|
372 |
-
model_type,
|
373 |
-
model_config,
|
374 |
-
cache_dir,
|
375 |
-
device,
|
376 |
-
use_accelerate,
|
377 |
-
use_parallelize,
|
378 |
-
use_bitsandbytes,
|
379 |
-
use_deepspeed,
|
380 |
-
perc_max_gpu_mem_red,
|
381 |
-
use_fp16,
|
382 |
-
)
|
383 |
-
|
384 |
-
# TODO: make this generalizable
|
385 |
-
self.processor = CLIPProcessor.from_pretrained(self.model_path)
|
386 |
-
|
387 |
-
model = MODEL_REGISTRY.get(
|
388 |
-
self.model_name, MODEL_GENTYPE_REGISTRY.get(self.model_type, None)
|
389 |
-
).from_pretrained(
|
390 |
-
self.model_path,
|
391 |
-
cache_dir=cache_dir,
|
392 |
-
trust_remote_code=True,
|
393 |
-
)
|
394 |
-
model.eval()
|
395 |
-
|
396 |
-
torch_device = (
|
397 |
-
torch.device("cpu")
|
398 |
-
if (device == -1 or not torch.cuda.is_available())
|
399 |
-
else torch.device(f"cuda:{device}")
|
400 |
-
)
|
401 |
-
self.model = model.to(torch_device) # type: ignore
|
402 |
-
|
403 |
-
@torch.no_grad()
|
404 |
-
def embed(self, prompt: Union[str, List[str]], **kwargs: Any) -> np.ndarray:
|
405 |
-
"""
|
406 |
-
Compute embedding for prompts.
|
407 |
-
|
408 |
-
Args:
|
409 |
-
prompt: promt to generate from.
|
410 |
-
|
411 |
-
Returns:
|
412 |
-
embedding
|
413 |
-
"""
|
414 |
-
if isinstance(prompt, str):
|
415 |
-
inputs = self.processor(text=prompt, return_tensors="pt", padding=True)
|
416 |
-
elif isinstance(prompt, PIL.Image.Image):
|
417 |
-
inputs = self.processor(images=prompt, return_tensors="pt", padding=True)
|
418 |
-
else:
|
419 |
-
raise ValueError("Prompt must be a string or an image")
|
420 |
-
|
421 |
-
outputs = self.model(**inputs)
|
422 |
-
return outputs
|
423 |
-
|
424 |
-
|
425 |
-
class TextGenerationModel(HuggingFaceModel):
|
426 |
-
"""Huggingface model."""
|
427 |
-
|
428 |
-
def __init__(
|
429 |
-
self,
|
430 |
-
model_name_or_path: str,
|
431 |
-
model_type: Optional[str] = None,
|
432 |
-
model_config: Optional[str] = None,
|
433 |
-
cache_dir: Optional[str] = None,
|
434 |
-
device: int = 0,
|
435 |
-
use_accelerate: bool = False,
|
436 |
-
use_parallelize: bool = False,
|
437 |
-
use_bitsandbytes: bool = False,
|
438 |
-
use_deepspeed: bool = False,
|
439 |
-
perc_max_gpu_mem_red: float = 1.0,
|
440 |
-
use_fp16: bool = False,
|
441 |
-
):
|
442 |
-
"""
|
443 |
-
Initialize model.
|
444 |
-
|
445 |
-
All arguments will be passed in the request from Manifest.
|
446 |
-
|
447 |
-
Args:
|
448 |
-
model_name_or_path: model name string.
|
449 |
-
model_config: model config string.
|
450 |
-
cache_dir: cache directory for model.
|
451 |
-
device: device to use for model.
|
452 |
-
use_accelerate: whether to use accelerate for multi-gpu inference.
|
453 |
-
use_parallelize: use HF default parallelize
|
454 |
-
use_bitsandbytes: use HF bits and bytes
|
455 |
-
use_deepspeed: use deepspeed
|
456 |
-
perc_max_gpu_mem_red: percent max memory reduction in accelerate
|
457 |
-
use_fp16: use fp16 for model weights.
|
458 |
-
"""
|
459 |
-
super().__init__(
|
460 |
-
model_name_or_path,
|
461 |
-
model_type,
|
462 |
-
model_config,
|
463 |
-
cache_dir,
|
464 |
-
device,
|
465 |
-
use_accelerate,
|
466 |
-
use_parallelize,
|
467 |
-
use_bitsandbytes,
|
468 |
-
use_deepspeed,
|
469 |
-
perc_max_gpu_mem_red,
|
470 |
-
use_fp16,
|
471 |
-
)
|
472 |
-
if (
|
473 |
-
MODEL_REGISTRY.get(
|
474 |
-
self.model_name, MODEL_GENTYPE_REGISTRY.get(self.model_type, None)
|
475 |
-
)
|
476 |
-
== LlamaForCausalLM
|
477 |
-
):
|
478 |
-
tokenizer = LlamaTokenizer.from_pretrained(self.model_name)
|
479 |
-
else:
|
480 |
-
try:
|
481 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
482 |
-
self.model_name, truncation_side="left", padding_side="left"
|
483 |
-
)
|
484 |
-
except ValueError:
|
485 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
486 |
-
self.model_name,
|
487 |
-
truncation_side="left",
|
488 |
-
padding_side="left",
|
489 |
-
use_fast=False,
|
490 |
-
)
|
491 |
-
dtype = torch.float16 if use_fp16 else "auto"
|
492 |
-
if use_bitsandbytes:
|
493 |
-
print("WARNING!!! Cannot use sampling with bitsandbytes.")
|
494 |
-
max_memory = get_max_memory(perc_max_gpu_mem_red)
|
495 |
-
model = MODEL_REGISTRY.get(
|
496 |
-
self.model_name, MODEL_GENTYPE_REGISTRY.get(self.model_type, None)
|
497 |
-
).from_pretrained( # type: ignore
|
498 |
-
self.model_path,
|
499 |
-
cache_dir=cache_dir,
|
500 |
-
load_in_8bit=True,
|
501 |
-
device_map="auto",
|
502 |
-
max_memory=max_memory,
|
503 |
-
trust_remote_code=True,
|
504 |
-
)
|
505 |
-
else:
|
506 |
-
try:
|
507 |
-
# Try to explicitely find a fp16 copy (gpt-j-6B for example)
|
508 |
-
model = MODEL_REGISTRY.get(
|
509 |
-
self.model_name, MODEL_GENTYPE_REGISTRY.get(self.model_type, None)
|
510 |
-
).from_pretrained( # type: ignore
|
511 |
-
self.model_path,
|
512 |
-
cache_dir=cache_dir,
|
513 |
-
revision="float16",
|
514 |
-
torch_dtype=torch.float16,
|
515 |
-
trust_remote_code=True,
|
516 |
-
)
|
517 |
-
except Exception:
|
518 |
-
model = MODEL_REGISTRY.get(
|
519 |
-
self.model_name, MODEL_GENTYPE_REGISTRY.get(self.model_type, None)
|
520 |
-
).from_pretrained( # type: ignore
|
521 |
-
self.model_path,
|
522 |
-
cache_dir=cache_dir,
|
523 |
-
torch_dtype=dtype,
|
524 |
-
trust_remote_code=True,
|
525 |
-
)
|
526 |
-
model.eval()
|
527 |
-
print(f"Loaded Model DType {model.dtype}")
|
528 |
-
self.is_encdec = model.config.is_encoder_decoder
|
529 |
-
if not self.is_encdec:
|
530 |
-
tokenizer.pad_token = tokenizer.eos_token
|
531 |
-
tokenizer.pad_token_id = tokenizer.eos_token_id
|
532 |
-
if not use_bitsandbytes:
|
533 |
-
if use_accelerate:
|
534 |
-
self._dispatch_accelerate_model(model, perc_max_gpu_mem_red)
|
535 |
-
device = 0
|
536 |
-
elif use_parallelize:
|
537 |
-
model.parallelize()
|
538 |
-
device = 0
|
539 |
-
elif use_deepspeed:
|
540 |
-
self._dispatch_deepspeed_model(model)
|
541 |
-
device = 0
|
542 |
-
else:
|
543 |
-
if device > -1:
|
544 |
-
torch_device = (
|
545 |
-
torch.device("cpu")
|
546 |
-
if (device == -1 or not torch.cuda.is_available())
|
547 |
-
else torch.device(f"cuda:{device}")
|
548 |
-
)
|
549 |
-
model = model.to(torch_device) # type: ignore
|
550 |
-
self.pipeline = GenerationPipeline( # type: ignore
|
551 |
-
model=model,
|
552 |
-
tokenizer=tokenizer,
|
553 |
-
device=device,
|
554 |
-
bitsandbytes=use_bitsandbytes,
|
555 |
-
is_encdec=self.is_encdec,
|
556 |
-
)
|
557 |
-
|
558 |
-
@torch.no_grad()
|
559 |
-
def embed(self, prompt: Union[str, List[str]], **kwargs: Any) -> np.ndarray:
|
560 |
-
"""
|
561 |
-
Embed the prompt from model.
|
562 |
-
|
563 |
-
Args:
|
564 |
-
prompt: promt to embed from.
|
565 |
-
|
566 |
-
Returns:
|
567 |
-
list of embeddings (list of length 1 for 1 embedding).
|
568 |
-
"""
|
569 |
-
if isinstance(prompt, str):
|
570 |
-
prompt = [prompt]
|
571 |
-
encoded_prompt = self.pipeline.tokenizer(
|
572 |
-
prompt,
|
573 |
-
max_length=self.pipeline.max_length,
|
574 |
-
truncation=True,
|
575 |
-
padding=True,
|
576 |
-
return_tensors="pt",
|
577 |
-
)
|
578 |
-
encoded_prompt = encoded_prompt.to(self.pipeline.device)
|
579 |
-
# Get last hidden state
|
580 |
-
output = self.pipeline.model( # type: ignore
|
581 |
-
**encoded_prompt,
|
582 |
-
output_hidden_states=True,
|
583 |
-
return_dict=True,
|
584 |
-
)
|
585 |
-
last_hidden_state = output["hidden_states"][-1][:, -1, :]
|
586 |
-
return last_hidden_state.cpu().numpy()
|
587 |
-
|
588 |
-
@torch.no_grad()
|
589 |
-
def generate(
|
590 |
-
self, prompt: Union[str, List[str]], **kwargs: Any
|
591 |
-
) -> List[Tuple[Any, float, List[str], List[float]]]:
|
592 |
-
"""
|
593 |
-
Generate the prompt from model.
|
594 |
-
|
595 |
-
Outputs must be generated text and score, not including prompt.
|
596 |
-
|
597 |
-
Args:
|
598 |
-
prompt: promt to generate from.
|
599 |
-
|
600 |
-
Returns:
|
601 |
-
list of generated text (list of length 1 for 1 generation).
|
602 |
-
"""
|
603 |
-
num_return = kwargs.get("n", 1)
|
604 |
-
if isinstance(prompt, list) and num_return > 1:
|
605 |
-
raise ValueError("In batch generate, n must be 1.")
|
606 |
-
result = self.pipeline(
|
607 |
-
prompt,
|
608 |
-
max_new_tokens=kwargs.get("max_tokens"),
|
609 |
-
temperature=kwargs.get("temperature"),
|
610 |
-
repetition_penalty=kwargs.get("repetition_penalty"),
|
611 |
-
top_k=kwargs.get("top_k"),
|
612 |
-
top_p=kwargs.get("top_p"),
|
613 |
-
do_sample=kwargs.get("do_sample"),
|
614 |
-
num_return_sequences=num_return,
|
615 |
-
)
|
616 |
-
final_results = [
|
617 |
-
(
|
618 |
-
cast(str, r["generated_text"]),
|
619 |
-
sum(cast(List[float], r["logprobs"])),
|
620 |
-
cast(List[str], r["tokens"]),
|
621 |
-
cast(List[float], r["logprobs"]),
|
622 |
-
)
|
623 |
-
for r in result
|
624 |
-
]
|
625 |
-
return final_results
|
626 |
-
|
627 |
-
@torch.no_grad()
|
628 |
-
def score_sequence(
|
629 |
-
self, prompt: Union[str, List[str]], **kwargs: Any
|
630 |
-
) -> List[Tuple[float, List[int], List[float]]]:
|
631 |
-
"""
|
632 |
-
Score a sequence of choices.
|
633 |
-
|
634 |
-
Args:
|
635 |
-
prompt (:obj:`str` or :obj:`List[str]`):
|
636 |
-
The prompt to score the choices against.
|
637 |
-
**kwargs:
|
638 |
-
Additional keyword arguments passed along to the :obj:`__call__` method.
|
639 |
-
"""
|
640 |
-
if isinstance(prompt, str):
|
641 |
-
prompt = [prompt]
|
642 |
-
encoded_prompt = self.pipeline.tokenizer(
|
643 |
-
prompt,
|
644 |
-
max_length=self.pipeline.max_length,
|
645 |
-
truncation=True,
|
646 |
-
padding=True,
|
647 |
-
return_tensors="pt",
|
648 |
-
)
|
649 |
-
encoded_prompt["labels"] = encoded_prompt["input_ids"].clone()
|
650 |
-
encoded_prompt = encoded_prompt.to(self.pipeline.device)
|
651 |
-
logits = self.pipeline.model( # type: ignore
|
652 |
-
**encoded_prompt,
|
653 |
-
).logits
|
654 |
-
# For causal decoders, shift logts and labels
|
655 |
-
labels_attention_mask = encoded_prompt["attention_mask"].unsqueeze(-1)
|
656 |
-
masked_log_probs = labels_attention_mask.float() * torch.log_softmax(
|
657 |
-
logits.float(), dim=-1
|
658 |
-
)
|
659 |
-
seq_token_log_probs = torch.gather(
|
660 |
-
masked_log_probs, -1, encoded_prompt["labels"].unsqueeze(-1)
|
661 |
-
)
|
662 |
-
seq_token_log_probs = seq_token_log_probs.squeeze(dim=-1)
|
663 |
-
seq_log_prob = seq_token_log_probs.sum(dim=-1)
|
664 |
-
return [
|
665 |
-
(seq, tokens, seq_token)
|
666 |
-
for seq, tokens, seq_token in zip(
|
667 |
-
seq_log_prob.tolist(),
|
668 |
-
encoded_prompt["input_ids"].tolist(),
|
669 |
-
seq_token_log_probs.tolist(),
|
670 |
-
)
|
671 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
duckdb-nsql/manifest/manifest/api/models/model.py
DELETED
@@ -1,91 +0,0 @@
|
|
1 |
-
"""Model class."""
|
2 |
-
from typing import Any, Dict, List, Tuple, Union
|
3 |
-
|
4 |
-
import numpy as np
|
5 |
-
|
6 |
-
|
7 |
-
class Model:
|
8 |
-
"""Model class."""
|
9 |
-
|
10 |
-
def __init__(
|
11 |
-
self,
|
12 |
-
model_name_or_path: str,
|
13 |
-
model_type: str,
|
14 |
-
cache_dir: str,
|
15 |
-
device: int,
|
16 |
-
use_accelerate: bool,
|
17 |
-
use_parallelize: bool,
|
18 |
-
use_bitsandbytes: bool,
|
19 |
-
use_deepspeed: bool,
|
20 |
-
perc_max_gpu_mem_red: float,
|
21 |
-
use_fp16: bool,
|
22 |
-
):
|
23 |
-
"""
|
24 |
-
Initialize model.
|
25 |
-
|
26 |
-
All arguments will be passed in the request from Manifest.
|
27 |
-
|
28 |
-
Args:
|
29 |
-
model_name_or_path: model name string.
|
30 |
-
model_type: model type string for when model_name not in registry.
|
31 |
-
cache_dir: cache directory for model.
|
32 |
-
device: device to use for model.
|
33 |
-
use_accelerate: whether to use accelerate for multi-gpu inference.
|
34 |
-
use_parallelize: use HF default parallelize
|
35 |
-
use_bitsandbytes: use HF bits and bytes
|
36 |
-
use_deepspeed: use deepspeed
|
37 |
-
perc_max_gpu_mem_red: percent max memory reduction in accelerate
|
38 |
-
use_fp16: use fp16 for model weights.
|
39 |
-
"""
|
40 |
-
raise NotImplementedError()
|
41 |
-
|
42 |
-
def get_init_params(self) -> Dict:
|
43 |
-
"""Return init params to determine what model is being used."""
|
44 |
-
raise NotImplementedError()
|
45 |
-
|
46 |
-
def generate(
|
47 |
-
self, prompt: Union[str, List[str]], **kwargs: Any
|
48 |
-
) -> List[Tuple[Any, float, List[str], List[float]]]:
|
49 |
-
"""
|
50 |
-
Generate the prompt from model.
|
51 |
-
|
52 |
-
Outputs must be generated text and score, not including prompt.
|
53 |
-
|
54 |
-
Args:
|
55 |
-
prompt: promt to generate from.
|
56 |
-
|
57 |
-
Returns:
|
58 |
-
list of generated text (list of length 1 for 1 generation).
|
59 |
-
Each item is the response, answer logprob, list of tokens,
|
60 |
-
and list of logprobs for each token.
|
61 |
-
"""
|
62 |
-
raise NotImplementedError()
|
63 |
-
|
64 |
-
def embed(self, prompt: Union[str, List[str]], **kwargs: Any) -> np.ndarray:
|
65 |
-
"""
|
66 |
-
Embed the prompt from model.
|
67 |
-
|
68 |
-
Args:
|
69 |
-
prompt: promt to embed from.
|
70 |
-
|
71 |
-
Returns:
|
72 |
-
list of embeddings (list of length 1 for 1 embedding).
|
73 |
-
"""
|
74 |
-
raise NotImplementedError()
|
75 |
-
|
76 |
-
def score_sequence(
|
77 |
-
self, prompt: Union[str, List[str]], **kwargs: Any
|
78 |
-
) -> List[Tuple[float, List[int], List[float]]]:
|
79 |
-
"""
|
80 |
-
Score a sequence of choices.
|
81 |
-
|
82 |
-
Args:
|
83 |
-
prompt (:obj:`str` or :obj:`List[str]`):
|
84 |
-
The prompt to score the choices against.
|
85 |
-
**kwargs:
|
86 |
-
Additional keyword arguments passed along to the :obj:`__call__` method.
|
87 |
-
|
88 |
-
Returns:
|
89 |
-
Tuple of total score, tokens, and probs per token.
|
90 |
-
"""
|
91 |
-
raise NotImplementedError()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
duckdb-nsql/manifest/manifest/api/models/sentence_transformer.py
DELETED
@@ -1,113 +0,0 @@
|
|
1 |
-
"""Sentence transformer model."""
|
2 |
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
3 |
-
|
4 |
-
import numpy as np
|
5 |
-
import torch
|
6 |
-
from sentence_transformers import SentenceTransformer
|
7 |
-
|
8 |
-
from manifest.api.models.model import Model
|
9 |
-
|
10 |
-
|
11 |
-
class SentenceTransformerModel(Model):
|
12 |
-
"""SentenceTransformer model."""
|
13 |
-
|
14 |
-
def __init__(
|
15 |
-
self,
|
16 |
-
model_name_or_path: str,
|
17 |
-
model_type: Optional[str] = None,
|
18 |
-
model_config: Optional[str] = None,
|
19 |
-
cache_dir: Optional[str] = None,
|
20 |
-
device: int = 0,
|
21 |
-
use_accelerate: bool = False,
|
22 |
-
use_parallelize: bool = False,
|
23 |
-
use_bitsandbytes: bool = False,
|
24 |
-
use_deepspeed: bool = False,
|
25 |
-
perc_max_gpu_mem_red: float = 1.0,
|
26 |
-
use_fp16: bool = False,
|
27 |
-
):
|
28 |
-
"""
|
29 |
-
Initialize model.
|
30 |
-
|
31 |
-
All arguments will be passed in the request from Manifest.
|
32 |
-
|
33 |
-
Args:
|
34 |
-
model_name_or_path: model name string.
|
35 |
-
model_config: model config string.
|
36 |
-
cache_dir: cache directory for model.
|
37 |
-
device: device to use for model.
|
38 |
-
use_accelerate: whether to use accelerate for multi-gpu inference.
|
39 |
-
use_parallelize: use HF default parallelize
|
40 |
-
use_bitsandbytes: use HF bits and bytes
|
41 |
-
use_deepspeed: use deepspeed
|
42 |
-
perc_max_gpu_mem_red: percent max memory reduction in accelerate
|
43 |
-
use_fp16: use fp16 for model weights.
|
44 |
-
"""
|
45 |
-
if use_accelerate or use_parallelize or use_bitsandbytes or use_deepspeed:
|
46 |
-
raise ValueError(
|
47 |
-
"Cannot use accelerate or parallelize or "
|
48 |
-
"bitsandbytes or deepspeeed with sentence transformers"
|
49 |
-
)
|
50 |
-
# Check if providing path
|
51 |
-
self.model_name = model_name_or_path
|
52 |
-
print("Model Name:", self.model_name)
|
53 |
-
torch_device = (
|
54 |
-
torch.device("cpu")
|
55 |
-
if (device == -1 or not torch.cuda.is_available())
|
56 |
-
else torch.device(f"cuda:{device}")
|
57 |
-
)
|
58 |
-
self.embedding_model = SentenceTransformer(self.model_name, device=torch_device)
|
59 |
-
self.embedding_model.to(torch_device)
|
60 |
-
self.embedding_model.eval()
|
61 |
-
|
62 |
-
def get_init_params(self) -> Dict:
|
63 |
-
"""Return init params to determine what model is being used."""
|
64 |
-
return {"model_name": self.model_name, "model_path": self.model_name}
|
65 |
-
|
66 |
-
@torch.no_grad()
|
67 |
-
def generate(
|
68 |
-
self, prompt: Union[str, List[str]], **kwargs: Any
|
69 |
-
) -> List[Tuple[Any, float, List[str], List[float]]]:
|
70 |
-
"""
|
71 |
-
Generate the prompt from model.
|
72 |
-
|
73 |
-
Outputs must be generated text and score, not including prompt.
|
74 |
-
|
75 |
-
Args:
|
76 |
-
prompt: promt to generate from.
|
77 |
-
|
78 |
-
Returns:
|
79 |
-
list of generated text (list of length 1 for 1 generation).
|
80 |
-
"""
|
81 |
-
raise NotImplementedError("Generate not supported for sentence transformers")
|
82 |
-
|
83 |
-
@torch.no_grad()
|
84 |
-
def embed(self, prompt: Union[str, List[str]], **kwargs: Any) -> np.ndarray:
|
85 |
-
"""
|
86 |
-
Embed the prompt from model.
|
87 |
-
|
88 |
-
Args:
|
89 |
-
prompt: promt to embed from.
|
90 |
-
|
91 |
-
Returns:
|
92 |
-
list of embeddings (list of length 1 for 1 embedding).
|
93 |
-
"""
|
94 |
-
if isinstance(prompt, str):
|
95 |
-
prompt = [prompt]
|
96 |
-
return self.embedding_model.encode(prompt)
|
97 |
-
|
98 |
-
@torch.no_grad()
|
99 |
-
def score_sequence(
|
100 |
-
self, prompt: Union[str, List[str]], **kwargs: Any
|
101 |
-
) -> List[Tuple[float, List[int], List[float]]]:
|
102 |
-
"""
|
103 |
-
Score a sequence of choices.
|
104 |
-
|
105 |
-
Args:
|
106 |
-
prompt (:obj:`str` or :obj:`List[str]`):
|
107 |
-
The prompt to score the choices against.
|
108 |
-
**kwargs:
|
109 |
-
Additional keyword arguments passed along to the :obj:`__call__` method.
|
110 |
-
"""
|
111 |
-
raise NotImplementedError(
|
112 |
-
"Score sequence not supported for sentence transformers"
|
113 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
duckdb-nsql/manifest/manifest/api/response.py
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
"""Response."""
|
2 |
-
|
3 |
-
import time
|
4 |
-
import uuid
|
5 |
-
from typing import Any, Dict, List
|
6 |
-
|
7 |
-
|
8 |
-
class ModelResponse:
|
9 |
-
"""ModelResponse."""
|
10 |
-
|
11 |
-
def __init__(self, results: List[Dict[str, Any]], response_type: str) -> None:
|
12 |
-
"""Initialize response."""
|
13 |
-
self.results = results
|
14 |
-
self.response_type = response_type
|
15 |
-
if self.response_type not in {
|
16 |
-
"text_completion",
|
17 |
-
"prompt_logit_score",
|
18 |
-
"image_generation",
|
19 |
-
"embedding_generation",
|
20 |
-
}:
|
21 |
-
raise ValueError(
|
22 |
-
f"Invalid response type: {self.response_type}. "
|
23 |
-
"Must be one of: text_completion, prompt_logit_score, "
|
24 |
-
"image_generation, embedding_generation."
|
25 |
-
)
|
26 |
-
self.response_id = str(uuid.uuid4())
|
27 |
-
self.created = int(time.time())
|
28 |
-
|
29 |
-
def __dict__(self) -> Dict[str, Any]: # type: ignore
|
30 |
-
"""Return dictionary representation of response."""
|
31 |
-
key = (
|
32 |
-
"text"
|
33 |
-
if self.response_type not in {"image_generation", "embedding_generation"}
|
34 |
-
else "array"
|
35 |
-
)
|
36 |
-
return {
|
37 |
-
"id": self.response_id,
|
38 |
-
"object": self.response_type,
|
39 |
-
"created": self.created,
|
40 |
-
"model": "flask_model",
|
41 |
-
"choices": [
|
42 |
-
{
|
43 |
-
key: result[key],
|
44 |
-
"logprob": result["logprob"],
|
45 |
-
"tokens": result["tokens"],
|
46 |
-
"token_logprobs": result["token_logprobs"],
|
47 |
-
}
|
48 |
-
if key == "text"
|
49 |
-
else {
|
50 |
-
key: result[key].tolist(),
|
51 |
-
"logprob": result["logprob"],
|
52 |
-
}
|
53 |
-
for result in self.results
|
54 |
-
],
|
55 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
duckdb-nsql/manifest/manifest/caches/__init__.py
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
"""Cache init."""
|
|
|
|
duckdb-nsql/manifest/manifest/caches/array_cache.py
DELETED
@@ -1,116 +0,0 @@
|
|
1 |
-
"""Array cache."""
|
2 |
-
from pathlib import Path
|
3 |
-
from typing import Union
|
4 |
-
|
5 |
-
import numpy as np
|
6 |
-
from sqlitedict import SqliteDict
|
7 |
-
|
8 |
-
|
9 |
-
def open_mmap_arr(file: Union[Path, str], size: float) -> np.memmap:
|
10 |
-
"""Open memmap."""
|
11 |
-
if not Path(file).exists():
|
12 |
-
mode = "w+"
|
13 |
-
else:
|
14 |
-
mode = "r+"
|
15 |
-
arr = np.memmap( # type: ignore
|
16 |
-
str(file),
|
17 |
-
dtype=np.float32, # This means we only support float 32
|
18 |
-
mode=mode,
|
19 |
-
shape=size,
|
20 |
-
)
|
21 |
-
return arr
|
22 |
-
|
23 |
-
|
24 |
-
class ArrayCache:
|
25 |
-
"""Array cache."""
|
26 |
-
|
27 |
-
def __init__(self, folder: Union[str, Path]) -> None:
|
28 |
-
"""
|
29 |
-
Initialize the array writer.
|
30 |
-
|
31 |
-
Args:
|
32 |
-
folder: folder to write to.
|
33 |
-
"""
|
34 |
-
self.folder = Path(folder)
|
35 |
-
self.folder.mkdir(exist_ok=True, parents=True)
|
36 |
-
self.hash2arrloc = SqliteDict(
|
37 |
-
self.folder / "hash2arrloc.sqlite", autocommit=True
|
38 |
-
)
|
39 |
-
# Approx 1GB (I think)
|
40 |
-
self.max_memmap_size = 20480000
|
41 |
-
self.cur_file_idx = 0
|
42 |
-
# Get the last file idx used
|
43 |
-
for key in self.hash2arrloc:
|
44 |
-
file_data = self.hash2arrloc[key]
|
45 |
-
if file_data["file_idx"] > self.cur_file_idx:
|
46 |
-
self.cur_file_idx = file_data["file_idx"]
|
47 |
-
self.cur_memmap = open_mmap_arr(
|
48 |
-
self.folder / f"{self.cur_file_idx}.npy",
|
49 |
-
self.max_memmap_size,
|
50 |
-
)
|
51 |
-
# Make sure there is space left in the memmap
|
52 |
-
non_zero = np.nonzero(self.cur_memmap)[0]
|
53 |
-
if len(non_zero) > 0:
|
54 |
-
self.cur_offset = int(np.max(non_zero) + 1)
|
55 |
-
else:
|
56 |
-
self.cur_offset = 0
|
57 |
-
# If no space, make a new memmap
|
58 |
-
if self.cur_offset == self.max_memmap_size:
|
59 |
-
self.cur_file_idx += 1
|
60 |
-
self.cur_memmap = open_mmap_arr(
|
61 |
-
self.folder / f"{self.cur_file_idx}.npy",
|
62 |
-
self.max_memmap_size,
|
63 |
-
)
|
64 |
-
self.cur_offset = 0
|
65 |
-
|
66 |
-
def contains_key(self, key: str) -> bool:
|
67 |
-
"""
|
68 |
-
Check if the key is in the cache.
|
69 |
-
|
70 |
-
Args:
|
71 |
-
key: key to check.
|
72 |
-
|
73 |
-
Returns:
|
74 |
-
True if the key is in the cache.
|
75 |
-
"""
|
76 |
-
return key in self.hash2arrloc
|
77 |
-
|
78 |
-
def put(self, key: str, arr: np.ndarray) -> None:
|
79 |
-
"""Save array in store and associate location with key."""
|
80 |
-
# Check if there is space in the memmap
|
81 |
-
arr_shape = arr.shape
|
82 |
-
arr = arr.flatten()
|
83 |
-
if len(arr) > self.max_memmap_size:
|
84 |
-
raise ValueError(
|
85 |
-
f"Array is too large to be cached. Max is {self.max_memmap_size}"
|
86 |
-
)
|
87 |
-
if self.cur_offset + len(arr) > self.max_memmap_size:
|
88 |
-
self.cur_file_idx += 1
|
89 |
-
self.cur_memmap = open_mmap_arr(
|
90 |
-
self.folder / f"{self.cur_file_idx}.npy",
|
91 |
-
self.max_memmap_size,
|
92 |
-
)
|
93 |
-
self.cur_offset = 0
|
94 |
-
self.cur_memmap[self.cur_offset : self.cur_offset + len(arr)] = arr
|
95 |
-
self.cur_memmap.flush()
|
96 |
-
self.hash2arrloc[key] = {
|
97 |
-
"file_idx": self.cur_file_idx,
|
98 |
-
"offset": self.cur_offset,
|
99 |
-
"flatten_size": len(arr),
|
100 |
-
"shape": arr_shape,
|
101 |
-
"dtype": arr.dtype,
|
102 |
-
}
|
103 |
-
self.cur_offset += len(arr)
|
104 |
-
return
|
105 |
-
|
106 |
-
def get(self, key: str) -> np.ndarray:
|
107 |
-
"""Get array associated with location from key."""
|
108 |
-
file_data = self.hash2arrloc[key]
|
109 |
-
memmap = open_mmap_arr(
|
110 |
-
self.folder / f"{file_data['file_idx']}.npy",
|
111 |
-
self.max_memmap_size,
|
112 |
-
)
|
113 |
-
arr = memmap[
|
114 |
-
file_data["offset"] : file_data["offset"] + file_data["flatten_size"]
|
115 |
-
]
|
116 |
-
return arr.reshape(file_data["shape"]).astype(file_data["dtype"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
duckdb-nsql/manifest/manifest/caches/cache.py
DELETED
@@ -1,135 +0,0 @@
|
|
1 |
-
"""Cache for queries and responses."""
|
2 |
-
from abc import ABC, abstractmethod
|
3 |
-
from typing import Any, Dict, Type, Union
|
4 |
-
|
5 |
-
from manifest.caches.serializers import ArraySerializer, NumpyByteSerializer, Serializer
|
6 |
-
from manifest.request import DiffusionRequest, EmbeddingRequest, LMRequest, Request
|
7 |
-
from manifest.response import Response
|
8 |
-
|
9 |
-
# Non-text return type caches
|
10 |
-
ARRAY_CACHE_TYPES = {EmbeddingRequest, DiffusionRequest}
|
11 |
-
|
12 |
-
|
13 |
-
class Cache(ABC):
|
14 |
-
"""A cache for request/response pairs."""
|
15 |
-
|
16 |
-
def __init__(
|
17 |
-
self,
|
18 |
-
connection_str: str,
|
19 |
-
request_type: Type[Request] = LMRequest,
|
20 |
-
cache_args: Dict[str, Any] = {},
|
21 |
-
):
|
22 |
-
"""
|
23 |
-
Initialize cache.
|
24 |
-
|
25 |
-
Args:
|
26 |
-
connection_str: connection string.
|
27 |
-
request_type: request type.
|
28 |
-
cache_args: arguments for cache.
|
29 |
-
|
30 |
-
cache_args are any arguments needed to initialize the cache.
|
31 |
-
|
32 |
-
Further, cache_args can contain `array_serializer` as a string
|
33 |
-
for embedding or image return types (e.g. diffusers) with values
|
34 |
-
as `local_file` or `byte_string`. `local_file` will save the
|
35 |
-
array in a local file and cache a pointer to the file.
|
36 |
-
`byte_string` will convert the array to a byte string and cache
|
37 |
-
the entire byte string. `byte_string` is default.
|
38 |
-
|
39 |
-
Args:
|
40 |
-
connection_str: connection string for cache.
|
41 |
-
cache_args: cache arguments.
|
42 |
-
"""
|
43 |
-
self.request_type = request_type
|
44 |
-
self.connect(connection_str, cache_args)
|
45 |
-
if self.request_type in ARRAY_CACHE_TYPES:
|
46 |
-
array_serializer = cache_args.pop("array_serializer", "byte_string")
|
47 |
-
if array_serializer not in ["local_file", "byte_string"]:
|
48 |
-
raise ValueError(
|
49 |
-
"array_serializer must be local_file or byte_string,"
|
50 |
-
f" not {array_serializer}"
|
51 |
-
)
|
52 |
-
self.serializer = (
|
53 |
-
ArraySerializer()
|
54 |
-
if array_serializer == "local_file"
|
55 |
-
else NumpyByteSerializer()
|
56 |
-
)
|
57 |
-
else:
|
58 |
-
# If user has array_serializer type, it will throw an error as
|
59 |
-
# it is not recognized for non-array return types.
|
60 |
-
self.serializer = Serializer()
|
61 |
-
|
62 |
-
@abstractmethod
|
63 |
-
def close(self) -> None:
|
64 |
-
"""Close the cache."""
|
65 |
-
raise NotImplementedError()
|
66 |
-
|
67 |
-
@abstractmethod
|
68 |
-
def connect(self, connection_str: str, cache_args: Dict[str, Any]) -> None:
|
69 |
-
"""
|
70 |
-
Connect to cache.
|
71 |
-
|
72 |
-
Args:
|
73 |
-
connection_str: connection string.
|
74 |
-
"""
|
75 |
-
raise NotImplementedError()
|
76 |
-
|
77 |
-
@abstractmethod
|
78 |
-
def get_key(self, key: str, table: str = "default") -> Union[str, None]:
|
79 |
-
"""
|
80 |
-
Get the key for a request.
|
81 |
-
|
82 |
-
With return None if key is not in cache.
|
83 |
-
|
84 |
-
Args:
|
85 |
-
key: key for cache.
|
86 |
-
table: table to get key in.
|
87 |
-
"""
|
88 |
-
raise NotImplementedError()
|
89 |
-
|
90 |
-
@abstractmethod
|
91 |
-
def set_key(self, key: str, value: str, table: str = "default") -> None:
|
92 |
-
"""
|
93 |
-
Set the value for the key.
|
94 |
-
|
95 |
-
Will override old value.
|
96 |
-
|
97 |
-
Args:
|
98 |
-
key: key for cache.
|
99 |
-
value: new value for key.
|
100 |
-
table: table to set key in.
|
101 |
-
"""
|
102 |
-
raise NotImplementedError()
|
103 |
-
|
104 |
-
@abstractmethod
|
105 |
-
def commit(self) -> None:
|
106 |
-
"""Commit any results."""
|
107 |
-
raise NotImplementedError()
|
108 |
-
|
109 |
-
def get(self, request: Dict) -> Union[Response, None]:
|
110 |
-
"""Get the result of request (by calling compute as needed).
|
111 |
-
|
112 |
-
Args:
|
113 |
-
request: request to get.
|
114 |
-
response: response to get.
|
115 |
-
|
116 |
-
Returns:
|
117 |
-
Response object or None if not in cache.
|
118 |
-
"""
|
119 |
-
key = self.serializer.request_to_key(request)
|
120 |
-
cached_response = self.get_key(key)
|
121 |
-
if cached_response:
|
122 |
-
response = self.serializer.key_to_response(cached_response)
|
123 |
-
response["cached"] = True
|
124 |
-
return Response.from_dict(response, request_dict=request)
|
125 |
-
return None
|
126 |
-
|
127 |
-
def set(self, request: Dict, response: Dict) -> None:
|
128 |
-
"""Set the value for the key.
|
129 |
-
|
130 |
-
Args:
|
131 |
-
request: request to set.
|
132 |
-
response: response to set.
|
133 |
-
"""
|
134 |
-
key = self.serializer.request_to_key(request)
|
135 |
-
self.set_key(key, self.serializer.response_to_key(response))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
duckdb-nsql/manifest/manifest/caches/noop.py
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
"""Noop cache."""
|
2 |
-
from typing import Any, Dict, Union
|
3 |
-
|
4 |
-
from manifest.caches.cache import Cache
|
5 |
-
|
6 |
-
|
7 |
-
class NoopCache(Cache):
|
8 |
-
"""A Noop cache that caches nothing for request/response pairs."""
|
9 |
-
|
10 |
-
def connect(self, connection_str: str, cache_args: Dict[str, Any]) -> None:
|
11 |
-
"""
|
12 |
-
Connect to client.
|
13 |
-
|
14 |
-
Args:
|
15 |
-
connection_str: connection string.
|
16 |
-
cache_args: arguments for cache.
|
17 |
-
"""
|
18 |
-
pass
|
19 |
-
|
20 |
-
def close(self) -> None:
|
21 |
-
"""Close the client."""
|
22 |
-
pass
|
23 |
-
|
24 |
-
def get_key(self, key: str, table: str = "default") -> Union[str, None]:
|
25 |
-
"""
|
26 |
-
Return None key for never in cache.
|
27 |
-
|
28 |
-
Args:
|
29 |
-
key: key for cache.
|
30 |
-
table: table to get key in.
|
31 |
-
"""
|
32 |
-
return None
|
33 |
-
|
34 |
-
def set_key(self, key: str, value: str, table: str = "default") -> None:
|
35 |
-
"""
|
36 |
-
Do not set anything as no cache.
|
37 |
-
|
38 |
-
Args:
|
39 |
-
key: key for cache.
|
40 |
-
value: new value for key.
|
41 |
-
table: table to set key in.
|
42 |
-
"""
|
43 |
-
pass
|
44 |
-
|
45 |
-
def commit(self) -> None:
|
46 |
-
"""Commit any results."""
|
47 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
duckdb-nsql/manifest/manifest/caches/postgres.py
DELETED
@@ -1,131 +0,0 @@
|
|
1 |
-
"""Postgres cache."""
|
2 |
-
import hashlib
|
3 |
-
import logging
|
4 |
-
from typing import Any, Dict, Union
|
5 |
-
|
6 |
-
logger = logging.getLogger("postgresql")
|
7 |
-
logger.setLevel(logging.WARNING)
|
8 |
-
|
9 |
-
from ..caches.cache import Cache
|
10 |
-
|
11 |
-
try:
|
12 |
-
import sqlalchemy # type: ignore
|
13 |
-
from google.cloud.sql.connector import Connector # type: ignore
|
14 |
-
from sqlalchemy import Column, String # type: ignore
|
15 |
-
from sqlalchemy.ext.declarative import declarative_base # type: ignore
|
16 |
-
from sqlalchemy.orm import sessionmaker # type: ignore
|
17 |
-
|
18 |
-
Base = declarative_base()
|
19 |
-
|
20 |
-
class Request(Base): # type: ignore
|
21 |
-
"""The request table."""
|
22 |
-
|
23 |
-
__tablename__ = "requests"
|
24 |
-
key = Column(String, primary_key=True)
|
25 |
-
response = Column(
|
26 |
-
String
|
27 |
-
) # FIXME: ideally should be an hstore, but I don't want to set it up on GCP
|
28 |
-
|
29 |
-
missing_dependencies = None
|
30 |
-
|
31 |
-
except ImportError as e:
|
32 |
-
missing_dependencies = e
|
33 |
-
|
34 |
-
|
35 |
-
class PostgresCache(Cache):
|
36 |
-
"""A PostgreSQL cache for request/response pairs."""
|
37 |
-
|
38 |
-
def connect(self, connection_str: str, cache_args: Dict[str, Any]) -> None:
|
39 |
-
"""
|
40 |
-
Connect to client.
|
41 |
-
|
42 |
-
Args:
|
43 |
-
connection_str: connection string.
|
44 |
-
cache_args: arguments for cache should include the following fields:
|
45 |
-
{
|
46 |
-
"cache_user": "",
|
47 |
-
"cache_password": "",
|
48 |
-
"cache_db": ""
|
49 |
-
}
|
50 |
-
"""
|
51 |
-
if missing_dependencies:
|
52 |
-
raise ValueError(
|
53 |
-
"Missing dependencies for GCP PostgreSQL cache. "
|
54 |
-
"Install with `pip install manifest[gcp]`",
|
55 |
-
missing_dependencies,
|
56 |
-
)
|
57 |
-
|
58 |
-
connector = Connector()
|
59 |
-
|
60 |
-
def getconn() -> Any:
|
61 |
-
conn = connector.connect(
|
62 |
-
connection_str,
|
63 |
-
"pg8000",
|
64 |
-
user=cache_args.pop("cache_user"),
|
65 |
-
password=cache_args.pop("cache_password"),
|
66 |
-
db=cache_args.pop("cache_db"),
|
67 |
-
)
|
68 |
-
return conn
|
69 |
-
|
70 |
-
engine = sqlalchemy.create_engine(
|
71 |
-
"postgresql+pg8000://",
|
72 |
-
creator=getconn,
|
73 |
-
)
|
74 |
-
engine.dialect.description_encoding = None # type: ignore
|
75 |
-
|
76 |
-
db_exists = len(sqlalchemy.inspect(engine).get_table_names()) > 0
|
77 |
-
if not db_exists:
|
78 |
-
logger.info("Creating database...")
|
79 |
-
Base.metadata.create_all(engine)
|
80 |
-
|
81 |
-
self.session = sessionmaker(bind=engine)()
|
82 |
-
|
83 |
-
def close(self) -> None:
|
84 |
-
"""Close the client."""
|
85 |
-
self.session.close()
|
86 |
-
|
87 |
-
@staticmethod
|
88 |
-
def _hash_key(key: str, table: str) -> str:
|
89 |
-
"""Compute MD5 hash of the key."""
|
90 |
-
return hashlib.md5(f"{key}:{table}".encode("utf-8")).hexdigest()
|
91 |
-
|
92 |
-
def get_key(self, key: str, table: str = "default") -> Union[str, None]:
|
93 |
-
"""
|
94 |
-
Get the key for a request.
|
95 |
-
|
96 |
-
With return None if key is not in cache.
|
97 |
-
|
98 |
-
Args:
|
99 |
-
key: key for cache.
|
100 |
-
table: table to get key in.
|
101 |
-
"""
|
102 |
-
request = (
|
103 |
-
self.session.query(Request) # type: ignore
|
104 |
-
.filter_by(key=self._hash_key(key, table))
|
105 |
-
.first()
|
106 |
-
)
|
107 |
-
out = request.response if request else None
|
108 |
-
return out # type: ignore
|
109 |
-
|
110 |
-
def set_key(self, key: str, value: str, table: str = "default") -> None:
|
111 |
-
"""
|
112 |
-
Set the value for the key.
|
113 |
-
|
114 |
-
Will override old value.
|
115 |
-
|
116 |
-
Args:
|
117 |
-
key: key for cache.
|
118 |
-
value: new value for key.
|
119 |
-
table: table to set key in.
|
120 |
-
"""
|
121 |
-
key = self._hash_key(key, table)
|
122 |
-
request = self.session.query(Request).filter_by(key=key).first() # type: ignore
|
123 |
-
if request:
|
124 |
-
request.response = value # type: ignore
|
125 |
-
else:
|
126 |
-
self.session.add(Request(key=key, response=value))
|
127 |
-
self.commit()
|
128 |
-
|
129 |
-
def commit(self) -> None:
|
130 |
-
"""Commit any results."""
|
131 |
-
self.session.commit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
duckdb-nsql/manifest/manifest/caches/redis.py
DELETED
@@ -1,64 +0,0 @@
|
|
1 |
-
"""Redis cache."""
|
2 |
-
from typing import Any, Dict, Union
|
3 |
-
|
4 |
-
import redis
|
5 |
-
|
6 |
-
from manifest.caches.cache import Cache
|
7 |
-
|
8 |
-
|
9 |
-
class RedisCache(Cache):
|
10 |
-
"""A Redis cache for request/response pairs."""
|
11 |
-
|
12 |
-
def connect(self, connection_str: str, cache_args: Dict[str, Any]) -> None:
|
13 |
-
"""
|
14 |
-
Connect to client.
|
15 |
-
|
16 |
-
Args:
|
17 |
-
connection_str: connection string.
|
18 |
-
cache_args: arguments for cache.
|
19 |
-
"""
|
20 |
-
host, port = connection_str.split(":")
|
21 |
-
self.redis = redis.Redis(host=host, port=int(port), db=0)
|
22 |
-
return
|
23 |
-
|
24 |
-
def close(self) -> None:
|
25 |
-
"""Close the client."""
|
26 |
-
self.redis.close()
|
27 |
-
|
28 |
-
def _normalize_table_key(self, key: str, table: str) -> str:
|
29 |
-
"""Cast key for prompt key."""
|
30 |
-
return f"{table}:{key}"
|
31 |
-
|
32 |
-
def get_key(self, key: str, table: str = "default") -> Union[str, None]:
|
33 |
-
"""
|
34 |
-
Get the key for a request.
|
35 |
-
|
36 |
-
With return None if key is not in cache.
|
37 |
-
|
38 |
-
Args:
|
39 |
-
key: key for cache.
|
40 |
-
table: table to get key in.
|
41 |
-
"""
|
42 |
-
norm_key = self._normalize_table_key(key, table)
|
43 |
-
if self.redis.exists(norm_key):
|
44 |
-
return self.redis.get(norm_key).decode("utf-8")
|
45 |
-
else:
|
46 |
-
return None
|
47 |
-
|
48 |
-
def set_key(self, key: str, value: str, table: str = "default") -> None:
|
49 |
-
"""
|
50 |
-
Set the value for the key.
|
51 |
-
|
52 |
-
Will override old value.
|
53 |
-
|
54 |
-
Args:
|
55 |
-
key: key for cache.
|
56 |
-
value: new value for key.
|
57 |
-
table: table to set key in.
|
58 |
-
"""
|
59 |
-
self.redis.set(self._normalize_table_key(key, table), value)
|
60 |
-
self.commit()
|
61 |
-
|
62 |
-
def commit(self) -> None:
|
63 |
-
"""Commit any results."""
|
64 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
duckdb-nsql/manifest/manifest/caches/serializers.py
DELETED
@@ -1,204 +0,0 @@
|
|
1 |
-
"""Serializer."""
|
2 |
-
|
3 |
-
import io
|
4 |
-
import json
|
5 |
-
import os
|
6 |
-
from pathlib import Path
|
7 |
-
from typing import Dict
|
8 |
-
|
9 |
-
import numpy as np
|
10 |
-
import xxhash
|
11 |
-
|
12 |
-
from manifest.caches.array_cache import ArrayCache
|
13 |
-
|
14 |
-
|
15 |
-
class Serializer:
|
16 |
-
"""Serializer."""
|
17 |
-
|
18 |
-
def request_to_key(self, request: Dict) -> str:
|
19 |
-
"""
|
20 |
-
Normalize a request into a key.
|
21 |
-
|
22 |
-
Args:
|
23 |
-
request: request to normalize.
|
24 |
-
|
25 |
-
Returns:
|
26 |
-
normalized key.
|
27 |
-
"""
|
28 |
-
return json.dumps(request, sort_keys=True)
|
29 |
-
|
30 |
-
def key_to_request(self, key: str) -> Dict:
|
31 |
-
"""
|
32 |
-
Convert the normalized version to the request.
|
33 |
-
|
34 |
-
Args:
|
35 |
-
key: normalized key to convert.
|
36 |
-
|
37 |
-
Returns:
|
38 |
-
unnormalized request dict.
|
39 |
-
"""
|
40 |
-
return json.loads(key)
|
41 |
-
|
-    def response_to_key(self, response: Dict) -> str:
-        """
-        Normalize a response into a key.
-
-        Args:
-            response: response to normalize.
-
-        Returns:
-            normalized key.
-        """
-        return json.dumps(response, sort_keys=True)
-
-    def key_to_response(self, key: str) -> Dict:
-        """
-        Convert the normalized version to the response.
-
-        Args:
-            key: normalized key to convert.
-
-        Returns:
-            unnormalized response dict.
-        """
-        return json.loads(key)
-
-
-class NumpyByteSerializer(Serializer):
-    """Serializer by casting array to byte string."""
-
-    def response_to_key(self, response: Dict) -> str:
-        """
-        Normalize a response into a key.
-
-        Args:
-            response: response to normalize.
-
-        Returns:
-            normalized key.
-        """
-        sub_response = response["response"]
-        # Assume response is a dict with keys "choices" -> List dicts
-        # with keys "array".
-        choices = sub_response["choices"]
-        # We don't want to modify the response in place
-        # but we want to avoid calling deepcopy on an array
-        del sub_response["choices"]
-        response_copy = sub_response.copy()
-        sub_response["choices"] = choices
-        response_copy["choices"] = []
-        for choice in choices:
-            if "array" not in choice:
-                raise ValueError(
-                    f"Choice with keys {choice.keys()} does not have array key."
-                )
-            arr = choice["array"]
-            # Avoid copying an array
-            del choice["array"]
-            new_choice = choice.copy()
-            choice["array"] = arr
-            with io.BytesIO() as f:
-                np.savez_compressed(f, data=arr)
-                hash_str = f.getvalue().hex()
-            new_choice["array"] = hash_str
-            response_copy["choices"].append(new_choice)
-        response["response"] = response_copy
-        return json.dumps(response, sort_keys=True)
-
-    def key_to_response(self, key: str) -> Dict:
-        """
-        Convert the normalized version to the response.
-
-        Args:
-            key: normalized key to convert.
-
-        Returns:
-            unnormalized response dict.
-        """
-        response = json.loads(key)
-        for choice in response["response"]["choices"]:
-            hash_str = choice["array"]
-            byte_str = bytes.fromhex(hash_str)
-            with io.BytesIO(byte_str) as f:
-                choice["array"] = np.load(f)["data"]
-        return response
-
-
-class ArraySerializer(Serializer):
-    """Serializer for array."""
-
-    def __init__(self) -> None:
-        """
-        Initialize array serializer.
-
-        We don't want to cache the array. We hash the value and
-        store the array in a memmap file. Store filename/offsets
-        in sqlitedict to keep track of hash -> array.
-        """
-        super().__init__()
-
-        self.hash = xxhash.xxh64()
-        manifest_home = Path(os.environ.get("MANIFEST_HOME", Path.home()))
-        cache_folder = manifest_home / ".manifest" / "array_cache"
-        self.writer = ArrayCache(cache_folder)
-
-    def response_to_key(self, response: Dict) -> str:
-        """
-        Normalize a response into a key.
-
-        Convert arrays to hash string for cache key.
-
-        Args:
-            response: response to normalize.
-
-        Returns:
-            normalized key.
-        """
-        sub_response = response["response"]
-        # Assume response is a dict with keys "choices" -> List dicts
-        # with keys "array".
-        choices = sub_response["choices"]
-        # We don't want to modify the response in place
-        # but we want to avoid calling deepcopy on an array
-        del sub_response["choices"]
-        response_copy = sub_response.copy()
-        sub_response["choices"] = choices
-        response_copy["choices"] = []
-        for choice in choices:
-            if "array" not in choice:
-                raise ValueError(
-                    f"Choice with keys {choice.keys()} does not have array key."
-                )
-            arr = choice["array"]
-            # Avoid copying an array
-            del choice["array"]
-            new_choice = choice.copy()
-            choice["array"] = arr
-
-            self.hash.update(arr)
-            hash_str = self.hash.hexdigest()
-            self.hash.reset()
-            new_choice["array"] = hash_str
-            response_copy["choices"].append(new_choice)
-            if not self.writer.contains_key(hash_str):
-                self.writer.put(hash_str, arr)
-        response["response"] = response_copy
-        return json.dumps(response, sort_keys=True)
-
-    def key_to_response(self, key: str) -> Dict:
-        """
-        Convert the normalized version to the response.
-
-        Convert the hash string keys to the arrays.
-
-        Args:
-            key: normalized key to convert.
-
-        Returns:
-            unnormalized response dict.
-        """
-        response = json.loads(key)
-        for choice in response["response"]["choices"]:
-            hash_str = choice["array"]
-            choice["array"] = self.writer.get(hash_str)
-        return response
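The two serializers above differ only in where the array bytes end up: NumpyByteSerializer inlines a compressed hex payload into the JSON key, while ArraySerializer stores the array in an on-disk ArrayCache and keeps only an xxhash digest. A minimal standalone sketch of the byte-string round trip; the array and dict shape are made up for illustration, and only numpy, io, and json are needed:

```python
import io
import json

import numpy as np

# Encode: compress the array into an in-memory .npz archive, then store
# its raw bytes as a hex string inside a JSON-safe dict.
arr = np.arange(6, dtype=np.float32).reshape(2, 3)
with io.BytesIO() as f:
    np.savez_compressed(f, data=arr)
    hex_payload = f.getvalue().hex()
key = json.dumps({"choices": [{"array": hex_payload}]}, sort_keys=True)

# Decode: reverse the hex -> bytes -> npz steps to recover the array.
restored = json.loads(key)
with io.BytesIO(bytes.fromhex(restored["choices"][0]["array"])) as f:
    recovered = np.load(f)["data"]
assert np.array_equal(recovered, arr)
```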
duckdb-nsql/manifest/manifest/caches/sqlite.py
DELETED
@@ -1,65 +0,0 @@
-"""SQLite cache."""
-import logging
-from typing import Any, Dict, Union
-
-from sqlitedict import SqliteDict
-
-from manifest.caches.cache import Cache
-
-logging.getLogger("sqlitedict").setLevel(logging.WARNING)
-
-
-class SQLiteCache(Cache):
-    """A SQLite cache for request/response pairs."""
-
-    def connect(self, connection_str: str, cache_args: Dict[str, Any]) -> None:
-        """
-        Connect to client.
-
-        Args:
-            connection_str: connection string.
-            cache_args: arguments for cache.
-        """
-        self.cache_file = connection_str
-        if not self.cache_file:
-            self.cache_file = ".sqlite.cache"
-        self.cache = SqliteDict(self.cache_file, autocommit=True)
-        return
-
-    def close(self) -> None:
-        """Close the client."""
-        self.cache.close()
-
-    def _normalize_table_key(self, key: str, table: str) -> str:
-        """Cast key for prompt key."""
-        return f"{table}:{key}"
-
-    def get_key(self, key: str, table: str = "default") -> Union[str, None]:
-        """
-        Get the key for a request.
-
-        With return None if key is not in cache.
-
-        Args:
-            key: key for cache.
-            table: table to get key in.
-        """
-        return self.cache.get(self._normalize_table_key(key, table))
-
-    def set_key(self, key: str, value: str, table: str = "default") -> None:
-        """
-        Set the value for the key.
-
-        Will override old value.
-
-        Args:
-            key: key for cache.
-            value: new value for key.
-            table: table to set key in.
-        """
-        self.cache[self._normalize_table_key(key, table)] = value
-        self.commit()
-
-    def commit(self) -> None:
-        """Commit any results."""
-        self.cache.commit()
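The deleted SQLiteCache is essentially a SqliteDict with table-prefixed keys. A rough sketch of the same idea outside the Cache base class, assuming sqlitedict is installed; the file name and keys are placeholders:

```python
from sqlitedict import SqliteDict

cache = SqliteDict(".sqlite.cache", autocommit=True)

def set_key(key: str, value: str, table: str = "default") -> None:
    # Same normalization as the deleted _normalize_table_key.
    cache[f"{table}:{key}"] = value

def get_key(key: str, table: str = "default"):
    return cache.get(f"{table}:{key}")

set_key("prompt-hash", '{"choices": []}')
assert get_key("prompt-hash") == '{"choices": []}'
cache.close()
```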
duckdb-nsql/manifest/manifest/clients/__init__.py
DELETED
@@ -1 +0,0 @@
-"""Client init."""
duckdb-nsql/manifest/manifest/clients/ai21.py
DELETED
@@ -1,125 +0,0 @@
-"""AI21 client."""
-import logging
-import os
-from typing import Any, Dict, Optional
-
-from manifest.clients.client import Client
-from manifest.request import LMRequest
-
-logger = logging.getLogger(__name__)
-
-AI21_ENGINES = {
-    "j2-ultra",
-    "j2-mid",
-    "j2-light",
-}
-
-
-class AI21Client(Client):
-    """AI21Client client."""
-
-    # User param -> (client param, default value)
-    PARAMS = {
-        "engine": ("engine", "j2-ultra"),
-        "temperature": ("temperature", 0.7),
-        "max_tokens": ("maxTokens", 40),
-        "top_k": ("topKReturn", 0),
-        "n": ("numResults", 1),
-        "top_p": ("topP", 1.0),
-        "stop_sequences": ("stopSequences", []),
-    }
-    REQUEST_CLS = LMRequest
-    NAME = "ai21"
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the AI21 server.
-
-        connection_str is passed as default AI21_API_KEY if variable not set.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        # Taken from https://docs.ai21.com/
-        self.host = "https://api.ai21.com/studio/v1"
-        self.api_key = connection_str or os.environ.get("AI21_API_KEY")
-        if self.api_key is None:
-            raise ValueError(
-                "AI21 API key not set. Set AI21_API_KEY environment "
-                "variable or pass through `client_connection`."
-            )
-
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-        if getattr(self, "engine") not in AI21_ENGINES:
-            raise ValueError(
-                f"Invalid engine {getattr(self, 'engine')}. Must be {AI21_ENGINES}."
-            )
-
-    def close(self) -> None:
-        """Close the client."""
-        pass
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        return self.host + "/" + getattr(self, "engine") + "/complete"
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {"Authorization": f"Bearer {self.api_key}"}
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return False
-
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        return False
-
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        return {"model_name": self.NAME, "engine": getattr(self, "engine")}
-
-    def postprocess_response(self, response: Dict, request: Dict) -> Dict[str, Any]:
-        """
-        Format response to dict.
-
-        Args:
-            response: response
-            request: request
-
-        Return:
-            response as dict
-        """
-        return {
-            "object": "text_completion",
-            "model": getattr(self, "engine"),
-            "choices": [
-                {
-                    "text": item["data"]["text"],
-                    "token_logprobs": item["data"]["tokens"],
-                }
-                for item in response["completions"]
-            ],
-        }
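All of the clients in this vendored copy of Manifest share the PARAMS convention shown above: user-facing name -> (provider parameter name, default). A simplified sketch of how such a table can turn Manifest-style kwargs into a provider payload; the real translation lives in Request.to_dict, which is not shown here:

```python
PARAMS = {
    "max_tokens": ("maxTokens", 40),
    "temperature": ("temperature", 0.7),
    "top_p": ("topP", 1.0),
}

def to_provider_payload(prompt: str, **kwargs) -> dict:
    # Fall back to the declared default when the caller omits a key,
    # and rename to the provider-side parameter name.
    payload = {"prompt": prompt}
    for user_key, (provider_key, default) in PARAMS.items():
        payload[provider_key] = kwargs.get(user_key, default)
    return payload

print(to_provider_payload("SELECT 1;", temperature=0.0))
# {'prompt': 'SELECT 1;', 'maxTokens': 40, 'temperature': 0.0, 'topP': 1.0}
```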
duckdb-nsql/manifest/manifest/clients/azureendpoint.py
DELETED
@@ -1,139 +0,0 @@
-"""OpenRouter client."""
-
-import copy
-import logging
-import os
-from typing import Any, Dict, Optional
-import time
-from manifest.clients.client import Client
-from manifest.request import LMRequest
-import urllib.request
-import json
-import os
-import ssl
-
-logger = logging.getLogger(__name__)
-def allowSelfSignedHttps(allowed):
-    # bypass the server certificate verification on client side
-    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
-        ssl._create_default_https_context = ssl._create_unverified_context
-
-allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.
-
-
-class AzureEndpointClient(Client):
-    """OpenRouter client."""
-
-    # Params are defined in https://openrouter.ai/docs/parameters
-    PARAMS = {
-        "engine": ("model", "meta-llama/codellama-70b-instruct"),
-        "max_tokens": ("max_tokens", 1000),
-        "temperature": ("temperature", 0.1),
-        "top_k": ("k", 0),
-        "frequency_penalty": ("frequency_penalty", 0.0),
-        "presence_penalty": ("presence_penalty", 0.0),
-        "stop_sequences": ("stop", None),
-    }
-    REQUEST_CLS = LMRequest
-    NAME = "azureendpoint"
-    IS_CHAT = True
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the OpenRouter server.
-
-        connection_str is passed as default OPENROUTER_API_KEY if variable not set.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-
-        self.host = os.environ.get("AZURE_HOST")
-        # Replace this with the primary/secondary key, AMLToken, or Microsoft Entra ID token for the endpoint
-        self.api_key = os.environ.get("AZURE_API_KEY")
-        if not self.api_key:
-            raise Exception("A key should be provided to invoke the endpoint")
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-
-    def close(self) -> None:
-        """Close the client."""
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {'Content-Type':'application/json', 'Authorization':('Bearer '+ self.api_key), 'azureml-model-deployment': 'duckdb-nsql-v2-phi-medium-1' }
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        return self.host + "/score"
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return False
-
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        return True
-
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        return {"model_name": AzureEndpointClient.NAME, "engine": getattr(self, 'engine')}
-
-    def preprocess_request_params(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Preprocess request params.
-
-        Args:
-            request: request params.
-
-        Returns:
-            request params.
-        """
-        # Format for chat model
-        request = copy.deepcopy(request)
-        prompt = request.pop("prompt")
-        data = {"input_data": {"input_string": [{"role": "user", "content": prompt}], "parameters": {"stop":"\n```", "max_tokens": 500}}}
-
-        #body = str(str.encode(json.dumps(data)))
-        return super().preprocess_request_params(data)
-
-    def postprocess_response(self, response: Dict, request: Dict) -> Dict[str, Any]:
-        """
-        Format response to dict.
-
-        Args:
-            response: response
-            request: request
-
-        Return:
-            response as dict
-        """
-        new_choices = []
-        response = copy.deepcopy(response)
-        if "output" in response:
-            new_choices.append({"text": response["output"]})
-        else:
-            new_choices.append({"text": ""})
-        response["choices"] = new_choices
-        return super().postprocess_response(response, request)
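AzureEndpointClient wraps the prompt in Azure ML's scoring schema before posting to <host>/score. A hedged sketch of the equivalent raw request: the environment variables, deployment header value, and the assumption that the reply carries an "output" field all come from the deleted code above, while the prompt and timeout are purely illustrative:

```python
import os

import requests

payload = {
    "input_data": {
        "input_string": [{"role": "user", "content": "How many rows are in taxi.parquet?"}],
        "parameters": {"stop": "\n```", "max_tokens": 500},
    }
}
resp = requests.post(
    os.environ["AZURE_HOST"] + "/score",
    headers={
        "Content-Type": "application/json",
        "Authorization": "Bearer " + os.environ["AZURE_API_KEY"],
        "azureml-model-deployment": "duckdb-nsql-v2-phi-medium-1",
    },
    json=payload,
    timeout=60,
)
# The deleted postprocess_response treats a missing "output" as empty text.
print(resp.json().get("output", ""))
```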
duckdb-nsql/manifest/manifest/clients/azureopenai.py
DELETED
@@ -1,113 +0,0 @@
-"""Azure client."""
-import logging
-import os
-from typing import Any, Dict, Optional, Type
-
-from manifest.clients.openai import OPENAI_ENGINES, OpenAIClient
-from manifest.request import LMRequest, Request
-
-logger = logging.getLogger(__name__)
-
-# Azure deployment name can only use letters and numbers, no spaces. Hyphens ("-") and
-# underscores ("_") may be used, except as ending characters. We create this mapping to
-# handle difference between Azure and OpenAI
-AZURE_DEPLOYMENT_NAME_MAPPING = {
-    "gpt-3.5-turbo": "gpt-35-turbo",
-    "gpt-3.5-turbo-0301": "gpt-35-turbo-0301",
-}
-OPENAI_DEPLOYMENT_NAME_MAPPING = {
-    "gpt-35-turbo": "gpt-3.5-turbo",
-    "gpt-35-turbo-0301": "gpt-3.5-turbo-0301",
-}
-
-
-class AzureClient(OpenAIClient):
-    """Azure client."""
-
-    PARAMS = OpenAIClient.PARAMS
-    REQUEST_CLS: Type[Request] = LMRequest
-    NAME = "azureopenai"
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the AzureOpenAI server.
-
-        connection_str is passed as default AZURE_OPENAI_KEY if variable not set.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        self.api_key, self.host = None, None
-        if connection_str:
-            connection_parts = connection_str.split("::")
-            if len(connection_parts) == 1:
-                self.api_key = connection_parts[0]
-            elif len(connection_parts) == 2:
-                self.api_key, self.host = connection_parts
-            else:
-                raise ValueError(
-                    "Invalid connection string. "
-                    "Must be either AZURE_OPENAI_KEY or "
-                    "AZURE_OPENAI_KEY::AZURE_OPENAI_ENDPOINT"
-                )
-        self.api_key = self.api_key or os.environ.get("AZURE_OPENAI_KEY")
-        if self.api_key is None:
-            raise ValueError(
-                "AzureOpenAI API key not set. Set AZURE_OPENAI_KEY environment "
-                "variable or pass through `client_connection`."
-            )
-        self.host = self.host or os.environ.get("AZURE_OPENAI_ENDPOINT")
-        if self.host is None:
-            raise ValueError(
-                "Azure Service URL not set "
-                "(e.g. https://openai-azure-service.openai.azure.com/)."
-                " Set AZURE_OPENAI_ENDPOINT or pass through `client_connection`."
-                " as AZURE_OPENAI_KEY::AZURE_OPENAI_ENDPOINT"
-            )
-        self.host = self.host.rstrip("/")
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-        if getattr(self, "engine") not in OPENAI_ENGINES:
-            raise ValueError(
-                f"Invalid engine {getattr(self, 'engine')}. Must be {OPENAI_ENGINES}."
-            )
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        engine = getattr(self, "engine")
-        deployment_name = AZURE_DEPLOYMENT_NAME_MAPPING.get(engine, engine)
-        return (
-            self.host
-            + "/openai/deployments/"
-            + deployment_name
-            + "/completions?api-version=2023-05-15"
-        )
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {"api-key": f"{self.api_key}"}
-
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        # IMPORTANT!!!
-        # Azure models are the same as openai models. So we want to unify their
-        # cached. Make sure we retrun the OpenAI name here.
-        return {"model_name": OpenAIClient.NAME, "engine": getattr(self, "engine")}
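The Azure-specific part of this client is just the deployment-name mapping plus the URL layout; a small sketch of both, where the endpoint value is a placeholder:

```python
AZURE_DEPLOYMENT_NAME_MAPPING = {
    "gpt-3.5-turbo": "gpt-35-turbo",
    "gpt-3.5-turbo-0301": "gpt-35-turbo-0301",
}

def completion_url(host: str, engine: str) -> str:
    # Azure deployment names cannot contain ".", so OpenAI model names
    # are remapped before being placed in the deployments path.
    deployment = AZURE_DEPLOYMENT_NAME_MAPPING.get(engine, engine)
    return (
        f"{host.rstrip('/')}/openai/deployments/{deployment}"
        "/completions?api-version=2023-05-15"
    )

print(completion_url("https://my-resource.openai.azure.com/", "gpt-3.5-turbo"))
```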
duckdb-nsql/manifest/manifest/clients/azureopenai_chat.py
DELETED
@@ -1,116 +0,0 @@
-"""Azure client."""
-import logging
-import os
-from typing import Any, Dict, Optional
-
-from manifest.clients.openai_chat import OPENAICHAT_ENGINES, OpenAIChatClient
-from manifest.request import LMRequest
-
-logger = logging.getLogger(__name__)
-
-# Azure deployment name can only use letters and numbers, no spaces. Hyphens ("-") and
-# underscores ("_") may be used, except as ending characters. We create this mapping to
-# handle difference between Azure and OpenAI
-AZURE_DEPLOYMENT_NAME_MAPPING = {
-    "gpt-3.5-turbo": "gpt-35-turbo",
-    "gpt-3.5-turbo-0301": "gpt-35-turbo-0301",
-}
-OPENAI_DEPLOYMENT_NAME_MAPPING = {
-    "gpt-35-turbo": "gpt-3.5-turbo",
-    "gpt-35-turbo-0301": "gpt-3.5-turbo-0301",
-}
-
-
-class AzureChatClient(OpenAIChatClient):
-    """Azure chat client."""
-
-    # User param -> (client param, default value)
-    PARAMS = OpenAIChatClient.PARAMS
-    REQUEST_CLS = LMRequest
-    NAME = "azureopenaichat"
-    IS_CHAT = True
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the AzureOpenAI server.
-
-        connection_str is passed as default AZURE_OPENAI_KEY if variable not set.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        self.api_key, self.host = None, None
-        if connection_str:
-            connection_parts = connection_str.split("::")
-            if len(connection_parts) == 1:
-                self.api_key = connection_parts[0]
-            elif len(connection_parts) == 2:
-                self.api_key, self.host = connection_parts
-            else:
-                raise ValueError(
-                    "Invalid connection string. "
-                    "Must be either AZURE_OPENAI_KEY or "
-                    "AZURE_OPENAI_KEY::AZURE_OPENAI_ENDPOINT"
-                )
-        self.api_key = self.api_key or os.environ.get("AZURE_OPENAI_KEY")
-        if self.api_key is None:
-            raise ValueError(
-                "AzureOpenAI API key not set. Set AZURE_OPENAI_KEY environment "
-                "variable or pass through `client_connection`."
-            )
-        self.host = self.host or os.environ.get("AZURE_OPENAI_ENDPOINT")
-        if self.host is None:
-            raise ValueError(
-                "Azure Service URL not set "
-                "(e.g. https://openai-azure-service.openai.azure.com/)."
-                " Set AZURE_OPENAI_ENDPOINT or pass through `client_connection`."
-                " as AZURE_OPENAI_KEY::AZURE_OPENAI_ENDPOINT"
-            )
-        self.host = self.host.rstrip("/")
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-        if getattr(self, "engine") not in OPENAICHAT_ENGINES:
-            raise ValueError(
-                f"Invalid engine {getattr(self, 'engine')}. "
-                f"Must be {OPENAICHAT_ENGINES}."
-            )
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        engine = getattr(self, "engine")
-        deployment_name = AZURE_DEPLOYMENT_NAME_MAPPING.get(engine, engine)
-        return (
-            self.host
-            + "/openai/deployments/"
-            + deployment_name
-            + "/chat/completions?api-version=2023-05-15"
-        )
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {"api-key": f"{self.api_key}"}
-
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        # IMPORTANT!!!
-        # Azure models are the same as openai models. So we want to unify their
-        # cached. Make sure we retrun the OpenAI name here.
-        return {"model_name": OpenAIChatClient.NAME, "engine": getattr(self, "engine")}
duckdb-nsql/manifest/manifest/clients/client.py
DELETED
@@ -1,699 +0,0 @@
-"""Client class."""
-import asyncio
-import copy
-import json
-import logging
-import math
-from abc import ABC, abstractmethod
-from typing import Any, Dict, Generator, List, Optional, Tuple, Union, cast
-
-import aiohttp
-import requests
-import tqdm.asyncio
-from tenacity import RetryCallState, retry, stop_after_attempt, wait_random_exponential
-
-from manifest.request import (
-    DEFAULT_REQUEST_KEYS,
-    NOT_CACHE_KEYS,
-    LMChatRequest,
-    LMRequest,
-    LMScoreRequest,
-    Request,
-)
-from manifest.response import (
-    RESPONSE_CONSTRUCTORS,
-    ArrayModelChoice,
-    LMModelChoice,
-    ModelChoices,
-    Response,
-    Usage,
-    Usages,
-)
-
-logger = logging.getLogger(__name__)
-
-ATTEMPTS_BEFORE_STOP = 4
-ATTEMPTS_TIMEOUT = 30
-# http_status mainly for azure and e.code mainly for openai usage
-# e.http_status == 408 occurs when Azure times out
-# e.code == 429 rate lime
-# e.code == 500 or 502 occurs when server error
-API_ERROR_CODE = {408, 429, 500, 502, 520, 524}
-
-
-def retry_if_ratelimit(retry_base: RetryCallState) -> bool:
-    """Return whether to retry if ratelimited."""
-    try:
-        if isinstance(retry_base.outcome.exception(), requests.exceptions.HTTPError):
-            exception = cast(
-                requests.exceptions.HTTPError, retry_base.outcome.exception()
-            )
-            # 500 is a server error, 429 is a rate limit error
-            if exception.response.status_code in API_ERROR_CODE:  # type: ignore
-                return True
-    except Exception:
-        pass
-    return True
-
-
-def return_error_response(retry_state: RetryCallState) -> dict:
-    """Return error response if all retries failed."""
-    request_params = retry_state.args[1]
-    number_of_prompts = (
-        len(request_params["prompt"])
-        if "prompt" in request_params
-        else len(request_params["messages"])
-    )
-    return {
-        "choices": [],
-        "usage": {
-            "total_tokens": 0,
-            "prompt_tokens": 0,
-            "completion_tokens": 0,
-        },
-        "errors": [str(retry_state.outcome.exception())] * number_of_prompts,
-    }
-
-
-class Client(ABC):
-    """Client class."""
-
-    # Must be overridden by child class
-    PARAMS: Dict[str, Tuple[str, Any]] = {}
-    REQUEST_CLS = Request
-    NAME: str = None
-    IS_CHAT: bool = False
-
-    def __init__(
-        self, connection_str: Optional[str] = None, client_args: Dict[str, Any] = {}
-    ):
-        """
-        Initialize client.
-
-        kwargs are passed to client as default parameters.
-
-        For clients like OpenAI that do not require a connection,
-        the connection_str can be None.
-
-        Args:
-            connection_str: connection string for client.
-            client_args: client arguments.
-        """
-        self.connect(connection_str, client_args)
-
-    @abstractmethod
-    def connect(
-        self, connection_str: Optional[str], client_args: Dict[str, Any]
-    ) -> None:
-        """
-        Connect to client.
-
-        Override in child client class.
-        Args:
-            connection_str: connection string.
-        """
-        raise NotImplementedError()
-
-    @abstractmethod
-    def close(self) -> None:
-        """Close the client.
-
-        Override in child client class.
-        """
-        raise NotImplementedError()
-
-    @abstractmethod
-    def get_generation_url(self) -> str:
-        """Get generation URL.
-
-        Override in child client class.
-        """
-        raise NotImplementedError()
-
-    @abstractmethod
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Override in child client class.
-        Returns:
-            header.
-        """
-        raise NotImplementedError()
-
-    @abstractmethod
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference.
-
-        Override in child client class.
-        """
-        raise NotImplementedError()
-
-    @abstractmethod
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        raise NotImplementedError()
-
-    @abstractmethod
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Override in child client class.
-        Returns:
-            model params.
-        """
-        raise NotImplementedError()
-
-    def get_tokenizer(self, model: str) -> Tuple[Any, int]:
-        """Get tokenizer for model.
-
-        Override in child client class. Return None, -1 if not supported
-        or no prompt truncation required.
-        Returns:
-            tokenizer: tokenizer with encoder and decode
-            max_length: max length of model
-        """
-        return None, -1
-
-    def get_model_inputs(self) -> List:
-        """
-        Get allowable model inputs.
-
-        Returns:
-            model inputs.
-        """
-        return list(self.PARAMS.keys())
-
-    def split_usage(self, request: Dict, choices: List[str]) -> List[Dict[str, int]]:
-        """Split usage into list of usages for each prompt."""
-        # TODO: add this in using default tokenizer
-        return []
-
-    def preprocess_request_params(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Preprocess request params.
-
-        Override in child client class to reformat requests to model.
-
-        Args:
-            request: request params.
-
-        Returns:
-            request params.
-        """
-        return request
-
-    def postprocess_response(
-        self, response: Dict[str, Any], request: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Postprocess and validate response as dict.
-
-        Override in child client class to reform model responses.
-
-        Args:
-            response: response
-            request: request
-
-        Return:
-            response as dict
-        """
-        if "choices" not in response:
-            raise ValueError(f"Invalid response: {response}")
-        if "usage" in response:
-            # Handle splitting the usages for batch requests
-            if len(response["choices"]) == 1:
-                if isinstance(response["usage"], list):
-                    response["usage"] = response["usage"][0]
-                response["usage"] = [response["usage"]]
-            else:
-                # Try to split usage
-                split_usage = self.split_usage(request, response["choices"])
-                if split_usage:
-                    response["usage"] = split_usage
-        return response
-
-    def get_request(
-        self, prompt: Union[str, List[str]], request_args: Dict[str, Any]
-    ) -> Request:
-        """
-        Parse model kwargs to request.
-
-        Args:
-            prompt: prompt.
-            request_args: request arguments.
-
-        Returns:
-            request.
-        """
-        params = {"prompt": prompt}
-        # Adds default values from self.PARAMS if not in request_args
-        for key in self.PARAMS:
-            params[key] = request_args.pop(key, getattr(self, key))
-        # Allows for overriding DEFAULT_REQUEST_KEYS even if they are not
-        # in self.PARAMS. Note that DEFAULT_REQUEST_KEYS match the default
-        # values in Request.
-        for key in DEFAULT_REQUEST_KEYS:
-            if key not in params and key in request_args:
-                params[key] = request_args.pop(key)
-        return self.REQUEST_CLS(**params)  # type: ignore
-
-    def _get_request_params(self, request: Request) -> Dict[str, Any]:
-        """Get request params.
-
-        Add default keys that we need for requests such as batch_size.
-        We drop these before sending to the model.
-        """
-        params_to_add = DEFAULT_REQUEST_KEYS.copy()
-        # This will override DEFAULT_REQUEST_KEYS with those in PARAMS
-        params_to_add.update(self.PARAMS)
-        # to_dict will handle parameter renaming but not any
-        # default value handling - that is done in get_request()
-        request_params = request.to_dict(params_to_add)
-        return request_params
-
-    def get_cache_key(self, request: Request) -> Dict[str, Any]:
-        """Get cache key for request.
-
-        Skip keys that are not cache keys such as batch_size.
-        """
-        request_params = self._get_request_params(request)
-        for key in NOT_CACHE_KEYS:
-            request_params.pop(key, None)
-        # Make sure to add model params and request class
-        request_params.update(self.get_model_params())
-        request_params["request_cls"] = request.__class__.__name__
-        return request_params
-
-    def _split_requests(
-        self, request_params: Dict[str, Any], batch_size: int, key: str = "prompt"
-    ) -> List[Dict[str, Any]]:
-        """Split request into batch_sized request.
-
-        Args:
-            request_params: request params.
-            batch_size: batch size for requests.
-            key: key to batch over
-
-        Returns:
-            list of request params.
-        """
-        data = copy.deepcopy(request_params[key])
-        data_size = len(request_params[key])
-        request_params_list = []
-        for i in range(0, data_size, batch_size):
-            params = copy.deepcopy(request_params)
-            params[key] = data[i] if batch_size == 1 else data[i : i + batch_size]
-            request_params_list.append(params)
-        return request_params_list
-
-    def _get_model_choices(self, response: Dict) -> ModelChoices:
-        """Format response to ModelChoices."""
-        # Array or text response
-        response_type = RESPONSE_CONSTRUCTORS[self.REQUEST_CLS]["response_type"]
-        if response_type == "array":
-            choices: List[Union[LMModelChoice, ArrayModelChoice]] = [
-                ArrayModelChoice(**choice) for choice in response["choices"]
-            ]
-        else:
-            choices = [LMModelChoice(**choice) for choice in response["choices"]]
-        return ModelChoices(choices=choices)
-
-    def _stitch_responses(self, request: Request, responses: List[Dict]) -> Response:
-        """Stitch responses together.
-
-        Useful for batch requests.
-        """
-        choices = []
-        usages = []
-        for res_dict in responses:
-            choices.extend(res_dict["choices"])
-            if "usage" in res_dict:
-                usages.extend(res_dict["usage"])
-        final_response_dict = {"choices": choices}
-        final_usages = None
-        if usages:
-            final_usages = Usages(usages=[Usage(**usage) for usage in usages])
-        # TODO: Add usage based on tokenizer
-        return Response(
-            self._get_model_choices(final_response_dict),
-            cached=False,
-            request=request,
-            usages=final_usages,
-            **RESPONSE_CONSTRUCTORS[self.REQUEST_CLS],  # type: ignore
-        )
-
-    def _verify_request_lengths(
-        self, request: Dict[str, Any], model: str, max_tokens: int
-    ) -> None:
-        """Verify that the request length is not too long."""
-        encoder, max_length = self.get_tokenizer(model)
-        if not encoder or max_length < 0:
-            return
-        if isinstance(request["prompt"], str):
-            prompts = [request["prompt"]]
-        else:
-            prompts = request["prompt"]
-        for i in range(len(prompts)):
-            prompt = prompts[i]
-            encoded_prompt = encoder.encode(prompt)
-            if len(encoded_prompt) + max_tokens > max_length:
-                logger.warning(
-                    f"Prompt {prompt} is too long for model {model}. "
-                    "Truncating prompt from left."
-                )
-                # -20 to be safe
-                prompt = encoder.decode(
-                    encoded_prompt[-int(max_length - max_tokens - 20) :]
-                )
-                prompts[i] = prompt
-        if isinstance(request["prompt"], str):
-            request["prompt"] = prompts[0]
-        else:
-            request["prompt"] = prompts
-
-    @retry(
-        reraise=True,
-        wait=wait_random_exponential(min=1, max=ATTEMPTS_TIMEOUT),
-        stop=stop_after_attempt(ATTEMPTS_BEFORE_STOP),
-    )
-    def _run_completion(
-        self, request_params: Dict[str, Any], retry_timeout: int
-    ) -> Dict:
-        """Execute completion request.
-
-        Args:
-            request_params: request params.
-            retry_timeout: retry timeout.
-
-        Returns:
-            response as dict.
-        """
-        request_params = self.preprocess_request_params(request_params)
-        print(request_params)
-        post_str = self.get_generation_url()
-        res = requests.post(
-            post_str,
-            headers=self.get_generation_header(),
-            json=request_params,
-            timeout=retry_timeout,
-        )
-        try:
-            res.raise_for_status()
-        except requests.exceptions.HTTPError as e:
-            logger.warning(
-                str(e)
-            )
-            raise Exception()
-        return self.postprocess_response(res.json(), request_params)
-
-    @retry(
-        reraise=True,
-        retry=retry_if_ratelimit,
-        wait=wait_random_exponential(min=1, max=ATTEMPTS_TIMEOUT),
-        stop=stop_after_attempt(ATTEMPTS_BEFORE_STOP),
-    )
-    async def _arun_completion(
-        self, request_params: Dict[str, Any], retry_timeout: int
-    ) -> Dict:
-        """Async execute completion request.
-
-        Args:
-            request_params: request params.
-            retry_timeout: retry timeout.
-
-        Returns:
-            response as dict.
-        """
-        request_params = self.preprocess_request_params(request_params)
-        post_str = self.get_generation_url()
-        async with aiohttp.ClientSession(timeout=retry_timeout) as session:
-            async with session.post(
-                post_str,
-                headers=self.get_generation_header(),
-                json=request_params,
-                timeout=retry_timeout,
-            ) as res:
-                res.raise_for_status()
-                res_json = await res.json(content_type=None)
-                return self.postprocess_response(res_json, request_params)
-
-    @retry(
-        reraise=True,
-        retry=retry_if_ratelimit,
-        wait=wait_random_exponential(min=1, max=ATTEMPTS_TIMEOUT),
-        stop=stop_after_attempt(ATTEMPTS_BEFORE_STOP),
-    )
-    def _run_streaming_completion(
-        self, request_params: Dict[str, Any], retry_timeout: int
-    ) -> Generator[Dict, None, None]:
-        """Execute completion request streaming.
-
-        Args:
-            request_params: request params.
-            retry_timeout: retry timeout.
-
-        Returns:
-            response as dict.
-        """
-        request_params = self.preprocess_request_params(request_params)
-        request_params["stream"] = True
-        post_str = self.get_generation_url()
-        res_iter = requests.post(
-            post_str,
-            headers=self.get_generation_header(),
-            json=request_params,
-            timeout=retry_timeout,
-            stream=True,
-        )
-        for res_token in res_iter.iter_lines():
-            if res_token:
-                decoded_res_token = res_token.decode("utf-8")
-                decoded_res_token = decoded_res_token.replace("data: ", "")
-                if decoded_res_token == "[DONE]":
-                    break
-                try:
-                    decoded_res_token_dct = json.loads(decoded_res_token)
-                    postprocess_res_token_dct = self.postprocess_response(
-                        decoded_res_token_dct, request_params
-                    )
-                    # If nothing is returned, skip
-                    if (
-                        not postprocess_res_token_dct
-                        or not postprocess_res_token_dct["choices"]
-                    ):
-                        continue
-                    yield postprocess_res_token_dct
-                except Exception as e:
-                    raise e
-
-    def run_request(self, request: Request) -> Response:
-        """
-        Run request.
-
-        Args:
-            request: request.
-
-        Returns:
-            response.
-        """
-        # Make everything list for consistency
-        if isinstance(request.prompt, list):
-            prompt_list = request.prompt
-        else:
-            prompt_list = [request.prompt]
-
-        request_params = self._get_request_params(request)
-        # Set the params as a list. Do not set the request
-        # object itself as the cache will then store it as a
-        # list which is inconsistent with the request input.
-        request_params["prompt"] = prompt_list
-
-        # If batch_size is not set, set it to 1
-        batch_size = request_params.pop("batch_size") or 1
-        if not self.supports_batch_inference() and batch_size != 1:
-            logger.warning(
-                f"{self.__class__.__name__} does not support batch inference."
-                " Setting batch size to 1"
-            )
-            batch_size = 1
-
-        # Take the default keys we need and drop the rest as they
-        # are not part of the model request.
-        retry_timeout = request_params.pop("client_timeout")
-        for key in DEFAULT_REQUEST_KEYS:
-            request_params.pop(key, None)
-
-        # Make sure requests are in the request length
-        # If no tokenizer is set or not LM request, this
-        # will do nothing
-        if isinstance(request, LMRequest):
-            self._verify_request_lengths(
-                request_params, model=request.engine, max_tokens=request.max_tokens
-            )
-
-        # Batch requests
-        num_batches = len(prompt_list) // batch_size
-        if len(prompt_list) % batch_size != 0:
-            batch_size = int(math.ceil(len(prompt_list) / (num_batches + 1)))
-        request_batches = self._split_requests(request_params, batch_size)
-
-        response_dicts = [
-            self._run_completion(batch, retry_timeout) for batch in request_batches
-        ]
-        # Flatten responses
-        return self._stitch_responses(request, response_dicts)
-
-    async def arun_batch_request(
-        self, request: Request, verbose: bool = False
-    ) -> Response:
-        """
-        Run async request.
-
-        Args:
-            request: request.s
-
-        Returns:
-            response.
-        """
-        required_batch_size = None
-        if not self.supports_batch_inference():
-            required_batch_size = 1
-        if not isinstance(request.prompt, list):
-            raise AssertionError(
-                "request.prompt must be a list for async batch inference."
-            )
-
-        request_params = self._get_request_params(request)
-        # Take the default keys we need and drop the rest as they
-        # are not part of the model request.
-        retry_timeout = request_params.pop("client_timeout")
-        batch_size = request_params.pop("batch_size")
-        batch_size = required_batch_size or batch_size
-        for key in DEFAULT_REQUEST_KEYS:
-            request_params.pop(key, None)
-
-        # Make sure requests are in the request length
-        # If no tokenizer is set or not LM request, this
-        # will do nothing
-        if isinstance(request, LMRequest):
-            self._verify_request_lengths(
-                request_params, model=request.engine, max_tokens=request.max_tokens
-            )
-
-        # Batch requests
-        num_batches = len(request.prompt) // batch_size
-        if len(request.prompt) % batch_size != 0:
-            batch_size = int(math.ceil(len(request.prompt) / (num_batches + 1)))
-
-        request_batches = self._split_requests(request_params, batch_size)
-        all_tasks = [
-            asyncio.create_task(self._arun_completion(batch, retry_timeout))
-            for batch in request_batches
-        ]
-        responses = await tqdm.asyncio.tqdm.gather(*all_tasks, disable=not verbose)
-        # Flatten responses
-        return self._stitch_responses(request, responses)
-
-    def run_chat_request(
-        self,
-        request: LMChatRequest,
-    ) -> Response:
-        """
-        Get the response from chat model.
-
-        Args:
-            request: request.
-
-        Returns:
-            response.
-        """
-        request_params = self._get_request_params(request)
-        # Take the default keys we need and drop the rest as they
-        # are not part of the model request.
-        retry_timeout = request_params.pop("client_timeout")
-        for key in DEFAULT_REQUEST_KEYS:
-            request_params.pop(key, None)
-
-        # Make sure requests are in the request length
-        # If no tokenizer is set or not LM request, this
-        # will do nothing
-        self._verify_request_lengths(
-            request_params, model=request.engine, max_tokens=request.max_tokens
-        )
-
-        response_dict = self._run_completion(request_params, retry_timeout)
-        usages = None
-        if "usage" in response_dict:
-            usages = [Usage(**usage) for usage in response_dict["usage"]]
-
-        return Response(
-            response=self._get_model_choices(response_dict),
-            cached=False,
-            request=request,
-            usages=Usages(usages=usages) if usages else None,
-            **RESPONSE_CONSTRUCTORS[LMChatRequest],  # type: ignore
-        )
-
-    def run_streaming_request(
-        self, request: Request
-    ) -> Generator[Response, None, None]:
-        """
-        Run streaming request.
-
-        Args:
-            request: request.
-
-        Returns:
-            response.
-        """
-        if not isinstance(request.prompt, str):
-            raise ValueError("Streaming requests must have a single prompt.")
-        if not self.supports_streaming_inference():
-            raise ValueError(
-                f"{self.__class__.__name__} does not support streaming inference."
-            )
-        request_params = self._get_request_params(request)
-
-        # Take the default keys we need and drop the rest as they
-        # are not part of the model request.
-        retry_timeout = request_params.pop("client_timeout")
-        for key in DEFAULT_REQUEST_KEYS:
-            request_params.pop(key, None)
-
-        # Make sure requests are in the request length
-        # If no tokenizer is set or not LM request, this
-        # will do nothing
-        if isinstance(request, LMRequest):
-            self._verify_request_lengths(
-                request_params, model=request.engine, max_tokens=request.max_tokens
-            )
-
-        for token_response in self._run_streaming_completion(
-            request_params, retry_timeout
-        ):
-            yield self._stitch_responses(request, [token_response])
-
-    def run_score_prompt_request(
-        self,
-        request: LMScoreRequest,
-    ) -> Response:
-        """
-        Get the logit score of the prompt via a forward pass of the model.
-
-        Args:
-            request: request.
-
-        Returns:
-            response.
-        """
-        raise NotImplementedError(
-            f"{self.__class__.__name__} does not support prompt scoring request."
-        )
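Two pieces of the deleted base client are worth keeping in mind when replacing it: the tenacity retry policy wrapped around every HTTP call and the batch rebalancing arithmetic in run_request/arun_batch_request. A condensed sketch of both, with a plain requests POST standing in for the full preprocess/postprocess pipeline (the URL and payload are whatever the concrete client supplies):

```python
import math

import requests
from tenacity import retry, stop_after_attempt, wait_random_exponential

ATTEMPTS_BEFORE_STOP = 4
ATTEMPTS_TIMEOUT = 30

@retry(
    reraise=True,
    wait=wait_random_exponential(min=1, max=ATTEMPTS_TIMEOUT),
    stop=stop_after_attempt(ATTEMPTS_BEFORE_STOP),
)
def post_with_retries(url: str, payload: dict, timeout: int = 60) -> dict:
    # Mirrors _run_completion: any raised HTTP error triggers another
    # attempt with exponential backoff, up to four tries in total.
    res = requests.post(url, json=payload, timeout=timeout)
    res.raise_for_status()
    return res.json()

def split_prompts(prompts: list, batch_size: int) -> list:
    # Same rebalancing as run_request: if the prompts do not divide evenly,
    # shrink the batch size so the batches come out near-equal.
    num_batches = len(prompts) // batch_size
    if len(prompts) % batch_size != 0:
        batch_size = int(math.ceil(len(prompts) / (num_batches + 1)))
    return [prompts[i : i + batch_size] for i in range(0, len(prompts), batch_size)]

print(split_prompts(["a", "b", "c", "d", "e"], 2))  # [['a', 'b'], ['c', 'd'], ['e']]
```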
duckdb-nsql/manifest/manifest/clients/cohere.py
DELETED
@@ -1,125 +0,0 @@
-"""Cohere client."""
-
-import logging
-import os
-from typing import Any, Dict, Optional
-
-from manifest.clients.client import Client
-from manifest.request import LMRequest
-
-logger = logging.getLogger(__name__)
-
-COHERE_MODELS = {"small", "medium", "large", "xlarge"}
-
-
-class CohereClient(Client):
-    """Cohere client."""
-
-    # Params are defined in https://docs.cohere.ai/generate-reference
-    PARAMS = {
-        "engine": ("model", "xlarge"),
-        "max_tokens": ("max_tokens", 20),
-        "temperature": ("temperature", 0.75),
-        "n": ("num_generations", 1),
-        "top_k": ("k", 0),
-        "top_p": ("p", 0.75),
-        "frequency_penalty": ("frequency_penalty", 0.0),
-        "presence_penalty": ("presence_penalty", 0.0),
-        "stop_sequences": ("stop_sequences", None),
-    }
-    REQUEST_CLS = LMRequest
-    NAME = "cohere"
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the Cohere server.
-
-        connection_str is passed as default COHERE_API_KEY if variable not set.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        self.api_key = connection_str or os.environ.get("COHERE_API_KEY")
-        if self.api_key is None:
-            raise ValueError(
-                "Cohere API key not set. Set COHERE_API_KEY environment "
-                "variable or pass through `client_connection`."
-            )
-        self.host = "https://api.cohere.ai"
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-        if getattr(self, "engine") not in COHERE_MODELS:
-            raise ValueError(
-                f"Invalid engine {getattr(self, 'engine')}. Must be {COHERE_MODELS}."
-            )
-
-    def close(self) -> None:
-        """Close the client."""
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        return self.host + "/generate"
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {
-            "Cohere-Version": "2021-11-08",
-            "Authorization": f"Bearer {self.api_key}",
-        }
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return False
-
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        return False
-
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        return {"model_name": self.NAME, "engine": getattr(self, "engine")}
-
-    def postprocess_response(self, response: Dict, request: Dict) -> Dict[str, Any]:
-        """
-        Format response to dict.
-
-        Args:
-            response: response
-            request: request
-
-        Return:
-            response as dict
-        """
-        return {
-            "object": "text_completion",
-            "model": getattr(self, "engine"),
-            "choices": [
-                {
-                    "text": item["text"],
-                    "text_logprob": item.get("likelihood", None),
-                    "token_logprobs": item.get("token_likelihoods", None),
-                }
-                for item in response["generations"]
-            ],
-        }
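Like the other providers, CohereClient reshapes the provider response into the OpenAI-style choices list the rest of Manifest expects; a toy sketch of that mapping with a made-up response:

```python
raw = {"generations": [{"text": " SELECT 42;", "likelihood": -0.3}]}

normalized = {
    "object": "text_completion",
    "model": "xlarge",
    "choices": [
        # Missing likelihood fields simply become None, as in the client above.
        {"text": g["text"], "text_logprob": g.get("likelihood")}
        for g in raw["generations"]
    ],
}
print(normalized["choices"][0]["text"])
```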
duckdb-nsql/manifest/manifest/clients/diffuser.py DELETED
@@ -1,112 +0,0 @@
-"""Diffuser client."""
-import logging
-from functools import lru_cache
-from typing import Any, Dict, Optional
-
-import numpy as np
-import requests
-
-from manifest.clients.client import Client
-from manifest.request import DiffusionRequest
-
-logger = logging.getLogger(__name__)
-
-
-class DiffuserClient(Client):
-    """Diffuser client."""
-
-    # User param -> (client param, default value)
-    PARAMS = {
-        "num_inference_steps": ("num_inference_steps", 50),
-        "height": ("height", 512),
-        "width": ("width", 512),
-        "n": ("num_images_per_prompt", 1),
-        "guidance_scale": ("guidance_scale", 7.5),
-        "eta": ("eta", 0.0),
-    }
-    REQUEST_CLS = DiffusionRequest
-    NAME = "diffuser"
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the Diffuser url.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        self.host = connection_str.rstrip("/")
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-        self.model_params = self.get_model_params()
-
-    def to_numpy(self, image: np.ndarray) -> np.ndarray:
-        """Convert a float numpy image to a uint8 numpy image.
-
-        Adapted from https://github.com/huggingface/diffusers/blob/src/diffusers/pipelines/pipeline_utils.py#L808 # noqa: E501
-        """
-        image = (image * 255).round().astype("uint8")
-        return image
-
-    def close(self) -> None:
-        """Close the client."""
-        pass
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        return self.host + "/completions"
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {}
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return True
-
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        return False
-
-    @lru_cache(maxsize=1)
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        res = requests.post(self.host + "/params").json()
-        res["client_name"] = self.NAME
-        return res
-
-    def postprocess_response(self, response: Dict, request: Dict) -> Dict[str, Any]:
-        """
-        Format response to dict.
-
-        Args:
-            response: response
-            request: request
-
-        Return:
-            response as dict
-        """
-        # Convert array to np.array
-        for choice in response["choices"]:
-            choice["array"] = self.to_numpy(np.array(choice["array"]))
-        return response
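
For reference, the image conversion in the deleted to_numpy method boils down to the following standalone sketch; the 2x2 input array is an illustrative placeholder, not real model output.

import numpy as np

def to_uint8(image: np.ndarray) -> np.ndarray:
    # Same conversion as the deleted DiffuserClient.to_numpy:
    # scale a float image in [0, 1] to 8-bit pixel values.
    return (image * 255).round().astype("uint8")

# Tiny illustrative input (2x2 grayscale image), not real diffusion output.
print(to_uint8(np.array([[0.0, 0.5], [0.25, 1.0]])))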
duckdb-nsql/manifest/manifest/clients/dummy.py DELETED
@@ -1,251 +0,0 @@
-"""Dummy client."""
-import hashlib
-import logging
-from typing import Any, Dict, List, Optional, Tuple
-
-import numpy as np
-import tiktoken
-
-from manifest.clients.client import Client
-from manifest.request import LMChatRequest, LMRequest, LMScoreRequest, Request
-from manifest.response import LMModelChoice, ModelChoices, Response, Usage, Usages
-
-logger = logging.getLogger(__name__)
-
-
-class DummyClient(Client):
-    """Dummy client."""
-
-    # User param -> (client param, default value)
-    PARAMS = {
-        "engine": ("model", "text-davinci-003"),
-        "temperature": ("temperature", 0.0),
-        "max_tokens": ("max_tokens", 10),
-        "n": ("n", 1),
-        "top_p": ("top_p", 1.0),
-        "top_k": ("best_of", 1),
-        "batch_size": ("batch_size", 20),
-    }
-    REQUEST_CLS = LMRequest
-    NAME = "dummy"
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to dummy server.
-
-        This is a dummy client that returns identity responses. Used for testing.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        # We use tiktoken as it is faster than HF for tokenizing
-        # Use any model to create the tokenizer
-        self.encoder = tiktoken.get_encoding("cl100k_base")
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-
-    def close(self) -> None:
-        """Close the client."""
-        pass
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        return "dummy"
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return True
-
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        return False
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {}
-
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        return {"engine": "dummy", "model": getattr(self, "engine")}
-
-    def get_mock_output(
-        self, output_toks: int, is_completion: bool, seed: Optional[int] = None
-    ) -> LMModelChoice:
-        """Return mock model output by generating random tokens."""
-        np.random.seed(seed)
-        random_tokens = np.random.randint(
-            0, self.encoder.max_token_value + 1, output_toks
-        )
-        response = self.encoder.decode(random_tokens)  # type: ignore
-        if is_completion:
-            np.random.seed(seed)
-            random_logprobs = np.random.uniform(
-                low=-2, high=-0.00001, size=output_toks
-            ).tolist()
-        else:
-            # Return all Nones to mimic chat models
-            # OpenAI chat models do not return logprobs
-            random_logprobs = [None] * output_toks
-        return LMModelChoice(
-            text=response,
-            token_logprobs=random_logprobs,
-            tokens=random_tokens.tolist(),
-        )
-
-    def get_mock_choices(
-        self,
-        prompt_list: List[str],
-        request_params: Dict,
-        is_completion: bool,
-    ) -> Tuple[List[LMModelChoice], List[Usage]]:
-        """Get choices and usages of mock output."""
-        choices = []
-        usages = []
-        for prompt in prompt_list:
-            num_prompt_tokens = len(self.encoder.encode(prompt))
-            if request_params["temperature"] == 0:
-                # Get integer seed from hash of prompt
-                seed = (
-                    int(hashlib.sha256(prompt.encode("utf-8")).hexdigest(), 16)
-                    % 10**8
-                )
-            else:
-                # Get random seed
-                seed = None
-            for _ in range(int(request_params["n"])):
-                choice = self.get_mock_output(
-                    request_params["max_tokens"], is_completion=is_completion, seed=seed
-                )
-                choices.append(choice)
-                usages.append(
-                    Usage(
-                        prompt_tokens=num_prompt_tokens,
-                        completion_tokens=request_params["max_tokens"],
-                        total_tokens=num_prompt_tokens + request_params["max_tokens"],
-                    )
-                )
-        return choices, usages
-
-    def run_request(self, request: Request) -> Response:
-        """
-        Get request string function.
-
-        Args:
-            request: request.
-
-        Returns:
-            request function that takes no input.
-            request parameters as dict.
-        """
-        if isinstance(request.prompt, list):
-            prompt_list = request.prompt
-        else:
-            prompt_list = [request.prompt]
-        request_params = request.to_dict(self.PARAMS)
-
-        choices, usages = self.get_mock_choices(
-            prompt_list, request_params, is_completion=True
-        )
-        return Response(
-            response=ModelChoices(choices=choices),  # type: ignore
-            cached=False,
-            request=request,
-            usages=Usages(usages=usages),
-            response_type="text",
-            request_type=self.REQUEST_CLS,
-        )
-
-    async def arun_batch_request(
-        self, request: Request, verbose: bool = False
-    ) -> Response:
-        """
-        Get async request string function.
-
-        Args:
-            request: request.
-
-        Returns:
-            response.
-        """
-        return self.run_request(request)
-
-    def run_chat_request(
-        self,
-        request: LMChatRequest,
-    ) -> Response:
-        """
-        Get the response from chat model.
-
-        Args:
-            request: request.
-
-        Returns:
-            response.
-        """
-        prompt_list = ["_".join(pmp["content"] for pmp in request.prompt)]
-        request_params = request.to_dict(self.PARAMS)
-
-        choices, usages = self.get_mock_choices(
-            prompt_list, request_params, is_completion=False
-        )
-        return Response(
-            response=ModelChoices(choices=choices),  # type: ignore
-            cached=False,
-            request=request,
-            usages=Usages(usages=usages),
-            response_type="text",
-            request_type=LMChatRequest,
-        )
-
-    def run_score_prompt_request(
-        self,
-        request: LMScoreRequest,
-    ) -> Response:
-        """
-        Get the logit score of the prompt via a forward pass of the model.
-
-        Args:
-            request: request.
-
-        Returns:
-            request function that takes no input.
-            request parameters as dict.
-        """
-        if isinstance(request.prompt, list):
-            prompt_list = request.prompt
-        else:
-            prompt_list = [request.prompt]
-        request_params = request.to_dict(self.PARAMS)
-
-        choices, usages = self.get_mock_choices(
-            prompt_list, request_params, is_completion=True
-        )
-        return Response(
-            response=ModelChoices(choices=choices),  # type: ignore
-            cached=False,
-            request=request,
-            usages=Usages(usages=usages),
-            response_type="text",
-            request_type=LMScoreRequest,
-        )
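
For reference, a standalone sketch of the seeding trick the deleted DummyClient used so that temperature-0 mock outputs are reproducible per prompt; the example prompt is a hypothetical placeholder.

import hashlib

def prompt_seed(prompt: str) -> int:
    # Same idea as the deleted DummyClient: derive a stable integer seed
    # from a hash of the prompt so identical prompts give identical mocks.
    return int(hashlib.sha256(prompt.encode("utf-8")).hexdigest(), 16) % 10**8

print(prompt_seed("SELECT * FROM taxi;"))  # same prompt -> same seed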
duckdb-nsql/manifest/manifest/clients/google.py DELETED
@@ -1,197 +0,0 @@
-"""Google client."""
-import logging
-import os
-import subprocess
-from typing import Any, Dict, Optional, Type
-
-from manifest.clients.client import Client
-from manifest.request import LMRequest, Request
-
-logger = logging.getLogger(__name__)
-
-# https://cloud.google.com/vertex-ai/docs/generative-ai/start/quickstarts/api-quickstart
-GOOGLE_ENGINES = {
-    "text-bison",
-}
-
-
-def get_project_id() -> Optional[str]:
-    """Get project ID.
-
-    Run
-    `gcloud config get-value project`
-    """
-    try:
-        project_id = subprocess.run(
-            ["gcloud", "config", "get-value", "project"],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
-        if project_id.stderr.decode("utf-8").strip():
-            return None
-        return project_id.stdout.decode("utf-8").strip()
-    except Exception:
-        return None
-
-
-class GoogleClient(Client):
-    """Google client."""
-
-    # User param -> (client param, default value)
-    PARAMS = {
-        "engine": ("model", "text-bison"),
-        "temperature": ("temperature", 1.0),
-        "max_tokens": ("maxOutputTokens", 10),
-        "top_p": ("topP", 1.0),
-        "top_k": ("topK", 1),
-        "batch_size": ("batch_size", 20),
-    }
-    REQUEST_CLS: Type[Request] = LMRequest
-    NAME = "google"
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the GoogleVertex API.
-
-        connection_str is passed as default GOOGLE_API_KEY if variable not set.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        connection_parts = connection_str.split("::")
-        if len(connection_parts) == 1:
-            self.api_key = connection_parts[0]
-            self.project_id = None
-        elif len(connection_parts) == 2:
-            self.api_key, self.project_id = connection_parts
-        else:
-            raise ValueError(
-                "Invalid connection string. "
-                "Must be either API_KEY or API_KEY::PROJECT_ID"
-            )
-        self.api_key = self.api_key or os.environ.get("GOOGLE_API_KEY")
-        if self.api_key is None:
-            raise ValueError(
-                "GoogleVertex API key not set. Set GOOGLE_API_KEY environment "
-                "variable or pass through `client_connection`. This can be "
-                "found by running `gcloud auth print-access-token`"
-            )
-        self.project_id = (
-            self.project_id or os.environ.get("GOOGLE_PROJECT_ID") or get_project_id()
-        )
-        if self.project_id is None:
-            raise ValueError("GoogleVertex project ID not set. Set GOOGLE_PROJECT_ID")
-        self.host = f"https://us-central1-aiplatform.googleapis.com/v1/projects/{self.project_id}/locations/us-central1/publishers/google/models"  # noqa: E501
-
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-        if getattr(self, "engine") not in GOOGLE_ENGINES:
-            raise ValueError(
-                f"Invalid engine {getattr(self, 'engine')}. Must be {GOOGLE_ENGINES}."
-            )
-
-    def close(self) -> None:
-        """Close the client."""
-        pass
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        model = getattr(self, "engine")
-        return self.host + f"/{model}:predict"
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {"Authorization": f"Bearer {self.api_key}"}
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return True
-
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        return False
-
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        return {"model_name": self.NAME, "engine": getattr(self, "engine")}
-
-    def preprocess_request_params(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Preprocess request params.
-
-        Args:
-            request: request params.
-
-        Returns:
-            request params.
-        """
-        # Reformat the request params for google
-        prompt = request.pop("prompt")
-        if isinstance(prompt, str):
-            prompt_list = [prompt]
-        else:
-            prompt_list = prompt
-        google_request = {
-            "instances": [{"prompt": prompt} for prompt in prompt_list],
-            "parameters": request,
-        }
-        return super().preprocess_request_params(google_request)
-
-    def postprocess_response(
-        self, response: Dict[str, Any], request: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Validate response as dict.
-
-        Assumes response is dict
-        {
-            "predictions": [
-                {
-                    "safetyAttributes": {
-                        "categories": ["Violent", "Sexual"],
-                        "blocked": false,
-                        "scores": [0.1, 0.1]
-                    },
-                    "content": "SELECT * FROM "WWW";"
-                }
-            ]
-        }
-
-        Args:
-            response: response
-            request: request
-
-        Return:
-            response as dict
-        """
-        google_predictions = response.pop("predictions")
-        new_response = {
-            "choices": [
-                {
-                    "text": prediction["content"],
-                }
-                for prediction in google_predictions
-            ]
-        }
-        return super().postprocess_response(new_response, request)
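
For reference, a minimal standalone sketch of the request reshaping the deleted GoogleClient performed before calling Vertex AI: prompts move into "instances" and the remaining parameters ride along unchanged. The request dict below is a hypothetical example.

from typing import Any, Dict

def to_vertex_request(request: Dict[str, Any]) -> Dict[str, Any]:
    # Mirror the deleted preprocess_request_params: pull the prompt(s) out
    # into "instances" and pass everything else as "parameters".
    request = dict(request)
    prompt = request.pop("prompt")
    prompts = [prompt] if isinstance(prompt, str) else prompt
    return {
        "instances": [{"prompt": p} for p in prompts],
        "parameters": request,
    }

# Hypothetical request dict for illustration only.
print(to_vertex_request({"prompt": "SELECT 1;", "temperature": 0.7, "maxOutputTokens": 64}))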
duckdb-nsql/manifest/manifest/clients/google_chat.py DELETED
@@ -1,155 +0,0 @@
-"""Google client."""
-import copy
-import logging
-import os
-from typing import Any, Dict, Optional, Type
-
-from manifest.clients.google import GoogleClient, get_project_id
-from manifest.request import LMRequest, Request
-
-logger = logging.getLogger(__name__)
-
-# https://cloud.google.com/vertex-ai/docs/generative-ai/start/quickstarts/api-quickstart
-GOOGLE_ENGINES = {
-    "chat-bison",
-}
-
-
-class GoogleChatClient(GoogleClient):
-    """GoogleChat client."""
-
-    # User param -> (client param, default value)
-    PARAMS = {
-        "engine": ("model", "chat-bison"),
-        "temperature": ("temperature", 1.0),
-        "max_tokens": ("maxOutputTokens", 10),
-        "top_p": ("topP", 1.0),
-        "top_k": ("topK", 1),
-        "batch_size": ("batch_size", 20),
-    }
-    REQUEST_CLS: Type[Request] = LMRequest
-    NAME = "googlechat"
-    IS_CHAT = True
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the GoogleVertex API.
-
-        connection_str is passed as default GOOGLE_API_KEY if variable not set.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        connection_parts = connection_str.split("::")
-        if len(connection_parts) == 1:
-            self.api_key = connection_parts[0]
-        elif len(connection_parts) == 2:
-            self.api_key, self.project_id = connection_parts
-        else:
-            raise ValueError(
-                "Invalid connection string. "
-                "Must be either API_KEY or API_KEY::PROJECT_ID"
-            )
-        self.api_key = self.api_key or os.environ.get("GOOGLE_API_KEY")
-        if self.api_key is None:
-            raise ValueError(
-                "GoogleVertex API key not set. Set GOOGLE_API_KEY environment "
-                "variable or pass through `client_connection`. This can be "
-                "found by running `gcloud auth print-access-token`"
-            )
-        self.project_id = (
-            self.project_id or os.environ.get("GOOGLE_PROJECT_ID") or get_project_id()
-        )
-        if self.project_id is None:
-            raise ValueError("GoogleVertex project ID not set. Set GOOGLE_PROJECT_ID")
-        self.host = f"https://us-central1-aiplatform.googleapis.com/v1/projects/{self.project_id}/locations/us-central1/publishers/google/models"  # noqa: E501
-
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-        if getattr(self, "engine") not in GOOGLE_ENGINES:
-            raise ValueError(
-                f"Invalid engine {getattr(self, 'engine')}. Must be {GOOGLE_ENGINES}."
-            )
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return False
-
-    def preprocess_request_params(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Preprocess request params.
-
-        Args:
-            request: request params.
-
-        Returns:
-            request params.
-        """
-        # Format for chat model
-        request = copy.deepcopy(request)
-        prompt = request.pop("prompt")
-        if isinstance(prompt, str):
-            messages = [{"author": "user", "content": prompt}]
-        elif isinstance(prompt, list) and isinstance(prompt[0], str):
-            prompt_list = prompt
-            messages = [{"author": "user", "content": prompt} for prompt in prompt_list]
-        elif isinstance(prompt, list) and isinstance(prompt[0], dict):
-            for pmt_dict in prompt:
-                if "author" not in pmt_dict or "content" not in pmt_dict:
-                    raise ValueError(
-                        "Prompt must be list of dicts with 'author' and 'content' "
-                        f"keys. Got {prompt}."
-                    )
-            messages = prompt
-        else:
-            raise ValueError(
-                "Prompt must be string, list of strings, or list of dicts."
-                f"Got {prompt}"
-            )
-        new_request = {
-            "instances": [{"messages": messages}],
-            "parameters": request,
-        }
-        return super(GoogleClient, self).preprocess_request_params(new_request)
-
-    def postprocess_response(self, response: Dict, request: Dict) -> Dict[str, Any]:
-        """
-        Validate response as dict.
-
-        Assumes response is dict
-        {
-            "candidates": [
-                {
-                    "safetyAttributes": {
-                        "categories": ["Violent", "Sexual"],
-                        "blocked": false,
-                        "scores": [0.1, 0.1]
-                    },
-                    "author": "1",
-                    "content": "SELECT * FROM "WWW";"
-                }
-            ]
-        }
-
-        Args:
-            response: response
-            request: request
-
-        Return:
-            response as dict
-        """
-        google_predictions = response.pop("predictions")
-        new_response = {
-            "choices": [
-                {
-                    "text": prediction["candidates"][0]["content"],
-                }
-                for prediction in google_predictions
-            ]
-        }
-        return super(GoogleClient, self).postprocess_response(new_response, request)
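
For reference, a simplified standalone sketch of the prompt normalization the deleted GoogleChatClient performed (the full version above also validates the keys of dict prompts); the inputs are hypothetical placeholders.

from typing import Any, Dict, List, Union

def to_chat_messages(prompt: Union[str, List[Any]]) -> List[Dict[str, str]]:
    # Accept a string, a list of strings, or a list of
    # {"author", "content"} dicts and return Vertex-style message dicts.
    if isinstance(prompt, str):
        return [{"author": "user", "content": prompt}]
    if prompt and isinstance(prompt[0], str):
        return [{"author": "user", "content": p} for p in prompt]
    if prompt and isinstance(prompt[0], dict):
        return prompt
    raise ValueError("Prompt must be string, list of strings, or list of dicts.")

print(to_chat_messages(["Hi", "Write a query"]))  # illustrative input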
duckdb-nsql/manifest/manifest/clients/huggingface.py DELETED
@@ -1,137 +0,0 @@
-"""Hugging Face client."""
-import logging
-from functools import lru_cache
-from typing import Any, Dict, Optional
-
-import requests
-
-from manifest.clients.client import Client
-from manifest.request import DEFAULT_REQUEST_KEYS, LMRequest, LMScoreRequest
-from manifest.response import LMModelChoice, ModelChoices, Response
-
-logger = logging.getLogger(__name__)
-
-
-class HuggingFaceClient(Client):
-    """HuggingFace client."""
-
-    # User param -> (client param, default value)
-    PARAMS = {
-        "temperature": ("temperature", 0.1),
-        "max_tokens": ("max_tokens", 10),
-        "n": ("n", 1),
-        "top_p": ("top_p", 1.0),
-        "top_k": ("top_k", 50),
-        "repetition_penalty": ("repetition_penalty", 1.0),
-        "do_sample": ("do_sample", True),
-    }
-    REQUEST_CLS = LMRequest
-    NAME = "huggingface"
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the HuggingFace url.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        if not connection_str:
-            raise ValueError("Must provide connection string")
-        self.host = connection_str.rstrip("/")
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-
-    def close(self) -> None:
-        """Close the client."""
-        pass
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        return self.host + "/completions"
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {}
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return True
-
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        return False
-
-    @lru_cache(maxsize=1)
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        res = requests.post(self.host + "/params").json()
-        res["client_name"] = self.NAME
-        return res
-
-    def run_score_prompt_request(
-        self,
-        request: LMScoreRequest,
-    ) -> Response:
-        """
-        Get the logit score of the prompt via a forward pass of the model.
-
-        Args:
-            request: request.
-
-        Returns:
-            request function that takes no input.
-            request parameters as dict.
-        """
-        request_params = self._get_request_params(request)
-        retry_timeout = request_params.pop("client_timeout")
-        for key in DEFAULT_REQUEST_KEYS:
-            request_params.pop(key, None)
-        # Do not add params like we do with request as the model isn't sampling
-        request_params = {"prompt": request.prompt}
-
-        post_str = self.host + "/score_sequence"
-        try:
-            res = requests.post(
-                post_str,
-                json=request_params,
-                timeout=retry_timeout,
-            )
-            res.raise_for_status()
-        except requests.Timeout as e:
-            logger.error("HF request timed out. Increase client_timeout.")
-            raise e
-        except requests.exceptions.HTTPError as e:
-            logger.error(res.text)
-            raise e
-        response_dict = res.json()
-        return Response(
-            response=ModelChoices(
-                choices=[LMModelChoice(**choice) for choice in response_dict["choices"]]
-            ),
-            cached=False,
-            request=request,
-            usages=None,
-            response_type="text",
-            request_type=LMScoreRequest,
-        )
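
For reference, a standalone sketch of the user-param resolution pattern shared by these clients (the setattr loop over PARAMS); the PARAMS subset and input dict below are illustrative placeholders.

from typing import Any, Dict, Tuple

# Hypothetical subset of the user param -> (client param, default) table above.
PARAMS: Dict[str, Tuple[str, Any]] = {
    "temperature": ("temperature", 0.1),
    "max_tokens": ("max_tokens", 10),
    "top_k": ("top_k", 50),
}

def resolve_params(client_args: Dict[str, Any]) -> Dict[str, Any]:
    # For each known user param, take the caller's value if given, otherwise
    # fall back to the client default, mirroring the
    # `setattr(self, key, client_args.pop(key, ...))` loop in connect().
    return {key: client_args.pop(key, default) for key, (_, default) in PARAMS.items()}

print(resolve_params({"temperature": 0.0}))  # -> {'temperature': 0.0, 'max_tokens': 10, 'top_k': 50}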
duckdb-nsql/manifest/manifest/clients/huggingface_embedding.py DELETED
@@ -1,98 +0,0 @@
-"""Hugging Face client."""
-import logging
-from functools import lru_cache
-from typing import Any, Dict, Optional, Tuple
-
-import numpy as np
-import requests
-
-from manifest.clients.client import Client
-from manifest.request import EmbeddingRequest
-
-logger = logging.getLogger(__name__)
-
-
-class HuggingFaceEmbeddingClient(Client):
-    """HuggingFaceEmbedding client."""
-
-    # User param -> (client param, default value)
-    PARAMS: Dict[str, Tuple[str, Any]] = {}
-    REQUEST_CLS = EmbeddingRequest
-    NAME = "huggingfaceembedding"
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the HuggingFace url.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        if not connection_str:
-            raise ValueError("Must provide connection string")
-        self.host = connection_str.rstrip("/")
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-
-    def close(self) -> None:
-        """Close the client."""
-        pass
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        return self.host + "/embed"
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {}
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return True
-
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        return False
-
-    @lru_cache(maxsize=1)
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        res = requests.post(self.host + "/params").json()
-        res["client_name"] = self.NAME
-        return res
-
-    def postprocess_response(self, response: Dict, request: Dict) -> Dict[str, Any]:
-        """
-        Format response to dict.
-
-        Args:
-            response: response
-            request: request
-
-        Return:
-            response as dict
-        """
-        # Convert array to np.array
-        for choice in response["choices"]:
-            choice["array"] = np.array(choice["array"])
-        return response
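
For reference, a standalone sketch of the post-processing the deleted embedding client performed, rebuilding numpy arrays from JSON-serialized lists; the response payload is a hypothetical example.

from typing import Any, Dict

import numpy as np

def arrays_from_choices(response: Dict[str, Any]) -> Dict[str, Any]:
    # JSON carries each embedding as a plain list, so rebuild an np.ndarray
    # per choice, as the deleted postprocess_response did.
    for choice in response["choices"]:
        choice["array"] = np.array(choice["array"])
    return response

# Hypothetical response payload for illustration only.
out = arrays_from_choices({"choices": [{"array": [0.1, 0.2, 0.3]}]})
print(out["choices"][0]["array"].shape)  # (3,)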
duckdb-nsql/manifest/manifest/clients/openai.py DELETED
@@ -1,162 +0,0 @@
-"""OpenAI client."""
-import logging
-import os
-from typing import Any, Dict, List, Optional, Type
-
-import tiktoken
-
-from manifest.clients.client import Client
-from manifest.request import LMRequest, Request
-
-logger = logging.getLogger(__name__)
-
-OPENAI_ENGINES = {
-    "gpt-3.5-turbo-instruct",
-    "text-davinci-003",
-    "text-davinci-002",
-    "text-davinci-001",
-    "davinci",
-    "curie",
-    "ada",
-    "babbage",
-    "text-curie-001",
-    "text-babbage-001",
-    "text-ada-001",
-    "code-davinci-002",
-    "code-cushman-001",
-}
-
-
-class OpenAIClient(Client):
-    """OpenAI client."""
-
-    # User param -> (client param, default value)
-    PARAMS = {
-        "engine": ("model", "text-davinci-003"),
-        "temperature": ("temperature", 1.0),
-        "max_tokens": ("max_tokens", 10),
-        "n": ("n", 1),
-        "top_p": ("top_p", 1.0),
-        "top_k": ("best_of", 1),
-        "logprobs": ("logprobs", None),
-        "stop_sequences": ("stop", None),  # OpenAI doesn't like empty lists
-        "presence_penalty": ("presence_penalty", 0.0),
-        "frequency_penalty": ("frequency_penalty", 0.0),
-        "batch_size": ("batch_size", 20),
-    }
-    REQUEST_CLS: Type[Request] = LMRequest
-    NAME = "openai"
-
-    def connect(
-        self,
-        connection_str: Optional[str] = None,
-        client_args: Dict[str, Any] = {},
-    ) -> None:
-        """
-        Connect to the OpenAI server.
-
-        connection_str is passed as default OPENAI_API_KEY if variable not set.
-
-        Args:
-            connection_str: connection string.
-            client_args: client arguments.
-        """
-        self.api_key = connection_str or os.environ.get("OPENAI_API_KEY")
-        if self.api_key is None:
-            raise ValueError(
-                "OpenAI API key not set. Set OPENAI_API_KEY environment "
-                "variable or pass through `client_connection`."
-            )
-        self.host = "https://api.openai.com/v1"
-        for key in self.PARAMS:
-            setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))
-        if getattr(self, "engine") not in OPENAI_ENGINES:
-            raise ValueError(
-                f"Invalid engine {getattr(self, 'engine')}. Must be {OPENAI_ENGINES}."
-            )
-
-    def close(self) -> None:
-        """Close the client."""
-        pass
-
-    def get_generation_url(self) -> str:
-        """Get generation URL."""
-        return self.host + "/completions"
-
-    def get_generation_header(self) -> Dict[str, str]:
-        """
-        Get generation header.
-
-        Returns:
-            header.
-        """
-        return {"Authorization": f"Bearer {self.api_key}"}
-
-    def supports_batch_inference(self) -> bool:
-        """Return whether the client supports batch inference."""
-        return True
-
-    def supports_streaming_inference(self) -> bool:
-        """Return whether the client supports streaming inference.
-
-        Override in child client class.
-        """
-        return True
-
-    def get_model_params(self) -> Dict:
-        """
-        Get model params.
-
-        By getting model params from the server, we can add to request
-        and make sure cache keys are unique to model.
-
-        Returns:
-            model params.
-        """
-        return {"model_name": self.NAME, "engine": getattr(self, "engine")}
-
-    def postprocess_response(self, response: Dict, request: Dict) -> Dict[str, Any]:
-        """
-        Validate response as dict.
-
-        Args:
-            response: response
-            request: request
-
-        Return:
-            response as dict
-        """
-        validated_response = super().postprocess_response(response, request)
-        # Handle logprobs
-        for choice in validated_response["choices"]:
-            if "logprobs" in choice:
-                logprobs = choice.pop("logprobs")
-                if logprobs and "token_logprobs" in logprobs:
-                    choice["token_logprobs"] = logprobs["token_logprobs"]
-                    choice["tokens"] = logprobs["tokens"]
-        return validated_response
-
-    def split_usage(self, request: Dict, choices: List[str]) -> List[Dict[str, int]]:
-        """Split usage into list of usages for each prompt."""
-        try:
-            encoding = tiktoken.encoding_for_model(getattr(self, "engine"))
-        except Exception:
-            return []
-        prompt = request["prompt"]
-        # If n > 1 and prompt is a string, we need to split it into a list
-        if isinstance(prompt, str):
-            prompts = [prompt] * len(choices)
-        else:
-            prompts = prompt
-        assert len(prompts) == len(choices)
-        usages = []
-        for pmt, chc in zip(prompts, choices):
-            pmt_tokens = len(encoding.encode(pmt))
-            chc_tokens = len(encoding.encode(chc["text"]))  # type: ignore
-            usage = {
-                "prompt_tokens": pmt_tokens,
-                "completion_tokens": chc_tokens,
-                "total_tokens": pmt_tokens + chc_tokens,
-            }
-            usages.append(usage)
-        return usages
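
For reference, a standalone sketch of the token accounting behind the deleted split_usage method, assuming tiktoken is installed; the prompt, completion, and encoding choice below are illustrative placeholders (the deleted code resolved the encoding from the engine name via tiktoken.encoding_for_model).

import tiktoken

def usage_for(prompt: str, completion: str) -> dict:
    # Count prompt and completion tokens and sum them, as split_usage did
    # for each (prompt, choice) pair.
    enc = tiktoken.get_encoding("cl100k_base")
    prompt_tokens = len(enc.encode(prompt))
    completion_tokens = len(enc.encode(completion))
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }

print(usage_for("SELECT * FROM taxi;", " LIMIT 10;"))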