⚡️ Limit the number of tokens sent to the backend (#93)
.env
CHANGED

@@ -6,6 +6,7 @@ MONGODB_DB_NAME=chat-ui
 HF_TOKEN=#your huggingface token here
 COOKIE_NAME=hf-chat
 
+PUBLIC_MAX_INPUT_TOKENS=1024
 PUBLIC_ORIGIN=#https://hf.co
 PUBLIC_MODEL_ENDPOINT=https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-6-llama-30b
 PUBLIC_MODEL_NAME=OpenAssistant/oasst-sft-6-llama-30b # public facing link
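SvelteKit inlines $env/static/public values at build time and always exposes them as strings, which is why every use of the new variable in this PR goes through parseInt. A minimal sketch of reading the setting (the variable name maxInputTokens is just for illustration):

import { PUBLIC_MAX_INPUT_TOKENS } from "$env/static/public";

// $env/static/public values are strings, even for numeric settings,
// so parse once before using the limit as a number.
const maxInputTokens: number = parseInt(PUBLIC_MAX_INPUT_TOKENS); // 1024 with the .env above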
src/lib/buildPrompt.ts
CHANGED

@@ -1,5 +1,6 @@
 import {
 	PUBLIC_ASSISTANT_MESSAGE_TOKEN,
+	PUBLIC_MAX_INPUT_TOKENS,
 	PUBLIC_SEP_TOKEN,
 	PUBLIC_USER_MESSAGE_TOKEN,
 } from "$env/static/public";
@@ -11,7 +12,7 @@ import type { Message } from "./types/Message";
  * <|assistant|>hi<|endoftext|><|prompter|>hello<|endoftext|><|assistant|>
  */
 export function buildPrompt(messages: Message[]): string {
-	return (
+	const prompt =
 		messages
 			.map(
 				(m) =>
@@ -20,6 +21,8 @@ export function buildPrompt(messages: Message[]): string {
 						: PUBLIC_ASSISTANT_MESSAGE_TOKEN + m.content) +
 					(m.content.endsWith(PUBLIC_SEP_TOKEN) ? "" : PUBLIC_SEP_TOKEN)
 			)
-			.join("") + PUBLIC_ASSISTANT_MESSAGE_TOKEN
-	);
+			.join("") + PUBLIC_ASSISTANT_MESSAGE_TOKEN;
+
+	// Not super precise, but it's truncated in the model's backend anyway
+	return prompt.split(" ").slice(-parseInt(PUBLIC_MAX_INPUT_TOKENS)).join(" ");
 }
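The new truncation is word-based rather than token-based: it splits the prompt on spaces and keeps only the last PUBLIC_MAX_INPUT_TOKENS pieces, so it only approximates the tokenizer's count, and slicing from the end preserves the most recent turns of the conversation. A standalone sketch of the same idea (the function name and example values are illustrative, not part of the diff):

// Approximate a token limit by whitespace-separated words, keeping the tail.
function truncatePrompt(prompt: string, limit: number): string {
	return prompt.split(" ").slice(-limit).join(" ");
}

truncatePrompt("a b c d e", 3); // "c d e" — the oldest words are dropped

As the in-code comment notes, precision is not critical here because the backend truncates exactly anyway; the client-side cut mainly keeps request payloads bounded.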
src/routes/conversation/[id]/+page.svelte
CHANGED

@@ -8,7 +8,7 @@
 	import { invalidate } from "$app/navigation";
 	import { base } from "$app/paths";
 	import { trimSuffix } from "$lib/utils/trimSuffix";
-	import { PUBLIC_SEP_TOKEN } from "$env/static/public";
+	import { PUBLIC_SEP_TOKEN, PUBLIC_MAX_INPUT_TOKENS } from "$env/static/public";
 	import { trimPrefix } from "$lib/utils/trimPrefix";
 	import { shareConversation } from "$lib/shareConversation";
 	import { UrlDependency } from "$lib/types/UrlDependency";
@@ -41,7 +41,7 @@
 				repetition_penalty: 1.2,
 				top_k: 50,
 				// @ts-ignore
-				truncate:
+				truncate: parseInt(PUBLIC_MAX_INPUT_TOKENS),
 				watermark: false,
 				max_new_tokens: 1024,
 				stop: ["<|endoftext|>"],
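Unlike the word-level cut in buildPrompt, the truncate parameter asks the inference backend itself to keep only the last N input tokens, measured with the real tokenizer; the // @ts-ignore suggests the field was not yet part of the library's typed parameters at the time. A rough sketch of the request body these parameters belong to, assuming the endpoint follows the usual text-generation { inputs, parameters } shape (the prompt value is a placeholder):

import { PUBLIC_MAX_INPUT_TOKENS } from "$env/static/public";

// Placeholder prompt; in the app this string comes from buildPrompt().
const prompt = "<|prompter|>hello<|endoftext|><|assistant|>";

const body = {
	inputs: prompt,
	parameters: {
		repetition_penalty: 1.2,
		top_k: 50,
		// Server-side truncation: keep only the last N input tokens.
		truncate: parseInt(PUBLIC_MAX_INPUT_TOKENS),
		watermark: false,
		max_new_tokens: 1024,
		stop: ["<|endoftext|>"],
	},
};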
src/routes/conversation/[id]/summarize/+server.ts
CHANGED

@@ -1,5 +1,5 @@
 import { HF_TOKEN } from "$env/static/private";
-import { PUBLIC_MODEL_ENDPOINT } from "$env/static/public";
+import { PUBLIC_MAX_INPUT_TOKENS, PUBLIC_MODEL_ENDPOINT } from "$env/static/public";
 import { buildPrompt } from "$lib/buildPrompt";
 import { collections } from "$lib/server/database.js";
 import { textGeneration } from "@huggingface/inference";
@@ -33,6 +33,7 @@ export async function POST({ params, locals, fetch }) {
 		top_k: 50,
 		watermark: false,
 		max_new_tokens: 1024,
+		truncate: parseInt(PUBLIC_MAX_INPUT_TOKENS),
 		stop: ["<|endoftext|>"],
 		return_full_text: false,
 	};
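The summarize endpoint gets the same cap through the textGeneration helper from @huggingface/inference. A sketch of how the call might be wired up, assuming the helper accepts an { accessToken, model, inputs, parameters } argument as in the library's documented usage (the inputs string is a placeholder, and parameters may need a cast if truncate is missing from the library's types):

import { HF_TOKEN } from "$env/static/private";
import { PUBLIC_MAX_INPUT_TOKENS, PUBLIC_MODEL_ENDPOINT } from "$env/static/public";
import { textGeneration } from "@huggingface/inference";

// A sketch, not the endpoint's actual code: run one generation against
// the configured endpoint with the same truncate cap as the chat route.
const { generated_text } = await textGeneration({
	accessToken: HF_TOKEN,
	model: PUBLIC_MODEL_ENDPOINT,
	inputs: "Summarize the conversation in five words or fewer.",
	parameters: {
		top_k: 50,
		watermark: false,
		max_new_tokens: 1024,
		truncate: parseInt(PUBLIC_MAX_INPUT_TOKENS),
		stop: ["<|endoftext|>"],
		return_full_text: false,
	},
});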