Spaces:

misalsathsara
/

phi15-js-api

Sleeping

App Files Files Community

misalsathsara committed on about 1 month ago

Commit

0d621b0

verified ·

1 Parent(s): 81dff5e

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -15

app.py CHANGED Viewed

@@ -1,24 +1,33 @@
 import os
-# Redirect Hugging Face cache to a writable directory
 os.environ["HF_HOME"] = "/tmp"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
-from fastapi import FastAPI
-from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-import re
 app = FastAPI()
 model_id = "misalsathsara/phi1.5-js-codegen"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 model.eval()
 system_prompt = """
 You are a smart javascript assistant that only generates only the best simple javascript functions without any comments like this:
 function transform(row) {
@@ -49,13 +58,13 @@ class RequestData(BaseModel):
 def generate_code(data: RequestData):
     instruction = data.instruction
     full_prompt = system_prompt + f"\n### Instruction:\n{instruction}\n\n### Response:\n"
     input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         output_ids = model.generate(
             input_ids,
-            max_new_tokens=200,
             temperature=0.3,
             top_k=50,
             top_p=0.95,
@@ -65,17 +74,12 @@ def generate_code(data: RequestData):
     generated_text = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
-    # Only return JavaScript function — no extra text
     # Extract only the JavaScript function that ends with return row;
     match = re.search(r"function\s+transform\s*\([^)]*\)\s*{[^}]*return row;\s*}", generated_text, re.DOTALL)
     if match:
         clean_output = match.group(0).strip()
     else:
-        # fallback: try to grab only up to "return row;"
         fallback = generated_text.split("return row;")[0] + "return row;"
         clean_output = fallback.strip()
-    from fastapi.responses import PlainTextResponse
-    return PlainTextResponse(clean_output)

 import os
+import torch
+import re
+from fastapi import FastAPI
+from fastapi.responses import PlainTextResponse
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Set cache directory for HF Spaces
 os.environ["HF_HOME"] = "/tmp"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
+# Optional: speed up inference on CPU
+torch.set_num_threads(1)
 app = FastAPI()
+# Load model + tokenizer
 model_id = "misalsathsara/phi1.5-js-codegen"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id)
+# Optional: compile the model on PyTorch >= 2 (uncomment the next line to enable; leave it commented out if it errors)
+# model = torch.compile(model)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 model.eval()
+# JS assistant system prompt
 system_prompt = """
 You are a smart javascript assistant that only generates only the best simple javascript functions without any comments like this:
 function transform(row) {
 def generate_code(data: RequestData):
     instruction = data.instruction
     full_prompt = system_prompt + f"\n### Instruction:\n{instruction}\n\n### Response:\n"
     input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         output_ids = model.generate(
             input_ids,
+            max_new_tokens=100,  # Faster
             temperature=0.3,
             top_k=50,
             top_p=0.95,
     generated_text = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
     # Extract only the JavaScript function that ends with return row;
     match = re.search(r"function\s+transform\s*\([^)]*\)\s*{[^}]*return row;\s*}", generated_text, re.DOTALL)
     if match:
         clean_output = match.group(0).strip()
     else:
         fallback = generated_text.split("return row;")[0] + "return row;"
         clean_output = fallback.strip()
+    return PlainTextResponse(clean_output)