Synced repo using 'sync_with_huggingface' Github Action
- Dockerfile +2 -2
- app.py +2 -2
Dockerfile CHANGED
@@ -82,8 +82,8 @@ EXPOSE 7860
 
 # Start the FastAPI app using Uvicorn web server
 # CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "14000", "--limit-concurrency", "1000"]
-RUN python3 download.py
+# RUN python3 download.py
 
 # RUN chmod 755 models
 
-CMD ["python3", "app.py", "--host=0.0.0.0", "--port=7860", "--model_path=
+CMD ["python3", "app.py", "--host=0.0.0.0", "--port=7860", "--model_path=BAAI/bge-small-en-v1.5", "--num_workers=2"]
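The net effect of this change is that the model is no longer baked into the image at build time (the `RUN python3 download.py` step is commented out); instead the `CMD` pins the model id to BAAI/bge-small-en-v1.5 and the weights are fetched on first start and cached via the `cache_folder="."` change in app.py below. For illustration only, a build-time pre-fetch script like the now-disabled download.py might have looked like this (a hypothetical sketch; the actual script is not part of this commit):

```python
# Hypothetical sketch of a build-time pre-fetch script (the real download.py
# is not shown in this commit); assumes it used sentence-transformers.
from sentence_transformers import SentenceTransformer

# Instantiating the model once downloads and caches the weights, so a
# container built with this step needs no network access at startup.
SentenceTransformer("BAAI/bge-small-en-v1.5", cache_folder=".")
```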
app.py CHANGED
@@ -70,9 +70,9 @@ print_arguments(args)
 assert os.path.exists(args.model_path), f"{args.model_path}"
 #
 if args.use_gpu:
-    model = SentenceTransformer(args.model_path, device="cuda", compute_type="float16")
+    model = SentenceTransformer(args.model_path, device="cuda", compute_type="float16", cache_folder=".")
 else:
-    model = SentenceTransformer(args.model_path, device='cpu')
+    model = SentenceTransformer(args.model_path, device='cpu', cache_folder=".")
 
 
 app = FastAPI(title="embedding Inference")
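For context, here is a minimal, self-contained sketch of how app.py plausibly wires these pieces together. Only the model loading and the FastAPI title appear in this diff; the argparse flags and the /embed endpoint shape are assumptions. The sketch omits `compute_type`, which is not a standard `SentenceTransformer` keyword, and drops the `os.path.exists` assertion, since a hub id like "BAAI/bge-small-en-v1.5" is not a local path:

```python
# Minimal sketch around the changed lines; the CLI flags mirror the
# Dockerfile CMD, but the /embed endpoint is an assumption.
import argparse
from typing import List

import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="0.0.0.0")
parser.add_argument("--port", type=int, default=7860)
parser.add_argument("--model_path", type=str, default="BAAI/bge-small-en-v1.5")
parser.add_argument("--num_workers", type=int, default=2)
parser.add_argument("--use_gpu", action="store_true")
args = parser.parse_args()

# cache_folder="." stores downloaded weights in the working directory,
# which is writable inside the container; the hub id is resolved and
# cached there on first use.
if args.use_gpu:
    model = SentenceTransformer(args.model_path, device="cuda", cache_folder=".")
else:
    model = SentenceTransformer(args.model_path, device="cpu", cache_folder=".")

app = FastAPI(title="embedding Inference")

class EmbedRequest(BaseModel):
    texts: List[str]

@app.post("/embed")
def embed(req: EmbedRequest):
    # encode() returns a numpy array; tolist() makes it JSON-serializable.
    return {"embeddings": model.encode(req.texts).tolist()}

if __name__ == "__main__":
    uvicorn.run(app, host=args.host, port=args.port)
```

With the new CMD, the Space would serve this on port 7860, e.g. `curl -s -X POST http://localhost:7860/embed -H 'Content-Type: application/json' -d '{"texts": ["hello world"]}'` (endpoint name assumed as above).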