working server
- api/README.md +6 -15
- api/ollama_lightrag_server.py +8 -6
- api/requirements.txt +2 -1
api/README.md

````diff
@@ -56,13 +56,13 @@ Available options:
 |-----------|---------|-------------|
 | --host | 0.0.0.0 | Server host |
 | --port | 9621 | Server port |
-| --model | …
-| --embedding-model | …
+| --model | mistral-nemo:latest | LLM model name |
+| --embedding-model | bge-m3:latest | Embedding model name |
 | --ollama-host | http://localhost:11434 | Ollama host URL |
-| --working-dir | ./…
+| --working-dir | ./rag_storage | Working directory for RAG |
 | --max-async | 4 | Maximum async operations |
 | --max-tokens | 32768 | Maximum token size |
-| --embedding-dim | …
+| --embedding-dim | 1024 | Embedding dimensions |
 | --max-embed-tokens | 8192 | Maximum embedding token size |
 | --input-file | ./book.txt | Initial input file |
 | --log-level | INFO | Logging level |
@@ -79,11 +79,11 @@ python ollama_lightrag_server.py
 python ollama_lightrag_server.py --model llama2:13b --port 8080 --working-dir ./custom_rag
 ```
 
+Make sure the models are installed in your ollama instance
 ```bash
-python ollama_lightrag_server.py --model mistral-nemo:latest
+python ollama_lightrag_server.py --model mistral-nemo:latest --embedding-model bge-m3 --embedding-dim 1024
 ```
 
-
 ## API Endpoints
 
 ### Query Endpoints
@@ -165,13 +165,6 @@ When the server is running, visit:
 - Swagger UI: http://localhost:9621/docs
 - ReDoc: http://localhost:9621/redoc
 
-## Contributing
-
-1. Fork the repository
-2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
-3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
-4. Push to the branch (`git push origin feature/AmazingFeature`)
-5. Open a Pull Request
 
 ## License
 
@@ -182,5 +175,3 @@ This project is licensed under the MIT License - see the LICENSE file for detail
 - Built with [FastAPI](https://fastapi.tiangolo.com/)
 - Uses [LightRAG](https://github.com/HKUDS/LightRAG) for document processing
 - Powered by [Ollama](https://ollama.ai/) for LLM inference
-
-## Support
````
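The README's new usage note asks readers to install the default models first. As a quick pre-flight check, a small script like the sketch below (not part of this commit; it assumes Ollama's standard `/api/tags` listing endpoint and the two defaults named in the table above) can confirm both models are present before launching the server:

```python
# Hypothetical pre-flight check: list locally installed Ollama models via
# /api/tags and report any of the README defaults that are missing.
import json
import urllib.request

OLLAMA_HOST = "http://localhost:11434"  # matches the --ollama-host default
REQUIRED = {"mistral-nemo:latest", "bge-m3:latest"}  # defaults from the table above

with urllib.request.urlopen(f"{OLLAMA_HOST}/api/tags") as resp:
    installed = {m["name"] for m in json.load(resp)["models"]}

missing = REQUIRED - installed
if missing:
    print("Run `ollama pull <model>` for:", ", ".join(sorted(missing)))
```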
api/ollama_lightrag_server.py

```diff
@@ -13,6 +13,7 @@ from enum import Enum
 from pathlib import Path
 import shutil
 import aiofiles
+from ascii_colors import ASCIIColors, trace_exception
 
 def parse_args():
     parser = argparse.ArgumentParser(
@@ -30,17 +31,17 @@ def parse_args():
                         help='Directory containing input documents (default: ./inputs)')
 
     # Model configuration
-    parser.add_argument('--model', default='…
-    parser.add_argument('--embedding-model', default='…
-                        help='Embedding model name (default: …
+    parser.add_argument('--model', default='mistral-nemo:latest', help='LLM model name (default: mistral-nemo:latest)')
+    parser.add_argument('--embedding-model', default='bge-m3:latest',
+                        help='Embedding model name (default: bge-m3:latest)')
     parser.add_argument('--ollama-host', default='http://localhost:11434',
                         help='Ollama host URL (default: http://localhost:11434)')
 
     # RAG configuration
     parser.add_argument('--max-async', type=int, default=4, help='Maximum async operations (default: 4)')
     parser.add_argument('--max-tokens', type=int, default=32768, help='Maximum token size (default: 32768)')
-    parser.add_argument('--embedding-dim', type=int, default=…
-                        help='Embedding dimensions (default: …
+    parser.add_argument('--embedding-dim', type=int, default=1024,
+                        help='Embedding dimensions (default: 1024)')
     parser.add_argument('--max-embed-tokens', type=int, default=8192,
                         help='Maximum embedding token size (default: 8192)')
 
@@ -150,6 +151,7 @@ def create_app(args):
                 doc_manager.mark_as_indexed(file_path)
                 logging.info(f"Indexed file: {file_path}")
             except Exception as e:
+                trace_exception(e)
                 logging.error(f"Error indexing file {file_path}: {str(e)}")
 
     logging.info(f"Indexed {len(new_files)} documents from {args.input_dir}")
@@ -328,7 +330,7 @@ def create_app(args):
         raise HTTPException(status_code=500, detail=str(e))
 
 
-    @app.get("/…
+    @app.get("/health")
     async def get_status():
         """Get current system status"""
         return {
```
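The status route is now served at `GET /health`. A minimal smoke test against a running server, assuming the default port 9621 (a sketch, not part of the commit):

```python
# Hit the new /health endpoint and print the status dict the handler returns.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:9621/health") as resp:
    print(json.load(resp))  # FastAPI serializes the handler's dict to JSON
```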
api/requirements.txt

```diff
@@ -1,3 +1,4 @@
 fastapi
 uvicorn
-python-multipart
+python-multipart
+ascii_colors
```
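`ascii_colors` is pulled in for the `trace_exception` helper now called in the indexing error path above. In isolation it behaves roughly like this sketch (assuming the package's `trace_exception(e)` prints the exception's traceback with color highlighting, which is what the server relies on):

```python
# Standalone sketch of the new dependency: trace_exception logs a full,
# color-highlighted traceback for the given exception.
from ascii_colors import trace_exception

try:
    raise RuntimeError("simulated indexing failure")
except Exception as e:
    trace_exception(e)
```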