feat: Implement build vector store workflow and local script for artifact generation
Browse files
- .github/workflows/build-vector-store.yml +111 -0
- .gitignore +1 -1
- scripts/build-vector-store.sh +38 -0
.github/workflows/build-vector-store.yml
CHANGED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Builds the vector store with py-src/pipeline.py, uploads the resulting JSON
# files and zipped store as a workflow artifact, and publishes them as a GitHub
# release when the pipeline's ci_summary.json reports status "success".
name: Build Vector Store

on:
  workflow_dispatch:
    inputs:
      force_recreate:
        description: 'Force recreation of the vector store'
        required: false
        default: true
        type: boolean
  push:
    branches: [main]
    paths:
      - 'data/**'
  schedule:
    # Run daily at midnight UTC
    - cron: '0 0 * * *'

# The release step creates a tag and a release with GITHUB_TOKEN, which needs
# write access to repository contents.
permissions:
  contents: write

jobs:
  build-vector-store:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for proper versioning

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.13'
          cache: 'pip'

      - name: Install dependencies
        run: |
          # Install uv
          curl -LsSf https://astral.sh/uv/install.sh | sh

          # NOTE(review): the installer's target directory is believed to
          # differ between uv releases (~/.local/bin vs ~/.cargo/bin), so
          # both are added. Writing to GITHUB_PATH makes uv available to the
          # later steps; the export covers the rest of this step.
          for dir in "$HOME/.local/bin" "$HOME/.cargo/bin"; do
            export PATH="$dir:$PATH"
            echo "$dir" >> "$GITHUB_PATH"
          done

          # Use uv to install dependencies. No separate pip upgrade is done:
          # every later command goes through `uv run`, and `uv pip install`
          # would need an existing virtual environment (or --system).
          uv sync

      - name: Build vector store
        id: build-vector
        env:
          # Manual runs honor the force_recreate input. Push and scheduled
          # runs carry no inputs, so this evaluates to false for them.
          FORCE_RECREATE: ${{ github.event.inputs.force_recreate == 'true' }}
        run: |
          # Build with or without force recreate based on the input
          if [ "$FORCE_RECREATE" = "true" ]; then
            uv run python py-src/pipeline.py --force-recreate --ci --output-dir ./artifacts
          else
            uv run python py-src/pipeline.py --ci --output-dir ./artifacts
          fi

          # Set artifacts directory for the next steps
          echo "ARTIFACTS_DIR=./artifacts" >> "$GITHUB_ENV"

          # Create a zip file of the vector store. mkdir guards against the
          # pipeline not having created the output directory itself.
          mkdir -p artifacts
          cd db
          zip -r ../artifacts/vector_store.zip vector_store_4
          cd ..

      - name: Read CI summary
        id: ci_summary
        run: |
          # Read the CI summary file to extract values
          SUMMARY_FILE="${{ env.ARTIFACTS_DIR }}/ci_summary.json"
          if [ -f "$SUMMARY_FILE" ]; then
            STATUS=$(jq -r '.status' "$SUMMARY_FILE")
            DOC_COUNT=$(jq -r '.document_count' "$SUMMARY_FILE")

            echo "status=$STATUS" >> "$GITHUB_OUTPUT"
            echo "document_count=$DOC_COUNT" >> "$GITHUB_OUTPUT"
          else
            # No summary file: report failure so the release step is skipped.
            echo "status=failure" >> "$GITHUB_OUTPUT"
            echo "document_count=0" >> "$GITHUB_OUTPUT"
          fi

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: vector-store-artifacts
          path: |
            ${{ env.ARTIFACTS_DIR }}/*.json
            ${{ env.ARTIFACTS_DIR }}/vector_store.zip

      - name: Get version
        id: get_version
        run: |
          # Create a version based on date and document count
          VERSION="v$(date +'%Y.%m.%d')-docs${{ steps.ci_summary.outputs.document_count }}"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      - name: Create GitHub Release
        id: create_release
        uses: softprops/action-gh-release@v2
        # Only publish when the pipeline's own summary reports success.
        if: steps.ci_summary.outputs.status == 'success'
        with:
          tag_name: ${{ steps.get_version.outputs.version }}
          name: Vector Store ${{ steps.get_version.outputs.version }}
          body: |
            Vector store updated with ${{ steps.ci_summary.outputs.document_count }} documents.

            This is an automated release created by the vector store build workflow.
          files: |
            ${{ env.ARTIFACTS_DIR }}/*.json
            ${{ env.ARTIFACTS_DIR }}/vector_store.zip
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
.gitignore
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
|
| 2 |
db/
|
| 3 |
-
|
| 4 |
|
| 5 |
# Byte-compiled / optimized / DLL files
|
| 6 |
__pycache__/
|
|
|
|
| 1 |
|
| 2 |
db/
|
| 3 |
+
artifacts/
|
| 4 |
|
| 5 |
# Byte-compiled / optimized / DLL files
|
| 6 |
__pycache__/
|
scripts/build-vector-store.sh
CHANGED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Script to build vector store locally
# Usage: ./scripts/build-vector-store.sh [--force-recreate]
#
# Runs py-src/pipeline.py in CI mode with its output directed to ./artifacts,
# then zips db/vector_store_4 (when that directory exists) into
# ./artifacts/vector_store.zip and lists the artifacts directory.
# Exits 1 when the pipeline or the zip step fails.
# All paths are relative, so run this from the repository root.

# The optional pipeline flag is kept in an array so that, when absent, it
# expands to zero arguments even though the expansion below is quoted.
FORCE_RECREATE=()
if [[ "$1" == "--force-recreate" ]]; then
    FORCE_RECREATE=(--force-recreate)
fi

# Set output directory for artifacts
OUTPUT_DIR="./artifacts"
mkdir -p "$OUTPUT_DIR"

echo "Building vector store with output to $OUTPUT_DIR"
echo "Force recreate: ${FORCE_RECREATE[0]:-false}"

# Run pipeline in CI mode; stop here when it reports failure.
if ! python py-src/pipeline.py "${FORCE_RECREATE[@]}" --ci --output-dir "$OUTPUT_DIR"; then
    echo "Build failed!"
    exit 1
fi

echo "Build successful!"

# Create a zip of the vector store
if [ -d "./db/vector_store_4" ]; then
    echo "Creating vector store zip file in $OUTPUT_DIR"

    # zip updates an existing archive in place, so entries for files that were
    # since removed from the store would linger. Start from a clean archive.
    rm -f "$OUTPUT_DIR/vector_store.zip"

    # The subshell keeps the directory change local, and the exit status is
    # checked so a failed cd or zip is not reported as a created archive.
    if ! (cd db && zip -r "../$OUTPUT_DIR/vector_store.zip" vector_store_4); then
        echo "Failed to create vector store zip" >&2
        exit 1
    fi

    echo "Vector store zip created at $OUTPUT_DIR/vector_store.zip"
fi

echo "Artifacts available in $OUTPUT_DIR:"
ls -la "$OUTPUT_DIR"
|