# Environment setup: activate the virtualenv, enter the pipeline directory,
# and export the variables listed in .env. Each command is on its own line;
# when fused on one line, `cd ...` and `export ...` are passed to `source`
# as arguments and never run.
source ~/venvs/pdf-env/bin/activate
cd ~/rag-demo/pdf-pipeline
# Comment lines (starting with '#') are filtered out of .env first.
# shellcheck disable=SC2046 -- word splitting is intentional here: every
# KEY=VALUE word becomes one argument to export (values containing
# whitespace are therefore not supported by this form).
export $(grep -v '^#' .env | xargs -d '\n')
# Pipeline steps, run in order, one command per line.
python -m src.db_schema            # again, with the schema
./scripts/db_vector_välja_muutmine.sh
python -m src.extract_pdf          # with the improved extraction
# Previously this command sat behind the '#' of the line above and was
# silently treated as part of that comment.
python -m src.check_status         # with a better overview
python -m src.find_duplicates      # duplicate check
python -m src.clean_and_normalize
python -m src.create_chunks
python -m src.embed_chunks
python -m src.sync_weaviate
# Sanity checks against the processed_documents table, run through psql
# inside the postgres_postgis container. These are two separate commands;
# fused on one line, the second was handed to the first psql as stray
# arguments.

# 1) Row count.
docker exec -it postgres_postgis psql -U osm -d pdf_research -h localhost \
  -c "SELECT COUNT(*) FROM processed_documents;"

# 2) First 20 rows by id, with the content length and the has_table flag.
docker exec -it postgres_postgis psql -U osm -d pdf_research -h localhost \
  -c " SELECT id, raw_doc_id, page, LENGTH(content_text) AS len, has_table FROM processed_documents ORDER BY id LIMIT 20; "
# Run the hybrid query module, the answer generation module, and the RAG API
# module, one command per line. Fused on one line, `python -m
# src.generate_answer` was passed to src.query_hybrid as arguments instead
# of being executed.
python -m src.query_hybrid
python -m src.generate_answer
python -m src.rag_api
# Open in a browser:
# http://localhost:8071/docs
# Simplified search (GET with the query in the URL).
# NOTE(review): the port was 8000 here, while the /docs page, the POST
# /search call and the Python client in this file all use 8071; aligned to
# 8071 -- confirm that /search-simple is served by the same API.
curl "http://localhost:8071/search-simple?q=young%20driver%20risk"
# Search via a POST request (full control): send the query together with the
# top_articles / top_chunks / temperature / max_tokens fields as a JSON body
# and pretty-print the response with jq.
curl --request POST \
  --header "Content-Type: application/json" \
  --data '{
"query": "young driver accident risk",
"top_articles": 10,
"top_chunks": 20,
"temperature": 0.6,
"max_tokens": 1500
}' \
  "http://localhost:8071/search" | jq .
import requests
import json
# POST request to the /search endpoint with the query and its parameters
# sent as a JSON body.
response = requests.post(
    "http://localhost:8071/search",
    json={
        "query": "traffic flow prediction",
        "top_articles": 10,
        "top_chunks": 20,
        "temperature": 0.6,
        "max_tokens": 1500,
    },
    # (connect, read) timeout in seconds. Without a timeout, requests waits
    # indefinitely if the server never answers. The read timeout is kept
    # generous; adjust it to how long the endpoint really takes.
    timeout=(5, 300),
)
# Surface HTTP errors (4xx/5xx) as requests.HTTPError here, instead of a
# confusing KeyError / JSON decode error further down.
response.raise_for_status()

result = response.json()

# Print the question, the answer and the source articles from the response.
print("Küsimus:", result["query"])
print("Vastus:", result["answer"])
print("Allikad:", result["articles"])