Spaces:

SkazuHD
/

docker-test

Build error

App Files Files Community

docker-test / pyproject.toml

SkazuHD

init space

d660b02 about 2 months ago

raw

history blame contribute delete

5.77 kB

	# Package metadata read by Poetry.
	[tool.poetry]
	name = "llm-engineering"
	version = "0.1.0"
	# Empty string: no project summary is set.
	description = ""
	# NOTE(review): the address inside <...> looks like a redaction placeholder
	# rather than a real e-mail -- confirm the intended value.
	authors = ["iusztinpaul <[email protected]>"]
	license = "MIT"
	readme = "README.md"

	# Runtime dependencies (Poetry's main group), grouped by the part of the
	# project that uses them.
	# NOTE(review): several tasks under [tool.poe.tasks] invoke `poetry run zenml ...`,
	# but no zenml package is declared in the dependency tables visible here --
	# confirm it is provided some other way.
	[tool.poetry.dependencies]
	# Tilde constraint: restricts the interpreter to the 3.11 series.
	python = "~3.11"
	pymongo = "^4.6.2"
	click = "^8.0.1"
	loguru = "^0.7.2"
	rich = "^13.7.1"
	numpy = "^1.26.4"
	# Exact pin (no caret). Provides the `poe` task runner configured under
	# [tool.poe.tasks].
	poethepoet = "0.29.0"
	datasets = "^3.0.1"

	# Digital data ETL
	selenium = "^4.21.0"
	webdriver-manager = "^4.0.1"
	beautifulsoup4 = "^4.12.3"
	html2text = "^2024.2.26"
	jmespath = "^1.0.1"
	chromedriver-autoinstaller = "^0.6.4"

	# Feature engineering
	qdrant-client = "^1.8.0"
	langchain = "^0.3.9"
	sentence-transformers = "^3.0.0"

	# RAG
	langchain-openai = "^0.2.11"
	jinja2 = "^3.1.4"
	tiktoken = "^0.7.0"
	fake-useragent = "^1.5.1"
	langchain-community = "^0.3.9"

	# Inference
	# Explicit range instead of a caret: any 0.x release from 0.115.2 up to,
	# but excluding, 1.0.
	fastapi = ">=0.115.2,<1.0"
	uvicorn = "^0.30.6"
	opik = "^0.2.2"
	langchain-core = "^0.3.21"
	langchain-ollama = "^0.2.1"
	gradio = "^5.8.0"
	clearml = "^1.16.5"
	python-dotenv = "^1.0.1"


	# "dev" dependency group. ruff backs the lint/format poe tasks and pytest
	# backs the `test` poe task defined in this file.
	[tool.poetry.group.dev.dependencies]
	ruff = "^0.4.9"
	pre-commit = "^3.7.1"
	pytest = "^8.2.2"


	# "aws" dependency group.
	# The `>=` / `>` constraints on sagemaker and s3fs set only a lower bound,
	# so any newer release satisfies them.
	[tool.poetry.group.aws.dependencies]
	sagemaker = ">=2.232.2"
	s3fs = ">2022.3.0"
	aws-profile-manager = "^0.7.3"
	kubernetes = "^30.1.0"
	sagemaker-huggingface-inference-toolkit = "^2.4.0"


	# Build configuration: the package is built with the poetry-core backend.
	# poetry-core is requested without a version constraint.
	[build-system]
	requires = ["poetry-core"]
	build-backend = "poetry.core.masonry.api"

	# ----------------------------------
	# --- Poe the Poet Configuration ---
	# ----------------------------------

	# Task table for Poe the Poet: each key below is a task name. A string value
	# is a command line; an array value is a sequence of other task names.
	[tool.poe.tasks]

	# Data pipelines -- every one goes through the tools.run module with --no-cache.
	# ETL run configured by digital_data_etl_cs370.yaml.
	run-digital-data-etl-cs370 = "poetry run python -m tools.run --run-etl --no-cache --etl-config-filename digital_data_etl_cs370.yaml"
	# Sequence of ETL tasks to run (currently a single entry).
	run-digital-data-etl = [
		"run-digital-data-etl-cs370",
	]
	run-feature-engineering-pipeline = "poetry run python -m tools.run --no-cache --run-feature-engineering"
	run-generate-instruct-datasets-pipeline = "poetry run python -m tools.run --no-cache --run-generate-instruct-datasets"
	run-generate-preference-datasets-pipeline = "poetry run python -m tools.run --no-cache --run-generate-preference-datasets"
	run-end-to-end-data-pipeline = "poetry run python -m tools.run --no-cache --run-end-to-end-data"

	# Utility pipelines
	run-export-artifact-to-json-pipeline = "poetry run python -m tools.run --no-cache --run-export-artifact-to-json"
	# Export / import counterparts, both handled by the tools.data_warehouse module.
	run-export-data-warehouse-to-json = "poetry run python -m tools.data_warehouse --export-raw-data"
	run-import-data-warehouse-from-json = "poetry run python -m tools.data_warehouse --import-raw-data"

	# Training pipelines
	run-training-pipeline = "poetry run python -m tools.run --no-cache --run-training"
	run-evaluation-pipeline = "poetry run python -m tools.run --no-cache --run-evaluation"

	# Inference
	call-rag-retrieval-module = "poetry run python -m tools.rag"

	# Serves tools.ml_service:app with uvicorn on port 8000, bound to 0.0.0.0,
	# with --reload enabled.
	run-inference-ml-service = "poetry run uvicorn tools.ml_service:app --host 0.0.0.0 --port 8000 --reload"
	# Sends a sample JSON query with curl to /rag on 127.0.0.1:8000, the same
	# port the run-inference-ml-service task listens on.
	call-inference-ml-service = "curl -X POST 'http://127.0.0.1:8000/rag' -H 'Content-Type: application/json' -d '{\"query\": \"My name is Paul Iusztin. Could you draft a LinkedIn post discussing RAG systems? I am particularly interested in how RAG works and how it is integrated with vector DBs and LLMs.\"}'"

	# Infrastructure
	## Local infrastructure
	# Docker Compose services: bring up detached / stop.
	local-docker-infrastructure-up = "docker compose up -d"
	local-docker-infrastructure-down = "docker compose stop"
	local-zenml-server-down = "poetry run zenml down"
	# Sequence: bring up the containers, then run the ZenML "down" task followed
	# by the "up" task. local-zenml-server-up is declared as its own table
	# ([tool.poe.tasks.local-zenml-server-up]) elsewhere in this file.
	local-infrastructure-up = [
		"local-docker-infrastructure-up",
		"local-zenml-server-down",
		"local-zenml-server-up",
	]
	# Sequence: stop the containers, then the ZenML server.
	local-infrastructure-down = [
		"local-docker-infrastructure-down",
		"local-zenml-server-down",
	]
	# Select the active ZenML stack by name ("default" or "aws-stack").
	set-local-stack = "poetry run zenml stack set default"
	set-aws-stack = "poetry run zenml stack set aws-stack"
	# Passes --synchronous=False to the orchestrator registered as "aws-stack".
	set-asynchronous-runs = "poetry run zenml orchestrator update aws-stack --synchronous=False"
	zenml-server-disconnect = "poetry run zenml disconnect"

	## Settings
	# export goes through tools.run; delete removes the ZenML secret named "settings".
	# NOTE(review): presumably the export task writes that same "settings" secret --
	# confirm against tools.run.
	export-settings-to-zenml = "poetry run python -m tools.run --export-settings"
	delete-settings-zenml = "poetry run zenml secret delete settings"

	## SageMaker
	# Each task runs one module under llm_engineering as a script (`python -m`).
	create-sagemaker-role = "poetry run python -m llm_engineering.infrastructure.aws.roles.create_sagemaker_role"
	create-sagemaker-execution-role = "poetry run python -m llm_engineering.infrastructure.aws.roles.create_execution_role"
	deploy-inference-endpoint = "poetry run python -m llm_engineering.infrastructure.aws.deploy.huggingface.run"
	test-sagemaker-endpoint = "poetry run python -m llm_engineering.model.inference.test"
	delete-inference-endpoint = "poetry run python -m llm_engineering.infrastructure.aws.deploy.delete_sagemaker_endpoint"

	## Docker
	# Builds ./Dockerfile for linux/amd64 and tags the image `llmtwin`.
	build-docker-image = "docker buildx build --platform linux/amd64 -t llmtwin -f Dockerfile ."
	# Runs the run-end-to-end-data-pipeline poe task inside the llmtwin image.
	# poe expects a task name as its argument; the --no-cache / --run-end-to-end-data
	# flags belong to tools.run and are already part of that task's command, so
	# they must not be passed to poe directly.
	run-docker-end-to-end-data-pipeline = "docker run --rm --network host --shm-size=2g --env-file .env llmtwin poetry poe run-end-to-end-data-pipeline"
	# Interactive shell in a throwaway container, using the same .env file.
	bash-docker-container = "docker run --rm -it --network host --env-file .env llmtwin bash"

	# QA
	# Read-only checks.
	lint-check = "poetry run ruff check ."
	format-check = "poetry run ruff format --check ."
	# Feeds the Dockerfile to hadolint on stdin; wrapped in `sh -c` because the
	# `<` redirection needs a shell.
	lint-check-docker = "sh -c 'docker run --rm -i hadolint/hadolint < Dockerfile'"
	# Mounts the current directory at /src and scans /src/llm_engineering.
	# --rm matches the other docker-run tasks in this file, so repeated scans do
	# not leave stopped containers behind.
	gitleaks-check = "docker run --rm -v .:/src zricethezav/gitleaks:latest dir /src/llm_engineering"
	# Variants that modify files in place.
	lint-fix = "poetry run ruff check --fix ."
	format-fix = "poetry run ruff format ."

	# Switch task: the branch to run is chosen by the value of `sys.platform`.
	[tool.poe.tasks.local-zenml-server-up]
	control = { expr = "sys.platform" }

	# "darwin" branch: same command, with OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
	# set in the task's environment.
	[[tool.poe.tasks.local-zenml-server-up.switch]]
	case = "darwin"
	cmd = "poetry run zenml up"
	env.OBJC_DISABLE_INITIALIZE_FORK_SAFETY = "YES"

	# Branch without a `case` key: the fallback for every other platform value.
	[[tool.poe.tasks.local-zenml-server-up.switch]]
	cmd = "poetry run zenml up"

	# Tests
	# Runs pytest over tests/ with ENV_FILE set to .env.testing for the task.
	[tool.poe.tasks.test]
	cmd = "poetry run pytest tests/"
	env.ENV_FILE = ".env.testing"