diff --git a/.DEBIAN/conffiles b/.DEBIAN/conffiles index 008d731..cd8fdd7 100644 --- a/.DEBIAN/conffiles +++ b/.DEBIAN/conffiles @@ -1 +1,2 @@ /etc/fastapi-dls/env +/etc/systemd/system/fastapi-dls.service diff --git a/.DEBIAN/env.default b/.DEBIAN/env.default new file mode 100644 index 0000000..835f29e --- /dev/null +++ b/.DEBIAN/env.default @@ -0,0 +1,27 @@ +# Toggle debug mode +#DEBUG=false + +# Where the client can find the DLS server +DLS_URL=127.0.0.1 +DLS_PORT=443 + +# CORS configuration +## comma separated list without spaces +#CORS_ORIGINS="https://$DLS_URL:$DLS_PORT" + +# Lease expiration in days +LEASE_EXPIRE_DAYS=90 +LEASE_RENEWAL_PERIOD=0.2 + +# Database location +## https://docs.sqlalchemy.org/en/14/core/engines.html +DATABASE=sqlite:////etc/fastapi-dls/db.sqlite + +# UUIDs for identifying the instance +#SITE_KEY_XID="00000000-0000-0000-0000-000000000000" +#INSTANCE_REF="10000000-0000-0000-0000-000000000001" +#ALLOTMENT_REF="20000000-0000-0000-0000-000000000001" + +# Site-wide signing keys +INSTANCE_KEY_RSA=/etc/fastapi-dls/instance.private.pem +INSTANCE_KEY_PUB=/etc/fastapi-dls/instance.public.pem diff --git a/.DEBIAN/fastapi-dls.service b/.DEBIAN/fastapi-dls.service new file mode 100644 index 0000000..368d494 --- /dev/null +++ b/.DEBIAN/fastapi-dls.service @@ -0,0 +1,25 @@ +[Unit] +Description=Service for fastapi-dls +Documentation=https://git.collinwebdesigns.de/oscar.krause/fastapi-dls +After=network.target + +[Service] +User=www-data +Group=www-data +AmbientCapabilities=CAP_NET_BIND_SERVICE +WorkingDirectory=/usr/share/fastapi-dls/app +EnvironmentFile=/etc/fastapi-dls/env +ExecStart=uvicorn main:app \ + --env-file /etc/fastapi-dls/env \ + --host $DLS_URL --port $DLS_PORT \ + --app-dir /usr/share/fastapi-dls/app \ + --ssl-keyfile /etc/fastapi-dls/webserver.key \ + --ssl-certfile /etc/fastapi-dls/webserver.crt \ + --proxy-headers +Restart=always +KillSignal=SIGQUIT +Type=simple +NotifyAccess=all + +[Install] +WantedBy=multi-user.target diff --git 
a/.DEBIAN/postinst b/.DEBIAN/postinst index d4ceee0..fbf9b82 100644 --- a/.DEBIAN/postinst +++ b/.DEBIAN/postinst @@ -3,89 +3,26 @@ WORKING_DIR=/usr/share/fastapi-dls CONFIG_DIR=/etc/fastapi-dls -echo "> Create config directory ..." -mkdir -p $CONFIG_DIR - -# normally we would define services in `conffiles` and as separate file, but we like to keep thinks simple. -echo "> Install service ..." -cat </etc/systemd/system/fastapi-dls.service -[Unit] -Description=Service for fastapi-dls -Documentation=https://git.collinwebdesigns.de/oscar.krause/fastapi-dls -After=network.target - -[Service] -User=www-data -Group=www-data -AmbientCapabilities=CAP_NET_BIND_SERVICE -WorkingDirectory=$WORKING_DIR/app -EnvironmentFile=$CONFIG_DIR/env -ExecStart=uvicorn main:app \\ - --env-file /etc/fastapi-dls/env \\ - --host \$DLS_URL --port \$DLS_PORT \\ - --app-dir $WORKING_DIR/app \\ - --ssl-keyfile /etc/fastapi-dls/webserver.key \\ - --ssl-certfile /etc/fastapi-dls/webserver.crt \\ - --proxy-headers -Restart=always -KillSignal=SIGQUIT -Type=simple -NotifyAccess=all - -[Install] -WantedBy=multi-user.target - -EOF - -systemctl daemon-reload - -# normally we would define configfiles in `conffiles` and as separate file, but we like to keep thinks simple. -if [[ ! -f $CONFIG_DIR/env ]]; then - echo "> Writing initial config ..." 
- touch $CONFIG_DIR/env - cat <$CONFIG_DIR/env -# Toggle debug mode -#DEBUG=false - -# Where the client can find the DLS server -DLS_URL=127.0.0.1 -DLS_PORT=443 - -# CORS configuration -## comma separated list without spaces -#CORS_ORIGINS="https://$DLS_URL:$DLS_PORT" - -# Lease expiration in days -LEASE_EXPIRE_DAYS=90 - -# Database location -## https://docs.sqlalchemy.org/en/14/core/engines.html -DATABASE=sqlite:///$CONFIG_DIR/db.sqlite - -# UUIDs for identifying the instance -#SITE_KEY_XID="00000000-0000-0000-0000-000000000000" -#INSTANCE_REF="00000000-0000-0000-0000-000000000000" - -# Site-wide signing keys -INSTANCE_KEY_RSA=$CONFIG_DIR/instance.private.pem -INSTANCE_KEY_PUB=$CONFIG_DIR/instance.public.pem - -EOF +if [[ ! -f $CONFIG_DIR/instance.private.pem ]]; then + echo "> Create dls-instance keypair ..." + openssl genrsa -out $CONFIG_DIR/instance.private.pem 2048 + openssl rsa -in $CONFIG_DIR/instance.private.pem -outform PEM -pubout -out $CONFIG_DIR/instance.public.pem +else + echo "> Create dls-instance keypair skipped! (exists)" fi -echo "> Create dls-instance keypair ..." -openssl genrsa -out $CONFIG_DIR/instance.private.pem 2048 -openssl rsa -in $CONFIG_DIR/instance.private.pem -outform PEM -pubout -out $CONFIG_DIR/instance.public.pem - while true; do - read -p "> Do you wish to create self-signed webserver certificate? [Y/n]" yn - yn=${yn:-y} # ${parameter:-word} If parameter is unset or null, the expansion of word is substituted. Otherwise, the value of parameter is substituted. + [[ -f $CONFIG_DIR/webserver.key ]] && default_answer="N" || default_answer="Y" + [[ $default_answer == "Y" ]] && V="Y/n" || V="y/N" + read -p "> Do you wish to create self-signed webserver certificate? [${V}]" yn + yn=${yn:-$default_answer} # ${parameter:-word} If parameter is unset or null, the expansion of word is substituted. Otherwise, the value of parameter is substituted. case $yn in [Yy]*) + echo "> Generating keypair ..." 
openssl req -x509 -nodes -days 3650 -newkey rsa:2048 -keyout $CONFIG_DIR/webserver.key -out $CONFIG_DIR/webserver.crt break ;; - [Nn]*) break ;; + [Nn]*) echo "> Generating keypair skipped! (exists)"; break ;; *) echo "Please answer [y] or [n]." ;; esac done @@ -115,7 +52,7 @@ cat </cert/instance.private.pem` | Site-wide private RSA key for singing JWTs | -| `INSTANCE_KEY_PUB` | `/cert/instance.public.pem` | Site-wide public key | +| Variable | Default | Usage | +|------------------------|----------------------------------------|------------------------------------------------------------------------------------------------------| +| `DEBUG` | `false` | Toggles `fastapi` debug mode | +| `DLS_URL` | `localhost` | Used in client-token to tell guest driver where dls instance is reachable | +| `DLS_PORT` | `443` | Used in client-token to tell guest driver where dls instance is reachable | +| `TOKEN_EXPIRE_DAYS` | `1` | Client auth-token validity (used to authenticate the client against the API, **not `.tok` file!**) | +| `LEASE_EXPIRE_DAYS` | `90` | Lease time in days | +| `LEASE_RENEWAL_PERIOD` | `0.15` | The percentage of the lease period that must elapse before a licensed client can renew a license \*1 | +| `DATABASE` | `sqlite:///db.sqlite` | See [official SQLAlchemy docs](https://docs.sqlalchemy.org/en/14/core/engines.html) | +| `CORS_ORIGINS` | `https://{DLS_URL}` | Sets `Access-Control-Allow-Origin` header (comma separated string) \*2 | +| `SITE_KEY_XID` | `00000000-0000-0000-0000-000000000000` | Site identification uuid | +| `INSTANCE_REF` | `10000000-0000-0000-0000-000000000001` | Instance identification uuid | +| `ALLOTMENT_REF` | `20000000-0000-0000-0000-000000000001` | Allotment identification uuid | +| `INSTANCE_KEY_RSA` | `/cert/instance.private.pem` | Site-wide private RSA key for signing JWTs \*3 | +| `INSTANCE_KEY_PUB` | `/cert/instance.public.pem` | Site-wide public key \*3 | -\* Always use `https`, since guest-drivers only support secure connections! 
+\*1 For example, if the lease period is one day and the renewal period is 20%, the client attempts to renew its license +every 4.8 hours. If network connectivity is lost, the loss of connectivity is detected during license renewal and the +client has 19.2 hours in which to re-establish connectivity before its license expires. + +\*2 Always use `https`, since guest-drivers only support secure connections! + +\*3 If you recreate instance keys you need to **recreate client-token for each guest**! # Setup (Client) @@ -308,7 +262,7 @@ Successfully tested with this package versions: ## Linux ```shell -curl --insecure -X GET https:///client-token -o /etc/nvidia/ClientConfigToken/client_configuration_token.tok +curl --insecure -L -X GET https:///client-token -o /etc/nvidia/ClientConfigToken/client_configuration_token_$(date '+%d-%m-%Y-%H-%M-%S').tok service nvidia-gridd restart nvidia-smi -q | grep "License" ``` @@ -321,13 +275,73 @@ Now restart `NvContainerLocalSystem` service. **Power-Shell** ```Shell -curl.exe --insecure -X GET https:///client-token -o "C:\Program Files\NVIDIA Corporation\vGPU Licensing\ClientConfigToken\client_configuration_token_$($(Get-Date).tostring('dd-MM-yy-hh-mm-ss')).tok" +curl.exe --insecure -L -X GET https:///client-token -o "C:\Program Files\NVIDIA Corporation\vGPU Licensing\ClientConfigToken\client_configuration_token_$($(Get-Date).tostring('dd-MM-yy-hh-mm-ss')).tok" Restart-Service NVDisplay.ContainerLocalSystem 'C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe' -q | Select-String "License" ``` +## Endpoints + +### `GET /` + +Redirect to `/-/readme`. + +### `GET /-/health` + +Status endpoint, used for *healthcheck*. + +### `GET /-/config` + +Shows current runtime environment variables and their values. + +### `GET /-/readme` + +HTML rendered README.md. + +### `GET /-/docs`, `GET /-/redoc` + +OpenAPI specifications rendered from `GET /-/openapi.json`. + +### `GET /-/manage` + +Shows a very basic UI to delete origins or leases. 
+ +### `GET /-/origins?leases=false` + +List registered origins. + +| Query Parameter | Default | Usage | +|-----------------|---------|--------------------------------------| +| `leases` | `false` | Include referenced leases per origin | + +### `DELETE /-/origins` + +Deletes all origins and their leases. + +### `GET /-/leases?origin=false` + +List current leases. + +| Query Parameter | Default | Usage | +|-----------------|---------|-------------------------------------| +| `origin` | `false` | Include referenced origin per lease | + +### `DELETE /-/lease/{lease_ref}` + +Deletes a lease. + +### `GET /-/client-token` + +Generate client token (see [installation](#installation)). + +### Others + +There are many other internal API endpoints for handling the authentication and lease process. + # Troubleshoot +**Please make sure that fastapi-dls and your guests are on the same timezone!** + ## Linux Logs are available with `journalctl -u nvidia-gridd -f`. @@ -346,6 +360,9 @@ This message can be ignored. - Ref. https://github.com/encode/uvicorn/issues/441 +
+ Log example + ``` WARNING:uvicorn.error:Invalid HTTP request received. Traceback (most recent call last): @@ -364,6 +381,8 @@ Traceback (most recent call last): h11._util.RemoteProtocolError: no request line received ``` +
+ ## Windows ### Required cipher on Windows Guests (e.g. managed by domain controller with GPO) @@ -431,6 +450,38 @@ Dec 20 17:53:34 ubuntu-grid-server nvidia-gridd[10354]: License acquired success +### Error on releasing leases on shutdown (can be ignored and/or fixed with reverse proxy) + +The driver wants to release current leases when shutting down Windows. This endpoint needs to be an HTTP endpoint. +The error message can safely be ignored (since we have no license limitation :P) and looks like this: + +
+ Log example + +``` +<1>:NLS initialized +<1>:License acquired successfully. (Info: 192.168.178.110, NVIDIA RTX Virtual Workstation; Expiry: 2023-3-30 23:0:22 GMT) +<0>:Failed to return license to 192.168.178.110 (Error: Generic network communication failure) +<0>:End Logging +``` + +#### log with nginx as reverse proxy (see [docker-compose.yml](docker-compose.yml)) + +``` +<1>:NLS initialized +<2>:NLS initialized +<1>:Valid GRID license not found. GPU features and performance will be fully degraded. To enable full functionality please configure licensing details. +<1>:License acquired successfully. (Info: 192.168.178.33, NVIDIA RTX Virtual Workstation; Expiry: 2023-1-4 16:48:20 GMT) +<2>:Valid GRID license not found. GPU features and performance will be fully degraded. To enable full functionality please configure licensing details. +<2>:License acquired successfully from local trusted store. (Info: 192.168.178.33, NVIDIA RTX Virtual Workstation; Expiry: 2023-1-4 16:48:20 GMT) +<2>:End Logging +<1>:End Logging +<0>:License returned successfully. (Info: 192.168.178.33) +<0>:End Logging +``` + +
+ # Credits Thanks to vGPU community and all who uses this project and report bugs. diff --git a/app/main.py b/app/main.py index 63df4f4..573cb42 100644 --- a/app/main.py +++ b/app/main.py @@ -6,16 +6,16 @@ from os.path import join, dirname from os import getenv as env from dotenv import load_dotenv -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI from fastapi.requests import Request -import json +from json import loads as json_loads from datetime import datetime from dateutil.relativedelta import relativedelta from calendar import timegm -from jose import jws, jwk, jwt +from jose import jws, jwk, jwt, JWTError from jose.constants import ALGORITHMS from starlette.middleware.cors import CORSMiddleware -from starlette.responses import StreamingResponse, JSONResponse, HTMLResponse, Response, RedirectResponse +from starlette.responses import StreamingResponse, JSONResponse as JSONr, HTMLResponse as HTMLr, Response, RedirectResponse from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker @@ -32,14 +32,17 @@ app = FastAPI(title='FastAPI-DLS', description='Minimal Delegated License Servic db = create_engine(str(env('DATABASE', 'sqlite:///db.sqlite'))) db_init(db), migrate(db) +# everything prefixed with "INSTANCE_*" is used as "SERVICE_INSTANCE_*" or "SI_*" in official dls service DLS_URL = str(env('DLS_URL', 'localhost')) DLS_PORT = int(env('DLS_PORT', '443')) SITE_KEY_XID = str(env('SITE_KEY_XID', '00000000-0000-0000-0000-000000000000')) -INSTANCE_REF = str(env('INSTANCE_REF', '00000000-0000-0000-0000-000000000000')) +INSTANCE_REF = str(env('INSTANCE_REF', '10000000-0000-0000-0000-000000000001')) +ALLOTMENT_REF = str(env('ALLOTMENT_REF', '20000000-0000-0000-0000-000000000001')) INSTANCE_KEY_RSA = load_key(str(env('INSTANCE_KEY_RSA', join(dirname(__file__), 'cert/instance.private.pem')))) INSTANCE_KEY_PUB = load_key(str(env('INSTANCE_KEY_PUB', join(dirname(__file__), 'cert/instance.public.pem')))) -TOKEN_EXPIRE_DELTA = 
relativedelta(hours=1) # days=1 -LEASE_EXPIRE_DELTA = relativedelta(days=int(env('LEASE_EXPIRE_DAYS', 90))) +TOKEN_EXPIRE_DELTA = relativedelta(days=int(env('TOKEN_EXPIRE_DAYS', 1)), hours=int(env('TOKEN_EXPIRE_HOURS', 0))) +LEASE_EXPIRE_DELTA = relativedelta(days=int(env('LEASE_EXPIRE_DAYS', 90)), hours=int(env('LEASE_EXPIRE_HOURS', 0))) +LEASE_RENEWAL_PERIOD = float(env('LEASE_RENEWAL_PERIOD', 0.15)) CORS_ORIGINS = str(env('CORS_ORIGINS', '')).split(',') if (env('CORS_ORIGINS')) else [f'https://{DLS_URL}'] jwt_encode_key = jwk.construct(INSTANCE_KEY_RSA.export_key().decode('utf-8'), algorithm=ALGORITHMS.RS256) @@ -68,11 +71,6 @@ async def index(): return RedirectResponse('/-/readme') -@app.get('/status', summary='* Status', description='returns current service status, version (incl. git-commit) and some variables.', deprecated=True) -async def status(request: Request): - return JSONResponse({'status': 'up', 'version': VERSION, 'commit': COMMIT, 'debug': DEBUG}) - - @app.get('/-/', summary='* Index') async def _index(): return RedirectResponse('/-/readme') @@ -80,14 +78,32 @@ async def _index(): @app.get('/-/health', summary='* Health') async def _health(request: Request): - return JSONResponse({'status': 'up', 'version': VERSION, 'commit': COMMIT, 'debug': DEBUG}) + return JSONr({'status': 'up'}) + + +@app.get('/-/config', summary='* Config', description='returns environment variables.') +async def _config(): + return JSONr({ + 'VERSION': str(VERSION), + 'COMMIT': str(COMMIT), + 'DEBUG': str(DEBUG), + 'DLS_URL': str(DLS_URL), + 'DLS_PORT': str(DLS_PORT), + 'SITE_KEY_XID': str(SITE_KEY_XID), + 'INSTANCE_REF': str(INSTANCE_REF), + 'ALLOTMENT_REF': [str(ALLOTMENT_REF)], + 'TOKEN_EXPIRE_DELTA': str(TOKEN_EXPIRE_DELTA), + 'LEASE_EXPIRE_DELTA': str(LEASE_EXPIRE_DELTA), + 'LEASE_RENEWAL_PERIOD': str(LEASE_RENEWAL_PERIOD), + 'CORS_ORIGINS': str(CORS_ORIGINS), + }) @app.get('/-/readme', summary='* Readme') async def _readme(): from markdown import markdown content = 
load_file('../README.md').decode('utf-8') - return HTMLResponse(markdown(text=content, extensions=['tables', 'fenced_code', 'md_in_html', 'nl2br', 'toc'])) + return HTMLr(markdown(text=content, extensions=['tables', 'fenced_code', 'md_in_html', 'nl2br', 'toc'])) @app.get('/-/manage', summary='* Management UI') @@ -99,14 +115,18 @@ async def _manage(request: Request): FastAPI-DLS Management - +