wip: not ready for prime time yet...

This commit is contained in:
Régis Behmo 2021-04-16 10:54:08 +02:00
parent e7e6024b15
commit 0be4960ab3
13 changed files with 391 additions and 49 deletions

View File

@ -13,8 +13,22 @@ TODO:
- make database name a tutor config
- make clickhouse host a tutor config
- specify TTL for tables?
- don't connect with default user, but use a dedicated openedx user
- rename database to "openedx"
- set permissions for each course/org: one datasource per org/course???
- how to handle migrations?
- prevent access to the full tracking message in the tracking table
- Expose data with redash
- Provision dashboards
- Provision dashboards
- Custom users
- Expose grades
- Reproduce dashboards from https://edx.readthedocs.io/projects/edx-insights/en/latest/Overview.html
- prevent users from running TRUNCATE from redash
- frontend user creation:
- generate random frontend user password in "tutor vision frontend createuser"
- create root users
- add delete user command
- add users to shared openedx organization
- Utility tools for authentication
- Kubernetes compatibility
- Sweet readme
@ -34,14 +48,41 @@ Usage
tutor plugins enable vision
tutor local quickstart
To access the analytics frontend, open http(s)://vision.<YOUR_LMS_HOST> in your browser. When running locally, this will be http://vision.local.overhang.io. The email address and password required for logging in are::
Create a root user to access the frontend::
tutor config printvalue VISION_REDASH_ROOT_EMAIL
tutor config printvalue VISION_REDASH_ROOT_PASSWORD
# You will be prompted for your password
tutor vision frontend createuser --root admin admin@youremail.com
Grant this user access to all data::
tutor vision datalake createuser admin
tutor vision datalake setpermissions admin
You can then access the frontend with the user credentials you just created. Open http(s)://vision.<YOUR_LMS_HOST> in your browser. When running locally, this will be http://vision.local.overhang.io.
Management
----------
To add a new, non-admin user::
# Create a datalake user
tutor vision datalake createuser yourusername
# Remember to restrict access; otherwise, the new user will have access to everything
tutor vision datalake setpermissions --course-id 'course-v1:edX+DemoX+Demo_Course' yourusername
# Create a corresponding user on the frontend
tutor vision frontend createuser yourusername yourusername@youremail.com
Development
-----------
To explore the clickhouse database as root, run::
tutor local run vision-clickhouse clickhouse-client --host vision-clickhouse \
--database $(tutor config printvalue VISION_CLICKHOUSE_DATABASE) \
--user $(tutor config printvalue VISION_CLICKHOUSE_USERNAME) \
--password $(tutor config printvalue VISION_CLICKHOUSE_PASSWORD)
To reload Vector configuration after changes to vector.toml, run::
tutor config save && tutor local exec vision-vector kill -s HUP 1

121
tutorvision/cli.py Normal file
View File

@ -0,0 +1,121 @@
import click
from tutor import config as tutor_config
from tutor.commands.compose import ComposeJobRunner
from tutor.commands.local import docker_compose as local_docker_compose
@click.group(help="Manage your Vision platform")
def vision_command():
    """Root command group to which the Vision sub-command groups are attached."""
@click.group(help="Manage datalake access")
def datalake():
    """Command group holding the datalake user and permission commands."""
@click.command(name="createuser", help="Create new user or update existing one")
@click.argument("username")
@click.pass_obj
def datalake_createuser(context, username):
    """Create the datalake user ``username``, replacing any existing one.

    The statement is executed through ``run_datalake_query`` against the
    project located at ``context.root``.
    """
    statement = f"CREATE USER OR REPLACE {username}"
    run_datalake_query(context.root, statement)
@click.command(name="setpermissions", help="Restrict user access")
@click.argument("username")
@click.option(
    "-c",
    "--course-id",
    "course_ids",
    multiple=True,
    help=(
        "Grant access to a course data. This option may be used multiple times to grant "
        "access to multiple courses."
    ),
)
@click.pass_obj
def datalake_setpermissions(context, username, course_ids):
    """Grant ``username`` read access to the datalake tables.

    For each of the ``events``, ``coursegrades`` and ``courseenrollments``
    tables, a SELECT grant is issued and a restrictive row policy named after
    the user is created or replaced. When ``course_ids`` is empty the policy
    condition is the constant ``1``; otherwise it is an ``OR`` of one
    ``course_id = '...'`` equality per course.
    """
    if course_ids:
        condition = " OR ".join(
            f"course_id = '{course_id}'" for course_id in course_ids
        )
    else:
        condition = "1"

    # Same pair of statements for every table exposed to datalake users.
    statements = []
    for table in ("events", "coursegrades", "courseenrollments"):
        statements.append(f"GRANT SELECT ON {table} TO {username};")
        statements.append(
            f"CREATE ROW POLICY OR REPLACE {username} ON {table} "
            f"AS RESTRICTIVE FOR SELECT USING {condition} TO {username};"
        )
    query = "\n" + "\n".join(statements) + "\n"
    run_datalake_query(context.root, query)
def run_datalake_query(root, query):
    """Run ``query`` with the ClickHouse client in the ``vision-clickhouse`` job.

    The connection parameters (host, port, user, password, database) are read
    from the Tutor configuration loaded from ``root``. The client is started
    with ``--multiline --multiquery`` so that ``query`` may contain several
    statements.

    Args:
        root: Tutor project root, passed to ``tutor_config.load`` and to the
            job runner.
        query: SQL text to execute; may span several lines and statements.
    """
    # Local import: keeps this function self-contained.
    from shlex import quote

    config = tutor_config.load(root)
    arguments = [
        "clickhouse",
        "client",
        f"--host={config['VISION_CLICKHOUSE_HOST']}",
        f"--port={config['VISION_CLICKHOUSE_PORT']}",
        f"--user={config['VISION_CLICKHOUSE_USERNAME']}",
        f"--password={config['VISION_CLICKHOUSE_PASSWORD']}",
        f"--database={config['VISION_CLICKHOUSE_DATABASE']}",
        "--multiline",
        "--multiquery",
        "--query",
        query,
    ]
    # The job receives a single command string (presumably interpreted by a
    # shell -- the previous implementation relied on double quotes around the
    # query). Quote every word so that quotes, dollar signs, backquotes or
    # whitespace in the query or in a configuration value cannot split or
    # alter the command.
    command = " ".join(quote(argument) for argument in arguments)
    runner = ComposeJobRunner(root, config, local_docker_compose)
    runner.run_job("vision-clickhouse", command)
@click.group(name="frontend", help="Manage the frontend access")
def frontend_command():
    """Command group holding the frontend user management commands."""
@click.command(name="createuser", help="Create a new user to access the frontend")
@click.option(
    "-p",
    "--password",
    default="",
    prompt="User password",
    hide_input=True,
    confirmation_prompt=True,
    help="User password: if undefined you will be prompted to input a password",
)
@click.option(
    "-r",
    "--root",
    "is_root",
    is_flag=True,
    default=False,
    help="Grant root/administration privileges on the frontend to this user",
)
@click.argument("username")
@click.argument("email")
@click.pass_obj
def frontend_createuser(context, password, is_root, username, email):
    """Run the ``createuser`` hook template in the ``vision-redash`` job.

    The Tutor configuration loaded from ``context.root`` is extended with the
    ``password``, ``is_root``, ``username`` and ``email`` values before being
    handed to the job runner, which makes them available when the template is
    rendered.
    """
    config = tutor_config.load(context.root)
    # Per-invocation values added on top of the loaded configuration.
    config["password"] = password
    config["is_root"] = is_root
    config["username"] = username
    config["email"] = email

    template_path = ("vision", "hooks", "vision-redash", "createuser")
    job_runner = ComposeJobRunner(context.root, config, local_docker_compose)
    job_runner.run_job_from_template("vision-redash", *template_path)
# "vision datalake ..." sub-commands
vision_command.add_command(datalake)
datalake.add_command(datalake_createuser)
datalake.add_command(datalake_setpermissions)

# "vision frontend ..." sub-commands
vision_command.add_command(frontend_command)
frontend_command.add_command(frontend_createuser)

View File

@ -1,10 +1,12 @@
vision-clickhouse-job:
image: {{ VISION_CLICKHOUSE_DOCKER_IMAGE }}
depends_on: {{ [("vision-clickhouse", VISION_RUN_CLICKHOUSE)]|list_if }}
volumes:
- ../plugins/vision/apps/clickhouse/migrations.d/:/etc/clickhouse-server/migrations.d/:ro
vision-redash-job:
image: {{ VISION_REDASH_DOCKER_IMAGE }}
command: create_db
env_file: ../plugins/vision/apps/redash/env
depends_on:
- vision-postgres
- vision-postgresql
- vision-redis

View File

@ -16,6 +16,7 @@ vision-clickhouse:
image: {{ VISION_CLICKHOUSE_DOCKER_IMAGE }}
volumes:
- ../../data/vision/clickhouse:/var/lib/clickhouse
- ../plugins/vision/apps/clickhouse/users.d/vision.xml:/etc/clickhouse-server/users.d/vision.xml:ro
ulimits:
nofile:
soft: 262144
@ -23,6 +24,8 @@ vision-clickhouse:
restart: unless-stopped
{% endif %}
# TODO make sure that we run the right services for redash 9.0 https://github.com/getredash/redash/blob/master/CHANGELOG.md
# https://github.com/getredash/setup/blob/master/data/docker-compose.yml
# frontend
vision-redash-server:
image: {{ VISION_REDASH_DOCKER_IMAGE }}
@ -32,7 +35,7 @@ vision-redash-server:
REDASH_WEB_WORKERS: 4
restart: unless-stopped
depends_on:
- vision-postgres
- vision-postgresql
- vision-redis
vision-redash-scheduler:
image: {{ VISION_REDASH_DOCKER_IMAGE }}
@ -43,7 +46,7 @@ vision-redash-scheduler:
WORKERS_COUNT: 1
restart: unless-stopped
depends_on:
- vision-postgres
- vision-postgresql
- vision-redis
vision-redash-scheduled-worker:
image: {{ VISION_REDASH_DOCKER_IMAGE }}
@ -54,7 +57,7 @@ vision-redash-scheduled-worker:
WORKERS_COUNT: 1
restart: unless-stopped
depends_on:
- vision-postgres
- vision-postgresql
- vision-redis
vision-redash-adhoc-worker:
image: {{ VISION_REDASH_DOCKER_IMAGE }}
@ -65,17 +68,17 @@ vision-redash-adhoc-worker:
WORKERS_COUNT: 2
restart: unless-stopped
depends_on:
- vision-postgres
- vision-postgresql
- vision-redis
vision-redis:
image: docker.io/redis:5.0-alpine
restart: unless-stopped
vision-postgres:
vision-postgresql:
image: docker.io/postgres:9.6-alpine
environment:
POSTGRES_USER: "{{ VISION_REDASH_POSTGRESQL_USER }}"
POSTGRES_PASSWORD: "{{ VISION_REDASH_POSTGRESQL_PASSWORD }}"
POSTGRES_DB: "{{ VISION_REDASH_POSTGRESQL_DB }}"
POSTGRES_USER: "{{ VISION_POSTGRESQL_USER }}"
POSTGRES_PASSWORD: "{{ VISION_POSTGRESQL_PASSWORD }}"
POSTGRES_DB: "{{ VISION_POSTGRESQL_DB }}"
volumes:
- ../../data/vision/redash/postgres:/var/lib/postgresql/data
restart: unless-stopped
- ../../data/vision/postgresql:/var/lib/postgresql/data
restart: unless-stopped

View File

@ -2,6 +2,7 @@ from glob import glob
import os
from .__about__ import __version__
from .cli import vision_command
HERE = os.path.abspath(os.path.dirname(__file__))
@ -9,28 +10,31 @@ templates = os.path.join(HERE, "templates")
config = {
"add": {
"REDASH_POSTGRESQL_PASSWORD": "{{ 20|random_string }}",
"CLICKHOUSE_PASSWORD": "{{ 20|random_string }}",
"POSTGRESQL_PASSWORD": "{{ 20|random_string }}",
"REDASH_COOKIE_SECRET": "{{ 20|random_string }}",
"REDASH_ROOT_PASSWORD": "{{ 20|random_string }}",
"REDASH_PASSWORD": "{{ 20|random_string }}",
"REDASH_SECRET_KEY": "{{ 20|random_string }}",
},
"defaults": {
"CLICKHOUSE_DOCKER_IMAGE": "docker.io/yandex/clickhouse-server:20.8.14.4",
"CLICKHOUSE_DOCKER_IMAGE": "docker.io/yandex/clickhouse-server:21.2.7.11",
"RUN_CLICKHOUSE": True,
"CLICKHOUSE_HOST": "vision-clickhouse",
"CLICKHOUSE_HTTP_PORT": 8123,
"CLICKHOUSE_PORT": 9000,
"CLICKHOUSE_DATABASE": "openedx_{{ ID }}",
"REDASH_DOCKER_IMAGE": "docker.io/redash/redash:8.0.0.b32245",
"REDASH_POSTGRESQL_USER": "redash",
"REDASH_POSTGRESQL_DB": "redash",
"CLICKHOUSE_DATABASE": "openedx",
"CLICKHOUSE_USERNAME": "openedx",
"POSTGRESQL_USER": "redash",
"POSTGRESQL_DB": "redash",
"REDASH_DOCKER_IMAGE": "docker.io/redash/redash:9.0.0-beta.b42121",
"REDASH_HOST": "vision.{{ LMS_HOST }}",
"REDASH_ROOT_USERNAME": "admin",
"REDASH_ROOT_EMAIL": "{{ CONTACT_EMAIL }}",
"REDASH_USERNAME": "admin",
"REDASH_EMAIL": "{{ CONTACT_EMAIL }}",
},
}
hooks = {"init": ["vision-clickhouse", "vision-redash"]}
command = vision_command
def patches():

View File

@ -0,0 +1,30 @@
-- Raw tracking log storage: one row per log entry, with its timestamp and the
-- JSON payload stored as a string in `message` (parsed by the events_mv view).
-- The column list must not end with a comma: not every ClickHouse version
-- accepts a trailing comma there.
CREATE TABLE tracking
(
    `time` DateTime,
    `message` String
) ENGINE MergeTree
ORDER BY time;
-- Structured events table: target of the events_mv materialized view.
-- Rows are ordered by time.
-- NOTE(review): events_mv does not select a `message` column, so this column is
-- presumably left at its default value -- confirm whether it is still needed.
CREATE TABLE events
(
`time` DateTime,
`message` String,
`name` String,
`course_id` String,
`user_id` Int64,
`event_source` String
)
ENGINE MergeTree
ORDER BY time;
-- Materialized view writing into the events table: extracts the name,
-- context.course_id, context.user_id and event_source fields from the JSON
-- stored in tracking.message.
CREATE MATERIALIZED VIEW events_mv TO events AS
SELECT
time,
JSONExtractString(message, 'name') AS name,
JSONExtract(message, 'context', 'course_id', 'String') AS course_id,
JSONExtract(message, 'context', 'user_id', 'Int64') AS user_id,
JSONExtractString(message, 'event_source') AS event_source
FROM tracking;
-- Row policy "common": every row of the events table (USING 1) is selectable by
-- all users (TO ALL)
CREATE ROW POLICY common ON events FOR SELECT USING 1 TO ALL;

View File

@ -0,0 +1,11 @@
-- Course grades, read through the MySQL table engine from the
-- grades_persistentcoursegrade table. Host, port, database and credentials
-- are template placeholders.
CREATE TABLE coursegrades
(
`percent_grade` Double,
`passed_timestamp` DateTime NULL,
`user_id` UInt64,
`course_id` String
)
ENGINE = MySQL('{{ MYSQL_HOST }}:{{ MYSQL_PORT }}', '{{ OPENEDX_MYSQL_DATABASE }}', 'grades_persistentcoursegrade', '{{ OPENEDX_MYSQL_USERNAME }}', '{{ OPENEDX_MYSQL_PASSWORD }}');
-- Row policy "common": every row of coursegrades (USING 1) is selectable by all
-- users (TO ALL)
CREATE ROW POLICY common ON coursegrades FOR SELECT USING 1 TO ALL;

View File

@ -0,0 +1,37 @@
-- Course enrollments, read through the MySQL table engine from the
-- student_courseenrollment table. Connection parameters are template
-- placeholders. Used as the left side of the courseenrollments live view.
CREATE TABLE openedx_courseenrollments
(
`created` DateTime NULL,
`user_id` UInt64,
`course_id` String,
`is_active` UInt8,
`mode` String
)
ENGINE = MySQL('{{ MYSQL_HOST }}:{{ MYSQL_PORT }}', '{{ OPENEDX_MYSQL_DATABASE }}', 'student_courseenrollment', '{{ OPENEDX_MYSQL_USERNAME }}', '{{ OPENEDX_MYSQL_PASSWORD }}');
-- User profile attributes, read through the MySQL table engine from the
-- auth_userprofile table. Connection parameters are template placeholders.
-- Joined on user_id by the courseenrollments live view.
CREATE TABLE openedx_userprofiles
(
`user_id` UInt64,
`year_of_birth` UInt32,
`gender` String,
`level_of_education` String,
`city` String,
`state` String,
`country` String
)
ENGINE = MySQL('{{ MYSQL_HOST }}:{{ MYSQL_PORT }}', '{{ OPENEDX_MYSQL_DATABASE }}', 'auth_userprofile', '{{ OPENEDX_MYSQL_USERNAME }}', '{{ OPENEDX_MYSQL_PASSWORD }}');
-- Live view exposing one row per enrollment together with the profile
-- attributes of the enrolled user (inner join on user_id), declared with
-- PERIODIC REFRESH 30.
-- NOTE(review): live views are presumably an experimental ClickHouse feature
-- that must be enabled by a server or session setting -- confirm that this
-- statement succeeds with the deployed configuration.
CREATE LIVE VIEW courseenrollments WITH PERIODIC REFRESH 30 AS
SELECT
openedx_courseenrollments.course_id AS course_id,
openedx_courseenrollments.user_id AS user_id,
openedx_userprofiles.year_of_birth AS user_year_of_birth,
openedx_userprofiles.gender AS user_gender,
openedx_userprofiles.level_of_education AS user_level_of_education,
openedx_userprofiles.city AS user_city,
openedx_userprofiles.state AS user_state,
openedx_userprofiles.country AS user_country
FROM openedx_courseenrollments
INNER JOIN openedx_userprofiles ON openedx_courseenrollments.user_id = openedx_userprofiles.user_id;
-- Row policy "common": every row of the courseenrollments view (USING 1) is
-- selectable by all users (TO ALL)
CREATE ROW POLICY common ON courseenrollments FOR SELECT USING 1 TO ALL;

View File

@ -0,0 +1,9 @@
<?xml version="1.0"?>
<!--
Declares the ClickHouse user named by the VISION_CLICKHOUSE_USERNAME setting,
with the password from VISION_CLICKHOUSE_PASSWORD and access_management set
to 1. The plugin CLI connects as this user to run its CREATE USER, GRANT and
CREATE ROW POLICY statements.
-->
<yandex>
<users>
<{{ VISION_CLICKHOUSE_USERNAME }}>
<password>{{ VISION_CLICKHOUSE_PASSWORD }}</password>
<access_management>1</access_management>
</{{ VISION_CLICKHOUSE_USERNAME }}>
</users>
</yandex>

View File

@ -3,7 +3,7 @@ REDASH_LOG_LEVEL=INFO
REDASH_REDIS_URL=redis://vision-redis:6379/0
REDASH_COOKIE_SECRET="{{ VISION_REDASH_COOKIE_SECRET }}"
REDASH_SECRET_KEY="{{ VISION_REDASH_SECRET_KEY }}"
REDASH_DATABASE_URL="postgresql://{{ VISION_REDASH_POSTGRESQL_USER }}:{{ VISION_REDASH_POSTGRESQL_PASSWORD }}@vision-postgres/{{ VISION_REDASH_POSTGRESQL_DB }}"
REDASH_DATABASE_URL="postgresql://{{ VISION_POSTGRESQL_USER }}:{{ VISION_POSTGRESQL_PASSWORD }}@vision-postgresql/{{ VISION_POSTGRESQL_DB }}"
REDASH_MAIL_SERVER="{{ SMTP_HOST }}"
REDASH_MAIL_PORT="{{ SMTP_PORT }}"
REDASH_MAIL_USE_TLS="{{ SMTP_USE_TLS }}"

View File

@ -1,21 +1,44 @@
clickhouse-client --host {{ VISION_CLICKHOUSE_HOST }} --port {{ VISION_CLICKHOUSE_PORT }} \
--query "CREATE DATABASE IF NOT EXISTS {{ VISION_CLICKHOUSE_DATABASE }}"
# Run the ClickHouse client with the configured host, port and credentials.
# No database is selected here; any extra arguments are forwarded unchanged.
# The templated values are double-quoted so that whitespace or glob characters
# in them (for instance in a custom password) do not split or expand the word.
clickhouse_client_base() {
    clickhouse client \
        --host "{{ VISION_CLICKHOUSE_HOST }}" \
        --port "{{ VISION_CLICKHOUSE_PORT }}" \
        --user "{{ VISION_CLICKHOUSE_USERNAME }}" \
        --password "{{ VISION_CLICKHOUSE_PASSWORD }}" \
        "$@"
}
# Run the ClickHouse client against the configured database; all arguments are
# forwarded to clickhouse_client_base.
clickhouse_client() {
clickhouse_client_base --database={{ VISION_CLICKHOUSE_DATABASE }} "$@"
}
# Execute the query given as first argument against the configured database.
clickhouse_client_query() {
clickhouse_client --query "$1"
}
# Execute the SQL file given as first argument against the configured database.
# The file is fed on stdin with the multiquery and multiline options enabled.
clickhouse_client_file() {
clickhouse_client --multiquery --multiline < "$1"
}
# Apply the migration file given as first argument unless its base name is
# already recorded in the migrations table; record it once applied.
run_migration() {
    migration_name=$(basename "$1")
    # printf instead of "echo -n", whose behaviour is not specified by POSIX
    printf 'Applying migration %s... ' "$migration_name"
    # LIMIT 1: the check must still succeed if the name was recorded more than once
    is_applied=$(clickhouse_client_query "SELECT 'applied' FROM migrations WHERE name='$migration_name' LIMIT 1")
    if [ "$is_applied" = "applied" ]
    then
        echo "SKIP"
        return
    fi
    clickhouse_client_file "$1"
    clickhouse_client_query "INSERT INTO migrations (name) VALUES ('$migration_name')"
    echo "OK"
}
# Apply every *.sql file found in the migrations directory, in glob order.
run_migrations() {
    for migration in /etc/clickhouse-server/migrations.d/*.sql
    do
        # When nothing matches, the glob is left as the literal pattern: skip it
        [ -e "$migration" ] || continue
        # Quoted so that a path containing whitespace stays a single argument
        run_migration "$migration"
    done
}
# Create the configured database and the migrations bookkeeping table if they
# do not exist yet.
init_db() {
# Create database
# (run through clickhouse_client_base, which does not select a database --
# presumably because the database may not exist yet)
clickhouse_client_base --query "CREATE DATABASE IF NOT EXISTS {{ VISION_CLICKHOUSE_DATABASE }}"
# Create migrations table
# (one row per applied migration file name; read and written by run_migration)
clickhouse_client_query "CREATE TABLE IF NOT EXISTS migrations (name String) ENGINE = MergeTree PRIMARY KEY(name) ORDER BY name"
}
# TODO add PARTITION BY?
clickhouse-client --host {{ VISION_CLICKHOUSE_HOST }} --port {{ VISION_CLICKHOUSE_PORT}} --database {{ VISION_CLICKHOUSE_DATABASE }} \
--query 'CREATE TABLE IF NOT EXISTS tracking (
`time` DateTime,
`message` String
) ENGINE MergeTree ORDER BY time'
init_db
run_migrations
# TODO add materialized view https://youtu.be/pZkKsfr8n3M?t=1731
clickhouse-client --host {{ VISION_CLICKHOUSE_HOST }} --port {{ VISION_CLICKHOUSE_PORT}} --database {{ VISION_CLICKHOUSE_DATABASE }} \
--query 'CREATE VIEW IF NOT EXISTS events AS
SELECT
time,
JSONExtractString(message, 'name') as name,
JSONExtract(message, 'context', 'course_id', 'String') as course_id,
JSONExtract(message, 'context', 'user_id', 'Int64') as user_id
FROM tracking
WHERE JSONExtractString(message, 'event_source')='browser'
ORDER BY time'
# TODO enable live views https://clickhouse.tech/docs/en/sql-reference/statements/create/view/#live-view

View File

@ -0,0 +1,66 @@
# Provision Redash access for one user: a group, a user and a ClickHouse data
# source, each looked up first and created only when missing. The Python
# program below is piped to the interpreter on stdin. Values written between
# double curly braces are template placeholders.
# NOTE(review): the heredoc delimiter is unquoted, so the shell expands dollar
# signs and backquotes found in the body (for instance inside the password)
# before Python sees it; quoting the delimiter would prevent that -- confirm.
cat << EOF | python
from redash import create_app
from redash import models
from redash.query_runner.clickhouse import ClickHouse
from redash.utils.configuration import ConfigurationContainer
# Create the Redash application and push its application context before using the models
app = create_app()
app.app_context().push()
# Get organization
org = models.Organization.get_by_slug('default')
# Get or create group
# (the group is named after the user and looked up within the organization)
group = models.Group.query.filter(models.Group.name == "{{ username }}", models.Group.org == org).first()
if group:
print("Group already exists")
else:
group = models.Group(name="{{ username }}", org=org, permissions=models.Group.DEFAULT_PERMISSIONS)
models.db.session.add(group)
models.db.session.commit()
print("Created group '{}'".format(group.name))
{% if is_root %}
# Rendered only for root users: add the admin and super_admin permissions to the group
for permission in ["admin", "super_admin"]:
if permission not in group.permissions:
print("Adding permission '{}' to group".format(permission))
group.permissions.append(permission)
models.db.session.add(group)
models.db.session.commit()
{% endif %}
# Get or create user
# (existing users are matched by email address only)
user = models.User.query.filter(models.User.email == "{{ email }}").first()
if user:
print("User already exists")
else:
user = models.User(org=org, email="{{ email }}", name="{{ username }}", group_ids=[group.id])
print("Created user '{}/{}'".format(user.email, user.name))
# NOTE(review): the password is pasted into a triple-quoted literal; a password
# containing quotes or backslashes would presumably alter the literal -- confirm
user.hash_password("""{{ password }}""")
models.db.session.add(user)
models.db.session.commit()
# Get or create datasource
# (the data source connects to ClickHouse over HTTP as a user with the same name
# and an empty password; the datalake createuser command presumably creates that
# ClickHouse user without a password -- confirm)
options = ConfigurationContainer(
{
"url": "http://{{ VISION_CLICKHOUSE_HOST }}:{{ VISION_CLICKHOUSE_HTTP_PORT }}",
"user": "{{ username }}",
"password": "",
"dbname": "{{ VISION_CLICKHOUSE_DATABASE }}",
},
ClickHouse.configuration_schema()
)
data_source = models.DataSource.query.filter(models.DataSource.name == "{{ username }}").first()
if data_source:
print("Data source already exists")
else:
data_source = models.DataSource(
name="{{ username }}",
type="clickhouse",
options=options,
org=org,
)
# Link the new data source to the group of the user
data_source_group = models.DataSourceGroup(data_source=data_source, group=group)
models.db.session.add_all([data_source, data_source_group])
models.db.session.commit()
print("Created datasource '{}'".format(data_source.name))
EOF

View File

@ -1,6 +1 @@
./manage.py database create_tables
./manage.py users create_root --password={{ VISION_REDASH_ROOT_PASSWORD }} {{ VISION_REDASH_ROOT_EMAIL }} {{ VISION_REDASH_ROOT_USERNAME }} || echo "Skipping admin user creation"
(./manage.py ds list | grep datalake && echo "datalake data source already exists") || \
(echo "creating datalake data source..." && \
./manage.py ds new --type=clickhouse --options='{"url":"http://{{ VISION_CLICKHOUSE_HOST }}:{{ VISION_CLICKHOUSE_HTTP_PORT }}", "dbname": "{{ VISION_CLICKHOUSE_DATABASE }}"}' datalake)