feat: k8s support \o/

This commit is contained in:
Régis Behmo 2021-06-03 19:15:48 +02:00
parent 2b118a67ba
commit a9f2f70f0d
21 changed files with 683 additions and 722 deletions

View File

@ -1,11 +1,7 @@
Tutor Vision: scalable, real-time analytics for Open edX Tutor Vision: scalable, real-time analytics for Open edX
======================================================== ========================================================
TODO: TODO: Sweet readme
- Kubernetes compatibility
- Sweet readme
- Rename to ocean?
Installation Installation
------------ ------------
@ -42,15 +38,28 @@ Then, create the corresponding user on the frontend::
Your frontend user will automatically be associated to the datalake database you created, provided they share the same name. Your frontend user will automatically be associated to the datalake database you created, provided they share the same name.
Vision comes with a convenient pre-built dashboard that you can add to any user account::
tutor local run vision-superset vision bootstrap-dashboards yourusername /app/bootstrap/courseoverview.json
Course block IDs and names are loaded from the Open edX modulestore into the datalake. After making changes to your course, you might want to refresh the course structure stored in the datalake. To do so, run:: Course block IDs and names are loaded from the Open edX modulestore into the datalake. After making changes to your course, you might want to refresh the course structure stored in the datalake. To do so, run::
tutor local init --limit=vision tutor local init --limit=vision
Or, if you want to avoid running the full plugin initialization:: Or, if you want to avoid running the full plugin initialization::
tutor local run -v $(tutor config printroot)/env/plugins/vision/apps/openedx/scripts/:/openedx/scripts lms \ tutor local run \
python /openedx/scripts/importcoursedata.py \ -v $(tutor config printroot)/env/plugins/vision/apps/openedx/scripts/:/openedx/scripts \
"http://$(tutor config printvalue VISION_CLICKHOUSE_USERNAME):$(tutor config printvalue VISION_CLICKHOUSE_PASSWORD)@$(tutor config printvalue VISION_CLICKHOUSE_HOST):$(tutor config printvalue VISION_CLICKHOUSE_HTTP_PORT)/?database=$(tutor config printvalue VISION_CLICKHOUSE_DATABASE)" -v $(tutor config printroot)/env/plugins/vision/apps/clickhouse/auth.json:/openedx/clickhouse-auth.json \
lms python /openedx/scripts/importcoursedata.py
When running on Kubernetes instead of locally, most commands above can be re-written with `tutor k8s exec service "command"` instead of `tutor local run service command`. For instance::
# Privileged user creation
tutor k8s exec vision-superset "superset fab create-admin --username yourusername --email user@example.com"
# Unprivileged user creation
tutor k8s exec vision-clickhouse "vision createuser --course-id='course-v1:edX+DemoX+Demo_Course' --org-id='edX' yourusername"
tutor k8s exec vision-superset "vision createuser yourusername yourusername@youremail.com"
Development Development
----------- -----------

View File

@ -0,0 +1,306 @@
---
####### Vision plugin
# log collection
# https://vector.dev/docs/setup/installation/platforms/kubernetes/
# https://github.com/timberio/vector/blob/master/distribution/kubernetes/vector-agent/resources.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: vision-vector
labels:
app.kubernetes.io/name: vision-vector
automountServiceAccountToken: true
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: vision-vector
rules:
- apiGroups:
- ""
resources:
- pods
verbs:
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: vision-vector
labels:
app.kubernetes.io/name: vision-vector
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: vision-vector
subjects:
- kind: ServiceAccount
name: vision-vector
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: vision-vector
labels:
app.kubernetes.io/name: vision-vector
spec:
selector:
matchLabels:
name: vision-vector
template:
metadata:
labels:
name: vision-vector
spec:
serviceAccountName: vision-vector
# Run vector next to LMS
affinity:
podAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: In
values:
- lms
topologyKey: kubernetes.io/hostname
containers:
- name: vision-vector
image: docker.io/timberio/vector:0.13.X-alpine
env:
- name: VECTOR_SELF_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: VECTOR_SELF_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: VECTOR_SELF_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: PROCFS_ROOT
value: /host/proc
- name: SYSFS_ROOT
value: /host/sys
volumeMounts:
- name: var-log
mountPath: /var/log/
readOnly: true
- mountPath: /etc/vector/vector.toml
name: config
subPath: vector.toml
readOnly: true
volumes:
- name: var-log
hostPath:
path: /var/log/
- name: config
configMap:
name: vision-vector-config
{% if VISION_RUN_CLICKHOUSE %}
---
# data storage
apiVersion: apps/v1
kind: Deployment
metadata:
name: vision-clickhouse
labels:
app.kubernetes.io/name: vision-clickhouse
spec:
selector:
matchLabels:
app.kubernetes.io/name: vision-clickhouse
template:
metadata:
labels:
app.kubernetes.io/name: vision-clickhouse
spec:
containers:
- name: vision-clickhouse
image: {{ VISION_CLICKHOUSE_DOCKER_IMAGE }}
volumeMounts:
- mountPath: /var/lib/clickhouse
name: data
- mountPath: /etc/clickhouse-server/users.d/vision.xml
name: user-config
subPath: vision.xml
- mountPath: /scripts/clickhouse-auth.json
name: clickhouse-auth
subPath: auth.json
ports:
- containerPort: 8123
- containerPort: 9000
volumes:
- name: data
persistentVolumeClaim:
claimName: vision-clickhouse
- name: user-config
configMap:
name: vision-clickhouse-user-config
- name: clickhouse-auth
configMap:
name: vision-clickhouse-auth
{% endif %}
---
# vision frontend
apiVersion: apps/v1
kind: Deployment
metadata:
name: vision-superset
labels:
app.kubernetes.io/name: vision-superset
spec:
selector:
matchLabels:
app.kubernetes.io/name: vision-superset
template:
metadata:
labels:
app.kubernetes.io/name: vision-superset
spec:
containers:
- name: vision-superset
image: {{ VISION_SUPERSET_DOCKER_IMAGE }}
volumeMounts:
- mountPath: /app/superset_config.py
name: config
subPath: superset_config.py
- mountPath: /app/bootstrap/
name: bootstrap
- mountPath: /scripts/clickhouse-auth.json
name: clickhouse-auth
subPath: auth.json
volumes:
- name: config
configMap:
name: vision-superset-config
- name: bootstrap
configMap:
name: vision-superset-bootstrap
- name: clickhouse-auth
configMap:
name: vision-clickhouse-auth
---
# frontend worker
apiVersion: apps/v1
kind: Deployment
metadata:
name: vision-superset-worker
labels:
app.kubernetes.io/name: vision-superset-worker
spec:
selector:
matchLabels:
app.kubernetes.io/name: vision-superset-worker
template:
metadata:
labels:
app.kubernetes.io/name: vision-superset-worker
spec:
containers:
- name: vision-superset-worker
image: {{ VISION_SUPERSET_DOCKER_IMAGE }}
args: ["celery", "worker", "--app=superset.tasks.celery_app:app", "-Ofair", "-l", "INFO"]
volumeMounts:
- mountPath: /app/superset_config.py
name: config
subPath: superset_config.py
volumes:
- name: config
configMap:
name: vision-superset-config
---
# frontend celery beat
apiVersion: apps/v1
kind: Deployment
metadata:
name: vision-superset-worker-beat
labels:
app.kubernetes.io/name: vision-superset-worker-beat
spec:
selector:
matchLabels:
app.kubernetes.io/name: vision-superset-worker-beat
template:
metadata:
labels:
app.kubernetes.io/name: vision-superset-worker-beat
spec:
containers:
- name: vision-superset-worker-beat
image: {{ VISION_SUPERSET_DOCKER_IMAGE }}
args: ["celery", "beat", "--app=superset.tasks.celery_app:app", "--pidfile", "/tmp/celerybeat.pid", "-l", "INFO", "--schedule=/tmp/celerybeat-schedule"]
volumeMounts:
- mountPath: /app/superset_config.py
name: config
subPath: superset_config.py
volumes:
- name: config
configMap:
name: vision-superset-config
---
# Redis instance used by the vision-superset services (exposed on port 6379).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vision-redis
  labels:
    app.kubernetes.io/name: vision-redis
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: vision-redis
  template:
    metadata:
      labels:
        app.kubernetes.io/name: vision-redis
    spec:
      containers:
        # The container is named after the workload it runs, like every other
        # Deployment in this file (it was previously a copy-pasted
        # "vision-superset-worker", which made logs and `kubectl -c` misleading).
        - name: vision-redis
          image: docker.io/redis:5.0-alpine
          ports:
            - containerPort: 6379
{% if VISION_RUN_POSTGRESQL %}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vision-postgresql
labels:
app.kubernetes.io/name: vision-postgresql
spec:
selector:
matchLabels:
app.kubernetes.io/name: vision-postgresql
strategy:
type: Recreate
template:
metadata:
labels:
app.kubernetes.io/name: vision-postgresql
spec:
containers:
- name: vision-postgresql
image: docker.io/postgres:9.6-alpine
env:
- name: POSTGRES_USER
value: "{{ VISION_POSTGRESQL_USER }}"
- name: POSTGRES_PASSWORD
value: "{{ VISION_POSTGRESQL_PASSWORD }}"
- name: POSTGRES_DB
value: "{{ VISION_POSTGRESQL_DB }}"
# The following is required, otherwise postgresql refuses to
# write to the non-empty directory which contains "lost+found".
- name: PGDATA
value: /var/lib/postgresql/data/pgdata
ports:
- containerPort: 5432
volumeMounts:
- mountPath: /var/lib/postgresql/data
name: data
volumes:
- name: data
persistentVolumeClaim:
claimName: vision-postgresql
{% endif %}

View File

@ -0,0 +1,91 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: vision-clickhouse-job
labels:
app.kubernetes.io/component: job
spec:
template:
spec:
restartPolicy: Never
containers:
- name: vision-clickhouse
image: {{ VISION_CLICKHOUSE_DOCKER_IMAGE }}
volumeMounts:
- mountPath: /scripts/clickhouse-auth.json
name: clickhouse-auth
subPath: auth.json
- mountPath: /etc/clickhouse-server/migrations.d
name: migrations
volumes:
- name: clickhouse-auth
configMap:
name: vision-clickhouse-auth
- name: migrations
configMap:
name: vision-clickhouse-migrations
---
apiVersion: batch/v1
kind: Job
metadata:
name: vision-superset-job
labels:
app.kubernetes.io/component: job
spec:
template:
spec:
restartPolicy: Never
containers:
- name: vision-superset
image: {{ VISION_SUPERSET_DOCKER_IMAGE }}
volumeMounts:
- mountPath: /app/superset_config.py
name: config
subPath: superset_config.py
volumes:
- name: config
configMap:
name: vision-superset-config
---
apiVersion: batch/v1
kind: Job
metadata:
name: vision-openedx-job
labels:
app.kubernetes.io/component: job
spec:
template:
spec:
restartPolicy: Never
containers:
- name: vision-openedx
image: {{ DOCKER_IMAGE_OPENEDX }}
volumeMounts:
- mountPath: /openedx/edx-platform/lms/envs/tutor/
name: settings-lms
- mountPath: /openedx/edx-platform/cms/envs/tutor/
name: settings-cms
- mountPath: /openedx/config
name: config
- mountPath: /openedx/scripts
name: scripts
- mountPath: /openedx/clickhouse-auth.json
name: clickhouse-auth
subPath: auth.json
volumes:
- name: settings-lms
configMap:
name: openedx-settings-lms
- name: settings-cms
configMap:
name: openedx-settings-cms
- name: config
configMap:
name: openedx-config
- name: scripts
configMap:
name: vision-openedx-scripts
- name: clickhouse-auth
configMap:
name: vision-clickhouse-auth

View File

@ -0,0 +1,57 @@
#### Vision services
{% if VISION_RUN_CLICKHOUSE %}
---
# Service exposing the ClickHouse datalake to the other vision workloads.
apiVersion: v1
kind: Service
metadata:
  name: vision-clickhouse
spec:
  type: NodePort
  ports:
    # Port names must describe the protocol actually served on each port:
    # 8123 is the HTTP interface (default VISION_CLICKHOUSE_HTTP_PORT) and
    # 9000 is the native TCP protocol (default VISION_CLICKHOUSE_PORT).
    - port: 8123
      protocol: TCP
      name: "http"
    - port: 9000
      protocol: TCP
      name: "native"
  selector:
    app.kubernetes.io/name: vision-clickhouse
{% endif %}
{% if VISION_RUN_POSTGRESQL %}
---
apiVersion: v1
kind: Service
metadata:
name: vision-postgresql
spec:
type: NodePort
ports:
- port: 5432
protocol: TCP
selector:
app.kubernetes.io/name: vision-postgresql
{% endif %}
---
apiVersion: v1
kind: Service
metadata:
name: vision-redis
spec:
type: NodePort
ports:
- port: 6379
protocol: TCP
selector:
app.kubernetes.io/name: vision-redis
---
apiVersion: v1
kind: Service
metadata:
name: vision-superset
spec:
type: NodePort
ports:
- port: 8000
protocol: TCP
selector:
app.kubernetes.io/name: vision-superset

View File

@ -0,0 +1,32 @@
{% if VISION_RUN_CLICKHOUSE %}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: vision-clickhouse
labels:
app.kubernetes.io/component: volume
app.kubernetes.io/name: vision-clickhouse
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
{% endif %}
{% if VISION_RUN_POSTGRESQL %}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: vision-postgresql
labels:
app.kubernetes.io/component: volume
app.kubernetes.io/name: vision-postgresql
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 2Gi
{% endif %}

View File

@ -0,0 +1,21 @@
- name: vision-vector-config
files:
- plugins/vision/apps/vector/vector.toml
- name: vision-clickhouse-user-config
files:
- plugins/vision/apps/clickhouse/users.d/vision.xml
- name: vision-clickhouse-migrations
files:{% for file in "vision/apps/clickhouse/migrations.d"|walk_templates %}
- plugins/{{ file }}{% endfor %}
- name: vision-clickhouse-auth
files:
- plugins/vision/apps/clickhouse/auth.json
- name: vision-superset-config
files:
- plugins/vision/apps/superset/superset_config.py
- name: vision-superset-bootstrap
files:{% for file in "vision/apps/superset/bootstrap"|walk_templates %}
- plugins/{{ file }}{% endfor %}
- name: vision-openedx-scripts
files:{% for file in "vision/apps/openedx/scripts"|walk_templates %}
- plugins/{{ file }}{% endfor %}

View File

@ -2,6 +2,7 @@ vision-clickhouse-job:
image: {{ VISION_CLICKHOUSE_DOCKER_IMAGE }} image: {{ VISION_CLICKHOUSE_DOCKER_IMAGE }}
depends_on: {{ [("vision-clickhouse", VISION_RUN_CLICKHOUSE)]|list_if }} depends_on: {{ [("vision-clickhouse", VISION_RUN_CLICKHOUSE)]|list_if }}
volumes: volumes:
- ../plugins/vision/apps/clickhouse/auth.json:/scripts/clickhouse-auth.json:ro
- ../plugins/vision/apps/clickhouse/migrations.d/:/etc/clickhouse-server/migrations.d/:ro - ../plugins/vision/apps/clickhouse/migrations.d/:/etc/clickhouse-server/migrations.d/:ro
vision-superset-job: vision-superset-job:
image: {{ VISION_SUPERSET_DOCKER_IMAGE }} image: {{ VISION_SUPERSET_DOCKER_IMAGE }}
@ -20,4 +21,5 @@ vision-openedx-job:
- ../apps/openedx/settings/cms/:/openedx/edx-platform/cms/envs/tutor/:ro - ../apps/openedx/settings/cms/:/openedx/edx-platform/cms/envs/tutor/:ro
- ../apps/openedx/config/:/openedx/config/:ro - ../apps/openedx/config/:/openedx/config/:ro
- ../plugins/vision/apps/openedx/scripts/:/openedx/scripts/:ro - ../plugins/vision/apps/openedx/scripts/:/openedx/scripts/:ro
depends_on: {{ [("mysql", RUN_MYSQL), ("mongodb", RUN_MONGODB)]|list_if }} - ../plugins/vision/apps/clickhouse/auth.json:/openedx/clickhouse-auth.json:ro
depends_on: {{ [("mysql", RUN_MYSQL), ("mongodb", RUN_MONGODB)]|list_if }}

View File

@ -1,15 +1,14 @@
####### vision plugin ####### Vision plugin
# log collection # log collection
vision-vector: vision-vector:
image: docker.io/timberio/vector:0.13.X-alpine image: docker.io/timberio/vector:0.13.X-alpine
volumes: volumes:
- ../plugins/vision/apps/vector/vector.toml:/etc/vector/vector.toml:ro - ../plugins/vision/apps/vector/vector.toml:/etc/vector/vector.toml:ro
{% if VISION_DOCKER_HOST %}- {{ VISION_DOCKER_HOST }}:/var/run/docker.sock:ro{% endif %} {% if VISION_DOCKER_HOST_SOCK_PATH %}- {{ VISION_DOCKER_HOST_SOCK_PATH }}:/var/run/docker.sock:ro{% endif %}
environment: environment:
- DOCKER_HOST=/var/run/docker.sock - DOCKER_HOST=/var/run/docker.sock
restart: unless-stopped restart: unless-stopped
{% if VISION_RUN_CLICKHOUSE %} {% if VISION_RUN_CLICKHOUSE %}
# log storage # log storage
vision-clickhouse: vision-clickhouse:
@ -17,20 +16,19 @@ vision-clickhouse:
volumes: volumes:
- ../../data/vision/clickhouse:/var/lib/clickhouse - ../../data/vision/clickhouse:/var/lib/clickhouse
- ../plugins/vision/apps/clickhouse/users.d/vision.xml:/etc/clickhouse-server/users.d/vision.xml:ro - ../plugins/vision/apps/clickhouse/users.d/vision.xml:/etc/clickhouse-server/users.d/vision.xml:ro
env_file: ../plugins/vision/apps/env - ../plugins/vision/apps/clickhouse/auth.json:/scripts/clickhouse-auth.json:ro
ulimits: ulimits:
nofile: nofile:
soft: 262144 soft: 262144
hard: 262144 hard: 262144
restart: unless-stopped restart: unless-stopped
{% endif %} {% endif %}
vision-superset: vision-superset:
image: {{ VISION_SUPERSET_DOCKER_IMAGE }} image: {{ VISION_SUPERSET_DOCKER_IMAGE }}
volumes: volumes:
- ../plugins/vision/apps/superset/superset_config.py:/app/superset_config.py:ro - ../plugins/vision/apps/superset/superset_config.py:/app/superset_config.py:ro
- ../plugins/vision/apps/superset/bootstrap:/app/bootstrap - ../plugins/vision/apps/clickhouse/auth.json:/scripts/clickhouse-auth.json:ro
env_file: ../plugins/vision/apps/env - ../plugins/vision/apps/superset/bootstrap:/app/bootstrap:ro
restart: unless-stopped restart: unless-stopped
depends_on: depends_on:
- vision-redis - vision-redis

View File

@ -15,20 +15,20 @@ config = {
}, },
"defaults": { "defaults": {
"VERSION": __version__, "VERSION": __version__,
"CLICKHOUSE_DOCKER_IMAGE": "{{ DOCKER_REGISTRY }}overhangio/clickhouse:{{ VISION_VERSION }}", "CLICKHOUSE_DOCKER_IMAGE": "{{ DOCKER_REGISTRY }}overhangio/vision-clickhouse:{{ VISION_VERSION }}",
"RUN_CLICKHOUSE": True, "RUN_CLICKHOUSE": True,
"CLICKHOUSE_SCHEME": "http",
"CLICKHOUSE_HOST": "vision-clickhouse", "CLICKHOUSE_HOST": "vision-clickhouse",
"CLICKHOUSE_HTTP_PORT": 8123, "CLICKHOUSE_HTTP_PORT": 8123,
"CLICKHOUSE_HTTP_SCHEME": "http",
"CLICKHOUSE_PORT": 9000, "CLICKHOUSE_PORT": 9000,
"CLICKHOUSE_DATABASE": "openedx", "CLICKHOUSE_DATABASE": "openedx",
"CLICKHOUSE_USERNAME": "openedx", "CLICKHOUSE_USERNAME": "openedx",
"DOCKER_HOST": "/var/run/docker.sock", "DOCKER_HOST_SOCK_PATH": "/var/run/docker.sock",
"POSTGRESQL_USER": "superset", "POSTGRESQL_USER": "superset",
"POSTGRESQL_DB": "superset", "POSTGRESQL_DB": "superset",
"RUN_CLICKHOUSE": True, "RUN_CLICKHOUSE": True,
"RUN_POSTGRESQL": True, "RUN_POSTGRESQL": True,
"SUPERSET_DOCKER_IMAGE": "{{ DOCKER_REGISTRY }}overhangio/superset:{{ VISION_VERSION }}", "SUPERSET_DOCKER_IMAGE": "{{ DOCKER_REGISTRY }}overhangio/vision-superset:{{ VISION_VERSION }}",
"SUPERSET_HOST": "vision.{{ LMS_HOST }}", "SUPERSET_HOST": "vision.{{ LMS_HOST }}",
"SUPERSET_DATABASE": "openedx", "SUPERSET_DATABASE": "openedx",
}, },
@ -37,7 +37,11 @@ config = {
hooks = { hooks = {
"build-image": { "build-image": {
"vision-clickhouse": "{{ VISION_CLICKHOUSE_DOCKER_IMAGE }}", "vision-clickhouse": "{{ VISION_CLICKHOUSE_DOCKER_IMAGE }}",
"vision-superset": "{{ VISION_SUPERSET_DOCKER_IMAGE }}" "vision-superset": "{{ VISION_SUPERSET_DOCKER_IMAGE }}",
},
"remote-image": {
"vision-clickhouse": "{{ VISION_CLICKHOUSE_DOCKER_IMAGE }}",
"vision-superset": "{{ VISION_SUPERSET_DOCKER_IMAGE }}",
}, },
"init": ["vision-clickhouse", "vision-superset", "vision-openedx"], "init": ["vision-clickhouse", "vision-superset", "vision-openedx"],
} }

View File

@ -0,0 +1,9 @@
{
"host": "{{ VISION_CLICKHOUSE_HOST }}",
"port": {{ VISION_CLICKHOUSE_PORT }},
"http_port": "{{ VISION_CLICKHOUSE_HTTP_PORT }}",
"http_scheme": "{{ VISION_CLICKHOUSE_HTTP_SCHEME }}",
"username": "{{ VISION_CLICKHOUSE_USERNAME }}",
"password": "{{ VISION_CLICKHOUSE_PASSWORD }}",
"database": "{{ VISION_CLICKHOUSE_DATABASE }}"
}

View File

@ -38,8 +38,8 @@ SELECT
openedx_course_enrollments.is_active AS enrollment_is_active, openedx_course_enrollments.is_active AS enrollment_is_active,
openedx_course_enrollments.mode AS enrollment_mode, openedx_course_enrollments.mode AS enrollment_mode,
openedx_course_enrollments.user_id AS user_id, openedx_course_enrollments.user_id AS user_id,
openedx_course_enrollments.username AS username, openedx_users.username AS username,
openedx_course_enrollments.email AS user_email, openedx_users.email AS user_email,
openedx_user_profiles.year_of_birth AS user_year_of_birth, openedx_user_profiles.year_of_birth AS user_year_of_birth,
openedx_user_profiles.gender AS user_gender, openedx_user_profiles.gender AS user_gender,
openedx_user_profiles.level_of_education AS user_level_of_education, openedx_user_profiles.level_of_education AS user_level_of_education,
@ -47,7 +47,7 @@ SELECT
openedx_user_profiles.state AS user_state, openedx_user_profiles.state AS user_state,
openedx_user_profiles.country AS user_country openedx_user_profiles.country AS user_country
FROM openedx_course_enrollments FROM openedx_course_enrollments
INNER JOIN openedx_user_profiles ON openedx_course_enrollments.user_id = openedx_user_profiles.user_id; INNER JOIN openedx_user_profiles ON openedx_course_enrollments.user_id = openedx_user_profiles.user_id
INNER JOIN openedx_users ON openedx_course_enrollments.user_id = openedx_users.id; INNER JOIN openedx_users ON openedx_course_enrollments.user_id = openedx_users.id;
-- Grant everyone access to the view -- Grant everyone access to the view

View File

@ -1,4 +1,4 @@
CREATE TABLE openedx_block_completion CREATE TABLE _openedx_block_completion
( (
`modified` DateTime NULL, `modified` DateTime NULL,
`course_key` String, `course_key` String,
@ -14,15 +14,15 @@ set allow_experimental_live_view = 1;
CREATE LIVE VIEW course_block_completion WITH PERIODIC REFRESH 30 AS CREATE LIVE VIEW course_block_completion WITH PERIODIC REFRESH 30 AS
SELECT SELECT
openedx_block_completion.course_key AS course_id, _openedx_block_completion.course_key AS course_id,
openedx_block_completion.block_key AS block_key, _openedx_block_completion.block_key AS block_key,
openedx_block_completion.user_id AS user_id, _openedx_block_completion.user_id AS user_id,
openedx_block_completion.completion AS completion, _openedx_block_completion.completion AS completion,
course_blocks.position as position, course_blocks.position as position,
course_blocks.display_name as display_name, course_blocks.display_name as display_name,
course_blocks.full_name as full_name course_blocks.full_name as full_name
FROM openedx_block_completion FROM _openedx_block_completion
INNER JOIN course_blocks ON openedx_block_completion.block_key = course_blocks.block_key; INNER JOIN course_blocks ON _openedx_block_completion.block_key = course_blocks.block_key;
-- Grant everyone access to the view -- Grant everyone access to the view
CREATE ROW POLICY common ON course_block_completion FOR SELECT USING 1 TO ALL; CREATE ROW POLICY common ON course_block_completion FOR SELECT USING 1 TO ALL;

View File

@ -1,5 +0,0 @@
VISION_CLICKHOUSE_HOST={{ VISION_CLICKHOUSE_HOST }}
VISION_CLICKHOUSE_PORT={{ VISION_CLICKHOUSE_PORT }}
VISION_CLICKHOUSE_USERNAME={{ VISION_CLICKHOUSE_USERNAME }}
VISION_CLICKHOUSE_PASSWORD={{ VISION_CLICKHOUSE_PASSWORD }}
VISION_CLICKHOUSE_DATABASE={{ VISION_CLICKHOUSE_DATABASE }}

View File

@ -1,4 +1,6 @@
import argparse import argparse
import json
import os
import requests import requests
from MySQLdb import escape_string as sql_escape_string from MySQLdb import escape_string as sql_escape_string
@ -7,27 +9,30 @@ import lms.startup
lms.startup.run() lms.startup.run()
from courseware.courses import get_course from lms.djangoapps.courseware.courses import get_course
from opaque_keys.edx.keys import CourseKey
from xmodule.modulestore.django import modulestore from xmodule.modulestore.django import modulestore
with open(os.path.join(os.path.dirname(__file__), "..", "clickhouse-auth.json")) as f:
CLICKHOUSE_AUTH = json.load(f)
def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Import course block information into the datalake" description="Import course block information into the datalake"
) )
parser.add_argument("-c", "--course-id", action="append", help="Limit import to these courses") parser.add_argument(
parser.add_argument("uri", help="Clickhouse URI") "-c", "--course-id", action="append", help="Limit import to these courses"
)
args = parser.parse_args() args = parser.parse_args()
module_store = modulestore() module_store = modulestore()
course_ids = args.course_id or [] course_ids = args.course_id or []
for course in module_store.get_courses(): for course in module_store.get_courses():
if str(course.id) in course_ids or not course_ids: if str(course.id) in course_ids or not course_ids:
import_course(course.id, args.uri) import_course(course.id)
def import_course(course_key, clickhouse_uri): def import_course(course_key):
course_id = str(course_key) course_id = str(course_key)
# Reload course to fetch all children items # Reload course to fetch all children items
course = get_course(course_key, depth=None) course = get_course(course_key, depth=None)
@ -53,13 +58,12 @@ def import_course(course_key, clickhouse_uri):
"ALTER TABLE course_blocks DELETE WHERE course_id = '{}';", "ALTER TABLE course_blocks DELETE WHERE course_id = '{}';",
course_id, course_id,
), ),
clickhouse_uri,
) )
insert_query = sql_query( insert_query = sql_query(
"INSERT INTO course_blocks (course_id, block_key, block_id, position, display_name, full_name) VALUES " "INSERT INTO course_blocks (course_id, block_key, block_id, position, display_name, full_name) VALUES "
) )
insert_query += ", ".join(values) insert_query += ", ".join(values)
make_query(insert_query, clickhouse_uri) make_query(insert_query)
def iter_course_blocks(item, prefix=""): def iter_course_blocks(item, prefix=""):
@ -76,8 +80,12 @@ def sql_query(template, *args, **kwargs):
return template.format(*args, **kwargs) return template.format(*args, **kwargs)
def make_query(query, url): def make_query(query):
response = requests.post(url, data=query) clickhouse_uri = (
f"{CLICKHOUSE_AUTH['http_scheme']}://{CLICKHOUSE_AUTH['username']}:{CLICKHOUSE_AUTH['password']}@"
f"{CLICKHOUSE_AUTH['host']}:{CLICKHOUSE_AUTH['http_port']}/?database={CLICKHOUSE_AUTH['database']}"
)
response = requests.post(clickhouse_uri, data=query)
if response.status_code != 200: if response.status_code != 200:
print(response.content.decode()) print(response.content.decode())
raise ValueError("An error occurred while attempting to post a query") raise ValueError("An error occurred while attempting to post a query")

View File

@ -1,602 +0,0 @@
{
"dashboards": [
{
"__Dashboard__": {
"css": "",
"dashboard_title": "Student Engagement",
"description": null,
"json_metadata": "{\"timed_refresh_immune_slices\": [], \"filter_scopes\": {\"17\": {\"course_id\": {\"scope\": [\"ROOT_ID\"], \"immune\": []}, \"__time_range\": {\"scope\": [\"ROOT_ID\"], \"immune\": []}}}, \"expanded_slices\": {}, \"refresh_frequency\": 0, \"default_filters\": \"{\\\"17\\\": {\\\"__time_range\\\": \\\"Last week\\\"}}\", \"color_scheme\": null, \"remote_id\": 2}",
"position_json": "{\"CHART-_XdNUl5YJ9\":{\"children\":[],\"id\":\"CHART-_XdNUl5YJ9\",\"meta\":{\"chartId\":17,\"height\":35,\"sliceName\":\"Select course ID and time range\",\"uuid\":\"80ca2797-395e-45cb-a14f-c6a98cf0d9d1\",\"width\":4},\"parents\":[\"ROOT_ID\",\"GRID_ID\",\"ROW-yPuXNZUCnv\"],\"type\":\"CHART\"},\"CHART-p4ta63zmN2\":{\"children\":[],\"id\":\"CHART-p4ta63zmN2\",\"meta\":{\"chartId\":6,\"height\":35,\"sliceName\":\"Watched a video\",\"uuid\":\"fdc5ce1f-412f-434f-8a7c-d4ef3d2ede7c\",\"width\":2},\"parents\":[\"ROOT_ID\",\"GRID_ID\",\"ROW-yPuXNZUCnv\"],\"type\":\"CHART\"},\"CHART-t7KLpPYQxw\":{\"children\":[],\"id\":\"CHART-t7KLpPYQxw\",\"meta\":{\"chartId\":8,\"height\":35,\"sliceName\":\"Tried a problem\",\"uuid\":\"dfd0088c-74dd-4dfb-a221-4c1633d17072\",\"width\":2},\"parents\":[\"ROOT_ID\",\"GRID_ID\",\"ROW-yPuXNZUCnv\"],\"type\":\"CHART\"},\"CHART-xXCRFE4mZa\":{\"children\":[],\"id\":\"CHART-xXCRFE4mZa\",\"meta\":{\"chartId\":5,\"height\":35,\"sliceName\":\"Active students\",\"uuid\":\"b46a1e93-2bf6-4330-b9b4-67ae57d45a4e\",\"width\":2},\"parents\":[\"ROOT_ID\",\"GRID_ID\",\"ROW-yPuXNZUCnv\"],\"type\":\"CHART\"},\"DASHBOARD_VERSION_KEY\":\"v2\",\"GRID_ID\":{\"children\":[\"ROW-yPuXNZUCnv\"],\"id\":\"GRID_ID\",\"parents\":[\"ROOT_ID\"],\"type\":\"GRID\"},\"HEADER_ID\":{\"id\":\"HEADER_ID\",\"meta\":{\"text\":\"Student Engagement\"},\"type\":\"HEADER\"},\"ROOT_ID\":{\"children\":[\"GRID_ID\"],\"id\":\"ROOT_ID\",\"type\":\"ROOT\"},\"ROW-yPuXNZUCnv\":{\"children\":[\"CHART-_XdNUl5YJ9\",\"CHART-xXCRFE4mZa\",\"CHART-p4ta63zmN2\",\"CHART-t7KLpPYQxw\"],\"id\":\"ROW-yPuXNZUCnv\",\"meta\":{\"0\":\"ROOT_ID\",\"background\":\"BACKGROUND_TRANSPARENT\"},\"parents\":[\"ROOT_ID\",\"GRID_ID\"],\"type\":\"ROW\"}}",
"slices": [
{
"__Slice__": {
"cache_timeout": null,
"datasource_name": "openedx.User events",
"datasource_type": "table",
"id": 6,
"params": "{\"adhoc_filters\": [{\"clause\": \"WHERE\", \"comparator\": \"'play_video'\", \"expressionType\": \"SIMPLE\", \"filterOptionName\": \"filter_7autufejah_v1qfuj79p2a\", \"isExtra\": false, \"isNew\": false, \"operator\": \"==\", \"sqlExpression\": null, \"subject\": \"name\"}], \"datasource\": \"9__table\", \"extra_form_data\": {}, \"granularity_sqla\": \"time\", \"header_font_size\": 0.4, \"metric\": \"Distinct user IDs\", \"slice_id\": 6, \"subheader_font_size\": 0.15, \"time_range\": \"No filter\", \"time_range_endpoints\": [\"inclusive\", \"exclusive\"], \"url_params\": {}, \"viz_type\": \"big_number_total\", \"y_axis_format\": \"SMART_NUMBER\", \"remote_id\": 6, \"datasource_name\": \"User events\", \"schema\": \"openedx\", \"database_name\": \"admin\"}",
"slice_name": "Watched a video",
"viz_type": "big_number_total"
}
},
{
"__Slice__": {
"cache_timeout": null,
"datasource_name": "openedx.User events",
"datasource_type": "table",
"id": 5,
"params": "{\"adhoc_filters\": [], \"datasource\": \"9__table\", \"extra_form_data\": {}, \"granularity_sqla\": \"time\", \"header_font_size\": 0.4, \"metric\": \"Distinct user IDs\", \"slice_id\": 5, \"subheader\": \"\", \"subheader_font_size\": 0.15, \"time_range\": \"No filter\", \"time_range_endpoints\": [\"inclusive\", \"exclusive\"], \"url_params\": {}, \"viz_type\": \"big_number_total\", \"y_axis_format\": \"SMART_NUMBER\", \"remote_id\": 5, \"datasource_name\": \"User events\", \"schema\": \"openedx\", \"database_name\": \"admin\"}",
"slice_name": "Active students",
"viz_type": "big_number_total"
}
},
{
"__Slice__": {
"cache_timeout": null,
"datasource_name": "openedx.User events",
"datasource_type": "table",
"id": 8,
"params": "{\"adhoc_filters\": [{\"clause\": \"WHERE\", \"comparator\": \"problem_check\", \"expressionType\": \"SIMPLE\", \"filterOptionName\": \"filter_7autufejah_v1qfuj79p2a\", \"isExtra\": false, \"isNew\": false, \"operator\": \"==\", \"sqlExpression\": null, \"subject\": \"name\"}], \"datasource\": \"9__table\", \"extra_form_data\": {}, \"granularity_sqla\": \"time\", \"header_font_size\": 0.4, \"metric\": \"Distinct user IDs\", \"subheader_font_size\": 0.15, \"time_range\": \"DATEADD(DATETIME(\\\"now\\\"), -7, day) : now\", \"time_range_endpoints\": [\"inclusive\", \"exclusive\"], \"url_params\": {}, \"viz_type\": \"big_number_total\", \"y_axis_format\": \"SMART_NUMBER\", \"remote_id\": 8, \"datasource_name\": \"User events\", \"schema\": \"openedx\", \"database_name\": \"admin\"}",
"slice_name": "Tried a problem",
"viz_type": "big_number_total"
}
},
{
"__Slice__": {
"cache_timeout": null,
"datasource_name": "openedx.Course enrollments",
"datasource_type": "table",
"id": 17,
"params": "{\"adhoc_filters\": [], \"datasource\": \"10__table\", \"date_filter\": true, \"extra_form_data\": {}, \"filter_configs\": [{\"asc\": true, \"clearable\": true, \"column\": \"course_id\", \"key\": \"CQE2v7Ajx\", \"label\": \"Course ID\", \"multiple\": true, \"searchAllOptions\": false}], \"slice_id\": 17, \"time_grain_sqla\": \"PT1M\", \"time_range\": \"Last week\", \"time_range_endpoints\": [\"inclusive\", \"exclusive\"], \"url_params\": {}, \"viz_type\": \"filter_box\", \"remote_id\": 17, \"datasource_name\": \"Course enrollments\", \"schema\": \"openedx\", \"database_name\": \"admin\"}",
"slice_name": "Select course ID and time range",
"viz_type": "filter_box"
}
}
],
"slug": null
}
}
],
"datasources": [
{
"__SqlaTable__": {
"cache_timeout": null,
"columns": [
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:15:53"
},
"column_name": "time",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:15:53"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 47,
"is_active": true,
"is_dttm": true,
"python_date_format": null,
"table_id": 9,
"type": "DATETIME",
"uuid": "33179b70-8d58-4f4e-b4b8-be67177ad571",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:15:53"
},
"column_name": "course_id",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:15:53"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 48,
"is_active": true,
"is_dttm": false,
"python_date_format": null,
"table_id": 9,
"type": "STRING",
"uuid": "c6e20ed6-d503-44db-8d2c-276759bd3b55",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:15:53"
},
"column_name": "name",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:15:53"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 49,
"is_active": true,
"is_dttm": false,
"python_date_format": null,
"table_id": 9,
"type": "STRING",
"uuid": "c5b4ef75-3c49-44a2-b266-ce8be95d8db1",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:15:53"
},
"column_name": "user_id",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:15:53"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 50,
"is_active": true,
"is_dttm": false,
"python_date_format": null,
"table_id": 9,
"type": "INT64",
"uuid": "e7eb1920-3a1f-4807-a8f6-9009e59ffa74",
"verbose_name": null
}
}
],
"database_id": 1,
"default_endpoint": null,
"description": null,
"extra": null,
"fetch_values_predicate": null,
"filter_select_enabled": false,
"main_dttm_col": null,
"metrics": [
{
"__SqlMetric__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:18:54"
},
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:18:54"
},
"d3format": null,
"description": null,
"expression": "COUNT(DISTINCT(user_id))",
"extra": "{}",
"id": 15,
"metric_name": "Distinct user IDs",
"metric_type": null,
"table_id": 9,
"uuid": "69932270-ec14-4c56-abf9-8d117ffcd056",
"verbose_name": "",
"warning_text": null
}
}
],
"offset": 0,
"params": "{\"remote_id\": 9, \"database_name\": \"admin\", \"import_time\": 1621942851}",
"schema": "openedx",
"sql": "SELECT time,\r\n course_id, name, user_id\r\nFROM openedx.events\r\nWHERE event_source = 'browser'",
"table_name": "User events",
"template_params": null
}
},
{
"__SqlaTable__": {
"cache_timeout": null,
"columns": [
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "course_id",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 51,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "STRING",
"uuid": "f30d1ace-61e7-417d-bb42-cbe2202ba77d",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "enrollment_created",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 52,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "NULLABLE(DATETIME)",
"uuid": "214e5525-f504-4967-98fe-594246784958",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "enrollment_is_active",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 53,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "UINT8",
"uuid": "adda2e42-b890-4614-ad32-70b29df97a50",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "enrollment_mode",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 54,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "STRING",
"uuid": "95b86e16-a423-46dd-8572-aa59de477a49",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "user_id",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 55,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "UINT64",
"uuid": "349b4226-71f9-4e36-919a-f3166905519f",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "user_year_of_birth",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 56,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "UINT32",
"uuid": "a97a8573-d7f6-4a2f-a36a-ad6605649785",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "user_gender",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 57,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "STRING",
"uuid": "7a78fd5a-7408-4feb-9981-b8a1519305ec",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "user_level_of_education",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 58,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "STRING",
"uuid": "14b87739-fd01-460f-a3cc-cd1a222be651",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "user_city",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 59,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "STRING",
"uuid": "120e3998-09bd-4c4b-9c60-40bf6406c8ce",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "user_state",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 60,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "STRING",
"uuid": "2656d45a-c083-4966-9aef-1370fefda078",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "user_country",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 61,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "STRING",
"uuid": "06f431fd-e83d-4551-8fca-808bc2f39854",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "level_of_education",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 62,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "NULLABLE(STRING)",
"uuid": "0862dff7-c61f-4e72-be59-af77825af9ff",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "level_of_education_order",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 63,
"is_active": null,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "NULLABLE(UINT8)",
"uuid": "94f69a29-a2fb-4dee-b9ba-e612fed44a73",
"verbose_name": null
}
},
{
"__TableColumn__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"column_name": "gender",
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"description": null,
"expression": null,
"filterable": true,
"groupby": true,
"id": 64,
"is_active": true,
"is_dttm": false,
"python_date_format": null,
"table_id": 10,
"type": "NULLABLE(STRING)",
"uuid": "8b474ea7-40d3-4280-96d9-9a9e15c3997f",
"verbose_name": null
}
}
],
"database_id": 1,
"default_endpoint": null,
"description": null,
"extra": null,
"fetch_values_predicate": null,
"filter_select_enabled": false,
"main_dttm_col": null,
"metrics": [
{
"__SqlMetric__": {
"changed_by_fk": 1,
"changed_on": {
"__datetime__": "2021-05-20T16:46:34"
},
"created_by_fk": 1,
"created_on": {
"__datetime__": "2021-05-20T16:41:16"
},
"d3format": null,
"description": null,
"expression": "count(*)",
"extra": "{\"warning_markdown\":null}",
"id": 16,
"metric_name": "count",
"metric_type": null,
"table_id": 10,
"uuid": "0967c3ab-96e0-4d06-9861-573e5beac200",
"verbose_name": null,
"warning_text": null
}
}
],
"offset": 0,
"params": "{\"remote_id\": 10, \"database_name\": \"admin\", \"import_time\": 1622022621}",
"schema": "openedx",
"sql": "SELECT\r\n *,\r\n CASE\r\n WHEN user_gender = 'f' THEN 'Female'\r\n WHEN user_gender = 'm' THEN 'Male'\r\n WHEN user_gender = 'o' THEN 'Other'\r\n END AS gender,\r\n CASE\r\n WHEN user_level_of_education = 'none' THEN 'No formal education'\r\n WHEN user_level_of_education = 'b' THEN 'Bachelor''s degree'\r\n WHEN user_level_of_education = 'a' THEN 'Associate degree'\r\n WHEN user_level_of_education = 'hs' THEN 'Secondary/high school'\r\n WHEN user_level_of_education = 'jhs' THEN 'Junior secondary/junior high/middle school'\r\n WHEN user_level_of_education = 'el' THEN 'Elementary/primary school'\r\n WHEN user_level_of_education = 'm' THEN 'Master''s or professional degree'\r\n WHEN user_level_of_education = 'p' THEN 'Doctorate'\r\n WHEN user_level_of_education = 'other' THEN 'Other education'\r\n END AS level_of_education,\r\n CASE\r\n WHEN user_level_of_education = 'none' THEN 1\r\n WHEN user_level_of_education = 'b' THEN 2\r\n WHEN user_level_of_education = 'a' THEN 3\r\n WHEN user_level_of_education = 'hs' THEN 4\r\n WHEN user_level_of_education = 'jhs' THEN 5\r\n WHEN user_level_of_education = 'el' THEN 6\r\n WHEN user_level_of_education = 'm' THEN 7\r\n WHEN user_level_of_education = 'p' THEN 8\r\n WHEN user_level_of_education = 'other' THEN 9\r\n END AS level_of_education_order\r\nFROM openedx.course_enrollments",
"table_name": "Course enrollments",
"template_params": null
}
}
]
}

View File

@ -6,21 +6,27 @@ address = "127.0.0.1:8686"
### Sources ### Sources
# Capture logs from all containers # Capture logs from all containers
[sources.containers] [sources.docker_logs]
type = "docker_logs" type = "docker_logs"
# Capture logs through Vector's "kubernetes_logs" source (used for k8s deployments)
[sources.kubernetes_logs]
type = "kubernetes_logs"
### Transforms ### Transforms
# Select lms & cms containers # Select lms & cms containers
[transforms.openedx_containers] [transforms.openedx_docker_containers]
type = "filter" type = "filter"
inputs = ["containers"] inputs = ["docker_logs"]
condition = 'includes(["lms", "cms"], .label."com.docker.compose.service")' condition = 'includes(["lms", "cms"], .label."com.docker.compose.service")'
# Select lms & cms containers from the Kubernetes namespace rendered from K8S_NAMESPACE.
# Events from both log sources are fed to this filter; only those whose
# .kubernetes.pod_namespace and .kubernetes.container_name fields match are kept.
[transforms.openedx_kubernetes_containers]
type = "filter"
inputs = ["docker_logs", "kubernetes_logs"]
condition = '.kubernetes.pod_namespace == "{{ K8S_NAMESPACE }}" && includes(["lms", "cms"], .kubernetes.container_name)'
# Parse tracking logs: extract time # Parse tracking logs: extract time
[transforms.tracking] [transforms.tracking]
type = "remap" type = "remap"
inputs = ["openedx_containers"] inputs = ["openedx_docker_containers", "openedx_kubernetes_containers"]
# Time formats: https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html#specifiers # Time formats: https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html#specifiers
source = ''' source = '''
parsed, err_regex = parse_regex(.message, r'^.* \[tracking\] [^{}]* (?P<tracking_message>\{.*\})$') parsed, err_regex = parse_regex(.message, r'^.* \[tracking\] [^{}]* (?P<tracking_message>\{.*\})$')
@ -56,9 +62,10 @@ source = '''
# Log all events to stdout, for debugging # Log all events to stdout, for debugging
[sinks.out] [sinks.out]
type = "console" type = "console"
inputs = ["tracking_debug"] inputs = ["openedx_kubernetes_containers"]
# inputs = ["tracking_debug"]
encoding.codec = "json" encoding.codec = "json"
encoding.only_fields = ["time", "message.context.course_id", "message.context.user_id", "message.name"] # encoding.only_fields = ["time", "message.context.course_id", "message.context.user_id", "message.name"]
# # Send logs to clickhouse # # Send logs to clickhouse
[sinks.clickhouse] [sinks.clickhouse]
@ -66,7 +73,7 @@ type = "clickhouse"
# Required: https://github.com/timberio/vector/issues/5797 # Required: https://github.com/timberio/vector/issues/5797
encoding.timestamp_format = "unix" encoding.timestamp_format = "unix"
inputs = ["tracking"] inputs = ["tracking"]
endpoint = "{{ VISION_CLICKHOUSE_SCHEME }}://{{ VISION_CLICKHOUSE_HOST }}:{{ VISION_CLICKHOUSE_HTTP_PORT }}" endpoint = "{{ VISION_CLICKHOUSE_HTTP_SCHEME }}://{{ VISION_CLICKHOUSE_HOST }}:{{ VISION_CLICKHOUSE_HTTP_PORT }}"
database = "{{ VISION_CLICKHOUSE_DATABASE }}" database = "{{ VISION_CLICKHOUSE_DATABASE }}"
table = "_tracking" table = "_tracking"
healthcheck = true healthcheck = true

View File

@ -1,9 +1,15 @@
#! /usr/bin/env python3 #! /usr/bin/env python3
import argparse import argparse
from glob import glob
import json
import os import os
import subprocess import subprocess
# Connection settings are loaded once, at import time, from the
# "clickhouse-auth.json" file located in the same directory as this script.
with open(
    os.path.join(os.path.dirname(__file__), "clickhouse-auth.json")
) as auth_file:
    CLICKHOUSE_AUTH = json.load(auth_file)
def main(): def main():
parser = argparse.ArgumentParser("Manage your Clickhouse instance") parser = argparse.ArgumentParser("Manage your Clickhouse instance")
subparsers = parser.add_subparsers() subparsers = parser.add_subparsers()
@ -29,12 +35,31 @@ def main():
parser_createuser.add_argument("username") parser_createuser.add_argument("username")
parser_createuser.set_defaults(func=command_create_user) parser_createuser.set_defaults(func=command_create_user)
# Apply migrations
parser_migrate = subparsers.add_parser("migrate")
parser_migrate.add_argument(
"-p",
"--path",
default="/etc/clickhouse-server/migrations.d/",
help="Run migrations from this directory.",
)
parser_migrate.add_argument(
"-d",
"--dry-run",
action="store_true",
help="Don't actually apply migrations",
)
parser_migrate.set_defaults(func=command_migrate)
args = parser.parse_args() args = parser.parse_args()
args.func(args) if hasattr(args, "func"):
args.func(args)
else:
parser.print_help()
def command_client(args):
    """
    Run the clickhouse client against the configured database, without any
    extra command line argument. `args` is not used.
    """
    client_command = get_client_command()
    subprocess.check_call(client_command)
def command_create_user(args): def command_create_user(args):
@ -48,8 +73,10 @@ def command_create_user(args):
condition = " OR ".join(conditions) if conditions else "1" condition = " OR ".join(conditions) if conditions else "1"
username = args.username username = args.username
# Note that the "CREATE TEMPORARY TABLE" grant is required to make use of "numbers()" functions. # Note that the "CREATE TEMPORARY TABLE" grant is required to make use of "numbers()" functions.
run_query(f"""CREATE USER IF NOT EXISTS {username}; run_query(
GRANT CREATE TEMPORARY TABLE ON *.* TO {username};""") f"""CREATE USER IF NOT EXISTS {username};
GRANT CREATE TEMPORARY TABLE ON *.* TO {username};"""
)
# Find the list of tables to which the user should have access: all tables that do not start with "_" # Find the list of tables to which the user should have access: all tables that do not start with "_"
tables = run_query("SHOW TABLES").strip().split("\n") tables = run_query("SHOW TABLES").strip().split("\n")
for table in tables: for table in tables:
@ -60,31 +87,66 @@ CREATE ROW POLICY OR REPLACE {username} ON {table} AS RESTRICTIVE FOR SELECT USI
run_query(query) run_query(query)
def command_migrate(args):
    """
    Apply the migration files found in `args.path`, in sorted filename order.

    The database and the `_migrations` bookkeeping table are created first when
    they do not exist (this also happens in dry-run mode, because the
    bookkeeping table is queried to decide what would be applied). Each file is
    then run through `clickhouse client --queries-file` unless its basename is
    already recorded in `_migrations`.

    Args:
        args: parsed CLI arguments; reads `args.path` (migrations directory)
            and `args.dry_run` (when true, nothing is applied or recorded).

    Raises:
        subprocess.CalledProcessError: if any clickhouse client call fails.
    """
    # Create database
    create_db_query = f"""CREATE DATABASE IF NOT EXISTS {CLICKHOUSE_AUTH["database"]}"""
    subprocess.check_call(get_client_command_no_db("--query", create_db_query))
    # Create migrations table
    run_query(
        "CREATE TABLE IF NOT EXISTS _migrations (name String) ENGINE = MergeTree PRIMARY KEY(name) ORDER BY name"
    )
    # Apply migrations
    print_suffix = " (fake)" if args.dry_run else ""
    for path in sorted(glob(os.path.join(args.path, "*"))):
        migration_name = os.path.basename(path)
        print(f"Applying migration {migration_name}... ", end=" ")
        is_applied = run_query(
            f"SELECT 'applied' FROM _migrations WHERE name='{migration_name}'"
        )
        if is_applied == "applied":
            print(f"SKIP{print_suffix}")
            continue
        if not args.dry_run:
            run_command("--queries-file", path)
            # The f-prefix matters: without it the literal text
            # "{migration_name}" is recorded and the migration is never
            # detected as applied on subsequent runs.
            run_query(f"INSERT INTO _migrations (name) VALUES ('{migration_name}')")
        print(f"OK{print_suffix}")
def run_query(query):
    """Run `query` through the clickhouse client and return its stripped output."""
    query_args = ("--query", query)
    return run_command(*query_args)
def run_command(*args):
    """
    Run the clickhouse client with the extra command line arguments `args`
    and return its standard output, decoded and stripped of surrounding
    whitespace.
    """
    raw_output = subprocess.check_output(get_client_command(*args))
    decoded_output = raw_output.decode()
    return decoded_output.strip()
def get_client_command(*args):
    """
    Build the clickhouse client command line targeting the configured
    database, followed by the extra arguments `args`.
    """
    database_args = ("--database", CLICKHOUSE_AUTH["database"])
    return get_client_command_no_db(*(database_args + args))
def get_client_command_no_db(*args):
    """
    Build the clickhouse client command line (as a list of strings) without
    selecting a database.

    Host, port, user and password come from CLICKHOUSE_AUTH; "--secure" is
    added when the configured http_scheme is "https". The extra arguments
    `args` are appended last.
    """
    command = ["clickhouse", "client"]
    command.extend(["--host", CLICKHOUSE_AUTH["host"]])
    # The port is converted to a string because the command is a list of strings
    command.extend(["--port", str(CLICKHOUSE_AUTH["port"])])
    command.extend(["--user", CLICKHOUSE_AUTH["username"]])
    command.extend(["--password", CLICKHOUSE_AUTH["password"]])
    command.extend(["--multiline", "--multiquery"])
    if CLICKHOUSE_AUTH["http_scheme"] == "https":
        command.append("--secure")
    command.extend(args)
    return command
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -88,6 +88,7 @@ def main():
def bootstrap_user(args): def bootstrap_user(args):
# Bootstrap database # Bootstrap database
database_name = args.db or args.username database_name = args.db or args.username
bootstrap_database(args.username, database_name)
# Get or create user # Get or create user
user = security_manager.find_user(args.username) user = security_manager.find_user(args.username)
@ -144,10 +145,13 @@ def bootstrap_user(args):
print("Done.") print("Done.")
def bootstrap_database(username, database_name):
    """
    Register (always creating) the database entry named `database_name`,
    pointing to the Clickhouse datalake with `username` as the connecting
    user and an empty password.

    Host, port and database are read from the "clickhouse-auth.json" file
    located next to this script.
    """
    auth_path = os.path.join(os.path.dirname(__file__), "clickhouse-auth.json")
    with open(auth_path) as auth_file:
        auth_settings = json.load(auth_file)

    host = auth_settings["host"]
    port = auth_settings["port"]
    database = auth_settings["database"]
    uri = f"clickhouse+native://{username}:@{host}:{port}/{database}"
    get_or_create_db(database_name, uri, always_create=True)

View File

@ -1,43 +1 @@
clickhouse_client_base() { vision migrate --path=/etc/clickhouse-server/migrations.d
clickhouse client \
{% if VISION_CLICKHOUSE_SCHEME == "https" %}--secure{% endif %} --host {{ VISION_CLICKHOUSE_HOST }} --port {{ VISION_CLICKHOUSE_PORT }} \
--user {{ VISION_CLICKHOUSE_USERNAME }} \
--password {{ VISION_CLICKHOUSE_PASSWORD }} "$@"
}
clickhouse_client() {
clickhouse_client_base --database={{ VISION_CLICKHOUSE_DATABASE }} "$@"
}
clickhouse_client_query() {
clickhouse_client --query "$1"
}
clickhouse_client_file() {
clickhouse_client --multiquery --multiline < "$1"
}
run_migration() {
migration_name=$(basename "$1")
echo -n "Applying migration $migration_name... "
is_applied=$(clickhouse_client_query "SELECT 'applied' FROM _migrations WHERE name='$migration_name'")
if [ "$is_applied" = "applied" ]
then
echo "SKIP"
return
fi
clickhouse_client_file "$1"
clickhouse_client_query "INSERT INTO _migrations (name) VALUES ('$migration_name')"
echo "OK"
}
run_migrations() {
for migration in /etc/clickhouse-server/migrations.d/*.sql
do
run_migration $migration
done
}
init_db() {
# Create database
clickhouse_client_base --query "CREATE DATABASE IF NOT EXISTS {{ VISION_CLICKHOUSE_DATABASE }}"
# Create migrations table
clickhouse_client_query "CREATE TABLE IF NOT EXISTS _migrations (name String) ENGINE = MergeTree PRIMARY KEY(name) ORDER BY name"
}
init_db
run_migrations

View File

@ -1 +1 @@
python /openedx/scripts/importcoursedata.py http://{{ VISION_CLICKHOUSE_USERNAME }}:{{ VISION_CLICKHOUSE_PASSWORD }}@{{ VISION_CLICKHOUSE_HOST }}:{{ VISION_CLICKHOUSE_HTTP_PORT }}/?database={{ VISION_CLICKHOUSE_DATABASE }} python /openedx/scripts/importcoursedata.py