feat: org-based access control

Also:
- upgrade Vector.
- https-compatible clickhouse host
- get rid of a few TODOs.
This commit is contained in:
Régis Behmo 2021-05-04 14:59:41 +02:00
parent a1812988f8
commit 5f6d9fc761
11 changed files with 113 additions and 64 deletions

8
.gitlab-ci.yml Normal file
View File

@ -0,0 +1,8 @@
# variables:
# TUTOR_PLUGIN: vision
# TUTOR_PYPI_PRIVATE_PACKAGE: tutor-vision
# OPENEDX_RELEASE: koa
#
# include:
# - project: 'community/tutor-ci'
# file: 'plugin-gitlab-ci.yml'

View File

@ -1,31 +1,15 @@
vision plugin for `Tutor <https://docs.tutor.overhang.io>`__
===================================================================================
Tutor Vision: scalable, real-time analytics for Open edX
========================================================
TODO:
- Collect data with Vector
- Collect tracking logs
- Collect nginx logs
- Send logs to clickhouse
- Make it optional to mount /var/run/docker.sock
- adjust vector verbosity
- log everything to file instead of console? -> tmp volume
- Provision clickhouse
- make database name a tutor config
- make clickhouse host a tutor config
- specify TTL for tables?
- set permissions for each org:
- Expose data with redash
- Provision dashboards
- Custom users
- Expose grades
- Reproduce dashboards from https://edx.readthedocs.io/projects/edx-insights/en/latest/Overview.html
- prevent users from running TRUNCATE from redash
- Reproduce dashboards from https://datastudio.google.com/embed/u/0/reporting/1gd-YXUtHFzHm3qddPTO8r272kyRD-uDG/page/4f5xB
- frontend user creation:
- generate random frontend user password in "tutor vision frontend createuser"
- create root users
- add delete user command
- add users to shared openedx organization
- Utility tools for authentication
- Kubernetes compatibility
- Sweet readme
@ -34,7 +18,7 @@ Installation
::
pip install git+https://github.com/overhangio/tutor-vision
tutor license install tutor-vision
Usage
-----
@ -69,13 +53,17 @@ To add a new, non-admin user::
# Create a corresponding user on the frontend
tutor vision frontend createuser yourusername yourusername@youremail.com
You may also grant a user access to the data of all the courses of an organization, rather than to individual courses. To do so, run::
tutor vision datalake setpermissions --org-id yourorg yourusername
Development
-----------
To reload Vector configuration after changes to vector.toml, run::
tutor config save && tutor local exec vision-vector sh kill -s HUP
tutor config save && tutor local exec vision-vector sh -c "kill -s HUP 1"
To explore the clickhouse database as root, run::

View File

@ -34,16 +34,27 @@ def datalake_createuser(context, username):
"access to multiple courses."
),
)
@click.option(
"-o",
"--org-id",
"org_ids",
multiple=True,
help=(
"Grant access to the course data of an organization. This option may be used multiple times to grant "
"access to multiple organizations."
),
)
@click.pass_obj
def datalake_setpermissions(context, username, course_ids):
def datalake_setpermissions(context, username, course_ids, org_ids):
conditions = []
for course_id in course_ids:
conditions.append(f"course_id = '{course_id}'")
for org_id in org_ids:
conditions.append(f"course_id LIKE 'course-v1:{org_id}+%'")
condition = "1"
if course_ids:
condition = " OR ".join(
[
"course_id = '{course_id}'".format(course_id=course_id)
for course_id in course_ids
]
)
if conditions:
condition = " OR ".join(conditions)
# TODO rename courseenrollments to course_enrollments (and other tables as well)
# Note that the "CREATE TEMPORARY TABLE" grant is required to make use of "numbers()" functions.
query = f"""
@ -69,9 +80,9 @@ CREATE ROW POLICY OR REPLACE {username} ON video_view_segments AS RESTRICTIVE FO
def run_datalake_query(root, query):
config = tutor_config.load(root)
command_secure_opt = "--secure" if config["VISION_CLICKHOUSE_SCHEME"] == "https" else ""
command = f"""clickhouse client \
--host={config["VISION_CLICKHOUSE_HOST"]} \
--port={config["VISION_CLICKHOUSE_PORT"]} \
{command_secure_opt} --host={config["VISION_CLICKHOUSE_HOST"]} --port={config["VISION_CLICKHOUSE_PORT"]} \
--user={config["VISION_CLICKHOUSE_USERNAME"]} \
--password={config["VISION_CLICKHOUSE_PASSWORD"]} \
--database={config["VISION_CLICKHOUSE_DATABASE"]} \

View File

@ -2,10 +2,10 @@
# log collection
vision-vector:
image: docker.io/timberio/vector:0.11.X-alpine
image: docker.io/timberio/vector:0.13.X-alpine
volumes:
- ../plugins/vision/apps/vector/vector.toml:/etc/vector/vector.toml:ro
- /var/run/docker.sock:/var/run/docker.sock:ro
{% if VISION_DOCKER_HOST %}- {{ VISION_DOCKER_HOST }}:/var/run/docker.sock:ro{% endif %}
environment:
- DOCKER_HOST=/var/run/docker.sock
restart: unless-stopped

View File

@ -19,11 +19,13 @@ config = {
"defaults": {
"CLICKHOUSE_DOCKER_IMAGE": "docker.io/yandex/clickhouse-server:21.2.7.11",
"RUN_CLICKHOUSE": True,
"CLICKHOUSE_SCHEME": "http",
"CLICKHOUSE_HOST": "vision-clickhouse",
"CLICKHOUSE_HTTP_PORT": 8123,
"CLICKHOUSE_PORT": 9000,
"CLICKHOUSE_DATABASE": "openedx",
"CLICKHOUSE_USERNAME": "openedx",
"DOCKER_HOST": "/var/run/docker.sock",
"POSTGRESQL_USER": "redash",
"POSTGRESQL_DB": "redash",
"REDASH_DOCKER_IMAGE": "docker.io/redash/redash:9.0.0-beta.b42121",

View File

@ -8,4 +8,4 @@ CREATE TABLE coursegrades
ENGINE = MySQL('{{ MYSQL_HOST }}:{{ MYSQL_PORT }}', '{{ OPENEDX_MYSQL_DATABASE }}', 'grades_persistentcoursegrade', '{{ OPENEDX_MYSQL_USERNAME }}', '{{ OPENEDX_MYSQL_PASSWORD }}');
-- Grant everyone read access to the table
CREATE ROW POLICY common ON coursegrades FOR SELECT USING 1 TO ALL;
CREATE ROW POLICY common ON coursegrades FOR SELECT USING 1 TO ALL;

View File

@ -2,6 +2,7 @@ PYTHONUNBUFFERED=0
PYTHONPATH=/app
# Clickhouse
CLICKHOUSE_SCHEME="{{ VISION_CLICKHOUSE_SCHEME }}"
CLICKHOUSE_HOST="{{ VISION_CLICKHOUSE_HOST }}"
CLICKHOUSE_PORT="{{ VISION_CLICKHOUSE_HTTP_PORT }}"
CLICKHOUSE_DATABASE="{{ VISION_CLICKHOUSE_DATABASE }}"

View File

@ -115,8 +115,10 @@ def get_datasource(org, group, username):
# Get or create datasource
options = ConfigurationContainer(
{
"url": "http://{}:{}".format(
os.environ["CLICKHOUSE_HOST"], os.environ["CLICKHOUSE_PORT"]
"url": "{}://{}:{}".format(
os.environ["CLICKHOUSE_SCHEME"],
os.environ["CLICKHOUSE_HOST"],
os.environ["CLICKHOUSE_PORT"],
),
"user": username,
"password": "",

View File

@ -0,0 +1,27 @@
import lms.startup
lms.startup.run()
from xmodule.modulestore.django import modulestore
from opaque_keys.edx.keys import CourseKey
from courseware.courses import get_course
def main():
    """Print one line per block for every course returned by the modulestore.

    Each printed line contains the course display name, the position of the
    block in the traversal produced by ``iter_children``, the block ID and the
    block display name.
    """
    store = modulestore()
    for listed_course in store.get_courses():
        # The course key is rebuilt from the string form of the listed course ID.
        # NOTE(review): presumably html_id() yields a string that
        # CourseKey.from_string can parse -- confirm against opaque-keys.
        key = CourseKey.from_string(listed_course.id.html_id())
        full_course = get_course(key, depth=None)
        for position, block in enumerate(iter_children(full_course)):
            print(
                full_course.display_name,
                position,
                block.location.block_id,
                block.display_name,
            )
def iter_children(item):
    """Yield ``item`` followed by all of its descendants, parents first.

    Descendants are discovered by calling ``get_children()`` on each yielded
    object; the children of an object are only requested after that object
    has been yielded.
    """
    # Stack of iterators: the top one walks the children of the most recently
    # yielded object that still has unvisited descendants.
    pending = [iter((item,))]
    while pending:
        try:
            current = next(pending[-1])
        except StopIteration:
            # This level is exhausted: resume iterating over the parent level.
            pending.pop()
            continue
        yield current
        pending.append(iter(current.get_children()))
if __name__ == "__main__":
main()

View File

@ -10,36 +10,45 @@ address = "127.0.0.1:8686"
type = "docker_logs"
### Transforms
# Select lms & cms containers
[transforms.openedx_containers]
type = "filter"
inputs = ["containers"]
condition.type = "check_fields"
condition."label.com.docker.compose.regex" = "(lms|cms)"
[transforms.nginx_containers]
type = "filter"
inputs = ["containers"]
condition.type = "check_fields"
condition."label.com.docker.compose.eq" = "nginx"
[transforms.tracking_raw]
type = "regex_parser"
inputs = ["openedx_containers"]
drop_failed = true
drop_field = false
field = "message"
# 2021-03-09 13:08:41,292 INFO 21 [tracking] [user 3] [ip 172.18.0.1] logger.py:42 - {...}
patterns = ["^.* \\[tracking\\] [^{}]* (?P<tracking_message>\\{.*\\})$"]
condition = 'includes(["lms", "cms"], .label."com.docker.compose.service")'
# Parse tracking logs: extract time
[transforms.tracking]
type = "remap"
inputs = ["tracking_raw"]
inputs = ["openedx_containers"]
# Time formats: https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html#specifiers
source = '''
.message = .tracking_message
.tracking_message = parse_json(.tracking_message)
.time = parse_timestamp(.tracking_message.time, format="%+")
del(.tracking_message)
parsed, err_regex = parse_regex(.message, r'^.* \[tracking\] [^{}]* (?P<tracking_message>\{.*\})$')
if err_regex != null {
abort
}
message = parsed.tracking_message
parsed_json, err_json = parse_json(parsed.tracking_message)
if err_json != null {
log("Unable to parse JSON from tracking log message: " + err_json, level: "error")
abort
}
time, err_timestamp = parse_timestamp(parsed_json.time, "%+")
if err_timestamp != null {
log("Unable to parse timestamp from tracking log message: " + err_timestamp, level: "error")
abort
}
. = {"time": time, "message": message}
'''
drop_on_error = true
drop_on_abort = true
[transforms.tracking_debug]
type = "remap"
inputs = ["tracking"]
# Decode the JSON message so that the console sink can select nested fields
source = '''
.message = parse_json!(.message)
'''
### Sinks
@ -47,17 +56,17 @@ del(.tracking_message)
# Log all events to stdout, for debugging
[sinks.out]
type = "console"
inputs = ["tracking"]
inputs = ["tracking_debug"]
encoding.codec = "json"
encoding.only_fields = ["time", "message.context.course_id", "message.context.user_id", "message.name"]
# Send logs to clickhouse
# Send logs to clickhouse
[sinks.clickhouse]
type = "clickhouse"
encoding.only_fields = ["time", "message"]
# Required: https://github.com/timberio/vector/issues/5797
encoding.timestamp_format = "unix"
inputs = ["tracking"]
endpoint = "http://{{ VISION_CLICKHOUSE_HOST }}:{{ VISION_CLICKHOUSE_HTTP_PORT }}"
endpoint = "{{ VISION_CLICKHOUSE_SCHEME }}://{{ VISION_CLICKHOUSE_HOST }}:{{ VISION_CLICKHOUSE_HTTP_PORT }}"
database = "{{ VISION_CLICKHOUSE_DATABASE }}"
table = "tracking"
healthcheck = true

View File

@ -1,5 +1,6 @@
clickhouse_client_base() {
clickhouse client --host {{ VISION_CLICKHOUSE_HOST }} --port {{ VISION_CLICKHOUSE_PORT }} \
clickhouse client \
{% if VISION_CLICKHOUSE_SCHEME == "https" %}--secure{% endif %} --host {{ VISION_CLICKHOUSE_HOST }} --port {{ VISION_CLICKHOUSE_PORT }} \
--user {{ VISION_CLICKHOUSE_USERNAME }} \
--password {{ VISION_CLICKHOUSE_PASSWORD }} "$@"
}