feat: org-based access control
Also: - upgrade Vector. - https-compatible clickhouse host - get rid of a few TODOs.
This commit is contained in:
parent
a1812988f8
commit
5f6d9fc761
8
.gitlab-ci.yml
Normal file
8
.gitlab-ci.yml
Normal file
@ -0,0 +1,8 @@
|
||||
# variables:
|
||||
# TUTOR_PLUGIN: vision
|
||||
# TUTOR_PYPI_PRIVATE_PACKAGE: tutor-vision
|
||||
# OPENEDX_RELEASE: koa
|
||||
#
|
||||
# include:
|
||||
# - project: 'community/tutor-ci'
|
||||
# file: 'plugin-gitlab-ci.yml'
|
||||
32
README.rst
32
README.rst
@ -1,31 +1,15 @@
|
||||
vision plugin for `Tutor <https://docs.tutor.overhang.io>`__
|
||||
===================================================================================
|
||||
Tutor Vision: scalable, real-time analytics for Open edX
|
||||
========================================================
|
||||
|
||||
TODO:
|
||||
- Collect data with Vector
|
||||
- Collect tracking logs
|
||||
- Collect nginx logs
|
||||
- Send logs to clickhouse
|
||||
- Make it optional to mount /var/run/docker.sock
|
||||
- adjust vector verbosity
|
||||
- log everything to file instead of console? -> tmp volume
|
||||
- Provision clickhouse
|
||||
- make database name a tutor config
|
||||
- make clickhouse host a tutor config
|
||||
- specify TTL for tables?
|
||||
- set permissions for each org:
|
||||
|
||||
- Expose data with redash
|
||||
- Provision dashboards
|
||||
- Custom users
|
||||
- Expose grades
|
||||
- Reproduce dashboards from https://edx.readthedocs.io/projects/edx-insights/en/latest/Overview.html
|
||||
- prevent users from running TRUNCATE from redash
|
||||
- Reproduce dashboards from https://datastudio.google.com/embed/u/0/reporting/1gd-YXUtHFzHm3qddPTO8r272kyRD-uDG/page/4f5xB
|
||||
- frontend user creation:
|
||||
- generate random frontend user password in "tutor vision frontend createuser"
|
||||
- create root users
|
||||
- add delete user command
|
||||
- add users to shared openedx organization
|
||||
- Utility tools for authentication
|
||||
- Kubernetes compatibility
|
||||
- Sweet readme
|
||||
|
||||
@ -34,7 +18,7 @@ Installation
|
||||
|
||||
::
|
||||
|
||||
pip install git+https://github.com/overhangio/tutor-vision
|
||||
tutor license install tutor-vision
|
||||
|
||||
Usage
|
||||
-----
|
||||
@ -69,13 +53,17 @@ To add a new, non-admin user::
|
||||
# Create a corresponding user on the frontend
|
||||
tutor vision frontend createuser yourusername yourusername@youremail.com
|
||||
|
||||
Note that you may grant a user access to the data of an organization instead of just a course. To do so, run::
|
||||
|
||||
tutor vision datalake setpermissions --org-id yourorg yourusername
|
||||
|
||||
Development
|
||||
-----------
|
||||
|
||||
|
||||
To reload Vector configuration after changes to vector.toml, run::
|
||||
|
||||
tutor config save && tutor local exec vision-vector sh kill -s HUP
|
||||
tutor config save && tutor local exec vision-vector sh -c "kill -s HUP 1"
|
||||
|
||||
To explore the clickhouse database as root, run::
|
||||
|
||||
|
||||
@ -34,16 +34,27 @@ def datalake_createuser(context, username):
|
||||
"access to multiple courses."
|
||||
),
|
||||
)
|
||||
@click.option(
|
||||
"-o",
|
||||
"--org-id",
|
||||
"org_ids",
|
||||
multiple=True,
|
||||
help=(
|
||||
"Grant access to the course data of an organization. This option may be used multiple times to grant "
|
||||
"access to multiple organizations."
|
||||
),
|
||||
)
|
||||
@click.pass_obj
|
||||
def datalake_setpermissions(context, username, course_ids):
|
||||
def datalake_setpermissions(context, username, course_ids, org_ids):
|
||||
conditions = []
|
||||
for course_id in course_ids:
|
||||
conditions.append(f"course_id = '{course_id}'")
|
||||
for org_id in org_ids:
|
||||
conditions.append(f"course_id LIKE 'course-v1:{org_id}+%'")
|
||||
condition = "1"
|
||||
if course_ids:
|
||||
condition = " OR ".join(
|
||||
[
|
||||
"course_id = '{course_id}'".format(course_id=course_id)
|
||||
for course_id in course_ids
|
||||
]
|
||||
)
|
||||
if conditions:
|
||||
condition = " OR ".join(conditions)
|
||||
|
||||
# TODO rename courseenrollments to course_enrollments (and other tables as well)
|
||||
# Note that the "CREATE TEMPORARY TABLE" grant is required to make use of "numbers()" functions.
|
||||
query = f"""
|
||||
@ -69,9 +80,9 @@ CREATE ROW POLICY OR REPLACE {username} ON video_view_segments AS RESTRICTIVE FO
|
||||
|
||||
def run_datalake_query(root, query):
|
||||
config = tutor_config.load(root)
|
||||
command_secure_opt = "--secure" if config["VISION_CLICKHOUSE_SCHEME"] == "https" else ""
|
||||
command = f"""clickhouse client \
|
||||
--host={config["VISION_CLICKHOUSE_HOST"]} \
|
||||
--port={config["VISION_CLICKHOUSE_PORT"]} \
|
||||
{command_secure_opt} --host={config["VISION_CLICKHOUSE_HOST"]} --port={config["VISION_CLICKHOUSE_PORT"]} \
|
||||
--user={config["VISION_CLICKHOUSE_USERNAME"]} \
|
||||
--password={config["VISION_CLICKHOUSE_PASSWORD"]} \
|
||||
--database={config["VISION_CLICKHOUSE_DATABASE"]} \
|
||||
|
||||
@ -2,10 +2,10 @@
|
||||
|
||||
# log collection
|
||||
vision-vector:
|
||||
image: docker.io/timberio/vector:0.11.X-alpine
|
||||
image: docker.io/timberio/vector:0.13.X-alpine
|
||||
volumes:
|
||||
- ../plugins/vision/apps/vector/vector.toml:/etc/vector/vector.toml:ro
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
{% if VISION_DOCKER_HOST %}- {{ VISION_DOCKER_HOST }}:/var/run/docker.sock:ro{% endif %}
|
||||
environment:
|
||||
- DOCKER_HOST=/var/run/docker.sock
|
||||
restart: unless-stopped
|
||||
|
||||
@ -19,11 +19,13 @@ config = {
|
||||
"defaults": {
|
||||
"CLICKHOUSE_DOCKER_IMAGE": "docker.io/yandex/clickhouse-server:21.2.7.11",
|
||||
"RUN_CLICKHOUSE": True,
|
||||
"CLICKHOUSE_SCHEME": "http",
|
||||
"CLICKHOUSE_HOST": "vision-clickhouse",
|
||||
"CLICKHOUSE_HTTP_PORT": 8123,
|
||||
"CLICKHOUSE_PORT": 9000,
|
||||
"CLICKHOUSE_DATABASE": "openedx",
|
||||
"CLICKHOUSE_USERNAME": "openedx",
|
||||
"DOCKER_HOST": "/var/run/docker.sock",
|
||||
"POSTGRESQL_USER": "redash",
|
||||
"POSTGRESQL_DB": "redash",
|
||||
"REDASH_DOCKER_IMAGE": "docker.io/redash/redash:9.0.0-beta.b42121",
|
||||
|
||||
@ -8,4 +8,4 @@ CREATE TABLE coursegrades
|
||||
ENGINE = MySQL('{{ MYSQL_HOST }}:{{ MYSQL_PORT }}', '{{ OPENEDX_MYSQL_DATABASE }}', 'grades_persistentcoursegrade', '{{ OPENEDX_MYSQL_USERNAME }}', '{{ OPENEDX_MYSQL_PASSWORD }}');
|
||||
|
||||
-- Grant everyone access to the view
|
||||
CREATE ROW POLICY common ON coursegrades FOR SELECT USING 1 TO ALL;
|
||||
CREATE ROW POLICY common ON coursegrades FOR SELECT USING 1 TO ALL;
|
||||
|
||||
@ -2,6 +2,7 @@ PYTHONUNBUFFERED=0
|
||||
PYTHONPATH=/app
|
||||
|
||||
# Clickhouse
|
||||
CLICKHOUSE_SCHEME="{{ VISION_CLICKHOUSE_SCHEME }}"
|
||||
CLICKHOUSE_HOST="{{ VISION_CLICKHOUSE_HOST }}"
|
||||
CLICKHOUSE_PORT="{{ VISION_CLICKHOUSE_HTTP_PORT }}"
|
||||
CLICKHOUSE_DATABASE="{{ VISION_CLICKHOUSE_DATABASE }}"
|
||||
|
||||
@ -115,8 +115,10 @@ def get_datasource(org, group, username):
|
||||
# Get or create datasource
|
||||
options = ConfigurationContainer(
|
||||
{
|
||||
"url": "http://{}:{}".format(
|
||||
os.environ["CLICKHOUSE_HOST"], os.environ["CLICKHOUSE_PORT"]
|
||||
"url": "{}://{}:{}".format(
|
||||
os.environ["CLICKHOUSE_SCHEME"],
|
||||
os.environ["CLICKHOUSE_HOST"],
|
||||
os.environ["CLICKHOUSE_PORT"],
|
||||
),
|
||||
"user": username,
|
||||
"password": "",
|
||||
|
||||
@ -0,0 +1,27 @@
|
||||
import lms.startup
|
||||
|
||||
lms.startup.run()
|
||||
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from opaque_keys.edx.keys import CourseKey
|
||||
from courseware.courses import get_course
|
||||
|
||||
|
||||
def main():
    """Print one line per block of every course returned by the modulestore.

    Each printed line holds, in order: the course display name, the block's
    position in the walk produced by ``iter_children`` (the course itself comes
    first, at position 0), the block ID and the block display name.
    """
    store = modulestore()
    for listed_course in store.get_courses():
        # Rebuild a course key from the listed course's ID, then fetch the
        # course again with depth=None.
        # NOTE(review): depth=None presumably loads the whole block tree - confirm.
        key = CourseKey.from_string(listed_course.id.html_id())
        full_course = get_course(key, depth=None)
        walk = iter_children(full_course)
        for position, block in enumerate(walk):
            print(full_course.display_name, position, block.location.block_id, block.display_name)
|
||||
|
||||
|
||||
def iter_children(item):
    """Yield ``item`` and then all of its descendants, depth-first, parents first.

    The only requirement on a node is a ``get_children()`` method that returns
    an iterable of child nodes.
    """
    yield item
    # Explicit stack of child iterators (innermost last) instead of recursion.
    exhausted = object()
    pending = [iter(item.get_children())]
    while pending:
        node = next(pending[-1], exhausted)
        if node is exhausted:
            # Every child at this level has been visited: go back up one level.
            pending.pop()
            continue
        yield node
        pending.append(iter(node.get_children()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -10,36 +10,45 @@ address = "127.0.0.1:8686"
|
||||
type = "docker_logs"
|
||||
|
||||
### Transforms
|
||||
|
||||
# Select lms & cms containers
|
||||
[transforms.openedx_containers]
|
||||
type = "filter"
|
||||
inputs = ["containers"]
|
||||
condition.type = "check_fields"
|
||||
condition."label.com.docker.compose.regex" = "(lms|cms)"
|
||||
|
||||
[transforms.nginx_containers]
|
||||
type = "filter"
|
||||
inputs = ["containers"]
|
||||
condition.type = "check_fields"
|
||||
condition."label.com.docker.compose.eq" = "nginx"
|
||||
|
||||
[transforms.tracking_raw]
|
||||
type = "regex_parser"
|
||||
inputs = ["openedx_containers"]
|
||||
drop_failed = true
|
||||
drop_field = false
|
||||
field = "message"
|
||||
# 2021-03-09 13:08:41,292 INFO 21 [tracking] [user 3] [ip 172.18.0.1] logger.py:42 - {...}
|
||||
patterns = ["^.* \\[tracking\\] [^{}]* (?P<tracking_message>\\{.*\\})$"]
|
||||
condition = 'includes(["lms", "cms"], .label."com.docker.compose.service")'
|
||||
|
||||
# Parse tracking logs: extract time
|
||||
[transforms.tracking]
|
||||
type = "remap"
|
||||
inputs = ["tracking_raw"]
|
||||
inputs = ["openedx_containers"]
|
||||
# Time formats: https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html#specifiers
|
||||
source = '''
|
||||
.message = .tracking_message
|
||||
.tracking_message = parse_json(.tracking_message)
|
||||
.time = parse_timestamp(.tracking_message.time, format="%+")
|
||||
del(.tracking_message)
|
||||
parsed, err_regex = parse_regex(.message, r'^.* \[tracking\] [^{}]* (?P<tracking_message>\{.*\})$')
|
||||
if err_regex != null {
|
||||
abort
|
||||
}
|
||||
message = parsed.tracking_message
|
||||
parsed_json, err_json = parse_json(parsed.tracking_message)
|
||||
if err_json != null {
|
||||
log("Unable to parse JSON from tracking log message: " + err_json, level: "error")
|
||||
abort
|
||||
}
|
||||
time, err_timestamp = parse_timestamp(parsed_json.time, "%+")
|
||||
if err_timestamp != null {
|
||||
log("Unable to parse timestamp from tracking log message: " + err_timestamp, level: "error")
|
||||
abort
|
||||
}
|
||||
. = {"time": time, "message": message}
|
||||
'''
|
||||
drop_on_error = true
|
||||
drop_on_abort = true
|
||||
|
||||
[transforms.tracking_debug]
|
||||
type = "remap"
|
||||
inputs = ["tracking"]
|
||||
# Decode the JSON-encoded message so that the console sink can select nested message fields
|
||||
source = '''
|
||||
.message = parse_json!(.message)
|
||||
'''
|
||||
|
||||
### Sinks
|
||||
@ -47,17 +56,17 @@ del(.tracking_message)
|
||||
# Log all events to stdout, for debugging
|
||||
[sinks.out]
|
||||
type = "console"
|
||||
inputs = ["tracking"]
|
||||
inputs = ["tracking_debug"]
|
||||
encoding.codec = "json"
|
||||
encoding.only_fields = ["time", "message.context.course_id", "message.context.user_id", "message.name"]
|
||||
|
||||
# Send logs to clickhouse
|
||||
# Send logs to clickhouse
|
||||
[sinks.clickhouse]
|
||||
type = "clickhouse"
|
||||
encoding.only_fields = ["time", "message"]
|
||||
# Required: https://github.com/timberio/vector/issues/5797
|
||||
encoding.timestamp_format = "unix"
|
||||
inputs = ["tracking"]
|
||||
endpoint = "http://{{ VISION_CLICKHOUSE_HOST }}:{{ VISION_CLICKHOUSE_HTTP_PORT }}"
|
||||
endpoint = "{{ VISION_CLICKHOUSE_SCHEME }}://{{ VISION_CLICKHOUSE_HOST }}:{{ VISION_CLICKHOUSE_HTTP_PORT }}"
|
||||
database = "{{ VISION_CLICKHOUSE_DATABASE }}"
|
||||
table = "tracking"
|
||||
healthcheck = true
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
clickhouse_client_base() {
|
||||
clickhouse client --host {{ VISION_CLICKHOUSE_HOST }} --port {{ VISION_CLICKHOUSE_PORT }} \
|
||||
clickhouse client \
|
||||
{% if VISION_CLICKHOUSE_SCHEME == "https" %}--secure{% endif %} --host {{ VISION_CLICKHOUSE_HOST }} --port {{ VISION_CLICKHOUSE_PORT }} \
|
||||
--user {{ VISION_CLICKHOUSE_USERNAME }} \
|
||||
--password {{ VISION_CLICKHOUSE_PASSWORD }} "$@"
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user