feat: working video x ray stats!!!

This commit is contained in:
Régis Behmo 2021-04-26 08:49:01 +02:00
parent 30a92393a1
commit a4e3a28328
10 changed files with 147 additions and 21 deletions

View File

@ -13,11 +13,7 @@ TODO:
- make database name a tutor config
- make clickhouse host a tutor config
- specify TTL for tables?
- don't connect with default user, but use a dedicated openedx user
- rename database to "openedx"
- set permissions for each course/org: one datasource per org/course???
- how to handle migrations?
- prevent access to the full tracking message in the tracking table
- set permissions for each org:
- Expose data with redash
- Provision dashboards
- Custom users
@ -76,6 +72,11 @@ To add a new, non-admin user::
Development
-----------
To reload Vector configuration after changes to vector.toml, run::
tutor config save && tutor local exec vision-vector sh kill -s HUP
To explore the clickhouse database as root, run::
tutor local run vision-clickhouse clickhouse-client --host vision-clickhouse \
@ -83,11 +84,11 @@ To explore the clickhouse database as root, run::
--user $(tutor config printvalue VISION_CLICKHOUSE_USERNAME) \
--password $(tutor config printvalue VISION_CLICKHOUSE_PASSWORD)
To reload Vector configuration after changes to vector.toml, run::
To launch a Python shell in Redash, run::
tutor config save && tutor local exec vision-vector sh kill -s HUP
tutor local run vision-redash ./manage.py shell
License
-------
This software is licensed under the terms of the AGPLv3.
This software is licensed under the terms of the AGPLv3.

View File

@ -1 +1 @@
__version__ = "0.1.0"
__version__ = "11.0.0"

View File

@ -44,7 +44,11 @@ def datalake_setpermissions(context, username, course_ids):
for course_id in course_ids
]
)
# TODO rename courseenrollments to course_enrollments (and other tables as well)
# Note that the "CREATE TEMPORARY TABLE" grant is required to make use of "numbers()" functions.
query = f"""
GRANT CREATE TEMPORARY TABLE ON *.* TO {username};
GRANT SELECT ON events TO {username};
CREATE ROW POLICY OR REPLACE {username} ON events AS RESTRICTIVE FOR SELECT USING {condition} TO {username};
@ -53,6 +57,12 @@ CREATE ROW POLICY OR REPLACE {username} ON coursegrades AS RESTRICTIVE FOR SELEC
GRANT SELECT ON courseenrollments TO {username};
CREATE ROW POLICY OR REPLACE {username} ON courseenrollments AS RESTRICTIVE FOR SELECT USING {condition} TO {username};
GRANT SELECT ON video_events TO {username};
CREATE ROW POLICY OR REPLACE {username} ON video_events AS RESTRICTIVE FOR SELECT USING {condition} TO {username};
GRANT SELECT ON video_view_segments TO {username};
CREATE ROW POLICY OR REPLACE {username} ON video_view_segments AS RESTRICTIVE FOR SELECT USING {condition} TO {username};
"""
run_datalake_query(context.root, query)

View File

@ -29,7 +29,7 @@ vision-clickhouse:
# https://github.com/getredash/redash/blob/master/CHANGELOG.md
# frontend
vision-redash-server:
vision-redash:
image: {{ VISION_REDASH_DOCKER_IMAGE }}
command: server
env_file: ../plugins/vision/apps/redash/env

View File

@ -1,6 +1,6 @@
# Vision
upstream vision-backend {
server vision-redash-server:5000 fail_timeout=0;
server vision-redash:5000 fail_timeout=0;
}
server {
listen 80;

View File

@ -1,8 +1,9 @@
CREATE TABLE tracking
(
`time` DateTime,
`message` String,
) ENGINE MergeTree
`message` String
)
ENGINE MergeTree
ORDER BY time;
CREATE TABLE events
@ -26,5 +27,5 @@ SELECT
JSONExtractString(message, 'event_source') AS event_source
FROM tracking;
-- Grant everyone access to the view
-- Grant everyone access to the events table
CREATE ROW POLICY common ON events FOR SELECT USING 1 TO ALL;

View File

@ -20,9 +20,15 @@ CREATE TABLE openedx_userprofiles
)
ENGINE = MySQL('{{ MYSQL_HOST }}:{{ MYSQL_PORT }}', '{{ OPENEDX_MYSQL_DATABASE }}', 'auth_userprofile', '{{ OPENEDX_MYSQL_USERNAME }}', '{{ OPENEDX_MYSQL_PASSWORD }}');
-- enable live views
set allow_experimental_live_view = 1;
CREATE LIVE VIEW courseenrollments WITH PERIODIC REFRESH 30 AS
SELECT
openedx_courseenrollments.course_id AS course_id,
openedx_courseenrollments.created AS enrollment_created,
openedx_courseenrollments.is_active AS enrollment_is_active,
openedx_courseenrollments.mode AS enrollment_mode,
openedx_courseenrollments.user_id AS user_id,
openedx_userprofiles.year_of_birth AS user_year_of_birth,
openedx_userprofiles.gender AS user_gender,
@ -34,4 +40,4 @@ FROM openedx_courseenrollments
INNER JOIN openedx_userprofiles ON openedx_courseenrollments.user_id = openedx_userprofiles.user_id;
-- Grant everyone access to the view
CREATE ROW POLICY common ON courseenrollments FOR SELECT USING 1 TO ALL;
CREATE ROW POLICY common ON courseenrollments FOR SELECT USING 1 TO ALL;

View File

@ -0,0 +1,77 @@
-- Video player events, one row per event.
-- Rows are inserted by the video_events_mv and video_seek_events_mv
-- materialized views, which parse them out of the tracking table.
-- `position` is the player position carried by the event payload
-- (presumably in seconds -- TODO confirm against the tracking log format).
CREATE TABLE video_events
(
    `course_id` String,
    `video_id`  String,
    `user_id`   Int64,
    `name`      String,
    `time`      DateTime,
    `position`  Float
)
ENGINE = MergeTree()
ORDER BY time;
-- Collect play/pause/stop video events and store them in the video_events table.
-- The "event" field of the tracking message is read as a string and parsed
-- again as JSON to obtain the video id and the "currentTime" position.
CREATE MATERIALIZED VIEW video_events_mv TO video_events AS
SELECT
    JSONExtract(message, 'context', 'course_id', 'String') AS course_id,
    JSONExtractString(JSONExtractString(message, 'event'), 'id') AS video_id,
    JSONExtract(message, 'context', 'user_id', 'Int64') AS user_id,
    JSONExtractString(message, 'name') AS name,
    time,
    JSONExtractFloat(JSONExtractString(message, 'event'), 'currentTime') AS position
FROM tracking
WHERE name IN ('play_video', 'pause_video', 'stop_video');
-- Collect seek_video events and store them in the video_events table.
-- Unlike video_events_mv, the position is read from the "old_time" field of
-- the event payload (presumably the position the player was at before the
-- seek -- TODO confirm against the tracking log format).
CREATE MATERIALIZED VIEW video_seek_events_mv TO video_events AS
SELECT
    JSONExtract(message, 'context', 'course_id', 'String') AS course_id,
    JSONExtractString(JSONExtractString(message, 'event'), 'id') AS video_id,
    JSONExtract(message, 'context', 'user_id', 'Int64') AS user_id,
    JSONExtractString(message, 'name') AS name,
    time,
    JSONExtractFloat(JSONExtractString(message, 'event'), 'old_time') AS position
FROM tracking
WHERE name = 'seek_video';
-- For ease of access, create a simple view to aggregate the viewed video segments.
-- Each output row is an "end" event (pause_video, stop_video or seek_video) from
-- video_events, joined to a play_video event of the same course, video and user
-- whose time is strictly earlier than the end event's time.
CREATE VIEW video_view_segments AS
SELECT
course_id,
video_id,
user_id,
start_time,
start_position,
start_event,
time as end_time,
position AS end_position,
name as end_event,
-- Difference between the two player positions, not between the two timestamps
end_position - start_position as duration
FROM video_events AS video_events_end
-- The ASOF join uses the inequality on start_time to select the closest matching start row.
-- NOTE(review): because this is a LEFT join, end events with no earlier play_video
-- are kept; presumably their start_* columns then hold default values, which would
-- make duration equal to end_position -- confirm this is intended.
ASOF LEFT JOIN (
SELECT
time as start_time,
course_id AS course_id_start,
video_id as video_id_start,
user_id AS user_id_start,
name as start_event,
position AS start_position
FROM video_events
WHERE start_event = 'play_video'
) AS video_events_start
ON course_id_start = course_id AND video_id_start = video_id AND user_id_start = user_id AND start_time < end_time
WHERE end_event IN ('pause_video', 'stop_video', 'seek_video');
-- Grant everyone access to the video_events table and the video_view_segments view:
-- the policy condition is the constant 1, so it matches every row.
CREATE ROW POLICY common ON video_events FOR SELECT USING 1 TO ALL;
CREATE ROW POLICY common ON video_view_segments FOR SELECT USING 1 TO ALL;
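-- TODO remove this commented-out example query. For one video, it splits each viewed
-- segment into integer position bins and counts total, unique and replay views per bin.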
-- SELECT arrayJoin(range(toUInt64(floor(start_position)), toUInt64(ceil(end_position)))) AS bin,
-- count(*) AS total_views,
-- count(distinct(user_id)) AS unique_views,
-- total_views - unique_views AS replay_views,
-- video_id
-- FROM video_view_segments
-- WHERE video_id = 'DEFINE_ME_video_id'
-- GROUP BY bin,
-- video_id
-- ORDER BY bin

View File

@ -40,5 +40,3 @@ init_db() {
init_db
run_migrations
# TODO enable live views https://clickhouse.tech/docs/en/sql-reference/statements/create/view/#live-view

View File

@ -8,14 +8,38 @@ app = create_app()
app.app_context().push()
# Get organization
org = models.Organization.get_by_slug('default')
org_slug = "default"
org = models.Organization.get_by_slug(org_slug)
if org:
print("Org already exists")
else:
print("Creating org...")
org = models.Organization(name=org_slug, slug=org_slug, settings={"beacon_consent": False})
models.db.session.add(org)
models.db.session.commit()
# Get org admin group
if org.admin_group:
print("Org admin group already exists")
else:
print("Creating org admin group...")
admin_group = models.Group(
name="admin",
permissions=["admin", "super_admin"],
org=org,
type=models.Group.BUILTIN_GROUP,
)
models.db.session.add_all([org, admin_group])
models.db.session.commit()
# Get or create group
group = models.Group.query.filter(models.Group.name == "{{ username }}", models.Group.org == org).first()
if group:
print("Group already exists")
print("Group '{{ username }}' already exists")
else:
group = models.Group(name="{{ username }}", org=org, permissions=models.Group.DEFAULT_PERMISSIONS)
excluded_permissions = {% if is_root %}[]{% else %}["list_users"]{% endif %}
permissions = filter(lambda p: p not in excluded_permissions, models.Group.DEFAULT_PERMISSIONS)
group = models.Group(name="{{ username }}", org=org, permissions=permissions)
models.db.session.add(group)
models.db.session.commit()
print("Created group '{}'".format(group.name))
@ -38,6 +62,15 @@ else:
user.hash_password("""{{ password }}""")
models.db.session.add(user)
models.db.session.commit()
{% if is_root %}
if org.admin_group.id in user.group_ids:
print("User is already in admin group")
else:
print("Adding user to admin group...")
user.group_ids = user.group_ids + [org.admin_group.id]
models.db.session.add(user)
models.db.session.commit()
{% endif %}
# Get or create datasource
options = ConfigurationContainer(
@ -63,4 +96,4 @@ else:
models.db.session.add_all([data_source, data_source_group])
models.db.session.commit()
print("Created datasource '{}'".format(data_source.name))
EOF
EOF