Merge remote-tracking branch 'origin/master' into nightly

This commit is contained in:
Overhang.IO 2022-12-12 18:06:57 +00:00
commit ff1ea7c615
15 changed files with 214 additions and 89 deletions

View File

@ -2,7 +2,7 @@ variables:
TUTOR_PLUGIN: cairn
TUTOR_IMAGES: cairn-clickhouse cairn-superset
TUTOR_PYPI_PRIVATE_PACKAGE: tutor-cairn
OPENEDX_RELEASE: nutmeg
OPENEDX_RELEASE: olive
include:
- project: 'community/tutor-ci'

View File

@ -61,22 +61,23 @@ Then, restart your platform and run the initialization scripts::
tutor local quickstart
Create credentials to access the Clickhouse database::
Create a user to access both in the Clickhouse database and the Superset frontend::
tutor local run cairn-clickhouse cairn createuser YOURUSERNAME
tutor local do cairn-createuser YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM
Create an admin user to access the frontend::
You can use the ``--password=<PASSWORD>`` option to provide a password on the command line.
# You will be prompted for a new password
tutor local run cairn-superset cairn createuser --admin YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM
To make this user an administrator, add the ``--admin`` option::
tutor local do cairn-createuser --admin YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM
To add the default dashboards to the new user, add the ``--bootstrap-dashboards`` option::
tutor local do cairn-createuser --bootstrap-dashboards YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM
You can then access the frontend with the user credentials you just created. Open http(s)://data.<YOUR_LMS_HOST> in your browser. When running locally, this will be http://data.local.overhang.io. The admin user will automatically be granted access to the "openedx" database in Superset and will be able to query all tables.
At this point, your user should have access to Cairn but its account will not include any dashboard. To import the "Course overview" dashboard that comes with Cairn, run::
tutor local run cairn-superset cairn bootstrap-dashboards YOURUSERNAME /app/bootstrap/courseoverview.json
Some event data will be missing from your dashboards: just start using your LMS and refresh your dashboard. The new events should appear immediately.
Some event data might be missing from your dashboards: just start using your LMS and refresh your dashboard. The new events should appear immediately.
.. image:: https://overhang.io/static/catalog/img/cairn/courseoverview-01.png
:alt: Course overview dashboard part 1

View File

@ -39,8 +39,8 @@ setup(
packages=find_packages(exclude=["tests*"]),
include_package_data=True,
python_requires=">=3.7",
install_requires=["tutor>=14.0.0,<15.0.0"],
entry_points={"tutor.plugin.v0": ["cairn = tutorcairn.plugin"]},
install_requires=["tutor>=15.0.0,<16.0.0"],
entry_points={"tutor.plugin.v1": ["cairn = tutorcairn.plugin"]},
classifiers=[
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",

View File

@ -1,2 +1,2 @@
__version__ = "14.0.4"
__version__ = "15.0.0"

View File

@ -51,10 +51,21 @@ spec:
- mountPath: /app/superset_config.py
name: config
subPath: superset_config.py
- mountPath: /app/bootstrap/
name: bootstrap
- mountPath: /scripts/clickhouse-auth.json
name: clickhouse-auth
subPath: auth.json
volumes:
- name: config
configMap:
name: cairn-superset-config
- name: bootstrap
configMap:
name: cairn-superset-bootstrap
- name: clickhouse-auth
configMap:
name: cairn-clickhouse-auth
---
apiVersion: batch/v1
kind: Job

View File

@ -17,6 +17,8 @@ cairn-superset-job:
image: {{ CAIRN_SUPERSET_DOCKER_IMAGE }}
volumes:
- ../plugins/cairn/apps/superset/superset_config.py:/app/superset_config.py:ro
- ../plugins/cairn/apps/clickhouse/auth.json:/scripts/clickhouse-auth.json:ro
- ../plugins/cairn/apps/superset/bootstrap:/app/bootstrap:ro
healthcheck:
disable: true
depends_on:

View File

@ -1,60 +1,163 @@
from __future__ import annotations
from glob import glob
import os
import typing as t
import click
import pkg_resources
from tutor import hooks
from .__about__ import __version__
HERE = os.path.abspath(os.path.dirname(__file__))
templates = os.path.join(HERE, "templates")
config = {
"add": {
"CLICKHOUSE_PASSWORD": "{{ 20|random_string }}",
"POSTGRESQL_PASSWORD": "{{ 20|random_string }}",
"SUPERSET_SECRET_KEY": "{{ 20|random_string }}",
},
"defaults": {
"VERSION": __version__,
"DOCKER_HOST_SOCK_PATH": "/var/run/docker.sock",
"HOST": "data.{{ LMS_HOST }}",
hooks.Filters.CONFIG_UNIQUE.add_items(
[
("CAIRN_CLICKHOUSE_PASSWORD", "{{ 20|random_string }}"),
("CAIRN_POSTGRESQL_PASSWORD", "{{ 20|random_string }}"),
("CAIRN_SUPERSET_SECRET_KEY", "{{ 20|random_string }}"),
]
)
hooks.Filters.CONFIG_DEFAULTS.add_items(
[
("CAIRN_VERSION", __version__),
("CAIRN_DOCKER_HOST_SOCK_PATH", "/var/run/docker.sock"),
("CAIRN_HOST", "data.{{ LMS_HOST }}"),
# Clickhouse
"RUN_CLICKHOUSE": True,
"CLICKHOUSE_DOCKER_IMAGE": "{{ DOCKER_REGISTRY }}overhangio/cairn-clickhouse:{{ CAIRN_VERSION }}",
"CLICKHOUSE_HOST": "cairn-clickhouse",
"CLICKHOUSE_HTTP_PORT": 8123,
"CLICKHOUSE_HTTP_SCHEME": "http",
"CLICKHOUSE_PORT": 9000,
"CLICKHOUSE_DATABASE": "openedx",
"CLICKHOUSE_USERNAME": "openedx",
("CAIRN_RUN_CLICKHOUSE", True),
(
"CAIRN_CLICKHOUSE_DOCKER_IMAGE",
"{{ DOCKER_REGISTRY }}overhangio/cairn-clickhouse:{{ CAIRN_VERSION }}",
),
("CAIRN_CLICKHOUSE_HOST", "cairn-clickhouse"),
("CAIRN_CLICKHOUSE_HTTP_PORT", 8123),
("CAIRN_CLICKHOUSE_HTTP_SCHEME", "http"),
("CAIRN_CLICKHOUSE_PORT", 9000),
("CAIRN_CLICKHOUSE_DATABASE", "openedx"),
("CAIRN_CLICKHOUSE_USERNAME", "openedx"),
# Superset/Postgresql
"RUN_POSTGRESQL": True,
"POSTGRESQL_DATABASE": "superset",
"POSTGRESQL_USERNAME": "superset",
"SUPERSET_DOCKER_IMAGE": "{{ DOCKER_REGISTRY }}overhangio/cairn-superset:{{ CAIRN_VERSION }}",
"SUPERSET_LANGUAGE_CODE": "{{ LANGUAGE_CODE[:2] }}",
("CAIRN_RUN_POSTGRESQL", True),
("CAIRN_POSTGRESQL_DATABASE", "superset"),
("CAIRN_POSTGRESQL_USERNAME", "superset"),
(
"CAIRN_SUPERSET_DOCKER_IMAGE",
"{{ DOCKER_REGISTRY }}overhangio/cairn-superset:{{ CAIRN_VERSION }}",
),
("CAIRN_SUPERSET_LANGUAGE_CODE", "{{ LANGUAGE_CODE[:2] }}"),
# Vector
"VECTOR_DOCKER_IMAGE": "docker.io/timberio/vector:0.24.1-alpine",
},
}
# https://hub.docker.com/r/timberio/vector/tags
# https://github.com/vectordotdev/vector/releases
("CAIRN_VECTOR_DOCKER_IMAGE", "docker.io/timberio/vector:0.25.1-alpine"),
]
)
hooks = {
"build-image": {
"cairn-clickhouse": "{{ CAIRN_CLICKHOUSE_DOCKER_IMAGE }}",
"cairn-superset": "{{ CAIRN_SUPERSET_DOCKER_IMAGE }}",
},
"remote-image": {
"cairn-clickhouse": "{{ CAIRN_CLICKHOUSE_DOCKER_IMAGE }}",
"cairn-superset": "{{ CAIRN_SUPERSET_DOCKER_IMAGE }}",
},
"init": ["cairn-clickhouse", "cairn-superset", "cairn-openedx"],
}
# Init scripts
for service in ["cairn-clickhouse", "cairn-superset", "cairn-openedx"]:
with open(
os.path.join(HERE, "templates", "cairn", "tasks", service, "init"),
encoding="utf-8",
) as fi:
task = fi.read()
hooks.Filters.CLI_DO_INIT_TASKS.add_item((service, task))
# Docker images
hooks.Filters.IMAGES_BUILD.add_items(
[
(
"cairn-clickhouse",
("plugins", "cairn", "build", "cairn-clickhouse"),
"{{ CAIRN_CLICKHOUSE_DOCKER_IMAGE }}",
(),
),
(
"cairn-superset",
("plugins", "cairn", "build", "cairn-superset"),
"{{ CAIRN_SUPERSET_DOCKER_IMAGE }}",
(),
),
]
)
hooks.Filters.IMAGES_PULL.add_items(
[
(
"cairn-clickhouse",
"{{ CAIRN_CLICKHOUSE_DOCKER_IMAGE }}",
),
(
"cairn-clickhouse",
"{{ CAIRN_CLICKHOUSE_DOCKER_IMAGE }}",
),
]
)
hooks.Filters.IMAGES_PUSH.add_items(
[
(
"cairn-superset",
"{{ CAIRN_SUPERSET_DOCKER_IMAGE }}",
),
(
"cairn-superset",
"{{ CAIRN_SUPERSET_DOCKER_IMAGE }}",
),
]
)
def patches():
    """Collect plugin patches from the local "patches" folder.

    Returns a dict mapping each patch file's basename to its raw content.
    """

    def read_patch(patch_path):
        # One file = one patch; the filename is the patch name.
        with open(patch_path) as stream:
            return stream.read()

    return {
        os.path.basename(patch_path): read_patch(patch_path)
        for patch_path in glob(os.path.join(HERE, "patches", "*"))
    }
@click.command(
    name="cairn-createuser", help="Create a Cairn user, both in Clickhouse and Superset"
)
@click.option(
    "--bootstrap-dashboards",
    is_flag=True,
    help="Load the default Cairn dashboards to the user's Superset account",
)
@click.option("--admin", is_flag=True)
@click.option(
    "-p",
    "--password",
    help="Specify password from the command line. If undefined, you will be prompted to input a password",
    prompt=True,
    hide_input=True,
)
@click.argument("username")
@click.argument("email")
def create_user_command(
    bootstrap_dashboards: bool, admin: bool, password: str, username: str, email: str
) -> t.Iterable[tuple[str, str]]:
    """Yield (service, command) pairs that create the user in both backends.

    The user is first created in Clickhouse, then in Superset (optionally as an
    admin); dashboards are bootstrapped last, only when requested.
    """
    admin_opt = " --admin" if admin else ""
    yield ("cairn-clickhouse", f"cairn createuser {username}")
    yield (
        "cairn-superset",
        f"cairn createuser{admin_opt} --password={password} {username} {email}",
    )
    if bootstrap_dashboards:
        yield (
            "cairn-superset",
            f"cairn bootstrap-dashboards {username} /app/bootstrap/courseoverview.json",
        )


hooks.Filters.CLI_DO_COMMANDS.add_item(create_user_command)
####### Boilerplate code
# Add the "templates" folder as a template root
hooks.Filters.ENV_TEMPLATE_ROOTS.add_item(
pkg_resources.resource_filename("tutorcairn", "templates")
)
# Render the "build" and "apps" folders
hooks.Filters.ENV_TEMPLATE_TARGETS.add_items(
[
("cairn/build", "plugins"),
("cairn/apps", "plugins"),
],
)
# Load patches from files
for path in glob(
os.path.join(pkg_resources.resource_filename("tutorcairn", "patches"), "*")
):
with open(path, encoding="utf-8") as patch_file:
hooks.Filters.ENV_PATCHES.add_item((os.path.basename(path), patch_file.read()))

View File

@ -57,6 +57,8 @@ RESULTS_BACKEND = RedisCache(
key_prefix="superset_results",
)
# TODO implement FILTER_STATE_CACHE_CONFIG and EXPLORE_FORM_DATA_CACHE_CONFIG such that we get rid of the warning messages
class CeleryConfig: # pylint: disable=too-few-public-methods
BROKER_URL = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CELERY_DB}"
CELERY_IMPORTS = ("superset.sql_lab", "superset.tasks")

View File

@ -1,5 +1,5 @@
# https://hub.docker.com/r/yandex/clickhouse-server/tags
FROM docker.io/yandex/clickhouse-server:21.11.6.7
FROM docker.io/yandex/clickhouse-server:22.1.3.7
RUN apt update && apt install -y python3
COPY ./scripts /scripts

View File

@ -1,15 +1,20 @@
# Superset image with additional database drivers
# https://hub.docker.com/r/apache/superset
# https://github.com/apache/superset/releases
# https://github.com/apache/superset/blob/master/Dockerfile
# https://superset.apache.org/docs/databases/installing-database-drivers
FROM docker.io/apache/superset:1.3.2
FROM docker.io/apache/superset:2.0.0
USER root
# https://pypi.org/project/clickhouse-driver/
# https://pypi.org/project/clickhouse-sqlalchemy/
# https://pypi.org/project/mysqlclient/
RUN pip install clickhouse-driver==0.2.2 mysqlclient==2.1.0
RUN pip install clickhouse-sqlalchemy==0.1.7
# https://pypi.org/project/clickhouse-sqlalchemy/
RUN pip install clickhouse-driver==0.2.4 mysqlclient==2.1.1
# Later versions of clickhouse-sqlalchemy will not work.
# Note that this connector will be replaced by clickhouse-connect in v2.0.1:
# https://github.com/apache/superset/pull/22039
RUN pip install clickhouse-sqlalchemy==0.1.10
COPY --chown=superset:superset ./scripts /scripts
RUN chmod a+x /scripts/*
@ -17,7 +22,8 @@ ENV PATH /scripts:${PATH}
USER superset
HEALTHCHECK CMD curl -f "http://localhost:8000"
# This is required to have a proper healthcheck
ENV SUPERSET_PORT=8000
ENTRYPOINT []
CMD gunicorn \

View File

@ -16,7 +16,7 @@ from superset.models.core import Database
from superset.models.slice import Slice
from superset.extensions import db, security_manager
import superset.dashboards.commands.importers.v0 as importers
from superset.utils.core import get_or_create_db
from superset.utils.database import get_or_create_db
from werkzeug.security import generate_password_hash
@ -90,6 +90,8 @@ def main():
args.func(args)
# Note: we'd like to get rid of this command by relying on `superset fab create-user`
# but the "create-user" command fails if the user already exists.
def bootstrap_user(args):
# Bootstrap database
database_name = args.db or args.username
@ -121,7 +123,9 @@ def bootstrap_user(args):
if user is None or user is False:
# This may happen for instance when the email address is already associated
# to a different username
raise RuntimeError(f"Failed to create user '{args.username}' email='{args.email}'")
raise RuntimeError(
f"Failed to create user '{args.username}' email='{args.email}'"
)
# Associate role with the same name to user, if it exists
role_name = args.role or args.username
@ -155,7 +159,10 @@ def bootstrap_user(args):
def bootstrap_database(username, database_name):
with open(os.path.join(os.path.dirname(__file__), "clickhouse-auth.json")) as f:
with open(
os.path.join(os.path.dirname(__file__), "clickhouse-auth.json"),
encoding="utf-8",
) as f:
CLICKHOUSE_AUTH = json.load(f)
host = CLICKHOUSE_AUTH["host"]
@ -164,18 +171,12 @@ def bootstrap_database(username, database_name):
uri = f"clickhouse+native://{username}:@{host}:{port}/{database}"
database = get_or_create_db(database_name, uri, always_create=True)
# TODO get rid of the following once we upgrade clickhouse-sqlalchemy and we
# can disable reflection from the DSN
# https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/151
# https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/140
# This is preventing us from creating datasets from physical tables.
database_extra = json.loads(database.extra)
database_extra["engine_params"] = {"connect_args": {"server_version": "18.12.14"}}
database.extra = json.dumps(database_extra)
db.session.add(database)
db.session.commit()
# Note: we would like to start using superset's native export/import-dashboards command
# but we failed to get it to work.
def bootstrap_dashboards(args):
database_name = args.db or args.username
database = load_database(database_name)
@ -183,9 +184,7 @@ def bootstrap_dashboards(args):
for path in args.path:
print(
"importing dashboard {} for user='{}' db='{}'... ".format(
path, user.username, database.database_name
)
f"importing dashboard {path} for user='{user.username}' db='{database.database_name}'... "
)
dashboard = load_dashboard_file(path)
import_dashboard(dashboard, user, database)
@ -206,7 +205,7 @@ def load_user(username):
def load_dashboard_file(path):
with open(path) as f:
with open(path, encoding="utf-8") as f:
return json.load(f, object_hook=importers.decode_dashboards)
@ -227,16 +226,16 @@ def import_dashboard(data, user, database):
slices = dashboard.slices[:]
old_to_new_slice_id_map = {}
for slice in slices:
for dashboard_slice in slices:
# Make sure the new slice does not point to the old slice
params_dict = slice.params_dict
params_dict = dashboard_slice.params_dict
params_dict["database_name"] = database.name
params_dict.pop("remote_id")
slice.params = json.dumps(params_dict)
old_slice_id = slice.id
slice.id = None
dashboard_slice.params = json.dumps(params_dict)
old_slice_id = dashboard_slice.id
dashboard_slice.id = None
# Create new slice
new_slice_id = importers.import_chart(slice, None, now)
new_slice_id = importers.import_chart(dashboard_slice, None, now)
new_slice = db.session.query(Slice).get(new_slice_id)
old_to_new_slice_id_map[old_slice_id] = new_slice_id
# Make current user the new owner
@ -262,7 +261,6 @@ def import_dashboard(data, user, database):
dashboard.position_json = json.dumps(position)
# Update filter mapping
# import pdb; pdb.set_trace()
metadata = json.loads(dashboard.json_metadata)
new_filter_scopes = {}
for old_slice_id, filter_scope in metadata["filter_scopes"].items():
@ -274,7 +272,9 @@ def import_dashboard(data, user, database):
]
new_filter_scope[filter_name] = properties
# Note that filter scope keys are str, not int
new_filter_scopes[str(old_to_new_slice_id_map[int(old_slice_id)])] = new_filter_scope
new_filter_scopes[
str(old_to_new_slice_id_map[int(old_slice_id)])
] = new_filter_scope
metadata["filter_scopes"] = new_filter_scopes
dashboard.json_metadata = json.dumps(metadata)