Reputation: 113
I try to run DataHub (https://datahub.io/) and / or OpenMetaData (https://open-metadata.org/) locally for testing. Both are used via docker-compose files.
For OpenMetaData I used:
version: "3.9"
volumes:
ingestion-volume-dag-airflow:
ingestion-volume-dags:
ingestion-volume-tmp:
services:
mysql:
container_name: openmetadata_mysql
image: openmetadata/db:0.13.0
restart: always
environment:
MYSQL_ROOT_PASSWORD: password
expose:
- 3306
volumes:
- ./docker-volume/db-data:/var/lib/mysql
networks:
- app_net
healthcheck:
test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db"
interval: 15s
timeout: 10s
retries: 10
elasticsearch:
container_name: openmetadata_elasticsearch
image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
environment:
- discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
networks:
- app_net
ports:
- "9200:9200"
- "9300:9300"
openmetadata-server:
container_name: openmetadata_server
restart: always
image: openmetadata/server:0.13.0
environment:
# OpenMetadata Server Authentication Configuration
AUTHORIZER_CLASS_NAME: ${AUTHORIZER_CLASS_NAME:-org.openmetadata.service.security.DefaultAuthorizer}
AUTHORIZER_REQUEST_FILTER: ${AUTHORIZER_REQUEST_FILTER:-org.openmetadata.service.security.JwtFilter}
AUTHORIZER_ADMIN_PRINCIPALS: ${AUTHORIZER_ADMIN_PRINCIPALS:-[admin]}
AUTHORIZER_ALLOWED_REGISTRATION_DOMAIN: ${AUTHORIZER_ALLOWED_REGISTRATION_DOMAIN:-["all"]}
AUTHORIZER_INGESTION_PRINCIPALS: ${AUTHORIZER_INGESTION_PRINCIPALS:-[ingestion-bot]}
AUTHORIZER_PRINCIPAL_DOMAIN: ${AUTHORIZER_PRINCIPAL_DOMAIN:-"openmetadata.org"}
AUTHORIZER_ENFORCE_PRINCIPAL_DOMAIN: ${AUTHORIZER_ENFORCE_PRINCIPAL_DOMAIN:-false}
AUTHORIZER_ENABLE_SECURE_SOCKET: ${AUTHORIZER_ENABLE_SECURE_SOCKET:-false}
AUTHENTICATION_PROVIDER: ${AUTHENTICATION_PROVIDER:-basic}
CUSTOM_OIDC_AUTHENTICATION_PROVIDER_NAME: ${CUSTOM_OIDC_AUTHENTICATION_PROVIDER_NAME:-""}
AUTHENTICATION_PUBLIC_KEYS: ${AUTHENTICATION_PUBLIC_KEYS:-[http://localhost:8585/api/v1/config/jwks]}
AUTHENTICATION_AUTHORITY: ${AUTHENTICATION_AUTHORITY:-https://accounts.google.com}
AUTHENTICATION_CLIENT_ID: ${AUTHENTICATION_CLIENT_ID:-""}
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
# JWT Configuration
RSA_PUBLIC_KEY_FILE_PATH: ${RSA_PUBLIC_KEY_FILE_PATH:-"./conf/public_key.der"}
RSA_PRIVATE_KEY_FILE_PATH: ${RSA_PRIVATE_KEY_FILE_PATH:-"./conf/private_key.der"}
JWT_ISSUER: ${JWT_ISSUER:-"open-metadata.org"}
JWT_KEY_ID: ${JWT_KEY_ID:-"Gb389a-9f76-gdjs-a92j-0242bk94356"}
# OpenMetadata Server Airflow Configuration
AIRFLOW_HOST: ${AIRFLOW_HOST:-http://ingestion:8080}
SERVER_HOST_API_URL: ${SERVER_HOST_API_URL:-http://openmetadata-server:8585/api}
AIRFLOW_AUTH_PROVIDER: ${AIRFLOW_AUTH_PROVIDER:-no-auth}
# OpenMetadata Airflow Azure SSO Configuration
OM_AUTH_AIRFLOW_AZURE_CLIENT_SECRET: ${OM_AUTH_AIRFLOW_AZURE_CLIENT_SECRET:-""}
OM_AUTH_AIRFLOW_AZURE_AUTHORITY_URL: ${OM_AUTH_AIRFLOW_AZURE_AUTHORITY_URL:-""}
OM_AUTH_AIRFLOW_AZURE_SCOPES: ${OM_AUTH_AIRFLOW_AZURE_SCOPES:-[]}
OM_AUTH_AIRFLOW_AZURE_CLIENT_ID: ${OM_AUTH_AIRFLOW_AZURE_CLIENT_ID:-""}
# OpenMetadata Airflow Google SSO Configuration
OM_AUTH_AIRFLOW_GOOGLE_SECRET_KEY_PATH: ${OM_AUTH_AIRFLOW_GOOGLE_SECRET_KEY_PATH:- ""}
OM_AUTH_AIRFLOW_GOOGLE_AUDIENCE: ${OM_AUTH_AIRFLOW_GOOGLE_AUDIENCE:-"https://www.googleapis.com/oauth2/v4/token"}
# OpenMetadata Airflow Okta SSO Configuration
OM_AUTH_AIRFLOW_OKTA_CLIENT_ID: ${OM_AUTH_AIRFLOW_OKTA_CLIENT_ID:-""}
OM_AUTH_AIRFLOW_OKTA_ORGANIZATION_URL: ${OM_AUTH_AIRFLOW_OKTA_ORGANIZATION_URL:-""}
OM_AUTH_AIRFLOW_OKTA_PRIVATE_KEY: ${OM_AUTH_AIRFLOW_OKTA_PRIVATE_KEY:-""}
OM_AUTH_AIRFLOW_OKTA_SA_EMAIL: ${OM_AUTH_AIRFLOW_OKTA_SA_EMAIL:-""}
OM_AUTH_AIRFLOW_OKTA_SCOPES: ${OM_AUTH_AIRFLOW_OKTA_SCOPES:-[]}
# OpenMetadata Airflow Auth0 SSO Configuration
OM_AUTH_AIRFLOW_AUTH0_CLIENT_ID: ${OM_AUTH_AIRFLOW_AUTH0_CLIENT_ID:-""}
OM_AUTH_AIRFLOW_AUTH0_CLIENT_SECRET: ${OM_AUTH_AIRFLOW_AUTH0_CLIENT_SECRET:-""}
OM_AUTH_AIRFLOW_AUTH0_DOMAIN_URL: ${OM_AUTH_AIRFLOW_AUTH0_DOMAIN_URL:-""}
# OpenMetadata Airflow Custom OIDC SSO Configuration
OM_AUTH_AIRFLOW_CUSTOM_OIDC_CLIENT_ID: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_CLIENT_ID:-""}
OM_AUTH_AIRFLOW_CUSTOM_OIDC_SECRET_KEY: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_SECRET_KEY:-""}
OM_AUTH_AIRFLOW_CUSTOM_OIDC_TOKEN_ENDPOINT_URL: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_TOKEN_ENDPOINT_URL:-""}
# OpenMetadata Airflow JWT Token Configuration
OM_AUTH_JWT_TOKEN: ${OM_AUTH_JWT_TOKEN:-""}
# Database configuration for MySQL
DB_DRIVER_CLASS: ${DB_DRIVER_CLASS:-com.mysql.cj.jdbc.Driver}
DB_SCHEME: ${DB_SCHEME:-mysql}
DB_USE_SSL: ${DB_USE_SSL:-false}
DB_USER: ${DB_USER:-openmetadata_user}
DB_USER_PASSWORD: ${DB_USER_PASSWORD:-openmetadata_password}
DB_HOST: ${DB_HOST:-mysql}
DB_PORT: ${DB_PORT:-3306}
OM_DATABASE: ${OM_DATABASE:-openmetadata_db}
# Airflow SSL Configurations
AIRFLOW_VERIFY_SSL: ${AIRFLOW_VERIFY_SSL:-"no-ssl"}
AIRFLOW_SSL_CERT_PATH: ${AIRFLOW_SSL_CERT_PATH:-""}
# ElasticSearch Configurations
ELASTICSEARCH_HOST: ${ELASTICSEARCH_HOST:- elasticsearch}
ELASTICSEARCH_PORT: ${ELASTICSEARCH_PORT:-9200}
ELASTICSEARCH_SCHEME: ${ELASTICSEARCH_SCHEME:-http}
ELASTICSEARCH_USER: ${ELASTICSEARCH_USER:-""}
ELASTICSEARCH_PASSWORD: ${ELASTICSEARCH_PASSWORD:-""}
# Heap OPTS Configurations
OPENMETADATA_HEAP_OPTS: ${OPENMETADATA_HEAP_OPTS:--Xmx1G -Xms1G}
expose:
- 8585
- 8586
ports:
- "8585:8585"
- "8586:8586"
depends_on:
elasticsearch:
condition: service_started
mysql:
condition: service_healthy
networks:
- app_net
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
ingestion:
container_name: openmetadata_ingestion
image: openmetadata/ingestion:0.13.0
depends_on:
elasticsearch:
condition: service_started
mysql:
condition: service_healthy
openmetadata-server:
condition: service_healthy
environment:
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
DB_HOST: ${AIRFLOW_DB_HOST:-mysql}
DB_PORT: ${AIRFLOW_DB_PORT:-3306}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql}
DB_USER: ${AIRFLOW_DB_USER:-airflow_user}
DB_PASSWORD: ${AIRFLOW_DB_PASSWORD:-airflow_pass}
entrypoint: /bin/bash
command:
- "/opt/airflow/ingestion_dependency.sh"
expose:
- 8080
ports:
- "8080:8080"
networks:
- app_net
volumes:
- ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
- ingestion-volume-dags:/opt/airflow/dags
- ingestion-volume-tmp:/tmp
networks:
app_net:
ipam:
driver: default
config:
- subnet: "172.16.240.0/24"
and for DataHub I followed the steps described here: https://datahubproject.io/docs/quickstart/
Everything runs locally on my Mac.
Now my problem - I have also a local postgres up and running. I can access the database via PGAdmin.
version: '3.8'
services:
db:
container_name: pg_container
image: postgres
restart: always
environment:
POSTGRES_USER: admin
POSTGRES_PASSWORD: admin
POSTGRES_DB: phd_test
ports:
- "5432:5432"
pgadmin:
container_name: pgadmin4_container
image: dpage/pgadmin4
restart: always
environment:
PGADMIN_DEFAULT_EMAIL: [email protected]
PGADMIN_DEFAULT_PASSWORD: admin
ports:
- "5050:80"
In both cases DataHub & OpenMetaData I can not connect to this local Postgres DB. In both cases I get errors like for example:
'sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) connection to server at "127.0.0.1", port 5432 failed: Connection refused\n'
'\tIs the server running on that host and accepting TCP/IP connections?\n'
'\n'
'(Background on this error at: https://sqlalche.me/e/14/e3q8)\n',
"2022-12-04 13:58:08.971818 [exec_id=9e2a2163-24e0-4afb-8d2d-cb1b3033bc91] INFO: Failed to execute 'datahub ingest'",
I also tried different endpoints within DataHub and OpenMetadata for the Postgres like
Does anybody else have a local connection between one of these tools up and running? As the errors are similar in both tools I think that it could be a docker network error, so that the postgres container can not be seen by other containers (like DataHub or OpenMetaData).
Upvotes: 1
Views: 1519
Reputation: 48
Maybe you figured it out, but the problem here is that the open metadata is running in a docker container; thus, reference to localhost in open metadata refers to the container where open metadata is running and not to the host machine of the open metadata docker.
If you are running Postgres locally you need to access the host's localhost and not the container's localhost. You can refer to host's localhost via host.docker.internal
.
Similarly, if you are using a docker container for Postgres and not exposing the port to your local network you need to find out the host of that container which is usually the id. For this, you can run docker network ls
to see all the containers in docker bridge network and then inspect the container to see the IP address and container id's using docker inspect <NETWORK ID>
.
Upvotes: 3