Skip to content

Commit 2080eb6

Browse files
author
gdgate
authored
Merge pull request #1644 from phong-nguyen-duy/TMA-1630
FEATURE: TMA-1630 Support Snowflake, BigQuery as input source Reviewed-by: https://github.com/danh-ung
2 parents dd03a7b + 5b4b757 commit 2080eb6

15 files changed

Lines changed: 405 additions & 12 deletions

File tree

Dockerfile

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,24 @@ RUN groupadd -g 48 apache \
4343
USER apache
4444

4545
ADD ./bin ./bin
46+
ADD --chown=apache:apache ./ci ./ci
4647
ADD --chown=apache:apache ./lib ./lib
4748
ADD ./SDK_VERSION .
4849
ADD ./VERSION .
4950
ADD ./Gemfile .
5051
ADD ./gooddata.gemspec .
5152

52-
RUN mkdir -p tmp
53-
COPY spec/lcm/redshift_driver_pom.xml tmp/pom.xml
54-
RUN mvn -f tmp/pom.xml clean install -P binary-packaging
55-
RUN cp -rf tmp/target/*.jar ./lib/gooddata/cloud_resources/redshift/drivers/
53+
#build redshift dependencies
54+
RUN mvn -f ci/redshift/pom.xml clean install -P binary-packaging
55+
RUN cp -rf ci/redshift/target/*.jar ./lib/gooddata/cloud_resources/redshift/drivers/
56+
57+
#build snowflake dependencies
58+
RUN mvn -f ci/snowflake/pom.xml clean install -P binary-packaging
59+
RUN cp -rf ci/snowflake/target/*.jar ./lib/gooddata/cloud_resources/snowflake/drivers/
60+
61+
#build bigquery dependencies
62+
RUN mvn -f ci/bigquery/pom.xml clean install -P binary-packaging
63+
RUN cp -rf ci/bigquery/target/*.jar ./lib/gooddata/cloud_resources/bigquery/drivers/
5664

5765
RUN bundle install
5866

ci/bigquery/pom.xml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
7+
<groupId>com.gooddata.lcm</groupId>
8+
<artifactId>lcm-bigquery-driver</artifactId>
9+
<version>1.0-SNAPSHOT</version>
10+
11+
<dependencies>
12+
<dependency>
13+
<groupId>com.google.auth</groupId>
14+
<artifactId>google-auth-library-oauth2-http</artifactId>
15+
<version>0.16.2</version>
16+
</dependency>
17+
<dependency>
18+
<groupId>org.apache.commons</groupId>
19+
<artifactId>commons-text</artifactId>
20+
<version>1.7</version>
21+
</dependency>
22+
<dependency>
23+
<groupId>com.google.cloud</groupId>
24+
<artifactId>google-cloud-bigquery</artifactId>
25+
<version>1.102.0</version>
26+
</dependency>
27+
</dependencies>
28+
29+
<profiles>
30+
<profile>
31+
<id>binary-packaging</id>
32+
<build>
33+
<plugins>
34+
<plugin>
35+
<artifactId>maven-dependency-plugin</artifactId>
36+
<executions>
37+
<execution>
38+
<phase>package</phase>
39+
<goals>
40+
<goal>copy-dependencies</goal>
41+
</goals>
42+
<configuration>
43+
<outputDirectory>${project.build.directory}</outputDirectory>
44+
<!-- runtime scope gives runtime and compile dependencies (skips test deps) -->
45+
<includeScope>runtime</includeScope>
46+
</configuration>
47+
</execution>
48+
</executions>
49+
</plugin>
50+
</plugins>
51+
</build>
52+
</profile>
53+
</profiles>
54+
</project>

ci/snowflake/pom.xml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
7+
<groupId>com.gooddata.lcm</groupId>
8+
<artifactId>lcm-snowflake-driver</artifactId>
9+
<version>1.0-SNAPSHOT</version>
10+
11+
<dependencies>
12+
<dependency>
13+
<groupId>net.snowflake</groupId>
14+
<artifactId>snowflake-jdbc</artifactId>
15+
<version>3.6.22</version>
16+
</dependency>
17+
<dependency>
18+
<groupId>org.slf4j</groupId>
19+
<artifactId>slf4j-api</artifactId>
20+
<version>1.7.2</version>
21+
</dependency>
22+
</dependencies>
23+
24+
<profiles>
25+
<profile>
26+
<id>binary-packaging</id>
27+
<build>
28+
<plugins>
29+
<plugin>
30+
<artifactId>maven-dependency-plugin</artifactId>
31+
<executions>
32+
<execution>
33+
<phase>package</phase>
34+
<goals>
35+
<goal>copy-dependencies</goal>
36+
</goals>
37+
<configuration>
38+
<outputDirectory>${project.build.directory}</outputDirectory>
39+
<!-- runtime scope gives runtime and compile dependencies (skips test deps) -->
40+
<includeScope>runtime</includeScope>
41+
</configuration>
42+
</execution>
43+
</executions>
44+
</plugin>
45+
</plugins>
46+
</build>
47+
</profile>
48+
</profiles>
49+
50+
<repositories>
51+
<repository>
52+
<id>my-repo1</id>
53+
<name>my custom repo</name>
54+
<url>https://repository.mulesoft.org/nexus/content/repositories/public/</url>
55+
</repository>
56+
</repositories>
57+
</project>

lcm.rake

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ end
118118
namespace :docker do
119119
desc 'Build Docker image'
120120
task :build do
121-
Rake::Task["maven:build_redshift"].invoke
121+
Rake::Task["maven:build_dependencies"].invoke
122122
system('docker build -f Dockerfile.jruby -t gooddata/appstore .')
123123
end
124124

@@ -129,11 +129,15 @@ namespace :docker do
129129
end
130130

131131
namespace :maven do
132-
task :build_redshift do
133-
system("cp -rf spec/lcm/redshift_driver_pom.xml tmp/pom.xml")
134-
system('mvn -f tmp/pom.xml clean install -P binary-packaging')
135-
system('cp -rf tmp/target/*.jar lib/gooddata/cloud_resources/redshift/drivers/')
136-
system('rm -rf lib/gooddata/cloud_resources/redshift/drivers/lcm-redshift-driver*.jar')
132+
task :build_dependencies do
  # For each driver: run its Maven build with the binary-packaging profile,
  # then copy the jars from its target directory into the matching
  # cloud_resources drivers directory. Order matches the original listing.
  %w[snowflake bigquery redshift].each do |driver|
    system("mvn -f ci/#{driver}/pom.xml clean install -P binary-packaging")
    system("cp -rf ci/#{driver}/target/*.jar lib/gooddata/cloud_resources/#{driver}/drivers/")
  end
end
138142
end
139143

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# encoding: UTF-8
2+
#
3+
# Copyright (c) 2010-2019 GoodData Corporation. All rights reserved.
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
require 'securerandom'
8+
require 'java'
9+
require 'pathname'
10+
require_relative '../cloud_resource_client'
11+
12+
# Load every jar shipped in the drivers directory next to this file.
base = Pathname(__FILE__).dirname.expand_path
Dir.glob(base + 'drivers/*.jar').each do |file|
  # Dir.glob yields full paths here, so the name check has to look at the
  # basename; checking the whole path would never match and the
  # 'lcm-bigquery-driver' jar would be required as well.
  require file unless File.basename(file).start_with?('lcm-bigquery-driver')
end
16+
17+
java_import 'com.google.auth.oauth2.ServiceAccountCredentials'
18+
java_import 'com.google.cloud.bigquery.BigQuery'
19+
java_import 'com.google.cloud.bigquery.BigQueryOptions'
20+
java_import 'com.google.cloud.bigquery.FieldList'
21+
java_import 'com.google.cloud.bigquery.FieldValue'
22+
java_import 'com.google.cloud.bigquery.FieldValueList'
23+
java_import 'com.google.cloud.bigquery.QueryJobConfiguration'
24+
java_import 'com.google.cloud.bigquery.TableResult'
25+
java_import 'org.apache.commons.text.StringEscapeUtils'
26+
27+
module GoodData
  module CloudResources
    # Cloud resource client that runs a SQL query against BigQuery and writes
    # the result into a local CSV file.
    class BigQueryClient < CloudResourceClient
      class << self
        # @param type [String] data source type name
        # @return [Boolean] true only when type is 'bigquery'
        def accept?(type)
          type == 'bigquery'
        end
      end

      # @param options [Hash] must contain 'bigquery_client' whose 'connection'
      #   entry is a Hash with 'project', 'authentication' and an optional
      #   'schema' (falls back to 'public')
      # @raise [RuntimeError] when 'bigquery_client' is missing or its
      #   'connection' entry is not a Hash
      def initialize(options = {})
        raise("Data Source needs a client to BigQuery to be able to query the storage but 'bigquery_client' is empty.") unless options['bigquery_client']

        connection = options['bigquery_client']['connection']
        raise('Missing connection info for BigQuery client') unless connection.is_a?(Hash)

        @project = connection['project']
        @schema = connection['schema'] || 'public'
        @authentication = connection['authentication']
      end

      # Runs the query and stores the rows, preceded by a header row with the
      # column names, in a randomly named CSV file in the current directory.
      # The file is only written when the query reports at least one row.
      #
      # @param query [String] SQL text to execute
      # @param _params [Object] unused
      # @return [String] name of the CSV file
      def realize_query(query, _params)
        GoodData.gd_logger.info("Realize SQL query: type=bigquery status=started")

        client = create_client
        filename = "#{SecureRandom.urlsafe_base64(6)}_#{Time.now.to_i}.csv"
        measure = Benchmark.measure do
          query_config = QueryJobConfiguration.newBuilder(query).setDefaultDataset(@schema).build
          table_result = client.query(query_config)

          if table_result.getTotalRows > 0
            result = table_result.iterateAll
            field_list = table_result.getSchema.getFields
            column_indexes = (0...field_list.size).to_a
            CSV.open(filename, 'wb', :force_quotes => true) do |csv|
              csv << column_indexes.map { |i| field_list.get(i).getName } # build the header
              result.each do |row|
                csv << column_indexes.map { |i| csv_value(row.get(i)) }
              end
            end
          end
        end
        # Report the real source type; the message previously said "redshift".
        GoodData.gd_logger.info("Realize SQL query: type=bigquery status=finished duration=#{measure.real}")
        filename
      end

      private

      # Converts one result cell to the value written to the CSV.
      # FieldValue#getStringValue throws for NULL cells, so those are mapped
      # to nil instead of aborting the whole export.
      #
      # @param field_value [com.google.cloud.bigquery.FieldValue]
      # @return [String, nil]
      def csv_value(field_value)
        field_value.isNull ? nil : field_value.getStringValue
      end

      # Builds a BigQuery service object from the service account e-mail and
      # private key found under @authentication['serviceAccount'].
      def create_client
        GoodData.logger.info "Setting up connection to BigQuery"
        client_email = @authentication['serviceAccount']['clientEmail']
        private_key = @authentication['serviceAccount']['privateKey']
        # The key is JSON-unescaped before being parsed as PKCS#8.
        credentials = ServiceAccountCredentials.fromPkcs8(nil, client_email, StringEscapeUtils.unescapeJson(private_key), nil, nil)
        BigQueryOptions.newBuilder.setProjectId(@project).setCredentials(credentials).build.getService
      end
    end
  end
end

lib/gooddata/cloud_resources/bigquery/drivers/.gitkeepme

Whitespace-only changes.

lib/gooddata/cloud_resources/snowflake/drivers/.gitkeepme

Whitespace-only changes.
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# encoding: UTF-8
2+
#
3+
# Copyright (c) 2010-2019 GoodData Corporation. All rights reserved.
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
require 'securerandom'
8+
require 'java'
9+
require 'pathname'
10+
require_relative '../cloud_resource_client'
11+
12+
# Load every jar shipped in the drivers directory next to this file.
base = Pathname(__FILE__).dirname.expand_path
Dir.glob(base + 'drivers/*.jar').each do |file|
  # Dir.glob yields full paths here, so the name check has to look at the
  # basename; checking the whole path would never match and the
  # 'lcm-snowflake-driver' jar would be required as well.
  require file unless File.basename(file).start_with?('lcm-snowflake-driver')
end
16+
17+
module GoodData
18+
module CloudResources
19+
class SnowflakeClient < CloudResourceClient
20+
class << self
21+
def accept?(type)
22+
type == 'snowflake'
23+
end
24+
end
25+
26+
def initialize(options = {})
27+
raise("Data Source needs a client to Snowflake to be able to query the storage but 'snowflake_client' is empty.") unless options['snowflake_client']
28+
29+
if options['snowflake_client']['connection'].is_a?(Hash)
30+
@database = options['snowflake_client']['connection']['database']
31+
@schema = options['snowflake_client']['connection']['schema'] || 'public'
32+
@warehouse = options['snowflake_client']['connection']['warehouse']
33+
@url = options['snowflake_client']['connection']['url']
34+
@authentication = options['snowflake_client']['connection']['authentication']
35+
else
36+
raise('Missing connection info for Snowflake client')
37+
38+
end
39+
40+
Java.net.snowflake.client.jdbc.SnowflakeDriver
41+
end
42+
43+
def realize_query(query, _params)
44+
GoodData.gd_logger.info("Realize SQL query: type=snowflake status=started")
45+
46+
connect
47+
filename = "#{SecureRandom.urlsafe_base64(6)}_#{Time.now.to_i}.csv"
48+
measure = Benchmark.measure do
49+
statement = @connection.create_statement
50+
51+
has_result = statement.execute(query)
52+
if has_result
53+
result = statement.get_result_set
54+
metadata = result.get_meta_data
55+
col_count = metadata.column_count
56+
CSV.open(filename, 'wb', :force_quotes => true) do |csv|
57+
csv << Array(1..col_count).map { |i| metadata.get_column_name(i) } # build the header
58+
csv << Array(1..col_count).map { |i| result.get_string(i) } while result.next
59+
end
60+
end
61+
end
62+
GoodData.gd_logger.info("Realize SQL query: type=snowflake status=finished duration=#{measure.real}")
63+
filename
64+
ensure
65+
@connection.close unless @connection.nil?
66+
@connection = nil
67+
end
68+
69+
def connect
70+
GoodData.logger.info "Setting up connection to Snowflake #{@url}"
71+
72+
prop = java.util.Properties.new
73+
prop.setProperty('user', @authentication['basic']['userName'])
74+
prop.setProperty('password', @authentication['basic']['password'])
75+
prop.setProperty('schema', @schema)
76+
prop.setProperty('warehouse', @warehouse)
77+
prop.setProperty('db', @database)
78+
79+
@connection = java.sql.DriverManager.getConnection(@url, prop)
80+
end
81+
end
82+
end
83+
end

lib/gooddata/helpers/data_helper.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def realize(params = {})
4444
realize_link
4545
when 's3'
4646
realize_s3(params)
47-
when 'redshift'
47+
when 'redshift', 'snowflake', 'bigquery'
4848
raise GoodData::InvalidEnvError, "DataSource does not support type \"#{source}\" on the platform #{RUBY_PLATFORM}" unless RUBY_PLATFORM =~ /java/
4949

5050
require_relative '../cloud_resources/cloud_resources'

0 commit comments

Comments
 (0)