Skip to content

Commit b9bd8f0

Browse files
authored
GCP ML services integration. (#925)
* Cloud Translation, Natural language, Vision, Video intelligence GCP integrations. * Fixes for style guide
1 parent 6946ddd commit b9bd8f0

File tree

9 files changed

+538
-14
lines changed

9 files changed

+538
-14
lines changed

Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,10 @@ RUN pip install --upgrade cython && \
268268
# which is loaded at startup.
269269
pip install google-cloud-bigquery==1.12.1 && \
270270
pip install google-cloud-storage && \
271+
pip install google-cloud-translate==3.* && \
272+
pip install google-cloud-language==2.* && \
273+
pip install google-cloud-videointelligence==2.* && \
274+
pip install google-cloud-vision==2.* && \
271275
pip install ortools && \
272276
pip install scattertext && \
273277
# Pandas data reader

patches/kaggle_gcp.py

Lines changed: 129 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
import inspect
3-
from google.auth import credentials
3+
from google.auth import credentials, environment_vars
44
from google.auth.exceptions import RefreshError
55
from google.api_core.gapic_v1.client_info import ClientInfo
66
from google.cloud import bigquery
@@ -22,7 +22,7 @@ def get_integrations():
2222
target = GcpTarget[integration.upper()]
2323
kernel_integrations.add_integration(target)
2424
except KeyError as e:
25-
Log.error(f"Unknown integration target: {e}")
25+
Log.error(f"Unknown integration target: {integration.upper()}")
2626
return kernel_integrations
2727

2828

@@ -45,6 +45,17 @@ def has_gcs(self):
4545
def has_automl(self):
4646
return GcpTarget.AUTOML in self.integrations
4747

48+
def has_translation(self):
    """Report whether GcpTarget.TRANSLATION is present in self.integrations."""
    enabled_targets = self.integrations
    return GcpTarget.TRANSLATION in enabled_targets
50+
51+
def has_natural_language(self):
    """Report whether GcpTarget.NATURAL_LANGUAGE is present in self.integrations."""
    enabled_targets = self.integrations
    return GcpTarget.NATURAL_LANGUAGE in enabled_targets
53+
54+
def has_video_intelligence(self):
    """Report whether GcpTarget.VIDEO_INTELLIGENCE is present in self.integrations."""
    enabled_targets = self.integrations
    return GcpTarget.VIDEO_INTELLIGENCE in enabled_targets
56+
57+
def has_vision(self):
    """Report whether GcpTarget.VISION is present in self.integrations."""
    enabled_targets = self.integrations
    return GcpTarget.VISION in enabled_targets
4859

4960
class KaggleKernelCredentials(credentials.Credentials):
5061
"""Custom Credentials used to authenticate using the Kernel's connected OAuth account.
@@ -65,6 +76,14 @@ def refresh(self, request):
6576
self.token, self.expiry = client._get_gcs_access_token()
6677
elif self.target == GcpTarget.AUTOML:
6778
self.token, self.expiry = client._get_automl_access_token()
79+
elif self.target == GcpTarget.TRANSLATION:
80+
self.token, self.expiry = client._get_translation_access_token()
81+
elif self.target == GcpTarget.NATURAL_LANGUAGE:
82+
self.token, self.expiry = client._get_natural_language_access_token()
83+
elif self.target == GcpTarget.VIDEO_INTELLIGENCE:
84+
self.token, self.expiry = client._get_video_intelligence_access_token()
85+
elif self.target == GcpTarget.VISION:
86+
self.token, self.expiry = client._get_vision_access_token()
6887
except ConnectionError as e:
6988
Log.error(f"Connection error trying to refresh access token: {e}")
7089
print("There was a connection error trying to fetch the access token. "
@@ -78,6 +97,12 @@ def refresh(self, request):
7897
f"Please ensure you have selected a {self.target.service} account in the Notebook Add-ons menu.")
7998
raise RefreshError('Unable to refresh access token.') from e
8099

100+
class KaggleKernelWithProjetCredentials(KaggleKernelCredentials):
    """Kaggle kernel credentials that additionally carry a quota project id.

    A new credentials object is built for the same target as an existing
    KaggleKernelCredentials instance, and the supplied quota_project_id is
    recorded on it.
    """

    def __init__(self, parentCredential=None, quota_project_id=None):
        """Create credentials for the same target as ``parentCredential``.

        Args:
            parentCredential: credentials object whose ``target`` attribute is
                reused. Required in practice even though it defaults to None.
            quota_project_id: value stored on ``self._quota_project_id``.

        Raises:
            TypeError: if ``parentCredential`` is None.
        """
        # Fail with an explicit message instead of the opaque
        # "'NoneType' object has no attribute 'target'" error.
        if parentCredential is None:
            raise TypeError(
                "parentCredential is required to build "
                "KaggleKernelWithProjetCredentials.")
        super().__init__(target=parentCredential.target)
        self._quota_project_id = quota_project_id
81106

82107
class _DataProxyConnection(Connection):
83108
"""Custom Connection class used to proxy the BigQuery client to Kaggle's data proxy."""
@@ -122,13 +147,16 @@ def __init__(self, *args, **kwargs):
122147
def has_been_monkeypatched(method):
123148
return "kaggle_gcp" in inspect.getsourcefile(method)
124149

150+
def is_user_secrets_token_set():
    """Return True when KAGGLE_USER_SECRETS_TOKEN exists in the process environment.

    Only presence is checked; an empty value still counts as set.
    """
    token = os.environ.get("KAGGLE_USER_SECRETS_TOKEN")
    return token is not None
152+
153+
def is_proxy_token_set():
    """Return True when KAGGLE_DATA_PROXY_TOKEN exists in the process environment.

    Only presence is checked; an empty value still counts as set.
    """
    token = os.environ.get("KAGGLE_DATA_PROXY_TOKEN")
    return token is not None
155+
125156
def init_bigquery():
126-
from google.auth import environment_vars
127157
from google.cloud import bigquery
128158

129-
is_proxy_token_set = "KAGGLE_DATA_PROXY_TOKEN" in os.environ
130-
is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
131-
if not (is_proxy_token_set or is_user_secrets_token_set):
159+
if not (is_proxy_token_set() or is_user_secrets_token_set()):
132160
return bigquery
133161

134162
# If this Notebook has bigquery integration on startup, preload the Kaggle Credentials
@@ -185,14 +213,24 @@ def patched_init(self, *args, **kwargs):
185213
specified_credentials = kwargs.get('credentials')
186214
if specified_credentials is None:
187215
Log.info("No credentials specified, using KaggleKernelCredentials.")
188-
kwargs['credentials'] = kaggle_kernel_credentials
216+
# Some GCP services require the billing project and the target project to be the same.
217+
# To avoid using the default service-account-based credential as the caller credential,
218+
# the user needs to provide ClientOptions with quota_project_id:
219+
# srv.Client(client_options=client_options.ClientOptions(quota_project_id="YOUR PROJECT"))
220+
client_options=kwargs.get('client_options')
221+
if client_options != None and client_options.quota_project_id != None:
222+
kwargs['credentials'] = KaggleKernelWithProjetCredentials(
223+
parentCredential = kaggle_kernel_credentials,
224+
quota_project_id = client_options.quota_project_id)
225+
else:
226+
kwargs['credentials'] = kaggle_kernel_credentials
189227

190228
kwargs['client_info'] = set_kaggle_user_agent(kwargs.get('client_info'))
191-
192229
return client_init(self, *args, **kwargs)
193230

194231
if (not has_been_monkeypatched(client_klass.__init__)):
195232
client_klass.__init__ = patched_init
233+
Log.info(f"Client patched: {client_klass}")
196234

197235
def set_kaggle_user_agent(client_info: ClientInfo):
198236
# Add kaggle client user agent in order to attribute usage.
@@ -203,9 +241,8 @@ def set_kaggle_user_agent(client_info: ClientInfo):
203241
return client_info
204242

205243
def init_gcs():
206-
is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
207244
from google.cloud import storage
208-
if not is_user_secrets_token_set:
245+
if not is_user_secrets_token_set():
209246
return storage
210247

211248
from kaggle_gcp import get_integrations
@@ -220,9 +257,8 @@ def init_gcs():
220257
return storage
221258

222259
def init_automl():
223-
is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
224260
from google.cloud import automl, automl_v1beta1
225-
if not is_user_secrets_token_set:
261+
if not is_user_secrets_token_set():
226262
return
227263

228264
from kaggle_gcp import get_integrations
@@ -251,10 +287,91 @@ def init_automl():
251287
# the TablesClient is GA.
252288
monkeypatch_client(automl_v1beta1.TablesClient, kaggle_kernel_credentials)
253289

290+
def init_translation_v2():
    """Import translate_v2 and, when applicable, patch its Client.

    The client is handed to monkeypatch_client only if the user secrets
    token is set and the Translation integration is enabled. The
    translate_v2 module is returned in every case.
    """
    from google.cloud import translate_v2

    if is_user_secrets_token_set():
        from kaggle_gcp import get_integrations

        if get_integrations().has_translation():
            from kaggle_secrets import GcpTarget

            translation_credentials = KaggleKernelCredentials(
                target=GcpTarget.TRANSLATION)
            monkeypatch_client(translate_v2.Client, translation_credentials)

    return translate_v2
302+
303+
def init_translation_v3():
    """Import translate_v3 and, when applicable, patch TranslationServiceClient.

    Translate v3 exposes a different client class than translate v2, so it
    is patched separately. Patching happens only if the user secrets token
    is set and the Translation integration is enabled. The translate_v3
    module is returned in every case.
    """
    from google.cloud import translate_v3

    if is_user_secrets_token_set():
        from kaggle_gcp import get_integrations

        if get_integrations().has_translation():
            from kaggle_secrets import GcpTarget

            translation_credentials = KaggleKernelCredentials(
                target=GcpTarget.TRANSLATION)
            monkeypatch_client(
                translate_v3.TranslationServiceClient, translation_credentials)

    return translate_v3
316+
317+
def init_natural_language():
    """Import google.cloud.language and, when applicable, patch its clients.

    Both LanguageServiceClient and LanguageServiceAsyncClient are handed to
    monkeypatch_client, but only if the user secrets token is set and the
    Natural Language integration is enabled. The language module is
    returned in every case.
    """
    from google.cloud import language

    if is_user_secrets_token_set():
        from kaggle_gcp import get_integrations

        if get_integrations().has_natural_language():
            from kaggle_secrets import GcpTarget

            language_credentials = KaggleKernelCredentials(
                target=GcpTarget.NATURAL_LANGUAGE)
            monkeypatch_client(
                language.LanguageServiceClient, language_credentials)
            monkeypatch_client(
                language.LanguageServiceAsyncClient, language_credentials)

    return language
331+
332+
def init_video_intelligence():
    """Import google.cloud.videointelligence and, when applicable, patch its clients.

    Both VideoIntelligenceServiceClient and
    VideoIntelligenceServiceAsyncClient are handed to monkeypatch_client,
    but only if the user secrets token is set and the Video Intelligence
    integration is enabled. The videointelligence module is returned in
    every case.
    """
    from google.cloud import videointelligence

    if is_user_secrets_token_set():
        from kaggle_gcp import get_integrations

        if get_integrations().has_video_intelligence():
            from kaggle_secrets import GcpTarget

            video_credentials = KaggleKernelCredentials(
                target=GcpTarget.VIDEO_INTELLIGENCE)
            sync_client = videointelligence.VideoIntelligenceServiceClient
            monkeypatch_client(sync_client, video_credentials)
            async_client = videointelligence.VideoIntelligenceServiceAsyncClient
            monkeypatch_client(async_client, video_credentials)

    return videointelligence
350+
351+
def init_vision():
    """Import google.cloud.vision and, when applicable, patch its clients.

    Both ImageAnnotatorClient and ImageAnnotatorAsyncClient are handed to
    monkeypatch_client, but only if the user secrets token is set and the
    Vision integration is enabled. The vision module is returned in every
    case.
    """
    from google.cloud import vision

    if is_user_secrets_token_set():
        from kaggle_gcp import get_integrations

        if get_integrations().has_vision():
            from kaggle_secrets import GcpTarget

            vision_credentials = KaggleKernelCredentials(
                target=GcpTarget.VISION)
            monkeypatch_client(vision.ImageAnnotatorClient, vision_credentials)
            monkeypatch_client(
                vision.ImageAnnotatorAsyncClient, vision_credentials)

    return vision
365+
254366
def init():
    """Run every GCP client initializer once, in a fixed order."""
    initializers = (
        init_bigquery,
        init_gcs,
        init_automl,
        init_translation_v2,
        init_translation_v3,
        init_natural_language,
        init_video_intelligence,
        init_vision,
    )
    for initialize in initializers:
        initialize()
258375

259376
# We need to initialize the monkeypatching of the client libraries
260377
# here since there is a circular dependency between our import hook version

patches/kaggle_secrets.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ class GcpTarget(Enum):
2424
BIGQUERY = (1, "BigQuery")
2525
GCS = (2, "Google Cloud Storage")
2626
AUTOML = (3, "Cloud AutoML")
27+
TRANSLATION = (4, "Cloud Translation")
28+
NATURAL_LANGUAGE = (5, "Cloud Natural Language")
29+
VIDEO_INTELLIGENCE = (6, "Cloud Video Intelligence")
30+
VISION = (7, "Cloud Vision")
2731

2832
def __init__(self, target, service):
2933
self._target = target
@@ -154,6 +158,18 @@ def _get_gcs_access_token(self) -> Tuple[str, Optional[datetime]]:
154158
def _get_automl_access_token(self) -> Tuple[str, Optional[datetime]]:
155159
return self._get_access_token(GcpTarget.AUTOML)
156160

161+
def _get_translation_access_token(self) -> Tuple[str, Optional[datetime]]:
    """Return the result of _get_access_token for GcpTarget.TRANSLATION."""
    target = GcpTarget.TRANSLATION
    return self._get_access_token(target)
163+
164+
def _get_natural_language_access_token(self) -> Tuple[str, Optional[datetime]]:
    """Return the result of _get_access_token for GcpTarget.NATURAL_LANGUAGE."""
    target = GcpTarget.NATURAL_LANGUAGE
    return self._get_access_token(target)
166+
167+
def _get_video_intelligence_access_token(self) -> Tuple[str, Optional[datetime]]:
    """Return the result of _get_access_token for GcpTarget.VIDEO_INTELLIGENCE."""
    target = GcpTarget.VIDEO_INTELLIGENCE
    return self._get_access_token(target)
169+
170+
def _get_vision_access_token(self) -> Tuple[str, Optional[datetime]]:
    """Return the result of _get_access_token for GcpTarget.VISION."""
    target = GcpTarget.VISION
    return self._get_access_token(target)
172+
157173
def _get_access_token(self, target: GcpTarget) -> Tuple[str, Optional[datetime]]:
158174
request_body = {
159175
'Target': target.target

patches/sitecustomize.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,20 @@
77
import importlib.machinery
88

99
class GcpModuleFinder(importlib.abc.MetaPathFinder):
10-
_MODULES = ['google.cloud.bigquery', 'google.cloud.storage', 'google.cloud.automl_v1beta1']
10+
_MODULES = [
11+
'google.cloud.bigquery',
12+
'google.cloud.storage',
13+
'google.cloud.automl_v1beta1',
14+
'google.cloud.translate',
15+
'google.cloud.translate_v2',
16+
'google.cloud.translate_v3',
17+
'google.cloud.language',
18+
'google.cloud.language_v1',
19+
'google.cloud.videointelligence',
20+
'google.cloud.videointelligence_v1',
21+
'google.cloud.vision',
22+
'google.cloud.vision_v1',
23+
]
1124
_KAGGLE_GCP_PATH = 'kaggle_gcp.py'
1225
def __init__(self):
1326
pass
@@ -41,13 +54,21 @@ def create_module(self, spec):
4154
'google.cloud.bigquery': kaggle_gcp.init_bigquery,
4255
'google.cloud.storage': kaggle_gcp.init_gcs,
4356
'google.cloud.automl_v1beta1': kaggle_gcp.init_automl,
57+
'google.cloud.translate': kaggle_gcp.init_translation_v3,
58+
'google.cloud.translate_v2': kaggle_gcp.init_translation_v2,
59+
'google.cloud.translate_v3': kaggle_gcp.init_translation_v3,
60+
'google.cloud.language': kaggle_gcp.init_natural_language,
61+
'google.cloud.language_v1': kaggle_gcp.init_natural_language,
62+
'google.cloud.videointelligence': kaggle_gcp.init_video_intelligence,
63+
'google.cloud.videointelligence_v1': kaggle_gcp.init_video_intelligence,
64+
'google.cloud.vision': kaggle_gcp.init_vision,
65+
'google.cloud.vision_v1': kaggle_gcp.init_vision
4466
}
4567
monkeypatch_gcp_module = _LOADERS[spec.name]()
4668
return monkeypatch_gcp_module
4769

4870
def exec_module(self, module):
4971
pass
5072

51-
5273
if not hasattr(sys, 'frozen'):
5374
sys.meta_path.insert(0, GcpModuleFinder())

tests/test_natural_language.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import unittest
2+
import inspect
3+
4+
from unittest.mock import Mock, patch
5+
6+
from kaggle_gcp import KaggleKernelCredentials, init_natural_language
7+
from test.support import EnvironmentVarGuard
8+
from google.cloud import language
9+
10+
def _make_credentials():
    """Build a Mock restricted to the google.auth Credentials interface."""
    from google.auth import credentials as auth_credentials

    credentials_spec = auth_credentials.Credentials
    return Mock(spec=credentials_spec)
13+
14+
class TestCloudNaturalLanguage(unittest.TestCase):
    """Tests for the monkeypatching done by init_natural_language."""

    class FakeClient:
        """Stand-in for LanguageServiceClient that records the credentials it receives."""

        def __init__(self, credentials=None, client_info=None, **kwargs):
            self.credentials = credentials

            class FakeConnection():
                def __init__(self, user_agent):
                    self.user_agent = user_agent

            if (client_info is not None):
                self._connection = FakeConnection(client_info.user_agent)

    @patch("google.cloud.language.LanguageServiceClient", new=FakeClient)
    def test_default_credentials(self):
        """A client built without credentials receives KaggleKernelCredentials."""
        env = EnvironmentVarGuard()
        env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar')
        env.set('KAGGLE_KERNEL_INTEGRATIONS', 'NATURAL_LANGUAGE')
        with env:
            init_natural_language()
            client = language.LanguageServiceClient()
            self.assertIsNotNone(client.credentials)
            self.assertIsInstance(client.credentials, KaggleKernelCredentials)

    @patch("google.cloud.language.LanguageServiceClient", new=FakeClient)
    def test_user_provided_credentials(self):
        """Credentials passed explicitly by the caller are left in place."""
        credentials = _make_credentials()
        env = EnvironmentVarGuard()
        env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar')
        env.set('KAGGLE_KERNEL_INTEGRATIONS', 'NATURAL_LANGUAGE')
        with env:
            init_natural_language()
            client = language.LanguageServiceClient(credentials=credentials)
            self.assertIsNotNone(client.credentials)
            self.assertNotIsInstance(client.credentials, KaggleKernelCredentials)

    def test_monkeypatching_succeed(self):
        """After init, LanguageServiceClient.__init__ comes from kaggle_gcp."""
        env = EnvironmentVarGuard()
        env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar')
        # The integration must be NATURAL_LANGUAGE: init_natural_language
        # returns before patching when that integration is not enabled, so
        # any other value makes this test depend on another test having
        # patched the client first.
        env.set('KAGGLE_KERNEL_INTEGRATIONS', 'NATURAL_LANGUAGE')
        with env:
            init_natural_language()
            patched_init = language.LanguageServiceClient.__init__
            self.assertIn("kaggle_gcp", inspect.getsourcefile(patched_init))

    def test_monkeypatching_idempotent(self):
        """Calling init twice leaves the same patched __init__ in place."""
        env = EnvironmentVarGuard()
        env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar')
        env.set('KAGGLE_KERNEL_INTEGRATIONS', 'NATURAL_LANGUAGE')
        with env:
            init_natural_language()
            client1 = language.LanguageServiceClient.__init__
            init_natural_language()
            client2 = language.LanguageServiceClient.__init__
            self.assertEqual(client1, client2)

0 commit comments

Comments
 (0)