@@ -100,9 +100,12 @@ def get_remote_function_locations(bq_location):
100
100
    return bq_location, cloud_function_region
101
101
102
102
103
- def _get_hash(def_):
103
+ def _get_hash(def_, package_requirements=None):
104
104
    "Get hash (32 digits alphanumeric) of a function."
105
105
    def_repr = cloudpickle.dumps(def_, protocol=_pickle_protocol_version)
106
+     if package_requirements:
107
+         for p in sorted(package_requirements):
108
+             def_repr += p.encode()
106
109
    return hashlib.md5(def_repr).hexdigest()
107
110
108
111
@@ -129,18 +132,18 @@ class IbisSignature(NamedTuple):
129
132
    output_type: IbisDataType
130
133
131
134
132
- def get_cloud_function_name(def_, uniq_suffix=None):
135
+ def get_cloud_function_name(def_, uniq_suffix=None, package_requirements=None):
133
136
    "Get a name for the cloud function for the given user defined function."
134
-     cf_name = _get_hash(def_)
137
+     cf_name = _get_hash(def_, package_requirements)
135
138
    cf_name = f"bigframes-{cf_name}"  # for identification
136
139
    if uniq_suffix:
137
140
        cf_name = f"{cf_name}-{uniq_suffix}"
138
141
    return cf_name
139
142
140
143
141
- def get_remote_function_name(def_, uniq_suffix=None):
144
+ def get_remote_function_name(def_, uniq_suffix=None, package_requirements=None):
142
145
    "Get a name for the BQ remote function for the given user defined function."
143
-     bq_rf_name = _get_hash(def_)
146
+     bq_rf_name = _get_hash(def_, package_requirements)
144
147
    bq_rf_name = f"bigframes_{bq_rf_name}"  # for identification
145
148
    if uniq_suffix:
146
149
        bq_rf_name = f"{bq_rf_name}_{uniq_suffix}"
@@ -200,7 +203,8 @@ def create_bq_remote_function(
200
203
            RETURNS {bq_function_return_type}
201
204
            REMOTE WITH CONNECTION `{self._gcp_project_id}.{self._bq_location}.{self._bq_connection_id}`
202
205
            OPTIONS (
203
-               endpoint = "{endpoint}"
206
+               endpoint = "{endpoint}",
207
+               max_batching_rows = 1000
204
208
            )"""
205
209
206
210
        logger.info(f"Creating BQ remote function: {create_function_ddl}")
@@ -320,11 +324,14 @@ def {handler_func_name}(request):
320
324
321
325
        return handler_func_name
322
326
323
-     def generate_cloud_function_code(self, def_, dir):
327
+     def generate_cloud_function_code(self, def_, dir, package_requirements=None):
324
328
        """Generate the cloud function code for a given user defined function."""
325
329
326
330
        # requirements.txt
327
331
        requirements = ["cloudpickle >= 2.1.0"]
332
+         if package_requirements:
333
+             requirements.extend(package_requirements)
334
+         requirements = sorted(requirements)
328
335
        requirements_txt = os.path.join(dir, "requirements.txt")
329
336
        with open(requirements_txt, "w") as f:
330
337
            f.write("\n".join(requirements))
@@ -333,12 +340,14 @@ def generate_cloud_function_code(self, def_, dir):
333
340
        entry_point = self.generate_cloud_function_main_code(def_, dir)
334
341
        return entry_point
335
342
336
-     def create_cloud_function(self, def_, cf_name):
343
+     def create_cloud_function(self, def_, cf_name, package_requirements=None):
337
344
        """Create a cloud function from the given user defined function."""
338
345
339
346
        # Build and deploy folder structure containing cloud function
340
347
        with tempfile.TemporaryDirectory() as dir:
341
-             entry_point = self.generate_cloud_function_code(def_, dir)
348
+             entry_point = self.generate_cloud_function_code(
349
+                 def_, dir, package_requirements
350
+             )
342
351
            archive_path = shutil.make_archive(dir, "zip", dir)
343
352
344
353
        # We are creating cloud function source code from the currently running
@@ -392,6 +401,9 @@ def create_cloud_function(self, def_, cf_name):
392
401
        function.build_config.source.storage_source.object_ = (
393
402
            upload_url_response.storage_source.object_
394
403
        )
404
+         function.service_config = functions_v2.ServiceConfig()
405
+         function.service_config.available_memory = "1024M"
406
+         function.service_config.timeout_seconds = 600
395
407
        create_function_request.function = function
396
408
397
409
        # Create the cloud function and wait for it to be ready to use
@@ -422,6 +434,7 @@ def provision_bq_remote_function(
422
434
        output_type,
423
435
        reuse,
424
436
        name,
437
+         package_requirements,
425
438
    ):
426
439
        """Provision a BigQuery remote function."""
427
440
        # If reuse of any existing function with the same name (indicated by the
@@ -435,19 +448,25 @@ def provision_bq_remote_function(
435
448
436
449
        # Derive the name of the cloud function underlying the intended BQ
437
450
        # remote function
438
-         cloud_function_name = get_cloud_function_name(def_, uniq_suffix)
451
+         cloud_function_name = get_cloud_function_name(
452
+             def_, uniq_suffix, package_requirements
453
+         )
439
454
        cf_endpoint = self.get_cloud_function_endpoint(cloud_function_name)
440
455
441
456
        # Create the cloud function if it does not exist
442
457
        if not cf_endpoint:
443
-             cf_endpoint = self.create_cloud_function(def_, cloud_function_name)
458
+             cf_endpoint = self.create_cloud_function(
459
+                 def_, cloud_function_name, package_requirements
460
+             )
444
461
        else:
445
462
            logger.info(f"Cloud function {cloud_function_name} already exists.")
446
463
447
464
        # Derive the name of the remote function
448
465
        remote_function_name = name
449
466
        if not remote_function_name:
450
-             remote_function_name = get_remote_function_name(def_, uniq_suffix)
467
+             remote_function_name = get_remote_function_name(
468
+                 def_, uniq_suffix, package_requirements
469
+             )
451
470
        rf_endpoint, rf_conn = self.get_remote_function_specs(remote_function_name)
452
471
453
472
        # Create the BQ remote function in following circumstances:
@@ -619,6 +638,7 @@ def remote_function(
619
638
    bigquery_connection: Optional[str] = None,
620
639
    reuse: bool = True,
621
640
    name: Optional[str] = None,
641
+     packages: Optional[Sequence[str]] = None,
622
642
):
623
643
    """Decorator to turn a user defined function into a BigQuery remote function.
624
644
@@ -710,6 +730,10 @@ def remote_function(
710
730
            caution, because two users working in the same project and dataset
711
731
            could overwrite each other's remote functions if they use the same
712
732
            persistent name.
733
+         packages (str[], Optional):
734
+             Explicit name of the external package dependencies. Each dependency
735
+             is added to the `requirements.txt` as is, and can be of the form
736
+             supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/.
713
737
714
738
    """
715
739
    import bigframes.pandas as bpd
@@ -821,6 +845,7 @@ def wrapper(f):
821
845
            ibis_signature.output_type,
822
846
            reuse,
823
847
            name,
848
+             packages,
824
849
        )
825
850
826
851
        node = remote_function_node(dataset_ref.routine(rf_name), ibis_signature)
0 commit comments