Skip to content

Commit ca91a9a

Browse files
achoumcopybara-github
authored andcommitted
Release YDF 0.4.3 and TF-DF 1.9.1
PiperOrigin-RevId: 631693889
1 parent 12f70f5 commit ca91a9a

File tree

8 files changed

+164
-24
lines changed

8 files changed

+164
-24
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## 1.9.1 - 2024-05-07
4+
5+
### Fix
6+
7+
- Solve dependency collision of YDF Proto between PYDF and TF-DF.
8+
39
## 1.9.0 - 2024-03-12
410

511
### Fix

configure/setup.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@
1616
1717
This file is used by tools/build_pip_package.sh.
1818
"""
19+
1920
import platform
2021
import setuptools
2122
from setuptools.command.install import install
2223
from setuptools.dist import Distribution
2324

24-
_VERSION = "1.9.0"
25+
_VERSION = "1.9.1"
2526

2627
with open("README.md", "r", encoding="utf-8") as fh:
2728
long_description = fh.read()
@@ -35,6 +36,7 @@
3536
"wheel",
3637
"wurlitzer",
3738
"tf_keras~=2.16",
39+
"ydf",
3840
]
3941

4042

@@ -54,6 +56,7 @@ def has_ext_modules(self):
5456
def is_pure(self):
5557
return False
5658

59+
5760
try:
5861
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
5962

documentation/known_issues.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ The following table shows the compatibility between
5454

5555
tensorflow_decision_forests | tensorflow
5656
--------------------------- | ---------------
57+
1.9.1 | 2.16.1
5758
1.9.0 | 2.16.1
5859
1.8.0 - 1.8.1 | 2.15.0
5960
1.6.0 - 1.7.0 | 2.14.0

tensorflow_decision_forests/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
5252
"""
5353

54-
__version__ = "1.9.0"
54+
__version__ = "1.9.1"
5555
__author__ = "Mathieu Guillame-Bert"
5656

5757
compatible_tf_versions = ["2.16.1"]

tensorflow_decision_forests/keras/wrappers_pre_generated.py

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,18 @@ class CartModel(core.CoreModel):
257257
expressed in seconds. Each learning algorithm is free to use this
258258
parameter as it sees fit. Enabling maximum training duration makes the
259259
model training non-deterministic. Default: -1.0.
260+
mhld_oblique_max_num_attributes: For MHLD oblique splits i.e.
261+
`split_axis=MHLD_OBLIQUE`. Maximum number of attributes in the projection.
262+
Increasing this value increases the training time. Decreasing this value
263+
acts as a regularization. The value should be in [2,
264+
num_numerical_features]. If the value is above the total number of
265+
numerical features, the value is capped automatically. The value 1 is
266+
allowed but results in ordinary (non-oblique) splits. Default: None.
267+
mhld_oblique_sample_attributes: For MHLD oblique splits i.e.
268+
`split_axis=MHLD_OBLIQUE`. If true, applies the attribute sampling
269+
controlled by the "num_candidate_attributes" or
270+
"num_candidate_attributes_ratio" parameters. If false, all the attributes
271+
are tested. Default: None.
260272
min_examples: Minimum number of examples in a node. Default: 5.
261273
missing_value_policy: Method used to handle missing attribute values. -
262274
`GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean
@@ -345,9 +357,11 @@ class CartModel(core.CoreModel):
345357
split_axis: What structure of split to consider for numerical features. -
346358
`AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
347359
is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
348-
Sparse oblique splits (i.e. splits one a small number of features) from
349-
"Sparse Projection Oblique Random Forests", Tomita et al., 2020. Default:
350-
"AXIS_ALIGNED".
360+
Sparse oblique splits (i.e. random splits on a small number of features)
361+
from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
362+
`MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
363+
"Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
364+
et al., 2019. Default: "AXIS_ALIGNED".
351365
uplift_min_examples_in_treatment: For uplift models only. Minimum number of
352366
examples per treatment in a node. Default: 5.
353367
uplift_split_score: For uplift models only. Splitter score i.e. score
@@ -402,6 +416,8 @@ def __init__(
402416
max_num_nodes: Optional[int] = None,
403417
maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0,
404418
maximum_training_duration_seconds: Optional[float] = -1.0,
419+
mhld_oblique_max_num_attributes: Optional[int] = None,
420+
mhld_oblique_sample_attributes: Optional[bool] = None,
405421
min_examples: Optional[int] = 5,
406422
missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION",
407423
num_candidate_attributes: Optional[int] = 0,
@@ -445,6 +461,8 @@ def __init__(
445461
maximum_model_size_in_memory_in_bytes
446462
),
447463
"maximum_training_duration_seconds": maximum_training_duration_seconds,
464+
"mhld_oblique_max_num_attributes": mhld_oblique_max_num_attributes,
465+
"mhld_oblique_sample_attributes": mhld_oblique_sample_attributes,
448466
"min_examples": min_examples,
449467
"missing_value_policy": missing_value_policy,
450468
"num_candidate_attributes": num_candidate_attributes,
@@ -1124,6 +1142,18 @@ class GradientBoostedTreesModel(core.CoreModel):
11241142
expressed in seconds. Each learning algorithm is free to use this
11251143
parameter as it sees fit. Enabling maximum training duration makes the
11261144
model training non-deterministic. Default: -1.0.
1145+
mhld_oblique_max_num_attributes: For MHLD oblique splits i.e.
1146+
`split_axis=MHLD_OBLIQUE`. Maximum number of attributes in the projection.
1147+
Increasing this value increases the training time. Decreasing this value
1148+
acts as a regularization. The value should be in [2,
1149+
num_numerical_features]. If the value is above the total number of
1150+
numerical features, the value is capped automatically. The value 1 is
1151+
allowed but results in ordinary (non-oblique) splits. Default: None.
1152+
mhld_oblique_sample_attributes: For MHLD oblique splits i.e.
1153+
`split_axis=MHLD_OBLIQUE`. If true, applies the attribute sampling
1154+
controlled by the "num_candidate_attributes" or
1155+
"num_candidate_attributes_ratio" parameters. If false, all the attributes
1156+
are tested. Default: None.
11271157
min_examples: Minimum number of examples in a node. Default: 5.
11281158
missing_value_policy: Method used to handle missing attribute values. -
11291159
`GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean
@@ -1232,9 +1262,11 @@ class GradientBoostedTreesModel(core.CoreModel):
12321262
split_axis: What structure of split to consider for numerical features. -
12331263
`AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
12341264
is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
1235-
Sparse oblique splits (i.e. splits one a small number of features) from
1236-
"Sparse Projection Oblique Random Forests", Tomita et al., 2020. Default:
1237-
"AXIS_ALIGNED".
1265+
Sparse oblique splits (i.e. random splits on a small number of features)
1266+
from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
1267+
`MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
1268+
"Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
1269+
et al., 2019. Default: "AXIS_ALIGNED".
12381270
subsample: Ratio of the dataset (sampling without replacement) used to train
12391271
individual trees for the random sampling method. If \\"subsample\\" is set
12401272
and if \\"sampling_method\\" is NOT set or set to \\"NONE\\", then
@@ -1324,6 +1356,8 @@ def __init__(
13241356
max_num_nodes: Optional[int] = None,
13251357
maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0,
13261358
maximum_training_duration_seconds: Optional[float] = -1.0,
1359+
mhld_oblique_max_num_attributes: Optional[int] = None,
1360+
mhld_oblique_sample_attributes: Optional[bool] = None,
13271361
min_examples: Optional[int] = 5,
13281362
missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION",
13291363
num_candidate_attributes: Optional[int] = -1,
@@ -1397,6 +1431,8 @@ def __init__(
13971431
maximum_model_size_in_memory_in_bytes
13981432
),
13991433
"maximum_training_duration_seconds": maximum_training_duration_seconds,
1434+
"mhld_oblique_max_num_attributes": mhld_oblique_max_num_attributes,
1435+
"mhld_oblique_sample_attributes": mhld_oblique_sample_attributes,
14001436
"min_examples": min_examples,
14011437
"missing_value_policy": missing_value_policy,
14021438
"num_candidate_attributes": num_candidate_attributes,
@@ -2213,6 +2249,18 @@ class RandomForestModel(core.CoreModel):
22132249
expressed in seconds. Each learning algorithm is free to use this
22142250
parameter as it sees fit. Enabling maximum training duration makes the
22152251
model training non-deterministic. Default: -1.0.
2252+
mhld_oblique_max_num_attributes: For MHLD oblique splits i.e.
2253+
`split_axis=MHLD_OBLIQUE`. Maximum number of attributes in the projection.
2254+
Increasing this value increases the training time. Decreasing this value
2255+
acts as a regularization. The value should be in [2,
2256+
num_numerical_features]. If the value is above the total number of
2257+
numerical features, the value is capped automatically. The value 1 is
2258+
allowed but results in ordinary (non-oblique) splits. Default: None.
2259+
mhld_oblique_sample_attributes: For MHLD oblique splits i.e.
2260+
`split_axis=MHLD_OBLIQUE`. If true, applies the attribute sampling
2261+
controlled by the "num_candidate_attributes" or
2262+
"num_candidate_attributes_ratio" parameters. If false, all the attributes
2263+
are tested. Default: None.
22162264
min_examples: Minimum number of examples in a node. Default: 5.
22172265
missing_value_policy: Method used to handle missing attribute values. -
22182266
`GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean
@@ -2315,9 +2363,11 @@ class RandomForestModel(core.CoreModel):
23152363
split_axis: What structure of split to consider for numerical features. -
23162364
`AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
23172365
is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
2318-
Sparse oblique splits (i.e. splits one a small number of features) from
2319-
"Sparse Projection Oblique Random Forests", Tomita et al., 2020. Default:
2320-
"AXIS_ALIGNED".
2366+
Sparse oblique splits (i.e. random splits on a small number of features)
2367+
from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
2368+
`MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
2369+
"Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
2370+
et al., 2019. Default: "AXIS_ALIGNED".
23212371
uplift_min_examples_in_treatment: For uplift models only. Minimum number of
23222372
examples per treatment in a node. Default: 5.
23232373
uplift_split_score: For uplift models only. Splitter score i.e. score
@@ -2380,6 +2430,8 @@ def __init__(
23802430
max_num_nodes: Optional[int] = None,
23812431
maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0,
23822432
maximum_training_duration_seconds: Optional[float] = -1.0,
2433+
mhld_oblique_max_num_attributes: Optional[int] = None,
2434+
mhld_oblique_sample_attributes: Optional[bool] = None,
23832435
min_examples: Optional[int] = 5,
23842436
missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION",
23852437
num_candidate_attributes: Optional[int] = 0,
@@ -2433,6 +2485,8 @@ def __init__(
24332485
maximum_model_size_in_memory_in_bytes
24342486
),
24352487
"maximum_training_duration_seconds": maximum_training_duration_seconds,
2488+
"mhld_oblique_max_num_attributes": mhld_oblique_max_num_attributes,
2489+
"mhld_oblique_sample_attributes": mhld_oblique_sample_attributes,
24362490
"min_examples": min_examples,
24372491
"missing_value_policy": missing_value_policy,
24382492
"num_candidate_attributes": num_candidate_attributes,
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/bin/bash
2+
# Copyright 2021 Google LLC.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# https://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
17+
18+
# Converts a non-submitted CL to a standalone Bazel project in a local
19+
# directory, compiles the project and runs the tests.
20+
#
21+
# Usage example:
22+
# third_party/tensorflow_decision_forests/tools/run_e2e_tfdf_test.sh
23+
24+
set -vex
25+
26+
LOCAL_DIR="/usr/local/google/home/${USER}/git/decision-forests"
27+
28+
CL=$(hg exportedcl)
29+
echo "Current CL: ${CL}"
30+
echo "Make sure the CL is synced!"
31+
32+
function export_project() {
33+
COPYBARA="/google/bin/releases/copybara/public/copybara/copybara"
34+
35+
# Test the Copybara configuration.
36+
bazel test third_party/tensorflow_decision_forests:copybara_test
37+
38+
echo "Export a Bazel project locally"
39+
echo "=============================="
40+
41+
rm -fr ${LOCAL_DIR}
42+
${COPYBARA} third_party/tensorflow_decision_forests/copy.bara.sky presubmit_piper_to_gerrit ${CL} \
43+
--dry-run --init-history --squash --force \
44+
--git-destination-path ${LOCAL_DIR} --ignore-noop
45+
46+
/google/bin/releases/opensource/thirdparty/cross/cross ${LOCAL_DIR}
47+
}
48+
49+
echo "Test the project"
50+
echo "================"
51+
52+
run_all() {
53+
cd ${LOCAL_DIR}
54+
55+
# Start the Docker
56+
sudo ./tools/start_compile_docker.sh /bin/bash
57+
58+
# In the docker, you can now trigger the builder with the following line in
59+
# the docker:
60+
# RUN_TESTS=1 PY_VERSION=3.9 TF_VERSION=2.16.1 ./tools/test_bazel.sh
61+
62+
# Alternatively, you can trigger the build directly with:
63+
# sudo ./tools/start_compile_docker.sh "RUN_TESTS=1 PY_VERSION=3.8 TF_VERSION=2.10.0 ./tools/test_bazel.sh && chmod -R a+xrw . && /bin/bash"
64+
}
65+
66+
export_project
67+
run_all

tools/build_pip_package.sh

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -116,15 +116,16 @@ function assemble_files() {
116116
# Distribution server binaries
117117
cp ${SRCBIN}/keras/grpc_worker_main ${SRCPK}/tensorflow_decision_forests/keras/
118118

119-
# YDF's proto wrappers.
120-
YDFSRCBIN="bazel-bin/external/ydf/yggdrasil_decision_forests"
121-
mkdir -p ${SRCPK}/yggdrasil_decision_forests
122-
pushd ${YDFSRCBIN}
123-
find . -name \*.py -exec rsync -R -arv {} ${SRCPK}/yggdrasil_decision_forests \;
124-
popd
125-
126-
# Add __init__.py to all exported Yggdrasil sub-directories.
127-
find ${SRCPK}/yggdrasil_decision_forests -type d -exec touch {}/__init__.py \;
119+
# Note: Starting with TF-DF 1.9.1, the YDF Protos are included by (P)YDF.
120+
# TODO: Remove this block.
121+
# # YDF's proto wrappers.
122+
# YDFSRCBIN="bazel-bin/external/ydf/yggdrasil_decision_forests"
123+
# mkdir -p ${SRCPK}/yggdrasil_decision_forests
124+
# pushd ${YDFSRCBIN}
125+
# find . -name \*.py -exec rsync -R -arv {} ${SRCPK}/yggdrasil_decision_forests \;
126+
# popd
127+
# # Add __init__.py to all exported Yggdrasil sub-directories.
128+
# find ${SRCPK}/yggdrasil_decision_forests -type d -exec touch {}/__init__.py \;
128129
}
129130

130131
# Build a pip package.

tools/start_compile_docker.sh

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,23 @@
5858
# ./tools/build_pip_package.sh ALL_VERSIONS_ALREADY_ASSEMBLED
5959
#
6060
# https://hub.docker.com/r/tensorflow/build/tags?page=1
61-
DOCKER=tensorflow/build:2.17-python3.9
6261

6362
# Current directory
6463
# Useful if Yggdrasil Decision Forests is available locally in a neighbor
6564
# directory.
6665
TFDF_DIRNAME=${PWD##*/}
6766

68-
# Download docker
69-
docker pull ${DOCKER}
67+
DOCKER_IMAGE=tensorflow/build:2.16-python3.9
68+
DOCKER_CONTAINER=compile_tfdf
69+
70+
echo "Available containers:"
71+
sudo sudo docker container ls -a --size
72+
73+
set +e # Ignore error if the container already exists
74+
CREATE_DOCKER_FLAGS="-i -t -p 8889:8889 --network host -v ${PWD}/..:/working_dir -w /working_dir/${TFDF_DIRNAME}"
75+
sudo docker create ${CREATE_DOCKER_FLAGS} --name ${DOCKER_CONTAINER} ${DOCKER_IMAGE}
76+
sudo docker start ${DOCKER_CONTAINER}
77+
set -e
7078

7179
# Start docker
72-
docker run -it -v ${PWD}/..:/working_dir -w /working_dir/${TFDF_DIRNAME} ${DOCKER} $@
80+
sudo docker exec -it ${DOCKER_CONTAINER} /bin/bash -c $@

0 commit comments

Comments
 (0)