secretflow.ml.boost.sgb_v.factory.components#

Classes:

OrderMapManager()

GradientEncryptor()

Manage all encryptions related to y, gradients, hessians

Sampler()

LeafManager()

DataPreprocessor()

ModelBuilder()

Functions related to building models, including making predictions and adding trees

LossComputer()

Compute loss, gradients and hessians

TreeTrainer()

NodeSelector()

LevelWiseCache()

Shuffler()

BucketSumCalculator()

SplitFinder()

SplitTreeBuilder()

LevelWiseTreeTrainer()

class secretflow.ml.boost.sgb_v.factory.components.OrderMapManager[source]#

Bases: Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

build_order_map(x)

get_order_map()

get_feature_buckets()

get_bucket_lists(col_choices_list)

compute_left_child_selects(actor_index, ...)

batch_query_split_points_each_party(queries_list)

batch_compute_left_child_selects_each_party(...)

__init__() None[source]#
show_params()[source]#
set_params(params: Dict)[source]#
get_params(params: dict)[source]#
set_devices(devices: Devices)[source]#
build_order_map(x: FedNdarray)[source]#
get_order_map() FedNdarray[source]#
get_feature_buckets() List[PYUObject][source]#
get_bucket_lists(col_choices_list: List[PYUObject]) List[PYUObject][source]#
compute_left_child_selects(actor_index: int, feature: int, split_point_index: int, sampled_indices: Optional[List[int]] = None) PYUObject[source]#
batch_query_split_points_each_party(queries_list: List[PYUObject]) List[PYUObject][source]#
batch_compute_left_child_selects_each_party(split_feature_buckets_each_party: List[PYUObject], sampled_indices: Optional[List[int]] = None) List[PYUObject][source]#
class secretflow.ml.boost.sgb_v.factory.components.GradientEncryptor[source]#

Bases: Component

Manage all encryptions related to y, gradients, hessians

Methods:

__init__()

show_params()

set_devices(devices)

get_params(params)

set_params(params)

pack(g, h)

encrypt(gh, tree_index)

cache_to_workers(encrypted_gh, gh)

get_move_config(pyu)

__init__()[source]#
show_params()[source]#
set_devices(devices: Devices)[source]#
get_params(params: dict)[source]#
set_params(params: dict)[source]#
pack(g: PYUObject, h: PYUObject) PYUObject[source]#
encrypt(gh: PYUObject, tree_index: int) HEUObject[source]#
cache_to_workers(encrypted_gh: HEUObject, gh: PYUObject) Dict[PYU, Union[HEUObject, PYUObject]][source]#
get_move_config(pyu)[source]#
class secretflow.ml.boost.sgb_v.factory.components.Sampler[source]#

Bases: Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(_)

generate_col_choices(feature_buckets)

Generate column sample choices.

generate_row_choices(row_num)

apply_vector_sampling(x, indices)

Sample x for a single partition.

apply_v_fed_sampling(X[, row_choices, ...])

Sample X based on row choices and col choices.

__init__()[source]#
show_params()[source]#
set_params(params: dict)[source]#
get_params(params: dict)[source]#
set_devices(_: Devices)[source]#
generate_col_choices(feature_buckets: List[PYUObject]) Tuple[List[PYUObject], List[PYUObject]][source]#

Generate column sample choices.

Parameters:

feature_buckets (List[PYUObject]) – Behind PYUObject is List[int], bucket num for each feature.

Returns:

first list is column choices, second is total number of buckets after sampling

Return type:

Tuple[List[PYUObject], List[PYUObject]]

generate_row_choices(row_num) Union[None, ndarray][source]#
apply_vector_sampling(x: PYUObject, indices: Union[PYUObject, ndarray])[source]#

Sample x for a single partition, assuming x is a column vector and the indices were generated by the sampler's row sampling.

apply_v_fed_sampling(X: FedNdarray, row_choices: Union[None, ndarray, PYUObject] = None, col_choices: List[Union[None, ndarray, PYUObject]] = []) FedNdarray[source]#

Sample X based on row choices and col choices. Assume the choices were generated by sampler.

Parameters:
  • X (FedNdarray) – Array to sample from

  • row_choices (Union[None, np.ndarray, PYUObject]) – row sampling choices. devices are assumed to be ordered as X.

  • col_choices (List[Union[None, np.ndarray, PYUObject]]) – col sampling choices. devices are assumed to be ordered as X.

Returns:

X_sub (FedNdarray): subsampled X. shape (Tuple[int, int]): shape of X_sub.

Return type:

FedNdarray

class secretflow.ml.boost.sgb_v.factory.components.LeafManager[source]#

Bases: Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

clear_leaves()

extend_leaves(pruned_node_selects, ...)

get_leaf_selects()

get_leaf_indices()

compute_leaf_weights(g, h)

__init__() None[source]#
show_params()[source]#
set_params(params: dict)[source]#
get_params(params: dict)[source]#
set_devices(devices: Devices)[source]#
clear_leaves()[source]#
extend_leaves(pruned_node_selects: List[ndarray], pruned_node_indices: List[int])[source]#
get_leaf_selects()[source]#
get_leaf_indices()[source]#
compute_leaf_weights(g, h)[source]#
class secretflow.ml.boost.sgb_v.factory.components.DataPreprocessor[source]#

Bases: Component

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(_)

validate(dataset, label)

__init__() None[source]#
show_params()[source]#
set_params(_)[source]#
get_params(_)[source]#
set_devices(_)[source]#
validate(dataset, label) Tuple[FedNdarray, Tuple[int, int], PYUObject, Tuple[int, int]][source]#
class secretflow.ml.boost.sgb_v.factory.components.ModelBuilder[source]#

Bases: Component

Functions related to building models, including making predictions and adding trees

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

init_pred(sample_num)

init_model()

insert_tree(tree)

get_tree_num()

finish()

__init__()[source]#
show_params()[source]#
set_params(params: dict)[source]#
get_params(params: dict)[source]#
set_devices(devices: Devices)[source]#
init_pred(sample_num: int) PYUObject[source]#
init_model()[source]#
insert_tree(tree: DistributedTree)[source]#
get_tree_num() int[source]#
finish() SgbModel[source]#
class secretflow.ml.boost.sgb_v.factory.components.LossComputer[source]#

Bases: Component

Compute loss, gradients and hessians

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

compute_gh(y, pred)

__init__() None[source]#
show_params()[source]#
set_params(params: dict)[source]#
get_params(params: dict)[source]#
set_devices(devices: Devices)[source]#
compute_gh(y: Union[PYUObject, ndarray], pred: Union[PYUObject, ndarray]) Tuple[PYUObject, PYUObject][source]#
class secretflow.ml.boost.sgb_v.factory.components.TreeTrainer[source]#

Bases: Composite

Methods:

show_params()

set_params(params)

set_devices(devices)

train_tree(cur_tree_num, order_map_manager, ...)

train on training data

show_params()[source]#
set_params(params: dict)[source]#
set_devices(devices: Devices)[source]#
abstract train_tree(cur_tree_num, order_map_manager, y, pred, x_shape) DistributedTree[source]#

train on training data

class secretflow.ml.boost.sgb_v.factory.components.NodeSelector[source]#

Bases: Component

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(devices)

root_select(sample_num)

is_list_empty(any_list)

pick_children_node_ss(node_select_list)

get_child_select(nodes_s, lchild_ss, ...)

compute the next level's sample select indices.

__init__() None[source]#
show_params()[source]#
set_params(_: dict)[source]#
get_params(_: dict)[source]#
set_devices(devices: Devices)[source]#
root_select(sample_num)[source]#
is_list_empty(any_list: Union[PYUObject, List]) PYUObject[source]#
pick_children_node_ss(node_select_list: PYUObject) Tuple[List[PYUObject], List[bool], int][source]#
get_child_select(nodes_s: List[ndarray], lchild_ss: List[ndarray], gain_is_cost_effective: List[bool], split_node_indices: List[int]) Tuple[List[ndarray], List[int], List[ndarray], List[int]][source]#

compute the next level’s sample select indices.

Parameters:
  • nodes_s – List[np.ndarray]. sample select indices of each node from current level’s nodes.

  • lchild_ss – List[np.ndarray]. left children’s sample select indices for the current level’s nodes. A non-empty single sample select is an np.ndarray of shape n_samples * 1 whose entries are 0s and 1s; 1 indicates that the sample remains in the node.

  • gain_is_cost_effective – List[bool]. indicate whether node should be split.

  • split_node_indices – List[int]. node indices at the current level.

Returns:

sample select indices for nodes in the next tree level; node indices for the next level; sample selects for pruned nodes; node indices for pruned nodes.

class secretflow.ml.boost.sgb_v.factory.components.LevelWiseCache[source]#

Bases: Component

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(devices)

reset_level_caches()

collect_level_node_GH(worker, bucket_sums, ...)

get_level_nodes_GH(worker)

update_level_cache(is_last_level, ...)

__init__()[source]#
show_params()[source]#
set_params(_: dict)[source]#
get_params(_: dict)[source]#
set_devices(devices: Devices)[source]#
reset_level_caches()[source]#
collect_level_node_GH(worker, bucket_sums, is_lefts)[source]#
get_level_nodes_GH(worker) List[source]#
update_level_cache(is_last_level, gain_is_cost_effective)[source]#
class secretflow.ml.boost.sgb_v.factory.components.Shuffler[source]#

Bases: Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

reset_shuffle_masks()

create_shuffle_mask(worker_index, key, ...)

unshuffle_split_buckets(...)

unshuffle split buckets viewed by each partition

__init__()[source]#
show_params()[source]#
set_params(params: dict)[source]#
get_params(params: dict)[source]#
set_devices(devices: Devices)[source]#
reset_shuffle_masks()[source]#
create_shuffle_mask(worker_index: int, key: int, bucket_list: List[PYUObject]) List[int][source]#
unshuffle_split_buckets(split_buckets_parition_wise: List[PYUObject]) List[PYUObject][source]#

unshuffle split buckets viewed by each partition

Parameters:

split_buckets_parition_wise (List[PYUObject]) – PYUObject is List[int], split buckets viewed from this partition

Returns:

unshuffled split buckets

Return type:

List[PYUObject]

class secretflow.ml.boost.sgb_v.factory.components.BucketSumCalculator[source]#

Bases: Composite

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(devices)

calculate_bucket_sum_level_wise(shuffler, ...)

update_level_cache(is_last_level, ...)

__init__()[source]#
show_params()[source]#
set_params(_: dict)[source]#
get_params(_: dict)[source]#
set_devices(devices: Devices)[source]#
calculate_bucket_sum_level_wise(shuffler: Shuffler, encrypted_gh_dict: Dict[PYU, HEUObject], children_split_node_selects: List[PYUObject], is_lefts: List[bool], order_map_sub: FedNdarray, bucket_num: int, bucket_lists: List[PYUObject], gradient_encryptor: GradientEncryptor, node_num: int) Tuple[PYUObject, PYUObject][source]#
update_level_cache(is_last_level, gain_is_cost_effective)[source]#
class secretflow.ml.boost.sgb_v.factory.components.SplitFinder[source]#

Bases: Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

find_best_splits(G, H, tree_num, level)

__init__() None[source]#
show_params()[source]#
set_params(params: dict)[source]#
get_params(params: dict)[source]#
set_devices(devices: Devices)[source]#
find_best_splits(G: ndarray, H: ndarray, tree_num: int, level: int) Tuple[ndarray, ndarray][source]#
class secretflow.ml.boost.sgb_v.factory.components.SplitTreeBuilder[source]#

Bases: Component

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(devices)

reset()

set_col_choices_and_buckets(col_choices, ...)

split_bucket_to_partition(split_buckets)

map split bucket to position in the partition or -1 if not in partition

get_split_feature_list_wise_each_party(...)

map the unmasked split buckets to feature and split point

do_split_list_wise_each_party(...)

insert split points to split trees

do_split(split_buckets, sampled_rows, ...)

insert_split_trees_into_distributed_tree(...)

__init__() None[source]#
show_params()[source]#
set_params(_)[source]#
get_params(_)[source]#
set_devices(devices: Devices)[source]#
reset()[source]#
set_col_choices_and_buckets(col_choices: List[PYUObject], total_buckets: List[PYUObject], feature_buckets: List[PYUObject])[source]#
split_bucket_to_partition(split_buckets: PYUObject) List[PYUObject][source]#

map split bucket to position in the partition or -1 if not in partition

Parameters:

split_buckets (PYUObject) – PYUObject is in fact a List[int].

Returns:

each PYUObject is in fact a List[int]. split buckets viewed by each party

Return type:

List[PYUObject]

get_split_feature_list_wise_each_party(un_shuffled_split_buckets_each_party: List[PYUObject]) List[PYUObject][source]#

map the unmasked split buckets to feature and split point

Parameters:

un_shuffled_split_buckets_each_party (List[PYUObject]) – split buckets viewed by each party. Each PYUObject is a list of int; an entry is -1 if the split bucket is not in that party’s partition.

Returns:

PYUObject is in fact a List[Union[None, Tuple[int, int]]], None if -1 else (feature_index, bucket_index) for split.

Return type:

List[PYUObject]

do_split_list_wise_each_party(split_features: List[PYUObject], split_points: List[PYUObject], left_child_selects: List[PYUObject], gain_is_cost_effective: List[bool], node_indices: Union[List[int], PYUObject]) List[List[int]][source]#

insert split points to split trees

Parameters:
  • split_features (List[PYUObject]) – party wise. each PYUObject is List[Tuple[int, int]]. len = node indices length.

  • split_points (List[PYUObject]) – party wise. each PYUObject is List[float]. len = node indices length.

  • left_child_selects (List[PYUObject]) – party wise. each PYUObject is List[np.ndarray]

  • gain_is_cost_effective (List[bool]) – if gain is cost effective

  • node_indices (Union[List[int], PYUObject]) – node indices.

Returns:

left_child_selects: left child selects for the new split nodes.

Return type:

List[List[int]]

do_split(split_buckets: List[int], sampled_rows: List[int], gain_is_cost_effective: List[bool], node_indices: Union[List[int], PYUObject], shuffler: Shuffler, order_map_manager: OrderMapManager) List[PYUObject][source]#
insert_split_trees_into_distributed_tree(distributed_tree: DistributedTree, leaf_node_indices: PYUObject)[source]#
class secretflow.ml.boost.sgb_v.factory.components.LevelWiseTreeTrainer[source]#

Bases: TreeTrainer

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

train_tree(cur_tree_num, order_map_manager, ...)

train on training data

__init__() None[source]#
show_params()[source]#
set_params(params: dict)[source]#
get_params(params: dict)[source]#
set_devices(devices: Devices)[source]#
train_tree(cur_tree_num, order_map_manager: OrderMapManager, y: PYUObject, pred: Union[PYUObject, ndarray], x_shape: Tuple[int, int]) DistributedTree[source]#

train on training data

secretflow.ml.boost.sgb_v.factory.components.component#

Classes:

Devices(label_holder, workers, heu)

Component()

Composite()

Functions:

print_params(params)

class secretflow.ml.boost.sgb_v.factory.components.component.Devices(label_holder: secretflow.device.device.pyu.PYU, workers: List[secretflow.device.device.pyu.PYU], heu: secretflow.device.device.heu.HEU)[source]#

Bases: object

Attributes:

label_holder

workers

heu

Methods:

__init__(label_holder, workers, heu)

label_holder: PYU#
workers: List[PYU]#
heu: HEU#
__init__(label_holder: PYU, workers: List[PYU], heu: HEU) None#
class secretflow.ml.boost.sgb_v.factory.components.component.Component[source]#

Bases: ABC

Methods:

show_params()

get_params(params)

set_params(params)

set_devices(devices)

abstract show_params()[source]#
abstract get_params(params: dict)[source]#
abstract set_params(params: dict)[source]#
abstract set_devices(devices: Devices)[source]#
class secretflow.ml.boost.sgb_v.factory.components.component.Composite[source]#

Bases: Component

Methods:

__init__()

show_params()

get_params_dict([params])

set_params(params)

set_devices(devices)

__init__() None[source]#
show_params()[source]#
get_params_dict(params: dict = {})[source]#
set_params(params: dict)[source]#
set_devices(devices: Devices)[source]#
secretflow.ml.boost.sgb_v.factory.components.component.print_params(params)[source]#