secretflow.ml.boost.sgb_v.factory.components#

Classes:

OrderMapManager()

GradientEncryptor()

Manage all encryptions related to y, gradients, hessians

Sampler()

LeafManager()

DataPreprocessor()

ModelBuilder()

Functions related to building models, including making predictions and adding trees

LossComputer()

Compute loss, gradients and hessians

TreeTrainer()

NodeSelector()

LevelWiseCache()

Shuffler()

BucketSumCalculator()

SplitFinder()

SplitTreeBuilder()

LevelWiseTreeTrainer()

class secretflow.ml.boost.sgb_v.factory.components.OrderMapManager[源代码]#

基类:Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

build_order_map(x)

get_order_map()

get_feature_buckets()

get_bucket_lists(col_choices_list)

compute_left_child_selects(actor_index, ...)

batch_query_split_points_each_party(queries_list)

batch_compute_left_child_selects_each_party(...)

__init__() None[源代码]#
show_params()[源代码]#
set_params(params: Dict)[源代码]#
get_params(params: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
build_order_map(x: FedNdarray)[源代码]#
get_order_map() FedNdarray[源代码]#
get_feature_buckets() List[PYUObject][源代码]#
get_bucket_lists(col_choices_list: List[PYUObject]) List[PYUObject][源代码]#
compute_left_child_selects(actor_index: int, feature: int, split_point_index: int, sampled_indices: Optional[List[int]] = None) PYUObject[源代码]#
batch_query_split_points_each_party(queries_list: List[PYUObject]) List[PYUObject][源代码]#
batch_compute_left_child_selects_each_party(split_feature_buckets_each_party: List[PYUObject], sampled_indices: Optional[List[int]] = None) List[PYUObject][源代码]#
class secretflow.ml.boost.sgb_v.factory.components.GradientEncryptor[源代码]#

基类:Component

Manage all encryptions related to y, gradients, hessians

Methods:

__init__()

show_params()

set_devices(devices)

get_params(params)

set_params(params)

pack(g, h)

encrypt(gh, tree_index)

cache_to_workers(encrypted_gh, gh)

get_move_config(pyu)

__init__()[源代码]#
show_params()[源代码]#
set_devices(devices: Devices)[源代码]#
get_params(params: dict)[源代码]#
set_params(params: dict)[源代码]#
pack(g: PYUObject, h: PYUObject) PYUObject[源代码]#
encrypt(gh: PYUObject, tree_index: int) HEUObject[源代码]#
cache_to_workers(encrypted_gh: HEUObject, gh: PYUObject) Dict[PYU, Union[HEUObject, PYUObject]][源代码]#
get_move_config(pyu)[源代码]#
class secretflow.ml.boost.sgb_v.factory.components.Sampler[源代码]#

基类:Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(_)

generate_col_choices(feature_buckets)

Generate column sample choices.

generate_row_choices(row_num)

apply_vector_sampling(x, indices)

Sample x for a single partition.

apply_v_fed_sampling(X[, row_choices, ...])

Sample X based on row choices and col choices.

__init__()[源代码]#
show_params()[源代码]#
set_params(params: dict)[源代码]#
get_params(params: dict)[源代码]#
set_devices(_: Devices)[源代码]#
generate_col_choices(feature_buckets: List[PYUObject]) Tuple[List[PYUObject], List[PYUObject]][源代码]#

Generate column sample choices.

参数:

feature_buckets (List[PYUObject]) – Behind PYUObject is List[int], bucket num for each feature.

返回:

first list is column choices, second is total number of buckets after sampling

返回类型:

Tuple[List[PYUObject], List[PYUObject]]

generate_row_choices(row_num) Union[None, ndarray][源代码]#
apply_vector_sampling(x: PYUObject, indices: Union[PYUObject, ndarray])[源代码]#

Sample x for a single partition, assuming x is a column vector and the indices were generated from row sampling by the sampler.

apply_v_fed_sampling(X: FedNdarray, row_choices: Union[None, ndarray, PYUObject] = None, col_choices: List[Union[None, ndarray, PYUObject]] = []) FedNdarray[源代码]#

Sample X based on row choices and col choices. Assume the choices were generated by sampler.

参数:
  • X (FedNdarray) – Array to sample from

  • row_choices (Union[None, np.ndarray, PYUObject]) – row sampling choices. devices are assumed to be ordered as X.

  • col_choices (List[Union[None, np.ndarray, PYUObject]]) – col sampling choices. devices are assumed to be ordered as X.

返回:

subsampled X; shape (Tuple[int, int]): shape of X_sub

返回类型:

X_sub (FedNdarray)

class secretflow.ml.boost.sgb_v.factory.components.LeafManager[源代码]#

基类:Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

clear_leaves()

extend_leaves(pruned_node_selects, ...)

get_leaf_selects()

get_leaf_indices()

compute_leaf_weights(g, h)

__init__() None[源代码]#
show_params()[源代码]#
set_params(params: dict)[源代码]#
get_params(params: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
clear_leaves()[源代码]#
extend_leaves(pruned_node_selects: List[ndarray], pruned_node_indices: List[int])[源代码]#
get_leaf_selects()[源代码]#
get_leaf_indices()[源代码]#
compute_leaf_weights(g, h)[源代码]#
class secretflow.ml.boost.sgb_v.factory.components.DataPreprocessor[源代码]#

基类:Component

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(_)

validate(dataset, label)

__init__() None[源代码]#
show_params()[源代码]#
set_params(_)[源代码]#
get_params(_)[源代码]#
set_devices(_)[源代码]#
validate(dataset, label) Tuple[FedNdarray, Tuple[int, int], PYUObject, Tuple[int, int]][源代码]#
class secretflow.ml.boost.sgb_v.factory.components.ModelBuilder[源代码]#

基类:Component

Functions related to building models, including making predictions and adding trees

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

init_pred(sample_num)

init_model()

insert_tree(tree)

get_tree_num()

finish()

__init__()[源代码]#
show_params()[源代码]#
set_params(params: dict)[源代码]#
get_params(params: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
init_pred(sample_num: int) PYUObject[源代码]#
init_model()[源代码]#
insert_tree(tree: DistributedTree)[源代码]#
get_tree_num() int[源代码]#
finish() SgbModel[源代码]#
class secretflow.ml.boost.sgb_v.factory.components.LossComputer[源代码]#

基类:Component

Compute loss, gradients and hessians

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

compute_gh(y, pred)

__init__() None[源代码]#
show_params()[源代码]#
set_params(params: dict)[源代码]#
get_params(params: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
compute_gh(y: Union[PYUObject, ndarray], pred: Union[PYUObject, ndarray]) Tuple[PYUObject, PYUObject][源代码]#
class secretflow.ml.boost.sgb_v.factory.components.TreeTrainer[源代码]#

基类:Composite

Methods:

show_params()

set_params(params)

set_devices(devices)

train_tree(cur_tree_num, order_map_manager, ...)

train on training data

show_params()[源代码]#
set_params(params: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
abstract train_tree(cur_tree_num, order_map_manager, y, pred, x_shape) DistributedTree[源代码]#

train on training data

class secretflow.ml.boost.sgb_v.factory.components.NodeSelector[源代码]#

基类:Component

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(devices)

root_select(sample_num)

is_list_empty(any_list)

pick_children_node_ss(node_select_list)

get_child_select(nodes_s, lchild_ss, ...)

compute the next level's sample select indices.

__init__() None[源代码]#
show_params()[源代码]#
set_params(_: dict)[源代码]#
get_params(_: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
root_select(sample_num)[源代码]#
is_list_empty(any_list: Union[PYUObject, List]) PYUObject[源代码]#
pick_children_node_ss(node_select_list: PYUObject) Tuple[List[PYUObject], List[bool], int][源代码]#
get_child_select(nodes_s: List[ndarray], lchild_ss: List[ndarray], gain_is_cost_effective: List[bool], split_node_indices: List[int]) Tuple[List[ndarray], List[int], List[ndarray], List[int]][源代码]#

compute the next level’s sample select indices.

参数:
  • nodes_s – List[np.ndarray]. sample select indices of each node from current level’s nodes.

  • lchild_ss – List[np.ndarray]. left children’s sample select indices for the current level’s nodes. A non-empty single sample select is a np.ndarray with the shape n_samples * 1 and with entries being 0s and 1s. 1 indicates the sample remains in the node.

  • gain_is_cost_effective – List[bool]. indicate whether node should be split.

  • split_node_indices – List[int]. node indices at the current level.

返回:

sample select indices for nodes in the next tree level; node indices for the next level; sample_selects for pruned nodes; node_indices for pruned nodes.

class secretflow.ml.boost.sgb_v.factory.components.LevelWiseCache[源代码]#

基类:Component

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(devices)

reset_level_caches()

collect_level_node_GH(worker, bucket_sums, ...)

get_level_nodes_GH(worker)

update_level_cache(is_last_level, ...)

__init__()[源代码]#
show_params()[源代码]#
set_params(_: dict)[源代码]#
get_params(_: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
reset_level_caches()[源代码]#
collect_level_node_GH(worker, bucket_sums, is_lefts)[源代码]#
get_level_nodes_GH(worker) List[源代码]#
update_level_cache(is_last_level, gain_is_cost_effective)[源代码]#
class secretflow.ml.boost.sgb_v.factory.components.Shuffler[源代码]#

基类:Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

reset_shuffle_masks()

create_shuffle_mask(worker_index, key, ...)

unshuffle_split_buckets(...)

unshuffle split buckets viewed by each partition

__init__()[源代码]#
show_params()[源代码]#
set_params(params: dict)[源代码]#
get_params(params: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
reset_shuffle_masks()[源代码]#
create_shuffle_mask(worker_index: int, key: int, bucket_list: List[PYUObject]) List[int][源代码]#
unshuffle_split_buckets(split_buckets_parition_wise: List[PYUObject]) List[PYUObject][源代码]#

unshuffle split buckets viewed by each partition

参数:

split_buckets_parition_wise (List[PYUObject]) – PYUObject is List[int], split buckets viewed from this partition

返回:

unshuffled split buckets

返回类型:

List[List[PYUObject]]

class secretflow.ml.boost.sgb_v.factory.components.BucketSumCalculator[源代码]#

基类:Composite

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(devices)

calculate_bucket_sum_level_wise(shuffler, ...)

update_level_cache(is_last_level, ...)

__init__()[源代码]#
show_params()[源代码]#
set_params(_: dict)[源代码]#
get_params(_: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
calculate_bucket_sum_level_wise(shuffler: Shuffler, encrypted_gh_dict: Dict[PYU, HEUObject], children_split_node_selects: List[PYUObject], is_lefts: List[bool], order_map_sub: FedNdarray, bucket_num: int, bucket_lists: List[PYUObject], gradient_encryptor: GradientEncryptor, node_num: int) Tuple[PYUObject, PYUObject][源代码]#
update_level_cache(is_last_level, gain_is_cost_effective)[源代码]#
class secretflow.ml.boost.sgb_v.factory.components.SplitFinder[源代码]#

基类:Component

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

find_best_splits(G, H, tree_num, level)

__init__() None[源代码]#
show_params()[源代码]#
set_params(params: dict)[源代码]#
get_params(params: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
find_best_splits(G: ndarray, H: ndarray, tree_num: int, level: int) Tuple[ndarray, ndarray][源代码]#
class secretflow.ml.boost.sgb_v.factory.components.SplitTreeBuilder[源代码]#

基类:Component

Methods:

__init__()

show_params()

set_params(_)

get_params(_)

set_devices(devices)

reset()

set_col_choices_and_buckets(col_choices, ...)

split_bucket_to_partition(split_buckets)

map split bucket to position in the partition or -1 if not in partition

get_split_feature_list_wise_each_party(...)

map the unmasked split buckets to feature and split point

do_split_list_wise_each_party(...)

insert split points to split trees

do_split(split_buckets, sampled_rows, ...)

insert_split_trees_into_distributed_tree(...)

__init__() None[源代码]#
show_params()[源代码]#
set_params(_)[源代码]#
get_params(_)[源代码]#
set_devices(devices: Devices)[源代码]#
reset()[源代码]#
set_col_choices_and_buckets(col_choices: List[PYUObject], total_buckets: List[PYUObject], feature_buckets: List[PYUObject])[源代码]#
split_bucket_to_partition(split_buckets: PYUObject) List[PYUObject][源代码]#

map split bucket to position in the partition or -1 if not in partition

参数:

split_buckets (PYUObject) – PYUObject is in fact a List[int].

返回:

each PYUObject is in fact a List[int]. split buckets viewed by each party

返回类型:

List[PYUObject]

get_split_feature_list_wise_each_party(un_shuffled_split_buckets_each_party: List[PYUObject]) List[PYUObject][源代码]#

map the unmasked split buckets to feature and split point

参数:

un_shuffled_split_buckets_each_party (List[PYUObject]) – split buckets viewed by each party. Each PYUObject is a list of int; an entry is -1 if the split bucket is not in that party.

返回:

PYUObject is in fact a List[Union[None, Tuple[int, int]]], None if -1 else (feature_index, bucket_index) for split.

返回类型:

List[PYUObject]

do_split_list_wise_each_party(split_features: List[PYUObject], split_points: List[PYUObject], left_child_selects: List[PYUObject], gain_is_cost_effective: List[bool], node_indices: Union[List[int], PYUObject]) List[List[int]][源代码]#

insert split points to split trees

参数:
  • split_features (List[PYUObject]) – party wise. each PYUObject is List[Tuple[int, int]]. len = node indices length.

  • split_points (List[PYUObject]) – party wise. each PYUObject is List[float]. len = node indices length.

  • left_child_selects (List[PYUObject]) – party wise. each PYUObject is List[np.ndarray]

  • gain_is_cost_effective (List[bool]) – if gain is cost effective

  • node_indices (Union[List[int], PYUObject]) – node indices.

返回:

left child selects for the new split nodes.

返回类型:

left_child_selects

do_split(split_buckets: List[int], sampled_rows: List[int], gain_is_cost_effective: List[bool], node_indices: Union[List[int], PYUObject], shuffler: Shuffler, order_map_manager: OrderMapManager) List[PYUObject][源代码]#
insert_split_trees_into_distributed_tree(distributed_tree: DistributedTree, leaf_node_indices: PYUObject)[源代码]#
class secretflow.ml.boost.sgb_v.factory.components.LevelWiseTreeTrainer[源代码]#

基类:TreeTrainer

Methods:

__init__()

show_params()

set_params(params)

get_params(params)

set_devices(devices)

train_tree(cur_tree_num, order_map_manager, ...)

train on training data

__init__() None[源代码]#
show_params()[源代码]#
set_params(params: dict)[源代码]#
get_params(params: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
train_tree(cur_tree_num, order_map_manager: OrderMapManager, y: PYUObject, pred: Union[PYUObject, ndarray], x_shape: Tuple[int, int]) DistributedTree[源代码]#

train on training data

secretflow.ml.boost.sgb_v.factory.components.component#

Classes:

Devices(label_holder, workers, heu)

Component()

Composite()

Functions:

print_params(params)

class secretflow.ml.boost.sgb_v.factory.components.component.Devices(label_holder: secretflow.device.device.pyu.PYU, workers: List[secretflow.device.device.pyu.PYU], heu: secretflow.device.device.heu.HEU)[源代码]#

基类:object

Attributes:

label_holder

workers

heu

Methods:

__init__(label_holder, workers, heu)

label_holder: PYU#
workers: List[PYU]#
heu: HEU#
__init__(label_holder: PYU, workers: List[PYU], heu: HEU) None#
class secretflow.ml.boost.sgb_v.factory.components.component.Component[源代码]#

基类:ABC

Methods:

show_params()

get_params(params)

set_params(params)

set_devices(devices)

abstract show_params()[源代码]#
abstract get_params(params: dict)[源代码]#
abstract set_params(params: dict)[源代码]#
abstract set_devices(devices: Devices)[源代码]#
class secretflow.ml.boost.sgb_v.factory.components.component.Composite[源代码]#

基类:Component

Methods:

__init__()

show_params()

get_params_dict([params])

set_params(params)

set_devices(devices)

__init__() None[源代码]#
show_params()[源代码]#
get_params_dict(params: dict = {})[源代码]#
set_params(params: dict)[源代码]#
set_devices(devices: Devices)[源代码]#
secretflow.ml.boost.sgb_v.factory.components.component.print_params(params)[源代码]#