Index A | B | C | D | E | F | G | I | L | M | N | O | P | R | S | T | V | W A abbr_match() (in module emm.features.features_name) (in module emm.preprocessing.abbreviation_util) abbreviate() (emm.data.noiser.Noiser method) abbreviations_to_words() (in module emm.preprocessing.abbreviation_util) abs_len_diff() (in module emm.features.features_name) AbstractPreprocessor (class in emm.preprocessing.base_name_preprocessor) add_aggregation_layer() (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) add_supervised_model() (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) add_uid_column() (in module emm.helper.spark_utils) as_matrix() (in module emm.indexing.spark_indexing_utils) as_uri() (emm.helper.custom_path.CustomPath method) auto_repartitioning() (in module emm.helper.spark_utils) B BaseEntityAggregation (class in emm.aggregation.base_entity_aggregation) BaseEntityMatching (class in emm.pipeline.base_entity_matching) BaseFeatureExtractor (class in emm.features.base_feature_extractor) BaseIndexer (class in emm.indexing.base_indexer) BaseSupervisedModel (class in emm.supervised_model.base_supervised_model) C calc_diff_features() (in module emm.features.features_rank) calc_extra_features() (in module emm.features.features_extra) calc_features() (emm.supervised_model.pandas_supervised_model.PandasSupervisedLayerTransformer method) (emm.supervised_model.PandasSupervisedLayerTransformer method) calc_features_from_sm() (in module emm.supervised_model.base_supervised_model) calc_lef_features() (in module emm.features.features_lef) calc_name_features() (in module emm.features.features_name) calc_rank_features() (in module emm.features.features_rank) calc_score() (emm.indexing.pandas_cos_sim_matcher.PandasCosSimIndexer method) (emm.indexing.pandas_sni.PandasSortedNeighbourhoodIndexer method) (emm.indexing.PandasCosSimIndexer method) (emm.indexing.PandasSortedNeighbourhoodIndexer method) (emm.supervised_model.pandas_supervised_model.PandasSupervisedLayerTransformer method) (emm.supervised_model.PandasSupervisedLayerTransformer method) calc_threshold() (emm.pipeline.base_entity_matching.BaseEntityMatching method) change_letter() (emm.data.noiser.Noiser method) change_word() (emm.data.noiser.Noiser method) check_uid() (in module emm.helper.spark_utils) collect_matrix() (in module emm.indexing.spark_indexing_utils) column_prefix() (emm.indexing.pandas_cos_sim_matcher.PandasCosSimIndexer method) (emm.indexing.pandas_sni.PandasSortedNeighbourhoodIndexer method) (emm.indexing.PandasCosSimIndexer method) (emm.indexing.PandasSortedNeighbourhoodIndexer method) common_words (emm.features.features_vocabulary.Vocabulary attribute) compute_vocabulary_features() (in module emm.features.features_vocabulary) CosSimBaseIndexer (class in emm.indexing.base_indexer) create_example_noised_names() (in module emm.data.create_data) create_func_dict() (emm.preprocessing.base_name_preprocessor.AbstractPreprocessor method) (emm.preprocessing.pandas_preprocessor.PandasPreprocessor method) (emm.preprocessing.PandasPreprocessor method) (in module emm.preprocessing.functions) create_new_model_pipeline() (in module emm.supervised_model.base_supervised_model) create_noised_data() (in module emm.data.create_data) create_noiser() (in module emm.data.noiser) create_positive_negative_samples() (in module emm.data.negative_data_creation) create_training_data() (in module emm.data) (in module emm.data.create_data) create_training_name_pairs() (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) create_vocabulary() (in module emm.features.features_vocabulary) curry() (in module emm.indexing.spark_indexing_utils) custom_basename_and_lef() (in module emm.features.features_lef) CustomPath (class in emm.helper.custom_path) cut_word() (emm.data.noiser.Noiser method) D data() (in module emm.resources) decide_threshold() (in module emm.threshold.threshold_decision) decrease_window_by_one_step() (emm.indexing.base_indexer.BaseIndexer method) (emm.indexing.base_indexer.CosSimBaseIndexer method) (emm.indexing.base_indexer.SNBaseIndexer method) (emm.indexing.pandas_candidate_selection.PandasCandidateSelectionTransformer method) (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) diff_to_next() (in module emm.features.features_rank) diff_to_prev() (in module emm.features.features_rank) difference() (emm.loggers.Timer method) (emm.loggers.timer.Timer method) dist_to_max() (in module emm.features.features_rank) dist_to_min() (in module emm.features.features_rank) dot_product() (in module emm.indexing.spark_indexing_utils) down_casting_int() (in module emm.indexing.spark_indexing_utils) drop_letter() (emm.data.noiser.Noiser method) drop_word() (emm.data.noiser.Noiser method) E emm module emm.aggregation module emm.aggregation.base_entity_aggregation module emm.aggregation.pandas_entity_aggregation module emm.base module emm.base.module module emm.base.pipeline module emm.data module emm.data.create_data module emm.data.negative_data_creation module emm.data.noiser module emm.data.prepare_name_pairs module emm.features module emm.features.base_feature_extractor module emm.features.features_extra module emm.features.features_lef module emm.features.features_name module emm.features.features_rank module emm.features.features_vocabulary module emm.features.pandas_feature_extractor module emm.helper module emm.helper.blocking_functions module emm.helper.custom_path module emm.helper.io module emm.helper.sklearn_pipeline module emm.helper.spark_utils module emm.helper.util module emm.indexing module emm.indexing.base_indexer module emm.indexing.pandas_candidate_selection module emm.indexing.pandas_cos_sim_matcher module emm.indexing.pandas_naive_indexer module emm.indexing.pandas_normalized_tfidf module emm.indexing.pandas_sni module emm.indexing.spark_indexing_utils module emm.loggers module emm.loggers.logger module emm.loggers.timer module emm.parameters module emm.pipeline module emm.pipeline.base_entity_matching module emm.pipeline.pandas_entity_matching module emm.preprocessing module emm.preprocessing.abbreviation_util module emm.preprocessing.base_name_preprocessor module emm.preprocessing.functions module emm.preprocessing.pandas_functions module emm.preprocessing.pandas_preprocessor module emm.resources module emm.supervised_model module emm.supervised_model.base_supervised_model module emm.supervised_model.pandas_supervised_model module emm.threshold module emm.threshold.threshold_decision module emm.version module end() (emm.loggers.Timer method) (emm.loggers.timer.Timer method) explode_candidates() (in module emm.indexing.spark_indexing_utils) extract_abbr_merged_initials() (in module emm.features.features_name) (in module emm.preprocessing.abbreviation_util) extract_abbr_merged_word_pieces() (in module emm.features.features_name) (in module emm.preprocessing.abbreviation_util) extract_lef() (in module emm.features.features_lef) F feat_ptp() (in module emm.features.features_rank) features_schema_from_sm() (in module emm.supervised_model.base_supervised_model) find_abbr_merged_initials() (in module emm.features.features_name) (in module emm.preprocessing.abbreviation_util) find_abbr_merged_word_pieces() (in module emm.features.features_name) (in module emm.preprocessing.abbreviation_util) first() (in module emm.helper.blocking_functions) first2() (in module emm.helper.blocking_functions) first3() (in module emm.helper.blocking_functions) fit() (emm.aggregation.pandas_entity_aggregation.PandasEntityAggregation method) (emm.aggregation.PandasEntityAggregation method) (emm.features.pandas_feature_extractor.PandasFeatureExtractor method) (emm.features.PandasFeatureExtractor method) (emm.indexing.pandas_candidate_selection.PandasCandidateSelectionTransformer method) (emm.indexing.pandas_cos_sim_matcher.PandasCosSimIndexer method) (emm.indexing.pandas_naive_indexer.PandasNaiveIndexer method) (emm.indexing.pandas_normalized_tfidf.PandasNormalizedTfidfVectorizer method) (emm.indexing.pandas_sni.PandasSortedNeighbourhoodIndexer method) (emm.indexing.PandasCosSimIndexer method) (emm.indexing.PandasNaiveIndexer method) (emm.indexing.PandasSortedNeighbourhoodIndexer method) (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) (emm.preprocessing.pandas_preprocessor.PandasPreprocessor method) (emm.preprocessing.PandasPreprocessor method) (emm.supervised_model.pandas_supervised_model.PandasSupervisedLayerTransformer method) (emm.supervised_model.PandasSupervisedLayerTransformer method) fit_classifier() (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) fit_transform() (emm.aggregation.pandas_entity_aggregation.PandasEntityAggregation method) (emm.aggregation.PandasEntityAggregation method) (emm.indexing.pandas_candidate_selection.PandasCandidateSelectionTransformer method) (emm.indexing.pandas_normalized_tfidf.PandasNormalizedTfidfVectorizer method) (emm.preprocessing.pandas_preprocessor.PandasPreprocessor method) (emm.preprocessing.PandasPreprocessor method) (emm.supervised_model.pandas_supervised_model.PandasSupervisedLayerTransformer method) (emm.supervised_model.PandasSupervisedLayerTransformer method) flatten_df() (in module emm.indexing.spark_indexing_utils) format_values() (in module emm.loggers.timer) G get_business_type() (in module emm.features.features_lef) get_group() (emm.aggregation.base_entity_aggregation.BaseEntityAggregation method) get_gt_group() (emm.aggregation.base_entity_aggregation.BaseEntityAggregation method) get_model_title() (emm.pipeline.base_entity_matching.BaseEntityMatching method) (in module emm.helper.util) get_threshold_agg_name() (emm.pipeline.base_entity_matching.BaseEntityMatching static method) get_threshold_curves_parameters() (in module emm.threshold.threshold_decision) group_by_uid() (in module emm.features.features_rank) groupby() (in module emm.helper.util) I increase_window_by_one_step() (emm.indexing.base_indexer.BaseIndexer method) (emm.indexing.base_indexer.CosSimBaseIndexer method) (emm.indexing.base_indexer.SNBaseIndexer method) (emm.indexing.pandas_candidate_selection.PandasCandidateSelectionTransformer method) (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) indexers_set_values() (in module emm.helper.util) initialize() (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) insert_letter() (emm.data.noiser.Noiser method) insert_word() (emm.data.noiser.Noiser method) IOFunc (class in emm.helper.io) is_local (emm.helper.custom_path.CustomPath property) is_series_unique() (in module emm.aggregation.base_entity_aggregation) L label() (emm.loggers.Timer method) (emm.loggers.timer.Timer method) legal_abbreviations_to_words() (in module emm.preprocessing.abbreviation_util) len_ratio() (in module emm.features.features_name) load() (emm.PandasEntityMatching static method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching static method) (emm.pipeline.PandasEntityMatching static method) load_joblib() (in module emm.helper.io) load_pickle() (in module emm.helper.io) log_param() (emm.loggers.Timer method) (emm.loggers.timer.Timer method) log_params() (emm.loggers.Timer method) (emm.loggers.timer.Timer method) logical_repartitioning() (in module emm.helper.spark_utils) logSchema() (in module emm.loggers.logger) logShow() (in module emm.loggers.logger) lower() (in module emm.preprocessing.pandas_functions) M make_combi() (in module emm.features.features_lef) matching_legal_terms() (in module emm.features.features_lef) matching_max_candidate() (in module emm.aggregation.base_entity_aggregation) merge_indexers() (in module emm.data.negative_data_creation) merge_words() (emm.data.noiser.Noiser method) module emm emm.aggregation emm.aggregation.base_entity_aggregation emm.aggregation.pandas_entity_aggregation emm.base emm.base.module emm.base.pipeline emm.data emm.data.create_data emm.data.negative_data_creation emm.data.noiser emm.data.prepare_name_pairs emm.features emm.features.base_feature_extractor emm.features.features_extra emm.features.features_lef emm.features.features_name emm.features.features_rank emm.features.features_vocabulary emm.features.pandas_feature_extractor emm.helper emm.helper.blocking_functions emm.helper.custom_path emm.helper.io emm.helper.sklearn_pipeline emm.helper.spark_utils emm.helper.util emm.indexing emm.indexing.base_indexer emm.indexing.pandas_candidate_selection emm.indexing.pandas_cos_sim_matcher emm.indexing.pandas_naive_indexer emm.indexing.pandas_normalized_tfidf emm.indexing.pandas_sni emm.indexing.spark_indexing_utils emm.loggers emm.loggers.logger emm.loggers.timer emm.parameters emm.pipeline emm.pipeline.base_entity_matching emm.pipeline.pandas_entity_matching emm.preprocessing emm.preprocessing.abbreviation_util emm.preprocessing.base_name_preprocessor emm.preprocessing.functions emm.preprocessing.pandas_functions emm.preprocessing.pandas_preprocessor emm.resources emm.supervised_model emm.supervised_model.base_supervised_model emm.supervised_model.pandas_supervised_model emm.threshold emm.threshold.threshold_decision emm.version Module (class in emm.base.module) N NaiveIndexer (in module emm.indexing.pandas_naive_indexer) name_cut() (in module emm.features.features_name) negative_rerank_cossim() (in module emm.data.negative_data_creation) negative_rerank_sni() (in module emm.data.negative_data_creation) noise() (emm.data.noiser.Noiser method) Noiser (class in emm.data.noiser) notebook() (in module emm.resources) O original_abbr_match() (in module emm.features.features_name) P pandas_create_noised_data() (in module emm.data.create_data) pandas_split_data() (in module emm.data.create_data) PandasCandidateSelectionTransformer (class in emm.indexing.pandas_candidate_selection) PandasCosSimIndexer (class in emm.indexing) (class in emm.indexing.pandas_cos_sim_matcher) PandasEntityAggregation (class in emm.aggregation) (class in emm.aggregation.pandas_entity_aggregation) PandasEntityMatching (class in emm) (class in emm.pipeline) (class in emm.pipeline.pandas_entity_matching) PandasFeatureExtractor (class in emm.features) (class in emm.features.pandas_feature_extractor) PandasNaiveIndexer (class in emm.indexing) (class in emm.indexing.pandas_naive_indexer) PandasNormalizedTfidfVectorizer (class in emm.indexing.pandas_normalized_tfidf) PandasPreprocessor (class in emm.preprocessing) (class in emm.preprocessing.pandas_preprocessor) PandasSortedNeighbourhoodIndexer (class in emm.indexing) (class in emm.indexing.pandas_sni) PandasSupervisedLayerTransformer (class in emm.supervised_model) (class in emm.supervised_model.pandas_supervised_model) Pipeline (class in emm.base.pipeline) prepare_name_pairs() (in module emm.data.prepare_name_pairs) prepare_name_pairs_pd() (in module emm.data.prepare_name_pairs) preprocess() (in module emm.preprocessing.abbreviation_util) ptp() (in module emm.features.features_rank) R rank() (in module emm.features.features_rank) reader (emm.helper.io.IOFunc property) regex_replace() (in module emm.preprocessing.pandas_functions) remove_blacklisted_names() (emm.aggregation.base_entity_aggregation.BaseEntityAggregation method) (emm.aggregation.pandas_entity_aggregation.PandasEntityAggregation method) (emm.aggregation.PandasEntityAggregation method) rename_columns() (in module emm.helper.util) replace_none() (in module emm.preprocessing.functions) retrieve_kvk_test_sample() (in module emm.data.create_data) run_custom_function() (in module emm.preprocessing.pandas_functions) S save() (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) save_file() (in module emm.helper.io) select_best_score() (emm.supervised_model.pandas_supervised_model.PandasSupervisedLayerTransformer method) (emm.supervised_model.PandasSupervisedLayerTransformer method) select_with_prefix() (in module emm.indexing.pandas_candidate_selection) set_logger() (in module emm) (in module emm.loggers.logger) set_partitions() (in module emm.helper.spark_utils) set_reader() (emm.helper.io.IOFunc method) set_return_sm_features() (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) set_score_request() (emm.helper.sklearn_pipeline.SklearnPipelineWrapper method) set_spark_job_group() (in module emm.helper.spark_utils) set_threshold() (emm.pipeline.base_entity_matching.BaseEntityMatching method) SklearnPipelineWrapper (class in emm.helper.sklearn_pipeline) SNBaseIndexer (class in emm.indexing.base_indexer) spark_checkpoint() (in module emm.helper.spark_utils) split_data() (in module emm.data.create_data) split_word() (emm.data.noiser.Noiser method) stack_features() (in module emm.indexing.spark_indexing_utils) start() (emm.loggers.Timer method) (emm.loggers.timer.Timer method) store_ground_truth (emm.indexing.pandas_sni.PandasSortedNeighbourhoodIndexer property) (emm.indexing.PandasSortedNeighbourhoodIndexer property) string_columns_to_pyarrow() (in module emm.helper.util) swap_letter() (emm.data.noiser.Noiser method) swap_words() (emm.data.noiser.Noiser method) T take_topn_per_group() (in module emm.indexing.spark_indexing_utils) test_classifier() (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) Timer (class in emm.loggers) (class in emm.loggers.timer) top2_dist() (in module emm.features.features_rank) train_model() (in module emm.supervised_model) (in module emm.supervised_model.base_supervised_model) train_test_model() (in module emm.supervised_model) (in module emm.supervised_model.base_supervised_model) transform() (emm.aggregation.pandas_entity_aggregation.PandasEntityAggregation method) (emm.aggregation.PandasEntityAggregation method) (emm.features.pandas_feature_extractor.PandasFeatureExtractor method) (emm.features.PandasFeatureExtractor method) (emm.helper.sklearn_pipeline.SklearnPipelineWrapper method) (emm.indexing.pandas_candidate_selection.PandasCandidateSelectionTransformer method) (emm.indexing.pandas_cos_sim_matcher.PandasCosSimIndexer method) (emm.indexing.pandas_naive_indexer.PandasNaiveIndexer method) (emm.indexing.pandas_normalized_tfidf.PandasNormalizedTfidfVectorizer method) (emm.indexing.pandas_sni.PandasSortedNeighbourhoodIndexer method) (emm.indexing.PandasCosSimIndexer method) (emm.indexing.PandasNaiveIndexer method) (emm.indexing.PandasSortedNeighbourhoodIndexer method) (emm.PandasEntityMatching method) (emm.pipeline.pandas_entity_matching.PandasEntityMatching method) (emm.pipeline.PandasEntityMatching method) (emm.preprocessing.pandas_preprocessor.PandasPreprocessor method) (emm.preprocessing.PandasPreprocessor method) (emm.supervised_model.pandas_supervised_model.PandasSupervisedLayerTransformer method) (emm.supervised_model.PandasSupervisedLayerTransformer method) transform_parallel() (emm.indexing.pandas_normalized_tfidf.PandasNormalizedTfidfVectorizer method) trim() (in module emm.preprocessing.pandas_functions) trim_lower() (in module emm.preprocessing.pandas_functions) types_by_lef_dict() (in module emm.features.features_lef) V version() (emm.indexing.base_indexer.BaseIndexer static method) (emm.pipeline.base_entity_matching.BaseEntityMatching static method) very_common_words (emm.features.features_vocabulary.Vocabulary attribute) Vocabulary (class in emm.features.features_vocabulary) W writer (emm.helper.io.IOFunc property)