Class XGBoost

  • All Implemented Interfaces:
    Destroyable, GlobalInfoSupporter, LoggingLevelHandler, LoggingSupporter, OptionHandler, SizeOfHandler, TechnicalInformationHandler, Serializable, weka.classifiers.Classifier, weka.core.CapabilitiesHandler, weka.core.OptionHandler

    public class XGBoost
    extends AbstractSimpleClassifier
    implements TechnicalInformationHandler
    Classifier implementing XGBoost.

     @inproceedings{Chen2016,
        address = {New York, NY, USA},
        author = {Chen, Tianqi and Guestrin, Carlos},
        booktitle = {Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
        pages = {785--794},
        publisher = {ACM},
        series = {KDD '16},
        title = {XGBoost: A Scalable Tree Boosting System},
        year = {2016},
        ISBN = {978-1-4503-4232-2},
        keywords = {large-scale machine learning},
        location = {San Francisco, California, USA},
        URL = {http://doi.acm.org/10.1145/2939672.2939785}
     }
     


    -logging-level <OFF|SEVERE|WARNING|INFO|CONFIG|FINE|FINER|FINEST> (property: loggingLevel)
        The logging level for outputting errors and debugging output.
        default: WARNING
     
    -booster <GBTREE|GBLINEAR|DART> (property: booster)
        Which booster to use.
        default: GBTREE
     
    -verbosity <SILENT|WARNING|INFO|DEBUG> (property: verbosity)
        Verbosity of printing messages.
        default: WARNING
     
    -nthread <int> (property: numThreads)
        The number of parallel threads used to run XGBoost.
        default: -1
     
    -eta <float> (property: eta)
        The step size shrinkage to use in updates to prevent overfitting.
        default: 0.3
        minimum: 0.0
        maximum: 1.0
     
    -gamma <float> (property: gamma)
        The minimum loss reduction required to make a further partition on a leaf
        node of the tree.
        default: 0.0
        minimum: 0.0
        maximum: Infinity
     
    -max_depth <int> (property: maxDepth)
        The maximum depth of a tree.
        default: 6
        minimum: 0
        maximum: 2147483647
     
    -min_child_weight <float> (property: minChildWeight)
        The minimum sum of instance weights (hessian) needed in a child.
        default: 1.0
        minimum: 0.0
        maximum: Infinity
     
    -max_delta_step <float> (property: maximumDeltaStep)
        The maximum delta step we allow each leaf output to be.
        default: 0.0
        minimum: 0.0
        maximum: 3.4028235E38
     
    -subsample <float> (property: subsampleRatio)
        The sub-sample ratio of the training instances.
        default: 1.0
        minimum: 1.4E-45
        maximum: 1.0
     
    -colsample_bytree <float> (property: columnSampleByTree)
        The sub-sample ratio of columns when constructing each tree.
        default: 1.0
        minimum: 1.4E-45
        maximum: 1.0
     
    -colsample_bylevel <float> (property: columnSampleByLevel)
        The sub-sample ratio of columns for each level.
        default: 1.0
        minimum: 1.4E-45
        maximum: 1.0
     
    -colsample_bynode <float> (property: columnSampleByNode)
        The sub-sample ratio of columns for each node (split).
        default: 1.0
        minimum: 1.4E-45
        maximum: 1.0
     
    -tree_method <AUTO|EXACT|APPROX|HIST|GPU_EXACT|GPU_HIST> (property: treeMethod)
        The tree construction algorithm used in XGBoost.
        default: AUTO
     
    -scale_pos_weight <float> (property: scalePositiveWeights)
        Scales the weights of positive examples by this factor.
        default: 1.0
        minimum: 0.0
        maximum: 3.4028235E38
     
    -process_type <DEFAULT|UPDATE> (property: processType)
        The type of boosting process to run.
        default: DEFAULT
     
    -grow_policy <DEPTHWISE|LOSSGUIDE> (property: growPolicy)
        The way new nodes are added to the tree.
        default: DEPTHWISE
     
    -max_leaves <int> (property: maxLeaves)
        The maximum number of nodes to be added.
        default: 0
        minimum: 0
        maximum: 2147483647
     
    -max_bin <int> (property: maxBin)
        The maximum number of discrete bins to bucket continuous features.
        default: 256
        minimum: 2
        maximum: 2147483647
     
    -predictor <CPU|GPU|DEFAULT> (property: predictor)
        The type of predictor algorithm to use.
        default: DEFAULT
     
    -num_parallel_tree <int> (property: numberOfParallelTrees)
        The number of parallel trees constructed during each iteration.
        default: 1
        minimum: 1
        maximum: 2147483647
     
    -sample_type <UNIFORM|WEIGHTED> (property: sampleType)
        The type of sampling algorithm.
        default: UNIFORM
     
    -normalize_type <TREE|FOREST> (property: normaliseType)
        The type of normalisation algorithm.
        default: TREE
     
    -rate_drop <float> (property: rateDrop)
        The dropout rate (a fraction of previous trees to drop during the
        dropout).
        default: 0.0
        minimum: 0.0
        maximum: 1.0
     
    -one_drop <boolean> (property: oneDrop)
        Whether at least one tree is always dropped during the dropout.
        default: false
     
    -skip_drop <float> (property: skipDrop)
        The probability of skipping the dropout procedure during a boosting iteration.
        default: 0.0
        minimum: 0.0
        maximum: 1.0
     
    -lambda <float> (property: lambda)
        The L2 regularisation term on weights.
        default: 1.0
     
    -alpha <float> (property: alpha)
        The L1 regularisation term on weights.
        default: 0.0
     
    -updater <SHOTGUN|COORD_DESCENT> (property: updater)
        The choice of algorithm to fit the linear model.
        default: SHOTGUN
     
    -feature_selector <CYCLIC|SHUFFLE|RANDOM|GREEDY|THRIFTY> (property: featureSelector)
        The feature selection and ordering method.
        default: CYCLIC
     
    -top_k <int> (property: topK)
        The number of top features to select when using the greedy or thrifty feature
        selector.
        default: 0
        minimum: 0
        maximum: 2147483647
     
    -tweedie_variance_power <float> (property: tweedieVariancePower)
        The parameter that controls the variance of the Tweedie distribution.
        default: 1.5
        minimum: 1.0
        maximum: 2.0
     
    -objective <LINEAR_REGRESSION|LOGISTIC_REGRESSION|LOGISTIC_REGRESSION_FOR_BINARY_CLASSIFICATION|LOGIT_RAW_REGRESSION_FOR_BINARY_CLASSIFICATION|HINGE_LOSS_FOR_BINARY_CLASSIFICATION|POISSON_REGRESSION_FOR_COUNT_DATA|COX_REGRESSION|SOFTMAX_MULTICLASS_CLASSIFICATION|SOFTPROB_MULTICLASS_CLASSIFICATION|LAMBDAMART_PAIRWISE_RANKING|LAMBDAMART_MAXIMISE_NDCG|LAMBDAMART_MAXIMISE_MAP|GAMMA_REGRESSION|TWEEDIE_REGRESSION> (property: objective)
        The learning objective.
        default: LINEAR_REGRESSION
     
    -base_score <float> (property: baseScore)
        The initial prediction score of all instances (global bias).
        default: 0.5
     
    -seed <int> (property: seed)
        The random number seed.
        default: 0
     
    -rounds <int> (property: numberOfRounds)
        The number of boosting rounds to perform.
        default: 2
        minimum: 1
        maximum: 2147483647
     
    -other_params <adams.core.base.BaseKeyValuePair> [-other_params ...] (property: otherParameters)
        Passes any additional parameters to XGBoost.
        default:
     

    Wrapper class that uses the XGBoost4J library to implement XGBoost as a WEKA classifier.

    Author:
    Corey Sterling (csterlin at waikato dot ac dot nz), FracPete (fracpete at waikato dot ac dot nz)
    See Also:
    Serialized Form
    • Field Detail

      • MIN_GLIBC_VERSION

        public static final String[] MIN_GLIBC_VERSION
      • m_Verbosity

        protected XGBoost.Verbosity m_Verbosity
        Verbosity of printing messages.
      • m_NumberOfThreads

        protected int m_NumberOfThreads
        The number of threads to use.
      • m_Eta

        protected float m_Eta
        The eta value (learning rate).
      • m_Gamma

        protected float m_Gamma
        The gamma value (minimum split loss).
      • m_MaxDepth

        protected int m_MaxDepth
        The maximum depth of the tree.
      • m_MinChildWeight

        protected float m_MinChildWeight
        The minimum child weight.
      • m_MaxDeltaStep

        protected float m_MaxDeltaStep
        Maximum delta step.
      • m_Subsample

        protected float m_Subsample
        Subsample ratio of the training instances.
      • m_ColumnSampleByTree

        protected float m_ColumnSampleByTree
        Subsample ratio of columns when constructing each tree.
      • m_ColumnSampleByLevel

        protected float m_ColumnSampleByLevel
        Subsample ratio of columns for each level.
      • m_ColumnSampleByNode

        protected float m_ColumnSampleByNode
        Subsample ratio of columns for each node (split).
      • m_TreeMethod

        protected XGBoost.TreeMethod m_TreeMethod
        The tree construction algorithm.
      • m_ScalePositiveWeights

        protected float m_ScalePositiveWeights
        Scales the weights of positive instances by this factor.
      • m_ProcessType

        protected XGBoost.ProcessType m_ProcessType
        The type of boosting process to run.
      • m_GrowPolicy

        protected XGBoost.GrowPolicy m_GrowPolicy
        Controls the way new nodes are added to the tree.
      • m_MaxLeaves

        protected int m_MaxLeaves
        Maximum number of nodes to be added.
      • m_MaxBin

        protected int m_MaxBin
        Maximum number of discrete bins to bucket continuous features.
      • m_Predictor

        protected XGBoost.Predictor m_Predictor
        The type of predictor algorithm to use.
      • m_NumberOfParallelTrees

        protected int m_NumberOfParallelTrees
        The number of parallel trees constructed during each iteration.
      • m_RateDrop

        protected float m_RateDrop
        Dropout rate.
      • m_OneDrop

        protected boolean m_OneDrop
        Whether to always drop at least one tree during dropout.
      • m_SkipDrop

        protected float m_SkipDrop
        Probability of skipping the dropout procedure during the boosting operation.
      • m_Lambda

        protected float m_Lambda
        L2 regularisation term on weights.
      • m_Alpha

        protected float m_Alpha
        L1 regularisation term on weights.
      • m_Updater

        protected XGBoost.Updater m_Updater
        Choice of algorithm to fit linear model.
      • m_TopK

        protected int m_TopK
        The number of top features to select.
      • m_TweedieVariancePower

        protected float m_TweedieVariancePower
        Parameter that controls the variance of the Tweedie distribution.
      • m_BaseScore

        protected float m_BaseScore
        Global bias.
      • m_Seed

        protected int m_Seed
        The random number seed.
      • m_NumberOfRounds

        protected int m_NumberOfRounds
        The number of boosting rounds to perform.
      • m_OtherParameters

        protected BaseKeyValuePair[] m_OtherParameters
        Allows the user to enter arbitrary parameters.
      • m_Booster

        protected ml.dmlc.xgboost4j.java.Booster m_Booster
        The trained model.
      • m_Header

        protected weka.core.Instances m_Header
        The training dataset.
      • m_Params

        protected Map<String,​Object> m_Params
        The XGBoost parameters.
    • Constructor Detail

      • XGBoost

        public XGBoost()
    • Method Detail

      • getBooster

        public XGBoost.BoosterType getBooster()
        Gets the type of booster to use.
        Returns:
        The booster type.
      • setBooster

        public void setBooster​(XGBoost.BoosterType value)
        Sets the type of booster to use.
        Parameters:
        value - The booster type.
      • boosterTipText

        public String boosterTipText()
        Gets the tip-text for the booster option.
        Returns:
        The tip-text as a string.
      • getVerbosity

        public XGBoost.Verbosity getVerbosity()
        Gets the verbosity level.
        Returns:
        The verbosity level.
      • setVerbosity

        public void setVerbosity​(XGBoost.Verbosity value)
        Sets the verbosity level.
        Parameters:
        value - The verbosity level.
      • verbosityTipText

        public String verbosityTipText()
        Gets the tip-text for the verbosity option.
        Returns:
        The tip-text as a string.
      • getNumThreads

        public int getNumThreads()
        Gets the number of parallel threads used to run XGBoost.
        Returns:
        The number of threads.
      • setNumThreads

        public void setNumThreads​(int value)
        Sets the number of parallel threads used to run XGBoost.
        Parameters:
        value - The number of threads.
      • numThreadsTipText

        public String numThreadsTipText()
        Gets the tip-text for the numThreads option.
        Returns:
        The tip-text as a string.
      • getEta

        public float getEta()
        Gets the step size shrinkage to use in updates to prevent overfitting.
        Returns:
        The eta value.
      • setEta

        public void setEta​(float value)
        Sets the step size shrinkage to use in updates to prevent overfitting.
        Parameters:
        value - The eta value.
      • etaTipText

        public String etaTipText()
        Gets the tip-text for the eta option.
        Returns:
        The tip-text as a string.
      • getGamma

        public float getGamma()
        Gets the minimum loss reduction required to make a further partition on a leaf node of the tree.
        Returns:
        The gamma value.
      • setGamma

        public void setGamma​(float value)
        Sets the minimum loss reduction required to make a further partition on a leaf node of the tree.
        Parameters:
        value - The gamma value.
      • gammaTipText

        public String gammaTipText()
        Gets the tip-text for the gamma option.
        Returns:
        The tip-text as a string.
      • getMaxDepth

        public int getMaxDepth()
        Gets the maximum depth of a tree.
        Returns:
        The maximum depth.
      • setMaxDepth

        public void setMaxDepth​(int value)
        Sets the maximum depth of a tree.
        Parameters:
        value - The maximum depth.
      • maxDepthTipText

        public String maxDepthTipText()
        Gets the tip-text for the maxDepth option.
        Returns:
        The tip-text as a string.
      • getMinChildWeight

        public float getMinChildWeight()
        Gets the minimum sum of instance weights (hessian) needed in a child.
        Returns:
        The minimum sum.
      • setMinChildWeight

        public void setMinChildWeight​(float value)
        Sets the minimum sum of instance weights (hessian) needed in a child.
        Parameters:
        value - The minimum sum.
      • minChildWeightTipText

        public String minChildWeightTipText()
        Gets the tip-text for the minChildWeight option.
        Returns:
        The tip-text as a string.
      • getMaximumDeltaStep

        public float getMaximumDeltaStep()
        Gets the maximum delta step we allow each leaf output to be.
        Returns:
        The maximum delta step.
      • setMaximumDeltaStep

        public void setMaximumDeltaStep​(float value)
        Sets the maximum delta step we allow each leaf output to be.
        Parameters:
        value - The maximum delta step.
      • maximumDeltaStepTipText

        public String maximumDeltaStepTipText()
        Gets the tip-text for the maximumDeltaStep option.
        Returns:
        The tip-text as a string.
      • getSubsampleRatio

        public float getSubsampleRatio()
        Gets the sub-sample ratio of the training instances.
        Returns:
        The sub-sample ratio.
      • setSubsampleRatio

        public void setSubsampleRatio​(float value)
        Sets the sub-sample ratio of the training instances.
        Parameters:
        value - The sub-sample ratio.
      • subsampleRatioTipText

        public String subsampleRatioTipText()
        Gets the tip-text for the subsampleRatio option.
        Returns:
        The tip-text as a string.
      • getColumnSampleByTree

        public float getColumnSampleByTree()
        Gets the sub-sample ratio of columns when constructing each tree.
        Returns:
        The sub-sample ratio.
      • setColumnSampleByTree

        public void setColumnSampleByTree​(float value)
        Sets the sub-sample ratio of columns when constructing each tree.
        Parameters:
        value - The sub-sample ratio.
      • columnSampleByTreeTipText

        public String columnSampleByTreeTipText()
        Gets the tip-text for the columnSampleByTree option.
        Returns:
        The tip-text as a string.
      • getColumnSampleByLevel

        public float getColumnSampleByLevel()
        Gets the sub-sample ratio of columns for each level.
        Returns:
        The sub-sample ratio.
      • setColumnSampleByLevel

        public void setColumnSampleByLevel​(float value)
        Sets the sub-sample ratio of columns for each level.
        Parameters:
        value - The sub-sample ratio.
      • columnSampleByLevelTipText

        public String columnSampleByLevelTipText()
        Gets the tip-text for the columnSampleByLevel option.
        Returns:
        The tip-text as a string.
      • getColumnSampleByNode

        public float getColumnSampleByNode()
        Gets the sub-sample ratio of columns for each node (split).
        Returns:
        The sub-sample ratio.
      • setColumnSampleByNode

        public void setColumnSampleByNode​(float value)
        Sets the sub-sample ratio of columns for each node (split).
        Parameters:
        value - The sub-sample ratio.
      • columnSampleByNodeTipText

        public String columnSampleByNodeTipText()
        Gets the tip-text for the columnSampleByNode option.
        Returns:
        The tip-text as a string.
      • getTreeMethod

        public XGBoost.TreeMethod getTreeMethod()
        Gets the tree construction algorithm used in XGBoost.
        Returns:
        The algorithm.
      • setTreeMethod

        public void setTreeMethod​(XGBoost.TreeMethod value)
        Sets the tree construction algorithm used in XGBoost.
        Parameters:
        value - The algorithm.
      • treeMethodTipText

        public String treeMethodTipText()
        Gets the tip-text for the treeMethod option.
        Returns:
        The tip-text as a string.
      • getScalePositiveWeights

        public float getScalePositiveWeights()
        Gets the positive-weights scale factor.
        Returns:
        The scale factor.
      • setScalePositiveWeights

        public void setScalePositiveWeights​(float value)
        Sets the positive-weights scale factor.
        Parameters:
        value - The scale factor.
      • scalePositiveWeightsTipText

        public String scalePositiveWeightsTipText()
        Gets the tip-text for the scalePositiveWeights option.
        Returns:
        The tip-text as a string.
      • getProcessType

        public XGBoost.ProcessType getProcessType()
        Gets the type of boosting process to run.
        Returns:
        The process type.
      • setProcessType

        public void setProcessType​(XGBoost.ProcessType value)
        Sets the type of boosting process to run.
        Parameters:
        value - The process type.
      • processTypeTipText

        public String processTypeTipText()
        Gets the tip-text for the processType option.
        Returns:
        The tip-text as a string.
      • getGrowPolicy

        public XGBoost.GrowPolicy getGrowPolicy()
        Gets the way new nodes are added to the tree.
        Returns:
        The grow policy.
      • setGrowPolicy

        public void setGrowPolicy​(XGBoost.GrowPolicy value)
        Sets the way new nodes are added to the tree.
        Parameters:
        value - The grow policy.
      • growPolicyTipText

        public String growPolicyTipText()
        Gets the tip-text for the growPolicy option.
        Returns:
        The tip-text as a string.
      • getMaxLeaves

        public int getMaxLeaves()
        Gets the maximum number of nodes to be added.
        Returns:
        The maximum number of nodes.
      • setMaxLeaves

        public void setMaxLeaves​(int value)
        Sets the maximum number of nodes to be added.
        Parameters:
        value - The maximum number of nodes.
      • maxLeavesTipText

        public String maxLeavesTipText()
        Gets the tip-text for the maxLeaves option.
        Returns:
        The tip-text as a string.
      • getMaxBin

        public int getMaxBin()
        Gets the maximum number of discrete bins to bucket continuous features.
        Returns:
        The maximum number of bins.
      • setMaxBin

        public void setMaxBin​(int value)
        Sets the maximum number of discrete bins to bucket continuous features.
        Parameters:
        value - The maximum number of bins.
      • maxBinTipText

        public String maxBinTipText()
        Gets the tip-text for the maxBin option.
        Returns:
        The tip-text as a string.
      • getPredictor

        public XGBoost.Predictor getPredictor()
        Gets the type of predictor algorithm to use.
        Returns:
        The predictor algorithm.
      • setPredictor

        public void setPredictor​(XGBoost.Predictor value)
        Sets the type of predictor algorithm to use.
        Parameters:
        value - The predictor algorithm.
      • predictorTipText

        public String predictorTipText()
        Gets the tip-text for the predictor option.
        Returns:
        The tip-text as a string.
      • getNumberOfParallelTrees

        public int getNumberOfParallelTrees()
        Gets the number of parallel trees constructed during each iteration.
        Returns:
        The number of parallel trees.
      • setNumberOfParallelTrees

        public void setNumberOfParallelTrees​(int value)
        Sets the number of parallel trees constructed during each iteration.
        Parameters:
        value - The number of parallel trees.
      • numberOfParallelTreesTipText

        public String numberOfParallelTreesTipText()
        Gets the tip-text for the numberOfParallelTrees option.
        Returns:
        The tip-text as a string.
      • getSampleType

        public XGBoost.SampleType getSampleType()
        Gets the type of sampling algorithm.
        Returns:
        The type of sampling algorithm.
      • setSampleType

        public void setSampleType​(XGBoost.SampleType value)
        Sets the type of sampling algorithm.
        Parameters:
        value - The type of sampling algorithm.
      • sampleTypeTipText

        public String sampleTypeTipText()
        Gets the tip-text for the sampleType option.
        Returns:
        The tip-text as a string.
      • getNormaliseType

        public XGBoost.NormaliseType getNormaliseType()
        Gets the type of normalisation algorithm.
        Returns:
        The type of normalisation algorithm.
      • setNormaliseType

        public void setNormaliseType​(XGBoost.NormaliseType value)
        Sets the type of normalisation algorithm.
        Parameters:
        value - The type of normalisation algorithm.
      • normaliseTypeTipText

        public String normaliseTypeTipText()
        Gets the tip-text for the normaliseType option.
        Returns:
        The tip-text as a string.
      • getRateDrop

        public float getRateDrop()
        Gets the dropout rate (a fraction of previous trees to drop during the dropout).
        Returns:
        The dropout rate.
      • setRateDrop

        public void setRateDrop​(float value)
        Sets the dropout rate (a fraction of previous trees to drop during the dropout).
        Parameters:
        value - The dropout rate.
      • rateDropTipText

        public String rateDropTipText()
        Gets the tip-text for the rateDrop option.
        Returns:
        The tip-text as a string.
      • getOneDrop

        public boolean getOneDrop()
        Gets whether at least one tree is always dropped during the dropout.
        Returns:
        The flag state.
      • setOneDrop

        public void setOneDrop​(boolean value)
        Sets whether at least one tree is always dropped during the dropout.
        Parameters:
        value - The flag state.
      • oneDropTipText

        public String oneDropTipText()
        Gets the tip-text for the oneDrop option.
        Returns:
        The tip-text as a string.
      • getSkipDrop

        public float getSkipDrop()
        Gets the probability of skipping the dropout procedure during a boosting iteration.
        Returns:
        The probability.
      • setSkipDrop

        public void setSkipDrop​(float value)
        Sets the probability of skipping the dropout procedure during a boosting iteration.
        Parameters:
        value - The probability.
      • skipDropTipText

        public String skipDropTipText()
        Gets the tip-text for the skipDrop option.
        Returns:
        The tip-text as a string.
      • getLambda

        public float getLambda()
        Gets the L2 regularisation term on weights.
        Returns:
        The L2 regularisation term.
      • setLambda

        public void setLambda​(float value)
        Sets the L2 regularisation term on weights.
        Parameters:
        value - The L2 regularisation term.
      • lambdaTipText

        public String lambdaTipText()
        Gets the tip-text for the lambda option.
        Returns:
        The tip-text as a string.
      • getAlpha

        public float getAlpha()
        Gets the L1 regularisation term on weights.
        Returns:
        The L1 regularisation term.
      • setAlpha

        public void setAlpha​(float value)
        Sets the L1 regularisation term on weights.
        Parameters:
        value - The L1 regularisation term.
      • alphaTipText

        public String alphaTipText()
        Gets the tip-text for the alpha option.
        Returns:
        The tip-text as a string.
      • getUpdater

        public XGBoost.Updater getUpdater()
        Gets the choice of algorithm to fit the linear model.
        Returns:
        The algorithm.
      • setUpdater

        public void setUpdater​(XGBoost.Updater value)
        Sets the choice of algorithm to fit the linear model.
        Parameters:
        value - The algorithm.
      • updaterTipText

        public String updaterTipText()
        Gets the tip-text for the updater option.
        Returns:
        The tip-text as a string.
      • getFeatureSelector

        public XGBoost.FeatureSelector getFeatureSelector()
        Gets the feature selection and ordering method.
        Returns:
        The feature selector.
      • setFeatureSelector

        public void setFeatureSelector​(XGBoost.FeatureSelector value)
        Sets the feature selection and ordering method.
        Parameters:
        value - The feature selector.
      • featureSelectorTipText

        public String featureSelectorTipText()
        Gets the tip-text for the featureSelector option.
        Returns:
        The tip-text as a string.
      • getTopK

        public int getTopK()
        Gets the number of top features to select when using the greedy or thrifty feature selector.
        Returns:
        The number of features to select.
      • setTopK

        public void setTopK​(int value)
        Sets the number of top features to select when using the greedy or thrifty feature selector.
        Parameters:
        value - The number of features to select.
      • topKTipText

        public String topKTipText()
        Gets the tip-text for the topK option.
        Returns:
        The tip-text as a string.
      • getTweedieVariancePower

        public float getTweedieVariancePower()
        Gets the parameter that controls the variance of the Tweedie distribution.
        Returns:
        The parameter value.
      • setTweedieVariancePower

        public void setTweedieVariancePower​(float value)
        Sets the parameter that controls the variance of the Tweedie distribution.
        Parameters:
        value - The parameter value.
      • tweedieVariancePowerTipText

        public String tweedieVariancePowerTipText()
        Gets the tip-text for the tweedieVariancePower option.
        Returns:
        The tip-text as a string.
      • getObjective

        public XGBoost.Objective getObjective()
        Gets the learning objective.
        Returns:
        The learning objective.
      • setObjective

        public void setObjective​(XGBoost.Objective value)
        Sets the learning objective.
        Parameters:
        value - The learning objective.
      • objectiveTipText

        public String objectiveTipText()
        Gets the tip-text for the objective option.
        Returns:
        The tip-text as a string.
      • getBaseScore

        public float getBaseScore()
        Gets the initial prediction score of all instances (global bias).
        Returns:
        The global bias.
      • setBaseScore

        public void setBaseScore​(float value)
        Sets the initial prediction score of all instances (global bias).
        Parameters:
        value - The global bias.
      • baseScoreTipText

        public String baseScoreTipText()
        Gets the tip-text for the baseScore option.
        Returns:
        The tip-text as a string.
      • getSeed

        public int getSeed()
        Gets the random number seed.
        Returns:
        The seed value.
      • setSeed

        public void setSeed​(int value)
        Sets the random number seed.
        Parameters:
        value - The seed value.
      • seedTipText

        public String seedTipText()
        Gets the tip-text for the seed option.
        Returns:
        The tip-text as a string.
      • getNumberOfRounds

        public int getNumberOfRounds()
        Gets the number of boosting rounds to perform.
        Returns:
        The number of boosting rounds to perform.
      • setNumberOfRounds

        public void setNumberOfRounds​(int value)
        Sets the number of boosting rounds to perform.
        Parameters:
        value - The number of boosting rounds to perform.
      • numberOfRoundsTipText

        public String numberOfRoundsTipText()
        Gets the tip-text for the numberOfRounds option.
        Returns:
        The tip-text as a string.
      • getOtherParameters

        public BaseKeyValuePair[] getOtherParameters()
        Gets any other XGBoost parameters the user has set.
        Returns:
        The parameters.
      • setOtherParameters

        public void setOtherParameters​(BaseKeyValuePair[] value)
        Sets any additional XGBoost parameters.
        Parameters:
        value - The parameters, as an array of key-value pairs.
      • otherParametersTipText

        public String otherParametersTipText()
        Gets the tip-text for the otherParameters option.
        Returns:
        The tip-text as a string.
      • getTechnicalInformation

        public TechnicalInformation getTechnicalInformation()
        Returns an instance of a TechnicalInformation object, containing detailed information about the technical background of this class, e.g., paper reference or book this class is based on.
        Specified by:
        getTechnicalInformation in interface TechnicalInformationHandler
        Returns:
        the technical information about this class
      • numberOfRequiredDMatrixColumns

        protected int numberOfRequiredDMatrixColumns​(weka.core.Instances instances)
        Calculates the number of columns required to represent the attributes of the given dataset when converted to a DMatrix.
        Parameters:
        instances - The dataset being converted.
        Returns:
        The number of columns required by the converted DMatrix.
      • instancesToDMatrix

        protected ml.dmlc.xgboost4j.java.DMatrix instancesToDMatrix​(weka.core.Instance[] instances)
                                                             throws ml.dmlc.xgboost4j.java.XGBoostError
        Converts a WEKA dataset into a DMatrix (the input type expected by XGBoost).
        Parameters:
        instances - The dataset to convert.
        Returns:
        The converted dataset.
        Throws:
        ml.dmlc.xgboost4j.java.XGBoostError
      • createParamsFromOptions

        protected Map<String,​Object> createParamsFromOptions()
        Converts the options into a parameter map as expected by XGBoost.
        Returns:
        The parameter map.
      • getCapabilities

        public weka.core.Capabilities getCapabilities()
        Returns the Capabilities of this classifier. Maximally permissive capabilities are allowed by default. Derived classifiers should override this method and first disable all capabilities and then enable just those capabilities that make sense for the scheme.
        Specified by:
        getCapabilities in interface weka.core.CapabilitiesHandler
        Specified by:
        getCapabilities in interface weka.classifiers.Classifier
        Overrides:
        getCapabilities in class AbstractSimpleClassifier
        Returns:
        the capabilities of this object
        See Also:
        Capabilities
      • buildClassifier

        public void buildClassifier​(weka.core.Instances instances)
                             throws Exception
        Trains the XGBoost classifier on the incoming dataset.
        Specified by:
        buildClassifier in interface weka.classifiers.Classifier
        Parameters:
        instances - The training dataset.
        Throws:
        Exception - Any internal XGBoost error.
      • classifyInstance

        public double classifyInstance​(weka.core.Instance instance)
                                throws Exception
        Classifies the given test instance. The instance has to belong to a dataset when it's being classified. Note that a classifier MUST implement either this or distributionForInstance().
        Specified by:
        classifyInstance in interface weka.classifiers.Classifier
        Overrides:
        classifyInstance in class AbstractSimpleClassifier
        Parameters:
        instance - the instance to be classified
        Returns:
        the predicted most likely class for the instance or Utils.missingValue() if no prediction is made
        Throws:
        Exception - if an error occurred during the prediction
      • toString

        public String toString()
        Returns a description of this classifier.
        Overrides:
        toString in class AbstractOptionHandler
        Returns:
        a description of this classifier as a string.
      • main

        public static void main​(String[] args)
        Main method for running this class.
        Parameters:
        args - the options