"""Run three feature-selection approaches on the data in ``hour.csv``.

The script is organised in three independent sections:

* Filter method   -- ``SelectKBest`` with the ``f_classif`` score on the
  imputed and one-hot-encoded feature columns.
* Wrapper method  -- recursive feature elimination (``RFE``) around a
  ``RandomForestClassifier``.
* Embedded method -- the top ``k`` columns ranked by the
  ``feature_importances_`` of a fitted ``RandomForestClassifier``.

Each section prints the features it selected and the shape of the reduced
feature set.
"""

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE, SelectKBest, f_classif
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# ------------------------------------- Filter Method -----------------------------------------------------

# Load the dataset into a pandas DataFrame
df = pd.read_csv('hour.csv')

# Separate the features (X) and target variable (y)
y = df['hoursbefore']
X = df[[
    "parity",
    "TIME",
    "avgtotalmotion",
    "avgtotalsteps",
    "avglyingbouts",
    "avgrumination",
    "avgactivity",
    "avghoursstanding",
    "avghourslying",
]]

# Identify numeric and categorical columns
numeric_cols = X.select_dtypes(include=np.number).columns
categorical_cols = X.select_dtypes(include='object').columns

# Impute missing values: column mean for numeric data, most frequent value
# for categorical data
numeric_transformer = SimpleImputer(strategy='mean')
categorical_transformer = SimpleImputer(strategy='most_frequent')

# Route each group of columns to its imputer
preprocessor = ColumnTransformer(
    transformers=[
        ('numeric', numeric_transformer, numeric_cols),
        ('categorical', categorical_transformer, categorical_cols),
    ]
)

# Fit and transform the data
X_preprocessed = preprocessor.fit_transform(X)

# Convert back to a DataFrame; the column order follows the transformer order
# declared above (numeric first, then categorical)
X_preprocessed = pd.DataFrame(
    X_preprocessed,
    columns=numeric_cols.tolist() + categorical_cols.tolist(),
)

# One-hot encode the categorical columns, dropping the first level of each
encoder = OneHotEncoder(drop='first')
X_encoded = encoder.fit_transform(X_preprocessed[categorical_cols])
X_encoded = pd.DataFrame(
    X_encoded.toarray(),
    columns=encoder.get_feature_names_out(categorical_cols),
)

# Concatenate encoded features with numeric features
X_final = pd.concat([X_preprocessed[numeric_cols], X_encoded], axis=1)

# Initialize SelectKBest with the f_classif scoring function
k = 5  # Number of top features to select
selector = SelectKBest(score_func=f_classif, k=k)

# Fit the selector to the data and transform the features
X_new = selector.fit_transform(X_final, y)

# Get the selected feature indices
feature_indices = selector.get_support(indices=True)

# Get the selected feature names
selected_features = X_final.columns[feature_indices]

# Print the selected feature names
print("Selected Features:")
print(selected_features)

# Check the size of the new feature set
print("Size of New Feature Set:", X_new.shape)

# ------------------------------------- Wrapper Method -----------------------------------------------------

# Separate the features (X) and target variable (y)
# NOTE(review): 'target_variable' looks like a template placeholder -- the
# filter section above uses 'hoursbefore' as the target. Confirm this column
# exists in hour.csv. Also note that df.drop keeps every other raw column
# (not imputed, not encoded) in X.
y = df['target_variable']
X = df.drop('target_variable', axis=1)

# Initialize the Random Forest classifier
clf = RandomForestClassifier()

# Initialize RFE with the classifier and number of desired features to select
# NOTE(review): if X has fewer than 200 columns RFE has nothing to eliminate
# and every feature is kept -- confirm this value is intended.
n_features = 200  # Number of top features to select
rfe = RFE(estimator=clf, n_features_to_select=n_features)

# Fit RFE to the data and transform the features
X_new = rfe.fit_transform(X, y)

# Get the selected feature indices
feature_indices = rfe.get_support(indices=True)

# Print the selected feature indices
print("Selected Feature Indices:")
print(feature_indices)

# Check the size of the new feature set
print("Size of New Feature Set:", X_new.shape)

# ------------------------------------- Embedded Method -----------------------------------------------------

# Separate the features (X) and target variable (y)
# NOTE(review): same 'target_variable' placeholder concern as in the wrapper
# section -- confirm against the dataset.
y = df['target_variable']
X = df.drop('target_variable', axis=1)

# Initialize the RandomForestClassifier model (seeded for repeatable results)
rf_model = RandomForestClassifier(random_state=45)

# Fit the RandomForestClassifier model to the data
rf_model.fit(X, y)

# Get feature importances from the trained model
feature_importances = rf_model.feature_importances_

# Sort the feature indices by importance, most important first
sorted_indices = feature_importances.argsort()[::-1]

# Set the number of top features to select
k = 5

# Get the indices of the top k features
selected_feature_indices = sorted_indices[:k]

# Get the selected feature names
selected_features = X.columns[selected_feature_indices]

# Print the selected feature names
print("Selected Features:")
print(selected_features)

# Create a new DataFrame with the selected features
X_new = X[selected_features]

# Check the size of the new feature set
print("Size of New Feature Set:", X_new.shape)