{ "cells": [ { "cell_type": "markdown", "source": [ "# CSC 3105 Project" ], "metadata": { "collapsed": false }, "id": "cda961ffb493d00c" }, { "cell_type": "markdown", "source": [
"# Load and Clean the Data\n",
"\n",
"This code block performs the following operations:\n",
"\n",
"1. Imports necessary libraries for data handling and cleaning.\n",
"2. Defines a function `load_data` to load the data from a given directory into a pandas dataframe.\n",
"3. Defines a function `clean_data` to clean the loaded data. The cleaning process includes:\n",
"    - Handling missing values by dropping them.\n",
"    - Removing duplicate rows.\n",
"    - Converting the 'NLOS' column to integer data type.\n",
"    - Standardizing all numerical columns except 'NLOS' (this includes the measured range, 'RANGE').\n",
"    - Creating the new features 'First_Path_Power_Level' and 'SNR'.\n",
"    - Label-encoding the categorical features ('CH', 'FRAME_LEN', 'PREAM_LEN', 'BITRATE').\n",
"    - Performing feature extraction on the 'CIR' columns with PCA, keeping 95% of the variance.\n",
"    - Dropping the original 'CIR' columns.\n",
"    - Checking for columns with only one unique value and dropping them.\n",
"4. Checks if a pickle file with the cleaned data exists. If it does, it loads the data from the file. If it doesn't, it loads and cleans the data using the defined functions.\n",
"5. Prints the first few rows of the cleaned data and its column headers."
], "metadata": { "collapsed": false }, "id": "73fe8802e95a784f" }, { "cell_type": "code", "outputs": [], "source": [
"import os\n",
"\n",
"import pandas as pd\n",
"\n",
"DATASET_DIR = './UWB-LOS-NLOS-Data-Set/dataset'\n",
"\n",
"\n",
"def load_data(dataset_dir):\n",
"    \"\"\"Load every CSV file found below `dataset_dir` into a single DataFrame.\n",
"\n",
"    Args:\n",
"        dataset_dir: Directory that is walked recursively for '.csv' files.\n",
"\n",
"    Returns:\n",
"        pandas.DataFrame holding the rows of all files, concatenated in sorted\n",
"        path order and re-indexed from 0.\n",
"\n",
"    Raises:\n",
"        ValueError: If no CSV file is found below `dataset_dir`.\n",
"    \"\"\"\n",
"    # Sorting makes the row order independent of the directory listing order;\n",
"    # the extension filter keeps stray non-CSV files out of pd.read_csv.\n",
"    file_paths = sorted(\n",
"        os.path.join(dirpath, file)\n",
"        for dirpath, _, filenames in os.walk(dataset_dir)\n",
"        for file in filenames\n",
"        if file.lower().endswith('.csv')\n",
"    )\n",
"    if not file_paths:\n",
"        raise ValueError(f\"No CSV files found under {dataset_dir!r}\")\n",
"\n",
"    # ignore_index avoids repeated index labels across the concatenated files\n",
"    data = pd.concat((pd.read_csv(file_path) for file_path in file_paths), ignore_index=True)\n",
"    print(f\"Original data shape: {data.shape}\")\n",
"    return data\n"
], "metadata": { "collapsed": false }, "id": "7bcd7cfc8dd11cbb" }, { "cell_type": "code", "outputs": [], "source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"\n",
"def clean_data(data, n_cir_samples=1015, pca_variance=0.95):\n",
"    \"\"\"Clean the raw measurements and derive the features used for modelling.\n",
"\n",
"    The function reports missing values and summary statistics, box-plots the\n",
"    first 15 columns, drops incomplete and duplicated rows, casts 'NLOS' to\n",
"    int, derives 'First_Path_Power_Level' and 'SNR', label-encodes the\n",
"    categorical columns, replaces the 'CIR' columns by their principal\n",
"    components, drops constant configuration columns and standardizes every\n",
"    numerical column except 'NLOS'.\n",
"\n",
"    NOTE(review): PCA and the scaler are fitted on every row passed in. This\n",
"    notebook calls the function before the train/test split, so statistics of\n",
"    the test rows influence the training features.\n",
"\n",
"    Args:\n",
"        data: Raw DataFrame as returned by `load_data`.\n",
"        n_cir_samples: The columns 'CIR0' .. 'CIR(n_cir_samples - 1)' are\n",
"            compressed with PCA.\n",
"        pca_variance: Value passed to `PCA(n_components=...)`; as a fraction it\n",
"            is the share of variance the kept components must explain.\n",
"\n",
"    Returns:\n",
"        The cleaned DataFrame with a fresh 0-based index.\n",
"    \"\"\"\n",
"    print(\"Starting data cleaning process...\")\n",
"\n",
"    # Calculate total number of missing values in the data\n",
"    total_missing = data.isnull().sum().sum()\n",
"    print(f\"Total number of missing values: {total_missing}\")\n",
"\n",
"    # Statistical Analysis\n",
"    print(\"Statistical Analysis:\")\n",
"    print(data.describe())\n",
"\n",
"    # Plot Boxplot to check for outliers for the first 15 columns\n",
"    print(\"Boxplot of the first 15 columns:\")\n",
"    fig, axs = plt.subplots(15, 1, dpi=95, figsize=(7, 17))\n",
"    for ax, col in zip(axs, data.columns[:15]):\n",
"        ax.boxplot(data[col], vert=False)\n",
"        ax.set_ylabel(col)\n",
"    plt.show()\n",
"\n",
"    # Drop rows with missing values\n",
"    data = data.dropna()\n",
"    print(\"Missing values dropped.\")\n",
"\n",
"    # Drop duplicated rows\n",
"    data = data.drop_duplicates()\n",
"    print(\"Duplicate rows dropped.\")\n",
"\n",
"    # Convert 'NLOS' column to integer data type (0 for LOS, 1 for NLOS)\n",
"    data = data.assign(NLOS=data['NLOS'].astype(int))\n",
"    print(\"'NLOS' column converted to integer data type.\")\n",
"\n",
"    # Calculate new feature 'First_Path_Power_Level' from the first-path amplitudes and 'RXPACC'.\n",
"    # NOTE(review): the constant offset of 64 is kept unchanged; the standardization at the\n",
"    # end of this function removes any constant offset, but confirm the intended value.\n",
"    fp_energy = data['FP_AMP1'] ** 2 + data['FP_AMP2'] ** 2 + data['FP_AMP3'] ** 2\n",
"    data['First_Path_Power_Level'] = 10 * np.log10(fp_energy / data['RXPACC'] ** 2) - 64\n",
"    print(\"New feature 'First_Path_Power_Level' calculated.\")\n",
"    data = data.drop(columns=['FP_AMP1', 'FP_AMP2', 'FP_AMP3', 'RXPACC', 'PRFR'])\n",
"\n",
"    # Calculate SNR as the ratio of 'CIR_PWR' to 'STDEV_NOISE' for each data point\n",
"    data['SNR'] = data['CIR_PWR'] / data['STDEV_NOISE']\n",
"    print(\"New feature 'SNR' created.\")\n",
"    data = data.drop(columns=['CIR_PWR', 'STDEV_NOISE'])\n",
"\n",
"    # A zero amplitude, accumulator or noise value makes the derived features\n",
"    # infinite or NaN, which StandardScaler rejects or propagates; drop such rows.\n",
"    finite_rows = np.isfinite(data[['First_Path_Power_Level', 'SNR']]).all(axis=1)\n",
"    if not finite_rows.all():\n",
"        print(f\"Dropping {int((~finite_rows).sum())} rows with non-finite derived features.\")\n",
"        data = data[finite_rows].copy()\n",
"\n",
"    # Label-encode the categorical features (every category becomes an integer code)\n",
"    categorical_features = ['CH', 'FRAME_LEN', 'PREAM_LEN', 'BITRATE']\n",
"    for feature in categorical_features:\n",
"        data[feature] = LabelEncoder().fit_transform(data[feature])\n",
"    print(\"Categorical features label encoded.\")\n",
"\n",
"    # Extract the 'CIR' columns.\n",
"    # NOTE(review): with the default of 1015 a column named 'CIR1015', if present, is\n",
"    # not part of the PCA and stays in the result as a plain feature; confirm.\n",
"    cir_columns = [f\"CIR{i}\" for i in range(n_cir_samples)]\n",
"    cir_data = data[cir_columns]\n",
"    print(\"'CIR' columns extracted.\")\n",
"\n",
"    # Perform PCA on the 'CIR' columns\n",
"    pca = PCA(n_components=pca_variance)\n",
"    cir_pca = pca.fit_transform(cir_data)\n",
"    print(\"PCA performed on 'CIR' columns.\")\n",
"\n",
"    # Create a DataFrame with the principal components\n",
"    cir_pca_df = pd.DataFrame(cir_pca, columns=[f\"PC{i}\" for i in range(1, pca.n_components_ + 1)])\n",
"    print(\"DataFrame with principal components created.\")\n",
"\n",
"    # Drop the original 'CIR' columns from the data\n",
"    data = data.drop(columns=cir_columns)\n",
"    print(\"Original 'CIR' columns dropped.\")\n",
"\n",
"    # Both frames need the same 0-based index so that concat aligns them row by row\n",
"    # (cir_pca_df was just created and already has one)\n",
"    data = data.reset_index(drop=True)\n",
"    print(\"Indexes of both dataframes reset.\")\n",
"\n",
"    # Add the principal components to the remaining columns\n",
"    data = pd.concat([data, cir_pca_df], axis=1)\n",
"    print(\"Dataframes concatenated.\")\n",
"\n",
"    # Drop configuration columns that carry no information (a single unique value)\n",
"    for column in ['CH', 'PREAM_LEN', 'BITRATE']:\n",
"        if data[column].nunique() == 1:\n",
"            data = data.drop(columns=column)\n",
"            print(f\"Column '{column}' dropped due to having only one unique value.\")\n",
"\n",
"    # Standardize the numerical columns (excluding the label 'NLOS')\n",
"    numerical_cols = data.select_dtypes(include=[np.number]).columns.drop('NLOS')\n",
"    data[numerical_cols] = StandardScaler().fit_transform(data[numerical_cols])\n",
"    print(\"Numerical columns standardized.\")\n",
"\n",
"    # Print the shape of the cleaned data\n",
"    print(f\"Cleaned data shape: {data.shape}\")\n",
"\n",
"    print(\"Data cleaning process completed.\")\n",
"    # Return the cleaned data\n",
"    return data"
], "metadata": { "collapsed": false }, "id": "685463c2d6065b08" }, { "cell_type": "code", "outputs": [], "source": [
"import pickle\n",
"\n",
"# File='data_original.pkl'\n",
"File = 'data.pkl'\n",
"\n",
"# Check if the file exists\n",
"if os.path.exists(File):\n",
" # If the file exists, load it\n",
" print(\"Loading data from pickle file...\")\n",
" with open(File, 'rb') as f:\n",
" data = pickle.load(f)\n",
" print(\"Data loaded successfully.\")\n",
"else:\n",
" # If the file doesn't
exist, load and clean the data\n",
" print(\"Pickle file not found. Loading and cleaning data...\")\n",
" data = load_data(DATASET_DIR)\n",
" data = clean_data(data)\n",
" print(\"Data loaded and cleaned successfully.\")\n",
" print(\"Saving cleaned data to pickle file...\")\n",
" with open(File, 'wb') as f:\n",
" pickle.dump(data, f)\n",
" print(\"Cleaned data saved to pickle file successfully.\")\n",
"\n",
"print(\"First few rows of the data:\")\n",
"print(data.head())\n",
"\n",
"# Print Headers\n",
"print(\"Column headers:\")\n",
"print(data.columns)"
], "metadata": { "collapsed": false }, "id": "79c2c23691b26753" }, { "cell_type": "code", "outputs": [], "source": [
"MODEL_DIR = './models'\n",
"\n",
"\n",
"def _cached_model_is_stale(cached_model, classifier, X_train):\n",
"    \"\"\"Return True if a cached model does not match the requested classifier or features.\"\"\"\n",
"    if type(cached_model) is not type(classifier):\n",
"        return True\n",
"    # Compare hyperparameters through their repr so NaN or array-valued parameters compare safely\n",
"    if hasattr(classifier, 'get_params') and repr(cached_model.get_params()) != repr(classifier.get_params()):\n",
"        return True\n",
"    # scikit-learn estimators record the number of training features when they are fitted\n",
"    cached_features = getattr(cached_model, 'n_features_in_', None)\n",
"    shape = getattr(X_train, 'shape', None)\n",
"    if cached_features is not None and shape is not None and len(shape) > 1:\n",
"        return cached_features != shape[1]\n",
"    return False\n",
"\n",
"\n",
"def train_and_save_model(classifier, X_train, y_train, file_name, force_retrain=False):\n",
"    \"\"\"Fit `classifier` and cache it under MODEL_DIR, or reuse a matching cached model.\n",
"\n",
"    A cached pickle is reused only if it holds the same estimator type with the\n",
"    same hyperparameters and was fitted on the same number of features;\n",
"    otherwise the model is trained again and the file is overwritten. A change\n",
"    of the training rows alone is not detected; pass `force_retrain=True` then.\n",
"\n",
"    Args:\n",
"        classifier: Unfitted estimator exposing `fit(X, y)`; fitted in place when training happens.\n",
"        X_train: Training features.\n",
"        y_train: Training labels.\n",
"        file_name: Name of the pickle file inside MODEL_DIR.\n",
"        force_retrain: If True, ignore any cached model and train again.\n",
"\n",
"    Returns:\n",
"        The fitted model as loaded from the pickle file.\n",
"    \"\"\"\n",
"    os.makedirs(MODEL_DIR, exist_ok=True)\n",
"    file_path = os.path.join(MODEL_DIR, file_name)\n",
"\n",
"    if not force_retrain and os.path.exists(file_path):\n",
"        # NOTE: unpickling can execute arbitrary code; only load files written by this notebook.\n",
"        with open(file_path, 'rb') as file:\n",
"            cached_model = pickle.load(file)\n",
"        if not _cached_model_is_stale(cached_model, classifier, X_train):\n",
"            return cached_model\n",
"        print(f\"Cached model in {file_path} does not match the requested classifier or features; retraining.\")\n",
"\n",
"    print(f\"Training the model and saving it to {file_path}\")\n",
"    # Train the classifier\n",
"    classifier.fit(X_train, y_train)\n",
"\n",
"    # Save the trained model to a file\n",
"    with open(file_path, 'wb') as file:\n",
"        pickle.dump(classifier, file)\n",
"\n",
"    # Read the file back so the caller gets exactly what was persisted\n",
"    with open(file_path, 'rb') as file:\n",
"        loaded_model = pickle.load(file)\n",
"\n",
"    return loaded_model"
], "metadata": { "collapsed": false }, "id": "12e16974341e6266" }, { "cell_type": "markdown", "source": [
"The selected code is performing data standardization, which is a common preprocessing step in many machine learning workflows. \n",
"\n",
"The purpose of standardization is to transform the data such that it has a mean of 0 and a standard deviation of 1.
This is done to ensure that all features have the same scale, which is a requirement for many machine learning algorithms.\n", "\n", "The mathematical formulas used in this process are as follows:\n", "\n", "1. Calculate the mean (μ) of the data:\n", "\n", "$$\n", "\\mu = \\frac{1}{n} \\sum_{i=1}^{n} x_i\n", "$$\n", "Where:\n", "- $n$ is the number of observations in the data\n", "- $x_i$ is the value of the $i$-th observation\n", "- $\\sum$ denotes the summation over all observations\n", "\n", "2. Standardize the data by subtracting the mean from each observation and dividing by the standard deviation:\n", "\n", "$$\n", "\\text{Data}_i = \\frac{x_i - \\mu}{\\sigma}\n", "$$\n", "Where:\n", "- $\\text{Data}_i$ is the standardized value of the $i$-th observation\n", "- $\\sigma$ is the standard deviation of the data\n", "- $x_i$ is the value of the $i$-th observation\n", "- $\\mu$ is the mean of the data\n", "\n", "The `StandardScaler` class from the `sklearn.preprocessing` module is used to perform this standardization. The `fit_transform` method is used to calculate the mean and standard deviation of the data and then perform the standardization.\n", "\n", "**Note:** By setting the explained variance to 0.95, we are saying that we want to choose the smallest number of principal components such that 95% of the variance in the original data is retained. This means that the transformed data will retain 95% of the information of the original data, while potentially having fewer dimensions.\n" ], "metadata": { "collapsed": false }, "id": "b36814c942066d6" }, { "cell_type": "markdown", "source": [ "## Data Mining / Machine Learning\n", "\n", "### I. Supervised Learning\n", "- **Decision**: Supervised learning is used due to the labeled dataset.\n", "- **Algorithm**: Random Forest Classifier is preferred for its performance in classification tasks.\n", "\n", "### II. 
Training/Test Split Ratio\n",
"- **Decision**: 80:20 split is chosen for training/test dataset.\n",
"- **Reasoning**: This split ensures sufficient data for training and testing.\n",
"\n",
"### III. Performance Metrics\n",
"- **Classification Accuracy**: Measures the proportion of correctly classified instances.\n",
"- **Confusion Matrix**: Provides a summary of predicted and actual classes.\n",
"- **Classification Report**: Provides detailed metrics such as precision, recall, F1-score, and support for each class.\n",
"\n",
"The Random Forest Classifier is trained on the training set and evaluated on the test set using accuracy and classification report metrics.\n"
], "metadata": { "collapsed": false }, "id": "8fefd253728ea2f0" }, { "cell_type": "markdown", "source": [
"# Split the data into training and testing sets\n",
"\n",
"The next step is to split the data into training and testing sets. This is a common practice in machine learning, where the training set is used to train the model, and the testing set is used to evaluate its performance.\n",
"\n",
"We will use the `train_test_split` function from the `sklearn.model_selection` module to split the data into training and testing sets. We will use 80% of the data for training and 20% for testing (`test_size=0.2`), which is a common split ratio."
], "metadata": { "collapsed": false }, "id": "7d64d6490fa1c2c2" }, { "cell_type": "code", "outputs": [], "source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Seed shared by the split and the random forest so that a re-run reproduces the results\n",
"RANDOM_STATE = 42\n",
"\n",
"# The label must not be part of the feature matrix: with 'NLOS' left in X every\n",
"# model can read the answer directly from its input (target leakage).\n",
"# Models cached in ./models by earlier runs were trained with 'NLOS' among the\n",
"# features and should be deleted or retrained.\n",
"features = data.drop(columns=['NLOS'])\n",
"target = data['NLOS']\n",
"\n",
"# Split the data into training and test sets; stratifying keeps the LOS/NLOS\n",
"# proportions equal in both sets\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    features, target, test_size=0.2, stratify=target, random_state=RANDOM_STATE\n",
")\n"
], "metadata": { "collapsed": false }, "id": "54d2a6506b584a03" }, { "cell_type": "markdown", "source": [
"# Train a Random Forest Classifier\n",
"\n",
"The next step is to train a machine learning model on the training data. We will use the `RandomForestClassifier` class from the `sklearn.ensemble` module to train a random forest classifier.\n",
"\n",
"The random forest classifier is an ensemble learning method that operates by constructing a multitude of decision trees at training time and outputting the class that is the mode of the classes (classification) or mean prediction (regression) of the individual trees.\n",
"\n",
"We will use the `fit` method of the `RandomForestClassifier` object to train the model on the training data."
], "metadata": { "collapsed": false }, "id": "ab55160e30fd6f99" }, { "cell_type": "code", "outputs": [], "source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"# Initialize the classifier with parameters to prevent overfitting\n",
"classifier = RandomForestClassifier(\n",
"    n_estimators=200,\n",
"    max_depth=10,\n",
"    min_samples_split=10,\n",
"    min_samples_leaf=5,\n",
"    max_features='sqrt',\n",
"    random_state=RANDOM_STATE,\n",
")\n",
"\n",
"loaded_model = train_and_save_model(classifier, X_train, y_train, 'random_forest_classifier.pkl')\n"
], "metadata": { "collapsed": false }, "id": "dc485f3de9f8936f" }, { "cell_type": "markdown", "source": [
"# Evaluate the Model\n",
"\n",
"To evaluate the performance of the trained model on the testing data, we will use the `predict` method of the `RandomForestClassifier` object to make predictions on the testing data. We will then use the `accuracy_score` and `classification_report` functions from the `sklearn.metrics` module to calculate the accuracy and generate a classification report.\n",
"\n",
"- **Accuracy:** The accuracy score function calculates the proportion of correctly classified instances.\n",
"\n",
"- **Precision:** The ratio of correctly predicted positive observations to the total predicted positive observations. It is calculated as:\n",
"\n",
" $$\n",
" \\text{Precision} = \\frac{\\text{True Positives}}{\\text{True Positives} + \\text{False Positives}}\n",
" $$\n",
"\n",
"- **Recall:** The ratio of correctly predicted positive observations to all observations in the actual class. It is calculated as:\n",
"\n",
" $$\n",
" \\text{Recall} = \\frac{\\text{True Positives}}{\\text{True Positives} + \\text{False Negatives}}\n",
" $$\n",
"\n",
"- **F1 Score:** The harmonic mean of precision and recall. It is calculated as:\n",
"\n",
" $$\n",
" \\text{F1 Score} = 2 \\times \\frac{\\text{Precision} \\times \\text{Recall}}{\\text{Precision} + \\text{Recall}}\n",
" $$\n",
"\n",
"- **Support:** The number of actual occurrences of the class in the dataset.\n",
"\n",
"The classification report provides a summary of the precision, recall, F1-score, and support for each class in the testing data, giving insight into how well the model is performing for each class.\n"
], "metadata": { "collapsed": false }, "id": "424cc5954c9e81cc" }, { "cell_type": "code", "outputs": [], "source": [
"from sklearn.metrics import accuracy_score, classification_report\n",
"from sklearn.model_selection import cross_val_score\n",
"\n",
"# Make predictions on the test set using the loaded model\n",
"y_pred = loaded_model.predict(X_test)\n",
"\n",
"# Evaluate the loaded model\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"classification_rep = classification_report(y_test, y_pred)\n",
"\n",
"# 5-fold cross-validation on the training data (cross_val_score refits clones of the\n",
"# estimator, so the held-out test set stays untouched). The result gets its own name\n",
"# so that the imported cross_val_score function is not overwritten.\n",
"cv_scores = cross_val_score(loaded_model, X_train, y_train, cv=5)\n",
"\n",
"print(f\"Accuracy: {accuracy}\")\n",
"print(f\"Classification Report:\\n{classification_rep}\")\n",
"print(f\"Cross Validation Score: {cv_scores}\")\n"
], "metadata": { "collapsed": false }, "id": "702b4f40dda16736" }, { "cell_type": "markdown", "source": [
"# Visualize a Decision Tree from the Random Forest\n"
], "metadata": { "collapsed": false }, "id": "41957f9babb74a3" }, { "cell_type": "code", "outputs": [], "source": [
"from sklearn.tree import plot_tree\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Select one tree from the forest\n",
"estimator = loaded_model.estimators_[0]\n",
"\n",
"plt.figure(figsize=(100, 100))\n",
"plot_tree(estimator,\n",
"          filled=True,\n",
"          rounded=True,\n",
"          # class names are matched to the labels in ascending order: 0 = LOS, 1 = NLOS\n",
"          class_names=['LOS', 'NLOS'],\n",
"          # name the features the model was trained on (the label is not one of them)\n",
"          feature_names=list(X_train.columns),\n",
"          max_depth=5)  # Limit the depth of the tree\n",
"plt.show()"
], "metadata": { "collapsed": false }, "id": "1f6f826d6234591c" }, { "cell_type": "markdown", "source": [
"# Support Vector Machine (SVM)" ], "metadata": { "collapsed": false }, "id": "eef3be2c3026a909" }, { "cell_type": "code", "outputs": [], "source": [
"# import os\n",
"# from sklearn.svm import SVC\n",
"# import pickle\n",
"# \n",
"# svm = SVC(kernel='linear', random_state=42)\n",
"# loaded_model = train_and_save_model(svm, X_train, y_train, 'svm_classifier.pkl')\n",
"# \n",
"# # Predict the labels for the test set with each model\n",
"# y_pred_svm = loaded_model.predict(X_test)\n",
"# \n",
"# # Calculate the accuracy of each model\n",
"# accuracy_svm = accuracy_score(y_test, y_pred_svm)\n",
"# \n",
"# # Print the accuracy of each model\n",
"# print(f\"Accuracy of SVM: {accuracy_svm}\")"
], "metadata": { "collapsed": false }, "id": "c970b0c1593d955c" }, { "cell_type": "markdown", "source": [
"# Logistic Regression"
], "metadata": { "collapsed": false }, "id": "cccaf1db0d5060a8" }, { "cell_type": "code", "outputs": [], "source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import cross_val_score\n",
"\n",
"# Logistic Regression with L2 regularization\n",
"log_reg = LogisticRegression(penalty='l2', C=0.1)\n",
"\n",
"# Use the train_and_save_model function to train and save the model\n",
"loaded_model = train_and_save_model(log_reg, X_train, y_train, 'logistic_regression_model.pkl')"
], "metadata": { "collapsed": false }, "id": "ee7506f4aa805faf" }, { "cell_type": "code", "outputs": [], "source": [
"\n",
"# Predict on the test set\n",
"y_pred_log_reg = loaded_model.predict(X_test)\n",
"\n",
"# Predicted probability of the positive class (NLOS = 1); kept under its own name\n",
"# because `loaded_model` is reassigned by the cells of the following models\n",
"y_score_log_reg = loaded_model.predict_proba(X_test)[:, 1]\n",
"\n",
"# Calculate accuracy\n",
"accuracy_log_reg = accuracy_score(y_test, y_pred_log_reg)\n",
"print(f\"Accuracy of Logistic Regression: {accuracy_log_reg}\")\n",
"\n",
"# Perform 5-fold cross validation\n",
"scores = cross_val_score(log_reg, X_train, y_train, cv=5)\n",
"print(f\"Cross-validated scores: {scores}\")"
], "metadata": { "collapsed": false }, "id": "a44d38efa4b86d93" }, { "cell_type": "code", "outputs": [], "source": [
"from sklearn.metrics import roc_curve, auc\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Compute the ROC curve and its area from the predicted probabilities of the positive class.\n",
"# Hard 0/1 predictions would yield a single operating point instead of a curve.\n",
"fpr, tpr, _ = roc_curve(y_test, y_score_log_reg)\n",
"roc_auc = auc(fpr, tpr)\n",
"\n",
"plt.figure()\n",
"lw = 2\n",
"plt.plot(fpr, tpr, color='darkorange',\n",
"         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)\n",
"plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')\n",
"plt.xlim([0.0, 1.0])\n",
"plt.ylim([0.0, 1.05])\n",
"plt.xlabel('False Positive Rate')\n",
"plt.ylabel('True Positive Rate')\n",
"plt.title('Receiver Operating Characteristic')\n",
"plt.legend(loc=\"lower right\")\n",
"plt.show()"
], "metadata": { "collapsed": false }, "id": "a3646a4965b0707c" }, { "cell_type": "markdown", "source": [
"# Gradient Boosting Classifier"
], "metadata": { "collapsed": false }, "id": "aeaf5eeffa7ec104" }, { "cell_type": "code", "outputs": [], "source": [
"from sklearn.ensemble import GradientBoostingClassifier\n",
"\n",
"# Gradient Boosting Classifier\n",
"gbc = GradientBoostingClassifier()\n",
"\n",
"# Use the train_and_save_model function to train and save the model\n",
"loaded_model = train_and_save_model(gbc, X_train, y_train, 'gradient_boosting_classifier.pkl')\n"
], "metadata": { "collapsed": false }, "id": "c7ecae5d021ad44f" }, { "cell_type": "code", "outputs": [], "source": [
"y_pred_gbc = loaded_model.predict(X_test)\n",
"accuracy_gbc = accuracy_score(y_test, y_pred_gbc)\n",
"print(f\"Accuracy of Gradient Boosting Classifier: {accuracy_gbc}\")\n"
], "metadata": { "collapsed": false }, "id": "4a8a1c3a7289ef7a" }, { "cell_type": "markdown", "source": [
"# K-Nearest Neighbors (KNN, K=11)\n",
"\n",
"This code block is implementing the K-Nearest Neighbors (KNN) algorithm for classification. The KNN algorithm is a type of instance-based learning, or lazy learning, where the function is only approximated locally and all computation is deferred until function evaluation. \n",
"\n",
"The KNN algorithm works by finding the distances between a query and all the examples in the data, selecting the specified number of examples (K) closest to the query, then votes for the most frequent label (in the case of classification) or averages the labels (in the case of regression). \n",
"\n",
"The number of neighbors, K, is set to 11 in this case, the value selected by the grid search further below. This means that the algorithm looks at the 11 nearest neighbors to decide the class of the test instance. \n",
"\n",
"The mathematical concept behind KNN is the Euclidean distance. Given two points P1(x1, y1) and P2(x2, y2) in a 2D space, the Euclidean distance between P1 and P2 is calculated as:\n",
"\n",
"$$\n",
"\\text{Distance} = \\sqrt{(x_2 - x_1)^2 + (y_2 - y_1)^2}\n",
"$$\n",
"In higher dimensional space, the formula is generalized as:\n",
"$$\n",
"\\text{Distance} = \\sqrt{\\sum_{i=1}^{n} (x_i - y_i)^2}\n",
"$$\n",
"Where:\n",
"- $n$ is the number of dimensions\n",
"- $x_i$ and $y_i$ are the $i$-th dimensions of the two points\n"
], "metadata": { "collapsed": false }, "id": "25102568a6e5c457" }, { "cell_type": "code", "outputs": [], "source": [
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"# K-Nearest Neighbors\n",
"knn = KNeighborsClassifier(n_neighbors=11)\n",
"loaded_model = train_and_save_model(knn, X_train, y_train, 'knn_classifier.pkl')\n"
], "metadata": { "collapsed": false }, "id": "705c62e64bf6d614" }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"Accuracy of K-Nearest Neighbors: 0.8823809523809524\n"
] } ], "source": [
"y_pred_knn = loaded_model.predict(X_test)\n",
"accuracy_knn = accuracy_score(y_test, y_pred_knn)\n",
"print(f\"Accuracy of K-Nearest Neighbors: {accuracy_knn}\")"
], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-11T09:35:15.226753Z", "start_time": "2024-03-11T09:35:14.516824Z" } }, "id": "cf4df4ef7bbfd74", "execution_count": 30 }, { "cell_type": "code", "outputs": [ { "name": "stdout",
"output_type": "stream", "text": [ "0.8775595238095238\n", "{'n_neighbors': 11}\n" ] } ], "source": [ "from sklearn.model_selection import GridSearchCV\n", "\n", "# Define the parameter values that should be searched\n", "k_range = list(range(1, 31))\n", "\n", "# Create a parameter grid: map the parameter names to the values that should be searched\n", "param_grid = dict(n_neighbors=k_range)\n", "\n", "# Instantiate the grid\n", "grid = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy')\n", "\n", "# Fit the grid with data\n", "grid.fit(X_train, y_train)\n", "\n", "# View the complete results\n", "grid.cv_results_\n", "\n", "# Examine the best model\n", "print(grid.best_score_)\n", "print(grid.best_params_)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-11T09:36:32.996298Z", "start_time": "2024-03-11T09:35:15.228121Z" } }, "id": "faabcf63e34005a9", "execution_count": 31 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Original number of features: 48, reduced number of features: 2\n" ] }, { "data": { "text/plain": "
", "image/png": "" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "# Apply PCA to reduce dimensionality to 2D\n", "pca = PCA(n_components=2)\n", "X_test_2d = pca.fit_transform(X_test)\n", "\n", "# Print the number of features\n", "print(f\"Original number of features: {X_test.shape[1]}, reduced number of features: {X_test_2d.shape[1]}\")\n", "\n", "# Create a scatter plot\n", "plt.figure(figsize=(10, 7))\n", "\n", "# Create a color map\n", "cmap = plt.cm.viridis\n", "\n", "# Plot NLOS points\n", "nlos = plt.scatter(X_test_2d[y_pred_knn == 1, 0], X_test_2d[y_pred_knn == 1, 1], c='blue', label='NLOS')\n", "\n", "# Plot LOS points\n", "los = plt.scatter(X_test_2d[y_pred_knn == 0, 0], X_test_2d[y_pred_knn == 0, 1], c='red', label='LOS')\n", "\n", "# Add labels\n", "plt.xlabel('Principal Component 1')\n", "plt.ylabel('Principal Component 2')\n", "plt.title('2D Scatter Plot for LOS and NLOS')\n", "\n", "# Add a legend\n", "plt.legend(handles=[nlos, los])\n", "\n", "plt.show()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-11T09:36:33.464205Z", "start_time": "2024-03-11T09:36:32.997412Z" } }, "id": "2ed22b3fc59f74e6", "execution_count": 32 }, { "cell_type": "markdown", "source": [ "# Naive Bayes" ], "metadata": { "collapsed": false }, "id": "5b9b66f92968957c" }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training the model and saving it to ./models/naive_bayes_classifier.pkl\n" ] } ], "source": [ "from sklearn.naive_bayes import GaussianNB\n", "\n", "# Naive Bayes\n", "nb = GaussianNB()\n", "loaded_model = train_and_save_model(nb, X_train, y_train, 'naive_bayes_classifier.pkl')" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-11T09:36:33.504010Z", "start_time": "2024-03-11T09:36:33.465404Z" } }, "id": "3d984228fb1d3026", "execution_count": 33 }, { "cell_type": "code", "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [
"Accuracy of Naive Bayes: 1.0\n"
] } ], "source": [
"y_pred_nb = loaded_model.predict(X_test)\n",
"accuracy_nb = accuracy_score(y_test, y_pred_nb)\n",
"print(f\"Accuracy of Naive Bayes: {accuracy_nb}\")"
], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-11T09:36:33.517348Z", "start_time": "2024-03-11T09:36:33.505627Z" } }, "id": "98cd350871bc3201", "execution_count": 34 }, { "cell_type": "markdown", "source": [
"# K-Means Clustering"
], "metadata": { "collapsed": false }, "id": "92c8498137a5e32e" }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"Training the model and saving it to ./models/kmeans_clustering.pkl\n"
] } ], "source": [
"from sklearn.cluster import KMeans\n",
"\n",
"# K-Means Clustering (explicit n_init and a fixed seed make the clustering reproducible)\n",
"kmeans = KMeans(n_clusters=2, max_iter=600, n_init=10, random_state=42)\n",
"loaded_model = train_and_save_model(kmeans, X_train, y_train, 'kmeans_clustering.pkl')"
], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-11T09:36:33.715869Z", "start_time": "2024-03-11T09:36:33.518445Z" } }, "id": "305a796294814705", "execution_count": 35 }, { "cell_type": "code", "outputs": [], "source": [
"# K-Means cluster ids are arbitrary (cluster 0 is not necessarily LOS), so every cluster\n",
"# is mapped to the majority class of the training rows assigned to it before scoring.\n",
"train_clusters = loaded_model.predict(X_train)\n",
"cluster_to_label = y_train.groupby(train_clusters).agg(lambda labels: labels.mode().iloc[0])\n",
"\n",
"test_clusters = loaded_model.predict(X_test)\n",
"y_pred_kmeans = pd.Series(test_clusters).map(cluster_to_label).to_numpy()\n",
"accuracy_kmeans = accuracy_score(y_test, y_pred_kmeans)\n",
"print(f\"Accuracy of K-Means Clustering: {accuracy_kmeans}\")\n"
], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-11T09:36:33.726224Z", "start_time": "2024-03-11T09:36:33.717138Z" } }, "id": "494bb537046bf5a7", "execution_count": 36 }, { "cell_type": "code", "outputs": [], "source": [
"# Attach to every test row the cluster predicted for it. `labels_` holds the clusters of\n",
"# the training rows, so joining it to X_test misaligns the rows and fills the table with NaN.\n",
"clustered_test = X_test.assign(Cluster=loaded_model.predict(X_test))\n",
"# Print the data table with the cluster labels\n",
"print(f\"Data table with cluster labels:\\n{clustered_test}\")\n"
], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-11T09:36:33.751015Z", "start_time": "2024-03-11T09:36:33.727429Z" } }, "id": "62401c8d1a4d61cc", "execution_count": 37 }, { "cell_type": "markdown", "source": [
"# Neural Network"
], "metadata": { "collapsed": false }, "id": "862a9b7ee430a667" }, { "cell_type": "code", "outputs": [ { "name": "stderr", "output_type": "stream", "text": [
"2024-03-11 17:36:34.132308: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2024-03-11 17:36:34.191657: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
"2024-03-11 17:36:34.356171: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"2024-03-11 17:36:34.356217: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"2024-03-11 17:36:34.367283: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2024-03-11 17:36:34.394553: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be
used.\n", "2024-03-11 17:36:34.395227: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-03-11 17:36:35.762774: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2024-03-11 17:36:37.442250: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", "2024-03-11 17:36:37.442821: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. 
Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n", "Skipping registering GPU devices...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1050/1050 [==============================] - 3s 2ms/step - loss: 0.3589 - accuracy: 0.8329 - val_loss: 0.0501 - val_accuracy: 0.9894\n", "Epoch 2/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 0.0506 - accuracy: 0.9843 - val_loss: 0.0018 - val_accuracy: 0.9995\n", "Epoch 3/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 0.0113 - accuracy: 0.9970 - val_loss: 1.6444e-04 - val_accuracy: 1.0000\n", "Epoch 4/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 0.0056 - accuracy: 0.9990 - val_loss: 1.6097e-05 - val_accuracy: 1.0000\n", "Epoch 5/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 0.0044 - accuracy: 0.9989 - val_loss: 4.0286e-06 - val_accuracy: 1.0000\n", "Epoch 6/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 0.0016 - accuracy: 0.9997 - val_loss: 7.1377e-07 - val_accuracy: 1.0000\n", "Epoch 7/100\n", "1050/1050 [==============================] - 2s 2ms/step - loss: 0.0015 - accuracy: 0.9997 - val_loss: 9.0846e-07 - val_accuracy: 1.0000\n", "Epoch 8/100\n", "1050/1050 [==============================] - 3s 3ms/step - loss: 9.9385e-04 - accuracy: 0.9998 - val_loss: 1.8082e-07 - val_accuracy: 1.0000\n", "Epoch 9/100\n", "1050/1050 [==============================] - 3s 3ms/step - loss: 0.0019 - accuracy: 0.9998 - val_loss: 3.0069e-08 - val_accuracy: 1.0000\n", "Epoch 10/100\n", "1050/1050 [==============================] - 3s 3ms/step - loss: 0.0011 - accuracy: 0.9998 - val_loss: 2.4598e-04 - val_accuracy: 0.9999\n", "Epoch 11/100\n", "1050/1050 [==============================] - 3s 3ms/step - loss: 0.0021 - accuracy: 0.9995 - val_loss: 2.6330e-08 - val_accuracy: 1.0000\n", "Epoch 12/100\n", "1050/1050 
[==============================] - 4s 3ms/step - loss: 0.0018 - accuracy: 0.9999 - val_loss: 2.6572e-08 - val_accuracy: 1.0000\n", "Epoch 13/100\n", "1050/1050 [==============================] - 3s 3ms/step - loss: 6.9760e-04 - accuracy: 0.9998 - val_loss: 5.6330e-09 - val_accuracy: 1.0000\n", "Epoch 14/100\n", "1050/1050 [==============================] - 3s 3ms/step - loss: 7.4783e-04 - accuracy: 0.9998 - val_loss: 7.5862e-09 - val_accuracy: 1.0000\n", "Epoch 15/100\n", "1050/1050 [==============================] - 4s 3ms/step - loss: 7.9774e-04 - accuracy: 0.9998 - val_loss: 1.3675e-08 - val_accuracy: 1.0000\n", "Epoch 16/100\n", "1050/1050 [==============================] - 4s 4ms/step - loss: 5.4841e-04 - accuracy: 0.9998 - val_loss: 1.2942e-09 - val_accuracy: 1.0000\n", "Epoch 17/100\n", "1050/1050 [==============================] - 3s 3ms/step - loss: 0.0014 - accuracy: 0.9997 - val_loss: 6.1260e-10 - val_accuracy: 1.0000\n", "Epoch 18/100\n", "1050/1050 [==============================] - 4s 4ms/step - loss: 0.0018 - accuracy: 0.9999 - val_loss: 2.6419e-09 - val_accuracy: 1.0000\n", "Epoch 19/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 8.0635e-04 - accuracy: 0.9999 - val_loss: 1.2913e-09 - val_accuracy: 1.0000\n", "Epoch 20/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.6192e-04 - accuracy: 1.0000 - val_loss: 8.3605e-10 - val_accuracy: 1.0000\n", "Epoch 21/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 5.2448e-04 - accuracy: 0.9998 - val_loss: 1.1165e-09 - val_accuracy: 1.0000\n", "Epoch 22/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 9.2879e-04 - accuracy: 0.9999 - val_loss: 3.9623e-10 - val_accuracy: 1.0000\n", "Epoch 23/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.7082e-04 - accuracy: 0.9999 - val_loss: 2.9649e-10 - val_accuracy: 1.0000\n", "Epoch 24/100\n", "1050/1050 [==============================] - 1s 
1ms/step - loss: 2.6710e-04 - accuracy: 0.9999 - val_loss: 3.5300e-09 - val_accuracy: 1.0000\n", "Epoch 25/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 5.3370e-04 - accuracy: 0.9999 - val_loss: 2.5315e-10 - val_accuracy: 1.0000\n", "Epoch 26/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 7.1824e-04 - accuracy: 0.9999 - val_loss: 4.3657e-10 - val_accuracy: 1.0000\n", "Epoch 27/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.8679e-04 - accuracy: 0.9999 - val_loss: 1.7553e-10 - val_accuracy: 1.0000\n", "Epoch 28/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 5.2067e-04 - accuracy: 0.9999 - val_loss: 1.3316e-09 - val_accuracy: 1.0000\n", "Epoch 29/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.6381e-04 - accuracy: 0.9999 - val_loss: 5.6867e-10 - val_accuracy: 1.0000\n", "Epoch 30/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.4551e-04 - accuracy: 0.9999 - val_loss: 9.0114e-10 - val_accuracy: 1.0000\n", "Epoch 31/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 6.7519e-04 - accuracy: 0.9999 - val_loss: 1.7674e-09 - val_accuracy: 1.0000\n", "Epoch 32/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 8.1789e-04 - accuracy: 0.9999 - val_loss: 4.1441e-10 - val_accuracy: 1.0000\n", "Epoch 33/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.6309e-04 - accuracy: 0.9999 - val_loss: 4.5561e-10 - val_accuracy: 1.0000\n", "Epoch 34/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.9113e-04 - accuracy: 0.9999 - val_loss: 1.0894e-10 - val_accuracy: 1.0000\n", "Epoch 35/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.8002e-04 - accuracy: 0.9999 - val_loss: 9.3475e-11 - val_accuracy: 1.0000\n", "Epoch 36/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.2280e-04 - 
accuracy: 0.9999 - val_loss: 4.3340e-10 - val_accuracy: 1.0000\n", "Epoch 37/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 9.3696e-04 - accuracy: 0.9998 - val_loss: 2.6858e-10 - val_accuracy: 1.0000\n", "Epoch 38/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.6842e-04 - accuracy: 1.0000 - val_loss: 1.8001e-10 - val_accuracy: 1.0000\n", "Epoch 39/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.5864e-04 - accuracy: 0.9999 - val_loss: 1.1462e-10 - val_accuracy: 1.0000\n", "Epoch 40/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.7118e-04 - accuracy: 0.9999 - val_loss: 4.5263e-11 - val_accuracy: 1.0000\n", "Epoch 41/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.3556e-04 - accuracy: 0.9999 - val_loss: 4.1749e-12 - val_accuracy: 1.0000\n", "Epoch 42/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.2305e-04 - accuracy: 0.9999 - val_loss: 5.6594e-12 - val_accuracy: 1.0000\n", "Epoch 43/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.9655e-04 - accuracy: 0.9999 - val_loss: 8.7916e-11 - val_accuracy: 1.0000\n", "Epoch 44/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.4267e-04 - accuracy: 0.9999 - val_loss: 3.0908e-10 - val_accuracy: 1.0000\n", "Epoch 45/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 5.3887e-05 - accuracy: 1.0000 - val_loss: 4.7371e-11 - val_accuracy: 1.0000\n", "Epoch 46/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.2410e-04 - accuracy: 1.0000 - val_loss: 8.3557e-11 - val_accuracy: 1.0000\n", "Epoch 47/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.3613e-04 - accuracy: 1.0000 - val_loss: 4.1424e-11 - val_accuracy: 1.0000\n", "Epoch 48/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.0253e-04 - accuracy: 1.0000 - val_loss: 
2.1587e-11 - val_accuracy: 1.0000\n", "Epoch 49/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.7809e-04 - accuracy: 0.9999 - val_loss: 3.7521e-11 - val_accuracy: 1.0000\n", "Epoch 50/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.6325e-04 - accuracy: 0.9999 - val_loss: 1.2730e-11 - val_accuracy: 1.0000\n", "Epoch 51/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 8.8403e-05 - accuracy: 1.0000 - val_loss: 1.6883e-11 - val_accuracy: 1.0000\n", "Epoch 52/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.2979e-04 - accuracy: 1.0000 - val_loss: 2.6181e-12 - val_accuracy: 1.0000\n", "Epoch 53/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.7255e-04 - accuracy: 0.9999 - val_loss: 3.3547e-14 - val_accuracy: 1.0000\n", "Epoch 54/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.0768e-04 - accuracy: 0.9999 - val_loss: 1.9424e-14 - val_accuracy: 1.0000\n", "Epoch 55/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.9720e-04 - accuracy: 0.9999 - val_loss: 5.9644e-14 - val_accuracy: 1.0000\n", "Epoch 56/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.8690e-05 - accuracy: 1.0000 - val_loss: 5.4575e-13 - val_accuracy: 1.0000\n", "Epoch 57/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 5.1015e-05 - accuracy: 1.0000 - val_loss: 2.7099e-13 - val_accuracy: 1.0000\n", "Epoch 58/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 7.5753e-05 - accuracy: 1.0000 - val_loss: 1.1129e-14 - val_accuracy: 1.0000\n", "Epoch 59/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.9979e-04 - accuracy: 1.0000 - val_loss: 1.0860e-14 - val_accuracy: 1.0000\n", "Epoch 60/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 7.2751e-04 - accuracy: 0.9999 - val_loss: 1.0344e-10 - val_accuracy: 
1.0000\n", "Epoch 61/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.4999e-04 - accuracy: 0.9999 - val_loss: 3.8716e-13 - val_accuracy: 1.0000\n", "Epoch 62/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.1617e-04 - accuracy: 0.9999 - val_loss: 9.1705e-14 - val_accuracy: 1.0000\n", "Epoch 63/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 5.4722e-04 - accuracy: 0.9999 - val_loss: 1.8718e-13 - val_accuracy: 1.0000\n", "Epoch 64/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 0.0013 - accuracy: 0.9999 - val_loss: 3.2839e-12 - val_accuracy: 1.0000\n", "Epoch 65/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.5692e-04 - accuracy: 0.9999 - val_loss: 3.2787e-12 - val_accuracy: 1.0000\n", "Epoch 66/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.1968e-04 - accuracy: 1.0000 - val_loss: 1.0670e-11 - val_accuracy: 1.0000\n", "Epoch 67/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.5617e-04 - accuracy: 0.9999 - val_loss: 3.8569e-12 - val_accuracy: 1.0000\n", "Epoch 68/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.8893e-04 - accuracy: 1.0000 - val_loss: 6.3491e-12 - val_accuracy: 1.0000\n", "Epoch 69/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 8.1594e-05 - accuracy: 1.0000 - val_loss: 2.9203e-12 - val_accuracy: 1.0000\n", "Epoch 70/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 7.6071e-04 - accuracy: 0.9999 - val_loss: 1.9254e-13 - val_accuracy: 1.0000\n", "Epoch 71/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.2615e-04 - accuracy: 1.0000 - val_loss: 4.7641e-13 - val_accuracy: 1.0000\n", "Epoch 72/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.1528e-04 - accuracy: 1.0000 - val_loss: 5.8274e-13 - val_accuracy: 1.0000\n", "Epoch 73/100\n", 
"1050/1050 [==============================] - 1s 1ms/step - loss: 1.6981e-04 - accuracy: 1.0000 - val_loss: 2.4507e-13 - val_accuracy: 1.0000\n", "Epoch 74/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 6.5145e-04 - accuracy: 0.9999 - val_loss: 1.1665e-12 - val_accuracy: 1.0000\n", "Epoch 75/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.1496e-05 - accuracy: 1.0000 - val_loss: 4.0618e-11 - val_accuracy: 1.0000\n", "Epoch 76/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.2472e-04 - accuracy: 0.9999 - val_loss: 2.1636e-11 - val_accuracy: 1.0000\n", "Epoch 77/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.5909e-04 - accuracy: 0.9999 - val_loss: 8.5723e-13 - val_accuracy: 1.0000\n", "Epoch 78/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.1828e-04 - accuracy: 0.9999 - val_loss: 1.0288e-13 - val_accuracy: 1.0000\n", "Epoch 79/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.5019e-04 - accuracy: 0.9999 - val_loss: 1.9727e-13 - val_accuracy: 1.0000\n", "Epoch 80/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 0.0014 - accuracy: 0.9999 - val_loss: 2.4567e-13 - val_accuracy: 1.0000\n", "Epoch 81/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.6808e-04 - accuracy: 0.9999 - val_loss: 1.4976e-13 - val_accuracy: 1.0000\n", "Epoch 82/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.3370e-05 - accuracy: 1.0000 - val_loss: 1.2957e-13 - val_accuracy: 1.0000\n", "Epoch 83/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.3340e-04 - accuracy: 1.0000 - val_loss: 1.7993e-13 - val_accuracy: 1.0000\n", "Epoch 84/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 0.0012 - accuracy: 0.9999 - val_loss: 6.4421e-13 - val_accuracy: 1.0000\n", "Epoch 85/100\n", "1050/1050 
[==============================] - 1s 1ms/step - loss: 8.9951e-05 - accuracy: 1.0000 - val_loss: 1.0772e-13 - val_accuracy: 1.0000\n", "Epoch 86/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.1728e-05 - accuracy: 1.0000 - val_loss: 6.3775e-14 - val_accuracy: 1.0000\n", "Epoch 87/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.8351e-04 - accuracy: 0.9999 - val_loss: 2.3062e-13 - val_accuracy: 1.0000\n", "Epoch 88/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.5166e-05 - accuracy: 1.0000 - val_loss: 1.6359e-13 - val_accuracy: 1.0000\n", "Epoch 89/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.7680e-05 - accuracy: 1.0000 - val_loss: 8.0235e-14 - val_accuracy: 1.0000\n", "Epoch 90/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 8.1901e-05 - accuracy: 1.0000 - val_loss: 1.0808e-14 - val_accuracy: 1.0000\n", "Epoch 91/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.4072e-04 - accuracy: 1.0000 - val_loss: 2.9378e-14 - val_accuracy: 1.0000\n", "Epoch 92/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.4237e-04 - accuracy: 0.9999 - val_loss: 2.1903e-13 - val_accuracy: 1.0000\n", "Epoch 93/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 4.1000e-04 - accuracy: 1.0000 - val_loss: 7.1069e-13 - val_accuracy: 1.0000\n", "Epoch 94/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.4392e-04 - accuracy: 0.9999 - val_loss: 6.6802e-13 - val_accuracy: 1.0000\n", "Epoch 95/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 9.6308e-04 - accuracy: 0.9999 - val_loss: 2.2745e-13 - val_accuracy: 1.0000\n", "Epoch 96/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.4975e-05 - accuracy: 1.0000 - val_loss: 3.1772e-14 - val_accuracy: 1.0000\n", "Epoch 97/100\n", "1050/1050 
[==============================] - 1s 1ms/step - loss: 1.2595e-04 - accuracy: 1.0000 - val_loss: 1.1878e-13 - val_accuracy: 1.0000\n", "Epoch 98/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 1.8944e-04 - accuracy: 0.9999 - val_loss: 1.3093e-13 - val_accuracy: 1.0000\n", "Epoch 99/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 2.1216e-05 - accuracy: 1.0000 - val_loss: 5.9066e-14 - val_accuracy: 1.0000\n", "Epoch 100/100\n", "1050/1050 [==============================] - 1s 1ms/step - loss: 3.3804e-05 - accuracy: 1.0000 - val_loss: 4.5820e-14 - val_accuracy: 1.0000\n", "263/263 [==============================] - 0s 853us/step\n", "Accuracy: 1.0\n" ] } ], "source": [ "# Neural Network: binary classifier trained on X_train / y_train, scored once on X_test\n", "import warnings\n", "\n", "import tensorflow as tf\n", "from tensorflow.keras.callbacks import EarlyStopping\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Dense\n", "from sklearn.metrics import accuracy_score\n", "from tensorflow.keras.layers import Dropout\n", "\n", "# Seed Python, NumPy and TensorFlow so weight init, dropout and batch shuffling are repeatable\n", "NN_SEED = 42\n", "tf.keras.utils.set_random_seed(NN_SEED)\n", "\n", "# NOTE(review): the previous run reported val_loss ~1e-14 and accuracy 1.0, which usually means\n", "# the target leaked into the features. Warn if the label column is still a feature column.\n", "label_name = getattr(y_train, 'name', None)\n", "if label_name is not None and label_name in getattr(X_train, 'columns', ()):\n", "    warnings.warn(f'Target column {label_name!r} is also a feature in X_train (target leakage)')\n", "\n", "# Define the model\n", "model = Sequential()\n", "model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))  # Input layer\n", "model.add(Dropout(0.5))  # Dropout layer\n", "model.add(Dense(32, activation='relu'))  # Hidden layer\n", "model.add(Dropout(0.5))  # Dropout layer\n", "model.add(Dense(1, activation='sigmoid'))  # Output layer\n", "\n", "# Compile the model\n", "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n", "\n", "# Train the model. Validation uses a slice of the training data (validation_split) so the\n", "# test set stays unseen until the final evaluation. Training stops once val_loss has not\n", "# improved for 5 epochs and the best weights are restored; epochs=100 is only an upper bound.\n", "early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)\n", "model.fit(\n", "    X_train,\n", "    y_train,\n", "    epochs=100,\n", "    batch_size=32,\n", "    validation_split=0.2,\n", "    callbacks=[early_stopping],\n", "    verbose=2,  # one summary line per epoch instead of a progress bar per batch\n", ")\n", "\n", "# Make predictions on the test set\n", "y_pred_prob = model.predict(X_test)\n", "y_pred = (y_pred_prob > 0.5).astype(\"int32\")\n", "\n", "# Calculate the accuracy of the model on the held-out test set\n", "accuracy = accuracy_score(y_test, y_pred)\n", "\n", "print(f\"Accuracy: {accuracy}\")" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-11T09:39:19.675444Z", "start_time": "2024-03-11T09:36:33.752081Z" } }, "id": "7b2464a3243d2114", 
"execution_count": 38 } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }