# import necessary libraries
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# load the breast cancer dataset shipped with scikit-learn
cancer = load_breast_cancer()

# build a DataFrame with one column per feature, labelled by feature name
df = pd.DataFrame(cancer.data, columns=cancer.feature_names)

# append the class labels as a 'target' column
# NOTE: the dataset object is bound to `cancer`; the name `data` is never
# defined in this script, so reading `data.target` raised a NameError.
df['target'] = cancer.target

# preview the first rows of the dataset
print(df.head())


# import NumPy library
import numpy as np

# separate features and target variable
# Work on a copy of the feature matrix: the in-place edits below would
# otherwise write NaNs and rescaled values straight into cancer.data (and
# into any other object that shares its buffer, e.g. a DataFrame built from
# it without copying).
X, y = cancer.data.copy(), cancer.target

# add missing values to the data: each entry is blanked with probability 0.2
# (fixed seed so the same entries are removed on every run)
np.random.seed(42)
missing_mask = np.random.rand(*X.shape) < 0.2
X[missing_mask] = np.nan

# deliberately put the first three feature columns on very different scales
# (NaN entries stay NaN after multiplication)
X[:, 0] *= 1000
X[:, 1] *= 100
X[:, 2] *= 10

# print the features and target variable
print(f"First 5 Features: \n{X[:5]}")
print(f"\nFirst 5 Targets: \n{y[:5]}")