# import necessary libraries
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import numpy as np

# Load scikit-learn's bundled breast cancer dataset.
cancer = load_breast_cancer()

# Separate the feature matrix and the target vector.
# NOTE: X is the same array object as ``cancer.data`` (no copy is made), so
# the in-place edits below are also visible through ``cancer.data``.
X, y = cancer.data, cancer.target

# Inject missing values: seed NumPy's global RNG so the mask is reproducible,
# then set to NaN every cell whose uniform [0, 1) draw falls below 0.2.
np.random.seed(42)
missing_mask = np.random.rand(*X.shape) < 0.2
X[missing_mask] = np.nan

# Put the first three feature columns on deliberately mismatched scales.
# Cells already set to NaN stay NaN under multiplication.
X[:, 0] *= 1000
X[:, 1] *= 100
X[:, 2] *= 10

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible. ``train_test_split`` is already imported at the top
# of the file, so it is not re-imported here.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)