import numpy as np

# Fix the global NumPy seed so every run draws the same sample
np.random.seed(2023)

# Size of the synthetic data set
num_points = 500

# Ground-truth parameters of the line y = true_slope * x + true_intercept
true_slope = 2.5
true_intercept = -1.0

# Draw the x coordinates uniformly from the interval [0, 10)
x = np.random.uniform(0, 10, num_points)

# Draw additive Gaussian noise with mean 0 and standard deviation 2
noise = np.random.normal(0, 2, num_points)

# Build the noisy targets from the linear model
y = true_intercept + true_slope * x + noise


# Keep only the samples whose y value is strictly positive
# (the boolean mask is reused to select the matching x values)
positive_indices = np.greater(y, 0)
x_positive, y_positive = x[positive_indices], y[positive_indices]


import matplotlib.pyplot as plt

# Scatter plot of the training sample (x_positive against y_positive);
# no fitted line is drawn here
plt.scatter(
    x_positive,
    y_positive,
    linewidth=0.1,
    label='Train data',
)

# Axis titles and legend
plt.xlabel('x_train')
plt.ylabel('y_train')
plt.legend()

# Render the figure
plt.show()


from sklearn.linear_model import LinearRegression

# Turn the 1-D x values into a single-column 2-D array for fit()
X_positive = x_positive[:, np.newaxis]

# Build the LinearRegression estimator and fit it in one step
# (fit() returns the estimator itself)
model = LinearRegression().fit(X_positive, y_positive)

# Report the fitted slope (the only coefficient) and the intercept
print("Slope:", model.coef_[0])
print("Intercept:", model.intercept_)

# Output:
# Slope: 2.486491175477071
# Intercept: -0.7950527569904953


# Draw a fresh set of x values, uniform on [0, 10), to act as the test set
x_test = np.random.uniform(0, 10, num_points)

# Turn the 1-D test values into a single-column 2-D array for predict()
X_test = x_test[:, np.newaxis]

# Predict y for every test point with the fitted model
y_pred = model.predict(X_test)

# Boolean mask marking the predictions that are below zero
condition = np.less(y_pred, 0)

# Show the negative predictions
print("Negative predicted values:", y_pred[condition])

# Output:
# Negative predicted values: [-0.05614822 -0.08037182 -0.48316137 -0.49445412 -0.6621156  -0.37483847
#  -0.6515161  -0.66494108 -0.57789142 -0.04041215 -0.41109205 -0.26584649
#  -0.34848786 -0.78734712 -0.37189036]


# Scatter plot of every prediction against its test x value
plt.scatter(
    x_test,
    y_pred,
    marker='.',
    label='Test data',
)

# Overlay the negative predictions as red crosses so they stand out
plt.scatter(
    x_test[condition],
    y_pred[condition],
    marker='X',
    color='red',
    label='Negative values',
)

# Axis titles and legend
plt.xlabel('x_test')
plt.ylabel('y_predict')
plt.legend()

# Render the figure
plt.show()