import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
# Sample data
data = np.random.normal(0, 1, 1000)
# Statistical metrics
mean = np.mean(data)
variance = np.var(data)
std_dev = np.std(data)
skewness = stats.skew(data)
kurtosis = stats.kurtosis(data)
data_min = np.min(data)
data_max = np.max(data)
# Confidence interval (95%)
confidence = stats.norm.interval(0.95, loc=mean, scale=std_dev/np.sqrt(len(data)))
# Print metrics
print(f'Mean: {mean}')
print(f'Variance: {variance}')
print(f'Standard Deviation: {std_dev}')
print(f'Skewness: {skewness}')
print(f'Kurtosis: {kurtosis}')
print(f'Minimum: {data_min}')
print(f'Maximum: {data_max}')
print(f'95% Confidence Interval: {confidence}')
# Plotting
plt.hist(data, bins=30, alpha=0.6, color='g', edgecolor='black')
plt.axvline(mean, color='r', linestyle='dashed', linewidth=2, label=f'Mean: {mean:.2f}')
plt.axvline(data_min, color='blue', linestyle='dashed', linewidth=2, label=f'Min: {data_min:.2f}')
plt.axvline(data_max, color='purple', linestyle='dashed', linewidth=2, label=f'Max: {data_max:.2f}')
plt.axvline(confidence[0], color='orange', linestyle='dashed', linewidth=2, label=f'95% CI: {confidence[0]:.2f}')
plt.axvline(confidence[1], color='orange', linestyle='dashed', linewidth=2)
plt.legend()
plt.title('Data Distribution with Statistical Metrics')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.show()
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
# Study hours data (example dataset)
study_hours = np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 15, 16, 18, 20])
# Statistical metrics
mean = np.mean(study_hours)
variance = np.var(study_hours)
std_dev = np.std(study_hours)
skewness = stats.skew(study_hours)
kurtosis = stats.kurtosis(study_hours)
mode = stats.mode(study_hours)[0] # Access mode directly
data_min = np.min(study_hours)
q1 = np.percentile(study_hours, 25)
median = np.median(study_hours)
q3 = np.percentile(study_hours, 75)
data_max = np.max(study_hours)
iqr = q3 - q1 # Interquartile range
# Standardization
standardized_data = (study_hours - mean) / std_dev
# Print statistical summary
print(f'Mean: {mean}')
print(f'Variance: {variance}')
print(f'Standard Deviation: {std_dev}')
print(f'Skewness: {skewness}')
print(f'Kurtosis: {kurtosis}')
print(f'Minimum: {data_min}')
print(f'1st Quartile: {q1}')
print(f'Median: {median}')
print(f'3rd Quartile: {q3}')
print(f'Maximum: {data_max}')
print(f'Interquartile Range: {iqr}')
# Boxplot
plt.figure(figsize=(10, 6))
plt.boxplot(study_hours, vert=False)
plt.title('Boxplot of Study Hours')
plt.xlabel('Study Hours')
plt.show()
# Plot of standardized data
plt.figure(figsize=(10, 6))
sns.kdeplot(standardized_data, color='blue', label='Standardized Data', fill=True)
plt.axvline(0, color='red', linestyle='dashed', linewidth=2, label='Mean (Standardized)')
plt.title('Density Plot of Standardized Study Hours')
plt.xlabel('Standardized Value')
plt.legend()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import radon
from skimage.data import shepp_logan_phantom
# Create a sample phantom image (Shepp-Logan phantom used in tomography)
image = shepp_logan_phantom()
# Define the angles for the tomographic lines (projection angles)
theta = np.linspace(0., 180., max(image.shape), endpoint=False)
# Apply the Radon transform (tomography)
sinogram = radon(image, theta=theta, circle=True)
# Plot the original image and the Radon transform (tomographic lines)
plt.figure(figsize=(10, 8))
# Original image (phantom)
plt.subplot(1, 2, 1)
plt.imshow(image, cmap='gray')
plt.title('Original Image (Phantom)')
plt.axis('off')
# Radon transform (Sinogram)
plt.subplot(1, 2, 2)
plt.imshow(sinogram, cmap='gray', aspect='auto', extent=(0, 180, 0, sinogram.shape[0]))
plt.title('Radon Transform (Sinogram)')
plt.xlabel('Projection Angle (degrees)')
plt.ylabel('Projection Position')
plt.tight_layout()
plt.show()
import numpy as np
import statsmodels.api as sm
import pandas as pd
import matplotlib.pyplot as plt
# Sample data (X: independent variable, Y: dependent variable)
X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])
Y = np.array([45, 47, 49, 52, 55, 57, 59, 62, 64, 67, 70, 72, 75, 77, 80, 82, 85, 87, 90, 92, 95, 97, 100, 102, 105, 107, 110, 112, 115, 118])
# Add a constant to the independent variable (for intercept)
X = sm.add_constant(X)
# Fit the regression model
model = sm.OLS(Y, X)
results = model.fit()
# Print regression statistics
print(f'Multiple Correlation R: {np.sqrt(results.rsquared)}')
print(f'R^2: {results.rsquared}')
print(f'Adjusted R^2: {results.rsquared_adj}')
print(f'Standard Error: {np.sqrt(results.mse_resid)}')
print(f'F-statistic: {results.fvalue}')
print(f'P-value of F-statistic: {results.f_pvalue}')
print(f'Number of observations: {results.nobs}')
# Confidence intervals (95%)
conf_intervals = results.conf_int(0.05)
print(f'95% Confidence Intervals:\n{conf_intervals}')
# Plotting the regression line with confidence intervals
plt.figure(figsize=(10, 6))
plt.scatter(X[:, 1], Y, label='Observed data', color='blue')
plt.plot(X[:, 1], results.fittedvalues, color='red', label=f'Regression line (R² = {results.rsquared:.2f})')
plt.fill_between(X[:, 1], conf_intervals[0, 0] + conf_intervals[1, 0] * X[:, 1],
conf_intervals[0, 1] + conf_intervals[1, 1] * X[:, 1], color='gray', alpha=0.2, label='95% Confidence Interval')
plt.title('Linear Regression with 95% Confidence Interval')
plt.xlabel('Independent Variable (X)')
plt.ylabel('Dependent Variable (Y)')
plt.legend()
plt.show()
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Sample data (X: independent variable, Y: dependent variable)
X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
Y = np.array([3, 4, 5, 7, 9, 11, 13, 14, 15, 19])
# Add a constant to the independent variable (for intercept)
X = sm.add_constant(X)
# Fit the regression model
model = sm.OLS(Y, X)
results = model.fit()
# Predicted values
Y_pred = results.fittedvalues
# Calculate Sum of Squares
SST = np.sum((Y - np.mean(Y)) ** 2) # Total Sum of Squares
SSR = np.sum((Y_pred - np.mean(Y)) ** 2) # Sum of Squares for Regression
SSE = np.sum((Y - Y_pred) ** 2) # Residual Sum of Squares (Error)
# Calculate R^2 and Adjusted R^2
R_squared = SSR / SST
adjusted_R_squared = 1 - ((1 - R_squared) * (len(Y) - 1) / (len(Y) - X.shape[1]))
# Standard error of the residuals
n = len(Y)
p = X.shape[1] - 1 # number of predictors
residual_std_error = np.sqrt(SSE / (n - p - 1))
# Multiple correlation coefficient
R = np.sqrt(R_squared)
# Print the statistics
print(f"SSR (Sum of Squares for Regression): {SSR}")
print(f"SSE (Residual Sum of Squares): {SSE}")
print(f"SST (Total Sum of Squares): {SST}")
print(f"R^2: {R_squared}")
print(f"Adjusted R^2: {adjusted_R_squared}")
print(f"Residual Standard Error: {residual_std_error}")
print(f"Multiple Correlation Coefficient (R): {R}")
# Plot actual vs predicted
plt.figure(figsize=(10, 6))
plt.scatter(Y, Y_pred, color='blue', label='Predicted vs Actual')
plt.plot([min(Y), max(Y)], [min(Y), max(Y)], color='red', label='Perfect Fit')
plt.title('Actual vs Predicted Values')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.legend()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, binom, beta
from scipy.integrate import quad
# Parameters
n = 20 # Number of trials for binomial distribution
p = 0.5 # Probability of success for binomial distribution
a, b = 2, 5 # Parameters for beta distribution
# Normal Distribution
x = np.linspace(-4, 4, 1000)
pdf_normal = norm.pdf(x, loc=0, scale=1)
# Binomial Distribution
k = np.arange(0, n+1)
pmf_binom = binom.pmf(k, n, p)
# Beta Distribution
x_beta = np.linspace(0, 1, 1000)
pdf_beta = beta.pdf(x_beta, a, b)
# Hazard Function and Instantaneous Failure Rate
def hazard_function(t, beta=1.5, eta=10):
return (beta / eta) * (t / eta) ** (beta - 1)
def instantaneous_failure_rate(t, beta=1.5, eta=10):
return hazard_function(t, beta, eta)
# Time values
t = np.linspace(0.1, 30, 500)
hazard = hazard_function(t)
instantaneous_failure = instantaneous_failure_rate(t)
# Plot Normal Distribution
plt.figure(figsize=(14, 10))
plt.subplot(2, 3, 1)
plt.plot(x, pdf_normal, color='blue')
plt.title('Normal Distribution PDF')
plt.xlabel('x')
plt.ylabel('Probability Density')
# Plot Binomial Distribution
plt.subplot(2, 3, 2)
plt.stem(k, pmf_binom, basefmt='C0-', use_line_collection=True)
plt.title('Binomial Distribution PMF')
plt.xlabel('k')
plt.ylabel('Probability')
# Plot Beta Distribution
plt.subplot(2, 3, 3)
plt.plot(x_beta, pdf_beta, color='green')
plt.title('Beta Distribution PDF')
plt.xlabel('x')
plt.ylabel('Probability Density')
# Plot Hazard Function
plt.subplot(2, 3, 4)
plt.plot(t, hazard, color='red')
plt.title('Hazard Function')
plt.xlabel('Time')
plt.ylabel('Hazard Rate')
# Plot Instantaneous Failure Rate
plt.subplot(2, 3, 5)
plt.plot(t, instantaneous_failure, color='purple')
plt.title('Instantaneous Failure Rate')
plt.xlabel('Time')
plt.ylabel('Failure Rate')
# Plot Failure Periods
plt.subplot(2, 3, 6)
plt.plot(t, hazard, label='Early Failure Period', color='orange')
plt.plot(t, np.ones_like(t) * 0.5, label='Random Failure Period', color='cyan')
plt.plot(t, np.exp(-0.1 * t), label='Wear-Out Failure Period', color='magenta')
plt.title('Failure Periods')
plt.xlabel('Time')
plt.ylabel('Failure Rate')
plt.legend()
plt.tight_layout()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import expon, chi2
from scipy.optimize import newton
# Parameters
mu = 20.6 # Estimated parameter for the exponential distribution
n = 10 # Number of observations
alpha = 0.05 # Significance level for confidence interval
# Generate sample data
np.random.seed(0)
data = np.random.exponential(scale=mu, size=n)
W = np.sum(data) # Sum of the observations
# Calculate the maximum likelihood estimate of mu
mu_hat = W / n
# Variance and standard deviation of the exponential distribution
variance = mu**2
std_dev = mu
# Chi-squared distribution for confidence intervals
df = 2 * n
chi2_upper = chi2.ppf(1 - alpha/2, df)
chi2_lower = chi2.ppf(alpha/2, df)
# Confidence interval for mu
CI_lower = (2 * W) / chi2_upper
CI_upper = (2 * W) / chi2_lower
# Probability density function for exponential distribution
x = np.linspace(0, 2 * mu, 1000)
pdf_exp = expon.pdf(x, scale=mu)
# Plot Exponential Distribution PDF
plt.figure(figsize=(12, 8))
plt.subplot(2, 2, 1)
plt.plot(x, pdf_exp, color='blue')
plt.title('Exponential Distribution PDF')
plt.xlabel('x')
plt.ylabel('Probability Density')
# Plot Confidence Interval
plt.subplot(2, 2, 2)
plt.fill_between([CI_lower, CI_upper], [0, 0], [1, 1], color='yellow', alpha=0.5)
plt.axvline(CI_lower, color='red', linestyle='--', label='Lower CI Bound')
plt.axvline(CI_upper, color='green', linestyle='--', label='Upper CI Bound')
plt.title('Confidence Interval for $\mu$')
plt.xlabel('Value of $\mu$')
plt.ylabel('Density')
plt.legend()
# Plot Histogram of Data
plt.subplot(2, 2, 3)
plt.hist(data, bins=10, density=True, alpha=0.6, color='g', edgecolor='black')
plt.plot(x, pdf_exp, color='blue', lw=2)
plt.title('Histogram of Sample Data with Exponential PDF')
plt.xlabel('x')
plt.ylabel('Density')
# Plot Chi-Squared Distribution for Confidence Interval
plt.subplot(2, 2, 4)
chi2_values = np.linspace(0, 50, 1000)
chi2_pdf = chi2.pdf(chi2_values, df)
plt.plot(chi2_values, chi2_pdf, color='purple')
plt.axvline(chi2_upper, color='red', linestyle='--', label='Upper Bound (Chi-squared)')
plt.axvline(chi2_lower, color='green', linestyle='--', label='Lower Bound (Chi-squared)')
plt.title('Chi-Squared Distribution for Confidence Interval')
plt.xlabel('Value')
plt.ylabel('Probability Density')
plt.legend()
plt.tight_layout()
plt.show()
print(f"Estimated parameter (mu): {mu_hat:.2f}")
print(f"95% Confidence Interval for mu: ({CI_lower:.2f}, {CI_upper:.2f})")
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal, beta
from matplotlib.patches import Ellipse
# 2D Normal Distribution Parameters
mean = [0, 0]
cov = [[1, 0.5], [0.5, 1]] # Covariance matrix
# 2D Beta Distribution Parameters
alpha = 2.0
beta_param = 5.0
# Create a grid of points
x = np.linspace(-4, 4, 100)
y = np.linspace(-4, 4, 100)
X, Y = np.meshgrid(x, y)
pos = np.dstack((X, Y))
# Compute PDF for 2D Normal Distribution
rv = multivariate_normal(mean, cov)
pdf_normal = rv.pdf(pos)
# Compute PDF for 2D Beta Distribution
# Beta distribution is defined on the interval [0,1], so we map the coordinates to [0,1]
X_beta = (X - np.min(X)) / (np.max(X) - np.min(X))
Y_beta = (Y - np.min(Y)) / (np.max(Y) - np.min(Y))
pdf_beta = beta.pdf(X_beta, alpha, beta_param) * beta.pdf(Y_beta, alpha, beta_param)
# Plot 2D Normal Distribution
plt.figure(figsize=(14, 6))
plt.subplot(1, 2, 1)
plt.contourf(X, Y, pdf_normal, cmap='viridis')
plt.colorbar(label='Probability Density')
plt.title('2D Normal Distribution')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
# Plot 2D Beta Distribution
plt.subplot(1, 2, 2)
plt.contourf(X, Y, pdf_beta, cmap='plasma')
plt.colorbar(label='Probability Density')
plt.title('2D Beta Distribution')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.tight_layout()
plt.show()