
Data Science / Statistics

Posted at 2024-09-19
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Sample data
data = np.random.normal(0, 1, 1000)

# Statistical metrics
mean = np.mean(data)
variance = np.var(data)
std_dev = np.std(data)
skewness = stats.skew(data)
kurtosis = stats.kurtosis(data)
data_min = np.min(data)
data_max = np.max(data)

# Confidence interval (95%)
confidence = stats.norm.interval(0.95, loc=mean, scale=std_dev/np.sqrt(len(data)))

# Print metrics
print(f'Mean: {mean}')
print(f'Variance: {variance}')
print(f'Standard Deviation: {std_dev}')
print(f'Skewness: {skewness}')
print(f'Kurtosis: {kurtosis}')
print(f'Minimum: {data_min}')
print(f'Maximum: {data_max}')
print(f'95% Confidence Interval: {confidence}')

# Plotting
plt.hist(data, bins=30, alpha=0.6, color='g', edgecolor='black')
plt.axvline(mean, color='r', linestyle='dashed', linewidth=2, label=f'Mean: {mean:.2f}')
plt.axvline(data_min, color='blue', linestyle='dashed', linewidth=2, label=f'Min: {data_min:.2f}')
plt.axvline(data_max, color='purple', linestyle='dashed', linewidth=2, label=f'Max: {data_max:.2f}')
plt.axvline(confidence[0], color='orange', linestyle='dashed', linewidth=2, label=f'95% CI: {confidence[0]:.2f}')
plt.axvline(confidence[1], color='orange', linestyle='dashed', linewidth=2)
plt.legend()
plt.title('Data Distribution with Statistical Metrics')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.show()
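
The interval above relies on the normal approximation. As a quick cross-check (an addition, not part of the original code), the same quantities can be fed into a t-based interval; this sketch reuses data, mean, and std_dev from the block above.

# t-based 95% confidence interval for the mean (cross-check; reuses data, mean, std_dev)
t_confidence = stats.t.interval(0.95, df=len(data) - 1, loc=mean, scale=std_dev / np.sqrt(len(data)))
print(f'95% Confidence Interval (t-distribution): {t_confidence}')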

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns

# Study hours data (example dataset)
study_hours = np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 15, 16, 18, 20])

# Statistical metrics
mean = np.mean(study_hours)
variance = np.var(study_hours)
std_dev = np.std(study_hours)
skewness = stats.skew(study_hours)
kurtosis = stats.kurtosis(study_hours)
mode = stats.mode(study_hours)[0]  # Access mode directly
data_min = np.min(study_hours)
q1 = np.percentile(study_hours, 25)
median = np.median(study_hours)
q3 = np.percentile(study_hours, 75)
data_max = np.max(study_hours)
iqr = q3 - q1  # Interquartile range

# Standardization
standardized_data = (study_hours - mean) / std_dev

# Print statistical summary
print(f'Mean: {mean}')
print(f'Variance: {variance}')
print(f'Standard Deviation: {std_dev}')
print(f'Skewness: {skewness}')
print(f'Kurtosis: {kurtosis}')
print(f'Mode: {mode}')

print(f'Minimum: {data_min}')
print(f'1st Quartile: {q1}')
print(f'Median: {median}')
print(f'3rd Quartile: {q3}')
print(f'Maximum: {data_max}')
print(f'Interquartile Range: {iqr}')

# Boxplot
plt.figure(figsize=(10, 6))
plt.boxplot(study_hours, vert=False)
plt.title('Boxplot of Study Hours')
plt.xlabel('Study Hours')
plt.show()

# Plot of standardized data
plt.figure(figsize=(10, 6))
sns.kdeplot(standardized_data, color='blue', label='Standardized Data', fill=True)
plt.axvline(0, color='red', linestyle='dashed', linewidth=2, label='Mean (Standardized)')
plt.title('Density Plot of Standardized Study Hours')
plt.xlabel('Standardized Value')
plt.legend()
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import radon
from skimage.data import shepp_logan_phantom

# Create a sample phantom image (Shepp-Logan phantom used in tomography)
image = shepp_logan_phantom()

# Define the angles for the tomographic lines (projection angles)
theta = np.linspace(0., 180., max(image.shape), endpoint=False)

# Apply the Radon transform (tomography)
sinogram = radon(image, theta=theta, circle=True)

# Plot the original image and the Radon transform (tomographic lines)
plt.figure(figsize=(10, 8))

# Original image (phantom)
plt.subplot(1, 2, 1)
plt.imshow(image, cmap='gray')
plt.title('Original Image (Phantom)')
plt.axis('off')

# Radon transform (Sinogram)
plt.subplot(1, 2, 2)
plt.imshow(sinogram, cmap='gray', aspect='auto', extent=(0, 180, 0, sinogram.shape[0]))
plt.title('Radon Transform (Sinogram)')
plt.xlabel('Projection Angle (degrees)')
plt.ylabel('Projection Position')

plt.tight_layout()
plt.show()

import numpy as np
import statsmodels.api as sm
import pandas as pd
import matplotlib.pyplot as plt

# Sample data (X: independent variable, Y: dependent variable)
X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])
Y = np.array([45, 47, 49, 52, 55, 57, 59, 62, 64, 67, 70, 72, 75, 77, 80, 82, 85, 87, 90, 92, 95, 97, 100, 102, 105, 107, 110, 112, 115, 118])

# Add a constant to the independent variable (for intercept)
X = sm.add_constant(X)

# Fit the regression model
model = sm.OLS(Y, X)
results = model.fit()

# Print regression statistics
print(f'Multiple Correlation R: {np.sqrt(results.rsquared)}')
print(f'R^2: {results.rsquared}')
print(f'Adjusted R^2: {results.rsquared_adj}')
print(f'Standard Error: {np.sqrt(results.mse_resid)}')
print(f'F-statistic: {results.fvalue}')
print(f'P-value of F-statistic: {results.f_pvalue}')
print(f'Number of observations: {results.nobs}')



# Confidence intervals (95%) for the coefficients
conf_intervals = results.conf_int(alpha=0.05)
print(f'95% Confidence Intervals for the coefficients:\n{conf_intervals}')

# Plotting the regression line with the 95% confidence band for the mean prediction
pred_ci = results.get_prediction(X).conf_int(alpha=0.05)
plt.figure(figsize=(10, 6))
plt.scatter(X[:, 1], Y, label='Observed data', color='blue')
plt.plot(X[:, 1], results.fittedvalues, color='red', label=f'Regression line (R² = {results.rsquared:.2f})')
plt.fill_between(X[:, 1], pred_ci[:, 0], pred_ci[:, 1], color='gray', alpha=0.2, label='95% Confidence Interval')
plt.title('Linear Regression with 95% Confidence Interval')
plt.xlabel('Independent Variable (X)')
plt.ylabel('Dependent Variable (Y)')
plt.legend()
plt.show()

import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Sample data (X: independent variable, Y: dependent variable)
X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
Y = np.array([3, 4, 5, 7, 9, 11, 13, 14, 15, 19])

# Add a constant to the independent variable (for intercept)
X = sm.add_constant(X)

# Fit the regression model
model = sm.OLS(Y, X)
results = model.fit()

# Predicted values
Y_pred = results.fittedvalues

# Calculate Sum of Squares
SST = np.sum((Y - np.mean(Y)) ** 2)  # Total Sum of Squares
SSR = np.sum((Y_pred - np.mean(Y)) ** 2)  # Sum of Squares for Regression
SSE = np.sum((Y - Y_pred) ** 2)  # Residual Sum of Squares (Error)

# Calculate R^2 and Adjusted R^2
R_squared = SSR / SST
adjusted_R_squared = 1 - ((1 - R_squared) * (len(Y) - 1) / (len(Y) - X.shape[1]))

# Standard error of the residuals
n = len(Y)
p = X.shape[1] - 1  # number of predictors
residual_std_error = np.sqrt(SSE / (n - p - 1))

# Multiple correlation coefficient
R = np.sqrt(R_squared)

# Print the statistics
print(f"SSR (Sum of Squares for Regression): {SSR}")
print(f"SSE (Residual Sum of Squares): {SSE}")
print(f"SST (Total Sum of Squares): {SST}")
print(f"R^2: {R_squared}")
print(f"Adjusted R^2: {adjusted_R_squared}")
print(f"Residual Standard Error: {residual_std_error}")
print(f"Multiple Correlation Coefficient (R): {R}")

# Plot actual vs predicted
plt.figure(figsize=(10, 6))
plt.scatter(Y, Y_pred, color='blue', label='Predicted vs Actual')
plt.plot([min(Y), max(Y)], [min(Y), max(Y)], color='red', label='Perfect Fit')
plt.title('Actual vs Predicted Values')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.legend()
plt.show()
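
As a sanity check (added here, not in the original code), the decomposition computed above should satisfy SST = SSR + SSE, and the hand-computed R^2 should match the value statsmodels reports.

# Sanity checks on the sum-of-squares decomposition (reuses SST, SSR, SSE, R_squared, results)
print(f"SST equals SSR + SSE: {np.isclose(SST, SSR + SSE)}")
print(f"R^2 matches statsmodels: {np.isclose(R_squared, results.rsquared)}")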

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, binom, beta
from scipy.integrate import quad

# Parameters
n = 20  # Number of trials for binomial distribution
p = 0.5  # Probability of success for binomial distribution
a, b = 2, 5  # Parameters for beta distribution

# Normal Distribution
x = np.linspace(-4, 4, 1000)
pdf_normal = norm.pdf(x, loc=0, scale=1)

# Binomial Distribution
k = np.arange(0, n+1)
pmf_binom = binom.pmf(k, n, p)

# Beta Distribution
x_beta = np.linspace(0, 1, 1000)
pdf_beta = beta.pdf(x_beta, a, b)

# Hazard Function and Instantaneous Failure Rate
def hazard_function(t, beta=1.5, eta=10):
    return (beta / eta) * (t / eta) ** (beta - 1)

def instantaneous_failure_rate(t, beta=1.5, eta=10):
    return hazard_function(t, beta, eta)

# Time values
t = np.linspace(0.1, 30, 500)
hazard = hazard_function(t)
instantaneous_failure = instantaneous_failure_rate(t)

# Plot Normal Distribution
plt.figure(figsize=(14, 10))

plt.subplot(2, 3, 1)
plt.plot(x, pdf_normal, color='blue')
plt.title('Normal Distribution PDF')
plt.xlabel('x')
plt.ylabel('Probability Density')

# Plot Binomial Distribution
plt.subplot(2, 3, 2)
plt.stem(k, pmf_binom, basefmt='C0-')
plt.title('Binomial Distribution PMF')
plt.xlabel('k')
plt.ylabel('Probability')

# Plot Beta Distribution
plt.subplot(2, 3, 3)
plt.plot(x_beta, pdf_beta, color='green')
plt.title('Beta Distribution PDF')
plt.xlabel('x')
plt.ylabel('Probability Density')

# Plot Hazard Function
plt.subplot(2, 3, 4)
plt.plot(t, hazard, color='red')
plt.title('Hazard Function')
plt.xlabel('Time')
plt.ylabel('Hazard Rate')

# Plot Instantaneous Failure Rate
plt.subplot(2, 3, 5)
plt.plot(t, instantaneous_failure, color='purple')
plt.title('Instantaneous Failure Rate')
plt.xlabel('Time')
plt.ylabel('Failure Rate')

# Plot Failure Periods
plt.subplot(2, 3, 6)
plt.plot(t, hazard, label='Early Failure Period', color='orange')
plt.plot(t, np.ones_like(t) * 0.5, label='Random Failure Period', color='cyan')
plt.plot(t, np.exp(-0.1 * t), label='Wear-Out Failure Period', color='magenta')
plt.title('Failure Periods')
plt.xlabel('Time')
plt.ylabel('Failure Rate')
plt.legend()

plt.tight_layout()
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import expon, chi2
from scipy.optimize import newton

# Parameters
mu = 20.6  # Estimated parameter for the exponential distribution
n = 10     # Number of observations
alpha = 0.05  # Significance level for confidence interval

# Generate sample data
np.random.seed(0)
data = np.random.exponential(scale=mu, size=n)
W = np.sum(data)  # Sum of the observations

# Calculate the maximum likelihood estimate of mu
mu_hat = W / n

# Variance and standard deviation of the exponential distribution
variance = mu**2
std_dev = mu

# Chi-squared distribution for confidence intervals
df = 2 * n
chi2_upper = chi2.ppf(1 - alpha/2, df)
chi2_lower = chi2.ppf(alpha/2, df)

# Confidence interval for mu
CI_lower = (2 * W) / chi2_upper
CI_upper = (2 * W) / chi2_lower

# Probability density function for exponential distribution
x = np.linspace(0, 2 * mu, 1000)
pdf_exp = expon.pdf(x, scale=mu)

# Plot Exponential Distribution PDF
plt.figure(figsize=(12, 8))

plt.subplot(2, 2, 1)
plt.plot(x, pdf_exp, color='blue')
plt.title('Exponential Distribution PDF')
plt.xlabel('x')
plt.ylabel('Probability Density')

# Plot Confidence Interval
plt.subplot(2, 2, 2)
plt.fill_between([CI_lower, CI_upper], [0, 0], [1, 1], color='yellow', alpha=0.5)
plt.axvline(CI_lower, color='red', linestyle='--', label='Lower CI Bound')
plt.axvline(CI_upper, color='green', linestyle='--', label='Upper CI Bound')
plt.title(r'Confidence Interval for $\mu$')
plt.xlabel(r'Value of $\mu$')
plt.ylabel('Density')
plt.legend()

# Plot Histogram of Data
plt.subplot(2, 2, 3)
plt.hist(data, bins=10, density=True, alpha=0.6, color='g', edgecolor='black')
plt.plot(x, pdf_exp, color='blue', lw=2)
plt.title('Histogram of Sample Data with Exponential PDF')
plt.xlabel('x')
plt.ylabel('Density')

# Plot Chi-Squared Distribution for Confidence Interval
plt.subplot(2, 2, 4)
chi2_values = np.linspace(0, 50, 1000)
chi2_pdf = chi2.pdf(chi2_values, df)
plt.plot(chi2_values, chi2_pdf, color='purple')
plt.axvline(chi2_upper, color='red', linestyle='--', label='Upper Bound (Chi-squared)')
plt.axvline(chi2_lower, color='green', linestyle='--', label='Lower Bound (Chi-squared)')
plt.title('Chi-Squared Distribution for Confidence Interval')
plt.xlabel('Value')
plt.ylabel('Probability Density')
plt.legend()

plt.tight_layout()
plt.show()

print(f"Estimated parameter (mu): {mu_hat:.2f}")
print(f"95% Confidence Interval for mu: ({CI_lower:.2f}, {CI_upper:.2f})")

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal, beta
from matplotlib.patches import Ellipse

# 2D Normal Distribution Parameters
mean = [0, 0]
cov = [[1, 0.5], [0.5, 1]]  # Covariance matrix

# 2D Beta Distribution Parameters
alpha = 2.0
beta_param = 5.0

# Create a grid of points
x = np.linspace(-4, 4, 100)
y = np.linspace(-4, 4, 100)
X, Y = np.meshgrid(x, y)
pos = np.dstack((X, Y))

# Compute PDF for 2D Normal Distribution
rv = multivariate_normal(mean, cov)
pdf_normal = rv.pdf(pos)

# Compute PDF for 2D Beta Distribution
# Beta distribution is defined on the interval [0,1], so we map the coordinates to [0,1]
X_beta = (X - np.min(X)) / (np.max(X) - np.min(X))
Y_beta = (Y - np.min(Y)) / (np.max(Y) - np.min(Y))
pdf_beta = beta.pdf(X_beta, alpha, beta_param) * beta.pdf(Y_beta, alpha, beta_param)

# Plot 2D Normal Distribution
plt.figure(figsize=(14, 6))

plt.subplot(1, 2, 1)
plt.contourf(X, Y, pdf_normal, cmap='viridis')
plt.colorbar(label='Probability Density')
plt.title('2D Normal Distribution')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')

# Plot 2D Beta Distribution
plt.subplot(1, 2, 2)
plt.contourf(X, Y, pdf_beta, cmap='plasma')
plt.colorbar(label='Probability Density')
plt.title('2D Beta Distribution')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')

plt.tight_layout()
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import binom, poisson

# 1. Plotting Discrete Uniform Distribution
a, b = 0, 10
uniform_vals = np.arange(a, b+1)
uniform_probs = np.full(len(uniform_vals), 1/(b-a+1))

plt.figure(figsize=(12, 8))

plt.subplot(2, 2, 1)
plt.bar(uniform_vals, uniform_probs, color='blue', alpha=0.7)
plt.title('Uniform Distribution')
plt.xlabel('Values')
plt.ylabel('Probability')

# 2. Plotting Bernoulli Distribution
p = 0.5
bernoulli_vals = [0, 1]
bernoulli_probs = [1-p, p]

plt.subplot(2, 2, 2)
plt.bar(bernoulli_vals, bernoulli_probs, color='green', alpha=0.7)
plt.title('Bernoulli Distribution (p=0.5)')
plt.xlabel('Values')
plt.ylabel('Probability')

# 3. Plotting Binomial Distribution
n, p = 10, 0.5
binom_vals = np.arange(0, n+1)
binom_probs = binom.pmf(binom_vals, n, p)

plt.subplot(2, 2, 3)
plt.bar(binom_vals, binom_probs, color='purple', alpha=0.7)
plt.title(f'Binomial Distribution (n={n}, p={p})')
plt.xlabel('Number of Successes')
plt.ylabel('Probability')

# 4. Plotting Poisson Distribution
lam = 3
poisson_vals = np.arange(0, 15)
poisson_probs = poisson.pmf(poisson_vals, lam)

plt.subplot(2, 2, 4)
plt.bar(poisson_vals, poisson_probs, color='orange', alpha=0.7)
plt.title(f'Poisson Distribution (λ={lam})')
plt.xlabel('Number of Events')
plt.ylabel('Probability')

plt.tight_layout()
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, t, chi2, f

# Create a figure to plot the distributions
plt.figure(figsize=(12, 10))

# 1. Plotting Normal Distribution (Gaussian Distribution)
mean, std_dev = 0, 1  # Parameters for Normal Distribution
x = np.linspace(-5, 5, 1000)
normal_pdf = norm.pdf(x, mean, std_dev)

plt.subplot(3, 2, 1)
plt.plot(x, normal_pdf, color='blue')
plt.title('Normal Distribution (μ=0, σ=1)')
plt.xlabel('x')
plt.ylabel('Density')

# 2. Plotting Standard Normal Distribution
standard_normal_pdf = norm.pdf(x)

plt.subplot(3, 2, 2)
plt.plot(x, standard_normal_pdf, color='green')
plt.title('Standard Normal Distribution (μ=0, σ=1)')
plt.xlabel('x')
plt.ylabel('Density')

# 3. Plotting t-Distribution
df_t = 10  # Degrees of freedom for t-distribution
t_pdf = t.pdf(x, df_t)

plt.subplot(3, 2, 3)
plt.plot(x, t_pdf, color='purple')
plt.title(f't-Distribution (df={df_t})')
plt.xlabel('x')
plt.ylabel('Density')

# 4. Plotting Chi-Squared Distribution
df_chi2 = 5  # Degrees of freedom for Chi-Squared distribution
x_chi2 = np.linspace(0, 20, 1000)
chi2_pdf = chi2.pdf(x_chi2, df_chi2)

plt.subplot(3, 2, 4)
plt.plot(x_chi2, chi2_pdf, color='orange')
plt.title(f'Chi-Squared Distribution (df={df_chi2})')
plt.xlabel('x')
plt.ylabel('Density')

# 5. Plotting F-Distribution
dfn, dfd = 5, 10  # Degrees of freedom for numerator and denominator
x_f = np.linspace(0, 5, 1000)
f_pdf = f.pdf(x_f, dfn, dfd)

plt.subplot(3, 2, 5)
plt.plot(x_f, f_pdf, color='red')
plt.title(f'F-Distribution (df1={dfn}, df2={dfd})')
plt.xlabel('x')
plt.ylabel('Density')

plt.tight_layout()
plt.show()




import numpy as np

# Generate a sample of n random variables from a normal distribution
np.random.seed(42)  # Set seed for reproducibility
n = 10  # Sample size
X = np.random.normal(loc=50, scale=10, size=n)  # Generate random sample

# 1. Sample Mean (標本平均)
X_mean = np.mean(X)

# 2. Sample Variance (標本分散)
S2 = np.sum((X - X_mean) ** 2) / n

# 3. Unbiased Variance (不偏分散)
U2 = np.sum((X - X_mean) ** 2) / (n - 1)

# 4. Verifying the relationship between Unbiased Variance and Sample Variance
# U^2 = (n / (n - 1)) * S^2
U2_from_S2 = (n / (n - 1)) * S2

# Print the results
print(f"Sample: {X}")
print(f"Sample Mean (標本平均): {X_mean:.2f}")
print(f"Sample Variance (標本分散) S^2: {S2:.2f}")
print(f"Unbiased Variance (不偏分散) U^2: {U2:.2f}")
print(f"Unbiased Variance from Sample Variance: {U2_from_S2:.2f}")

import numpy as np
from scipy.stats import norm

# Given parameters
p0 = 0.2  # Null hypothesis proportion
alpha = 0.05  # Significance level (alpha)
n = 100  # Sample size (assumed)
x = 25  # Number of successes (example)
p_hat = x / n  # Sample proportion

# 1. Standard error under the null hypothesis
std_error = np.sqrt(p0 * (1 - p0) / n)

# 2. Test statistic (z-value)
z = (p_hat - p0) / std_error

# 3. Critical value (right-tailed test)
z_critical = norm.ppf(1 - alpha)

# 4. P-value
p_value = 1 - norm.cdf(z)

# 5. Decision
if z > z_critical:
    decision = "Reject the null hypothesis (H0)."
else:
    decision = "Fail to reject the null hypothesis (H0)."

# Print the results
print(f"Sample proportion (p̂): {p_hat:.4f}")
print(f"Test statistic (z): {z:.4f}")
print(f"Critical value (z_critical): {z_critical:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Decision: {decision}")




import numpy as np
import scipy.stats as stats

# Given data
sample_mean = 81.067  # Sample mean
sample_var = 69.967   # Sample variance
n = 64               # Sample size
alpha = 0.05         # Significance level (for a 95% confidence interval)

# Sample standard deviation
sample_std = np.sqrt(sample_var)

# Critical value of the t-distribution
t_critical = stats.t.ppf(1 - alpha / 2, df=n-1)

# Standard error of the sample mean
std_error = sample_std / np.sqrt(n)

# Compute the confidence interval
confidence_interval = (sample_mean - t_critical * std_error, sample_mean + t_critical * std_error)

print(f"95% confidence interval: {confidence_interval}")


import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Sample data (simple regression example)
# Example: x data and the corresponding y data
x = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1)  # Explanatory variable
y = np.array([1, 2, 1.3, 3.75, 2.25, 5.5])      # Response variable

# Create the linear regression model
model = LinearRegression()

# Fit the model to the data
model.fit(x, y)

# Compute the predicted values on the regression line
y_pred = model.predict(x)

# Compute the coefficient of determination (R^2)
r_squared = r2_score(y, y_pred)

print(f'Coefficient of determination (R^2): {r_squared}')

# Plot
plt.scatter(x, y, color='blue', label='Observed data')
plt.plot(x, y_pred, color='red', label='Regression line')
plt.title(f'Regression Analysis (R^2 = {r_squared:.2f})')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()


import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import dirichlet

# Parameters α1, α2
alpha = [2, 5, 3]  # Parameters α1, α2, and α3 (related to 2D beta via Dirichlet distribution)

# Generate samples from Dirichlet distribution (treated as 2D)
samples = dirichlet.rvs(alpha, size=1000)

# We use the first two components for a 2D plot
x = samples[:, 0]
y = samples[:, 1]

# Plot
plt.figure(figsize=(6, 6))
plt.scatter(x, y, alpha=0.5)
plt.title('Samples from a 2D Beta Distribution (via Dirichlet)')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.show()





import numpy as np

# Activation function: ReLU
def relu(x):
    return np.maximum(0, x)

# Derivative of the activation function
def relu_derivative(x):
    return np.where(x > 0, 1, 0)

# Neural network class
class NeuralNetwork:
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        # Initialize weights
        self.weights_input_hidden1 = np.random.randn(input_size, hidden_size1)
        self.weights_hidden1_hidden2 = np.random.randn(hidden_size1, hidden_size2)
        self.weights_hidden2_output = np.random.randn(hidden_size2, output_size)

        # Initialize biases
        self.bias_hidden1 = np.zeros((1, hidden_size1))
        self.bias_hidden2 = np.zeros((1, hidden_size2))
        self.bias_output = np.zeros((1, output_size))

    # Forward pass
    def forward(self, X):
        # First hidden layer
        self.hidden1_input = np.dot(X, self.weights_input_hidden1) + self.bias_hidden1
        self.hidden1_output = relu(self.hidden1_input)

        # Second hidden layer
        self.hidden2_input = np.dot(self.hidden1_output, self.weights_hidden1_hidden2) + self.bias_hidden2
        self.hidden2_output = relu(self.hidden2_input)

        # Output layer
        self.output_input = np.dot(self.hidden2_output, self.weights_hidden2_output) + self.bias_output
        self.output = relu(self.output_input)

        return self.output

# Hyperparameters
input_size = 3    # Number of input-layer nodes
hidden_size1 = 5  # Number of nodes in the first hidden layer
hidden_size2 = 4  # Number of nodes in the second hidden layer
output_size = 2   # Number of output-layer nodes

# Create an instance of the neural network
nn = NeuralNetwork(input_size, hidden_size1, hidden_size2, output_size)

# Sample input data (1 row, 3 columns)
X = np.array([[0.1, 0.5, 0.2]])

# Run the forward pass
output = nn.forward(X)

# Print the output
print("Output:", output)




import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR

# Generate sample data
np.random.seed(0)
X = np.sort(5 * np.random.rand(40, 1), axis=0)  # 1D input space
y = np.sin(X).ravel()  # True function (sine wave)

# Add noise
y[::5] += 3 * (0.5 - np.random.rand(8))

# Build SVR models
svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1)
svr_lin = SVR(kernel='linear', C=100)
svr_poly = SVR(kernel='poly', C=100, degree=3)

# Fit the models
y_rbf = svr_rbf.fit(X, y).predict(X)
y_lin = svr_lin.fit(X, y).predict(X)
y_poly = svr_poly.fit(X, y).predict(X)

# Plotting the results
plt.scatter(X, y, color='darkorange', label='Data points')
plt.plot(X, y_rbf, color='navy', label='RBF kernel', lw=2)
plt.plot(X, y_lin, color='c', label='Linear kernel', lw=2)
plt.plot(X, y_poly, color='cornflowerblue', label='Polynomial kernel', lw=2)
plt.xlabel('Input space')
plt.ylabel('Regression output')
plt.title('SVR Regression with Different Kernels')
plt.legend()
plt.show()




import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Generate sample data
np.random.seed(0)
x = np.random.rand(100) * 10  # Random values in the range 0-10
y = 2.5 * x + np.random.randn(100) * 2  # Linear relationship with added noise

# Compute summary statistics
mean_x = np.mean(x)  # Mean
mean_y = np.mean(y)
std_x = np.std(x)  # Standard deviation
std_y = np.std(y)
var_x = np.var(x)  # Variance
var_y = np.var(y)
cv_x = std_x / mean_x  # Coefficient of variation
cv_y = std_y / mean_y
correlation_matrix = np.corrcoef(x, y)  # Correlation coefficient
correlation_xy = correlation_matrix[0, 1]

# Fit the regression line
x_reshape = x.reshape(-1, 1)  # Reshape for the linear regression model
model = LinearRegression()
model.fit(x_reshape, y)
y_pred = model.predict(x_reshape)  # Predicted values on the regression line

# Coefficient of determination
r_squared = r2_score(y, y_pred)

# Print the results
print(f"Mean (x): {mean_x}, Mean (y): {mean_y}")
print(f"Standard deviation (x): {std_x}, Standard deviation (y): {std_y}")
print(f"Variance (x): {var_x}, Variance (y): {var_y}")
print(f"Coefficient of variation (x): {cv_x}, Coefficient of variation (y): {cv_y}")
print(f"Correlation coefficient: {correlation_xy}")
print(f"Coefficient of determination: {r_squared}")

# Plot
plt.figure(figsize=(10, 6))
plt.scatter(x, y, label='Data Points', color='blue')  # Plot the original data
plt.plot(x, y_pred, color='red', label=f'Regression Line\ny={model.coef_[0]:.2f}x+{model.intercept_:.2f}')
plt.title('Linear Regression and Data')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)
plt.show()





import numpy as np
from scipy.stats import spearmanr, kendalltau
import matplotlib.pyplot as plt

# Generate sample data
np.random.seed(0)
x = np.random.rand(100) * 10  # Random values in the range 0-10
y = 2.5 * x + np.random.randn(100) * 5  # Linear relationship plus noise

# Spearman rank correlation coefficient
spearman_corr, spearman_p = spearmanr(x, y)

# Kendall rank correlation coefficient
kendall_corr, kendall_p = kendalltau(x, y)

# Print the results
print(f"Spearman rank correlation: {spearman_corr}, p-value: {spearman_p}")
print(f"Kendall rank correlation: {kendall_corr}, p-value: {kendall_p}")

# Plot the data
plt.figure(figsize=(8, 6))
plt.scatter(x, y, label="Data points", color="blue")
plt.title("Scatter Plot of Data")
plt.xlabel("x")
plt.ylabel("y")
plt.grid(True)
plt.legend()
plt.show()




import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import binom, poisson, norm, chi2, f, t

# Plot settings
fig, axs = plt.subplots(4, 2, figsize=(12, 16))
fig.tight_layout(pad=5.0)

# 1. Uniform Distribution
uniform_data = np.random.uniform(low=0, high=10, size=1000)
axs[0, 0].hist(uniform_data, bins=30, color='skyblue', edgecolor='black')
axs[0, 0].set_title('Uniform Distribution')
axs[0, 0].set_xlabel('Value')
axs[0, 0].set_ylabel('Frequency')

# 2. Binomial Distribution
n, p = 10, 0.5  # n: number of trials, p: probability of success
binom_data = binom.rvs(n, p, size=1000)
axs[0, 1].hist(binom_data, bins=np.arange(0, 12)-0.5, color='skyblue', edgecolor='black')
axs[0, 1].set_title('Binomial Distribution')
axs[0, 1].set_xlabel('Number of Successes')
axs[0, 1].set_ylabel('Frequency')

# 3. Poisson Distribution
lambda_poisson = 5  # Mean
poisson_data = poisson.rvs(mu=lambda_poisson, size=1000)
axs[1, 0].hist(poisson_data, bins=np.arange(0, 15)-0.5, color='skyblue', edgecolor='black')
axs[1, 0].set_title('Poisson Distribution')
axs[1, 0].set_xlabel('Occurrences')
axs[1, 0].set_ylabel('Frequency')

# 4. Normal Distribution
mu, sigma = 0, 1  # Mean and standard deviation
normal_data = np.random.normal(mu, sigma, 1000)
axs[1, 1].hist(normal_data, bins=30, color='skyblue', edgecolor='black')
axs[1, 1].set_title('Normal Distribution')
axs[1, 1].set_xlabel('Value')
axs[1, 1].set_ylabel('Frequency')

# 5. Standard Normal Distribution
standard_normal_data = np.random.randn(1000)
axs[2, 0].hist(standard_normal_data, bins=30, color='skyblue', edgecolor='black')
axs[2, 0].set_title('Standard Normal Distribution')
axs[2, 0].set_xlabel('Value')
axs[2, 0].set_ylabel('Frequency')

# 6. Chi-Square Distribution
df_chi2 = 4  # Degrees of freedom
chi2_data = chi2.rvs(df=df_chi2, size=1000)
axs[2, 1].hist(chi2_data, bins=30, color='skyblue', edgecolor='black')
axs[2, 1].set_title(r'Chi-Square Distribution ($\chi^2$)')
axs[2, 1].set_xlabel('Value')
axs[2, 1].set_ylabel('Frequency')

# 7. F-Distribution
dfn, dfd = 5, 2  # Numerator and denominator degrees of freedom
f_data = f.rvs(dfn, dfd, size=1000)
axs[3, 0].hist(f_data, bins=30, color='skyblue', edgecolor='black')
axs[3, 0].set_title('F-Distribution')
axs[3, 0].set_xlabel('Value')
axs[3, 0].set_ylabel('Frequency')

# 8. t-Distribution
df_t = 10  # Degrees of freedom
t_data = t.rvs(df=df_t, size=1000)
axs[3, 1].hist(t_data, bins=30, color='skyblue', edgecolor='black')
axs[3, 1].set_title('T-Distribution')
axs[3, 1].set_xlabel('Value')
axs[3, 1].set_ylabel('Frequency')

plt.show()




import numpy as np
import matplotlib.pyplot as plt

# Number of darts thrown
n_darts = 1000

# Throw darts following a continuous uniform distribution (range 0 to 1)
darts = np.random.uniform(low=0, high=1, size=n_darts)

# Compute the mean and variance
mean = np.mean(darts)
variance = np.var(darts)

# Print the results
print(f"Mean of dart positions: {mean}")
print(f"Variance of dart positions: {variance}")

# Plot the dart results as a histogram
plt.figure(figsize=(8, 6))
plt.hist(darts, bins=30, color='skyblue', edgecolor='black')
plt.title('Darts Throw Simulation (Uniform Distribution)')
plt.xlabel('Position (0 to 1)')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()




import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Normal distribution parameters
mean = 50  # Mean
std_dev = 10  # Standard deviation
n_samples = 1000  # Number of samples

# Generate data from a normal distribution
data = np.random.normal(loc=mean, scale=std_dev, size=n_samples)

# Compute the mean and standard deviation of the data
calculated_mean = np.mean(data)
calculated_std_dev = np.std(data)

# Print the results
print(f"Mean: {calculated_mean}")
print(f"Standard deviation: {calculated_std_dev}")

# Also generate data from the standard normal distribution (mean 0, variance 1)
standard_normal_data = np.random.randn(n_samples)

# Sigma ranges (±1σ, ±2σ, ±3σ)
x_values = np.linspace(mean - 4*std_dev, mean + 4*std_dev, 1000)
pdf_values = norm.pdf(x_values, loc=mean, scale=std_dev)

# Plot settings
plt.figure(figsize=(10, 6))

# Plot the normal distribution PDF
plt.plot(x_values, pdf_values, label="Normal Distribution", color='blue')

# Shade the ±1σ, ±2σ, ±3σ ranges
plt.fill_between(x_values, 0, pdf_values, where=(x_values >= mean - std_dev) & (x_values <= mean + std_dev),
                 color='green', alpha=0.3, label=r'$\pm 1\sigma$')
plt.fill_between(x_values, 0, pdf_values, where=(x_values >= mean - 2*std_dev) & (x_values <= mean + 2*std_dev),
                 color='yellow', alpha=0.3, label=r'$\pm 2\sigma$')
plt.fill_between(x_values, 0, pdf_values, where=(x_values >= mean - 3*std_dev) & (x_values <= mean + 3*std_dev),
                 color='red', alpha=0.3, label=r'$\pm 3\sigma$')

# Figure details
plt.title('Normal Distribution with ±1σ, ±2σ, ±3σ Ranges')
plt.xlabel('Value')
plt.ylabel('Probability Density')
plt.legend()
plt.grid(True)
plt.show()





import numpy as np
import scipy.stats as stats

# Sample data
data = np.array([10, 20, 30, 40, 50])

# 1. Unbiased estimator
# The sample mean is used as an unbiased estimator of the population mean
sample_mean = np.mean(data)
print(f"Sample mean (unbiased estimator): {sample_mean}")

# 2. Sample variance (unbiased)
# With ddof=1 the denominator is n-1
sample_variance = np.var(data, ddof=1)
print(f"Sample variance (unbiased): {sample_variance}")

# 3. Population variance
# With ddof=0 the denominator is n
population_variance = np.var(data, ddof=0)
print(f"Population variance: {population_variance}")

# 4. Unbiased standard deviation
# Square root of the unbiased variance
unbiased_std_dev = np.std(data, ddof=1)
print(f"Unbiased standard deviation: {unbiased_std_dev}")

# 5. Correlation coefficient
# A second dataset is needed
data2 = np.array([12, 22, 32, 42, 52])

# Pearson correlation coefficient
correlation_coefficient = np.corrcoef(data, data2)[0, 1]
print(f"Correlation coefficient: {correlation_coefficient}")

# 6. Unbiased variance
# The sample variance computed above with ddof=1 is the unbiased variance
print(f"Unbiased variance: {sample_variance}")



import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

# Population correlation coefficient
true_rho = 0.8  # True population correlation coefficient

# Number of samples
n_samples = 1000

# Generate two normally distributed variables
mean = [0, 0]  # Means are 0
cov = [[1, true_rho], [true_rho, 1]]  # Covariance matrix

# Generate data from a multivariate normal distribution
data = np.random.multivariate_normal(mean, cov, n_samples)
x = data[:, 0]
y = data[:, 1]

# Estimate of the population correlation (Pearson correlation coefficient)
estimated_rho, _ = pearsonr(x, y)

# Print the result
print(f"Estimated correlation coefficient (estimate of the population correlation): {estimated_rho}")

# Scatter plot
plt.figure(figsize=(8, 6))
plt.scatter(x, y, alpha=0.5)
plt.title(f'Scatter plot of two variables (True ρ = {true_rho}, Estimated ρ = {estimated_rho:.2f})')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.show()

# Plot histograms (to confirm that each variable follows a normal distribution)
plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.hist(x, bins=30, color='skyblue', edgecolor='black')
plt.title('Distribution of X (Normal)')
plt.grid(True)

plt.subplot(1, 2, 2)
plt.hist(y, bins=30, color='lightgreen', edgecolor='black')
plt.title('Distribution of Y (Normal)')
plt.grid(True)

plt.tight_layout()
plt.show()


import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Generate sample data (containing systematic and random error)
np.random.seed(42)
true_value = 50  # True value (population mean)
systematic_error = 5  # Systematic error
random_error = np.random.normal(0, 5, 100)  # Random error (normal with mean 0, std 5)

# Data = true value + systematic error + random error
data = true_value + systematic_error + random_error

# Compute the sample mean and standard deviation
sample_mean = np.mean(data)
sample_std_dev = np.std(data, ddof=1)

# 1. Standard error
n = len(data)  # Sample size
standard_error = sample_std_dev / np.sqrt(n)
print(f"Standard error: {standard_error}")

# 2. Confidence interval
confidence_level = 0.95  # 95% confidence level
alpha = 1 - confidence_level  # Significance level

# Use the t-distribution for the interval (appropriate when the sample is small)
t_value = stats.t.ppf(1 - alpha/2, df=n-1)  # Confidence coefficient (t value)
margin_of_error = t_value * standard_error  # Margin of error

# Interval estimate (confidence interval)
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)
print(f"Confidence interval: {confidence_interval}")

# 3. Confidence level, confidence coefficient, and reliability
print(f"Confidence level: {confidence_level * 100}%")
print(f"Confidence coefficient (t value): {t_value}")
print(f"Reliability: {1 - alpha}")

# Plot
plt.figure(figsize=(10, 6))

# Histogram of the data
plt.hist(data, bins=30, color='skyblue', edgecolor='black', alpha=0.7)

# Show the confidence interval bounds
plt.axvline(confidence_interval[0], color='red', linestyle='--', label=f"Lower Bound: {confidence_interval[0]:.2f}")
plt.axvline(confidence_interval[1], color='green', linestyle='--', label=f"Upper Bound: {confidence_interval[1]:.2f}")
plt.axvline(sample_mean, color='black', linestyle='-', label=f"Sample Mean: {sample_mean:.2f}")

plt.title('Histogram of Data with Confidence Interval')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()
plt.grid(True)
plt.show()






import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t

# Generate sample data
np.random.seed(42)
data = np.random.normal(loc=50, scale=10, size=100)  # Normal distribution with mean 50, std 10

# 1. Sample mean and sample standard deviation
sample_mean = np.mean(data)
sample_std_dev = np.std(data, ddof=1)
n = len(data)

print(f"Sample mean: {sample_mean}")
print(f"Sample standard deviation: {sample_std_dev}")

# 2. Confidence interval estimation via the bootstrap method
n_bootstraps = 1000  # Number of bootstrap iterations
bootstrap_means = []

# Repeat bootstrap resampling
for _ in range(n_bootstraps):
    # Resample from the data and compute the mean
    bootstrap_sample = np.random.choice(data, size=n, replace=True)
    bootstrap_means.append(np.mean(bootstrap_sample))

# Standard deviation of the bootstrap means (standard error)
bootstrap_means_std_dev = np.std(bootstrap_means)

# 3. Confidence interval using the t-distribution
confidence_level = 0.95
alpha = 1 - confidence_level
t_value = t.ppf(1 - alpha/2, df=n-1)  # t value of the t-distribution

# Compute the confidence interval
margin_of_error = t_value * bootstrap_means_std_dev
confidence_interval = (sample_mean - margin_of_error, sample_mean + margin_of_error)

print(f"Confidence interval: {confidence_interval}")

# Plot a histogram of the bootstrap means
plt.figure(figsize=(10, 6))
plt.hist(bootstrap_means, bins=30, color='skyblue', edgecolor='black', alpha=0.7)
plt.axvline(confidence_interval[0], color='red', linestyle='--', label=f"Lower Bound: {confidence_interval[0]:.2f}")
plt.axvline(confidence_interval[1], color='green', linestyle='--', label=f"Upper Bound: {confidence_interval[1]:.2f}")
plt.axvline(sample_mean, color='black', linestyle='-', label=f"Sample Mean: {sample_mean:.2f}")
plt.title('Histogram of Bootstrap Sample Means with Confidence Interval')
plt.xlabel('Mean')
plt.ylabel('Frequency')
plt.legend()
plt.grid(True)
plt.show()



import numpy as np
from scipy import stats

# 1. Generate sample data (normal distribution with mean 52, std 10)
np.random.seed(42)
data = np.random.normal(loc=52, scale=10, size=100)

# 2. Null and alternative hypotheses
# Null hypothesis H0: the population mean is 50
# Alternative hypothesis H1: the population mean is not 50 (two-sided test)

# 3. Set the significance level (e.g., 0.05, i.e., 5%)
alpha = 0.05

# 4. Run a one-sample t-test
# Test whether the sample mean differs from the hypothesized population mean of 50
t_statistic, p_value = stats.ttest_1samp(data, popmean=50)

# 5. Compare the p-value with the significance level
print(f"t statistic: {t_statistic}")
print(f"P-value: {p_value}")

if p_value < alpha:
    print("The p-value is below the significance level, so the null hypothesis is rejected.")
else:
    print("The p-value is above the significance level, so we fail to reject the null hypothesis.")

# 6. Basic sample statistics
sample_mean = np.mean(data)
sample_std = np.std(data, ddof=1)
print(f"Sample mean: {sample_mean}")
print(f"Sample standard deviation: {sample_std}")





import numpy as np
import scipy.stats as stats

# Generate sample data (two hypothetical treatment groups)
np.random.seed(42)
group_a = np.random.normal(loc=50, scale=10, size=100)  # Reference group
group_b = np.random.normal(loc=48, scale=10, size=100)  # New treatment group

# Non-inferiority test settings
delta = 2  # Non-inferiority margin
alpha = 0.05  # Significance level

# Test of the difference in means
# H0: mean_b - mean_a <= -delta (the new treatment is worse by more than the margin)
# H1: mean_b - mean_a > -delta (the new treatment is non-inferior)
mean_diff = np.mean(group_b) - np.mean(group_a)
std_error = np.sqrt(np.var(group_a, ddof=1)/len(group_a) + np.var(group_b, ddof=1)/len(group_b))
z_statistic = (mean_diff + delta) / std_error

# P-value (one-sided test)
p_value = 1 - stats.norm.cdf(z_statistic)

# 90% two-sided confidence interval (equivalent to the one-sided test at level alpha)
confidence_interval = (mean_diff - stats.norm.ppf(1 - alpha) * std_error,
                       mean_diff + stats.norm.ppf(1 - alpha) * std_error)

# Print the results
print(f"Test statistic (Z): {z_statistic:.3f}")
print(f"P-value: {p_value:.3f}")
print(f"Confidence interval: {confidence_interval}")
if p_value < alpha:
    print("Non-inferiority was demonstrated.")
else:
    print("Non-inferiority was not demonstrated.")



import numpy as np
import pandas as pd
from scipy import stats

# Generate sample data (three groups)
np.random.seed(42)
group1 = np.random.normal(loc=55, scale=5, size=30)
group2 = np.random.normal(loc=60, scale=5, size=30)
group3 = np.random.normal(loc=65, scale=5, size=30)

# Collect the data in a DataFrame
data = pd.DataFrame({
    'group': ['Group1'] * 30 + ['Group2'] * 30 + ['Group3'] * 30,
    'value': np.concatenate([group1, group2, group3])
})

# Run a one-way ANOVA
f_statistic, p_value = stats.f_oneway(group1, group2, group3)

# Total variation (total sum of squares)
grand_mean = np.mean(data['value'])
total_variance = np.sum((data['value'] - grand_mean)**2)
print(f"Total variation: {total_variance:.2f}")

# Print the results
print(f"F statistic: {f_statistic:.3f}")
print(f"P-value: {p_value:.3f}")

if p_value < 0.05:
    print("At least one group mean differs from the others.")
else:
    print("There is no evidence that the group means differ.")




import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import itertools

# Generate the data
np.random.seed(42)

# Levels of factor A
factor_a_levels = ['A1', 'A2']
# Levels of factor B
factor_b_levels = ['B1', 'B2']

# All combinations of the factor levels
combinations = list(itertools.product(factor_a_levels, factor_b_levels))

# Generate observations (assuming a normal distribution)
data = []
for (a, b) in combinations:
    for _ in range(30):  # 30 observations per combination
        value = np.random.normal(loc=10 + (2 if a == 'A2' else 0) + (2 if b == 'B2' else 0), scale=1)
        data.append({'FactorA': a, 'FactorB': b, 'Value': value})

# Build the DataFrame
df = pd.DataFrame(data)

# Run a two-way ANOVA
model = ols('Value ~ C(FactorA) + C(FactorB) + C(FactorA):C(FactorB)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

# Print the results
print(anova_table)

# Null hypotheses
# H0: the means do not differ across the levels of factor A
# H0: the means do not differ across the levels of factor B
# H0: there is no interaction effect between factor A and factor B

# Interpret the results
for index, row in anova_table.iterrows():
    if row['PR(>F)'] < 0.05:
        print(f"{index}: reject the null hypothesis (significant difference)")
    else:
        print(f"{index}: fail to reject the null hypothesis (no significant difference)")




from scipy.stats import wilcoxon

# Generate sample data (two related/paired groups)
group_a = np.random.normal(loc=50, scale=10, size=30)
group_b = group_a + np.random.normal(loc=5, scale=5, size=30)

# Run the Wilcoxon signed-rank test (for paired samples)
stat, p_value = wilcoxon(group_a, group_b)
print(f"\nWilcoxon signed-rank test statistic: {stat}, P-value: {p_value}")

# Interpret the result
if p_value < 0.05:
    print("There is a significant difference.")
else:
    print("There is no significant difference.")


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import chi2_contingency, fisher_exact, mannwhitneyu, wilcoxon
from statsmodels.stats.proportion import proportions_chisquare
from statsmodels.stats.contingency_tables import mcnemar

# Generate the data
np.random.seed(42)

# Data for a test of equal proportions (used here in place of Cochran's Q test)
successes = np.array([10, 15, 5])
trials = np.array([20, 20, 20])

# Chi-square test that the proportions are equal
Q, p_cochran, _ = proportions_chisquare(successes, trials)

# Data for Pearson's chi-square test
data_chi2 = pd.DataFrame({
    'Group1': [30, 10],
    'Group2': [20, 20]
}, index=['Success', 'Failure'])

# Pearson's chi-square test
chi2, p_chi2, _, _ = chi2_contingency(data_chi2)

# Data for Fisher's exact test
table_fisher = np.array([[10, 5],
                          [5, 10]])

# Fisher's exact test
oddsratio, p_fisher = fisher_exact(table_fisher)

# Data for the Mann-Whitney U test
group1 = np.random.normal(loc=50, scale=10, size=30)
group2 = np.random.normal(loc=55, scale=10, size=30)

# Mann-Whitney U test
stat_mw, p_mw = mannwhitneyu(group1, group2)

# Data for McNemar's test
table_mcnemar = np.array([[10, 5],
                           [5, 10]])

# McNemar's test
result_mcnemar = mcnemar(table_mcnemar)

# Plot the results
labels = [
    'Test of equal proportions',
    "Pearson's chi-square test",
    "Fisher's exact test",
    'Mann-Whitney U test',
    "McNemar's test"
]

p_values = [p_cochran, p_chi2, p_fisher, p_mw, result_mcnemar.pvalue]

plt.figure(figsize=(10, 6))
plt.barh(labels, p_values, color='skyblue')
plt.axvline(x=0.05, color='red', linestyle='--', label='Significance level (0.05)')
plt.xlabel('P-value')
plt.title('P-values of Each Test')
plt.legend()
plt.xlim(0, 1)
plt.grid(axis='x')
plt.show()


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.discrete.discrete_model import Probit, Logit

# Create sample data
np.random.seed(0)
n = 1000
X = np.random.randn(n, 2)
X = sm.add_constant(X)  # Add a constant term
y = (X[:, 1] + X[:, 2] + np.random.randn(n) > 0).astype(int)

# Convert to a DataFrame
data = pd.DataFrame(X, columns=['const', 'X1', 'X2'])
data['y'] = y

# Probit analysis
probit_model = Probit(data['y'], data[['const', 'X1', 'X2']])
probit_result = probit_model.fit()

# Logit analysis
logit_model = Logit(data['y'], data[['const', 'X1', 'X2']])
logit_result = logit_model.fit()

# Compute predicted probabilities
X_pred = np.linspace(-3, 3, 100)
X_new = pd.DataFrame({'const': 1, 'X1': X_pred, 'X2': 0})  # Hold X2 fixed at 0
probit_pred = probit_result.predict(X_new)
logit_pred = logit_result.predict(X_new)

# Plot
plt.figure(figsize=(10, 6))
plt.plot(X_pred, probit_pred, label='Probit', color='blue')
plt.plot(X_pred, logit_pred, label='Logit', color='orange')
plt.title('Probit vs Logit Predictions')
plt.xlabel('X1')
plt.ylabel('Predicted Probability')
plt.axhline(0, color='gray', lw=0.5, ls='--')
plt.axvline(0, color='gray', lw=0.5, ls='--')
plt.legend()
plt.grid()
plt.show()



import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import dendrogram, linkage
import seaborn as sns

# Generate sample data
np.random.seed(0)
data = np.random.rand(100, 5)  # 100 samples, 5 features
df = pd.DataFrame(data, columns=[f'feature_{i}' for i in range(5)])

# 1. Principal Component Analysis (PCA)
pca = PCA(n_components=2)
pca_result = pca.fit_transform(df)

plt.figure(figsize=(8, 4))
plt.scatter(pca_result[:, 0], pca_result[:, 1])
plt.title('PCA Result')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.grid()
plt.show()

# 2. Factor Analysis
from sklearn.decomposition import FactorAnalysis

factor = FactorAnalysis(n_components=2)
factor_result = factor.fit_transform(df)

plt.figure(figsize=(8, 4))
plt.scatter(factor_result[:, 0], factor_result[:, 1])
plt.title('Factor Analysis Result')
plt.xlabel('Factor 1')
plt.ylabel('Factor 2')
plt.grid()
plt.show()

# 3. Cluster Analysis (K-means)
kmeans = KMeans(n_clusters=3)
df['cluster'] = kmeans.fit_predict(df)

plt.figure(figsize=(8, 4))
sns.scatterplot(x=pca_result[:, 0], y=pca_result[:, 1], hue=df['cluster'], palette='viridis')
plt.title('K-means Clustering Result on PCA')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.grid()
plt.show()

# 4. Hierarchical clustering (on the original features, excluding the K-means cluster labels)
linked = linkage(df.drop(columns='cluster'), 'ward')

plt.figure(figsize=(10, 7))
dendrogram(linked, orientation='top', distance_sort='descending', show_leaf_counts=True)
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Sample Index')
plt.ylabel('Distance')
plt.show()


Machine Learning


import numpy as np

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid (x is assumed to already be a sigmoid output)
def sigmoid_derivative(x):
    return x * (1 - x)

# Loss function (mean squared error, MSE)
def mean_squared_error(y_true, y_pred):
    return np.mean(np.square(y_true - y_pred))

# Neural network class definition
class SimpleNeuralNetwork:
    def __init__(self, input_size, learning_rate=0.1):
        self.weights = np.random.rand(input_size)  # Weights
        self.bias = np.random.rand(1)              # Bias
        self.learning_rate = learning_rate         # Learning rate

    def predict(self, inputs):
        # Linear combination of weights and bias, passed through the sigmoid
        return sigmoid(np.dot(inputs, self.weights) + self.bias)

    def train(self, inputs, targets, epochs=1000):
        for epoch in range(epochs):
            for x, y_true in zip(inputs, targets):
                # Forward pass
                y_pred = self.predict(x)

                # Error
                error = y_true - y_pred

                # Backpropagation (gradient computation)
                d_weights = error * sigmoid_derivative(y_pred) * x
                d_bias = error * sigmoid_derivative(y_pred)

                # Update weights and bias
                self.weights += self.learning_rate * d_weights
                self.bias += self.learning_rate * d_bias

            if epoch % 100 == 0:
                mse = mean_squared_error(targets, self.predict(inputs))
                print(f"Epoch {epoch}, Loss: {mse}")

# Input data (example: AND gate)
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
targets = np.array([0, 0, 0, 1])  # AND gate labels (1D so they broadcast with the predictions)

# Initialize and train the neural network
nn = SimpleNeuralNetwork(input_size=2, learning_rate=0.1)
nn.train(inputs, targets, epochs=1000)

# Test
for input_data in inputs:
    prediction = nn.predict(input_data)
    print(f"Input: {input_data}, Predicted: {prediction}")




import numpy as np

# Simple perceptron class definition
class Perceptron:
    def __init__(self, input_size, learning_rate=0.1, epochs=1000):
        self.weights = np.random.rand(input_size)  # Initialize weights
        self.bias = np.random.rand(1)              # Initialize bias
        self.learning_rate = learning_rate         # Learning rate
        self.epochs = epochs                       # Number of training iterations

    # Step function (activation function of the simple perceptron)
    def activation_function(self, x):
        return np.where(x >= 0, 1, 0)

    # Perceptron prediction
    def predict(self, inputs):
        linear_output = np.dot(inputs, self.weights) + self.bias
        return self.activation_function(linear_output)

    # Training method
    def train(self, inputs, targets):
        for epoch in range(self.epochs):
            for x, y_true in zip(inputs, targets):
                # Prediction
                y_pred = self.predict(x)

                # Error
                error = y_true - y_pred

                # Update weights and bias
                self.weights += self.learning_rate * error * x
                self.bias += self.learning_rate * error

            # Periodically report the loss
            if epoch % 100 == 0:
                total_loss = np.mean(np.square(targets - self.predict(inputs)))
                print(f'Epoch {epoch}, Loss: {total_loss}')

# Sample data (logical AND)
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
targets = np.array([0, 0, 0, 1])  # Target labels for the AND gate

# Initialize and train the perceptron
perceptron = Perceptron(input_size=2, learning_rate=0.1, epochs=1000)
perceptron.train(inputs, targets)

# Test
for x in inputs:
    print(f'Input: {x}, Predicted: {perceptron.predict(x)}')




import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Sample data (e.g., predicting sales from advertising spend)
data = {
    'TV': [230.1, 44.5, 17.2, 151.5, 180.8],
    'Radio': [37.8, 39.3, 45.9, 41.3, 10.8],
    'Newspaper': [69.2, 45.1, 69.3, 58.5, 58.4],
    'Sales': [22.1, 10.4, 9.3, 18.5, 12.9]
}

# Convert to a DataFrame
df = pd.DataFrame(data)

# Split into features (independent variables) and target (dependent variable)
X = df[['TV', 'Radio', 'Newspaper']]  # Independent variables
y = df['Sales']  # Dependent variable

# Initialize the multiple regression model
model = LinearRegression()

# Fit the model
model.fit(X, y)

# Coefficients and intercept of the multiple regression model
print("Regression coefficients:", model.coef_)
print("Intercept:", model.intercept_)

# Predictions from the model
y_pred = model.predict(X)

# Evaluate the model (mean squared error and coefficient of determination)
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)

print(f"Mean squared error (MSE): {mse}")
print(f"Coefficient of determination (R^2): {r2}")

# Build the prediction equation
print(f"Prediction equation: Sales = {model.intercept_:.2f} + {model.coef_[0]:.2f} * TV + {model.coef_[1]:.2f} * Radio + {model.coef_[2]:.2f} * Newspaper")



import numpy as np
import matplotlib.pyplot as plt

# Define prior distribution for bias of coin (uniform distribution)
prior = np.linspace(0, 1, 1000)  # Possible bias values (theta) for the coin

# Prior probability is uniform (equal probability for each bias value)
prior_prob = np.ones_like(prior)

# Likelihood function: P(Data | Bias)
def likelihood(bias, heads, tails):
    return (bias ** heads) * ((1 - bias) ** tails)

# Simulated data: we flip the coin 10 times and observe 7 heads, 3 tails
heads_observed = 7
tails_observed = 3

# Compute the likelihood of the observed data for each possible bias
likelihoods = likelihood(prior, heads_observed, tails_observed)

# Posterior probability using Bayes' theorem (unnormalized)
posterior_unnormalized = likelihoods * prior_prob

# Normalize the posterior distribution to make it a valid probability distribution
posterior_prob = posterior_unnormalized / np.sum(posterior_unnormalized)

# Plotting the prior and posterior distributions
plt.figure(figsize=(10, 6))
plt.plot(prior, posterior_prob, label='Posterior', color='blue')
plt.title('Posterior Probability Distribution for Coin Bias')
plt.xlabel('Bias (P(Heads))')
plt.ylabel('Probability Density')
plt.legend()
plt.grid(True)
plt.show()



import numpy as np
import matplotlib.pyplot as plt

# Define probability distributions for a biased coin
P_heads = 0.7  # True probability of heads
P_tails = 1 - P_heads  # Probability of tails
true_distribution = [P_heads, P_tails]

# Approximate distribution (let's assume we are approximating the coin bias)
Q_heads = 0.6  # Predicted probability of heads
Q_tails = 1 - Q_heads
approx_distribution = [Q_heads, Q_tails]

# Self-information for heads and tails (in bits)
I_heads = -np.log2(P_heads)
I_tails = -np.log2(P_tails)
print(f"Self-information for heads: {I_heads:.4f} bits")
print(f"Self-information for tails: {I_tails:.4f} bits")

# Entropy of the true distribution
def entropy(dist):
    return -np.sum([p * np.log2(p) for p in dist if p > 0])

H_X = entropy(true_distribution)
print(f"Entropy of the coin (H(X)): {H_X:.4f} bits")

# Cross-entropy between true and approximate distribution
def cross_entropy(P, Q):
    return -np.sum([P[i] * np.log2(Q[i]) for i in range(len(P)) if P[i] > 0 and Q[i] > 0])

H_PQ = cross_entropy(true_distribution, approx_distribution)
print(f"Cross-entropy (H(P, Q)): {H_PQ:.4f} bits")

# Mutual information between two random variables
# For simplicity, assume the coin toss results from two coins (X and Y) are independent
# P(X,Y) = P(X) * P(Y), so mutual information is 0 in this case.
# Let's create a joint distribution where they are dependent to compute mutual information

# Example joint distribution: P(X=1, Y=1) = 0.4, P(X=1, Y=0) = 0.3, etc.
joint_distribution = np.array([[0.4, 0.1], [0.2, 0.3]])  # P(X, Y)

# Marginal distributions
P_X = np.sum(joint_distribution, axis=1)  # Sum over Y
P_Y = np.sum(joint_distribution, axis=0)  # Sum over X

# Conditional entropy H(X|Y) = sum_y P(y) * H(X | Y=y)
def conditional_entropy(joint_dist, marginal_Y):
    cond_entropy = 0
    for i in range(len(marginal_Y)):
        if marginal_Y[i] > 0:
            cond_dist = joint_dist[:, i] / marginal_Y[i]  # Conditional distribution P(X | Y=y_i)
            cond_entropy += entropy(cond_dist) * marginal_Y[i]
    return cond_entropy

H_X_given_Y = conditional_entropy(joint_distribution, P_Y)

# Mutual information I(X; Y) = H(X) - H(X|Y), using the marginal entropy of the joint distribution
H_X_joint = entropy(P_X)
I_XY = H_X_joint - H_X_given_Y
print(f"Mutual information I(X; Y): {I_XY:.4f} bits")

# Visualizing the distributions and information measures
labels = ['True Heads', 'True Tails', 'Pred Heads', 'Pred Tails']
values = [P_heads, P_tails, Q_heads, Q_tails]

plt.bar(labels, values, color=['blue', 'blue', 'orange', 'orange'])
plt.title("True vs Predicted Coin Bias")
plt.ylabel("Probability")
plt.show()
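
One relationship worth making explicit (an addition to the original code): the KL divergence between the true and approximate coin distributions equals the cross-entropy minus the entropy, so it can be checked directly from the quantities computed above.

# KL divergence D(P || Q), computed two ways (reuses true_distribution, approx_distribution, H_X, H_PQ)
kl_direct = sum(p * np.log2(p / q) for p, q in zip(true_distribution, approx_distribution) if p > 0)
kl_from_entropies = H_PQ - H_X
print(f"KL divergence D(P||Q): {kl_direct:.4f} bits (direct), {kl_from_entropies:.4f} bits (H(P,Q) - H(P))")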





import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Logistic regression model
class LogisticRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000, regularization=None, lambda_=0.01):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.regularization = regularization
        self.lambda_ = lambda_
    
    def fit(self, X, y):
        self.m, self.n = X.shape
        self.theta = np.zeros(self.n)
        self.bias = 0
        
        for _ in range(self.n_iterations):
            model = self.predict_proba(X)
            error = model - y
            
            # Gradient computation
            d_theta = (1 / self.m) * np.dot(X.T, error)
            d_bias = (1 / self.m) * np.sum(error)
            
            # L2 regularization term (optional)
            if self.regularization == "L2":
                d_theta += (self.lambda_ / self.m) * self.theta
            
            # Gradient descent update
            self.theta -= self.learning_rate * d_theta
            self.bias -= self.learning_rate * d_bias
    
    def predict_proba(self, X):
        return sigmoid(np.dot(X, self.theta) + self.bias)
    
    def predict(self, X):
        return (self.predict_proba(X) >= 0.5).astype(int)
    
    def log_likelihood(self, X, y):
        predictions = self.predict_proba(X)
        return np.sum(y * np.log(predictions) + (1 - y) * np.log(1 - predictions))

# Generate synthetic dataset for binary classification
X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train logistic regression model with L2 regularization
model = LogisticRegression(learning_rate=0.01, n_iterations=1000, regularization="L2", lambda_=0.1)
model.fit(X_train, y_train)

# Predict and evaluate on test data
y_pred = model.predict(X_test)

# Accuracy
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy: {accuracy:.4f}")

# Log-Likelihood
log_likelihood = model.log_likelihood(X_train, y_train)
print(f"Log-Likelihood: {log_likelihood:.4f}")

# Visualizing the training convergence: refit from scratch at several iteration
# checkpoints and record the training log-likelihood at each one
checkpoints = np.arange(100, 1001, 100)
log_likelihoods = []
for n_iter in checkpoints:
    m = LogisticRegression(learning_rate=0.01, n_iterations=n_iter, regularization="L2", lambda_=0.1)
    m.fit(X_train, y_train)
    log_likelihoods.append(m.log_likelihood(X_train, y_train))

plt.plot(checkpoints, log_likelihoods, label="Log-Likelihood")
plt.xlabel("Iterations")
plt.ylabel("Log-Likelihood")
plt.title("Log-Likelihood Convergence")
plt.legend()
plt.show()



import numpy as np
import matplotlib.pyplot as plt

# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Logistic regression model with L2 regularization
class LogisticRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000, regularization=None, lambda_=0.1):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.regularization = regularization
        self.lambda_ = lambda_
    
    def fit(self, X, y):
        self.m, self.n = X.shape
        self.theta = np.zeros(self.n)
        self.bias = 0
        
        # Training loop
        for _ in range(self.n_iterations):
            y_pred = self.predict_proba(X)
            error = y_pred - y
            
            # Compute gradients
            d_theta = (1 / self.m) * np.dot(X.T, error)
            d_bias = (1 / self.m) * np.sum(error)
            
            # Apply L2 regularization (if specified)
            if self.regularization == "L2":
                d_theta += (self.lambda_ / self.m) * self.theta
            
            # Update parameters using gradient descent
            self.theta -= self.learning_rate * d_theta
            self.bias -= self.learning_rate * d_bias
    
    def predict_proba(self, X):
        return sigmoid(np.dot(X, self.theta) + self.bias)
    
    def predict(self, X):
        return (self.predict_proba(X) >= 0.5).astype(int)
    
    def log_likelihood(self, X, y):
        y_pred = self.predict_proba(X)
        return np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

# Generate synthetic dataset (binary classification)
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the logistic regression model with L2 regularization
model = LogisticRegression(learning_rate=0.01, n_iterations=1000, regularization="L2", lambda_=0.1)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy: {accuracy:.4f}")

# Calculate log-likelihood
log_likelihood_value = model.log_likelihood(X_train, y_train)
print(f"Log-Likelihood: {log_likelihood_value:.4f}")

# Visualizing the parameter convergence: refit from scratch at several iteration
# checkpoints and record the training log-likelihood at each one
checkpoints = np.arange(100, 1001, 100)
log_likelihoods = []
for n_iter in checkpoints:
    m = LogisticRegression(learning_rate=0.01, n_iterations=n_iter, regularization="L2", lambda_=0.1)
    m.fit(X_train, y_train)
    log_likelihoods.append(m.log_likelihood(X_train, y_train))

plt.plot(checkpoints, log_likelihoods, label="Log-Likelihood")
plt.xlabel("Iterations")
plt.ylabel("Log-Likelihood")
plt.title("Log-Likelihood Convergence")
plt.legend()
plt.show()


import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from collections import defaultdict

# Naive Bayes Classifier Implementation
class NaiveBayesClassifier:
    def __init__(self):
        self.class_priors = {}
        self.feature_likelihoods = {}
        self.classes = []
    
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        self.class_priors = defaultdict(float)
        self.feature_likelihoods = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
        
        # Calculate class priors P(C)
        for c in self.classes:
            class_samples = X[y == c]
            self.class_priors[c] = len(class_samples) / n_samples
            
            # Calculate likelihoods P(X|C)
            for i in range(n_features):
                feature_values, counts = np.unique(class_samples[:, i], return_counts=True)
                total_count = len(class_samples)
                for feature_value, count in zip(feature_values, counts):
                    self.feature_likelihoods[c][i][feature_value] = count / total_count
    
    def predict(self, X):
        predictions = []
        for x in X:
            posteriors = {}
            # Compute posterior for each class P(C|X)
            for c in self.classes:
                posterior = np.log(self.class_priors[c])  # Start with log prior P(C)
                for i, feature_value in enumerate(x):
                    if feature_value in self.feature_likelihoods[c][i]:
                        posterior += np.log(self.feature_likelihoods[c][i][feature_value])  # Add log likelihood P(X|C)
                    else:
                        posterior += np.log(1e-6)  # Handle unseen feature values
                posteriors[c] = posterior
            predictions.append(max(posteriors, key=posteriors.get))
        return np.array(predictions)

# Generate synthetic dataset (binary classification with categorical features)
def generate_synthetic_data():
    X = np.array([[1, 1, 0],
                  [1, 0, 0],
                  [0, 1, 1],
                  [0, 0, 1],
                  [1, 1, 1],
                  [0, 0, 0],
                  [1, 0, 1],
                  [0, 1, 0]])
    
    y = np.array([1, 1, 0, 0, 1, 0, 1, 0])  # Binary class labels
    return X, y

# Bayesian Network Example (simple)
# This part demonstrates how Bayesian networks could be structured (no full implementation for simplicity)
class BayesianNetwork:
    def __init__(self):
        self.graph = defaultdict(list)
        self.probabilities = defaultdict(float)
    
    def add_edge(self, parent, child):
        self.graph[parent].append(child)
    
    def add_probability(self, node, probability):
        self.probabilities[node] = probability
    
    def get_probabilities(self):
        return self.probabilities

# Example usage
if __name__ == "__main__":
    # Generate synthetic data
    X, y = generate_synthetic_data()
    
    # Split dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    
    # Initialize Naive Bayes classifier
    nb_classifier = NaiveBayesClassifier()
    
    # Fit the model
    nb_classifier.fit(X_train, y_train)
    
    # Predict the test set
    y_pred = nb_classifier.predict(X_test)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Naive Bayes Classifier Accuracy: {accuracy:.4f}")
    
    # Bayesian Network example (basic structure)
    bn = BayesianNetwork()
    bn.add_edge("Rain", "WetGrass")
    bn.add_edge("Sprinkler", "WetGrass")
    bn.add_probability("Rain", 0.3)
    bn.add_probability("Sprinkler", 0.5)
    bn.add_probability("WetGrass", 0.7)
    
    print("Bayesian Network Probabilities:")
    for node, prob in bn.get_probabilities().items():
        print(f"P({node}) = {prob}")



import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Generate synthetic dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Initialize the Gradient Boosting Classifier
gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# Fit the model to the training data
gbc.fit(X_train, y_train)

# Make predictions on the test set
y_pred = gbc.predict(X_test)

# Evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Gradient Boosting Classifier: {accuracy:.4f}")

# Print confusion matrix and classification report
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

# Feature importance plot
importances = gbc.feature_importances_
indices = np.argsort(importances)[::-1]
plt.figure(figsize=(10, 6))
plt.title("Feature Importances")
plt.bar(range(X_train.shape[1]), importances[indices], color="r", align="center")
plt.xticks(range(X_train.shape[1]), indices)
plt.xlim([-1, X_train.shape[1]])
plt.show()
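
To see how the ensemble improves as trees are added, the classifier's staged_predict method can score the model after each boosting iteration. This is a small addition for illustration; it reuses the gbc, X_test and y_test objects defined above.

# Test accuracy as a function of the number of boosting iterations
staged_accuracy = [accuracy_score(y_test, y_stage) for y_stage in gbc.staged_predict(X_test)]
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(staged_accuracy) + 1), staged_accuracy)
plt.title("Test Accuracy vs. Number of Boosting Iterations")
plt.xlabel("Number of Trees")
plt.ylabel("Accuracy")
plt.show()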






import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import acf, adfuller
from statsmodels.tsa.seasonal import seasonal_decompose

# Generate a synthetic time series (non-stationary with a trend)
np.random.seed(42)
time = np.arange(100)
trend = time * 0.5  # Trend component
noise = np.random.normal(0, 1, 100)  # Noise component
time_series = trend + noise

# Plot the time series
plt.figure(figsize=(10, 6))
plt.plot(time, time_series, label="Time Series")
plt.title("Generated Time Series with Trend")
plt.xlabel("Time")
plt.ylabel("Value")
plt.legend()
plt.grid(True)
plt.show()

# Mean and Variance of the time series
mean_value = np.mean(time_series)
variance_value = np.var(time_series)

print(f"Mean of the time series: {mean_value:.4f}")
print(f"Variance of the time series: {variance_value:.4f}")

# Autocorrelation function (ACF)
acf_values = acf(time_series, nlags=20)
plt.figure(figsize=(10, 6))
plt.stem(range(1, 21), acf_values[1:])  # use_line_collection is no longer needed (removed in newer Matplotlib)
plt.title("Autocorrelation Function (ACF)")
plt.xlabel("Lag")
plt.ylabel("Autocorrelation")
plt.grid(True)
plt.show()

# Check for stationarity using the Augmented Dickey-Fuller (ADF) test
def check_stationarity(time_series):
    result = adfuller(time_series)
    print("ADF Statistic: %f" % result[0])
    print("p-value: %f" % result[1])
    print("Critical Values:")
    for key, value in result[4].items():
        print('\t%s: %.3f' % (key, value))
    if result[1] < 0.05:
        print("The time series is stationary (reject the null hypothesis).")
    else:
        print("The time series is non-stationary (fail to reject the null hypothesis).")

check_stationarity(time_series)

# Decomposing the time series into trend, seasonal, and residual components
decomposition = seasonal_decompose(time_series, model='additive', period=20)
decomposition.plot()
plt.show()

# Differencing the time series to make it stationary (if necessary)
diff_time_series = np.diff(time_series)
plt.figure(figsize=(10, 6))
plt.plot(diff_time_series, label="Differenced Time Series")
plt.title("Differenced Time Series (1st Order)")
plt.xlabel("Time")
plt.ylabel("Value")
plt.legend()
plt.grid(True)
plt.show()

# Check stationarity again after differencing
check_stationarity(diff_time_series)


import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.tsa.stattools import acf, pacf

# Generate synthetic time series data
np.random.seed(42)
n = 500
# ArmaProcess expects lag-polynomial coefficients: [1, -phi_1, -phi_2, ...] for the
# AR part and [1, theta_1, ...] for the MA part.
ar = np.array([1, -0.5, 0.25])  # AR(2): phi_1 = 0.5, phi_2 = -0.25
ma = np.array([1, 0.5])         # MA(1): theta_1 = 0.5
arma_process = ArmaProcess(ar, ma)
time_series = arma_process.generate_sample(nsample=n)

# Plot the generated time series
plt.figure(figsize=(10, 6))
plt.plot(time_series)
plt.title("Generated Time Series")
plt.xlabel("Time")
plt.ylabel("Value")
plt.grid(True)
plt.show()

# Autoregressive (AR) Model
ar_model = AutoReg(time_series, lags=2)
ar_fit = ar_model.fit()
print(f"AR(2) Model Summary:\n{ar_fit.summary()}")

# Moving Average (MA) Model (using ARIMA with p=0, d=0, q=1)
ma_model = ARIMA(time_series, order=(0, 0, 1))
ma_fit = ma_model.fit()
print(f"\nMA(1) Model Summary:\n{ma_fit.summary()}")

# Autoregressive Moving Average (ARMA) Model
arma_model = ARIMA(time_series, order=(2, 0, 1))  # AR(2) and MA(1)
arma_fit = arma_model.fit()
print(f"\nARMA(2, 1) Model Summary:\n{arma_fit.summary()}")

# Autoregressive Integrated Moving Average (ARIMA) Model (p=2, d=1, q=1)
arima_model = ARIMA(time_series, order=(2, 1, 1))  # ARIMA with differencing
arima_fit = arima_model.fit()
print(f"\nARIMA(2, 1, 1) Model Summary:\n{arima_fit.summary()}")

# Plotting ACF and PACF
lags = 20
acf_values = acf(time_series, nlags=lags)
pacf_values = pacf(time_series, nlags=lags)

plt.figure(figsize=(12, 6))

# Plot ACF
plt.subplot(121)
plt.stem(range(1, lags + 1), acf_values[1:])
plt.title("Autocorrelation Function (ACF)")
plt.xlabel("Lag")
plt.ylabel("ACF")

# Plot PACF
plt.subplot(122)
plt.stem(range(1, lags + 1), pacf_values[1:])
plt.title("Partial Autocorrelation Function (PACF)")
plt.xlabel("Lag")
plt.ylabel("PACF")

plt.tight_layout()
plt.show()
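
The models fitted above have the following forms, where $c$ is a constant and $\varepsilon_t$ is white noise:

$$
\text{AR(2):}\quad y_t = c + \phi_1 y_{t-1} + \phi_2 y_{t-2} + \varepsilon_t
$$

$$
\text{MA(1):}\quad y_t = c + \varepsilon_t + \theta_1 \varepsilon_{t-1}
$$

$$
\text{ARMA(2,1):}\quad y_t = c + \phi_1 y_{t-1} + \phi_2 y_{t-2} + \varepsilon_t + \theta_1 \varepsilon_{t-1}
$$

ARIMA(2, 1, 1) is the same ARMA(2, 1) model applied to the first difference $\Delta y_t = y_t - y_{t-1}$.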




import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.manifold import TSNE
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset (for demonstration)
iris = load_iris()
X = iris.data
y = iris.target

# Standardize the dataset (important for PCA and Factor Analysis)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

### 1. PCA for Dimensionality Reduction ###

# Apply PCA (reduce to 2 components for visualization)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Principal component scores
pca_scores = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])

# Principal component loadings
pca_loadings = pd.DataFrame(pca.components_.T, columns=['PC1', 'PC2'], index=iris.feature_names)

print("Principal Component Scores (first 5 rows):")
print(pca_scores.head())

print("\nPrincipal Component Loadings:")
print(pca_loadings)

# Visualize PCA results
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis', edgecolor='k', s=50)
plt.title('PCA Projection')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.colorbar()
plt.show()
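
# (Additional check, not in the original: PCA also reports how much of the total
# variance each component captures via the explained_variance_ratio_ attribute.)
print("\nExplained variance ratio per principal component:")
print(pca.explained_variance_ratio_)
print(f"Total variance explained by 2 components: {pca.explained_variance_ratio_.sum():.4f}")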

### 2. Factor Analysis ###

# Apply Factor Analysis (reduce to 2 factors)
factor_analysis = FactorAnalysis(n_components=2)
X_factors = factor_analysis.fit_transform(X_scaled)

# Factor loadings
factor_loadings = pd.DataFrame(factor_analysis.components_.T, columns=['Factor1', 'Factor2'], index=iris.feature_names)

print("\nFactor Loadings:")
print(factor_loadings)

# Visualize Factor Analysis results
plt.figure(figsize=(8, 6))
plt.scatter(X_factors[:, 0], X_factors[:, 1], c=y, cmap='viridis', edgecolor='k', s=50)
plt.title('Factor Analysis Projection')
plt.xlabel('Factor 1')
plt.ylabel('Factor 2')
plt.colorbar()
plt.show()

### 3. t-SNE for Dimensionality Reduction ###

# Apply t-SNE (reduce to 2 dimensions for visualization)
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X_scaled)

# Visualize t-SNE results
plt.figure(figsize=(8, 6))
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='viridis', edgecolor='k', s=50)
plt.title('t-SNE Projection')
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.colorbar()
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from mlxtend.plotting import plot_decision_regions

# Load the Iris dataset for classification
iris = datasets.load_iris()
X = iris.data[:, :2]  # Take only the first two features for easy visualization
y = iris.target

# For simplicity, we take only the first two classes (binary classification)
X = X[y != 2]
y = y[y != 2]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define and train SVM with different kernels

### 1. Linear Kernel SVM ###
linear_svm = SVC(kernel='linear', C=1.0)
linear_svm.fit(X_train, y_train)

# Predictions and evaluation for linear kernel SVM
y_pred_linear = linear_svm.predict(X_test)
accuracy_linear = accuracy_score(y_test, y_pred_linear)
print(f"Linear Kernel SVM Accuracy: {accuracy_linear:.4f}")
print("\nLinear Kernel SVM Classification Report:")
print(classification_report(y_test, y_pred_linear))

# Plot decision boundary for linear kernel SVM
plt.figure(figsize=(8, 6))
plot_decision_regions(X_test, y_test, clf=linear_svm, legend=2)
plt.title('Linear Kernel SVM Decision Boundary')
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.show()

### 2. Polynomial Kernel SVM ###
poly_svm = SVC(kernel='poly', degree=3, C=1.0)
poly_svm.fit(X_train, y_train)

# Predictions and evaluation for polynomial kernel SVM
y_pred_poly = poly_svm.predict(X_test)
accuracy_poly = accuracy_score(y_test, y_pred_poly)
print(f"\nPolynomial Kernel SVM Accuracy: {accuracy_poly:.4f}")
print("\nPolynomial Kernel SVM Classification Report:")
print(classification_report(y_test, y_pred_poly))

# Plot decision boundary for polynomial kernel SVM
plt.figure(figsize=(8, 6))
plot_decision_regions(X_test, y_test, clf=poly_svm, legend=2)
plt.title('Polynomial Kernel SVM Decision Boundary')
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.show()

### 3. RBF Kernel SVM ###
rbf_svm = SVC(kernel='rbf', C=1.0, gamma='scale')
rbf_svm.fit(X_train, y_train)

# Predictions and evaluation for RBF kernel SVM
y_pred_rbf = rbf_svm.predict(X_test)
accuracy_rbf = accuracy_score(y_test, y_pred_rbf)
print(f"\nRBF Kernel SVM Accuracy: {accuracy_rbf:.4f}")
print("\nRBF Kernel SVM Classification Report:")
print(classification_report(y_test, y_pred_rbf))

# Plot decision boundary for RBF kernel SVM
plt.figure(figsize=(8, 6))
plot_decision_regions(X_test, y_test, clf=rbf_svm, legend=2)
plt.title('RBF Kernel SVM Decision Boundary')
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.show()
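
The three kernels used above correspond to the following kernel functions (with the settings used here, scikit-learn gives $d = 3$ and $r = 0$ for the polynomial kernel, and sets $\gamma$ automatically via gamma='scale' for the RBF kernel):

$$
K_{\text{linear}}(x, x') = x^{\top}x', \qquad
K_{\text{poly}}(x, x') = (\gamma\, x^{\top}x' + r)^{d}, \qquad
K_{\text{RBF}}(x, x') = \exp\left(-\gamma \lVert x - x' \rVert^{2}\right)
$$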




import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Generate a toy dataset (binary classification)
X, y = make_moons(n_samples=1000, noise=0.2, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define a simple neural network model with one hidden layer
class SimpleNN:
    def __init__(self, input_size, hidden_size, output_size, learning_rate):
        # Initialize parameters (weights and biases)
        self.W1 = np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))
        self.learning_rate = learning_rate
    
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def sigmoid_derivative(self, z):
        return z * (1 - z)
    
    def forward(self, X):
        # Forward pass
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2
    
    def backward(self, X, y, output):
        # Backpropagation
        m = X.shape[0]
        d_z2 = output - y
        d_W2 = np.dot(self.a1.T, d_z2) / m
        d_b2 = np.sum(d_z2, axis=0, keepdims=True) / m
        
        d_a1 = np.dot(d_z2, self.W2.T)
        d_z1 = d_a1 * self.sigmoid_derivative(self.a1)
        d_W1 = np.dot(X.T, d_z1) / m
        d_b1 = np.sum(d_z1, axis=0, keepdims=True) / m
        
        # Update weights and biases
        self.W1 -= self.learning_rate * d_W1
        self.b1 -= self.learning_rate * d_b1
        self.W2 -= self.learning_rate * d_W2
        self.b2 -= self.learning_rate * d_b2

    def train(self, X_train, y_train, epochs, batch_size):
        # Convert labels to 2D (column vector) for easier matrix operations
        y_train = y_train.reshape(-1, 1)
        m = X_train.shape[0]
        losses = []

        for epoch in range(epochs):
            # Shuffle the data
            permutation = np.random.permutation(m)
            X_train_shuffled = X_train[permutation]
            y_train_shuffled = y_train[permutation]
            
            for i in range(0, m, batch_size):
                # Get mini-batch
                X_batch = X_train_shuffled[i:i + batch_size]
                y_batch = y_train_shuffled[i:i + batch_size]
                
                # Forward and backward propagation
                output = self.forward(X_batch)
                # Binary cross-entropy loss (consistent with the gradient d_z2 = output - y
                # used in backward() for a sigmoid output unit)
                loss = -np.mean(y_batch * np.log(output + 1e-8) + (1 - y_batch) * np.log(1 - output + 1e-8))
                self.backward(X_batch, y_batch, output)
                
                losses.append(loss)
            
            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
        
        return losses

# Hyperparameters
input_size = 2  # Number of features
hidden_size = 5  # Number of neurons in the hidden layer
output_size = 1  # Binary classification output (1 neuron)
learning_rate = 0.1  # Learning rate
epochs = 200  # Number of epochs
batch_size = 32  # Mini-batch size

# Initialize and train the model
nn = SimpleNN(input_size, hidden_size, output_size, learning_rate)
losses = nn.train(X_train, y_train, epochs, batch_size)

# Plot the loss over epochs
plt.plot(losses)
plt.title('Loss over time')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.grid(True)
plt.show()

# Predictions on the test set
y_pred_test = nn.forward(X_test)
y_pred_test = (y_pred_test > 0.5).astype(int)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_test)
print(f"Test Accuracy: {accuracy:.4f}")




import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

# Load the Iris dataset
data = load_iris()
X = data.data
y = data.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create an MLPClassifier model with specific parameters
mlp = MLPClassifier(hidden_layer_sizes=(10, 5),   # Two hidden layers with 10 and 5 neurons
                    activation='relu',            # Activation function (ReLU in this case)
                    solver='adam',                # Optimizer (Adam in this case)
                    max_iter=1000,                # Maximum iterations for convergence
                    random_state=42)

# Train the model
mlp.fit(X_train, y_train)

# Make predictions
y_pred = mlp.predict(X_test)

# Display the model's accuracy
accuracy = mlp.score(X_test, y_test)
print(f'Accuracy: {accuracy:.4f}')

# Access weights and biases
print("\nWeights between input and first hidden layer:")
print(mlp.coefs_[0])  # Weights between input layer and first hidden layer

print("\nBiases for first hidden layer:")
print(mlp.intercepts_[0])  # Biases for first hidden layer

print("\nWeights between first and second hidden layer:")
print(mlp.coefs_[1])  # Weights between first and second hidden layers

print("\nBiases for second hidden layer:")
print(mlp.intercepts_[1])  # Biases for second hidden layer

print("\nWeights between second hidden layer and output layer:")
print(mlp.coefs_[2])  # Weights between second hidden layer and output layer

print("\nBiases for output layer:")
print(mlp.intercepts_[2])  # Biases for output layer
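
As a sanity check, the stored weights and biases can be used to reproduce the network's output by hand. This sketch is an addition for illustration and assumes the configuration above (ReLU hidden activations and the softmax output that MLPClassifier uses for multi-class problems):

def manual_forward(x, mlp):
    # Hidden layers: affine transform followed by ReLU
    a = x
    for W, b in zip(mlp.coefs_[:-1], mlp.intercepts_[:-1]):
        a = np.maximum(0, a @ W + b)
    # Output layer: affine transform followed by softmax
    z = a @ mlp.coefs_[-1] + mlp.intercepts_[-1]
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

print("\nManual forward pass vs. predict_proba (first test sample):")
print(manual_forward(X_test[:1], mlp))
print(mlp.predict_proba(X_test[:1]))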



import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError, CategoricalCrossentropy, KLDivergence
from tensorflow.keras.activations import relu, softmax, tanh, sigmoid

# Sample data for a regression task
X_reg = np.random.random((100, 10))  # 100 samples, 10 features
y_reg = np.random.random((100, 1))   # 100 samples, 1 target variable (continuous)

# Sample data for a classification task (multi-class classification)
X_clf = np.random.random((100, 10))  # 100 samples, 10 features
y_clf = np.random.randint(0, 3, size=(100,))  # 100 samples, 3 classes
y_clf = np.eye(3)[y_clf]  # One-hot encode the classes for multi-class classification

### Model for Regression Task ###

# Create a model with a single hidden layer and output layer
reg_model = Sequential([
    Dense(64, input_dim=10),               # Hidden layer with 64 neurons
    Activation('tanh'),                    # Using tanh as activation function for hidden layer
    Dense(1),                              # Output layer for regression
    Activation('linear')                   # Identity activation function (linear for regression)
])

# Compile the model for regression using Mean Squared Error
reg_model.compile(optimizer=Adam(), loss=MeanSquaredError())

# Train the model (for demonstration, epochs kept low)
reg_model.fit(X_reg, y_reg, epochs=5)

### Model for Classification Task ###

# Create a model with two hidden layers and an output layer
clf_model = Sequential([
    Dense(64, input_dim=10),               # Hidden layer 1
    Activation('relu'),                    # Using ReLU for the first hidden layer
    Dense(32),                             # Hidden layer 2
    Activation('sigmoid'),                 # Using sigmoid for the second hidden layer
    Dense(3),                              # Output layer for 3-class classification
    Activation('softmax')                  # Softmax activation for output layer (multi-class classification)
])

# Compile the model for classification using Cross Entropy loss
clf_model.compile(optimizer=Adam(), loss=CategoricalCrossentropy())

# Train the model (for demonstration, epochs kept low)
clf_model.fit(X_clf, y_clf, epochs=5)

### Model with KL Divergence Loss (distribution learning) ###

# Another classification task where we model a distribution
clf_model_kl = Sequential([
    Dense(64, input_dim=10),
    Activation('relu'),                    # ReLU activation for hidden layers
    Dense(3),
    Activation('softmax')                  # Softmax for output layer
])

# Compile the model using KL Divergence as loss function
clf_model_kl.compile(optimizer=Adam(), loss=KLDivergence())

# Train the model (for demonstration, epochs kept low)
clf_model_kl.fit(X_clf, y_clf, epochs=5)




import numpy as np

# Perceptron class implementation
class Perceptron:
    def __init__(self, learning_rate=0.1, n_iter=1000):
        self.learning_rate = learning_rate
        self.n_iter = n_iter

    def activation(self, x):
        return np.where(x >= 0, 1, 0)  # Step function

    def fit(self, X, y):
        self.weights = np.zeros(X.shape[1] + 1)  # Initialize weights
        for _ in range(self.n_iter):
            for xi, target in zip(X, y):
                update = self.learning_rate * (target - self.activation(np.dot(xi, self.weights[1:]) + self.weights[0]))
                self.weights[1:] += update * xi
                self.weights[0] += update

    def predict(self, X):
        return self.activation(np.dot(X, self.weights[1:]) + self.weights[0])

# Solving the XOR problem (which a simple Perceptron can't solve)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])  # XOR labels

# Instantiate and train Perceptron
perceptron = Perceptron()
perceptron.fit(X, y)
predictions = perceptron.predict(X)

# Output the predictions (which will fail to solve XOR)
print("Perceptron predictions for XOR problem:", predictions)






import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Create sample data (past sales data)
data = {'Month': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        'Sales': [200, 220, 250, 270, 300, 320, 350, 370, 400, 420, 450, 470]}
df = pd.DataFrame(data)

# Split into features (independent variable) and target (dependent variable)
X = df[['Month']]  # Feature: Month
y = df['Sales']    # Target: Sales

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict sales on the test set
y_pred = model.predict(X_test)

# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Predict future sales
future_sales = model.predict(np.array([[13], [14], [15]]))
print(f"Predicted sales for months 13, 14, 15: {future_sales}")




import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Create sample data (features of animals)
data = {'Weight': [15, 30, 70, 5, 10, 8, 60, 20],
        'Size': [25, 50, 80, 10, 15, 12, 70, 40],
        'Type': ['Dog', 'Dog', 'Dog', 'Cat', 'Cat', 'Cat', 'Dog', 'Dog']}
df = pd.DataFrame(data)

# Split into features and target
X = df[['Weight', 'Size']]  # Features: Weight and Size
y = df['Type']              # Target: Animal Type

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a decision tree classifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Predict the type of new animals
new_animals = pd.DataFrame({'Weight': [10, 50], 'Size': [15, 65]})
predicted_type = classifier.predict(new_animals)
print(f"Predicted animal types: {predicted_type}")





import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Load the Iris dataset as sample data
data = load_iris()
X = data.data

# Dimensionality reduction (use PCA to reduce to 2 dimensions)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# K-means clustering (partition into 3 clusters)
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_pca)
labels = kmeans.labels_

# Visualize the clustering result
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=labels, cmap='viridis')
plt.title("PCA and K-means Clustering")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.colorbar(label='Cluster Label')
plt.show()



import numpy as np
import matplotlib.pyplot as plt

# Define the environment
n_states = 5  # Number of states
actions = ['left', 'right']  # Possible actions
q_table = np.zeros((n_states, len(actions)))  # Initialize Q-table

# Parameters
learning_rate = 0.1
discount_factor = 0.9
epsilon = 0.9  # Exploration rate: probability of choosing a random action (epsilon-greedy)
episodes = 50
max_steps = 10  # Maximum steps per episode

# Define reward function
def get_reward(state):
    if state == n_states - 1:
        return 1  # Goal state
    else:
        return 0

# Define action function
def take_action(state, action):
    if action == 0:  # Move left
        next_state = max(0, state - 1)
    else:  # Move right
        next_state = min(n_states - 1, state + 1)
    return next_state

# Q-learning algorithm
for episode in range(episodes):
    state = 0  # Start from state 0
    for step in range(max_steps):
        # Choose action (epsilon-greedy)
        if np.random.rand() < epsilon:
            action = np.random.choice(len(actions))  # Explore
        else:
            action = np.argmax(q_table[state, :])  # Exploit best action

        # Take action and observe next state and reward
        next_state = take_action(state, action)
        reward = get_reward(next_state)

        # Q-value update
        q_predict = q_table[state, action]
        q_target = reward + discount_factor * np.max(q_table[next_state, :])
        q_table[state, action] += learning_rate * (q_target - q_predict)

        # Transition to next state
        state = next_state

        # If goal is reached, end episode
        if state == n_states - 1:
            break

# Display Q-table
print("Trained Q-table:")
print(q_table)

# Plotting the Q-values
plt.imshow(q_table, cmap='cool', interpolation='none')
plt.colorbar()
plt.title("Q-value Table")
plt.xlabel("Actions")
plt.ylabel("States")
plt.show()
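
To make the result easier to read, the learned Q-table can be turned into a greedy policy by taking the best action in each state (a small addition reusing q_table, actions and n_states from above):

greedy_policy = [actions[int(np.argmax(q_table[s, :]))] for s in range(n_states)]
print("Greedy policy per state:", greedy_policy)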

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load the Iris dataset
iris = load_iris()
X = iris.data  # Input data
y = iris.target.reshape(-1, 1)  # Target data reshaped to a column vector

# One-hot encode the target labels
encoder = OneHotEncoder(sparse_output=False)  # 'sparse' was renamed to 'sparse_output' in scikit-learn 1.2
y = encoder.fit_transform(y)

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a simple neural network model
model = models.Sequential([
    layers.InputLayer(input_shape=(X_train.shape[1],)),  # Input layer
    layers.Dense(10, activation='relu'),  # Hidden layer with 10 neurons and ReLU activation
    layers.Dense(3, activation='softmax')  # Output layer with 3 neurons (one for each class)
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=5, verbose=1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Predict using the trained model
predictions = model.predict(X_test)
print("Predicted labels:", np.argmax(predictions, axis=1))


import numpy as np

# Define prior distribution parameters
prior_mean = 0
prior_variance = 1

# Define likelihood distribution parameters
likelihood_variance = 0.5

# Observed data
data = np.array([1.2, 1.8, 1.5])

# Posterior mean and variance calculation
n = len(data)
posterior_variance = 1 / (1 / prior_variance + n / likelihood_variance)
posterior_mean = (prior_mean / prior_variance + np.sum(data) / likelihood_variance) * posterior_variance

print(f"Posterior Mean: {posterior_mean}")
print(f"Posterior Variance: {posterior_variance}")


from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Sample text data
texts = ["Free money now", "Hey, how are you?", "Free gift voucher", 
         "Meeting at 5 pm", "You have won a prize", "Let's catch up for lunch"]
labels = [1, 0, 1, 0, 1, 0]  # 1: Spam, 0: Not Spam

# Convert text data to a bag-of-words representation
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Train a Naive Bayes classifier
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = nb_classifier.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

# Test with a new message
new_message = ["Congratulations! You have won a free ticket."]
new_message_transformed = vectorizer.transform(new_message)
prediction = nb_classifier.predict(new_message_transformed)
print(f"Prediction: {'Spam' if prediction[0] == 1 else 'Not Spam'}")

from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Define a simple Bayesian Network structure
model = BayesianNetwork([('A', 'C'), ('B', 'C')])

# Define the CPDs (Conditional Probability Distributions)
cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.8], [0.2]])
cpd_b = TabularCPD(variable='B', variable_card=2, values=[[0.6], [0.4]])
cpd_c = TabularCPD(variable='C', variable_card=2,
                   values=[[0.9, 0.7, 0.6, 0.1], [0.1, 0.3, 0.4, 0.9]],
                   evidence=['A', 'B'], evidence_card=[2, 2])

# Add CPDs to the model
model.add_cpds(cpd_a, cpd_b, cpd_c)

# Verify the model structure
assert model.check_model()

# Perform inference
inference = VariableElimination(model)
prob_c_given_a = inference.query(variables=['C'], evidence={'A': 1})
print(prob_c_given_a)
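
A query can also condition on more than one variable at a time, for example P(C | A=1, B=0):

prob_c_given_ab = inference.query(variables=['C'], evidence={'A': 1, 'B': 0})
print(prob_c_given_ab)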


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA

# Generate a synthetic non-stationary time series (random walk)
np.random.seed(42)
n = 100
e = np.random.normal(0, 1, n)
y = np.cumsum(e)  # Random walk time series

# Create a Pandas DataFrame for easier handling
data = pd.DataFrame({'y': y}, index=pd.date_range('2020-01-01', periods=n))

# Plot the time series
plt.figure(figsize=(10, 4))
plt.plot(data, label='Random Walk')
plt.title('Non-stationary Time Series (Random Walk)')
plt.legend()
plt.show()

# Perform the Augmented Dickey-Fuller test for stationarity
adf_result = adfuller(data['y'])
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')

# If p-value > 0.05, the time series is non-stationary
if adf_result[1] > 0.05:
    print("The time series is non-stationary. Applying differencing...")

    # Apply differencing to make the time series stationary
    data['y_diff'] = data['y'].diff().dropna()
    
    # Perform ADF test on differenced data
    adf_result_diff = adfuller(data['y_diff'].dropna())
    print(f'ADF Statistic after differencing: {adf_result_diff[0]:.4f}')
    print(f'p-value after differencing: {adf_result_diff[1]:.4f}')
    
    # Plot ACF and PACF of differenced data
    fig, axes = plt.subplots(1, 2, figsize=(15, 4))
    plot_acf(data['y_diff'].dropna(), ax=axes[0])
    plot_pacf(data['y_diff'].dropna(), ax=axes[1])
    plt.show()
    
    # Fit an ARIMA model (d=1 in the order applies the differencing internally, so the original series is passed)
    model = ARIMA(data['y'], order=(1, 1, 1))  # ARIMA(p, d, q)
    model_fit = model.fit()
    print(model_fit.summary())
    
    # Forecasting
    forecast = model_fit.get_forecast(steps=10)
    forecast_mean = forecast.predicted_mean
    confidence_interval = forecast.conf_int()
    
    # Plot forecast
    plt.figure(figsize=(10, 4))
    plt.plot(data['y'], label='Original')
    plt.plot(forecast_mean, label='Forecast')
    plt.fill_between(confidence_interval.index,
                     confidence_interval.iloc[:, 0],
                     confidence_interval.iloc[:, 1], color='pink', alpha=0.3)
    plt.title('ARIMA Forecast')
    plt.legend()
    plt.show()


import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier  # in newer TensorFlow, this wrapper lives in the separate scikeras package

# Load a sample dataset (e.g., Breast Cancer dataset)
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X, y = data.data, data.target

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a function to create a Keras model (required for KerasClassifier)
def create_model(learning_rate=0.01, optimizer='adam'):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(16, input_dim=X.shape[1], activation='relu'),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    # Build the optimizer explicitly so the learning_rate hyperparameter is actually used
    if str(optimizer).lower() == 'sgd':
        opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create a KerasClassifier wrapper for the model
model = KerasClassifier(build_fn=create_model, verbose=0)

# Define the hyperparameters grid
param_grid = {
    'batch_size': [10, 20, 50],
    'epochs': [10, 50, 100],
    'learning_rate': [0.01, 0.001, 0.0001],
    'optimizer': ['SGD', 'Adam']
}

# Set up the grid search
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)

# Fit the grid search to the data
grid_result = grid.fit(X_train, y_train)

# Print the best parameters and the corresponding score
print(f"Best Hyperparameters: {grid_result.best_params_}")
print(f"Best Cross-validation Accuracy: {grid_result.best_score_:.4f}")

# Train the model with the best hyperparameters on the training data
best_model = grid_result.best_estimator_
best_model.fit(X_train, y_train, epochs=grid_result.best_params_['epochs'], batch_size=grid_result.best_params_['batch_size'])

# Evaluate the model on the test set
y_pred = best_model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)  # Convert probabilities to binary classes
print("Classification Report:\n", classification_report(y_test, y_pred))

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

### Hold-Out Validation
# Split the data into training (80%) and test (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Decision Tree model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the test set
y_pred = model.predict(X_test)
holdout_accuracy = accuracy_score(y_test, y_pred)
print(f"Hold-Out Validation Accuracy: {holdout_accuracy:.4f}")

### k-Fold Cross-Validation
# Define the number of folds (e.g., k=5 for 5-fold cross-validation)
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Evaluate the model using k-fold cross-validation
cv_scores = cross_val_score(model, X, y, cv=kf, scoring='accuracy')
print(f"{k}-Fold Cross-Validation Accuracies: {cv_scores}")
print(f"Mean {k}-Fold Cross-Validation Accuracy: {np.mean(cv_scores):.4f}")


import matplotlib.pyplot as plt

# Function to visualize k-fold cross-validation splits
def plot_kfold_splits(X, y, n_splits=5):
    plt.figure(figsize=(12, 6))
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    for fold, (train_idx, test_idx) in enumerate(kf.split(X, y), start=1):
        plt.subplot(1, n_splits, fold)
        plt.scatter(X[train_idx][:, 0], X[train_idx][:, 1], label='Training Set', alpha=0.6)
        plt.scatter(X[test_idx][:, 0], X[test_idx][:, 1], label='Test Set', alpha=0.6)
        plt.title(f'Fold {fold}')
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.legend()

    plt.suptitle(f'{n_splits}-Fold Cross-Validation Splits')
    plt.show()

# Visualize the k-fold splits (only using the first 2 features for visualization purposes)
plot_kfold_splits(X[:, :2], y, n_splits=5)

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
    ConfusionMatrixDisplay  # plot_confusion_matrix was removed in scikit-learn 1.2
)
import matplotlib.pyplot as plt

# Load the Breast Cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForest classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Generate a confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Display a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Plot the confusion matrix
fig, ax = plt.subplots(figsize=(6, 6))
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, cmap=plt.cm.Blues, ax=ax)
plt.title('Confusion Matrix')
plt.show()
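
For reference, the metrics printed above are defined from the confusion-matrix counts (true positives TP, false positives FP, false negatives FN, true negatives TN) as

$$
\text{Accuracy} = \frac{TP + TN}{TP + FP + FN + TN},\qquad
\text{Precision} = \frac{TP}{TP + FP},\qquad
\text{Recall} = \frac{TP}{TP + FN},\qquad
F_1 = \frac{2 \cdot \text{Precision} \cdot \text{Recall}}{\text{Precision} + \text{Recall}}.
$$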


from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Load the digits dataset
digits = load_digits()
X = digits.data
y = digits.target

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define an MLP model with multiple layers and different activation functions
mlp = MLPClassifier(hidden_layer_sizes=(50, 30),  # Two hidden layers with 50 and 30 neurons
                    activation='relu',  # ReLU activation function in hidden layers
                    solver='adam',      # Adam optimizer
                    max_iter=200,       # Maximum iterations
                    random_state=42)

# Train the MLP model
mlp.fit(X_train, y_train)

# Make predictions on the test set
y_pred = mlp.predict(X_test)

# Evaluate the performance of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

# Generate a classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Plot the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()


import numpy as np
import matplotlib.pyplot as plt

# Generate synthetic data for linear regression
np.random.seed(42)
X = 2 * np.random.rand(100, 1)  # 100 data points between 0 and 2
y = 4 + 3 * X + np.random.randn(100, 1)  # Linear relationship with noise

# Hyperparameters
learning_rate = 0.1  # Step size
n_iterations = 1000  # Number of iterations for gradient descent
m = len(X)  # Number of samples

# Add bias term (intercept) to the input data
X_b = np.c_[np.ones((m, 1)), X]  # Add a column of ones to X for the bias term

### 1. Batch Gradient Descent ###
# Initialize weights
theta_batch = np.random.randn(2, 1)

# Perform Batch Gradient Descent
for iteration in range(n_iterations):
    gradients = 2/m * X_b.T.dot(X_b.dot(theta_batch) - y)
    theta_batch = theta_batch - learning_rate * gradients

# Print final parameters learned using Batch Gradient Descent
print(f"Batch Gradient Descent Parameters: {theta_batch.ravel()}")

### 2. Stochastic Gradient Descent ###
# Initialize weights
theta_sgd = np.random.randn(2, 1)

# Perform Stochastic Gradient Descent
for epoch in range(n_iterations):
    for i in range(m):
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index+1]
        yi = y[random_index:random_index+1]
        gradients = 2 * xi.T.dot(xi.dot(theta_sgd) - yi)
        theta_sgd = theta_sgd - learning_rate * gradients

# Print final parameters learned using Stochastic Gradient Descent
print(f"Stochastic Gradient Descent Parameters: {theta_sgd.ravel()}")

### 3. Mini-Batch Gradient Descent ###
# Initialize weights
theta_minibatch = np.random.randn(2, 1)
batch_size = 20  # Mini-batch size

# Perform Mini-Batch Gradient Descent
for epoch in range(n_iterations):
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, batch_size):
        xi = X_b_shuffled[i:i+batch_size]
        yi = y_shuffled[i:i+batch_size]
        gradients = 2/batch_size * xi.T.dot(xi.dot(theta_minibatch) - yi)
        theta_minibatch = theta_minibatch - learning_rate * gradients

# Print final parameters learned using Mini-Batch Gradient Descent
print(f"Mini-Batch Gradient Descent Parameters: {theta_minibatch.ravel()}")

# Plot the results of the three methods
plt.figure(figsize=(10, 6))
plt.scatter(X, y, label='Data points')
plt.plot(X, X_b.dot(theta_batch), "r-", label='Batch Gradient Descent', linewidth=2)
plt.plot(X, X_b.dot(theta_sgd), "g--", label='Stochastic Gradient Descent', linewidth=2)
plt.plot(X, X_b.dot(theta_minibatch), "b:", label='Mini-Batch Gradient Descent', linewidth=2)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Gradient Descent Methods")
plt.legend()
plt.show()
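
All three variants above use the same gradient of the mean-squared-error cost; they differ only in how many samples $m_b$ enter each update:

$$
\nabla_{\theta}\,\text{MSE}(\theta) = \frac{2}{m_b}\, X_b^{\top}\left(X_b \theta - y_b\right),
$$

where $(X_b, y_b)$ is the full training set for batch gradient descent, a single random sample for stochastic gradient descent, and a mini-batch of 20 samples for mini-batch gradient descent.
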
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Generate a simple linear dataset
torch.manual_seed(1)  # Set a seed for reproducibility
X = torch.linspace(0, 10, 100).unsqueeze(1)  # Reshape into a (100, 1) matrix
y = 2.5 * X + torch.randn(100, 1) * 2  # Add a little noise to the true y values

# Define the model
model = nn.Linear(1, 1)  # Linear model with input size 1 and output size 1

# Set up the loss function and optimizer
criterion = nn.MSELoss()  # Mean squared error
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training
epochs = 100  # Number of epochs
losses = []  # List for storing the loss values
for epoch in range(epochs):
    model.train()  # Put the model in training mode
    optimizer.zero_grad()  # Reset the gradients

    # Forward pass
    y_pred = model(X)
    loss = criterion(y_pred, y)  # Compute the loss

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Record the loss value
    losses.append(loss.item())

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Plot the results
plt.figure(figsize=(10, 5))

# Plot the training data and the model's predictions
plt.subplot(1, 2, 1)
plt.scatter(X.numpy(), y.numpy(), label='Actual Data')
plt.plot(X.numpy(), y_pred.detach().numpy(), color='red', label='Fitted Line')
plt.title('Linear Regression with PyTorch')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()

# Plot the loss over the course of training
plt.subplot(1, 2, 2)
plt.plot(range(epochs), losses, label='Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

# Generate data
np.random.seed(1)
X = np.linspace(0, 10, 100)
y = 2.5 * X + np.random.randn(100) * 2

# Define the model
model = Sequential()
model.add(Dense(1, input_dim=1, kernel_initializer='normal'))

# Compile and train the model
model.compile(optimizer=SGD(learning_rate=0.01), loss='mean_squared_error')
model.fit(X, y, epochs=100, verbose=0)

# Compute predictions
y_pred = model.predict(X)

# Plot
plt.scatter(X, y, label='Actual Data')
plt.plot(X, y_pred, color='red', label='Fitted Line (Keras)')
plt.legend()
plt.show()
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Generate data
np.random.seed(1)
X = np.linspace(0, 10, 100).reshape(-1, 1)
y = 2.5 * X + np.random.randn(100, 1) * 2

# Define the model
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=[1])])
model.compile(optimizer=tf.optimizers.SGD(learning_rate=0.01), loss='mean_squared_error')

# Train the model
model.fit(X, y, epochs=100, verbose=0)

# Predict
y_pred = model.predict(X)

# Plot
plt.scatter(X, y, label='Actual Data')
plt.plot(X, y_pred, color='red', label='Fitted Line (TensorFlow)')
plt.legend()
plt.show()



import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers, Variable
import numpy as np
import matplotlib.pyplot as plt

# Generate data
np.random.seed(1)
X = np.linspace(0, 10, 100).astype(np.float32)[:, np.newaxis]
y = 2.5 * X + np.random.randn(100, 1).astype(np.float32) * 2

# Define the model
model = L.Linear(1, 1)
optimizer = optimizers.SGD(lr=0.01)
optimizer.setup(model)

# Train the model
for epoch in range(100):
    y_pred = model(Variable(X))
    loss = F.mean_squared_error(y_pred, Variable(y))
    model.cleargrads()
    loss.backward()
    optimizer.update()

# Predict
y_pred = model(Variable(X)).data

# Plot
plt.scatter(X, y, label='Actual Data')
plt.plot(X, y_pred, color='red', label='Fitted Line (Chainer)')
plt.legend()
plt.show()


