# Compute a posterior probability with Bayes' theorem
def bayes_theorem(prior_A, likelihood_B_given_A, prior_B):
    """Return the posterior P(A|B) = P(B|A) * P(A) / P(B).

    Args:
        prior_A: Prior probability P(A).
        likelihood_B_given_A: Likelihood P(B|A).
        prior_B: Marginal probability P(B); must be non-zero.

    Returns:
        The posterior probability P(A|B).

    Raises:
        ZeroDivisionError: If ``prior_B`` is a plain Python zero.
    """
    return (likelihood_B_given_A * prior_A) / prior_B
# Prior probability P(A)
prior_A = 0.01
# Likelihood P(B|A)
likelihood_B_given_A = 0.8
# Marginal probability P(B)
prior_B = 0.02
# Posterior probability P(A|B), obtained from Bayes' theorem
posterior_A_given_B = bayes_theorem(
    prior_A=prior_A,
    likelihood_B_given_A=likelihood_B_given_A,
    prior_B=prior_B,
)
print(f"P(A|B): {posterior_A_given_B:.4f}")
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import rv_discrete
# Define a discrete probability distribution:
# a fair six-sided die taking the values 1 through 6.
values = [1, 2, 3, 4, 5, 6]
probabilities = [1/6] * 6  # every face has probability 1/6
# Build the distribution object from the (values, probabilities) pair
custom_dist = rv_discrete(name='custom', values=(values, probabilities))
# Plot the probability mass function (PMF)
x = np.arange(1, 7)
pmf = custom_dist.pmf(x)
plt.figure(figsize=(8, 6))
# NOTE: stem() no longer accepts the `use_line_collection` keyword (it was
# deprecated and then removed from Matplotlib), so it is not passed here.
plt.stem(x, pmf, basefmt=" ")
plt.xlabel('Value')
plt.ylabel('Probability')
plt.title('Probability Mass Function (PMF)')
plt.grid()
plt.show()
# Plot the cumulative distribution function (CDF)
cdf = custom_dist.cdf(x)
plt.figure(figsize=(8, 6))
# A discrete CDF jumps exactly at each value and then stays flat until the
# next one, so the steps are drawn with where='post' (where='mid' would place
# the jumps halfway between two values).
plt.step(x, cdf, where='post')
plt.xlabel('Value')
plt.ylabel('Cumulative Probability')
plt.title('Cumulative Distribution Function (CDF)')
plt.grid()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Generate data drawn from a normal distribution
mean = 0            # mean of the distribution
std_dev = 1         # standard deviation of the distribution
num_samples = 1000  # number of samples to draw
data = np.random.normal(loc=mean, scale=std_dev, size=num_samples)
# Basic descriptive statistics of the generated data (NumPy defaults, ddof=0)
mean_calculated = data.mean()
std_dev_calculated = data.std()
variance_calculated = data.var()
for _label, _value in (
    ('Mean', mean_calculated),
    ('Standard Deviation', std_dev_calculated),
    ('Variance', variance_calculated),
):
    print(f'Calculated {_label}: {_value}')
# Draw the histogram together with a kernel density estimate (KDE) curve
sns.histplot(data, bins=30, kde=True)
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Histogram and KDE of Normally Distributed Data')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
def calculate_mean_variance(values, probabilities):
    """Return the mean and variance of a discrete probability distribution.

    Args:
        values: Outcomes of the random variable.
        probabilities: Probability of each outcome, in the same order.

    Returns:
        A ``(mean, variance)`` tuple: the probability-weighted sum of the
        values, and the probability-weighted sum of squared deviations
        from that mean.

    Raises:
        ValueError: If the two sequences differ in length, if any
            probability lies outside [0, 1], or if the probabilities do
            not sum to 1 (tolerance 1e-6).
    """
    if len(values) != len(probabilities):
        raise ValueError("The length of values and probabilities must be the same.")
    if not all(0 <= p <= 1 for p in probabilities):
        raise ValueError("Probabilities must be between 0 and 1.")
    if abs(sum(probabilities) - 1.0) > 1e-6:
        raise ValueError("The sum of probabilities must be 1.")
    mean = sum(value * probability for value, probability in zip(values, probabilities))
    variance = sum(
        (value - mean) ** 2 * probability
        for value, probability in zip(values, probabilities)
    )
    return mean, variance
def plot_distribution(values, probabilities):
    """Plot a discrete distribution as a bar chart with mean and +/-1 std-dev lines.

    Args:
        values: Outcomes of the random variable (bar positions).
        probabilities: Probability of each outcome (bar heights).

    Raises:
        ValueError: Propagated from ``calculate_mean_variance`` when the
            inputs do not describe a valid distribution.
    """
    mean, variance = calculate_mean_variance(values, probabilities)
    std_dev = np.sqrt(variance)  # computed once and reused for both lines and labels
    plt.bar(values, probabilities, color='skyblue', alpha=0.7, label='Probability')
    plt.axvline(x=mean, color='red', linestyle='--', label=f'Mean: {mean:.2f}')
    plt.axvline(x=mean + std_dev, color='green', linestyle='--', label=f'Mean + StdDev: {mean + std_dev:.2f}')
    plt.axvline(x=mean - std_dev, color='green', linestyle='--', label=f'Mean - StdDev: {mean - std_dev:.2f}')
    plt.xlabel('Values')
    plt.ylabel('Probabilities')
    plt.title('Probability Distribution with Mean and Variance')
    plt.legend()
    plt.show()
# Example usage: a five-outcome distribution whose probabilities sum to 1
values = [1, 2, 3, 4, 5]
probabilities = [0.1, 0.2, 0.3, 0.2, 0.2]
plot_distribution(values=values, probabilities=probabilities)
import matplotlib.pyplot as plt
import numpy as np
# NOTE: this repeats, unchanged, the definition that appears earlier in this file.
def calculate_mean_variance(values, probabilities):
    """Return the mean and variance of a discrete probability distribution.

    Args:
        values: Outcomes of the random variable.
        probabilities: Probability of each outcome, in the same order.

    Returns:
        A ``(mean, variance)`` tuple: the probability-weighted sum of the
        values, and the probability-weighted sum of squared deviations
        from that mean.

    Raises:
        ValueError: If the two sequences differ in length, if any
            probability lies outside [0, 1], or if the probabilities do
            not sum to 1 (tolerance 1e-6).
    """
    if len(values) != len(probabilities):
        raise ValueError("The length of values and probabilities must be the same.")
    if not all(0 <= p <= 1 for p in probabilities):
        raise ValueError("Probabilities must be between 0 and 1.")
    if abs(sum(probabilities) - 1.0) > 1e-6:
        raise ValueError("The sum of probabilities must be 1.")
    pairs = list(zip(values, probabilities))
    mean = sum(value * probability for value, probability in pairs)
    variance = sum((value - mean) ** 2 * probability for value, probability in pairs)
    return mean, variance
# NOTE: this repeats, unchanged in behavior, the definition that appears earlier in this file.
def plot_distribution(values, probabilities):
    """Plot a discrete distribution as a bar chart with mean and +/-1 std-dev lines.

    Args:
        values: Outcomes of the random variable (bar positions).
        probabilities: Probability of each outcome (bar heights).

    Raises:
        ValueError: Propagated from ``calculate_mean_variance`` when the
            inputs do not describe a valid distribution.
    """
    mean, variance = calculate_mean_variance(values, probabilities)
    std_dev = np.sqrt(variance)  # computed once and reused for both lines and labels
    upper, lower = mean + std_dev, mean - std_dev
    plt.bar(values, probabilities, color='skyblue', alpha=0.7, label='Probability')
    plt.axvline(x=mean, color='red', linestyle='--', label=f'Mean: {mean:.2f}')
    plt.axvline(x=upper, color='green', linestyle='--', label=f'Mean + StdDev: {upper:.2f}')
    plt.axvline(x=lower, color='green', linestyle='--', label=f'Mean - StdDev: {lower:.2f}')
    plt.xlabel('Values')
    plt.ylabel('Probabilities')
    plt.title('Probability Distribution with Mean and Variance')
    plt.legend()
    plt.show()
# Example usage: plot a five-outcome distribution whose probabilities sum to 1
values = [1, 2, 3, 4, 5]
probabilities = [0.1, 0.2, 0.3, 0.2, 0.2]
plot_distribution(values=values, probabilities=probabilities)
import numpy as np
import matplotlib.pyplot as plt
# Distribution of the random variable X: normal with mean 0 and standard deviation 1
mu = 0
sigma = 1
n_samples = 100000  # number of samples
# Draw the samples of X
samples = np.random.normal(loc=mu, scale=sigma, size=n_samples)
# Define the function g(X) = X^2
def g(x):
    """Return the square of ``x``; applied element-wise when ``x`` is a NumPy array."""
    return x**2
# Apply g to every sample and average the results to estimate E[g(X)]
g_samples = g(samples)
expected_value = g_samples.mean()
# Report the estimate
print(f"関数g(X)の期待値: {expected_value}")
# Histogram of the transformed samples, with the estimate marked by a vertical line
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(g_samples, bins=50, density=True, alpha=0.6, color='g', label='g(X) samples')
ax.axvline(
    expected_value,
    color='r',
    linestyle='dashed',
    linewidth=2,
    label=f'Expected Value: {expected_value:.2f}',
)
ax.set_title('Histogram of g(X) = X^2 with Expected Value')
ax.set_xlabel('g(X)')
ax.set_ylabel('Density')
ax.legend()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# Parameters of the binomial distribution
n = 50
p = 0.1
# Parameter of the Poisson distribution, set to n * p
lambda_poisson = n * p
# Probability mass function (PMF) of the binomial distribution on 0..n
x_binom = np.arange(n + 1)
pmf_binom = stats.binom.pmf(x_binom, n=n, p=p)
# Probability mass function (PMF) of the Poisson distribution on 0..19
x_poisson = np.arange(20)
pmf_poisson = stats.poisson.pmf(x_poisson, mu=lambda_poisson)
# Plot both PMFs side by side: binomial on the left, Poisson on the right
fig, (ax_binom, ax_poisson) = plt.subplots(1, 2, figsize=(12, 6))
ax_binom.bar(x_binom, pmf_binom, alpha=0.7, label='Binomial')
ax_binom.set_title('Binomial Distribution')
ax_binom.set_xlabel('Number of successes')
ax_binom.set_ylabel('Probability')
ax_binom.legend()
ax_poisson.bar(x_poisson, pmf_poisson, alpha=0.7, label='Poisson', color='orange')
ax_poisson.set_title('Poisson Distribution')
ax_poisson.set_xlabel('Number of events')
ax_poisson.set_ylabel('Probability')
ax_poisson.legend()
fig.tight_layout()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# Rate parameter of the Poisson distribution
lambda_param = 5
# Event counts to evaluate: 0..14
x = np.arange(15)
# Poisson probability mass function at each count
pmf = stats.poisson.pmf(x, mu=lambda_param)
# Mean and variance are both set to the rate parameter
mean = variance = lambda_param
# Plot the PMF as a stem plot
plt.figure(figsize=(10, 6))
# NOTE: stem() no longer accepts the `use_line_collection` keyword (it was
# deprecated and then removed from Matplotlib), so it is not passed here.
plt.stem(x, pmf)
plt.title(f'Poisson Distribution (λ={lambda_param})')
plt.xlabel('Number of events')
plt.ylabel('Probability')
plt.grid(True)
# Mark the mean with a dashed vertical line (only the mean is drawn)
plt.axvline(mean, color='r', linestyle='dashed', linewidth=1, label=f'Mean = {mean}')
plt.legend()
# Show the plot
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
# Set the mean and the standard deviation
mu = 0     # mean
sigma = 1  # standard deviation (passed to norm.pdf as `scale`), not the variance
# Evaluate the normal probability density function on mu +/- 3 sigma
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
y = norm.pdf(x, loc=mu, scale=sigma)
# Draw the density curve
plt.figure(figsize=(8, 6))
# Raw f-string: `\m` and `\s` are not valid Python escape sequences, so the
# backslashes of the mathtext commands must be kept literally.
plt.plot(x, y, label=rf'Normal Distribution ($\mu={mu}$, $\sigma={sigma}$)')
plt.title('Normal Distribution')
plt.xlabel('x')
plt.ylabel('Probability Density')
plt.legend()
plt.grid(True)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Sample data: 100 draws from np.random.randn
data = np.random.randn(100)
# Five-number summary: minimum, first quartile, median, third quartile, maximum
min_val = data.min()
q1, q3 = np.percentile(data, [25, 75])
median = np.median(data)
max_val = data.max()
for _name, _stat in (
    ("Minimum", min_val),
    ("Q1", q1),
    ("Median", median),
    ("Q3", q3),
    ("Maximum", max_val),
):
    print(f"{_name}: {_stat}")
# Draw a box plot of the sample
plt.boxplot(data)
plt.ylabel('Value')
plt.title('Boxplot')
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# Observed data points
x = np.array([2, 4, 9])
y = np.array([3, 7, 11])
# Means of x and y
mean_x = x.mean()
mean_y = y.mean()
# Population variance of x and population covariance of x and y (ddof=0)
variance_x = x.var(ddof=0)
covariance_xy = np.cov(x, y, ddof=0)[0][1]  # off-diagonal entry is cov(x, y)
# Least-squares slope (A) and intercept (B)
A = covariance_xy / variance_x
B = mean_y - A * mean_x
# Points on the fitted line across the observed x range
line_x = np.linspace(x.min(), x.max(), 100)
line_y = A * line_x + B
# Scatter the observations and overlay the fitted line
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x, y, color='blue', label='Data points')
ax.plot(line_x, line_y, color='red', label=f'Best fit line: y = {A:.2f}x + {B:.2f}')
ax.set_title('Linear Regression using Least Squares Method')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.legend()
ax.grid(True)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# Generate sample data (seeded for reproducibility)
np.random.seed(0)
data = np.random.standard_normal(1000)  # random data following a normal distribution
# Histogram parameter: number of classes (bins)
num_bins = 20
# Compute the histogram
counts, bin_edges = np.histogram(data, bins=num_bins)
# Class values: the midpoint of each bin
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
# Plot
plt.figure(figsize=(10, 6))
# Draw the histogram
plt.hist(data, bins=num_bins, edgecolor='k', alpha=0.7)
# Mark each class value (bin midpoint) at the height of its frequency
plt.scatter(bin_centers, counts, color='red', zorder=5)
# Write the frequency just above each marker
for center, count in zip(bin_centers, counts):
    plt.text(center, count, str(count), ha='center', va='bottom')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Histogram with Class Intervals and Class Values')
plt.show()
# Print every class interval with its class value and frequency
class_rows = zip(bin_edges[:-1], bin_edges[1:], bin_centers, counts)
for class_no, (lower, upper, center, count) in enumerate(class_rows, start=1):
    print(f"Class {class_no}: Range({lower:.2f}, {upper:.2f}), Class Value: {center:.2f}, Frequency: {count}")
import numpy as np
import matplotlib.pyplot as plt
# Generate sample data (seeded for reproducibility)
np.random.seed(0)
data = np.random.standard_normal(1000)  # random data following a normal distribution
# Histogram parameter: number of classes (bins)
num_bins = 20
# Compute the histogram
counts, bin_edges = np.histogram(data, bins=num_bins)
# Relative frequency of each class
relative_counts = counts / np.sum(counts)
# Cumulative frequency
cumulative_counts = counts.cumsum()
# Cumulative relative frequency
cumulative_relative_counts = relative_counts.cumsum()
# Class values: the midpoint of each bin
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
# Plot three stacked panels sharing one figure
fig, (ax_freq, ax_rel, ax_cum) = plt.subplots(3, 1, figsize=(14, 8))
# Histogram (frequency distribution)
ax_freq.hist(data, bins=num_bins, edgecolor='k', alpha=0.7)
ax_freq.set_title('Histogram (Frequency Distribution)')
ax_freq.set_xlabel('Value')
ax_freq.set_ylabel('Frequency')
# Relative frequency distribution (bars as wide as the first bin)
ax_rel.bar(bin_centers, relative_counts, width=(bin_edges[1] - bin_edges[0]), edgecolor='k', alpha=0.7)
ax_rel.set_title('Relative Frequency Distribution')
ax_rel.set_xlabel('Value')
ax_rel.set_ylabel('Relative Frequency')
# Cumulative frequency distribution
ax_cum.plot(bin_centers, cumulative_counts, marker='o', linestyle='-')
ax_cum.set_title('Cumulative Frequency Distribution')
ax_cum.set_xlabel('Value')
ax_cum.set_ylabel('Cumulative Frequency')
fig.tight_layout()
plt.show()
# Print frequency, relative frequency and the cumulative figures for every class
table_rows = zip(
    bin_edges[:-1],
    bin_edges[1:],
    bin_centers,
    counts,
    relative_counts,
    cumulative_counts,
    cumulative_relative_counts,
)
for class_no, (lower, upper, center, count, rel, cum, cum_rel) in enumerate(table_rows, start=1):
    print(f"Class {class_no}: Range({lower:.2f}, {upper:.2f}), "
          f"Class Value: {center:.2f}, Frequency: {count}, "
          f"Relative Frequency: {rel:.4f}, "
          f"Cumulative Frequency: {cum}, "
          f"Cumulative Relative Frequency: {cum_rel:.4f}")
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
# Seeded sample of 1000 normally distributed values
np.random.seed(0)
data = np.random.standard_normal(1000)
# One figure holding both the KDE and the Gaussian curve
plt.figure(figsize=(10, 6))
# Kernel density estimate drawn by Seaborn
sns.kdeplot(data, fill=True, color='blue', label='KDE')
# Gaussian density using the sample's own mean and standard deviation
mean = data.mean()
std_dev = data.std()
x = np.linspace(data.min(), data.max(), 1000)
gaussian_curve = norm.pdf(x, loc=mean, scale=std_dev)
plt.plot(x, gaussian_curve, color='red', linestyle='--', label='Gaussian Curve')
# Title and axis labels
plt.title('Kernel Density Estimation (KDE) and Gaussian Curve')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
# Display the figure
plt.show()
import numpy as np
# Example observations (swap in your own dataset)
data = np.array([10, 12, 15, 14, 11, 13, 15, 12, 13, 10])
# Mean of the observations
sample_mean = data.mean()
# Variance with the default divisor n (biased) ...
sample_variance_biased = data.var()
# ... and with the divisor n - 1 (unbiased)
sample_variance_unbiased = data.var(ddof=1)
# Standard deviation with NumPy's default ddof=0
sample_std_deviation = data.std()
# Coefficient of variation: standard deviation relative to the mean, in percent
coefficient_of_variation = sample_std_deviation / sample_mean * 100
# Report the results
for _title, _number in (
    ("Sample Mean", sample_mean),
    ("Sample Variance (biased)", sample_variance_biased),
    ("Sample Variance (unbiased)", sample_variance_unbiased),
    ("Sample Standard Deviation", sample_std_deviation),
):
    print(f"{_title}: {_number}")
print(f"Coefficient of Variation: {coefficient_of_variation:.2f}%")
import numpy as np
import matplotlib.pyplot as plt
# Three seeded normal samples of 200 points each (swap in your own dataset):
# the means decrease while the standard deviations increase.
np.random.seed(0)
data1 = np.random.normal(loc=100, scale=10, size=200)
data2 = np.random.normal(loc=90, scale=20, size=200)
data3 = np.random.normal(loc=80, scale=30, size=200)
data = [data1, data2, data3]
# Draw one vertical, filled box per dataset
plt.figure(figsize=(10, 6))
plt.boxplot(data, vert=True, patch_artist=True)
# Name the three boxes and label the figure
plt.xticks([1, 2, 3], ['Data1', 'Data2', 'Data3'])
plt.title('Boxplot of Datasets')
plt.xlabel('Dataset')
plt.ylabel('Value')
# Add a grid and display the figure
plt.grid(True)
plt.show()
import numpy as np
# Two example series (swap in your own dataset)
data1 = np.array([1, 2, 3, 4, 5])
data2 = np.array([5, 4, 3, 2, 1])
# Covariance matrix of data1 and data2; its off-diagonal entry is their covariance
covariance_matrix = np.cov(data1, data2)
covariance = covariance_matrix[0, 1]
# Correlation matrix; its off-diagonal entry is Pearson's r
correlation_matrix = np.corrcoef(data1, data2)
correlation_coefficient = correlation_matrix[0, 1]
# Report the results
print(f"Covariance between data1 and data2: {covariance}")
print(f"Covariance matrix:\n{covariance_matrix}")
print(f"Pearson's correlation coefficient (r): {correlation_coefficient}")
print(f"Correlation matrix:\n{correlation_matrix}")
import seaborn as sns
import matplotlib.pyplot as plt
# Seaborn's "tips" example dataset (swap in your own dataset)
data = sns.load_dataset("tips")
# Violin plot of the total bill, one violin per day
plt.figure(figsize=(10, 6))
sns.violinplot(data=data, x="day", y="total_bill")
# Title and axis labels
plt.title('Violin Plot of Total Bill by Day')
plt.xlabel('Day of the Week')
plt.ylabel('Total Bill')
# Add a grid and display the figure
plt.grid(True)
plt.show()
# Sample data for illustration
# Suppose we have a standard deck of cards
deck = {
    'suits': ['Hearts', 'Diamonds', 'Clubs', 'Spades'],
    'values': ['Ace', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'Jack', 'Queen', 'King']
}
# Define probabilities for illustration
prob_suit_hearts = 1/4  # Probability of drawing a Heart suit
prob_value_queen = 1/13  # Probability of drawing a Queen card
prob_value_queen_given_hearts = 1/13  # Conditional probability of drawing a Queen given it's a Heart
# Joint probability P(Queen and Heart): exactly one card out of the whole deck.
# This, not the conditional probability, is the term the rules below need.
prob_queen_of_hearts = 1 / (len(deck['suits']) * len(deck['values']))
# Addition Rule: P(A or B) = P(A) + P(B) - P(A and B)
prob_heart_or_queen = prob_suit_hearts + prob_value_queen - prob_queen_of_hearts
print(f"Probability of drawing a Heart or a Queen: {prob_heart_or_queen}")
# Conditional Probability: P(A | B) = P(A and B) / P(B)
prob_queen_given_hearts = prob_queen_of_hearts / prob_suit_hearts
print(f"Conditional Probability of drawing a Queen given it's a Heart: {prob_queen_given_hearts}")
# Multiplication Rule: P(A and B) = P(A | B) * P(B)
prob_queen_and_hearts = prob_queen_given_hearts * prob_suit_hearts
print(f"Probability of drawing a Queen and it's a Heart: {prob_queen_and_hearts}")
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import uniform, norm
# Example 1: Discrete Uniform Distribution
# Simulate 1000 draws of an integer from 1 to 6
data_uniform = np.random.randint(low=1, high=7, size=1000)
# Empirical PMF: count each outcome, then divide by the number of draws
counts, bins = np.histogram(data_uniform, bins=6, range=(1, 7))
probabilities = counts / data_uniform.size
# Bar chart of the empirical PMF; the left bin edges are the outcomes 1..6
plt.figure(figsize=(10, 6))
plt.bar(bins[:-1], probabilities, width=0.5, align='center', alpha=0.7)
plt.title('Probability Mass Function (PMF) of Discrete Uniform Distribution')
plt.xlabel('Outcome')
plt.ylabel('Probability')
plt.grid(True)
plt.show()
# Example 2: Continuous Normal Distribution
# Simulate 1000 draws from a standard normal distribution
data_normal = np.random.normal(loc=0, scale=1, size=1000)
# Fit a normal distribution to the draws (optional step)
mu, std_dev = norm.fit(data_normal)
# Density histogram of the draws
plt.figure(figsize=(10, 6))
plt.hist(data_normal, bins=30, density=True, alpha=0.7, color='blue')
plt.title('Probability Density Function (PDF) of Normal Distribution')
plt.xlabel('Value')
plt.ylabel('Probability Density')
plt.grid(True)
# Draw the fitted PDF across the current x-axis range
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, loc=mu, scale=std_dev)
plt.plot(x, p, 'k', linewidth=2)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import uniform
# Bounds of the uniform distribution
a = 0   # lower bound
b = 10  # upper bound
# Random draws from the uniform distribution on [a, b]
data_uniform = uniform.rvs(loc=a, scale=b - a, size=1000)
# Grid of 100 points spanning [a, b]
x = np.linspace(a, b, 100)
# Probability density function (PDF) on the grid
pdf_uniform = uniform.pdf(x, loc=a, scale=b - a)
# Cumulative distribution function (CDF) on the grid
cdf_uniform = uniform.cdf(x, loc=a, scale=b - a)
# Side-by-side panels: PDF on the left, CDF on the right
fig, (ax_pdf, ax_cdf) = plt.subplots(1, 2, figsize=(12, 6))
# PDF panel
ax_pdf.plot(x, pdf_uniform, 'b-', label='PDF')
ax_pdf.set_title('Probability Density Function (PDF) of Uniform Distribution')
ax_pdf.set_xlabel('Value')
ax_pdf.set_ylabel('Density')
ax_pdf.grid(True)
ax_pdf.legend()
# CDF panel
ax_cdf.plot(x, cdf_uniform, 'r-', label='CDF')
ax_cdf.set_title('Cumulative Distribution Function (CDF) of Uniform Distribution')
ax_cdf.set_xlabel('Value')
ax_cdf.set_ylabel('Probability')
ax_cdf.grid(True)
ax_cdf.legend()
fig.tight_layout()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# Simulation settings
population_size = 10000  # number of values in the population
sample_size = 100        # number of values in each sample
num_samples = 10000      # number of samples to draw
# Population drawn from a Poisson distribution (an example of a non-normal one)
population = np.random.poisson(lam=5, size=population_size)
# Function to calculate the means of repeated samples
def calculate_sample_means(population, sample_size, num_samples):
    """Return the means of repeated random samples drawn from ``population``.

    Each sample holds ``sample_size`` elements drawn with replacement via
    ``np.random.choice``.

    Args:
        population: Values to sample from.
        sample_size: Number of elements in each sample.
        num_samples: Number of samples to draw.

    Returns:
        A list with one sample mean per drawn sample, in draw order.
    """
    return [
        np.mean(np.random.choice(population, size=sample_size, replace=True))
        for _ in range(num_samples)
    ]
# Draw the samples and collect their means
sample_means = calculate_sample_means(
    population=population,
    sample_size=sample_size,
    num_samples=num_samples,
)
# Mean and variance of the collected sample means
mean_of_means = np.mean(sample_means)
variance_of_means = np.var(sample_means)
# Density histogram of the sample means
plt.figure(figsize=(10, 6))
plt.hist(sample_means, bins=50, density=True, color='blue', alpha=0.7)
plt.title('Distribution of Sample Means (Central Limit Theorem)')
plt.xlabel('Sample Mean')
plt.ylabel('Density')
plt.grid(True)
# Overlay the outline histogram of normal draws with the same mean and variance
normal_dist = np.random.normal(
    loc=mean_of_means,
    scale=np.sqrt(variance_of_means),
    size=num_samples,
)
plt.hist(normal_dist, bins=50, density=True, color='red', histtype='step', linewidth=2)
plt.legend(['Sample Means', 'Normal Distribution'])
plt.show()
# Report the mean and variance of the sample means
print(f"Mean of Sample Means: {mean_of_means}")
print(f"Variance of Sample Means: {variance_of_means}")
import numpy as np
import matplotlib.pyplot as plt
# Seeded example population: 1000 draws from a normal distribution (mean 50, std 10)
np.random.seed(0)
population = np.random.normal(50, 10, 1000)
# Mean of the population
population_mean = population.mean()
# Standard deviation of the population (NumPy default, ddof=0)
population_std = population.std()
# Histogram of the population with its mean marked by a dashed line
plt.figure(figsize=(10, 6))
plt.hist(population, bins=30, alpha=0.7, color='blue', edgecolor='black')
plt.axvline(population_mean, color='red', linestyle='dashed', linewidth=1.5, label='Population Mean')
plt.title('Population Distribution')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()
plt.grid(True)
plt.show()
# Report both statistics rounded to two decimals
for _caption, _figure in (
    ("Population Mean", population_mean),
    ("Population Standard Deviation", population_std),
):
    print(f"{_caption}: {_figure:.2f}")