開発環境
- OS X El Capitan - Apple (OS)
- Emacs (Text Editor)
- Python 3.5 (プログラミング言語)
Pythonからはじめる数学入門 (Amit Saha (著)、黒川 利明 (翻訳)、オライリージャパン)の3章(データを統計量で記述する)、3.9(プログラミングチャレンジ)、問題3-2(統計電卓)に取り組んでみる。
問題3-2(統計電卓)
コード(Emacs)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import random
import matplotlib.pyplot as plt
from collections import Counter
def calculate_mean(numbers):
    """Return the arithmetic mean of ``numbers``.

    Args:
        numbers: An iterable of numbers. It is copied into a list first, so
            one-shot iterables such as generators are accepted as well as
            lists and tuples.

    Returns:
        The sum of the values divided by their count, using true division.

    Raises:
        ValueError: If ``numbers`` contains no values.
    """
    values = list(numbers)
    if not values:
        # Fail with a descriptive error instead of a bare ZeroDivisionError.
        raise ValueError('calculate_mean() requires at least one number')
    return sum(values) / len(values)
def calculate_median(numbers):
    """Return the median of ``numbers``.

    Args:
        numbers: An iterable of numbers. It is sorted into a new list, so the
            caller's data is not modified and one-shot iterables are accepted.

    Returns:
        For an odd count, the middle element of the sorted values. For an
        even count, the sum of the two middle elements divided by 2 (true
        division).

    Raises:
        ValueError: If ``numbers`` contains no values.
    """
    ordered = sorted(numbers)
    count = len(ordered)
    if count == 0:
        # Without this guard the even branch would index ordered[-1] on an
        # empty list and surface as an unrelated IndexError.
        raise ValueError('calculate_median() requires at least one number')
    # Floor division gives the upper-middle index directly as an int, with no
    # float round trip.
    mid = count // 2
    if count % 2 == 0:
        return (ordered[mid - 1] + ordered[mid]) / 2
    return ordered[mid]
def calculate_mode(numbers):
    """Return every value that occurs most often in ``numbers``.

    Args:
        numbers: An iterable of hashable values.

    Returns:
        A list of all values sharing the highest occurrence count, in the
        order ``Counter.most_common()`` yields them. The list is empty when
        ``numbers`` is empty.
    """
    ranked = Counter(numbers).most_common()
    if not ranked:
        # No data means no mode; avoid indexing into an empty ranking.
        return []
    # most_common() sorts by descending count, so the first entry carries the
    # highest count and all ties for it sit at the front of the list.
    top_count = ranked[0][1]
    return [value for value, count in ranked if count == top_count]
def find_differences(numbers):
    """Return the deviation of each value in ``numbers`` from their mean.

    The mean comes from ``calculate_mean``. The result is a list holding one
    ``value - mean`` entry per input element, in input order.
    """
    center = calculate_mean(numbers)
    deviations = []
    for value in numbers:
        deviations.append(value - center)
    return deviations
def calculate_variance(numbers):
    """Return the variance of ``numbers``.

    The squared deviations from the mean (see ``find_differences``) are
    summed and divided by ``len(numbers)``, i.e. the divisor is n rather
    than n - 1.
    """
    deviations = find_differences(numbers)
    total_squared = sum([deviation ** 2 for deviation in deviations])
    return total_squared / len(numbers)
def calculate_standard_deviation(numbers):
    """Return the standard deviation of ``numbers``.

    This is the square root of the value returned by ``calculate_variance``.
    """
    return calculate_variance(numbers) ** 0.5
def draw_graph(numbers):
    """Plot ``numbers`` with mean and median lines and print statistics.

    The values are plotted against their index. The mean and the median are
    then each drawn as a constant line over the same index range, and a
    legend is attached. The mean, median, mode(s), variance and standard
    deviation are printed to standard output, one per line, before the plot
    window is shown.
    """
    count = len(numbers)
    positions = range(count)
    plt.plot(positions, numbers)
    plt.ylabel('値')

    mean = calculate_mean(numbers)
    median = calculate_median(numbers)
    modes = calculate_mode(numbers)
    variance = calculate_variance(numbers)
    std_deviation = calculate_standard_deviation(numbers)

    # One flat line per statistic: the same y value at every index.
    plt.plot(positions, [mean] * count)
    plt.plot(positions, [median] * count)
    plt.legend(['値', '平均値', '中央値'])

    summary = (
        ('平均', mean),
        ('中央値', median),
        ('最頻値', ', '.join(str(value) for value in modes)),
        ('分散', variance),
        ('標準偏差', std_deviation),
    )
    for label, value in summary:
        print('{0}: {1}'.format(label, value))

    plt.show()
if __name__ == '__main__':
    # With exactly one command-line argument, read one integer per line from
    # that file; otherwise fall back to 100 random integers in 1..100.
    if len(sys.argv) == 2:
        filename = sys.argv[1]
        with open(filename) as f:
            # Skip blank lines (for example an empty line at the end of the
            # file) so they do not make int() raise ValueError. int() itself
            # ignores the surrounding whitespace and newline.
            numbers = [int(line) for line in f if line.strip()]
    else:
        numbers = [random.randrange(1, 101) for _ in range(100)]
    draw_graph(numbers)
入出力結果(Terminal, IPython)
$ ./calculate.py mydata.txt
平均: 477.75
中央値: 500.0
最頻値: 100, 500
分散: 141047.35416666666
標準偏差: 375.5627166887931
$ ./calculate.py
平均: 49.2
中央値: 45.5
最頻値: 26, 51, 71
分散: 867.82
標準偏差: 29.45878476787527
$ ./calculate.py
平均: 53.76
中央値: 55.0
最頻値: 38
分散: 749.3624000000003
標準偏差: 27.374484470031582
$
0 コメント:
コメントを投稿