Introduction to Data Analysis — NumPy, Pandas, Matplotlib
pip install numpy pandas matplotlib
NumPy:高效數值計算,N 維陣列,是 Pandas 的基礎。
Pandas:表格資料處理(DataFrame),讀寫 CSV/Excel/JSON。
Matplotlib:資料視覺化,畫折線圖、長條圖、圓餅圖等。
import numpy as np
# Build two 1-D integer arrays.
a = np.arange(1, 6)
b = np.arange(10, 60, 10)
# Vectorized arithmetic: operations apply element-wise, no explicit loop needed.
print(
    a + b,  # [11 22 33 44 55]
    a * 2,  # [ 2  4  6  8 10]
    a ** 2,  # [ 1  4  9 16 25]
    sep="\n",
)
# Descriptive statistics.
print(
    np.mean(a),  # 3.0
    np.std(a),  # 1.41... (population standard deviation, ddof=0)
    np.sum(a),  # 15
    np.max(a),  # 5
    sep="\n",
)
# 2-D array (matrix) with 2 rows and 3 columns.
m = np.arange(1, 7).reshape(2, 3)
print(m.shape)  # (2, 3)
print(m.transpose())  # transpose: rows become columns
import pandas as pd
# Build a DataFrame from one (name, age, score) record per person.
df = pd.DataFrame(
    [
        ("Alice", 25, 88),
        ("Bob", 30, 92),
        ("Charlie", 22, 75),
        ("Diana", 28, 95),
    ],
    columns=["name", "age", "score"],
)
print(
    df,
    df.shape,  # (4, 3)
    df.describe(),  # summary statistics of the numeric columns
    df["score"].mean(),  # 87.5
    sep="\n",
)
# Selection.
print(df.loc[:, "name"])  # a single column
print(df.loc[:, ["name", "score"]])  # several columns
print(df.iloc[0, :])  # the first row, by position
import pandas as pd
def _to_grade(score):
    """Map a numeric score to a letter grade: A (>= 90), B (>= 80), else C."""
    if score >= 90:
        return "A"
    if score >= 80:
        return "B"
    return "C"


# Load the student records; the code below reads the "score" and "class" columns.
df = pd.read_csv("students.csv", encoding="utf-8")
# Filter: keep only the rows whose score is at least 90.
high_score = df.loc[df["score"] >= 90]
# Sort: highest score first.
df_sorted = df.sort_values(by="score", ascending=False)
# Add a column: derive a letter grade from each score.
df["grade"] = df["score"].apply(_to_grade)
# Grouped statistics: mean score per class.
avg_by_class = df.groupby("class")["score"].mean()
# Save without writing the row index as an extra column.
df.to_csv("output.csv", index=False, encoding="utf-8")
import matplotlib.pyplot as plt
import numpy as np
# Matplotlib's default font has no CJK glyphs, so the Chinese titles and tick
# labels below would render as empty boxes. Put commonly installed Traditional
# Chinese fonts first and keep the existing defaults as fallbacks.
plt.rcParams["font.sans-serif"] = [
    "Microsoft JhengHei",  # Windows
    "PingFang TC",  # macOS
    "Heiti TC",  # macOS
    "Noto Sans CJK TC",  # Linux
    "Noto Sans CJK JP",  # Linux (name Matplotlib may report for the Noto .ttc)
    *plt.rcParams["font.sans-serif"],
]
# Many CJK fonts lack the Unicode minus sign; use ASCII "-" for negative ticks.
plt.rcParams["axes.unicode_minus"] = False

# Line chart: sin(x) and cos(x) sampled at 100 evenly spaced points on [0, 10].
x = np.linspace(0, 10, 100)
plt.figure()
plt.plot(x, np.sin(x), label="sin(x)")
plt.plot(x, np.cos(x), label="cos(x)")
plt.legend()
plt.title("三角函數")
plt.xlabel("x")
plt.ylabel("y")
# Save before show(): after an interactive window is closed the figure is gone.
plt.savefig("plot.png")
plt.show()

# Bar chart: one bar per subject.
subjects = ["數學", "英文", "國文", "理化"]
scores = [90, 85, 78, 92]
# Start a fresh figure so the bars are never drawn on top of the line chart
# when show() does not block/close the previous figure (non-interactive backends).
plt.figure()
plt.bar(subjects, scores)
plt.title("各科成績")
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Matplotlib's default font has no CJK glyphs, so the Chinese title and axis
# labels below would render as empty boxes. Put commonly installed Traditional
# Chinese fonts first and keep the existing defaults as fallbacks.
plt.rcParams["font.sans-serif"] = [
    "Microsoft JhengHei",  # Windows
    "PingFang TC",  # macOS
    "Heiti TC",  # macOS
    "Noto Sans CJK TC",  # Linux
    "Noto Sans CJK JP",  # Linux (name Matplotlib may report for the Noto .ttc)
    *plt.rcParams["font.sans-serif"],
]

# Build sample data: one revenue figure for each month 1..12.
data = {
    "month": range(1, 13),
    "revenue": [120, 135, 98, 145, 160, 175, 190, 185, 165, 155, 140, 200],
}
df = pd.DataFrame(data)

# Summary statistics: yearly total, monthly mean, and the month with the
# highest revenue (idxmax gives the row label, which .loc maps back to "month").
print(f"年營收:{df['revenue'].sum()} 萬")
print(f"月均值:{df['revenue'].mean():.1f} 萬")
print(f"最高月:{df.loc[df['revenue'].idxmax(), 'month']} 月")

# Plot the monthly revenue trend as a line chart with a marker on every month.
plt.figure(figsize=(10, 5))
plt.plot(df["month"], df["revenue"], marker="o")
plt.title("月營收趨勢")
plt.xlabel("月份")
plt.ylabel("營收(萬)")
plt.xticks(range(1, 13))  # one tick per month instead of the automatic ticks
plt.grid(True, alpha=0.3)  # faint grid so it does not compete with the line
# Save before show(): after an interactive window is closed the figure is gone.
plt.savefig("revenue.png", dpi=150)
plt.show()
學會了 NumPy 數值運算、Pandas 資料處理與 Matplotlib 視覺化。