Intro:

Chapter 4. 넘파이 기본: 배열과 벡터 연산

Chapter 5. Pandas

기초 개념

import numpy as np
import pandas as pd
# Two sample arrays of 100 points each: log-spaced from 10**0 to 10**1,
# and evenly spaced from 0 to 10.
logx = np.logspace(0, 1, num=100)
linx = np.linspace(0, 10, num=100)

# Build the frame in a single step; each dict key becomes a column name and
# each array is stored as that column's Series ('logspace', then 'linspace').
df = pd.DataFrame({'logspace': logx, 'linspace': linx})

Column명 변경

DataFrame의 컬럼명 Rename 3가지 방법

  • df.columns = ['컬럼1', '컬럼2', …]  (df는 DataFrame 인스턴스, 모든 컬럼명을 순서대로 한 번에 교체)
  • df.rename({'OLD컬럼명': 'NEW컬럼명', …}, axis=1)  (새 DataFrame을 반환하므로 결과를 다시 대입)
  • df.rename(columns={'OLD컬럼명': 'NEW컬럼명', …})  (새 DataFrame을 반환하므로 결과를 다시 대입)

몬티홀 딜레마

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# 그림 그리는 환경
import random
 
def init():
    """Set up one Monty Hall round.

    Returns:
        tuple: ``(choice, prize)`` -- the contestant's initial pick and the
        door hiding the prize, each drawn independently from 'A', 'B', 'C'.
    """
    door_labels = ('A', 'B', 'C')
    # The tuple is evaluated left to right, so the contestant's pick is drawn
    # before the prize location.
    return random.choice(door_labels), random.choice(door_labels)
 
def MC(mychoice, prize):
    """Pick the door the host opens.

    The host never opens the contestant's door or the prize door.

    Args:
        mychoice: Door currently chosen by the contestant.
        prize: Door hiding the prize.

    Returns:
        The label of the opened door, chosen at random among the doors the
        host is allowed to open.

    Raises:
        ValueError: If ``mychoice == prize`` and that door is not one of
            'A', 'B', 'C'.
    """
    doors = ["A", "B", "C"]

    if mychoice != prize:
        # Contestant missed: exclude both the picked door and the prize door.
        openable = [door for door in doors if door not in (mychoice, prize)]
    else:
        # Contestant is on the prize: either of the other two doors may open.
        doors.remove(prize)
        openable = doors
    return random.choice(openable)
 
def no_change():
    """Play one round in which the contestant keeps the original door.

    Returns:
        int: 1 if the initial pick is the prize door, otherwise 0.
    """
    picked, hidden = init()
    # The host still opens a door so the round consumes the same random
    # draws; the opened door does not affect the result when staying put.
    MC(picked, hidden)
    return int(picked == hidden)
 
def change():
    """Play one round in which the contestant switches doors.

    The host opens a door that is neither the pick nor the prize, so the one
    remaining door is the prize exactly when the initial pick was wrong.

    Returns:
        int: 1 if switching wins (initial pick was not the prize), else 0.
    """
    picked, hidden = init()
    # The host still opens a door so the round consumes the same random
    # draws; the outcome of switching is decided by the initial pick alone.
    MC(picked, hidden)
    return int(picked != hidden)
 
 
from tqdm.auto import tqdm, trange
# Number of experiments, and also the number of rounds played per experiment.
tries = 1000
result_change = []    # wins per experiment when switching doors
result_nochange = []  # wins per experiment when keeping the first door

# Each outer iteration is one experiment: play `tries` rounds of both
# strategies and record how many rounds each strategy won.
for _ in tqdm(range(tries), position=0):
    win_nochange = 0
    win_change = 0
    for _ in range(tries):
        win_nochange += no_change()
        win_change += change()
    result_nochange.append(win_nochange)
    result_change.append(win_change)

# Plot the per-experiment win counts of both strategies on one figure.
plt.figure(figsize=(20, 20))
plt.plot(result_change, 'o:', label='Change')
plt.plot(result_nochange, 'o:', label='No Change')
plt.legend()
plt.title('Monty Hall Dilemma')
plt.xlabel('Trial')
# Each point is a win count out of `tries` rounds, so derive the denominator
# from `tries` instead of hard-coding it.
plt.ylabel(f'Wins / {tries}')