import pandas as pd


# Load the auto-mpg dataset. The file has no header row, so the column labels
# are supplied directly to read_csv instead of being patched on afterwards.
#
# Column reference:
#   mpg          : fuel efficiency
#   cylinders    : number of cylinders
#   displacement : engine displacement
#   horsepower   : power output
#   weight       : vehicle weight
#   acceleration : acceleration capability
#   model year   : release year
#   origin       : country of manufacture 1(USA), 2(EU), 3(JPN)
#   name         : model name
df = pd.read_csv(
    './auto-mpg.csv',
    header=None,
    names=[
        'mpg',
        'cylinders',
        'displacement',
        'horsepower',
        'weight',
        'acceleration',
        'model year',
        'origin',
        'name',
    ],
)


# Preview the first two rows.
df.head(2)


# Preview the last rows (tail() with no argument).
df.tail()


# (number of rows, number of columns)
df.shape

(398, 9)


# Print the index range, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    object 
 4   weight        398 non-null    float64
 5   acceleration  398 non-null    float64
 6   model year    398 non-null    int64  
 7   origin        398 non-null    int64  
 8   name          398 non-null    object 
dtypes: float64(4), int64(3), object(2)
memory usage: 28.1+ KB


# dtype of every column, as a Series indexed by column name.
df.dtypes

mpg             float64
cylinders         int64
displacement    float64
horsepower       object
weight          float64
acceleration    float64
model year        int64
origin            int64
name             object
dtype: object


# dtype of a single column.
df['mpg'].dtype

dtype('float64')


# Descriptive statistics with describe()'s default column selection.
df.describe()


# Use the include='all' option so every column is summarized


df.describe(include='all')


# Check the descriptive statistics of the 'name' column


df['name'].describe()

count            398
unique           305
top       ford pinto
freq               6
Name: name, dtype: object


# Check the descriptive statistics of the 'mpg' column


df['mpg'].describe()

count    398.000000
mean      23.514573
std        7.815984
min        9.000000
25%       17.500000
50%       23.000000
75%       29.000000
max       46.600000
Name: mpg, dtype: float64


# Number of non-null values in each column.
df.count()

mpg             398
cylinders       398
displacement    398
horsepower      398
weight          398
acceleration    398
model year      398
origin          398
name            398
dtype: int64


# DataFrame.count() returns a Series (one entry per column).
type(df.count())

pandas.core.series.Series


# How many rows carry each 'origin' code.
df['origin'].value_counts()

1    249
3     79
2     70
Name: origin, dtype: int64


# Draw a histogram of one column of df ('origin') with the Series.hist() function


df['origin'].hist()

<AxesSubplot:>


# Histogram of 'origin' through the generic Series.plot API, with grid lines on.
df['origin'].plot(kind='hist', grid=True)

<AxesSubplot:ylabel='Frequency'>


# Column-wise mean of the numeric columns only. 'horsepower' and 'name' are
# object dtype; older pandas silently dropped them, but pandas >= 2.0 raises
# an error unless the reduction is explicitly restricted to numeric columns.
df.mean(numeric_only=True)

mpg               23.514573
cylinders          5.454774
displacement     193.425879
weight          2970.424623
acceleration      15.568090
model year        76.010050
origin             1.572864
dtype: float64


# Mean of a single column (returns a scalar).
df['mpg'].mean()

23.514572864321615


# Mean of a selected subset of columns (returns a Series).
df[['mpg','weight']].mean()

mpg         23.514573
weight    2970.424623
dtype: float64


# Column-wise median of the numeric columns only. 'horsepower' and 'name' are
# object dtype; older pandas silently dropped them, but pandas >= 2.0 raises
# an error unless the reduction is explicitly restricted to numeric columns.
df.median(numeric_only=True)

mpg               23.0
cylinders          4.0
displacement     148.5
weight          2803.5
acceleration      15.5
model year        76.0
origin             1.0
dtype: float64


# Median of a single column (returns a scalar).
df['mpg'].median()

23.0


# Column-wise maximum over all columns. NOTE(review): 'horsepower' and 'name'
# are object dtype and appear to hold strings, so their maximum is the
# lexicographically largest string, not a numeric maximum.
df.max()

mpg                         46.6
cylinders                      8
displacement               455.0
horsepower                     ?
weight                    5140.0
acceleration                24.8
model year                    82
origin                         3
name            vw rabbit custom
dtype: object


# Maximum of a single column (returns a scalar).
df['mpg'].max()

46.6


# Column-wise minimum over all columns. NOTE(review): 'horsepower' and 'name'
# are object dtype and appear to hold strings, so their minimum is the
# lexicographically smallest string, not a numeric minimum.
df.min()

mpg                                 9.0
cylinders                             3
displacement                       68.0
horsepower                        100.0
weight                           1613.0
acceleration                        8.0
model year                           70
origin                                1
name            amc ambassador brougham
dtype: object


# Minimum of a single column (returns a scalar).
df['mpg'].min()

9.0


# Column-wise standard deviation of the numeric columns only. 'horsepower' and
# 'name' are object dtype; older pandas silently dropped them, but pandas >= 2.0
# raises an error unless the reduction is explicitly restricted to numeric columns.
df.std(numeric_only=True)

mpg               7.815984
cylinders         1.701004
displacement    104.269838
weight          846.841774
acceleration      2.757689
model year        3.697627
origin            0.802055
dtype: float64


# Standard deviation of a single column (returns a scalar).
df['mpg'].std()

7.815984312565782


# Pairwise correlation between the numeric columns. The object-dtype columns
# ('horsepower', 'name') are filtered out first: pandas >= 2.0 no longer drops
# them automatically and fails when it tries to convert the strings to float.
# select_dtypes is used instead of corr(numeric_only=True) because that keyword
# is only available from pandas 1.5 onwards.
df.select_dtypes(include='number').corr()


# Correlation between just 'mpg' and 'weight'.
new_df = df[['mpg', 'weight']]
new_df.corr()


# Scatter plot with 'weight' on the x-axis and 'mpg' on the y-axis.
df.plot(kind="scatter", x="weight", y="mpg")

<AxesSubplot:xlabel='weight', ylabel='mpg'>

	mpg	cylinders	displacement	horsepower	weight	acceleration	model year	origin	name
0	18.0	8	307.0	130.0	3504.0	12.0	70	1	chevrolet chevelle malibu
1	15.0	8	350.0	165.0	3693.0	11.5	70	1	buick skylark 320

	mpg	cylinders	displacement	horsepower	weight	acceleration	model year	origin	name
393	27.0	4	140.0	86.00	2790.0	15.6	82	1	ford mustang gl
394	44.0	4	97.0	52.00	2130.0	24.6	82	2	vw pickup
395	32.0	4	135.0	84.00	2295.0	11.6	82	1	dodge rampage
396	28.0	4	120.0	79.00	2625.0	18.6	82	1	ford ranger
397	31.0	4	119.0	82.00	2720.0	19.4	82	1	chevy s-10

	mpg	cylinders	displacement	horsepower	weight	acceleration	model year	origin	name
count	398.000000	398.000000	398.000000	398	398.000000	398.000000	398.000000	398.000000	398
unique	NaN	NaN	NaN	94	NaN	NaN	NaN	NaN	305
top	NaN	NaN	NaN	150.0	NaN	NaN	NaN	NaN	ford pinto
freq	NaN	NaN	NaN	22	NaN	NaN	NaN	NaN	6
mean	23.514573	5.454774	193.425879	NaN	2970.424623	15.568090	76.010050	1.572864	NaN
std	7.815984	1.701004	104.269838	NaN	846.841774	2.757689	3.697627	0.802055	NaN
min	9.000000	3.000000	68.000000	NaN	1613.000000	8.000000	70.000000	1.000000	NaN
25%	17.500000	4.000000	104.250000	NaN	2223.750000	13.825000	73.000000	1.000000	NaN
50%	23.000000	4.000000	148.500000	NaN	2803.500000	15.500000	76.000000	1.000000	NaN
75%	29.000000	8.000000	262.000000	NaN	3608.000000	17.175000	79.000000	2.000000	NaN
max	46.600000	8.000000	455.000000	NaN	5140.000000	24.800000	82.000000	3.000000	NaN

	mpg	cylinders	displacement	weight	acceleration	model year	origin
mpg	1.000000	-0.775396	-0.804203	-0.831741	0.420289	0.579267	0.563450
cylinders	-0.775396	1.000000	0.950721	0.896017	-0.505419	-0.348746	-0.562543
displacement	-0.804203	0.950721	1.000000	0.932824	-0.543684	-0.370164	-0.609409
weight	-0.831741	0.896017	0.932824	1.000000	-0.417457	-0.306564	-0.581024
acceleration	0.420289	-0.505419	-0.543684	-0.417457	1.000000	0.288137	0.205873
model year	0.579267	-0.348746	-0.370164	-0.306564	0.288137	1.000000	0.180662
origin	0.563450	-0.562543	-0.609409	-0.581024	0.205873	0.180662	1.000000

Pandas(0726_day2) - 실습_자동차 연비 분석 (탐색 및 통계 요약)

자동차 연비 데이터셋

데이터 살펴보기

데이터 개수 확인

통계 함수