Python-Pandas Notes

Download as pdf or txt
Download as pdf or txt
You are on page 1of 5

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
PANDAS

Video link: []

Pandas is used for plotting multiple graphs and for interpreting
different documents:
- how you can organize your data
- applying statistical tools

"""

"""
---------PLOTTING MULTIPLE KINDS OF GRAPHS IN ONE FIGURE-----------------

You can plot multiple graphs in one go using pandas.

- three main data structures: Series, DataFrame, Panel
Series: is for 1D data
DataFrame: is for 2D data (NumPy, by comparison, is for numbers only)
Panel: is for 3D data (note: Panel was removed in pandas 0.25)

"""
import pandas as pd
import matplotlib.pyplot as plt

"""
---------SERIES--------------
"""

# Build a Series from a plain list; these values are what gets drawn on the
# y-axis. A Series can also be built from a dict, etc.
x = pd.Series([34774, 3463, 3547, 2346])

# With no explicit index pandas assigns the default integer index.
print(x.index)

# To label the entries with strings instead, assign a new index; these labels
# are what is written along the x-axis of the plots.
x.index = ['Punjab', 'Sindh', 'Balochistan', 'KPK']

x.name = 'Provinces'  # assign a name to the data

# mean is a method and has to be called; printing x.mean without the
# parentheses only shows the bound method. median, mode, min and max work
# the same way.
print(x.mean())

print(x.quantile(q=0.5))  # the 0.5 quantile

# One figure holding a 1 x 3 grid of subplots (arguments are rows, columns).
fig, axes = plt.subplots(1, 3, figsize=(12, 5))

# ax= selects the subplot each chart is drawn on, e.g. axes[0] is the first.
x.plot(ax=axes[0], kind='bar', title='Bar Diagram')
x.plot(ax=axes[1], kind='line', title='Line Diagram')
x.plot(ax=axes[2], kind='pie', title='Piechart')
###### for dictionary ########
# With a dict the index labels are assigned together with the values:
# the keys become the index and the values become the data.

z = pd.Series({'a': 1, 'b': 2, 'g': 3})
print(z)  # will give
# a    1
# b    2
# g    3

# Elements can be arbitrary objects: this stores the dictionary at index 0
# and the list at index 1.
y = pd.Series([{'a': 1, 'b': 2}, [3, 4]])

# Another way of plotting a graph is the accessor form:
# x.plot.bar(x='Subject', y='Mean', fontsize='9')

"""
--------DATA FRAME (MAKING A TABLE: defining rows and columns)--------
We can make a table using pandas by creating a dictionary and
defining the variables.

Used for two-dimensional data.

"""

# Define a data frame from a dictionary: each key is a column heading and
# the list stored under it holds that column's contents, one entry per row.
df = pd.DataFrame({
    'Province': ['S', 'P', 'B', 'KPK'],
    'Population': [1000, 2000, 3000, 4566],
})
print(df)
print(df.Population)   # the contents of the Population column only
print(df.loc[0])       # the first row: Province S and its Population 1000
print(df['Province'])  # another way of getting a column of elements
df.columns = ['Pov', 'Pops']        # change the column headings
df.index = ['a', 'b', 'c', 'd']     # change the index from numbers to labels

# The renamed population column is 'Pops'. df.pop is the DataFrame.pop
# method, so printing it would show a bound method rather than the column.
print(df.Pops)
print(df.loc['a'])  # row 'a': Pov S, Pops 1000
# info() prints its summary itself and returns None, so it is not wrapped
# in print().
df.info()

"""
---------WRITING TO THE FILES--------
"""

# To write data to an Excel file, build the frame and call to_excel.

df = pd.DataFrame({
    'Province': ['S', 'P', 'B', 'KPK'],
    'Population': [1000, 2000, 3000, 4566],
})
# to_excel writes the file and returns None, so df_1 and df_2 hold None.
df_1 = df.to_excel('test.xlsx')  # the row index is written as well
# index must be the boolean False: the string 'False' is truthy, so passing
# it would still write the index. This call overwrites the file above.
df_2 = df.to_excel('test.xlsx', index=False)  # the index will not appear

#just to summarize, if you want to read the row use loc[0]


#and if read column, use the heading of that column #df.Population
"""
-------------READING FROM THE FILES-----------------------
"""
# If you are unable to read your file, then write a file, read it and then
# read it again, or write the whole address, e.g.
#   p = pd.read_csv('/Users/filzafaisal/Desktop/pokemon_data.csv')
# Find the pathway from Excel > File > Properties to get the link address.

# To read back what you have written:
l = pd.read_excel('test.xlsx')
print(l)

##### for general information about the data #####
# info() prints its summary itself and returns None, so it is not wrapped
# in print().
l.info()

# Open the data file in CSV format. If you have an xls format use
# pd.read_excel('pokemon.xlsx'); for text use pd.read_csv('pokemon.txt').
p = pd.read_csv('pokemon_data.csv')
print(p)         # to see the entire document
print(p.loc[0])  # method to see the row you want (by index label)

# To see where a particular value occurs, select rows with a boolean mask.
# p.loc['False'] would look for a row *labelled* 'False' and raise KeyError
# on the default integer index. Here every cell is compared as text and the
# rows containing the value in any column are kept.
# NOTE(review): the original target column is not visible here - narrow the
# mask to a single column if only one is meant.
print(p.loc[p.astype(str).eq('False').any(axis=1)])

print(p.head(3))     # to read the top 3 rows
print(p.tail(3))     # to read the bottom 3 rows
print(p.columns)     # the column headings, so you know the column names
print(p.Name)        # the elements of one specific column
print(p[['Name', 'Attack']])  # several columns at once: pass a list of them

"""
---------MAKE CHANGES IN EXCEL FILE---------
"""

data = pd.read_excel('sample.xlsx')
# Then set an index according to the column you want (better if not a
# number). Two columns give a two-level index; sorting on level 0 keeps the
# entries of both levels side by side.
data_new = data.set_index(['Ship Mode', 'Customer']).sort_index(level=0)

# To find the values and data within an index:
print(data_new.loc['Regular Air'])
# With two strings (one per index level), pass them as a tuple row key so
# the second string cannot be mistaken for a column label.
print(data_new.loc[('Regular Air', 'Barry French'), :])

# Applied to the original data: counts the number of times each customer
# has been repeated.
customer_count = data['Customer'].value_counts()

# One figure with a 1 x 2 grid of subplots; the counts go on the first one.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
customer_count.plot(kind='bar', ax=axes[0])
axes[0].set_ylabel('customer')  # label of the y-axis

"""
-------ADDRESS MISSING NUMBERS-----------
"""

sample2 = pd.read_excel('sample2.xlsx')
# True means that the value at that position is null (missing).
print(sample2['Unit Price'].isnull())
# Fill the missing numbers with 0:
print(sample2['Unit Price'].fillna(0))

sample3 = sample2['Unit Price'].fillna(0)  # keep the filled column separately

# bfill (backward fill) fills each gap with the NEXT valid value.
# It is called without arguments: recent pandas versions accept only
# keyword arguments here, so a positional 0 fails.
print(sample2['Unit Price'].bfill())

# ffill (forward fill) fills each gap with the PREVIOUS valid value.
print(sample2['Unit Price'].ffill())

"""
---------DRAWING GRAPHS OF DATA FRAME(2D FIGS)-----------
"""

# METHOD 1 (using Series)
# Series data can be used to build a data frame: each Series in the list
# becomes one row.
y = pd.Series(['khi', 'lahore'])
z = pd.Series(['sindh', 'punjab'])

s1 = pd.DataFrame(data=[y, z])
s1.index = list('ab')  # label the two rows 'a' and 'b'

print(s1)

# METHOD 2 (using dictionary)
# Each key is a column heading and its list holds the column's values.
s2 = pd.DataFrame({'Cities': ['khi', 'lahore'], 'Age': [30, 20]})
# The same frame can be built from one Series per column:
#   s2 = pd.DataFrame({'Cities': pd.Series(['khi', 'lahore']),
#                      'Age': pd.Series([30, 20])})
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
# Bar chart on the first subplot: cities along x, ages along y.
s2.plot(ax=axes[0], x='Cities', y='Age', kind='bar', label='stats')

"""
-----CONVERTING STRINGS TO NUMBERS (by adding new columns)-----
Make new columns by editing the existing one
"""
# lambda tells Python that we are defining a small anonymous function.
y = lambda x: x**2 + 1

k = pd.DataFrame({
    'Cities': pd.Series(['khi', 'lahore']),
    'Age': pd.Series(['30', '20']),
})
# Add a new column with a lambda: each Age string has its commas removed
# and is converted to an int.
k['NuAge'] = k.Age.apply(lambda x: int(x.replace(',', '')))

# This duplicates the existing column and makes a new one with the edits.

"""
--------ORGANIZING DATA------
if you want to organize a particular column in aligned format

"""

# Strip leading and trailing whitespace from every entry of the column.
k['Cities'] = k.Cities.apply(str.strip)

# Replace the index with dates: one consecutive day per row present in k,
# starting at 2016-01-01. len(k) keeps the range in step with the number
# of rows instead of hard-coding it.
k.index = pd.date_range('2016-01-01', periods=len(k))
print(k)  # the rows are now labelled with dates

You might also like