Python-Pandas Notes

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
PANDAS
Video link: []
Pandas is used for plotting multiple graphs and for interpreting
different documents:
- how you can organize your data
- applying statistical tools
"""
"""
---------PLOTTING MULTIPLE KINDS OF GRAPHS IN ONE FIGURE-----------------
You can plot multiple graphs in one go using pandas.

- three main data structures: Series, DataFrame, Panel
Series: is for 1D data
DataFrame: is for 2D data (numpy is for numbers only)
Panel: is for 3D data (deprecated and removed from recent pandas versions)
"""
import pandas as pd
import matplotlib.pyplot as plt
"""
---------SERIES--------------
"""
# The values of the Series; these become the y-axis when plotted.
# A Series can be built from a list, a dict, etc.
x = pd.Series([34774, 3463, 3547, 2346])
print(x.index)  # shows the default index, which is numeric (0, 1, 2, ...)
# To label the entries with strings instead, assign a new index;
# these labels are the writing on the x-axis when plotted.
x.index = ['Punjab', 'Sindh', 'Balochistan', 'KPK']
x.name = 'Provinces'  # assign a name to your data
# mean is a method, so it must be called to get the value (printing
# x.mean without parentheses only shows the bound method).
# median, mode, min and max are used the same way.
print(x.mean())
print(x.quantile(q=0.5))  # quantile; q=0.5 is the median
# subplots(rows, columns): one row holding three axes side by side.
fig, axes = plt.subplots(1, 3, figsize=(12, 5))
# ax= selects which of the axes each chart is drawn on (axes[0] is the first).
x.plot(ax=axes[0], kind='bar', title='Bar Diagram')
x.plot(ax=axes[1], kind='line', title='Line Diagram')
x.plot(ax=axes[2], kind='pie', title='Piechart')
###### for dictionary ########
# With a dict you assign the index labels together with the values:
# the keys become the index.
z = pd.Series({'a': 1, 'b': 2, 'g': 3})
print(z)  # will give
# a    1
# b    2
# g    3
# A Series can also hold arbitrary objects: here the dictionary is stored
# at index 0 and the list at index 1.
y = pd.Series([{'a': 1, 'b': 2}, [3, 4]])
# Another way of plotting a graph:
# x.plot.bar(x='Subject', y='Mean', fontsize='9')
"""
--------DATA FRAME (MAKING A TABLE: defining rows and columns)--------
We can make a table using pandas by creating a dictionary and
defining the variables.
Used for a two-dimensional list.
"""
# First define a data frame from a dict: each key is a column heading and
# its list holds that column's contents, so columns and rows are assigned
# in one go.
df = pd.DataFrame({
    'Province': ['S', 'P', 'B', 'KPK'],
    'Population': [1000, 2000, 3000, 4566],
})
print(df)
print(df.Population)  # the contents of the Population column only
# Row with index label 0: the first province and its corresponding
# population, in this case S: 1000.
print(df.loc[0])
print(df['Province'])  # another way of getting a column of elements
df.columns = ['Pov', 'Pops']  # this changes the column headings
df.index = ['a', 'b', 'c', 'd']  # change the index from numbers to these labels
# View the population column under its new heading. The attribute must be
# the exact column name: df.pop is the DataFrame.pop method, not a column.
print(df.Pops)
print(df.loc['a'])  # row 'a': Pov S, Pops 1000
# info() prints its summary itself and returns None, so it is not wrapped
# in print() (that would print an extra "None").
df.info()
"""
---------WRITING TO THE FILES--------
"""
# To write data to an Excel file:
df = pd.DataFrame({
    'Province': ['S', 'P', 'B', 'KPK'],
    'Population': [1000, 2000, 3000, 4566],
})
# to_excel writes the file and returns None, so df_1 and df_2 are None;
# the names are kept only so that later code referring to them still works.
df_1 = df.to_excel('test.xlsx')
# With index=False the index column will not appear in the file. It must
# be the boolean False: the string 'False' is a non-empty, truthy value,
# so the index would still be written.
df_2 = df.to_excel('test.xlsx', index=False)
# Just to summarize: if you want to read a row use loc[0],
# and to read a column use the heading of that column, e.g. df.Population
"""
-------------READING FROM THE FILES-----------------------
"""
# If you are unable to read your file, then write a file, read it and
# then read it again,
# or write the whole address:
#   p = pd.read_csv('/Users/filzafaisal/Desktop/pokemon_data.csv')
# Find the pathway from Excel > File > Properties to get the link address.
# To read back what you have written:
l = pd.read_excel('test.xlsx')
print(l)
##### for general information about data #####
# info() prints its summary itself and returns None, so it is not wrapped
# in print() (that would print an extra "None").
l.info()
# Open the data file in csv format. If you have an xls/xlsx file use
# pd.read_excel('pokemon.xlsx'); for a text file use
# pd.read_csv('pokemon.txt') (pass sep= if it is not comma-separated).
p = pd.read_csv('pokemon_data.csv')
print(p)  # to see the entire doc
print(p.loc[0])  # method to see the row you want (by index label)
# NOTE(review): the original notes called p.loc['False'] here. .loc looks
# rows up by index *label*, and read_csv above creates a default integer
# index, so the string label 'False' does not exist and the call raises
# KeyError. Select by an existing label (as in p.loc[0]) or by a boolean
# mask instead, e.g. p.loc[p['Attack'] > 100].
print(p.head(3))  # the first 3 rows
print(p.tail(3))  # the last 3 rows
# Print the column headings so you know the names of the columns.
print(p.columns)
print(p.Name)  # the elements of one specific column
# To view many columns at once, just pass a list of their names.
print(p[['Name', 'Attack']])
"""
---------MAKE CHANGES IN EXCEL FILE---------
"""
data = pd.read_excel('sample.xlsx')
# Then set an index according to the columns you want (better if not a
# number). Indexing on both columns and sorting on the first level makes
# sure that both are side by side.
data_new = data.set_index(['Ship Mode', 'Customer']).sort_index(level=0)
# To find the values & data within an index:
print(data_new.loc['Regular Air'])
# With two index levels you can give one label per level:
print(data_new.loc['Regular Air', 'Barry French'])
# Apply on the original data: this tells you the number of times each
# customer has been repeated.
customer_count = data.Customer.value_counts()
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
customer_count.plot(ax=axes[0], kind='bar')  # bar chart of the customer count
axes[0].set_ylabel('customer')  # set the y label
"""
-------ADDRESS MISSING NUMBERS-----------
"""
sample2 = pd.read_excel('sample2.xlsx')
# True means that those values are null.
print(sample2['Unit Price'].isnull())
# So we fill those missing values with 0 by:
print(sample2['Unit Price'].fillna(0))
sample3 = sample2['Unit Price'].fillna(0)  # to keep the filled data separately
# bfill (backward fill): fill each gap with the next valid value after it.
# It takes no fill value, so it is called without a positional argument.
print(sample2['Unit Price'].bfill())
# ffill (forward fill): fill each gap with the last valid value before it.
print(sample2['Unit Price'].ffill())
"""
---------DRAWING GRAPHS OF DATA FRAME(2D FIGS)-----------
"""
# METHOD 1 (using Series)
# We can use Series data to make a DataFrame; each Series becomes one row.
y = pd.Series(['khi', 'lahore'])
z = pd.Series(['sindh', 'punjab'])
s1 = pd.DataFrame([y, z])
s1.index = ['a', 'b']
print(s1)
# METHOD 2 (using dictionary)
s2 = pd.DataFrame({'Cities': ['khi', 'lahore'], 'Age': [30, 20]})
# The same table with each column given as a Series:
# s2 = pd.DataFrame({'Cities': pd.Series(['khi', 'lahore']),
#                    'Age': pd.Series([30, 20])})
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
# Bar chart of Age against Cities, drawn on the first of the two axes.
s2.plot(ax=axes[0], x='Cities', y='Age', kind='bar', label='stats')
"""
-----CONVERTING STRINGS TO NUMBERS (by adding new columns)-----
Make new columns by editing the existing one.
"""
# lambda tells Python that we are defining a small inline function
y = lambda x: x**2 + 1
k = pd.DataFrame({
    'Cities': pd.Series(['khi', 'lahore']),
    'Age': pd.Series(['30', '20']),
})
# Here we add a new column using a lambda: remove any commas from each Age
# string and convert it to int. The existing column is left untouched; the
# edited copy is stored as the new column.
k['NuAge'] = k.Age.apply(lambda x: int(x.replace(',', '')))
"""
--------ORGANIZING DATA------
if you want to organize a particular column in aligned format
"""
# strip() removes leading and trailing whitespace from each city name
k['Cities'] = k.Cities.apply(lambda x: x.strip())
# To replace the index with dates: periods=2 gives one date for each of
# the two rows present in k.
k.index = pd.date_range('2016-01-01', periods=2)
print(k)  # so here the index holds a date for each of the two rows

Python-Pandas Notes

Uploaded by

Copyright:

Available Formats

Python-Pandas Notes

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Python-Pandas Notes

Uploaded by

Copyright:

Available Formats

#!

Pandas are used for plotting multiple graphs and to interpret

You can plot mutiple graphs in one go using PANDAS

x=pd.Series([34774,3463,3547,2346]) #the y-axis you want to draw

x.index=['Punjab','Sindh','Balochistan','KPK'] #the writings on x-

x.name='Provinces' #to assign the name to your data

print(x.mean) #for mean. also can use median,mode,min,max

fig,axes=plt.subplots(1,3,figsize=(12,5)) #(1,3 are the R,C)

x.plot(ax=axes[0],kind='bar',title='Bar Diagram') #the ax is

y=pd.Series([{'a':1,'b':2},[3,4]]) #will print the dictionary at

#x. plot. bar(x = 'Subject', y = 'Mean', fontsize='9') #another way

Used for a two dimensional list

#so if I want to write a data to excel I will

#just to summarize, if you want to read the row use loc[0]

#if you want to read what you have written

#####for general information about data#####

p=pd.read_csv('pokemon_data.csv') #to open the data file in csv

customer_count=data.Customer.value_counts() #apply on original data

sample3=sample2['Unit Price'].fillna(0) #to have a seperate data

#METHOD 1(using series)

#METHOD 2(using dictionary)

#if you want to replace date in index

k.index=pd.date_range('2016-01-01',periods=2) #for the four rows

You might also like