Working on a copy of Pandas DataFrame
import pandas as pd

# Read the avocado dataset from the working directory.
df: pd.DataFrame = pd.read_csv("avocado.csv")

# Work on a deep copy (the default for DataFrame.copy) rather than on df itself.
df_cp = df.copy(deep=True)
References
https://pythonprogramming.net/introduction-python3-pandas-data-analysis/
import pandas as pd

# Load avocado.csv into a DataFrame.
df: pd.DataFrame = pd.read_csv("avocado.csv")

# Deep copy (the DataFrame.copy default): df_cp gets its own data, separate from df.
df_cp = df.copy(deep=True)
References
https://pythonprogramming.net/introduction-python3-pandas-data-analysis/
import pandas as pd

df: pd.DataFrame = pd.read_csv("avocado.csv")

# Make "Date" the row index and order the rows by it. Reassigning the chained
# result leaves df in the same state as the two inplace=True calls would.
df = df.set_index("Date").sort_index()

print(df.index)
References
https://pythonprogramming.net/graph-visualization-python3-pandas-data-analysis/
import pandas as pd

df: pd.DataFrame = pd.read_csv("avocado.csv")

# Before: the index the frame has straight after loading
print(df.index)

# Re-index the frame by its "Date" column.
# Equivalent in-place form: df.set_index("Date", inplace=True)
df = df.set_index("Date")

# After: the index is now taken from "Date"
print(df.index)
References
https://pythonprogramming.net/introduction-python3-pandas-data-analysis/
import pandas as pd

df: pd.DataFrame = pd.read_csv("avocado.csv")

# Boolean mask: True for every row whose region is exactly "Albany"
is_albany = df["region"] == "Albany"

# Keep only the masked rows.
# Use df[is_albany].copy() instead when an independent frame is wanted.
albany_df = df[is_albany]
print(albany_df)
# Rows where Gender is 'Male' AND Year is 2014. Each comparison is wrapped in
# parentheses because & binds more tightly than ==.
# Column labels must be quoted strings: bare Gender / Year would be looked up
# as Python variables and raise NameError.
# NOTE(review): assumes df has "Gender" and "Year" columns — confirm against the data in use.
males = df[(df["Gender"] == 'Male') & (df["Year"] == 2014)]
To store your dataframes in a dict using a for loop:
from collections import defaultdict

# Nested lookup table: dic[gender][year] -> the rows of df matching both values.
dic = {}
for g in ['male', 'female']:
    # defaultdict(dict): reading a year that was never stored yields {} instead of KeyError.
    dic[g] = defaultdict(dict)
    for y in [2013, 2014]:
        # Column labels are quoted strings; bare Gender / Year would raise NameError.
        # NOTE(review): the values here are lower-case ('male') while the single
        # filter above compares against 'Male' — confirm which spelling the data uses.
        dic[g][y] = df[(df["Gender"] == g) & (df["Year"] == y)]  # store the DataFrames to a dict of dict
References
https://pythonprogramming.net/introduction-python3-pandas-data-analysis/
https://stackoverflow.com/questions/22086116/how-do-you-filter-pandas-dataframes-by-multiple-columns
import pandas as pd

df: pd.DataFrame = pd.read_csv("avocado.csv")

# Selecting a single column by its label gives back that column on its own.
average_price = df["AveragePrice"]
print(average_price)
References
https://pythonprogramming.net/introduction-python3-pandas-data-analysis/
Create two data frames and append the second to the first one
# Importing pandas as pd
import pandas as pd

# First frame: four rows, columns "a" and "b".
# df and df1 are two names for the same DataFrame object.
df = pd.DataFrame({
    "a": [1, 2, 3, 4],
    "b": [5, 6, 7, 8],
})
df1 = df

# Second frame: three rows with the same two columns.
df2 = pd.DataFrame({
    "a": [1, 2, 3],
    "b": [5, 6, 7],
})

# Show df1, followed by a blank line
print(df1, "\n")

# Show df2: as the last expression of a notebook cell this displays the frame;
# in a plain script the bare name has no effect.
df2
Now append df2 at the end of df1.
# to append df2 at the end of df1 dataframe
# (DataFrame.append was removed in pandas 2.0; pd.concat is its replacement
# and likewise returns a new frame without modifying df1 or df2)
pd.concat([df1, df2])
Notice that the index values of the second data frame are preserved in the appended data frame. If we do not want this to happen, we can set ignore_index=True.
# A continuous index value will be maintained
# across the rows in the new appended data frame.
# (DataFrame.append was removed in pandas 2.0; pd.concat is its replacement)
pd.concat([df, df2], ignore_index=True)
References
https://www.geeksforgeeks.org/python-pandas-dataframe-append/
Pandas relies on a separate engine module to write Excel files: openpyxl or XlsxWriter for .xlsx output (older pandas versions used xlwt for legacy .xls files).
# Write the frame to output.xlsx. index=True is the default, spelled out here:
# the row index is written to the sheet along with the data.
movies.to_excel('output.xlsx', index=True)
You can choose to skip the index by passing index=False.
# Same export, but index=False leaves the row index out of the sheet.
movies.to_excel(excel_writer='output.xlsx', index=False)
We can use these advanced output options by creating an ExcelWriter object and using it to write to the Excel file.
# Write `movies` to the 'report' sheet of output.xlsx and make the header row bold.
# The with-block saves and closes the file on exit; it replaces the explicit
# writer.save() call, which was removed in pandas 2.0.
with pd.ExcelWriter('output.xlsx', engine='xlsxwriter') as writer:
    movies.to_excel(writer, index=False, sheet_name='report')

    # The engine's workbook and the worksheet just written; these are two
    # separate lookups (they had been fused into one statement).
    workbook = writer.book
    worksheet = writer.sheets['report']

    # Apply a bold format to row 0 (the header); None keeps the default row height.
    header_fmt = workbook.add_format({'bold': True})
    worksheet.set_row(0, None, header_fmt)
# importing pandas module
import pandas as pd

# Load nba.csv straight from its URL
data = pd.read_csv("https://media.geeksforgeeks.org/wp-content/uploads/nba.csv")

# shape is the (rows, columns) tuple; size is the total number of cells
shape = data.shape
size = data.size

# ndim of the whole frame versus ndim of a single column
df_ndim = data.ndim
salary_column = data["Salary"]
series_ndim = salary_column.ndim

# Report size and shape, cross-checking size against rows x columns
row_count, column_count = shape
shape_report = "Size = {}\nShape ={}\nShape[0] x Shape[1] = {}"
print(shape_report.format(size, shape, row_count * column_count))

# Report the number of dimensions of the frame and of the column
ndim_report = "ndim of dataframe = {}\nndim of series ={}"
print(ndim_report.format(df_ndim, series_ndim))
Size = 4122 Shape=(458, 9) Shape[0] x Shape[1] = 4122 ndim of dataframe = 2 ndim of series=1
References
https://www.geeksforgeeks.org/python-pandas-df-size-df-shape-and-df-ndim/
# import pandas package as pd
import pandas as pd

# Define a dictionary containing students data
data = {
    'Name': ['Ankit', 'Amit', 'Aishwarya', 'Priyanka'],
    'Age': [21, 19, 20, 18],
    'Stream': ['Math', 'Commerce', 'Arts', 'Biology'],
    'Percentage': [88, 92, 95, 70],
}

# Convert the dictionary into DataFrame, fixing the column order explicitly
df = pd.DataFrame(data, columns=['Name', 'Age', 'Stream', 'Percentage'])

print("Given Dataframe :\n", df)

print("\nIterating over rows using iterrows() method :\n")

# iterrows() yields (index label, row) pairs; from each row select
# the 'Name' and 'Age' columns respectively.
for index, row in df.iterrows():
    # The loop body must be indented under the for statement to run per row.
    print(row["Name"], row["Age"])
References
https://www.geeksforgeeks.org/different-ways-to-iterate-over-rows-in-pandas-dataframe/
import pandas as pd

# Path of the workbook to read
excel_file = 'movies.xls'

# With no sheet specified, read_excel loads the first sheet
movies = pd.read_excel(excel_file)

# Explicitly read sheet 0 and use its first column as the row index.
# The keyword is sheet_name; the old spelling `sheetname` is no longer accepted.
movies_sheet1 = pd.read_excel(excel_file, sheet_name=0, index_col=0)