Filter by values of a column in Pandas DataFrame

import pandas as pd

# Load the data set into a DataFrame (pd.read_csv raises FileNotFoundError
# if avocado.csv is not in the working directory).
df: pd.DataFrame = pd.read_csv("avocado.csv")

# Boolean-mask filter: keep only the rows whose "region" equals "Albany".
# If albany_df is going to be modified afterwards, take an independent copy
# instead so pandas does not emit SettingWithCopyWarning:
#     albany_df = df[df["region"] == "Albany"].copy()
albany_df = df[df["region"] == "Albany"]
print(albany_df)

# Filter on several columns at once: combine the masks with & (element-wise
# AND). Each comparison needs its own parentheses because & binds tighter
# than ==. Column labels must be quoted strings -- the bare names Gender and
# Year would raise NameError.
# NOTE(review): nothing here shows that avocado.csv has "Gender"/"Year"
# columns; this line presumably illustrates a different data set -- confirm.
males = df[(df["Gender"] == "Male") & (df["Year"] == 2014)]

To store your dataframes in a dict using a for loop:

from collections import defaultdict
# Build a two-level lookup so that dic[gender][year] holds the rows of df
# matching both values.
# NOTE(review): `df` is not created in this snippet; it is assumed to be an
# already-loaded DataFrame with "Gender" and "Year" columns -- confirm.
# NOTE(review): the labels below are lower-case ('male'), while the
# single-filter example compares against 'Male'; check which spelling the
# data actually uses.
dic = {}
for g in ['male', 'female']:
    # defaultdict(dict): reading a year that was never stored yields an empty
    # dict instead of raising KeyError.
    dic[g] = defaultdict(dict)
    for y in [2013, 2014]:
        # Column labels are quoted strings; bare Gender / Year would raise
        # NameError.
        dic[g][y] = df[(df["Gender"] == g) & (df["Year"] == y)]

References
https://pythonprogramming.net/introduction-python3-pandas-data-analysis/
https://stackoverflow.com/questions/22086116/how-do-you-filter-pandas-dataframes-by-multiple-columns

Add rows to Pandas DataFrame

Create two data frames and append the second to the first one

# Importing pandas as pd 
import pandas as pd 

# Build the first DataFrame from a dict mapping column name -> values (4 rows).
# The chained assignment binds `df` to the very same object as `df1`; the
# alias is kept because a later example refers to `df`.
df1 = df = pd.DataFrame({
    "a": [1, 2, 3, 4],
    "b": [5, 6, 7, 8],
})

# Build the second DataFrame the same way (3 rows, same columns).
df2 = pd.DataFrame({
    "a": [1, 2, 3],
    "b": [5, 6, 7],
})

# Print df1, followed by an extra blank line.
print(df1, "\n")

# Print df2. A bare `df2` expression is only displayed in a notebook or
# REPL; print() also works when this runs as a script.
print(df2)

Now append df2 at the end of df1.

# Stack the rows of df2 under those of df1.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. Like append, it returns a new DataFrame and leaves df1 and
# df2 untouched. Each input keeps its own index labels, so the labels
# 0, 1, 2 appear twice in the result.
appended = pd.concat([df1, df2])
print(appended)

Notice that the index values of the second data frame are preserved in the combined data frame. If we do not want that, we can set ignore_index=True.

# ignore_index=True discards the original index labels, so the combined
# rows are numbered continuously from 0.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# df1 is used directly here; the original `df` was just another name for
# the same object.
reindexed = pd.concat([df1, df2], ignore_index=True)
print(reindexed)

References
https://www.geeksforgeeks.org/python-pandas-dataframe-append/

Writing data to Excel with Pandas

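Pandas relies on an external engine module to write Excel files: openpyxl or XlsxWriter for .xlsx output. (The xlwt module handled only the legacy .xls format and is no longer supported as of pandas 2.0.)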

# Write `movies` to output.xlsx; with no index argument given, the index is
# written to the sheet as well.
# NOTE(review): `movies` is not defined in this file -- presumably a
# DataFrame loaded earlier; confirm.
movies.to_excel('output.xlsx')

You can choose to skip the index by passing index=False.

# Write `movies` to output.xlsx, leaving the index out of the sheet
# (index=False).
# NOTE(review): `movies` is not defined in this file -- presumably a
# DataFrame loaded earlier; confirm.
movies.to_excel('output.xlsx', index=False)

We can use more advanced output options by creating an ExcelWriter object and using it to write to the Excel file.

# Writing through an ExcelWriter gives access to the engine's workbook and
# worksheet objects, so the sheet can be formatted after the data is written.
# The `with` block saves and closes the file on exit; ExcelWriter.save() was
# removed in pandas 2.0.
# NOTE(review): `movies` is not defined in this file -- presumably a
# DataFrame loaded earlier; confirm. The 'xlsxwriter' engine requires the
# XlsxWriter package to be installed.
with pd.ExcelWriter('output.xlsx', engine='xlsxwriter') as writer:
    movies.to_excel(writer, index=False, sheet_name='report')

    # Two separate objects: the workbook, and the worksheet that to_excel
    # just created under the name 'report'.
    workbook = writer.book
    worksheet = writer.sheets['report']

    # Apply a bold format to row 0 (the header row); None keeps the default
    # row height.
    # NOTE(review): pandas writes header cells with its own cell format,
    # which may take precedence over a row-level format -- verify the result.
    header_fmt = workbook.add_format({'bold': True})
    worksheet.set_row(0, None, header_fmt)

References
https://www.dataquest.io/blog/excel-and-pandas/

Pandas df.size, df.shape and df.ndim

# importing pandas module 
import pandas as pd 
  
# Load the sample data set straight from its URL.
data = pd.read_csv("https://media.geeksforgeeks.org/wp-content/uploads/nba.csv")

# .size is the total number of cells; .shape is the (rows, columns) tuple.
size, shape = data.size, data.shape

# .ndim is the number of axes, read once from the whole frame and once from
# a single column (a Series).
df_ndim = data.ndim
series_ndim = data["Salary"].ndim

# Report size and shape; the last field multiplies the two shape entries so
# it can be compared with .size.
size_shape_report = "Size = {}\nShape ={}\nShape[0] x Shape[1] = {}"
print(size_shape_report.format(size, shape, shape[0]*shape[1]))

# Report the two ndim values.
ndim_report = "ndim of dataframe = {}\nndim of series ={}"
print(ndim_report.format(df_ndim, series_ndim))
Output:
Size = 4122
Shape =(458, 9)
Shape[0] x Shape[1] = 4122
ndim of dataframe = 2
ndim of series =1

References
https://www.geeksforgeeks.org/python-pandas-df-size-df-shape-and-df-ndim/

Iterate over rows in a DataFrame in Pandas

# import pandas package as pd 
import pandas as pd 
  
# Student records, keyed by column name.
data = {
    'Name': ['Ankit', 'Amit', 'Aishwarya', 'Priyanka'],
    'Age': [21, 19, 20, 18],
    'Stream': ['Math', 'Commerce', 'Arts', 'Biology'],
    'Percentage': [88, 92, 95, 70],
}

# Build the DataFrame with the column order spelled out explicitly.
column_order = ['Name', 'Age', 'Stream', 'Percentage']
df = pd.DataFrame(data, columns=column_order)

print("Given Dataframe :\n", df)
print("\nIterating over rows using iterrows() method :\n")

# iterrows() yields (index label, row Series) pairs; the label is not needed
# here, only the 'Name' and 'Age' fields of each row.
for _, student in df.iterrows():
    name, age = student["Name"], student["Age"]
    print(name, age)

References
https://www.geeksforgeeks.org/different-ways-to-iterate-over-rows-in-pandas-dataframe/