Bikash Santra
Indian Statistical Institute, Kolkata
import pandas as pd
# Small sample frame: one tuple per passenger, with the column names
# supplied separately so each record reads left-to-right.
df = pd.DataFrame(
    [
        ("Braund, Mr. Owen Harris", 22, "male"),
        ("Allen, Mr. William Henry", 35, "male"),
        ("Bonnell, Miss. Elizabeth", 58, "female"),
    ],
    columns=["Name", "Age", "Sex"],
)
print(df)
Name Age Sex 0 Braund, Mr. Owen Harris 22 male 1 Allen, Mr. William Henry 35 male 2 Bonnell, Miss. Elizabeth 58 female
df
| Name | Age | Sex | |
|---|---|---|---|
| 0 | Braund, Mr. Owen Harris | 22 | male |
| 1 | Allen, Mr. William Henry | 35 | male |
| 2 | Bonnell, Miss. Elizabeth | 58 | female |
xx = df["Age"]
xx
0 22 1 35 2 58 Name: Age, dtype: int64
df["Age"].max()
58
xx.min()
22
xx.mean()
38.333333333333336
xx.std()
18.230011885167087
df
| Name | Age | Sex | |
|---|---|---|---|
| 0 | Braund, Mr. Owen Harris | 22 | male |
| 1 | Allen, Mr. William Henry | 35 | male |
| 2 | Bonnell, Miss. Elizabeth | 58 | female |
## Describing numerical data
df.describe()
| Age | |
|---|---|
| count | 3.000000 |
| mean | 38.333333 |
| std | 18.230012 |
| min | 22.000000 |
| 25% | 28.500000 |
| 50% | 35.000000 |
| 75% | 46.500000 |
| max | 58.000000 |
# Column-wise sample data for the BRICS countries: every list has five
# entries, in the same country order as "Country".
# NOTE(review): "New Dehli" is a misspelling of "New Delhi"; it is kept
# verbatim because the recorded outputs in this notebook show this spelling.
dict_1 = dict(
    Country=["Brazil", "Russia", "India", "China", "South Africa"],
    Capital=["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
    Area=[8.516, 17.10, 3.286, 9.597, 1.221],
    Population=[200.4, 143.5, 1252, 1357, 52.98],
)
print(dict_1)
{'Country': ['Brazil', 'Russia', 'India', 'China', 'South Africa'], 'Capital': ['Brasilia', 'Moscow', 'New Dehli', 'Beijing', 'Pretoria'], 'Area': [8.516, 17.1, 3.286, 9.597, 1.221], 'Population': [200.4, 143.5, 1252, 1357, 52.98]}
brics = pd.DataFrame(dict_1)
brics
| Country | Capital | Area | Population | |
|---|---|---|---|---|
| 0 | Brazil | Brasilia | 8.516 | 200.40 |
| 1 | Russia | Moscow | 17.100 | 143.50 |
| 2 | India | New Dehli | 3.286 | 1252.00 |
| 3 | China | Beijing | 9.597 | 1357.00 |
| 4 | South Africa | Pretoria | 1.221 | 52.98 |
brics.head(3)
| Country | Capital | Area | Population | |
|---|---|---|---|---|
| 0 | Brazil | Brasilia | 8.516 | 200.4 |
| 1 | Russia | Moscow | 17.100 | 143.5 |
| 2 | India | New Dehli | 3.286 | 1252.0 |
print(brics)
Country Capital Area Population 0 Brazil Brasilia 8.516 200.40 1 Russia Moscow 17.100 143.50 2 India New Dehli 3.286 1252.00 3 China Beijing 9.597 1357.00 4 South Africa Pretoria 1.221 52.98
# Set the index for brics
brics.index = ["BR", "RU", "IN", "CH", "SA"]
# Print out brics with new index values
print(brics)
Country Capital Area Population BR Brazil Brasilia 8.516 200.40 RU Russia Moscow 17.100 143.50 IN India New Dehli 3.286 1252.00 CH China Beijing 9.597 1357.00 SA South Africa Pretoria 1.221 52.98
## Describing numerical data
brics.describe()
| Area | Population | |
|---|---|---|
| count | 5.000000 | 5.000000 |
| mean | 7.944000 | 601.176000 |
| std | 6.200557 | 645.261454 |
| min | 1.221000 | 52.980000 |
| 25% | 3.286000 | 143.500000 |
| 50% | 8.516000 | 200.400000 |
| 75% | 9.597000 | 1252.000000 |
| max | 17.100000 | 1357.000000 |
brics.info()
<class 'pandas.core.frame.DataFrame'> Index: 5 entries, BR to SA Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country 5 non-null object 1 Capital 5 non-null object 2 Area 5 non-null float64 3 Population 5 non-null float64 dtypes: float64(2), object(2) memory usage: 200.0+ bytes
import numpy as np
# Column-wise marks sheet for five students. One "Statistical Inference"
# mark is deliberately missing (np.nan) so that later cells can demonstrate
# missing-value handling.
dict_2 = {
    "Name": ["Alice", "Juliet", "Alex", "Sara", "Oliver"],
    "Statistical Inference": [87, np.nan, 96, 65, 87],
    "Statistical Methods": [94, 87, 58, 63, 72],
    "Vectors & Matrices": [92, 74, 57, 96, 88],
    "Remarks": ["Excellent", "Good", "Good", "Good", "Very Good"],
}
print(dict_2)
{'Name': ['Alice', 'Juliet', 'Alex', 'Sara', 'Oliver'], 'Statistical Inference': [87, nan, 96, 65, 87], 'Statistical Methods': [94, 87, 58, 63, 72], 'Vectors & Matrices': [92, 74, 57, 96, 88], 'Remarks': ['Excellent', 'Good', 'Good', 'Good', 'Very Good']}
brics1 = pd.DataFrame(dict_2)
print(brics1)
Name Statistical Inference Statistical Methods Vectors & Matrices \
0 Alice 87.0 94 92
1 Juliet NaN 87 74
2 Alex 96.0 58 57
3 Sara 65.0 63 96
4 Oliver 87.0 72 88
Remarks
0 Excellent
1 Good
2 Good
3 Good
4 Very Good
brics1
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
| 2 | Alex | 96.0 | 58 | 57 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good |
brics1.describe()
| Statistical Inference | Statistical Methods | Vectors & Matrices | |
|---|---|---|---|
| count | 4.000000 | 5.000000 | 5.000000 |
| mean | 83.750000 | 74.800000 | 81.400000 |
| std | 13.200379 | 15.385058 | 15.962456 |
| min | 65.000000 | 58.000000 | 57.000000 |
| 25% | 81.500000 | 63.000000 | 74.000000 |
| 50% | 87.000000 | 72.000000 | 88.000000 |
| 75% | 89.250000 | 87.000000 | 92.000000 |
| max | 96.000000 | 94.000000 | 96.000000 |
brics1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5 entries, 0 to 4 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Name 5 non-null object 1 Statistical Inference 4 non-null float64 2 Statistical Methods 5 non-null int64 3 Vectors & Matrices 5 non-null int64 4 Remarks 5 non-null object dtypes: float64(1), int64(2), object(2) memory usage: 328.0+ bytes
brics1["Vectors & Matrices"]
0 92 1 74 2 57 3 96 4 88 Name: Vectors & Matrices, dtype: int64
brics1.index = ["P1", "P2", "P3", "P4", "P5"]
print(brics1)
Name Statistical Inference Statistical Methods Vectors & Matrices \
P1 Alice 87.0 94 92
P2 Juliet NaN 87 74
P3 Alex 96.0 58 57
P4 Sara 65.0 63 96
P5 Oliver 87.0 72 88
Remarks
P1 Excellent
P2 Good
P3 Good
P4 Good
P5 Very Good
brics1
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| P1 | Alice | 87.0 | 94 | 92 | Excellent |
| P2 | Juliet | NaN | 87 | 74 | Good |
| P3 | Alex | 96.0 | 58 | 57 | Good |
| P4 | Sara | 65.0 | 63 | 96 | Good |
| P5 | Oliver | 87.0 | 72 | 88 | Very Good |
brics1.shape
(5, 5)
## Write to EXCEL
brics1.to_excel("brics_test.xlsx", sheet_name="brics_1", index=True)
## Write to CSV
brics1.to_csv("brics_test.csv", header=True, index=False)
## Read from EXCEL
titanic = pd.read_excel("brics_test.xlsx", sheet_name="brics_1", index_col=0)
print(titanic)
Name Statistical Inference Statistical Methods Vectors & Matrices \
P1 Alice 87.0 94 92
P2 Juliet NaN 87 74
P3 Alex 96.0 58 57
P4 Sara 65.0 63 96
P5 Oliver 87.0 72 88
Remarks
P1 Excellent
P2 Good
P3 Good
P4 Good
P5 Very Good
titanic
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| P1 | Alice | 87.0 | 94 | 92 | Excellent |
| P2 | Juliet | NaN | 87 | 74 | Good |
| P3 | Alex | 96.0 | 58 | 57 | Good |
| P4 | Sara | 65.0 | 63 | 96 | Good |
| P5 | Oliver | 87.0 | 72 | 88 | Very Good |
titanic.head(3)
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| P1 | Alice | 87.0 | 94 | 92 | Excellent |
| P2 | Juliet | NaN | 87 | 74 | Good |
| P3 | Alex | 96.0 | 58 | 57 | Good |
## Read from CSV
titanic_csv = pd.read_csv("brics_test.csv")#, index_col=0) ## include index_col only when index is present in the csv file
print(titanic_csv)
Name Statistical Inference Statistical Methods Vectors & Matrices \
0 Alice 87.0 94 92
1 Juliet NaN 87 74
2 Alex 96.0 58 57
3 Sara 65.0 63 96
4 Oliver 87.0 72 88
Remarks
0 Excellent
1 Good
2 Good
3 Good
4 Very Good
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
| 2 | Alex | 96.0 | 58 | 57 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good |
titanic_csv.head(2)
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
capitals = titanic_csv["Remarks"]
capitals.head()
0 Excellent 1 Good 2 Good 3 Good 4 Very Good Name: Remarks, dtype: object
type(titanic_csv["Remarks"])
pandas.core.series.Series
type(capitals)
pandas.core.series.Series
capitals.shape
(5,)
cap_area = titanic_csv[["Name", "Remarks"]]
cap_area
| Name | Remarks | |
|---|---|---|
| 0 | Alice | Excellent |
| 1 | Juliet | Good |
| 2 | Alex | Good |
| 3 | Sara | Good |
| 4 | Oliver | Very Good |
cap_area.shape
(5, 2)
## Remove index_col=0, re-execute the earlier cells, and observe the change
### Data fetching based on conditions
titanic_subset = titanic_csv[titanic_csv["Statistical Methods"] > 70.0]
titanic_subset
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good |
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
| 2 | Alex | 96.0 | 58 | 57 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good |
class_23 = titanic_csv[titanic_csv["Remarks"].isin(["Excellent", "Very Good"])]
class_23
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good |
## Combine conditions with AND (&) or OR (|); wrap each condition in parentheses
cond = titanic_csv[(titanic_csv["Remarks"] == "Good") | (titanic_csv["Vectors & Matrices"] >= 90.00)]
cond
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
| 2 | Alex | 96.0 | 58 | 57 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
titanic_csv[(titanic_csv["Remarks"] == "Good") | (titanic_csv["Vectors & Matrices"] >= 90.00)]
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
| 2 | Alex | 96.0 | 58 | 57 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
cond = titanic_csv[(titanic_csv["Remarks"] == "Good") & (titanic_csv["Vectors & Matrices"] >= 70.00)]
cond
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 1 | Juliet | NaN | 87 | 74 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
## Keep only the rows where "Statistical Inference" is NOT NULL
area_no_na = titanic_csv[titanic_csv["Statistical Inference"].notna()]
area_no_na
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 2 | Alex | 96.0 | 58 | 57 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good |
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
| 2 | Alex | 96.0 | 58 | 57 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good |
test = titanic_csv.iloc[2:3, 1:3]
test
| Statistical Inference | Statistical Methods | |
|---|---|---|
| 2 | 96.0 | 58 |
ti = titanic_csv.copy()
ti
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
| 2 | Alex | 96.0 | 58 | 57 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good |
# Blank out row 2 of columns 1-2 ("Statistical Inference" and
# "Statistical Methods") on the copy `ti`; `titanic_csv` is untouched.
# "Statistical Methods" is int64 and cannot hold NaN. Relying on the
# assignment to upcast it silently is deprecated since pandas 2.1, so the
# column is cast to float64 explicitly first. The resulting frame is the
# same: the column becomes float64 with NaN in row 2.
ti["Statistical Methods"] = ti["Statistical Methods"].astype("float64")
ti.iloc[2:3, 1:3] = np.nan
ti
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94.0 | 92 | Excellent |
| 1 | Juliet | NaN | 87.0 | 74 | Good |
| 2 | Alex | NaN | NaN | 57 | Good |
| 3 | Sara | 65.0 | 63.0 | 96 | Good |
| 4 | Oliver | 87.0 | 72.0 | 88 | Very Good |
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | |
|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent |
| 1 | Juliet | NaN | 87 | 74 | Good |
| 2 | Alex | 96.0 | 58 | 57 | Good |
| 3 | Sara | 65.0 | 63 | 96 | Good |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good |
s = pd.Series([20, 40, 50, np.nan, 70], index=[0,1,2,3,4])
s
0 20.0 1 40.0 2 50.0 3 NaN 4 70.0 dtype: float64
titanic_csv['Python'] = s.values
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | Python | |
|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 |
titanic_csv['1st'] = titanic_csv["Statistical Inference"] > 50
xx = titanic_csv[["Statistical Inference", "Statistical Methods", "Vectors & Matrices"]]
xx
| Statistical Inference | Statistical Methods | Vectors & Matrices | |
|---|---|---|---|
| 0 | 87.0 | 94 | 92 |
| 1 | NaN | 87 | 74 |
| 2 | 96.0 | 58 | 57 |
| 3 | 65.0 | 63 | 96 |
| 4 | 87.0 | 72 | 88 |
titanic_csv['2nd'] = xx.mean(axis=1)
titanic_csv['3rd'] = xx.max(axis=1)
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | Python | 1st | 2nd | 3rd | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 |
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | Python | 1st | 2nd | 3rd | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 |
titanic_csv["new"] = titanic_csv["Statistical Methods"] * 1.882
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | Python | 1st | 2nd | 3rd | new | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 |
titanic_csv["new_1"] = titanic_csv["Statistical Methods"] / titanic_csv["Vectors & Matrices"]
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | Python | 1st | 2nd | 3rd | new | new_1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
# Give the two derived columns descriptive names. rename() returns a new
# DataFrame rather than modifying in place, so the result is bound back
# to titanic_csv.
titanic_csv = titanic_csv.rename(columns={"new": "Multiply", "new_1": "Ratio"})
titanic_csv
| Name | Statistical Inference | Statistical Methods | Vectors & Matrices | Remarks | Python | 1st | 2nd | 3rd | Multiply | Ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
titanic_csv = titanic_csv.rename(columns=str.lower)
titanic_csv.head()
| name | statistical inference | statistical methods | vectors & matrices | remarks | python | 1st | 2nd | 3rd | multiply | ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
titanic_csv = titanic_csv.rename(columns=str.capitalize)
titanic_csv.head()
| Name | Statistical inference | Statistical methods | Vectors & matrices | Remarks | Python | 1st | 2nd | 3rd | Multiply | Ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
titanic_csv = titanic_csv.rename(columns=str.upper)
titanic_csv.head()
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
titanic_csv.sort_values(by="STATISTICAL METHODS", ascending=True).head()
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
titanic_csv_1 = titanic_csv.sort_values(by="STATISTICAL METHODS", ascending=True).copy()
titanic_csv_1.head()
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
titanic_csv
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
good = titanic_csv[titanic_csv["REMARKS"] == "Good"].copy()
good
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
s = pd.Series(["tezpur", "kolkata", "tezpur"], index=[1,2,3])
good["Location"] = s.values
good
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Location | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | tezpur |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | kolkata |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | tezpur |
titanic_csv
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
titanic_csv.index = ["BR", "RU", "IN", "CH", "SA"]
titanic_csv.head()
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| BR | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| RU | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| IN | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| CH | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| SA | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
brics
| Country | Capital | Area | Population | |
|---|---|---|---|---|
| BR | Brazil | Brasilia | 8.516 | 200.40 |
| RU | Russia | Moscow | 17.100 | 143.50 |
| IN | India | New Dehli | 3.286 | 1252.00 |
| CH | China | Beijing | 9.597 | 1357.00 |
| SA | South Africa | Pretoria | 1.221 | 52.98 |
merged = pd.concat([titanic_csv, brics], axis=1)
merged.head()
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Country | Capital | Area | Population | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BR | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | Brazil | Brasilia | 8.516 | 200.40 |
| RU | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | Russia | Moscow | 17.100 | 143.50 |
| IN | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | India | New Dehli | 3.286 | 1252.00 |
| CH | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | China | Beijing | 9.597 | 1357.00 |
| SA | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | South Africa | Pretoria | 1.221 | 52.98 |
titanic_csv.index = [0, 1, 2, 3, 4]
titanic_csv.head()
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
x1 = titanic_csv[0:3].copy()
x1
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
x2 = titanic_csv[3:5].copy()
x2
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
x1_x2 = pd.concat([x1, x2], axis=0)
x1_x2.head()
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
titi = titanic_csv.sort_values("STATISTICAL METHODS")
titi
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
titanic_csv
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
x1.head()
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
x2.head()
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
x1x2 = pd.concat([x1, x2], keys=["X1", "X2"])
x1x2
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
x1x2["NAME"].str.lower()
X1 0 alice
1 juliet
2 alex
X2 3 sara
4 oliver
Name: NAME, dtype: object
x1x2["REMARKS"].str.split(" ")
X1 0 [Excellent]
1 [Good]
2 [Good]
X2 3 [Good]
4 [Very, Good]
Name: REMARKS, dtype: object
x1x2["REMARKS"].str.split(" ").str.get(1)
X1 0 NaN
1 NaN
2 NaN
X2 3 NaN
4 Good
Name: REMARKS, dtype: object
x1x2["REMARKS"].str.contains("Good")
X1 0 False
1 True
2 True
X2 3 True
4 True
Name: REMARKS, dtype: bool
x1x2[x1x2["REMARKS"].str.contains("Good")]
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
x1x2
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
#Longest name
x1x2["NAME"].str.len()
X1 0 5
1 6
2 4
X2 3 4
4 6
Name: NAME, dtype: int64
x1x2["NAME"].str.len().idxmax()
('X1', 1)
# iloc: location by index of rows and columns
# loc: location by labels of rows and columns
x1x2.loc[x1x2["NAME"].str.len().idxmax(), "NAME"]
'Juliet'
x1x2["Rem:Short"] = x1x2["REMARKS"].replace({"Good": "G", "Very Good": "V", "Excellent": "E"})
x1x2
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
x1x2.columns
Index(['NAME', 'STATISTICAL INFERENCE', 'STATISTICAL METHODS',
'VECTORS & MATRICES', 'REMARKS', 'PYTHON', '1ST', '2ND', '3RD',
'MULTIPLY', 'RATIO', 'Rem:Short'],
dtype='object')
x1x2=x1x2.rename(columns={'Rem:Short':'RM'})
x1x2.columns
Index(['NAME', 'STATISTICAL INFERENCE', 'STATISTICAL METHODS',
'VECTORS & MATRICES', 'REMARKS', 'PYTHON', '1ST', '2ND', '3RD',
'MULTIPLY', 'RATIO', 'RM'],
dtype='object')
x1x2
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | RM | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
x1x2 = x1x2.rename(columns={'RM':'Rem:Short'})
x1x2.columns
Index(['NAME', 'STATISTICAL INFERENCE', 'STATISTICAL METHODS',
'VECTORS & MATRICES', 'REMARKS', 'PYTHON', '1ST', '2ND', '3RD',
'MULTIPLY', 'RATIO', 'Rem:Short'],
dtype='object')
x1x2
| NAME | STATISTICAL INFERENCE | STATISTICAL METHODS | VECTORS & MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
x1x2.columns = x1x2.columns.str.replace(' ', '_')
x1x2.columns
Index(['NAME', 'STATISTICAL_INFERENCE', 'STATISTICAL_METHODS',
'VECTORS_&_MATRICES', 'REMARKS', 'PYTHON', '1ST', '2ND', '3RD',
'MULTIPLY', 'RATIO', 'Rem:Short'],
dtype='object')
x1x2
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
### Dropping the columns
# Column drop
x1x2.drop('Rem:Short', axis=1, inplace=False).head()
# x1x2.head()
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
x1x2
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
# With inplace=False, drop() returns a new DataFrame and leaves x1x2 untouched.
# The returned frame is discarded here, so head() still shows 'Rem:Short'.
x1x2.drop('Rem:Short', axis=1, inplace=False)
x1x2.head()
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
temp_x = x1x2.copy()
temp_x.drop(['Rem:Short', 'STATISTICAL_METHODS'], axis=1, inplace=True)
temp_x
| NAME | STATISTICAL_INFERENCE | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | ||
|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 |
| 1 | Juliet | NaN | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | |
| 2 | Alex | 96.0 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | |
| X2 | 3 | Sara | 65.0 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
# Row drop
temp_x.drop(temp_x.index[[0, 1]], axis=0, inplace=True)
temp_x
| NAME | STATISTICAL_INFERENCE | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | ||
|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 2 | Alex | 96.0 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 |
| X2 | 3 | Sara | 65.0 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 |
| 4 | Oliver | 87.0 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 |
x1x2
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
# The loc method is used to select rows and columns by label.
# The iloc method is used to select rows and columns by integer position
x1x2.loc[('X1',0):('X2',3), :] # last inclusive
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
x1x2.tail(4)
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
x1x2_T = x1x2.T
x1x2_T
| X1 | X2 | ||||
|---|---|---|---|---|---|
| 0 | 1 | 2 | 3 | 4 | |
| NAME | Alice | Juliet | Alex | Sara | Oliver |
| STATISTICAL_INFERENCE | 87.0 | NaN | 96.0 | 65.0 | 87.0 |
| STATISTICAL_METHODS | 94 | 87 | 58 | 63 | 72 |
| VECTORS_&_MATRICES | 92 | 74 | 57 | 96 | 88 |
| REMARKS | Excellent | Good | Good | Good | Very Good |
| PYTHON | 20.0 | 40.0 | 50.0 | NaN | 70.0 |
| 1ST | True | False | True | True | True |
| 2ND | 91.0 | 80.5 | 70.333333 | 74.666667 | 82.333333 |
| 3RD | 94.0 | 87.0 | 96.0 | 96.0 | 88.0 |
| MULTIPLY | 176.908 | 163.734 | 109.156 | 118.566 | 135.504 |
| RATIO | 1.021739 | 1.175676 | 1.017544 | 0.65625 | 0.818182 |
| Rem:Short | E | G | G | G | V |
x1x2
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
x1x2.sample(n=2)
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.5 | 87.0 | 163.734 | 1.175676 | G |
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.0 | 94.0 | 176.908 | 1.021739 | E |
x1x2
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X1 | 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | |
| X2 | 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
# Setting-with-enlargement: assigning to a label that does not exist adds a row.
# NOTE: x1x2 has a two-level (X1/X2, n) index at this point; as the output
# below shows, using the scalar label 5 collapses that index into flat tuple
# labels such as (X1, 0) and stores the new row under the plain label 5.
x1x2.loc[5] = ["Oliver", 87.0, 72, 88, "Very Good", 70.0, True, 82.333333, 88.0, 135.504, 0.818182, "V"]
x1x2
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| (X1, 0) | Alice | 87.0 | 94 | 92 | Excellent | 20.0 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| (X1, 1) | Juliet | NaN | 87 | 74 | Good | 40.0 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| (X1, 2) | Alex | 96.0 | 58 | 57 | Good | 50.0 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G |
| (X2, 3) | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| (X2, 4) | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 5 | Oliver | 87.0 | 72 | 88 | Very Good | 70.0 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
# Append a row under the next integer label: len(x1x2.index) is 6 here, so the
# row is stored under label 6.
# NOTE: this only appends while that label is not already in the index;
# if it is, .loc overwrites the existing row instead of adding one.
x1x2.loc[len(x1x2.index)]=["Oliver", 87.0, 72, 88, "Very Good", 135.504, True, 0.818182, 95, 110.00, 1.01, "V"]
x1x2
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| (X1, 0) | Alice | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| (X1, 1) | Juliet | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| (X1, 2) | Alex | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G |
| (X2, 3) | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| (X2, 4) | Oliver | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 5 | Oliver | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 6 | Oliver | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
# Append one record supplied as a list of dicts.
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0,
# so build a one-row frame from the record and concatenate it instead.
# ignore_index=True discards the existing labels and renumbers rows 0..n-1.
data = [
    {
        "NAME": "Oliver",
        "STATISTICAL_INFERENCE": 87.0,
        "STATISTICAL_METHODS": 72,
        "VECTORS_&_MATRICES": 88,
        "REMARKS": "Very Good",
        "PYTHON": 135.504,
        "1ST": True,
        "2ND": 0.818182,
        "3RD": 95,
        "MULTIPLY": 110.00,
        "RATIO": 1.01,
        "Rem:Short": "V",
    }
]
x1x2 = pd.concat([x1x2, pd.DataFrame(data)], ignore_index=True, sort=False)
x1x2
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 5 | Oliver | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 6 | Oliver | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 7 | Oliver | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
# Append one more record, this time keeping the original labels.
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0,
# so build a one-row frame from the record and concatenate it instead.
# ignore_index=False keeps each frame's own labels, so the new row arrives
# with label 0 (the label of the one-row frame) and duplicates an existing one.
data = [
    {
        "NAME": "Oliver",
        "STATISTICAL_INFERENCE": 87.0,
        "STATISTICAL_METHODS": 72,
        "VECTORS_&_MATRICES": 88,
        "REMARKS": "Very Good",
        "PYTHON": 13509,
        "1ST": True,
        "2ND": 0.818182,
        "3RD": 95,
        "MULTIPLY": 110.00,
        "RATIO": 1.01,
        "Rem:Short": "V",
    }
]
x1x2 = pd.concat([x1x2, pd.DataFrame(data)], ignore_index=False, sort=False)
x1x2
| NAME | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| 2 | Alex | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G |
| 3 | Sara | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 5 | Oliver | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 6 | Oliver | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 7 | Oliver | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 0 | Oliver | 87.0 | 72 | 88 | Very Good | 13509.000 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'), index=['x', 'y'])
df
| A | B | |
|---|---|---|
| x | 1 | 2 |
| y | 3 | 4 |
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'), index=['x', 'y'])
df
| A | B | |
|---|---|---|
| x | 1 | 2 |
| y | 3 | 4 |
# Stack df2 below df, keeping the original row labels (x, y, x, y).
# pd.concat replaces DataFrame.append(), which was removed in pandas 2.0.
pd.concat([df, df2])
| A | B | |
|---|---|---|
| x | 1 | 2 |
| y | 3 | 4 |
| x | 5 | 6 |
| y | 7 | 8 |
# Stack df2 below df and renumber the rows 0..3 instead of keeping x/y labels.
# pd.concat replaces DataFrame.append(), which was removed in pandas 2.0.
pd.concat([df, df2], ignore_index=True)
| A | B | |
|---|---|---|
| 0 | 1 | 2 |
| 1 | 3 | 4 |
| 2 | 5 | 6 |
| 3 | 7 | 8 |
# Inserting a column at any position (here position 1, right after NAME);
# the scalar 5 is repeated for every row.
x1x2.insert(1, "D", 5) # in-place operation: x1x2 itself is modified
x1x2
| NAME | D | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 5 | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | 5 | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| 2 | Alex | 5 | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G |
| 3 | Sara | 5 | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 5 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 6 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 7 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 0 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 13509.000 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
x1x2.insert(1, "D_new", [5, 5,55,5,60,30,20, 34, 35])
x1x2
| NAME | D_new | D | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 5 | 5 | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | 5 | 5 | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| 2 | Alex | 55 | 5 | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G |
| 3 | Sara | 5 | 5 | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 60 | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 5 | Oliver | 30 | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 6 | Oliver | 20 | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 7 | Oliver | 34 | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 0 | Oliver | 35 | 5 | 87.0 | 72 | 88 | Very Good | 13509.000 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
x1x2["C"] = [10, 20, 30, 40, 100, 20, 1, 56, 45]
x1x2
| NAME | D_new | D | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 5 | 5 | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E | 10 |
| 1 | Juliet | 5 | 5 | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | 20 |
| 2 | Alex | 55 | 5 | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | 30 |
| 3 | Sara | 5 | 5 | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G | 40 |
| 4 | Oliver | 60 | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V | 100 |
| 5 | Oliver | 30 | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V | 20 |
| 6 | Oliver | 20 | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V | 1 |
| 7 | Oliver | 34 | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V | 56 |
| 0 | Oliver | 35 | 5 | 87.0 | 72 | 88 | Very Good | 13509.000 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V | 45 |
import numpy as np
# Add column "E" with one random number per row.
# The length comes from len(x1x2) rather than a hard-coded 9, so the cell keeps
# working if rows are added or removed above; the array is assigned directly,
# without an intermediate list.
x1x2.loc[:, "E"] = np.random.rand(len(x1x2))
x1x2
| NAME | D_new | D | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | C | E | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 5 | 5 | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E | 10 | 0.867215 |
| 1 | Juliet | 5 | 5 | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | 20 | 0.033397 |
| 2 | Alex | 55 | 5 | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | 30 | 0.091228 |
| 3 | Sara | 5 | 5 | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G | 40 | 0.820054 |
| 4 | Oliver | 60 | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V | 100 | 0.337483 |
| 5 | Oliver | 30 | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V | 20 | 0.511977 |
| 6 | Oliver | 20 | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V | 1 | 0.034382 |
| 7 | Oliver | 34 | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V | 56 | 0.478075 |
| 0 | Oliver | 35 | 5 | 87.0 | 72 | 88 | Very Good | 13509.000 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V | 45 | 0.529336 |
# assign() returns a new DataFrame with the extra column G; x1x2 is not changed.
# The new column name is passed as a keyword argument, so it must be a valid
# Python identifier (it does not have to be a single letter). Likewise, the
# attribute access x1x2.E only works because "E" is a valid identifier.
x1x2_copy = x1x2.assign(G = x1x2.E * 100)
x1x2_copy
| NAME | D_new | D | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | C | E | G | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 5 | 5 | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E | 10 | 0.867215 | 86.721454 |
| 1 | Juliet | 5 | 5 | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G | 20 | 0.033397 | 3.339708 |
| 2 | Alex | 55 | 5 | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G | 30 | 0.091228 | 9.122837 |
| 3 | Sara | 5 | 5 | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G | 40 | 0.820054 | 82.005447 |
| 4 | Oliver | 60 | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V | 100 | 0.337483 | 33.748330 |
| 5 | Oliver | 30 | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V | 20 | 0.511977 | 51.197651 |
| 6 | Oliver | 20 | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V | 1 | 0.034382 | 3.438153 |
| 7 | Oliver | 34 | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V | 56 | 0.478075 | 47.807491 |
| 0 | Oliver | 35 | 5 | 87.0 | 72 | 88 | Very Good | 13509.000 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V | 45 | 0.529336 | 52.933592 |
x1x2.drop(['D_new', 'C', 'E'], axis=1, inplace=True)
x1x2
| NAME | D | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 5 | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | 5 | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| 2 | Alex | 5 | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G |
| 3 | Sara | 5 | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 5 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 6 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 7 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 0 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 13509.000 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
x1x2_n = x1x2.copy()
x1x2.duplicated()
0 False 1 False 2 False 3 False 4 False 5 False 6 False 7 True 0 False dtype: bool
x1x2 = x1x2[~x1x2.duplicated()]
x1x2
| NAME | D | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 5 | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | 5 | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| 2 | Alex | 5 | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G |
| 3 | Sara | 5 | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 5 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 6 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 0 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 13509.000 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
# Remove duplicate rows (the first occurrence of each row is kept).
# An equivalent technique using duplicated() is boolean-mask filtering:
#     x1x2_n[~x1x2_n.duplicated()]
x1x2_n.drop_duplicates()
| NAME | D | STATISTICAL_INFERENCE | STATISTICAL_METHODS | VECTORS_&_MATRICES | REMARKS | PYTHON | 1ST | 2ND | 3RD | MULTIPLY | RATIO | Rem:Short | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alice | 5 | 87.0 | 94 | 92 | Excellent | 20.000 | True | 91.000000 | 94.0 | 176.908 | 1.021739 | E |
| 1 | Juliet | 5 | NaN | 87 | 74 | Good | 40.000 | False | 80.500000 | 87.0 | 163.734 | 1.175676 | G |
| 2 | Alex | 5 | 96.0 | 58 | 57 | Good | 50.000 | True | 70.333333 | 96.0 | 109.156 | 1.017544 | G |
| 3 | Sara | 5 | 65.0 | 63 | 96 | Good | NaN | True | 74.666667 | 96.0 | 118.566 | 0.656250 | G |
| 4 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 5 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 70.000 | True | 82.333333 | 88.0 | 135.504 | 0.818182 | V |
| 6 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 135.504 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |
| 0 | Oliver | 5 | 87.0 | 72 | 88 | Very Good | 13509.000 | True | 0.818182 | 95.0 | 110.000 | 1.010000 | V |