-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathRemove.py
30 lines (19 loc) · 891 Bytes
/
Remove.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
# Create our data frame.
DF_obj = DataFrame({'column 1': [1,1,2,2,3,3,3],
'column 2': ['a','a','b','b','c','c','c'],
'column 3': ['A','A','B','B','C','C','C']})
# We want to remove rows.
# object_name.duplicated()
# .duplicated() method searches each row, and returns True or False
# to indicate whether it is a duplicate of another row found earlier in DataFrame
DF_obj.duplicated()
# Drop the duplicates. Any rows that are duplicates of rows that came before.
DF_obj.drop_duplicates()
DF_obj = DataFrame({'column 1': [1,1,2,2,3,3,3],
'column 2': ['a','a','b','b','c','c','c'],
'column 3': ['A','A','B','B','C','D','C']})
DF_obj.drop_duplicates(['column 3'])
# Check for duplicates in column 3 and if found, drop the row!