-
Notifications
You must be signed in to change notification settings - Fork 0
/
Data cleaning of a housing.sql
169 lines (115 loc) · 3.83 KB
/
Data cleaning of a housing.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
--select *
--from nashvilehousing
-------------
--select saledate,convert(date,saledate)
--from nashvilehousing
alter table nashvilehousing
add newsalesdate date;
--update nashvilehousing
--set newsalesdate=convert(date,saledate)
--select newsalesdate,convert(date,saledate)
--from nashvilehousing
--------------------------
select propertyaddress
from portfolioproject..nashvilehousing
--where propertyaddress is null
order by ParcelID
select a1.ParcelID,a1.[PropertyAddress],a2.ParcelID,a2.PropertyAddress,ISNULL(a1.propertyaddress,a2.propertyaddress)
from portfolioproject..nashvilehousing a1
join portfolioproject..nashvilehousing a2
on a1.ParcelID=a2.ParcelID
and a1.[UniqueID ] != a2.[UniqueID ]
where a1.propertyaddress is null
update a1
set a1.PropertyAddress=ISNULL(a1.propertyaddress,a2.propertyaddress)
from portfolioproject..nashvilehousing a1
join portfolioproject..nashvilehousing a2
on a1.ParcelID=a2.ParcelID
and a1.[UniqueID ] != a2.[UniqueID ]
where a1.propertyaddress is null
select *
from portfolioproject..nashvilehousing a1
where propertyaddress is null
---------------
select
substring(propertyaddress,1,CHARINDEX(',',propertyaddress)) as address,
substring(propertyaddress,CHARINDEX(',',propertyaddress)+1,len(propertyaddress)) as address
from portfolioproject..nashvilehousing
alter table nashvilehousing
add propertylocation nvarchar(255);
update nashvilehousing
set propertylocation=substring(propertyaddress,1,CHARINDEX(',',propertyaddress))
----------------------------------------------------------------
--ALTER TABLE nashvilehousing i used for a mistake
-- in a column
--DROP COLUMN propertycity;
----------------------------------------------------------------
alter table nashvilehousing
add propertycity nvarchar(255);
update nashvilehousing
set propertycity =substring(propertyaddress,CHARINDEX(',',propertyaddress)+1,len(propertyaddress))
select *
from portfolioproject..nashvilehousing
select
parsename(replace(owneraddress,',','.'),3),
parsename(replace(owneraddress,',','.'),2),
parsename(replace(owneraddress,',','.'),1)
from portfolioproject..nashvilehousing
alter table nashvilehousing
add ownernewaddress nvarchar(255);
update nashvilehousing
set ownernewaddress =parsename(replace(owneraddress,',','.'),3)
alter table nashvilehousing
add ownercity nvarchar(255);
update nashvilehousing
set ownercity =parsename(replace(owneraddress,',','.'),2)
alter table nashvilehousing
add ownerstate nvarchar(255);
update nashvilehousing
set ownerstate=parsename(replace(owneraddress,',','.'),1)
select *
from portfolioproject..nashvilehousing
select soldasvacant,count(soldasvacant)
from portfolioproject..nashvilehousing
group by soldasvacant
order by 2;
select soldasvacant,
case when soldasvacant= 'y' then 'yes'
when soldasvacant= 'n' then 'no'
else soldasvacant
end
from portfolioproject..nashvilehousing
--group by soldasvacant
--order by 2;
update nashvilehousing
set soldasvacant=case when soldasvacant= 'y' then 'yes'
when soldasvacant= 'n' then 'no'
else soldasvacant
end
select soldasvacant,count(soldasvacant)
from portfolioproject..nashvilehousing
group by soldasvacant
order by 2
select *
from portfolioproject..nashvilehousing
with rownos as(
select *,
ROW_NUMBER() over(
partition by parcelid,
propertyaddress,
saleprice,
saledate,
legalreference
order by
uniqueid) rowno
from portfolioproject..nashvilehousing
)
SELECT *
from rownos
--where rowno>1
order by propertyaddress
------------------------
SELECT *
FROM portfolioproject..nashvilehousing
ALTER TABLE NASHVILEHOUSING
DROP COLUMN TAXDISTRICT,PROPERTYADDRESS;