-
Notifications
You must be signed in to change notification settings - Fork 1
/
update_rss.sh
executable file
·73 lines (60 loc) · 2.29 KB
/
update_rss.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/bin/bash
# Regenerate the AFP "Assemblée nationale" RSS feed when the AFP index page
# has gained new lines since the previous run.
#
# Files used, all relative to the script's own directory:
#   afp-an.rss   generated feed
#   last.html    index page as of the previous completed run
#   recent.html  index page fetched by this run
#   .cache/      cached copies of the individual article pages

# Run from the script's directory. dirname also copes with a $0 that holds no
# "/" (e.g. "bash update_rss.sh"), and the quotes keep paths with spaces
# intact. Nothing below is safe to run from another directory, so stop if
# the cd fails.
cd "$(dirname "$0")" || exit 1

rss="afp-an.rss"
rooturl="http://hosting.afp.com/clients/assembleenationale/francais/assnat/"
url="${rooturl}index.html"

mkdir -p .cache
# Make sure both files exist so the diff below works on a first run.
touch "$rss" last.html

# -f makes curl fail on HTTP errors so that an error page is never mistaken
# for the index; on any fetch failure leave last.html untouched and stop.
if ! curl -sfL "$url" > recent.html; then
  echo "update_rss: cannot fetch $url" >&2
  rm -f recent.html
  exit 1
fi

# Diff lines starting with ">" exist only in the freshly fetched page; when
# there are none, nothing new has been published and the run ends here.
if ! diff last.html recent.html | grep -q "^>"; then
  rm -f recent.html
  exit 0
fi

# Timestamp of this run in RFC 2822 form, used as the channel's pubDate.
now=$(date -R)
date=""
title=""
link=""
# Filter: read text on stdin line by line, replace HTML entities by the
# characters they stand for, and write each line to stdout encoded as UTF-8.
# Requires Perl with the HTML::Entities and Encode modules.
decode_entities() {
  perl -MHTML::Entities -MEncode -nlpe '$_=encode("utf8", decode_entities($_))'
}
# Start the feed file afresh (truncating any previous content): XML prolog,
# opening <rss>/<channel> tags and the channel-level metadata. $now is
# expanded inside the here-document; everything else is literal text.
cat > "$rss" <<EOF
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>AFP AN RSS</title>
<link>http://hosting.afp.com/clients/assembleenationale/francais/assnat/index.html</link>
<description>Les dernières dépèches AFP liées à l'Assemblée nationale</description>
<pubDate>$now</pubDate>
<generator>RegardsCitoyens https://github.com/RegardsCitoyens/AFP-AN-RSS</generator>
EOF
# Escape the characters that are special in XML character data (stdin to
# stdout). "&" is handled first so that the entities produced for "<" and ">"
# are not escaped a second time.
xml_escape() {
  sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Turn every dispatch listed on the index page into an RSS <item> appended to
# $rss. Reads recent.html, $rooturl and $rss and calls decode_entities, all of
# which are set up earlier in the script; article pages are downloaded once
# into .cache/ and reused on later runs.
#
# The page is flattened to one line and then re-split on the table-opening
# tag, so that each resulting line holds one table's markup; only lines that
# carry the "(AFP) - " marker are processed.
# NOTE(review): "cellpading" is kept verbatim because the pattern has to match
# the page's markup - presumably that is the page's own spelling; confirm.
tr '\n' ' ' < recent.html |
  sed 's/<table cellspacing=2 cellpading=2 border=0 width="100%">/\n/g' |
  sed 's/\s\+/ /g' |
  grep "(AFP) - " |
  # IFS= and -r hand the line over untouched (no backslash processing).
  while IFS= read -r line; do
    # Quoted printf instead of an unquoted echo: the markup must not undergo
    # glob expansion on characters such as "*" or "?".
    link="${rooturl}$(printf '%s\n' "$line" | sed 's/^.*href="//' | sed 's/html">.*$/html/')"
    # Cache key: whatever follows the last "/assnat/" in the link.
    id=${link##*/assnat/}
    # The title is entity-decoded first and then re-escaped for XML, since a
    # decoded "&" or "<" would otherwise make the feed malformed.
    title=$(printf '%s\n' "$line" |
      sed 's/.*<a[^>]\+>//' |
      sed 's/<\/a>.*$//' |
      decode_entities |
      xml_escape)

    # Download the article only when no non-empty cached copy exists.
    if ! [ -s ".cache/$id" ]; then
      curl -sL "$link" > ".cache/$id"
    fi

    # Keep the lines matching "<font face=", turn tags into spaces, decode
    # entities, then join everything into one trimmed, single-spaced line so
    # the "heure de Paris" patterns below see the whole text at once.
    content=$(grep "<font face=" ".cache/$id" |
      sed 's/<\/\?[a-z]\+[^>]*>/ /g' |
      decode_entities |
      tr '\n' ' ' |
      sed 's/\s\+/ /g; s/^ //; s/ $//')

    # Body text: everything after the "heure de Paris - " marker.
    desc=$(printf '%s\n' "$content" | sed 's/^.*heure de Paris - //i')
    # Date text: everything before that marker, minus the leading place name
    # and the "(AFP) - " tag, reordered from "D month 20YY HHhMM" to
    # "month D 20YY HH:MM", with French month abbreviations mapped to the
    # English ones that GNU date understands.
    day=$(printf '%s\n' "$content" |
      sed -e 's/ heure de Paris - .*$//i' \
        -e 's/^[^0-9]\+, //i' \
        -e 's/(AFP) - //i' \
        -e 's/^\([0-9]\+\) \([a-z]\+\) 20\([0-9]\+\) \([0-9]\+\)h\([0-9]\+\)/\2 \1 20\3 \4:\5/i' \
        -e 's/avr/apr/i' -e 's/fév/feb/i' -e 's/mai/may/i' -e 's/juin/jun/i' \
        -e 's/juil/jul/i' -e 's/août/aug/i' -e 's/déc/dec/i' -e 's/mars/mar/i')
    # If date cannot parse the text it reports the problem on stderr and the
    # variable stays empty; the item is then written without <pubDate>
    # rather than with an empty one.
    date=$(date -R -d "$day")

    {
      printf '%s\n' " <item>" \
        "<title>$title</title>" \
        "<link>$link</link>" \
        "<description><![CDATA[$desc]]></description>"
      if [ -n "$date" ]; then
        printf '%s\n' "<pubDate>$date</pubDate>"
      fi
      printf '%s\n' "</item>"
    } >> "$rss"
  done
# Close the channel and the document.
printf '%s\n' " </channel>" "</rss>" >> "$rss"

# Rotate the snapshots: the previous index page is kept as lastold.html and
# the page fetched by this run becomes the reference for the next one.
mv -f last.html lastold.html
mv -f recent.html last.html

# Commit the regenerated feed file and push it.
git commit -m "update rss" "$rss"
git push