-
Notifications
You must be signed in to change notification settings - Fork 0
/
newsbeuter2rsstail
155 lines (136 loc) · 3.99 KB
/
newsbeuter2rsstail
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/gawk -f
#
# newsbeuter2rsstail
# Edward Morbius <dredmorbius@gmail.com>
# Sun May 11 07:51:58 UTC 2014
#
# Parse a Newsbeuter feed and generate a set of rsstail commands
# suitable for inclusion into multitail
#
# Optional argument:
# --tags <taglist> Comma-separated list of tags to select
# Match is case insensitive
#
# TODO:
# Done: Add option to filter by tags
# Exclude "voluble" by default
#
# ----------------------------------------------------------------------
# Parse command-line options before any input is read.
#
# Recognised option:
#   --tags <taglist>   Comma-separated list of tags to select.
#
# Globals set here:
#   tagset   1 when --tags was given, otherwise 0
#   taglist  the tag list, lower-cased
#   tags[]   the individual tags, indexed 1..tagmax (as filled by split())
#   tagmax   number of entries in tags[]
#
# The option and its value are deleted from ARGV so that they are not
# treated as input file names.
BEGIN {
    tagset = 0

    # Use a loop index private to this block, so that argument parsing does
    # not leave a non-zero value in a counter used by the input rules.
    # ARGV[0] is the interpreter name, so scanning starts at 1.
    for (argi = 1; argi < ARGC; argi++) {
        if (ARGV[argi] == "--tags") {
            delete ARGV[argi]
            # The value following --tags is the tag list.
            taglist = tolower(ARGV[++argi])
            delete ARGV[argi]
            tagmax = split(taglist, tags, ",")
            tagset = 1
        }
    }
}
# For every input line that starts with a feed URL, print one multitail
# argument that runs rsstail on that feed.
#
#   $1           the feed URL
#   rest of $0   whatever follows the URL on the line (feed name, tags)
#
# Output per feed is "-l " (first feed) or "-L " (later feeds) followed by a
# double-quoted "rsstail ... | sed ..." command.  No newline is printed, so
# all feeds end up on one line.
#
# Feeds are skipped (nothing printed, counter untouched) when:
#   - --tags was given and none of the requested tags matches, or
#   - --tags was not given and the line mentions "voluble".
/^http/ {
    # Text after the URL, lower-cased.  Computed for every record so the
    # default "voluble" exclusion has something to look at even when no
    # --tags filter is active.
    feedtags = tolower(substr($0, length($1) + 1))

    if (tagset == 1) {
        # Match is crude: each tag is used as a regexp against everything
        # after the URL, so feed names CAN match as well as tags.
        # feedtags starts with whatever separates the URL from the rest of
        # the line, hence a match position greater than 1.
        tagmatch = 0
        for (itag = 1; itag <= tagmax; itag++) {
            if (match(feedtags, tags[itag]) > 1) {
                tagmatch = 1
                break
            }
        }
        if (tagmatch != 1) {
            next
        }
    }
    else if (match(feedtags, "voluble")) {
        # Exclude voluble feeds unless tags are being selected explicitly.
        next
    }

    # All skip decisions are made above, so a skipped feed never leaves a
    # dangling -l/-L in the output or consumes the "first feed" slot.
    feedcount++
    if (feedcount == 1) {
        printf ("-l ")      # multitail's first command arg
    }
    else {
        printf( "-L " )     # Additional args, consolidated in first
    }

    # Split the URL into host and path.  The slash after the host is
    # optional so that a bare "http://host" still yields a clean host name.
    host = gensub("^https?://([^/]*).*$", "\\1", "g", $1)
    path = gensub("^https?://[^/]*/?(.*)$", "\\1", "g", $1)
    hostfields = split(host, hostpart, "\\.")
    pathfields = split(path, pathpart, "/" )

    # Derive a short feed title.  The default is the second host label when
    # the host has more than two labels, otherwise the first; sites where
    # that gives a poor title get their own rule.

    # We need to make exceptions for blogging sites
    if( match(host, "(dreamwidth|blogspot|blogger)")) {
        feedtitle = hostpart[1]
    }
    else if ( match(host, "physics.ucsd.edu" )) {
        feedtitle = pathpart[1]
    }
    else if ( match(host, "blogs.law.harvard.edu" )) {
        feedtitle = pathpart[1]
    }
    # Reddit's subreddits
    else if ( match($1, "reddit.com/r" )) {
        feedtitle = "/" pathpart[1] "/" pathpart[2]
    }
    # Reddit user feeds
    else if ( match($1, "reddit.com/user" )) {
        feedtitle = "/" pathpart[1] "/" pathpart[2]
    }
    # Reddit's messages
    else if ( match($1, "reddit.com/message" )) {
        feedtitle = hostpart[2] "-" pathpart[1]
    }
    # Diaspora
    else if ( match(host, "^joindiaspora\\.com" )) {
        feedtitle = "diaspora" "-" gensub("\\.atom", "", "g", pathpart[2])
    }
    # Guardian feeds
    else if ( match(host, "theguardian.com" )) {
        if (match(pathpart[1], "theguardian" )) {
            feedtitle = hostpart[2] "-" pathpart[2]
        }
        else {
            feedtitle = hostpart[2] "-" pathpart[1]
        }
    }
    # Don Marti
    else if ( match(host, "zgp.org" )) {
        feedtitle = pathpart[1]
    }
    else if(hostfields > 2) {
        feedtitle = hostpart[2]
    }
    else {
        feedtitle = hostpart[1]
    }

    # rsstail arguments:
    #   -n 1:          initially load one item
    #   -a :           Show author (not enabled)
    #   -Z <header>:   Add header
    #   -u <feedurl>:  Feed URL
    #
    # The sed bit strips the "Title: " header and cleans leading
    # whitespace.  \x22 is a double quote and \x27 a single quote.
    # NOTE(review): the title and URL are placed inside single quotes
    # unescaped; a URL containing a single quote would break the command.
    printf ( "\x22rsstail -n 1 -Z \x27%s: \x27 -z -u \x27%s\x27 | sed \x27s/: Title:/:/; s/^ //\x27 \x22 ", feedtitle, $1 )

    # Clean up per-feed state.  Whole-array delete replaces element-wise
    # loops, which stopped early because the array shrank while being walked.
    host = ""
    path = ""
    feedtitle = ""
    delete hostpart
    delete pathpart
}