commit 25d109be5625715da9608c7bca97b591f796c82f
parent f57271cdf809b549ec3a691c493e3a62cfb99088
Author: Kebigon <git@kebigon.xyz>
Date: Sun, 6 Jan 2019 20:42:22 +0900
Add RDF feed parser
Diffstat:
3 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/src/main/java/fr/lrgn/yuzurss/FeedClient.java b/src/main/java/fr/lrgn/yuzurss/FeedClient.java
@@ -15,6 +15,7 @@ import org.springframework.web.reactive.function.client.WebClient.ResponseSpec;
import fr.lrgn.yuzurss.exception.NoParserFoundException;
import fr.lrgn.yuzurss.parser.AtomFeedParser;
import fr.lrgn.yuzurss.parser.FeedParser;
+import fr.lrgn.yuzurss.parser.RDFFeedParser;
import fr.lrgn.yuzurss.parser.RSSFeedParser;
import reactor.core.publisher.Flux;
@@ -23,7 +24,7 @@ public class FeedClient
{
private final Logger log = LoggerFactory.getLogger(getClass());
- private final FeedParser[] parsers = new FeedParser[] { new AtomFeedParser(), new RSSFeedParser() };
+ private final FeedParser[] parsers = new FeedParser[] { new AtomFeedParser(), new RDFFeedParser(), new RSSFeedParser() };
@Cacheable("feeds")
public Flux<FeedEntry> getFeed(URI uri)
diff --git a/src/main/java/fr/lrgn/yuzurss/parser/RDFFeedParser.java b/src/main/java/fr/lrgn/yuzurss/parser/RDFFeedParser.java
@@ -0,0 +1,65 @@
+package fr.lrgn.yuzurss.parser;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import org.json.JSONObject;
+
+import fr.lrgn.yuzurss.FeedEntry;
+import fr.lrgn.yuzurss.exception.DateParseException;
+import reactor.core.publisher.Flux;
+
+public class RDFFeedParser extends FeedParser
+{
+ private static final String RDF_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ssXXX"; // 2018-11-03T18:12:15+00:00
+
+ private static final ThreadLocal<SimpleDateFormat> rdfDateFormat = new ThreadLocal<SimpleDateFormat>()
+ {
+ @Override
+ protected SimpleDateFormat initialValue()
+ {
+ return new SimpleDateFormat(RDF_DATE_FORMAT);
+ };
+ };
+
+ @Override
+ public boolean acceptFeed(JSONObject root)
+ {
+ return root.has("rdf:RDF");
+ }
+
+ @Override
+ public Flux<FeedEntry> parseFeed(JSONObject root)
+ {
+ Flux<FeedEntry> entries = Flux.empty();
+
+ final JSONObject channel = root.getJSONObject("rdf:RDF").getJSONObject("channel");
+ final String author = channel.getString("title");
+
+ for (final Object entry : root.getJSONObject("rdf:RDF").getJSONArray("item"))
+ {
+ log.debug("Parsing entry {}", entry);
+
+ final String link = ((JSONObject) entry).getString("link");
+ final String title = ((JSONObject) entry).getString("title");
+ final Date published = parseDate(((JSONObject) entry).getString("dc:date"));
+
+ entries = entries.mergeWith(Flux.just(new FeedEntry(title, link, published, author)));
+ }
+
+ return entries;
+ }
+
+ private Date parseDate(String date)
+ {
+ try
+ {
+ return rdfDateFormat.get().parse(date);
+ }
+ catch (final ParseException ex)
+ {
+ throw new DateParseException(date, RDF_DATE_FORMAT, ex);
+ }
+ }
+}
diff --git a/src/test/java/fr/lrgn/yuzurss/YuzuRssApplicationTests.java b/src/test/java/fr/lrgn/yuzurss/YuzuRssApplicationTests.java
@@ -25,11 +25,14 @@ public class YuzuRssApplicationTests
{
private static final String RSS_PATH = "/rss.xml";
private static final String RSS_RESPONSE = "<rss><channel><title>rss_feed</title><item><link>rss_link1</link><title>rss_title1</title><pubDate>Sun, 09 Dec 2018 09:22:00 +0000</pubDate></item><item><link>rss_link2</link><title>rss_title2</title><pubDate>Fri, 19 Oct 2018 21:49:54 +0000</pubDate></item></channel></rss>";
+ private static final String RDF_PATH = "/rdf.xml";
+ private static final String RDF_RESPONSE = "<rdf:RDF><channel><title>rdf_feed</title></channel><item><link>rdf_link1</link><title>rdf_title1</title><dc:date>2019-01-06T14:18:00+09:00</dc:date></item><item><link>rdf_link2</link><title>rdf_title2</title><dc:date>2019-01-06T16:51:00+09:00</dc:date></item></rdf:RDF>";
private static final String ATOM_PATH = "/atom.xml";
private static final String ATOM_RESPONSE = "<feed><entry><author><name>atom_feed</name></author><link><href>atom_link1</href></link><title>atom_title1</title><published>2018-11-03T18:12:15+00:00</published></entry><entry><author><name>atom_feed</name></author><link><href>atom_link2</href></link><title>atom_title2</title><published>2018-10-30T18:12:15+00:00</published></entry></feed>";
private static final String ATOM_RESULT = "[{\"title\":\"atom_title1\",\"link\":\"atom_link1\",\"published\":\"2018-11-03T18:12:15.000+0000\",\"author\":\"atom_feed\"},{\"title\":\"atom_title2\",\"link\":\"atom_link2\",\"published\":\"2018-10-30T18:12:15.000+0000\",\"author\":\"atom_feed\"}]";
private static final String RSS_RESULT = "[{\"title\":\"rss_title1\",\"link\":\"rss_link1\",\"published\":\"2018-12-09T09:22:00.000+0000\",\"author\":\"rss_feed\"},{\"title\":\"rss_title2\",\"link\":\"rss_link2\",\"published\":\"2018-10-19T21:49:54.000+0000\",\"author\":\"rss_feed\"}]";
+ private static final String RDF_RESULT = "[{\"title\":\"rdf_title2\",\"link\":\"rdf_link2\",\"published\":\"2019-01-06T07:51:00.000+0000\",\"author\":\"rdf_feed\"},{\"title\":\"rdf_title1\",\"link\":\"rdf_link1\",\"published\":\"2019-01-06T05:18:00.000+0000\",\"author\":\"rdf_feed\"}]";
private static final String ATOM_RSS_RESULT = "[{\"title\":\"rss_title1\",\"link\":\"rss_link1\",\"published\":\"2018-12-09T09:22:00.000+0000\",\"author\":\"rss_feed\"},{\"title\":\"atom_title1\",\"link\":\"atom_link1\",\"published\":\"2018-11-03T18:12:15.000+0000\",\"author\":\"atom_feed\"},{\"title\":\"atom_title2\",\"link\":\"atom_link2\",\"published\":\"2018-10-30T18:12:15.000+0000\",\"author\":\"atom_feed\"},{\"title\":\"rss_title2\",\"link\":\"rss_link2\",\"published\":\"2018-10-19T21:49:54.000+0000\",\"author\":\"rss_feed\"}]";
@Autowired
@@ -50,6 +53,8 @@ public class YuzuRssApplicationTests
{
case RSS_PATH:
return new MockResponse().setBody(RSS_RESPONSE);
+ case RDF_PATH:
+ return new MockResponse().setBody(RDF_RESPONSE);
case ATOM_PATH:
return new MockResponse().setBody(ATOM_RESPONSE);
}
@@ -82,6 +87,17 @@ public class YuzuRssApplicationTests
}
@Test
+    public void testRdfFeed() throws UnsupportedEncodingException
+    {
+        // The mock server answers RDF_PATH with RDF_RESPONSE; RDF_RESULT lists the later-dated entry first.
+        final ArrayList<String> feedUrls = new ArrayList<>();
+        feedUrls.add("http://127.0.0.1:" + server.getPort() + "/rdf.xml");
+        final FeedRequestBody request = new FeedRequestBody(feedUrls, 10);
+        webClient.post().uri("/feed").body(BodyInserters.fromObject(request)).exchange()
+                .expectStatus().isOk().expectBody(String.class).isEqualTo(RDF_RESULT);
+    }
+
+ @Test
public void testRssFeed() throws UnsupportedEncodingException
{
final ArrayList<String> urls = new ArrayList<String>();