yuzurss

Feed aggregator microservice based on Spring
git clone https://git.neuralcrash.com/yuzurss.git
Log | Files | Refs | README | LICENSE

commit 25d109be5625715da9608c7bca97b591f796c82f
parent f57271cdf809b549ec3a691c493e3a62cfb99088
Author: Kebigon <git@kebigon.xyz>
Date:   Sun,  6 Jan 2019 20:42:22 +0900

Add RDF feed parser

Diffstat:
M src/main/java/fr/lrgn/yuzurss/FeedClient.java | 3 ++-
A src/main/java/fr/lrgn/yuzurss/parser/RDFFeedParser.java | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/test/java/fr/lrgn/yuzurss/YuzuRssApplicationTests.java | 16 ++++++++++++++++
3 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/src/main/java/fr/lrgn/yuzurss/FeedClient.java b/src/main/java/fr/lrgn/yuzurss/FeedClient.java @@ -15,6 +15,7 @@ import org.springframework.web.reactive.function.client.WebClient.ResponseSpec; import fr.lrgn.yuzurss.exception.NoParserFoundException; import fr.lrgn.yuzurss.parser.AtomFeedParser; import fr.lrgn.yuzurss.parser.FeedParser; +import fr.lrgn.yuzurss.parser.RDFFeedParser; import fr.lrgn.yuzurss.parser.RSSFeedParser; import reactor.core.publisher.Flux; @@ -23,7 +24,7 @@ public class FeedClient { private final Logger log = LoggerFactory.getLogger(getClass()); - private final FeedParser[] parsers = new FeedParser[] { new AtomFeedParser(), new RSSFeedParser() }; + private final FeedParser[] parsers = new FeedParser[] { new AtomFeedParser(), new RDFFeedParser(), new RSSFeedParser() }; @Cacheable("feeds") public Flux<FeedEntry> getFeed(URI uri) diff --git a/src/main/java/fr/lrgn/yuzurss/parser/RDFFeedParser.java b/src/main/java/fr/lrgn/yuzurss/parser/RDFFeedParser.java @@ -0,0 +1,65 @@ +package fr.lrgn.yuzurss.parser; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; + +import org.json.JSONObject; + +import fr.lrgn.yuzurss.FeedEntry; +import fr.lrgn.yuzurss.exception.DateParseException; +import reactor.core.publisher.Flux; + +public class RDFFeedParser extends FeedParser +{ + private static final String RDF_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ssXXX"; // 2018-11-03T18:12:15+00:00 + + private static final ThreadLocal<SimpleDateFormat> rdfDateFormat = new ThreadLocal<SimpleDateFormat>() + { + @Override + protected SimpleDateFormat initialValue() + { + return new SimpleDateFormat(RDF_DATE_FORMAT); + }; + }; + + @Override + public boolean acceptFeed(JSONObject root) + { + return root.has("rdf:RDF"); + } + + @Override + public Flux<FeedEntry> parseFeed(JSONObject root) + { + Flux<FeedEntry> entries = Flux.empty(); + + final JSONObject channel = root.getJSONObject("rdf:RDF").getJSONObject("channel"); + final 
String author = channel.getString("title"); + + for (final Object entry : root.getJSONObject("rdf:RDF").getJSONArray("item")) + { + log.debug("Parsing entry {}", entry); + + final String link = ((JSONObject) entry).getString("link"); + final String title = ((JSONObject) entry).getString("title"); + final Date published = parseDate(((JSONObject) entry).getString("dc:date")); + + entries = entries.mergeWith(Flux.just(new FeedEntry(title, link, published, author))); + } + + return entries; + } + + private Date parseDate(String date) + { + try + { + return rdfDateFormat.get().parse(date); + } + catch (final ParseException ex) + { + throw new DateParseException(date, RDF_DATE_FORMAT, ex); + } + } +} diff --git a/src/test/java/fr/lrgn/yuzurss/YuzuRssApplicationTests.java b/src/test/java/fr/lrgn/yuzurss/YuzuRssApplicationTests.java @@ -25,11 +25,14 @@ public class YuzuRssApplicationTests { private static final String RSS_PATH = "/rss.xml"; private static final String RSS_RESPONSE = "<rss><channel><title>rss_feed</title><item><link>rss_link1</link><title>rss_title1</title><pubDate>Sun, 09 Dec 2018 09:22:00 +0000</pubDate></item><item><link>rss_link2</link><title>rss_title2</title><pubDate>Fri, 19 Oct 2018 21:49:54 +0000</pubDate></item></channel></rss>"; + private static final String RDF_PATH = "/rdf.xml"; + private static final String RDF_RESPONSE = "<rdf:RDF><channel><title>rdf_feed</title></channel><item><link>rdf_link1</link><title>rdf_title1</title><dc:date>2019-01-06T14:18:00+09:00</dc:date></item><item><link>rdf_link2</link><title>rdf_title2</title><dc:date>2019-01-06T16:51:00+09:00</dc:date></item></rdf:RDF>"; private static final String ATOM_PATH = "/atom.xml"; private static final String ATOM_RESPONSE = 
"<feed><entry><author><name>atom_feed</name></author><link><href>atom_link1</href></link><title>atom_title1</title><published>2018-11-03T18:12:15+00:00</published></entry><entry><author><name>atom_feed</name></author><link><href>atom_link2</href></link><title>atom_title2</title><published>2018-10-30T18:12:15+00:00</published></entry></feed>"; private static final String ATOM_RESULT = "[{\"title\":\"atom_title1\",\"link\":\"atom_link1\",\"published\":\"2018-11-03T18:12:15.000+0000\",\"author\":\"atom_feed\"},{\"title\":\"atom_title2\",\"link\":\"atom_link2\",\"published\":\"2018-10-30T18:12:15.000+0000\",\"author\":\"atom_feed\"}]"; private static final String RSS_RESULT = "[{\"title\":\"rss_title1\",\"link\":\"rss_link1\",\"published\":\"2018-12-09T09:22:00.000+0000\",\"author\":\"rss_feed\"},{\"title\":\"rss_title2\",\"link\":\"rss_link2\",\"published\":\"2018-10-19T21:49:54.000+0000\",\"author\":\"rss_feed\"}]"; + private static final String RDF_RESULT = "[{\"title\":\"rdf_title2\",\"link\":\"rdf_link2\",\"published\":\"2019-01-06T07:51:00.000+0000\",\"author\":\"rdf_feed\"},{\"title\":\"rdf_title1\",\"link\":\"rdf_link1\",\"published\":\"2019-01-06T05:18:00.000+0000\",\"author\":\"rdf_feed\"}]"; private static final String ATOM_RSS_RESULT = "[{\"title\":\"rss_title1\",\"link\":\"rss_link1\",\"published\":\"2018-12-09T09:22:00.000+0000\",\"author\":\"rss_feed\"},{\"title\":\"atom_title1\",\"link\":\"atom_link1\",\"published\":\"2018-11-03T18:12:15.000+0000\",\"author\":\"atom_feed\"},{\"title\":\"atom_title2\",\"link\":\"atom_link2\",\"published\":\"2018-10-30T18:12:15.000+0000\",\"author\":\"atom_feed\"},{\"title\":\"rss_title2\",\"link\":\"rss_link2\",\"published\":\"2018-10-19T21:49:54.000+0000\",\"author\":\"rss_feed\"}]"; @Autowired @@ -50,6 +53,8 @@ public class YuzuRssApplicationTests { case RSS_PATH: return new MockResponse().setBody(RSS_RESPONSE); + case RDF_PATH: + return new MockResponse().setBody(RDF_RESPONSE); case ATOM_PATH: return new 
MockResponse().setBody(ATOM_RESPONSE); } @@ -82,6 +87,17 @@ public class YuzuRssApplicationTests } @Test + public void testRdfFeed() throws UnsupportedEncodingException + { + final ArrayList<String> urls = new ArrayList<String>(); + urls.add("http://127.0.0.1:" + server.getPort() + "/rdf.xml"); + final FeedRequestBody body = new FeedRequestBody(urls, 10); + + webClient.post().uri("/feed").body(BodyInserters.fromObject(body)).exchange().expectStatus().isOk().expectBody(String.class) + .isEqualTo(RDF_RESULT); + } + + @Test public void testRssFeed() throws UnsupportedEncodingException { final ArrayList<String> urls = new ArrayList<String>();