1 package org.varienaja.util.wikipedia;
2
/**
 * Static helpers that strip MediaWiki markup and HTML remnants from text.
 * <p>Every method takes a String and returns a cleaned copy. The class keeps
 * no mutable state; it only holds precompiled, immutable patterns.</p>
 * @author Varienaja
 */
public class WikiContentHelper {

  // The patterns are compiled once instead of on every call. The type is
  // written fully qualified so that no import line is required.

  /** Matches {@code [[target|label]]}; group 1 captures the label. */
  private static final java.util.regex.Pattern PIPED_LINK =
      java.util.regex.Pattern.compile("\\[\\[[^\\|\\[\\]]*\\|([^\\]]*)\\]\\]");

  /** Matches {@code [[target]]}; group 1 captures the target. */
  private static final java.util.regex.Pattern PLAIN_LINK =
      java.util.regex.Pattern.compile("\\[\\[([^\\]]*)\\]\\]");

  /** Matches any run of two or more apostrophes. */
  private static final java.util.regex.Pattern QUOTE_RUN =
      java.util.regex.Pattern.compile("'{2,}");

  /**
   * Matches one element from its opening tag up to the nearest closing tag
   * with the same name. {@code (?s)} lets the content span line breaks, the
   * word boundary keeps the captured tag name from being shortened by
   * backtracking, and the look-behind refuses self-closing tags as openers.
   */
  private static final java.util.regex.Pattern PAIRED_TAG =
      java.util.regex.Pattern.compile("(?s)<(\\w+)\\b[^>]*(?<!/)>.*?</\\1>");

  /** Matches a single html-comment, possibly spanning several lines. */
  private static final java.util.regex.Pattern COMMENT =
      java.util.regex.Pattern.compile("(?s)<!--.*?-->");

  /**
   * Matches {@code [[Image...]]}. A closing bracket can only be consumed as
   * part of a complete inner {@code [[...]]} link, so a link inside the
   * caption no longer ends the match early. A lone opening bracket is still
   * accepted, as before.
   */
  private static final java.util.regex.Pattern IMAGE =
      java.util.regex.Pattern.compile(
          "\\[\\[Image[^\\[\\]]*(?:(?:\\[\\[[^\\]]*\\]\\]|\\[)[^\\[\\]]*)*\\]\\]");

  /**
   * Removes WikiLink-constructs from plaintext.
   * <p>{@code [[xx|yy]]} becomes {@code yy}<br/>
   * {@code [[xx]]} becomes {@code xx}</p>
   * @param input The String
   * @return The String with WikiLinks removed.
   * @throws NullPointerException if input is null.
   */
  public static String removeLinks(String input) {
    // Piped links go first; otherwise the plain-link pattern would keep "xx|yy".
    String labelsKept = PIPED_LINK.matcher(input).replaceAll("$1");
    return PLAIN_LINK.matcher(labelsKept).replaceAll("$1");
  }

  /**
   * Removes superfluous quotes in a String.<br/>
   * <p>Every run of two or more apostrophes collapses to a single one, so
   * italic (2), bold (3) and bold-italic (5) wiki markup all end up the same:
   * {@code I talked about '''something''' cool} becomes
   * {@code I talked about 'something' cool}.</p>
   * @param content The String.
   * @return The String without superfluous quotes.
   * @throws NullPointerException if content is null.
   */
  public static String sanitizeQuotes(String content) {
    return QUOTE_RUN.matcher(content).replaceAll("'");
  }

  /**
   * Removes html elements, including their content, from a String.
   * <p>{@code lalala.<ref>[sjalala]</ref> lololo} becomes {@code lalala. lololo}<br/>
   * {@code lalala.<a href>[sjalala]</a> lololo} becomes {@code lalala. lololo}</p>
   * <p>Each element is removed on its own, so text between two elements is
   * kept. Self-closing tags such as {@code <ref name="x"/>} are left in
   * place. An element ends at the nearest closing tag of the same name, so
   * nested elements with the same name leave the outer closing tag behind.</p>
   * @param content The String.
   * @return The String without ref-constructs.
   * @throws NullPointerException if content is null.
   */
  public static String removeRefs(String content) {
    return PAIRED_TAG.matcher(content).replaceAll("");
  }

  /**
   * Removes html-comments from a String.
   * <p>Each comment is removed on its own, so text between two comments is
   * kept. A comment may span several lines.</p>
   * @param content The String.
   * @return The String without comments.
   * @throws NullPointerException if content is null.
   */
  public static String removeComment(String content) {
    return COMMENT.matcher(content).replaceAll("");
  }

  /**
   * Removes image-refs from a String.
   * <p>{@code [[Image:JohannesBrahms.jpg|thumb|right|250px|Johannes Brahms]]}</p>
   * <p>One level of WikiLinks inside the caption is supported, e.g.
   * {@code [[Image:x.jpg|thumb|By [[Someone]] in 1889]]}.</p>
   * @param content The String.
   * @return The String without image-links.
   * @throws NullPointerException if content is null.
   */
  public static String removeImages(String content) {
    return IMAGE.matcher(content).replaceAll("");
  }

}