View Javadoc

1   package org.musicontroller.core.searching;
2   
3   import java.io.IOException;
4   import java.util.Calendar;
5   import java.util.Date;
6   import java.util.GregorianCalendar;
7   import java.util.HashMap;
8   import java.util.Iterator;
9   import java.util.List;
10  import java.util.Map;
11  import java.util.Map.Entry;
12  
13  import org.apache.log4j.Logger;
14  import org.apache.lucene.analysis.standard.StandardAnalyzer;
15  import org.apache.lucene.document.Document;
16  import org.apache.lucene.document.Field;
17  import org.apache.lucene.index.IndexReader;
18  import org.apache.lucene.index.IndexWriter;
19  import org.apache.lucene.queryParser.MultiFieldQueryParser;
20  import org.apache.lucene.queryParser.ParseException;
21  import org.apache.lucene.search.Hits;
22  import org.apache.lucene.search.IndexSearcher;
23  import org.apache.lucene.search.Query;
24  import org.apache.lucene.search.Searcher;
25  import org.musicontroller.core.AIRelation;
26  import org.musicontroller.core.Artist;
27  import org.musicontroller.core.Band;
28  import org.musicontroller.core.Contract_BA;
29  import org.musicontroller.core.Contract_PS;
30  import org.musicontroller.core.Event;
31  import org.musicontroller.core.Instrument;
32  import org.musicontroller.core.Keyword;
33  import org.musicontroller.core.Playlist;
34  import org.musicontroller.core.Song;
35  import org.musicontroller.dao.Dao;
36  import org.musicontroller.service.FileUtils;
37  import org.varienaja.comments.Comment;
38  import org.varienaja.comments.CommentService;
39  import org.varienaja.util.DateTools;
40  
41  
42  /**
43   * The Indexer periodically scans the database, and updates the Lucene-index.
44   * The 'changed'-field of all Documents, contains the 'changed'-property of the
45   * object. This is used to see if the object has been changed since the last indexation.
46   * Only changed objects are updated in the index to keep disk-io as low as possible. This
47   * also means that we have to be sure that the changed-property of all indexed objects
48   * really is what is says!
49   * 
50   * @author Varienaja
51   * @version $Id: Indexer.java,v 1.1 2010/03/16 18:55:42 varienaja Exp $
52   */
53  public class Indexer {
54  	private static final Logger log = Logger.getLogger(Indexer.class);
55  	
56  	private static final String DATEPATTERN = "yyyyMMddHHmmssSSS";
57  	
58  	private long songCount;
59  	private long bandCount;
60  	private long playlistCount;
61  	private long artistCount;
62  	private long instrumentCount;
63  	private long keywordCount;
64  	private Dao dao;
65  	private CommentService _reviewService;
66  	private Date _lastUpdate;
67  	
68  	private IndexWriter getIndexWriter() {
69  		IndexWriter writer = null;
70  		try {
71  			writer = new IndexWriter(FileUtils.getIndexdir(), new StandardAnalyzer(), false); //Try to open an existing index
72  		} catch (IOException e) {
73  			log.debug("Error creating searchindex "+e+" Trying force-create.");
74  			if (writer==null) {
75  				try {
76  					writer = new IndexWriter(FileUtils.getIndexdir(), new StandardAnalyzer(), true); //If it fails: create a new one
77  				} catch (IOException e2) {
78  					log.error("Error creating searchindex: "+e);
79  					
80  				}
81  			}
82  		}
83  		return writer;
84  	}
85  	
86  	private IndexReader getIndexReader() {
87  		IndexReader reader = null;
88  		try {
89  			reader = IndexReader.open(FileUtils.getIndexdir());
90  		} catch (IOException e) {
91  			log.error("Error reading searchindex: "+e);
92  		}
93  		return reader;
94  	}
95  	
96  	public Indexer(Dao dao) {
97  		songCount=0L;
98  		bandCount=0L;
99  		playlistCount=0L;
100 		artistCount=0L;
101 		instrumentCount=0L;
102 		keywordCount=0L;
103 		this.dao = dao;
104 		_reviewService = new CommentService();
105 		_lastUpdate = null;
106 	}
107 	
108 	/**
109 	 * Reindexes all items. Reindexing is done lazily, items that did not change
110 	 * since the last index-update are not processed.
111 	 */
112 	public synchronized void reindexAll() {
113 		Date now = new Date();
114 		log.debug("Indexing started at: "+now);
115 		Map<String,Object> params = new HashMap<String,Object>();
116 		params.put("lastUpdate",getLastUpdate());
117 		
118 		reindexSongs(params);
119 		reindexArtists(params);
120 		reindexBands(params);
121 		reindexInstruments(params);
122 		reindexKeywords(params);
123 		reindexPlaylists(params);
124 		
125 		storeLastUpdate(now);
126 		log.debug("Optimizing searchindex...");
127 		optimizeIndex();
128 		log.debug("Searchindex optimized.");
129 		log.debug("Indexing finished");
130 	}
131 
132 	/**
133 	 * Reindexes all Songs. Only Songs that changed after the parameter are regarded.
134 	 * @param params Map of Strings to Objects. Must contain an entry for "lastUpdate"--&gt;some Date
135 	 */
136 	@SuppressWarnings("unchecked")
137 	private void reindexSongs(Map<String, Object> params) {
138 		Iterator<Long> it;
139 		List<Long> songIDs = dao.search("select s.id from Song s where s.changed>:lastUpdate", params, 0);
140 		it = songIDs.iterator();
141 		while (it.hasNext()) {
142 			long songid = it.next();
143 			Song song = dao.getSongById(songid);
144 			log.debug("Reindexing song: " + song);
145 			reindexSong(song);
146 			it.remove();
147 		}
148 		log.debug("Songs indexed: "+songCount);
149 	}
150 
151 	/**
152 	 * Reindexes all Playlists. Only Playlists that changed after the parameter are regarded.
153 	 * @param params Map of Strings to Objects. Must contain an entry for "lastUpdate"--&gt;some Date
154 	 */
155 	@SuppressWarnings("unchecked")
156 	private void reindexPlaylists(Map<String, Object> params) {
157 		Iterator<Long> it;
158 		List<Long> playlistIDs = dao.search("select p.id from Playlist p where p.changed>:lastUpdate", params, 0);
159 		it = playlistIDs.iterator();
160 		while (it.hasNext()) {
161 			long playlistid = it.next();
162 			Playlist playlist = dao.getPlaylistById(playlistid, null);
163 			reindexPlaylist(playlist);
164 			it.remove();
165 		}
166 		log.debug("Playlists indexed: "+playlistCount);
167 	}
168 
169 	/**
170 	 * Reindexes all Keywords. Only Keywords that changed after the parameter are regarded.
171 	 * @param params Map of Strings to Objects. Must contain an entry for "lastUpdate"--&gt;some Date
172 	 */
173 	@SuppressWarnings("unchecked")
174 	private void reindexKeywords(Map<String, Object> params) {
175 		Iterator<Long> it;
176 		List<Long> keywordIDs = dao.search("select k.id from Keyword k where k.changed>:lastUpdate", params, 0);
177 		it = keywordIDs.iterator();
178 		while (it.hasNext()) {
179 			long keywordid = it.next();
180 			Keyword keyword = dao.getKeywordById(keywordid);
181 			reindexKeyword(keyword);
182 			it.remove();
183 		}
184 		log.debug("Keywords indexed: "+keywordCount);
185 	}
186 
187 	/**
188 	 * Reindexes all Instruments. Only Instruments that changed after the parameter are regarded.
189 	 * @param params Map of Strings to Objects. Must contain an entry for "lastUpdate"--&gt;some Date
190 	 */
191 	@SuppressWarnings("unchecked")
192 	private void reindexInstruments(Map<String, Object> params) {
193 		Iterator<Long> it;
194 		List<Long> instrumentIDs = dao.search("select i.id from Instrument i where i.changed>:lastUpdate", params, 0);
195 		it = instrumentIDs.iterator();
196 		while (it.hasNext()) {
197 			long instrumentid = it.next();
198 			Instrument instrument = dao.getInstrumentById(instrumentid);
199 			reindexInstrument(instrument);
200 			it.remove();
201 		}
202 		log.debug("Instruments indexed: "+instrumentCount);
203 	}
204 
205 	/**
206 	 * Reindexes all Bands. Only Bands that changed after the parameter are regarded.
207 	 * @param params Map of Strings to Objects. Must contain an entry for "lastUpdate"--&gt;some Date
208 	 */
209 	@SuppressWarnings("unchecked")
210 	private void reindexBands(Map<String, Object> params) {
211 		Iterator<Long> it;
212 		List<Long> bandIDs = dao.search("select b.id from Band b where b.changed>:lastUpdate", params, 0);
213 		it = bandIDs.iterator();
214 		while (it.hasNext()) {
215 			long bandid = it.next();
216 			Band band = dao.getBandById(bandid);
217 			reindexBand(band);
218 			it.remove();
219 		}
220 		log.debug("Bands indexed: "+bandCount);
221 	}
222 
223 	/**
224 	 * Reindexes all Artists. Only Artists that changed after the parameter are regarded.
225 	 * @param params Map of Strings to Objects. Must contain an entry for "lastUpdate"--&gt;some Date
226 	 */
227 	@SuppressWarnings("unchecked")
228 	private void reindexArtists(Map<String, Object> params) {
229 		Iterator<Long> it;
230 		List<Long> artistIDs = dao.search("select a.id from Artist a where a.changed>:lastUpdate", params, 0);
231 		it = artistIDs.iterator();
232 		while (it.hasNext()) {
233 			long artistid = it.next();
234 			Artist artist = dao.getArtistById(artistid);
235 			reindexArtist(artist);
236 			it.remove();
237 		}
238 		log.debug("Artists indexed: "+artistCount);
239 	}
240 
241 	// FIXME BIG MEMORY-LEAK IN THIS METHOD
242 	public Document reindexSong(Song song) {
243 		String id = Long.toString(song.getId());
244 		Document doc = null;
245 		if (song.getChanged().after(getLastUpdate())) {
246 			doc = new Document();
247 	
248 			StringBuilder sb = new StringBuilder();
249 			
250 			Band band = song.getBand();
251 			sb.append(band.getName());
252 			sb.append(" ");
253 			
254 			for (Keyword keyword : song.getKeywordbag().getKeywords()) {
255 				sb.append(keyword.getName());
256 				sb.append(" ");
257 			}
258 			
259 			if (song.getAibag()!=null) {
260 				for (AIRelation air : song.getAibag().getRelations()) {
261 					Artist artist = dao.getArtistById(air.getArtist_id());
262 					sb.append(artist.getFormattedName());
263 					sb.append(" ");
264 					
265 					Instrument instrument = dao.getInstrumentById(air.getInstrument_id());
266 					sb.append(instrument.getName());
267 					sb.append(" ");
268 				}
269 			}
270 			
271 			Calendar c = new GregorianCalendar();
272 			c.add(Calendar.YEAR,-1);
273 			Date oneyearago = c.getTime();
274 			
275 	    	Map<String,Integer> map = new HashMap<String,Integer>();
276 	    	for (Event ev : song.getEvents()) {
277 	    		if (ev.getEventkind()==Event.played && ev.getUser()!=null && ev.getMoment().after(oneyearago)) {
278 	    			
279 	    			String username = ev.getUser().getLoginname();
280 	    			Integer i = map.get(username);
281 	    			if (i==null) {
282 	    				i = 1;
283 	        			map.put(username,i);
284 	    			} else {
285 	    				map.put(username,i+1);
286 	    			}
287 	       		}
288 	    	}
289 	    	
290 	    	for (Entry<String,Integer> entry : map.entrySet()) {
291 	        	doc.add(new Field("user_"+entry.getKey(),entry.getValue().toString(),Field.Store.YES,Field.Index.UN_TOKENIZED));
292 	    	}
293 	    	
294 	    	for (Contract_PS pscontract : song.getPlaylists()) {
295 	    		Playlist playlist = pscontract.getPlaylist();
296 	    		sb.append(playlist.getName());
297 	    		sb.append(" ");
298 	    	}
299 			doc.add(new Field("contents",sb.toString(),Field.Store.YES,Field.Index.TOKENIZED));
300 	
301 	   		updateDocument(doc,song.getName(),"song",id);
302 		}
303     	dao.evict(song);
304 		songCount++;
305     	return doc;
306 	}
307 	
308 	public Document reindexBand(Band band) {
309 		//TODO put an eventcount per user in the document!
310 		
311 		String id = Long.toString(band.getId());
312 		Document doc = null;
313 		
314 		if (band.getChanged().after(getLastUpdate())) {
315 			doc = new Document();
316 	
317 			StringBuilder sb = new StringBuilder();
318 			for (Contract_BA bac : band.getArtists()) {
319 				Artist artist = bac.getArtist();
320 				sb.append(artist.getFormattedName());
321 				sb.append(" ");
322 			}
323 			
324 			Playlist playlist = dao.songsByBand(band.getId());
325 	    	for (Contract_PS pscontract : playlist.getSongs()) {
326 	    		Song song = pscontract.getSong();
327 				sb.append(song.getName());
328 				sb.append(" ");
329 	    	}
330 	    	doc.add(new Field("contents",sb.toString(),Field.Store.YES,Field.Index.TOKENIZED));
331 			
332 	   		updateDocument(doc,band.getName(),"band",id);
333 		}
334     	dao.evict(band);
335     	bandCount++;
336 		return doc;
337 	}
338 
339 	public Document reindexArtist(Artist artist) {
340 		String id = Long.toString(artist.getId());
341 		Document doc = null;
342 		if (artist.getChanged().after(getLastUpdate())) {
343 			doc = new Document();
344 	   		updateDocument(doc,artist.getFormattedName(),"artist",id);
345 		}
346     	dao.evict(artist);
347     	artistCount++;
348 		return doc;
349 	}
350 
351 	public Document reindexInstrument(Instrument instrument) {
352 		String id = Long.toString(instrument.getId());
353 		Document doc = null;
354 		if (instrument.getChanged().after(getLastUpdate())) {
355 			doc = new Document();
356 			updateDocument(doc,instrument.getName(),"instrument",id);
357 		}
358     	dao.evict(instrument);
359     	instrumentCount++;
360 		return doc;
361 	}
362 
363 	public Document reindexKeyword(Keyword keyword) {
364 		String id = Long.toString(keyword.getId());
365 		Document doc = null;
366 		
367 		if (keyword.getChanged().after(getLastUpdate())) {
368 			doc = new Document();
369 			updateDocument(doc,keyword.getName(),"keyword",id);
370 		}
371     	dao.evict(keyword);
372     	keywordCount++;
373 		return doc;
374 	}
375 
376 	public Document reindexPlaylist(Playlist playlist) {
377 		//TODO put an eventcount per user in the document!
378 		String id = Long.toString(playlist.getId());
379 		Document doc = null;
380 
381 		if (playlist.getChanged().after(getLastUpdate())) {
382 			doc = new Document();
383 			StringBuilder sb = new StringBuilder();
384 		    for (Contract_PS pscontract : playlist.getSongs()) {
385 		    	Song song = pscontract.getSong();
386 		    	sb.append(song.getName());
387 		    	sb.append(" ");
388 		    }
389 		    for (Comment review : _reviewService.getComments(playlist)) {
390 		    	sb.append(review.toString());
391 		    }
392 		    doc.add(new Field("contents",sb.toString(),Field.Store.YES,Field.Index.TOKENIZED));
393 			
394 		   	updateDocument(doc,playlist.getName(),"playlist",id);
395 		}
396     	dao.evict(playlist);
397     	playlistCount++;
398 		return doc;
399 	}
400 	
401 	/**
402 	 * Updates a document in the searchindex. If there already is a document in the
403 	 * searchindex identified by the objecttype and objectid, the document is overwritten.
404 	 * Otherwise, an new document is added to the searchindex.
405 	 * @param doc The document to add or update.
406 	 * @param name The name to be used as name-property in the document
407 	 * @param objecttype The description of the type the document is about
408 	 * @param objectid The id of the object the document is about
409 	 * @return Whether the operation succeeded.
410 	 */
411 	private boolean updateDocument(Document doc, String name, String objecttype, String objectid) {
412 		doc.add(new Field("name",name,Field.Store.YES,Field.Index.UN_TOKENIZED));
413 		doc.add(new Field("objecttype",objecttype,Field.Store.YES,Field.Index.UN_TOKENIZED));
414 		doc.add(new Field("objectid",objectid,Field.Store.YES,Field.Index.UN_TOKENIZED));
415 		doc.add(new Field("uniqueid",objecttype+objectid,Field.Store.YES,Field.Index.UN_TOKENIZED));
416 		//use a unique id for documents. The combined objecttype+objectid which was used before
417 		//causes clashes. Sometimes, a Playlist with id 75 is returned instead of Song with that
418 		//id when checking if there already exists a document with that id.
419 		
420 		//As it is very well possible that there's (for instance) a Band with >100 songs,
421 		//we boost the name-field a little, to make sure that you'll find the Band in most
422 		//cases when you provide the bandname in the query.
423 		Field namefield = new Field("contents",name,Field.Store.YES,Field.Index.TOKENIZED);
424 		namefield.setBoost(100);
425 		doc.add(namefield);
426 		
427 		try {
428 			IndexReader reader = getIndexReader();
429 			if (reader!=null) {
430 				Searcher searcher = new IndexSearcher(reader);
431 				
432 			    Query query = MultiFieldQueryParser.parse(new String[]{objecttype+objectid},new String[]{"uniqueid"},new StandardAnalyzer());
433 				Hits hits = searcher.search(query);
434 				if (hits.length()>0) {
435 					Document existing = hits.doc(0);
436 					if ((objecttype+objectid).equals(existing.getField("uniqueid").stringValue())) {
437 						reader.deleteDocument(hits.id(0));
438 					}
439 				}
440 				reader.close();
441 			}
442 			
443     		storeDocument(doc);
444     		return true;
445 		} catch (ParseException pe) {
446 			log.error("Error trying to find existing document: "+pe);
447     	} catch (IOException e) {
448     		log.debug("Error adding document to searchindex: "+e);
449     	}
450 		return false;
451 	}
452 	
453 	/**
454 	 * Searches the index for the existence of a Document of a certain type
455 	 * having a certain id. 
456 	 * TODO Use this in the update procedure as well
457 	 * @param objecttype The object-type
458 	 * @param objectid The id of the object
459 	 * @return The Document, or null if nothing was found.
460 	 */
461 	private Document findDocument(String objecttype, String objectid) {
462 		Document result = null;
463 		try {
464 			IndexReader reader = getIndexReader();
465 			if (reader!=null) {
466 				Searcher searcher = new IndexSearcher(reader);
467 				
468 			    Query query = MultiFieldQueryParser.parse(new String[]{objecttype+objectid},new String[]{"uniqueid"},new StandardAnalyzer());
469 				Hits hits = searcher.search(query);
470 				if (hits.length()>0) {
471 					result = hits.doc(0);
472 					if (!(objecttype+objectid).equals(result.getField("uniqueid").stringValue())) {
473 						result = null;
474 					}
475 				}
476 				reader.close();
477 			}
478 		} catch (ParseException pe) {
479 			log.error("Error trying to find existing document: "+pe);
480     	} catch (IOException e) {
481 			log.error("Error trying to find existing document: "+e);
482     	}
483     	return result;
484 	}
485 	
486 	/**
487 	 * Returns the moment at which the index was last updated.
488 	 * @return The String representation of this moment, or null if the index
489 	 * has never been updated.
490 	 */
491 	private Date getLastUpdate() {
492 		if (_lastUpdate == null) {
493 			_lastUpdate = new Date(0); // Long, long ago.
494 			
495 			Document doc = findDocument("lastupdate","0");
496 			if  (doc!=null) {
497 				Field field = doc.getField("changed");
498 				if (field != null) {
499 					_lastUpdate = DateTools.parseDate(field.stringValue(),DATEPATTERN);
500 				}
501 				log.debug("Last update of the searchindex: " + _lastUpdate);
502 			}
503 		}			
504 		return _lastUpdate;
505 	}
506 		
507 	private void storeLastUpdate(Date now) {
508 		Document doc = new Document();
509 		String fmtDate = DateTools.formatDate(now,DATEPATTERN);
510 		doc.add(new Field("changed",fmtDate,Field.Store.YES,Field.Index.UN_TOKENIZED));
511 		updateDocument(doc,"","lastupdate","0");
512 	}
513 	
514 	private void storeDocument(Document doc) {
515 		try {
516 			IndexWriter writer = getIndexWriter();
517 			writer.addDocument(doc);
518 			writer.close();
519 		} catch (IOException e) {
520 			log.error("Error storing document: "+e);
521 		}
522 	}
523 	
524 	private void optimizeIndex() {
525 		try {
526 			IndexWriter writer = getIndexWriter();
527 			writer.optimize();
528 			writer.close();
529 		} catch (IOException e) {
530 			log.error("Error optimizing index: "+e);
531 		}
532 	}
533 	
534 }