View Javadoc

1   package org.musicontroller.core.jobs;
2   
3   import java.io.File;
4   import java.io.FileInputStream;
5   import java.io.FileNotFoundException;
6   import java.io.FileOutputStream;
7   import java.io.IOException;
8   import java.io.ObjectInputStream;
9   import java.io.ObjectOutputStream;
10  import java.util.HashSet;
11  import java.util.LinkedList;
12  import java.util.List;
13  import java.util.Set;
14  import java.util.regex.Matcher;
15  import java.util.regex.Pattern;
16  
17  import org.apache.log4j.Logger;
18  import org.musicontroller.importer.ImporterException;
19  import org.musicontroller.importer.MP3InspectorJID3Lib;
20  import org.musicontroller.importer.MediafileInspector;
21  import org.musicontroller.importer.MusicArchiveBean;
22  import org.musicontroller.importer.MusicArchiveEntryBean;
23  import org.musicontroller.service.FileUtils;
24  import org.varienaja.util.FileOperations;
25  import org.varienaja.util.wikipedia.WikipediaSearcher;
26  
27  /**
28   * <p>
29   * This Job is responsible for extracting and finding metadata from music-files. 
30   * Job that must be called periodically. When this Job is run, look in the
31   * Unpack-directory for work to do. When there were files to be inspected, they
32   * are inspected one by one. Finally, a new MusicArchiveBean is added to the
33   * internal collection of beans in this class.</p>
34   * 
35   * <p>Every once in a while, this Collection should be read and processed.</p>
36   * 
37   * TODO The 'unpack'-directory should be periodically scanned. Empty directories can be deleted.
38   * 
39   * @author Varienaja
40   * @version $Id: MetadataExtractJob.java,v 1.1 2010/03/16 18:55:42 varienaja Exp $
41   */
42  public class MetadataExtractJob {
43  	private static final Logger log = Logger.getLogger(MetadataExtractJob.class);
44  
45  	private static final String INSPECTED = "inspected";
46  	
47  	/**
48  	 * All MusicArchives that have been inspected.
49  	 * This data is persistent.
50  	 */
51  	private static List<MusicArchiveBean> _inspected;
52  
53  	/**
54  	 * @return The List of inspected songs in the form of MusicArchiveBeans.
55  	 */
56  	public static synchronized List<MusicArchiveBean> getMusicArchiveBeanList() {
57  		return _inspected;
58  	}
59  	
60  	/**
61  	 * Removes a music archive bean from (persistent) storage. All inspected
62  	 * music-files are removed too!
63  	 * 
64  	 * <p>Only directories within the unpack directory can be removed. Path
65  	 * specifications containing <tt>..</tt> are forbidden.
66  	 * 
67  	 * @param archiveBean The archive bean to be removed.
68  	 */
69  	public static synchronized void removeMusicArchiveBean(MusicArchiveBean archiveBean) throws ImporterException {
70  		if (archiveBean==null) return;
71  
72  		for(MusicArchiveEntryBean entryBean: archiveBean.getEntrySet()) {
73  			String filename = entryBean.getEntryName();
74  			if(directoryValidForRemoval(filename)) {
75  				int sepPos = filename.lastIndexOf(File.separator);
76  				if(sepPos>-1) {
77  					String dir = filename.substring(0,sepPos);
78  					if (FileOperations.deleteDir(new File(dir))) {
79  						_inspected.remove(archiveBean);
80  						storeInspectedItems();
81  					}
82  				}
83  			}
84  		}
85  	}
86  	
87  	/**
88  	 * Decides if the directory can be deleted or not. Only subdirectories of the
89  	 * MusiController <tt>upload</tt> and <tt>unpack</tt> areas are valid.
90  	 * Path specifications containing <tt>..</tt> are forbidden.
91  	 * 
92  	 * @param dir The name of the directory to test.
93  	 * @return True if the directory can be deleted, false otherwise.
94  	 */
95  	private static boolean directoryValidForRemoval(String dir) {
96  		if(dir==null || dir.length()<1) {
97  			return false;
98  		}
99  		// The directory name should match the full path of the unpack
100 		// directory plus a directory name within the unpack directory to be valid.
101 		StringBuilder regExpSb = new StringBuilder();
102 		regExpSb.append("^");
103 		regExpSb.append(FileUtils.getUnpackdir());
104 		regExpSb.append(File.separator);
105 		regExpSb.append(".+$");
106 		String regExp = regExpSb.toString();
107 		if(dir.matches(regExp)) {
108 			return dir.indexOf("..")<0;
109 		}
110 		return false;
111 	}
112 
113 	protected static synchronized File getPersistentStorage() {
114 		return new File(FileUtils.getIndexdir()+File.separator+"beans.dat");
115 	}
116 	
117 	/**
118 	 * Creates a new MetadataExtractJob object. On object-creation, the _inspected-property
119 	 * is initialized from persistent storage.
120 	 */
121 	public MetadataExtractJob() {
122 		initInspectedArchives();
123 	}
124 
125 	/**
126 	 * Reads already inspected Archives from persistent storage.
127 	 */
128 	@SuppressWarnings("unchecked")
129 	private synchronized void initInspectedArchives() {
130 		if (_inspected==null) {
131 			_inspected = new LinkedList<MusicArchiveBean>();
132 		
133 			File storage = getPersistentStorage();
134 			if (storage.exists()) {
135 				try {
136 					ObjectInputStream in = new ObjectInputStream(new FileInputStream(storage));
137 					_inspected = (List<MusicArchiveBean>) in.readObject();
138 					in.close();
139 					log.debug("Recovered "+_inspected.size()+" beans from persistent storage");
140 				} catch (FileNotFoundException e) {
141 					log.error("Error opening beanstorage: "+e);
142 				} catch (IOException e) {
143 					log.error("Error reading from beanstorage: "+e);
144 				} catch (ClassNotFoundException e) {
145 					log.error("Error decoding beanstorage: "+e);
146 				}
147 			}
148 		} //else: already initialized
149 	}
150 	
151 	/**
152 	 * Call this method to execute the job: scan for work to do, and work!
153 	 * @return The amount of files processed
154 	 */
155 	public int execute() {
156 		int result = 0;
157 		File unpack = new File(FileUtils.getUnpackdir());
158 		String[] files = unpack.list();
159 		if (files!=null) {
160 			for (String fileName : files) {
161 				if (fileName.startsWith(ImportJob.READY_FOR_INSPECTION)) {
162 					String fn = FileUtils.getUnpackdir()+File.separator+fileName;
163 					result += inspectDirectory(fn);
164 
165 					//Rename after inspection
166 					File dirFile = new File(fn);
167 					String renameddirname = FileOperations.createUniqueFilename(
168 							FileUtils.getUnpackdir()+File.separator,INSPECTED);
169 					File renameddir = new File(renameddirname);
170 					if (dirFile.renameTo(renameddir)) {
171 						fn += File.separator; //To make sure we're not renaming false matching files
172 						renameddirname += File.separator;
173 						
174 						//Loop through all Beans, and see that the filesystem-links are adjusted to the new location.
175 						for (MusicArchiveBean mab : getMusicArchiveBeanList()) {
176 							for (MusicArchiveEntryBean maeb : mab.getEntrySet()) {
177 								String entryname = maeb.getEntryName();
178 								if (entryname.startsWith(fn)) {
179 									// Escape all '(' and ')' chars in the file name.
180 									String escapedFileName = fn.replaceAll("\\(", "\\\\(").replaceAll("\\)", "\\\\)");									
181 									String replaced = escapedFileName.replaceAll("\\+", "\\\\+");
182 									String newName = entryname.replaceFirst(replaced,renameddirname);
183 									maeb.setEntryName(newName);
184 								}
185 							}
186 						}
187 					} else {
188 						log.error("Error while renaming directory: "+fn+" to: "+renameddirname);
189 					}
190 					storeInspectedItems();
191 				}
192 			}
193 		}
194 		
195 		return result;
196 	}
197 	
198 	/**
199 	 * Inspects a directory recursively. It will try to find metadata for
200 	 * all files it finds.
201 	 * @param directory The dirctory to process
202 	 * @return The number of files inspected
203 	 */
204 	private int inspectDirectory(String directory) {
205 		log.debug("inspection of archive "+directory+" started.");
206 		int result = 0;
207 		File dirFile = new File(directory);
208 		String[] files = dirFile.list();
209 		if (files!=null) {
210 			MusicArchiveBean archiveBean = new MusicArchiveBean();
211 			for (String fileName : files) {
212 				if (!fileName.startsWith(".")) {
213 					String fullname = directory+File.separator+fileName;
214 					File inspect = new File(fullname);
215 					if (inspect.isDirectory()) {
216 						result += inspectDirectory(fullname);
217 					} else {
218 						MusicArchiveEntryBean bean = inspectFile(fullname,inspect);
219 						if (bean!=null) {
220 							archiveBean.addEntry(bean);
221 							result++;
222 						}
223 					}
224 				}
225 			}
226 			
227 			if (archiveBean.getEntrySet().size()>0) {
228 				String name = guessName(archiveBean);
229 				archiveBean.setArchiveName(name);
230 				getMusicArchiveBeanList().add(archiveBean);
231 			}
232 		}
233 		log.debug("Inspection of archive "+directory+" finished, found "+result+" tracks.");
234 		return result;
235 	}
236 	
237 	/**
238 	 * Tries to generate a human-readable name for a music archive. It will 
239 	 * return something like "Bandname - Playlistname". If there are several bands
240 	 * or several playlists in the music archive, the bandname will be "Various" and
241 	 * the playlistname will be "Various". If no information whatsoever could be found,
242 	 * the result will be "Unknown".
243 	 * @param archiveBean The music archive.
244 	 * @return A human-readable name.
245 	 */
246 	protected String guessName(MusicArchiveBean archiveBean) {
247 		String bandname = "Unknown";
248 		String albumname = "Unknown";
249 		
250 		String forbiddenCharacters = "[^ -_a-zA-Z0-9]";
251 		
252 		Set<String> bands = new HashSet<String>();
253 		Set<String> albums = new HashSet<String>();
254 		if (archiveBean!=null) {
255 			for (MusicArchiveEntryBean entry : archiveBean.getEntrySet()) {
256 				// Remove any weird characters from the band name and playlist name.
257 				if(entry.getBandName()!=null) {
258 					bands.add(entry.getBandName().replaceAll(forbiddenCharacters,""));
259 				}
260 				if(entry.getPlaylistName()!=null) {
261 					albums.add(entry.getPlaylistName().replaceAll(forbiddenCharacters,""));
262 				}
263 			}
264 		}
265 		
266 		if (bands.size()>0) {
267 			if (bands.size()==1) {
268 				bandname = bands.iterator().next();
269 			} else {
270 				bandname = "Various";
271 			}
272 		}
273 		if (albums.size()>0) {
274 			if (albums.size()==1) {
275 				albumname = albums.iterator().next();
276 			} else {
277 				albumname = "Various";
278 			}
279 		}
280 				
281 		return bandname + " - " + albumname;
282 	}
283 	
284 	/**
285 	 * Extracts metadata from a file into a MusicArchiveEntryBean
286 	 * @param filename The filename
287 	 * @param toInspect The File itself
288 	 * @return A MusicArchiveEntryBean containing metadata
289 	 */
290 	private MusicArchiveEntryBean inspectFile(String filename, File toInspect) {
291 		log.debug("Analyzing: "+filename);
292 		MP3InspectorJID3Lib inspector;
293 		try {
294 			inspector = new MP3InspectorJID3Lib(toInspect);
295 			MusicArchiveEntryBean entryBean = inspectEntry(inspector);
296 			entryBean.setEntryName(filename);
297 			
298 			/* TODO Check the following:
299 			 *  1. Does the playlist of this bean already exist? --> set playlist-exists-bit
300 			 *  2. Does the band/song-combination already exists? --> set song-exists-bit
301 			 *  
302 			 *  In the GUI, let users decide for the first: add to playlist OR create new playlist
303 			 *                               for the second: don't import song but DO import the new playlist OR import the song
304 			 */
305 			
306 			return entryBean;
307 		} catch (IOException e) {
308 			log.error("Error analyzing file: "+e);
309 		} catch (ImporterException e) {
310 			log.error("Error analyzing file: "+e);
311 		}
312 		
313 		return null;
314 	}
315 	
316 	/**
317 	 * Inspect the contents of a single entry. Return the
318 	 * metadata information in a MusicArchiveEntryBean. 
319 	 * <p>Translates all illegal characters in the song name and band name into
320 	 * undescore characters "_".
321 	 * <p>If the band name is empty or equal to "Various", this method tries to find
322 	 * the band name in the song name tag by search for the separator string " / ".
323 	 * If found, the first part will be the band name and the second part the song name.
324 	 * <p>This method will try to set the song index from the MP3 file name if the song
325 	 * index could not be found inside the MP3 file.
326 	 * @param inspector The MP3Inspector file. This object knows about the structure
327 	 * 					of MP3 files and can retrieve metadata information from it.
328 	 * @return A MusicArchiveEntryBean with the metadata information of this entry
329 	 * 		   in the music archive.
330 	 */
331 	private MusicArchiveEntryBean inspectEntry(MediafileInspector inspector) {
332 		MusicArchiveEntryBean bean = new MusicArchiveEntryBean();
333 		bean.setPlaylistName(translate(inspector.getPlaylistname()));
334 		bean.setBandName(translate(inspector.getBandname()));
335 		bean.setSongName(translate(inspector.getSongname()));
336 		bean.setSongLength(inspector.getSonglength());
337 		bean.setSongIndex(inspector.getPlaylistrow());
338 		// TODO The Heuristics should move to the jtag-library!
339 		// Heuristic : If the bandname is empty or "Various" try to
340 		//       find the band name in the song title.
341 		if (bean.getBandName()==null || bean.getBandName().equals("")
342 				|| bean.getBandName().equalsIgnoreCase("Various")) {
343 			if (bean.getSongName()!=null && bean.getSongName().indexOf(" / ")>0) {
344 				int index = bean.getSongName().indexOf(" / ");
345 				String bandname = bean.getSongName().substring(0, index);
346 				String songname = bean.getSongName().substring(index+3);
347 				bean.setBandName(bandname);
348 				bean.setSongName(songname);
349 			} 
350 		}
351 		// Heuristic: If the File name matches "03 - Dress Rehearsal Rag.mp3", and
352 		// The Song name is still empty, then set the song name from the file name.
353 		if (bean.getSongName()==null || bean.getSongName().length()<1) {
354 			Pattern pattern = Pattern.compile("^\\d+\\s+-(.+).[Mm][Pp]3");
355 			Matcher matcher = pattern.matcher(inspector.getFile().getName());
356 			if (matcher.find()) {
357 				String songName = matcher.group(1).trim();
358 				bean.setSongName(songName);
359 			}
360 		}
361 		// Heuristic: If the Song Index is less than 1, try to determine the song
362 		// index from the MP3 file name. Most often, the file name starts with the
363 		// song index, like: "01 Protection.mp3".
364 		if (bean.getSongIndex()<1) {
365 			String songname = inspector.getFile().getName();
366 			Pattern pattern = Pattern.compile("^\\d+\\s+");
367 			Matcher matcher = pattern.matcher(songname);
368 			if (matcher.find()) {
369 				String songIndexMatch = matcher.group().trim();
370 				int songIndex = Integer.parseInt(songIndexMatch);
371 				bean.setSongIndex(songIndex);
372 			}
373 		}
374 		Set<String> keyWords = new HashSet<String>();
375 
376 		
377 		/* Don't trust the genre from the read tag, go to wikipedia first
378 		 * and try to find out if they have more precise keywords listed.
379 		 * We'll be mostly interested in the genre's specified on this song's
380 		 * playlist.
381 		 */  
382 		String id3Keyword = inspector.getKeyword();
383 		String[] wikiKeywords = WikipediaSearcher.getKeywords(bean.getBandName(),bean.getPlaylistName());
384 		if (wikiKeywords==null || wikiKeywords.length==0) {
385 			keyWords.add(id3Keyword);
386 		} else {
387 			for (String wikiKeyword : wikiKeywords) {
388 				keyWords.add(wikiKeyword);
389 			}
390 		}
391 		bean.setKeywords(keyWords);
392 		
393 		return bean;
394 	}
395 	
396     private String translate(String str) {
397     	//TODO Move to the jtag-library
398     	if (str==null) {
399     		return str;
400     	}
401         // Remove any quotes.
402         str = str.replaceAll("'", "");
403 
404         // Convert non-ascii characters to similar ascii characters.
405         for (char[] chars : _CHAR_MAP) {
406             str = str.replace(chars[0], chars[1]);
407         }
408 
409         return str;
410     }
411 
412     /**
413      * Maps from miscellaneous accented characters to similar-looking ASCII characters.
414      */
415     private final char[][] _CHAR_MAP = {
416        	//TODO Move to the jtag-library
417            {'\u00C0', 'A'}, {'\u00C1', 'A'}, {'\u00C2', 'A'}, {'\u00C3', 'A'}, {'\u00C4', 'A'}, {'\u00C5', 'A'}, {'\u00C6', 'A'},
418            {'\u00C8', 'E'}, {'\u00C9', 'E'}, {'\u00CA', 'E'}, {'\u00CB', 'E'}, {'\u00CC', 'I'}, {'\u00CD', 'I'}, {'\u00CE', 'I'},
419            {'\u00CF', 'I'}, {'\u00D2', 'O'}, {'\u00D3', 'O'}, {'\u00D4', 'O'}, {'\u00D5', 'O'}, {'\u00D6', 'O'}, {'\u00D9', 'U'},
420            {'\u00DA', 'U'}, {'\u00DB', 'U'}, {'\u00DC', 'U'}, {'\u00DF', 'B'}, {'\u00E0', 'a'}, {'\u00E1', 'a'}, {'\u00E2', 'a'},
421            {'\u00E3', 'a'}, {'\u00E4', 'a'}, {'\u00E5', 'a'}, {'\u00E6', 'a'}, {'\u00E7', 'c'}, {'\u00E8', 'e'}, {'\u00E9', 'e'},
422            {'\u00EA', 'e'}, {'\u00EB', 'e'}, {'\u00EC', 'i'}, {'\u00ED', 'i'}, {'\u00EE', 'i'}, {'\u00EF', 'i'}, {'\u00F1', 'n'},
423            {'\u00F2', 'o'}, {'\u00F3', 'o'}, {'\u00F4', 'o'}, {'\u00F5', 'o'}, {'\u00F6', 'o'}, {'\u00F8', 'o'}, {'\u00F9', 'u'},
424            {'\u00FA', 'u'}, {'\u00FB', 'u'}, {'\u00FC', 'u'},
425     };
426     
427     /**
428      * Stores the content of the _inspected-property on disk, to prevent dataloss whenever the
429      * application shuts down or crashes. 
430      * @return Whether the operation succeeded or not.
431      */
432     private static synchronized boolean storeInspectedItems() {
433 		File storage = getPersistentStorage();
434 		if (_inspected.size()==0) {
435 			return (storage.exists()) ? storage.delete() : true;
436 		} else {
437 			try {
438 				ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(storage));
439 				out.writeObject(_inspected);
440 				out.flush();
441 				out.close();
442 				return true;
443 			} catch (FileNotFoundException e) {
444 				log.error("Error opening beanstorage: "+e);
445 			} catch (IOException e) {
446 				log.error("Error writing to beanstorage: "+e);
447 			}
448 			return false;
449 		}
450     }
451 
452 
453 }