Collections: Collection.py

File Collection.py, 16.9 KB (added by njaal, 22 months ago)
Line 
1# Author: Njaal Borch <Njaal.Borch@norut.no>
2
3from pysqlite2 import dbapi2 as sqlite
4import re
5import os.path
6
7def sql_escape(string):
8
9    """Escape a string for use with SQL
10   
11    """
12
13    # escape_string screws up if any fancy letters are present
14    return string.replace("'", "\\'")
15    # the mysqldb stuff is on drugs, just escape this by my own
16    #for char in ["\'", "\"", "(", ")", "[", "]"]:
17    #    string = string.replace(char, "\\"+char)
18    return string
19
20class CollectionDB:
21   
22    """A CollectionDB is a class to bundle resources together.  It is
23    based on a sqlite backend, and can serialize to XML (RSS video
24    feed).  """
25
26   
27    def __init__(self, db_name="Collections.data"):
28
29        """Create a new collection DB.  db_name can be ':memory:' to
30        create a in-memory db only, or a filename to persist it.
31       
32        """
33       
34        self.db = sqlite.connect(db_name, isolation_level=None)
35        self.prepare()
36
37    def cursor(self):
38        return self.db.cursor()
39   
40    def prepare(self):
41
42        """Prepare the database - check that all tables
43        exist, or create them if htey dont
44        """
45       
46        cursor = self.db.cursor()
47
48        # Get list of old tables
49        cursor.execute("select tbl_name from sqlite_master where type='table' order by tbl_name")
50       
51        tables = []
52        for row in cursor.fetchall():
53            tables.append(row[0])
54
55        if not "collection" in tables:
56
57            cursor.execute("""
58            create table collection(
59            name TEXT,
60            id BIGINT UNSIGNED PRIMARY KEY,
61            description TEXT,
62            type SMALLINT UNSIGNED)
63            """)
64            cursor.execute("CREATE INDEX c_name_idx ON collection(name)")
65           
66
67        if not "collection_entry" in tables:
68
69            cursor.execute("""
70            create table collection_entry(
71            name TEXT,
72            id TEXT,
73            description TEXT,
74            url TEXT NOT NULL,
75            type SMALLINT UNSIGNED,
76            major INT UNSIGNED DEFAULT 0,
77            minor INT UNSIGNED DEFAULT 0,
78            duplicate INT UNSIGNED DEFAULT 0,
79            quality TEXT DEFAULT '',
80            collection_id BIGINT UNSIGNED NOT NULL,
81            time_stamp TEXT DEFAULT NULL,
82            size BIGINT UNSIGNED DEFAULT 0,
83            PRIMARY KEY (name, collection_id),
84            FOREIGN KEY (collection_id) REFERENCES collection(id)
85            )
86            """)
87            # Also create an index on the cursor id?
88            # This might be automatic as it is a foreign key?
89            #cursor.execute("CREATE INDEX ce_cid_idx ON collection_entry(cursor_id)")
90               
91
92    def clear(self):
93       
94        """Clear the entire database!
95       
96        """
97        cursor = self.db.cursor()
98        cursor.execute("DELETE FROM collection_entry")
99        cursor.execute("DELETE FROM collection")
100       
101    def get_names(self):
102
103        """Return a list of known collections
104       
105        """
106
107        # TODO: Implement sorting on size/something
108        cursor = self.db.cursor()
109        SQL = "SELECT name FROM collection"
110        cursor.execute(SQL)
111
112        names = []
113        for row in cursor.fetchall():
114            names.append(row[0].encode("utf-8", "replace"))
115        return names
116   
117    def get_collection(self, name=None, id=None):
118
119        """Get a collection object for a given collection.  Can either
120        find the collection based on the name or the id.  If both are
121        ignored, an exception is thrown.  If no matching collection
122        was found, a "NotFoundException" is thrown
123
124        """
125
126        if not name and not id:
127            raise Exception("Missing parameter")
128       
129        SQL = "SELECT id FROM collection WHERE "
130        if name:
131            SQL += "name='%s'"%sql_escape(self.clean_up_string(name, fixCaps=True))
132        else:
133            SQL += "id=%d"%id
134
135        cursor = self.db.cursor()
136        cursor.execute(SQL)
137        row = cursor.fetchone()
138        if not row:
139            raise NotFoundException()
140
141        return Collection(self, row[0])
142
143    def remove_collection(self, name=None, id=None):
144       
145        """Remove a collection object for a given collection.  Can either
146        find the collection based on the name or the id.  If both are
147        ignored, an exception is thrown.  If no matching collection
148        was found, a "NotFoundException" is thrown
149
150        """
151
152        if not name and not id:
153            raise Exception("Missing parameter")
154
155        SQL = "DELETE FROM collection WHERE "
156        if name:
157            SQL += "name='%s'"%sql_escape(self.clean_up_string(name, fixCaps=True))
158        else:
159            SQL += "id=%s"%id
160
161        cursor = self.db.cursor()
162        cursor.execute(SQL)
163        if cursor.rowcount < 1:
164            raise NotFoundException()
165       
166    def new_collection(self, name, description, type=0, id=None):
167
168        """Create a new collection.  After a collection has been
169        created (or if it was fetched from the CollectionDB using
170        'get_collection', all update functions on the collection
171        object will propagate to the DB directly. 
172       
173        """
174
175        assert name
176       
177        cursor = self.db.cursor()
178        if not id:
179            # As cursor.lastrowid is not the same as 'id', get it first
180            cursor.execute("SELECT RANDOM()")
181            id = cursor.fetchone()[0]
182
183        SQL = "INSERT INTO collection(name,id,description,type) "\
184              "VALUES ('%s', '%s', '%s', %d)"%\
185              (sql_escape(self.clean_up_string(name, fixCaps=True).encode("utf8", "replace")), id,
186               sql_escape(description.encode("utf8", "replace")), type)
187        cursor.execute(SQL)
188
189        # TODO: Some decent error handling here?
190        if cursor.rowcount == 0:
191            raise Exception("Create failed for unknown reason")
192
193        # Return the collection object
194        return self.get_collection(id=id)
195
196    def clean_up_string(self, str, fixCaps=False):
197
198        """Try to clean up names by removing extra whitespaces, change
199        '.', '_' etc into spaces and so fort.  Returns a cleaned string
200       
201        """
202        for c in ['.', '_', '-']:
203            str = str.replace(c, " ")
204
205        str = str.replace("'", "") # Just remove these, they are annoying
206
207        while str.find("  ") > -1:
208            str = str.replace("  ", " ")
209           
210        if fixCaps:
211            str = str[0].upper() + str[1:].lower()
212
213        return str.strip()
214
215    def _smart_add_make_coll(self, title, major, minor, rest, ext, quality):
216       
217        """Internal function, do not use.  Resolve/create new
218        collection and return the stuff smart_add is supposed to
219        return
220       
221        """
222
223        title = self.clean_up_string(title, fixCaps=True)
224        rest = self.clean_up_string(rest)
225        try:
226            coll = self.get_collection(title)
227        except NotFoundException:
228            # Create a new collection
229            coll = self.new_collection(title, rest)
230            if not coll:
231                raise Exception("Could not smart_add, don't know why")
232
233        major = int(major)
234        minor = int(minor)
235       
236        # Return the required information
237        return (coll, (major, minor, 0, quality)) # TODO: Add type
238       
239    def smart_add(self, name, url):
240
241        """Try to parse the name, and if it is recognized, the guessed
242        correct collection named will be returned together with the
243        necessary information to create the entry
244
245        Returns a touple (collection, (major, minor, type, quality))
246        or raises "NotFoundException" if unable to understand the
247        name.
248       
249        """
250
251        if not (name and url):
252            raise Exception("Require both name or url")
253       
254        # Split the extension off
255        (name, ext) = os.path.splitext(name)
256       
257        # First try to detect the quality
258        quality = ""
259        lname = name.lower()
260        for q in ["720p", "1080i", "1080p", "720i"]:
261            if lname.find(q) > -1:
262                quality = q
263                break
264       
265        # First check the typical "title s1e12 therest"
266        found=False
267        m = re.match("(.*)[sS](\d+)[eE](\d+)(.*)", name)
268        if m:
269            (title, major, minor, rest) = m.groups()
270            return self._smart_add_make_coll(title, major, minor,
271                                             rest, ext, quality)
272
273        # Check "title 1x21 therest"
274        m = re.match("(.*) (\d+)[xX](\d+)(.*)", self.clean_up_string(name))
275        if m:
276            (title, major, minor, rest) = m.groups()
277            return self._smart_add_make_coll(title, major, minor,
278                                             rest, ext, quality)
279
280        # Look for title 0123 therest (season 1, episode 23).  Check
281        # that the collection already exists, as this can give us too
282        # many false positives
283        m = re.match("(.*) (\d\d)(\d\d)(.*)", self.clean_up_string(name))
284        if m:
285            (title, major, minor, rest) = m.groups()
286            title = self.clean_up_string(title, fixCaps=True)
287            # Do we have this collection?
288            try:
289                self.get_collection(title)
290                return self._smart_add_make_coll(title, major, minor,
291                                                 rest, ext, quality)
292            except:
293                # Ignore it, we don't know about it
294                pass
295           
296        m = re.match("(.*) (\d)(\d\d)(.*)", self.clean_up_string(name))
297        if m:
298            (title, major, minor, rest) = m.groups()
299            title = self.clean_up_string(title, fixCaps=True)
300            # Do we have this collection?
301            try:
302                self.get_collection(title)
303                return self._smart_add_make_coll(title, major, minor,
304                                                 rest, ext, quality)
305            except:
306                # Ignore it, we don't know about it
307                pass
308
309        # Title season 3 (complete seasons)
310        # TODO: Also require already existing collection?
311        m = re.match("(.*)season\W*(\d+)(.*)",
312                     self.clean_up_string(name).lower())
313        if m:
314            (title, major, rest) = m.groups()
315            minor=0
316            return self._smart_add_make_coll(title, major, minor,
317                                             rest, ext, quality)
318       
319        raise NotFoundException()
320   
321    def remove_entry_from_all(self, id):
322        """Erase an entry from all collections
323
324        """
325
326        SQL = "DELETE FROM collection_entry WHERE id='%s'"%id
327        cursor = self.db.cursor()
328        print SQL
329        cursor.execute(SQL)
330
331class Collection:
332    """The Collection class is an object describing an actual
333    collection.  It is used to manipulate the collection directly.
334
335    """
336   
337    def __init__(self, db, id):
338
339        """Create a new collection object.  This should never be done
340        directly, but only done by the CollectionDB.  Create a
341        collection by using CollectionDB.new_collectiion()
342
343        Do not use the internal variables directly, but use the set/get
344        functions in order to ensure database persistence.
345       
346        """
347       
348        self.__id = id
349        self.__db = db
350
351    def get_id(self):
352        return self.__id
353   
354    def set_description(self, description):
355        SQL = "UPDATE collection SET description='%s' WHERE id='%s'"%\
356              (sql_escape(description), self.__id)
357        cursor = self.__db.cursor()
358        cursor.execute(SQL)
359        if cursor.rowcount == 0:
360            raise Exception("Update failed") # TODO: Get reason
361       
362    def get_description(self):
363        SQL = "SELECT description FROM collection WHERE id='%s'"%self.__id
364        cursor = self.__db.cursor()
365        cursor.execute(SQL)
366        if cursor.rowcount == 0:
367            raise NotFoundException("Could not find collection %d"%self.__id)
368        return cursor.fetchone()[0]
369   
370    def set_name(self, name):
371        SQL = "UPDATE collection SET name='%s' WHERE id='%s'"%\
372              (sql_escape(self.__db.clean_up_string(name)), self.__id)
373        cursor = self.__db.cursor()
374        cursor.execute(SQL)
375        if cursor.rowcount == 0:
376            raise Exception("Update failed") # TODO: Get reason
377
378    def get_name(self):
379        SQL = "SELECT name FROM collection WHERE id='%s'"%self.__id
380        cursor = self.__db.cursor()
381        cursor.execute(SQL)
382        if cursor.rowcount == 0:
383            raise NotFoundException("Could not find collection %d"%self.__id)
384        return cursor.fetchone()[0]
385
386    def get_entry(self, id=None, name=None):
387
388        """Get an entry of this one (possibly allowing duplicates).
389        Returns a touple
390        (id, name, description, url, major, minor, type, quality, time_stamp, size)
391       
392        """
393       
394        assert (id or name)
395        SQL = "SELECT id, name, description, url, major, minor, "\
396              "type, quality, time_stamp, size FROM collection_entry WHERE "
397        if id:
398            SQL += "id='%s'"%id
399        else:
400            SQL += "name='%s'"%sql_escape(name)
401        cursor = self.__db.cursor()
402        cursor.execute(SQL)
403        row = cursor.fetchone()
404        if not row:
405            raise NotFoundException()
406        (id, name, description, url, major, minor, type, quality, ts, size) = row
407        return (id, name, description, url, major, minor, type, quality, ts, size)
408       
409    def get_entries(self, major=None, minor=None,
410                    quality=None, reverse=False,
411                    dupes=True):
412
413        """Get a list of elements from this collection.  The elements
414        will be returned as a list of touples on the form:
415        (id, name, description, url, major, minor, type, quality, time_stamp, size)
416       
417        If 'reverse' is set to True, the entries will be returned with
418        the highest value on top.
419
420        If dupes is set to False and quality is None, the entry with
421        the highest quality is selected. If all have the same quality,
422        the resource added first will be returned.
423
424        """
425
426        # TODO: Allow other sorts?
427        SQL = "SELECT id, name, description, url, major, minor, type, "\
428              "quality, time_stamp, size FROM collection_entry WHERE "\
429              "collection_id=%s "%self.__id
430       
431        if major:
432            SQL += "AND major=%d "%major
433        if minor:
434            SQL += "AND minor=%d "%minor
435        if quality != None:
436            SQL += "AND quality='%s'"%quality.lower()
437        if reverse:
438            SQL += "ORDER BY major DESC,minor DESC,quality DESC,duplicate"
439        else:
440            SQL += "ORDER BY major,minor,quality DESC,duplicate"
441       
442        cursor = self.__db.cursor()
443        cursor.execute(SQL)
444
445        found = []
446        list = []
447        for (id, name, description, url, major, minor, type, quality, time_stamp, size) in cursor.fetchall():
448            if not dupes:
449                if (major,minor) in found:
450                    continue
451                found.append((major,minor))
452           
453            list.append((id, name, description, url,
454                         major, minor, type, quality, time_stamp, size))
455        return list
456       
457           
458   
459    def add_entry(self, id, name, description, url, major=0, minor=0, type=0, quality=None, time_stamp=None, size=0):
460
461        """Add an element to the collection
462       
463        """
464       
465        if not time_stamp:
466            time_stamp = "NULL"
467        else:
468            time_stamp = "'%s'"%time_stamp
469
470        # First we need to find out if this is a duplicate
471        # TODO: Should lock table here if parallel access can happen!
472        SQL = "SELECT MAX(duplicate) FROM collection_entry "\
473              "WHERE collection_id=%s AND major=%s AND minor=%d"%\
474              (self.__id, major, minor)
475        cursor = self.__db.cursor()
476        cursor.execute(SQL)
477        row = cursor.fetchone()
478        if row[0] == None:
479            dupe = 0
480        else:
481            dupe = row[0] + 1
482
483        if not quality:
484            quality = ""
485        else:
486            quality = quality.lower()
487
488        SQL = "INSERT INTO collection_entry (id, name, description, url, "\
489              "type, major, minor, duplicate, quality, collection_id, "\
490              "time_stamp, size) VALUES "\
491              "('%s', '%s', '%s', '%s', "\
492              "%d, %d, %d, %d, '%s', %d, %s, %d)"%\
493              (sql_escape(id), sql_escape(name),
494               sql_escape(description),
495               sql_escape(url),
496               type, major, minor, dupe, quality, self.__id, time_stamp,
497               size)
498        cursor.execute(SQL)
499        if cursor.rowcount == 0:
500            raise Exception("Insert failed for uknown reason")
501
502        return True
503
504    def remove_entry_by_id(self, id):
505       
506        """Remove an entry from this collection. 
507
508        """
509       
510        SQL = "DELETE FROM collection_entry WHERE id='%s' AND collection_id='%s'"%(id, self.__id)
511        cursor = self.db.cursor()
512        cursor.execute(SQL)
513       
514       
515class NotFoundException(Exception):
516    pass