Mongoengine
基础教程
pip 安装
python -m pip install mongoengine
github
源码安装
# Clone over HTTPS: GitHub no longer serves the unauthenticated git:// protocol.
git clone https://github.com/mongoengine/mongoengine.git
cd mongoengine
python setup.py install
本地连接
from mongoengine import connect

# Open the default connection to the database named 'db'.
connect('db')
URL
连接
# Without a username and password
connect(host='mongodb://hostname:port/db')
# With a username and password
connect(host='mongodb://user:password@hostname:port/db')
指定参数连接
# `port` is the server's integer port; `authentication_source` names the
# database that holds the user's credentials (e.g. 'admin' or the target db).
connect('db', host='host', port=port, authentication_source='admin')
多个数据库连接时,需要指定别名
# Register one connection per database; each alias is what a Document's
# meta['db_alias'] refers to.
connect(alias='user-db-alias', db='user-db')
connect(alias='book-db-alias', db='book-db')
connect(alias='users-books-db-alias', db='user-books-db')


class User(Document):
    """User document stored through the 'user-db-alias' connection."""

    name = StringField()

    meta = {'db_alias': 'user-db-alias'}


class Book(Document):
    """Book document stored through the 'book-db-alias' connection."""

    name = StringField()

    meta = {'db_alias': 'book-db-alias'}


class AuthorBooks(Document):
    """Link between a User and a Book, stored through its own connection."""

    author = ReferenceField(User)
    book = ReferenceField(Book)

    # Must match an alias registered with connect() above.
    meta = {'db_alias': 'users-books-db-alias'}
断开连接
disconnect(alias='db')  # With no argument, the default connection is closed
数据库切换
from mongoengine.context_managers import switch_db
from mongoengine import *


class User(Document):
    """User document bound to the 'user-db' connection alias by default."""

    name = StringField()

    meta = {'db_alias': 'user-db'}


# Inside the block, User is temporarily bound to the 'archive-user-db' alias.
with switch_db(User, 'archive-user-db') as User:
    User(name='Ross').save()  # Saved to the User collection of 'archive-user-db'
集合切换
from mongoengine.context_managers import switch_collection
from mongoengine import *


class Group(Document):
    """Group document; stored in its default collection unless switched."""

    name = StringField()


Group(name='test').save()  # Saved to the default Group collection

# Inside the block, Group is temporarily bound to the 'group2000' collection.
with switch_collection(Group, 'group2000') as Group:
    Group(name='hello Group').save()  # Saved to group2000
常规文档定义: 文档根据其字段顺序进行序列化
from mongoengine import *
import datetime


class Page(Document):
    """Regular document with a fixed set of declared fields."""

    title = StringField(max_length=200, required=True)
    # Pass the callable itself so the default is evaluated for each new
    # document, not once when the class is defined.
    date_modified = DateTimeField(default=datetime.datetime.utcnow)
动态文档定义:可以随意添加新的字段
from mongoengine import *


class Page(DynamicDocument):
    """Dynamic document: fields beyond the declared ones may be set freely."""

    title = StringField(max_length=200, required=True)
常规字段
BinaryField
BooleanField
DateTimeField
DictField
FloatField
IntField
ListField
EmbeddedDocumentField
ReferenceField
class User(Document):
    """Document that other documents point at."""

    name = StringField()


class Page(Document):
    """Page holding a reference to its author."""

    content = StringField()
    author = ReferenceField(User)


class Employee(Document):
    """Employee showing self- and forward references by name."""

    name = StringField()
    boss = ReferenceField('self')  # Self-reference
    profile_page = ReferenceField('ProfilePage')  # Class defined later; referenced by name


class ProfilePage(Document):
    """Target of Employee.profile_page."""

    content = StringField()
GenericReferenceField
class Link(Document):
    """One kind of document a Bookmark can point at."""

    url = StringField()


class Post(Document):
    """Another kind of document a Bookmark can point at."""

    title = StringField()


class Bookmark(Document):
    """Bookmark whose target may be a document of any type."""

    bookmark_object = GenericReferenceField()


link = Link(url='http://hmarr.com')
link.save()
post = Post(title='Using MongoEngine')
post.save()

# The same field accepts both a Link and a Post.
Bookmark(bookmark_object=link).save()
Bookmark(bookmark_object=post).save()
字段参数
db_field(default: None):MongoDB中字段名称
required(default: False):如果数据中没有这个字段,会报ValidationError
default(default: None):如果没有赋值,则使用默认值
unique(default: False):唯一值
unique_with(default: None): 联合唯一
primary_key(default: False): 主键
choices(default: None): 限制字段的值范围
validation(Optional):对该字段进行验证
def _not_empty(val):
if not val:
raise ValidationError('value can not be empty')
class Person(Document):
    """Person whose name is checked by the _not_empty validator."""

    name = StringField(validation=_not_empty)
**kwargs
元类
class Page(Document):
    """Example document showing the options accepted in ``meta``."""

    category = IntField()
    rating = StringField(unique=True)
    title = StringField(max_length=200, required=True)
    created = DateTimeField()

    meta = {
        'allow_inheritance': True,
        'ordering': ['-created'],  # ordering
        'collection': 'cmsPage',  # collection name
        'max_documents': 1000,  # max documents
        'max_size': 200000,  # max size
        'indexes': [
            'title',  # single-field index
            '$title',  # text index
            '#title',  # hashed index
            ('title', '-rating'),  # compound index
            ('category', '_cls'),  # compound index
            {
                # Dict-style index specs list their fields under 'fields'.
                'fields': ['created'],
                'expireAfterSeconds': 3600,  # ttl index
            },
        ],
    }
过滤查询
# Return a QuerySet that will only iterate over users whose 'country' field
# is set to 'uk'
uk_users = User.objects(country='uk')
# Query on a field of an embedded document
uk_pages = Page.objects(author__country='uk')
查询操作符
# Only find users whose age is 18 or less
young_users = User.objects(age__lte=18)
ne
- not equal to
lt
- less than
lte
- less than or equal to
gt
- greater than
gte
- greater than or equal to
not
- negate a standard check, may be used before other operators (e.g. Q(age__not__mod=(5, 0)))
in
- value is in list (a list of values should be provided)
nin
- value is not in list (a list of values should be provided)
mod
- value % x == y
, where x and y are two provided values
all
- every item in list of values provided is in array
size
- the size of the array is
exists
- value for field exists
字符串查询
exact
- string field exactly matches value
iexact
- string field exactly matches value (case insensitive)
contains
- string field contains value
icontains
- string field contains value (case insensitive)
startswith
- string field starts with value
istartswith
- string field starts with value (case insensitive)
endswith
- string field ends with value
iendswith
- string field ends with value (case insensitive)
wholeword
- string field contains whole word
iwholeword
- string field contains whole word (case insensitive)
regex
- string field match by regex
iregex
- string field match by regex (case insensitive)
match
- performs an $elemMatch so you can match an entire document within an array
列表查询
class Page(Document):
    """Page carrying a list of string tags."""

    # ListField takes a field instance, not the field class.
    tags = ListField(StringField())


# This will match all pages that have the word 'coding' as an item in the
# 'tags' list
Page.objects(tags='coding')
# query by position
Page.objects(tags__0='db')
# fetch part of a list, skip 5, limit 10
Page.objects.fields(slice__comments=[5, 10])
原始查询
# Raw query: the dict given as __raw__ is the query document itself.
Page.objects(__raw__={'tags': 'coding'})
排序
# ASC date
blogs = BlogPost.objects().order_by('date')
# ASC date, DESC title
blogs = BlogPost.objects().order_by('+date', '-title')
限制和跳过结果
方法一: 列表切片(建议使用)
users = User.objects[10:15]
方法二: skip + limit
users = User.objects.skip(10).limit(5)
有且仅有一个结果时,建议使用.first()
获取结果
默认文档查询
修改objects方法返回结果
class BlogPost(Document):
    """Blog post whose default ``objects`` manager is overridden."""

    title = StringField()
    date = DateTimeField()

    @queryset_manager
    def objects(doc_cls, queryset):
        """Return the queryset ordered by 'date' descending."""
        return queryset.order_by('-date')
自定义管理器方法
class BlogPost(Document):
    """Blog post with an extra manager that only yields published posts."""

    title = StringField()
    published = BooleanField()

    @queryset_manager
    def live_posts(doc_cls, queryset):
        """Return only the posts with ``published=True``."""
        return queryset.filter(published=True)


BlogPost(title='test1', published=False).save()
BlogPost(title='test2', published=True).save()
assert len(BlogPost.objects) == 2  # True
assert len(BlogPost.live_posts()) == 1  # True
自定义查询集
class AwesomerQuerySet(QuerySet):
    """QuerySet subclass adding a reusable filter method."""

    def get_awesome(self):
        """Return the documents with ``awesome=True``."""
        return self.filter(awesome=True)


class Page(Document):
    """Document whose ``objects`` manager yields AwesomerQuerySet instances."""

    meta = {'queryset_class': AwesomerQuerySet}


Page.objects.get_awesome()
聚合
.count()
:计数结果
num_users = User.objects.count()
.sum()
:字段值求和,如果不存在,则忽略
yearly_expense = Employee.objects.sum('salary')
.average()
:字段平均值
mean_age = User.objects.average('age')
.item_frequencies()
:字段频率
class Article(Document):
    """Article carrying a list of string tags."""

    tag = ListField(StringField())


tag_freqs = Article.objects.item_frequencies('tag', normalize=True)

from operator import itemgetter

# Ten most frequent tags, highest frequency first.
top_tags = sorted(tag_freqs.items(), key=itemgetter(1), reverse=True)[:10]
MongoDB aggregation
class Person(Document):
    """Minimal document used to demonstrate an aggregation pipeline."""

    name = StringField()


Person(name='John').save()
Person(name='Bob').save()

pipeline = [
    {'$sort': {'name': -1}},
    {'$project': {'_id': 0, 'name': {'$toUpper': '$name'}}},
]
data = Person.objects().aggregate(pipeline)
# aggregate() yields a cursor, so materialize it before comparing; the
# descending sort on 'name' puts John ahead of Bob.
assert list(data) == [{'name': 'JOHN'}, {'name': 'BOB'}]
查询效率和性能优化
查询字段的子集
class Film(Document):
    """Film used to show loading only a subset of fields."""

    title = StringField()
    year = IntField()
    rating = IntField(default=3)


Film(title='The Shawshank Redemption', year=1994, rating=5).save()
f = Film.objects.only('title').first()
f.title  # 'The Shawshank Redemption'
f.year  # None
f.rating  # 3, default value (the stored 5 was not loaded)
exclude()
和only()
作用相反;
之后如果需要缺失的字段,直接调用reload()
方法
获取关联查询
select_related(max_depth=1)
关闭自动解引用
post = Post.objects.no_dereference().first()
assert(isinstance(post.author, DBRef)) # True
高级查询
from datetime import datetime

from mongoengine.queryset.visitor import Q

# Get published posts
Post.objects(Q(published=True) | Q(published_date__lte=datetime.now()))
# Get top posts: (featured AND hits >= 1000) OR hits >= 5000
Post.objects((Q(featured=True) & Q(hits__gte=1000)) | Q(hits__gte=5000))
内置验证:调用.validate()
或者.save()
方法时验证
from mongoengine import Document, EmailField, IntField


class User(Document):
    """User with built-in email and integer-range validation."""

    email = EmailField()
    age = IntField(min_value=0, max_value=99)


user = User(email='invalid@', age=24)
user.validate()  # raises ValidationError (Invalid email address: ['email'])
user.save()  # raises ValidationError (Invalid email address: ['email'])

user2 = User(email='john..doe@garbage.com', age=1000)
user2.save()  # raises ValidationError (Integer value is too large: ['age'])
自定义验证
def not_john_doe(name):
    """Field validator that rejects the exact name 'John Doe'.

    Raises:
        ValidationError: if ``name`` equals 'John Doe'.
    """
    if name == 'John Doe':
        raise ValidationError('John Doe is not a valid name')
class Person(Document):
    """Person whose full_name is checked by the not_john_doe validator."""

    full_name = StringField(validation=not_john_doe)


Person(full_name='Billy Doe').save()
Person(full_name='John Doe').save()  # raises ValidationError (John Doe is not ...)
clean
方法:提供自定义模型验证和/或在验证之前修改某些字段值.当验证开启,调用save()
方法时调用
class Essay(Document):
    """Essay with document-level validation implemented in ``clean``."""

    status = StringField(choices=('Published', 'Draft'), required=True)
    pub_date = DateTimeField()

    def clean(self):
        """Reject drafts that carry a pub_date; default it for published essays.

        Raises:
            ValidationError: if status is 'Draft' and pub_date is set.
        """
        # Validate that only published essays have a 'pub_date'
        if self.status == 'Draft' and self.pub_date is not None:
            raise ValidationError('Draft entries should not ...')
        # Set the pub_date for published items if not set
        if self.status == 'Published' and self.pub_date is None:
            self.pub_date = datetime.now()
自定义字段
class AgeField(IntField):
    """IntField that additionally rejects the value 60."""

    def validate(self, value):
        """Run IntField's validation, then report an error if value is 60."""
        super(AgeField, self).validate(value)  # let IntField.validate run first
        if value == 60:
            self.error('60 is not allowed')


class Person(Document):
    """Person whose age uses the custom AgeField."""

    age = AgeField(min_value=0, max_value=99)


Person(age=20).save()  # passes
Person(age=1000).save()
# raises ValidationError (Integer value is too large: ['age'])
Person(age=60).save()
# raises ValidationError (Person:None) (60 is not allowed: ['age'])
跳过验证
class Person(Document):
    """Person with an upper bound on age."""

    age = IntField(max_value=100)


# validate=False skips validation, so the out-of-range value is saved.
Person(age=1000).save(validate=False)
GridFS
GridFS
用于存储和检索那些超过16MB(BSON文档大小限制)的文件(如:图片,音频,视频等)。GridFS
同时也是文件存储的一种方式,但它是存储在MongoDB
的集合中。
GridFS
会将大文件对象分割成多个小的chunk
(文件片段),默认为255KB/个,每个chunk
将作为MongoDB
的一个文档被存储在chunks
集合中。
GridFS
用两个集合来存储一个文件:fs.files
和fs.chunks
。文件实际内容存储在.chunks
中,和文件有关的meta
数据将会被存在.files
集合中。
写入
class Animal(Document):
    """Animal with a photo stored in GridFS through a FileField."""

    genus = StringField()
    family = StringField()
    photo = FileField()


marmot = Animal(genus='Marmota', family='Sciuridae')

# Open in binary mode; put() stores the file content in GridFS.
with open('marmot.jpg', 'rb') as fd:
    marmot.photo.put(fd, content_type='image/jpeg')
marmot.save()
查询
marmot = Animal.objects(genus='Marmota').first()
photo = marmot.photo.read()
content_type = marmot.photo.content_type

# If you need to read the content of a file multiple times, you'll need to
# 'rewind' the file-like object
marmot = Animal.objects(genus='Marmota').first()
content1 = marmot.photo.read()
# read() yields bytes on Python 3, so compare against b'' rather than ''.
assert content1 != b''
content2 = marmot.photo.read()  # will be empty
assert content2 == b''
marmot.photo.seek(0)  # rewind the file by setting the current position
content3 = marmot.photo.read()
assert content3 == content1
流式操作
marmot.photo.new_file()
# GridFS files are binary, so write bytes.
marmot.photo.write(b'some_image_data')
marmot.photo.write(b'some_more_image_data')
marmot.photo.close()

# Persist the document so it keeps the reference to the new file.
marmot.save()
删除
marmot.photo.delete() # Deletes the GridFS document
# Saves the GridFS reference (being None) contained in the marmot instance
marmot.save()
文件替换
# Replaces the GridFS document; the with-block closes the source file afterwards
with open('another_marmot.png', 'rb') as another_marmot:
    marmot.photo.replace(another_marmot, content_type='image/png')
# Replaces the GridFS reference contained in marmot instance
marmot.save()
Signal
可用signals
主要包括:
pre_init
在Document或EmbeddedDocument实例初始化期间调用,在收集构造函数参数之后但在对它们进行任何其他处理之前(即默认值的分配)。此信号的处理程序通过 values 关键字参数接收参数字典,并且可以在返回之前修改此字典。
post_init
在Document或EmbeddedDocument实例的所有处理完成后调用。
pre_save
在save()中调用,在执行任何操作之前。
pre_save_post_validation
在save()中调用,在验证完成之后、保存之前。
post_save
在save()中调用,在大多数操作成功完成之后。传递了附加的布尔关键字参数 created,以指示保存是插入还是更新。
pre_delete
在delete()中调用,在尝试删除操作之前。
post_delete
在delete()内调用,在记录成功删除之后。
pre_bulk_insert
在验证要插入的文档之后、写入任何数据之前调用。此时 document 参数被替换为 documents 参数,表示要插入的文档列表。
post_bulk_insert
在批量插入操作成功后调用。与pre_bulk_insert一样,document 参数被省略并替换为 documents 参数。附加的布尔参数 loaded 标识 documents 的内容:为 True 时是Document实例,为 False 时仅是插入记录的主键值列表。
绑定文档
方法一:
package:1
import logging
from datetime import datetime
from mongoengine import *
from mongoengine import signals
def update_modified(sender, document, **kwargs):
    """Signal handler that stamps ``document.modified`` with the current UTC time.

    Args:
        sender: The class that sent the signal (unused).
        document: The document being saved; its ``modified`` attribute is set.
        **kwargs: Any extra keyword arguments sent with the signal (ignored).
    """
    document.modified = datetime.utcnow()
package:2
class Record(Document):
    """Record whose ``modified`` field is maintained by update_modified."""

    modified = DateTimeField()


# No sender is given here, so the handler is not restricted to Record.
signals.pre_save.connect(update_modified)
方法二:EmbeddedDocument仅支持pre/post_init信号
.
class Author(Document):
    """Author document with class-level pre/post save signal handlers."""

    name = StringField()

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Log the document's name before it is saved."""
        logging.debug('Pre Save: %s', document.name)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Log the document's name and whether the save was an insert."""
        logging.debug('Post Save: %s', document.name)
        # post_save passes the boolean `created`; its value, not its
        # presence, tells an insert from an update.
        if kwargs.get('created'):
            logging.debug('Created')
        else:
            logging.debug('Updated')


signals.pre_save.connect(Author.pre_save, sender=Author)
signals.post_save.connect(Author.post_save, sender=Author)
方法三:
def handler(event):
    """Build a decorator that gives a signal handler an ``apply`` class decorator.

    Args:
        event: Any object with a ``connect(fn, sender=...)`` method.

    Returns:
        A decorator. It returns the function unchanged, with an added
        ``apply`` attribute; ``fn.apply(cls)`` connects ``fn`` to ``event``
        with ``cls`` as the sender and returns ``cls``.
    """
    def decorator(fn):
        def apply(cls):
            event.connect(fn, sender=cls)
            return cls

        fn.apply = apply
        return fn

    return decorator
@handler(signals.pre_save)
def update_modified(sender, document, **kwargs):
    """Stamp ``document.modified`` with the current UTC time."""
    document.modified = datetime.utcnow()


@update_modified.apply
class Record(Document):
    """Record connected to update_modified through the ``apply`` decorator."""

    modified = DateTimeField()
Text Search
定义全文检索: 使用 $ 前缀设置文本索引。
class News(Document):
    """News item with a weighted text index over title and content."""

    title = StringField()
    content = StringField()
    is_active = BooleanField()

    meta = {
        'indexes': [
            {
                # The '$' prefix marks a field as part of the text index.
                'fields': ['$title', '$content'],
                'default_language': 'english',
                'weights': {'title': 10, 'content': 20},
            },
        ],
    }
全文检索使用
News(title='Using mongodb text search', content='Testing text search').save()
News(title='MongoEngine 0.9 released', content='Various improvements').save()
document = News.objects.search_text('testing').first()
document.title # may be: 'Using mongodb text search'
document = News.objects.search_text('released').first()
document.title # may be: 'MongoEngine 0.9 released'
使用文本权重排序
obj = News.objects.search_text('mongo').order_by('$text_score')