motor helper

2019/03/15 13:00
阅读数 42
# -*- coding: utf-8 -*-
# @Time : 2019-11-08 14:04
# @Author : cxa
# @File : mongohelper.py
# @Software: PyCharm
import asyncio
import datetime
from collections.abc import Iterable  # `collections.Iterable` alias was removed in Python 3.10
from urllib.parse import quote_plus

from motor.motor_asyncio import AsyncIOMotorClient

from helper.logger_helper import logger as storage

# Use uvloop's event loop policy when the package is installed; otherwise
# silently keep asyncio's default policy. Only the import is guarded so
# that an unrelated failure while installing the policy is not swallowed.
try:
    import uvloop
except ImportError:
    pass
else:
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

# Connection settings copied onto every MotorOperation instance.
# 'passwd' is only used when 'user' is non-empty, but it must exist so that
# enabling authentication does not fail on a missing attribute.
db_configs = {
    'host': '127.0.0.1',
    'port': '27017',
    'db_name': 'spider',
    'user': '',
    'passwd': '',
}


class MotorOperation:
    """Async helper around one Motor database handle.

    Connection settings are copied from the module-level ``db_configs``
    mapping onto the instance. Every method takes the target collection
    name through ``col``.
    """

    def __init__(self):
        # Defaults so a config without credentials still builds a valid URI;
        # any value present in db_configs overrides them.
        self.user = ""
        self.passwd = ""
        self.__dict__.update(**db_configs)
        self.motor_uri = self._build_uri(
            self.host, self.port, self.db_name, self.user, self.passwd
        )
        self.client = AsyncIOMotorClient(self.motor_uri)
        self.mb = self.client[self.db_name]

    @staticmethod
    def _build_uri(host, port, db_name, user="", passwd=""):
        """Return the MongoDB connection URI for the given settings.

        Credentials are percent-escaped so characters such as ``@`` or ``:``
        in a user name or password cannot corrupt the URI. When ``user`` is
        empty an unauthenticated URI is returned.
        """
        if not user:
            return f"mongodb://{host}:{port}/{db_name}"
        credentials = f"{quote_plus(str(user))}:{quote_plus(str(passwd))}"
        return f"mongodb://{credentials}@{host}:{port}/{db_name}?authSource={db_name}"

    async def save_data_with_status(self, items, col="dianping_seed_data"):
        """Insert ``items`` with bookkeeping fields added.

        Each document gets ``update_time``, ``create_time`` and ``status`` (0,
        the initial state); fields already present in an item take precedence.
        Nothing is sent to the server when ``items`` is empty.
        """
        # One timestamp for the whole batch so create/update times agree.
        now = datetime.datetime.now()
        docs = []
        for item in items:
            doc = {"update_time": now, "create_time": now, "status": 0}
            doc.update(item)
            docs.append(doc)
        if not docs:
            # insert_many rejects an empty document list.
            return
        await self.mb[col].insert_many(docs)

    async def add_index(self, col="dianping_seed_data"):
        """Create an index on the ``url`` field of ``col``."""
        await self.mb[col].create_index('url')

    async def save_data(self, items, col="dianping_seed_data", key="url"):
        """Upsert one document or an iterable of documents, matched on ``key``.

        :param items: a single dict, or an iterable of dicts.
        :param key: field used as the upsert filter; an item without it is
            logged and skipped.

        Failures are logged per item and do not abort the remaining items.
        A non-iterable ``items`` is ignored.
        """
        # A dict is itself Iterable, so it must be recognised first;
        # otherwise its keys would be treated as the documents.
        if isinstance(items, dict):
            items = [items]
        elif not isinstance(items, Iterable):
            return
        for item in items:
            try:
                await self.mb[col].update_one(
                    {key: item[key]},
                    {'$set': item},
                    upsert=True)
            except Exception as e:
                storage.error(f"数据插入出错:{e.args}此时的item是:{item}")

    async def change_status(self, condition, col="dianping_seed_data", status_code=1):
        """Set ``status`` and refresh ``update_time`` on one matching document.

        :param condition: filter passed to ``update_one``.
        :param status_code: 0 = initial, 1 = download started, 2 = download finished.

        Errors are logged, not raised.
        """
        item = {"status": status_code, "update_time": datetime.datetime.now()}
        try:
            await self.mb[col].update_one(condition, {'$set': item})
        except Exception as e:
            storage.error(f"修改状态出错:{e.args}此时的数据是:{item}")

    async def get_detail_data(self, col="dianping_seed_data"):
        """Print every document with ``status`` 0 and return the cursor.

        The cursor is rewound after printing so the caller receives a cursor
        that can be iterated again instead of an exhausted one.
        """
        data = self.mb[col].find({'status': 0})
        async for item in data:
            print(item)
        data.rewind()
        return data

    async def reset_status(self, col="dianping_seed_data"):
        """Set documents whose ``status`` is 1 back to 0."""
        await self.mb[col].update_many({'status': 1}, {'$set': {"status": 0}})

    async def reset_all_status(self, col="dianping_seed_data"):
        """Set ``status`` to 0 on every document in ``col``."""
        await self.mb[col].update_many({}, {'$set': {"status": 0}})

    async def find_data(self, col="dianping_seed_data"):
        """Return documents with ``status`` 0, to be used as crawl targets.

        :return: an async generator yielding each document without ``_id``.
        """
        cursor = self.mb[col].find({'status': 0}, {"_id": 0})
        async_gen = (item async for item in cursor)
        return async_gen

    async def delete_old_data(self, col="dianping_seed_data"):
        """Delete documents whose ``update_time`` is at least one day old.

        The deleted count, or the error arguments on failure, is printed.
        """
        try:
            yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
            result = await self.mb[col].delete_many({'update_time': {'$lte': yesterday}})
            print(f"成功删除{result.deleted_count}条")
        except Exception as e:
            print("删除错误", e.args)

if __name__ == '__main__':
    async def _main():
        """Purge stale documents from the default seed collection."""
        # Build the client inside the running loop so Motor is bound to the
        # same loop that executes the query.
        m = MotorOperation()
        await m.delete_old_data(col="dianping_seed_data")

    # asyncio.run creates and closes the loop itself; calling
    # get_event_loop() without a running loop is deprecated.
    asyncio.run(_main())

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部