1. 程式人生 > >docker部署pyspider

docker部署pyspider

感覺pyspider不如scrapy好用

環境:共三臺機器(分為主機與從機)。

首先從 GitHub 取得原始碼:git clone https://github.com/binux/pyspider

然後修改Dockerfile ,其中有些下載很慢的,需手動下載新增進去

# Image for running pyspider, built from a local checkout of the pyspider repo.
# Slow-to-download artifacts (phantomjs .deb, mysql-connector zip, chromedriver)
# are expected to be present in the build context and are copied in.
FROM hub.c.163.com/library/python:2.7
# MAINTAINER is deprecated; a LABEL carries the same metadata.
LABEL maintainer="binux <[email protected]>"

# install phantomjs
COPY phantomjs_1.9.6-0wheezy_amd64.deb /opt/pyspider/phantomjs_1.9.6-0wheezy_amd64.deb
# - `apt-get update` has to run in the same layer as the install, otherwise the
#   package index may be missing or stale.
# - `-y` is required: the build has no TTY to answer apt's confirmation prompt.
# - `dpkg -i` exits non-zero when the package's dependencies are missing, so the
#   dependency fix-up is chained with `||` instead of `&&`.
RUN apt-get update \
    && apt-get install -y --no-install-recommends dpkg \
    && (dpkg -i /opt/pyspider/phantomjs_1.9.6-0wheezy_amd64.deb \
        || apt-get -f install -y --no-install-recommends) \
    && rm -rf /var/lib/apt/lists/*


# install requirements
#RUN pip --default-timeout=800 install --egg 'https://dev.mysql.com/get/Downloads/Connector-Python/mysql-connector-python-1.2.3.zip#md5=ce4a24cb1746c1c8f6189a97087f21c1'
COPY requirements.txt /opt/pyspider/requirements.txt
COPY mysql-connector-python-1.2.3.zip /opt/pyspider/mysql-connector-python-1.2.3.zip
COPY config.json /opt/pyspider/config.json
# Unpack the locally supplied MySQL connector and give it a version-free path.
RUN apt-get update \
    && apt-get install -y --no-install-recommends unzip \
    && rm -rf /var/lib/apt/lists/* \
    && unzip /opt/pyspider/mysql-connector-python-1.2.3.zip -d /opt/pyspider/ \
    && mv /opt/pyspider/mysql-connector-python-1.2.3 /opt/pyspider/mysql-connector-python
WORKDIR /opt/pyspider/mysql-connector-python
RUN python setup.py install

# Dependencies are installed before the full source is copied so this layer
# stays cached until requirements.txt changes.
RUN pip --default-timeout=800 install -i https://pypi.douban.com/simple -r /opt/pyspider/requirements.txt

# add all repo
# COPY instead of ADD: this is a plain local copy, none of ADD's extras are needed.
COPY ./ /opt/pyspider

# run test
WORKDIR /opt/pyspider
# Quoted so the shell cannot treat `.[all]` as a glob pattern.
RUN pip --default-timeout=800 install -i https://pypi.douban.com/simple -e '.[all]'

RUN pip install selenium

COPY chromedriver /usr/bin/

WORKDIR /
# Declared after /opt/pyspider has been populated; build-time writes made after
# a VOLUME declaration would be discarded.
VOLUME ["/opt/pyspider"]
COPY init.sh /init.sh
# Absolute path so the step does not depend on the current WORKDIR.
RUN set -x \
    && chmod +x /init.sh
ENTRYPOINT ["/init.sh"]
#ENTRYPOINT ["pyspider -c /opt/pyspider/config.json"]
#ENTRYPOINT ["pyspider all"]
#CMD ["pyspider"]
EXPOSE 5000 23333 24444 25555
 

config.json檔案詳解
主機:
{
  "taskdb": "mysql+taskdb://root:PASSWORD@MYSQL_HOST:3306/taskdb",
  "projectdb": "mysql+projectdb://root:PASSWORD@MYSQL_HOST:3306/projectdb",
  "resultdb": "mysql+resultdb://root:PASSWORD@MYSQL_HOST:3306/resultdb",
  "message_queue": "redis://REDIS_HOST:6379/db",
  "phantomjs-proxy": "127.0.0.1:25555",
  "scheduler" : {
    "xmlrpc-host": "0.0.0.0",
    "delete-time": 3600
  },
  "webui": {
    "port": 5000,
    "username": "test",
    "password": "test",
    "need-auth": true
  }
}

從機:
{
  "taskdb": "mysql+taskdb://root:PASSWORD@MYSQL_HOST:3306/taskdb",
  "projectdb": "mysql+projectdb://root:PASSWORD@MYSQL_HOST:3306/projectdb",
  "resultdb": "mysql+resultdb://root:PASSWORD@MYSQL_HOST:3306/resultdb",
  "message_queue": "redis://REDIS_HOST:6379/db",
  "phantomjs-proxy": "127.0.0.1:25555",
  "fetcher": {
    "xmlrpc-host": ""
  }
}
init.sh檔案詳解
主機:
#!/bin/bash
# Master entrypoint: start pyspider with the shared config file.
# `exec` replaces the shell so pyspider itself receives the signals sent by
# `docker stop` instead of a wrapping bash process.
exec pyspider -c /opt/pyspider/config.json

從機:
#!/bin/bash
# Slave entrypoint: run processor, phantomjs and fetcher side by side.
# Each command is long-running, so the first two are sent to the background;
# run sequentially in the foreground, the later ones would never start.
pyspider -c /opt/pyspider/config.json processor &
pyspider -c /opt/pyspider/config.json phantomjs &
# Absolute config path: the container's working directory is /, not /opt/pyspider.
# `exec` makes the fetcher the container's main process.
exec pyspider -c /opt/pyspider/config.json --phantomjs-proxy="localhost:25555" fetcher

 

docker build -t pyspider:0.1 .

docker run --name pyspider -d -p 5000:5000 pyspider:0.1 

docker exec -ti 容器id /bin/bash
 

master:5000