dockerfile
# Image that runs a Tor client (SOCKS proxy + password-protected control port)
# and ships the Python scraper in /app alongside it.
#
# Debian 10 "buster" is end-of-life and no longer receives security updates,
# so use the bookworm variant of the same Python 3.8 slim image.
FROM python:3.8-slim-bookworm

# Ports Tor listens on inside the container; both are written to torrc below.
ENV TOR_LISTEN_PORT=9050 \
    TOR_CONTROL_PORT=9051

# Build-time only: stops apt from prompting without leaking into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
        tor \
        torsocks \
    && rm -rf /var/lib/apt/lists/*

# Configure Tor. The control password is taken from a BuildKit secret so the
# plaintext never lands in an image layer, in `docker history`, or in the
# runtime environment:
#   docker build --secret id=tor_password,src=./tor_password.txt .
# When no secret is supplied the historical default "mysecretpassword" is used
# so a plain `docker build .` keeps working; do not ship such an image.
# Note: `tor --hash-password` takes the password as its argument; it does not
# read stdin, so passing "-" would hash the literal dash.
RUN --mount=type=secret,id=tor_password \
    TOR_PASSWORD="$(cat /run/secrets/tor_password 2>/dev/null || true)" \
    && TOR_HASH="$(tor --quiet --hash-password "${TOR_PASSWORD:-mysecretpassword}")" \
    && printf 'SocksPort %s\nControlPort %s\nHashedControlPassword %s\n' \
        "$TOR_LISTEN_PORT" "$TOR_CONTROL_PORT" "$TOR_HASH" >> /etc/tor/torrc

WORKDIR /app

# Install Python dependencies before copying the sources so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY . .

# Drop root: the Debian tor package creates the unprivileged debian-tor user.
USER debian-tor
CMD ["tor"]
python
def renew_tor_circuit(control_port=None, password=None):
    """Ask the local Tor daemon for a new circuit by sending it NEWNYM.

    Args:
        control_port: Tor control port to connect to. When omitted, the
            TOR_CONTROL_PORT environment variable is used, falling back to 9051.
        password: Control-port password. When omitted, the TOR_PASSWORD
            environment variable is used, falling back to the original
            built-in default.

    Raises:
        Whatever ``Controller.from_port``, ``authenticate`` or ``signal``
        raise; nothing is caught here.
    """
    # Local import keeps this block self-contained; the rest of the file's
    # import section is not visible from here.
    import os

    if control_port is None:
        control_port = int(os.environ.get("TOR_CONTROL_PORT", "9051"))
    if password is None:
        password = os.environ.get("TOR_PASSWORD", "mysecretpassword")

    # The context manager closes the control connection even if
    # authentication or the signal fails.
    with Controller.from_port(port=control_port) as controller:
        controller.authenticate(password=password)
        controller.signal(Signal.NEWNYM)
# Fetch a page through the configured proxy and return its body text.
def scrape_page(url, renew_circuit=True, timeout=30):
    """Download ``url`` with a random User-Agent and return the response text.

    Args:
        url: Address of the page to fetch.
        renew_circuit: When true (the default, matching the previous
            behaviour) a new Tor circuit is requested before the request.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block forever.

    Returns:
        The response body as text, or ``None`` if the request failed or the
        server answered with an HTTP error status.

    Note:
        ``proxies`` is read from module scope; it is not defined in the part
        of the file visible here -- presumably a requests-style proxy mapping
        pointing at the Tor SOCKS port (confirm against its definition).
        Errors from the circuit renewal are not caught and propagate.
    """
    if renew_circuit:
        renew_tor_circuit()

    headers = {'User-Agent': UserAgent().random}  # Random user-agent per request
    try:
        response = requests.get(url, proxies=proxies, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.text
    except requests.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        # Deliberate best-effort: report the failure and fall through to None.
        print(f"An error occurred: {err}")
    return None
# Script entry point: fetch one example page and print it.
if __name__ == "__main__":
    # "google.go.jp" was a typo: .go.jp is the Japanese government namespace,
    # so the original example host would most likely not resolve.
    url = "http://google.co.jp/"  # Replace with the actual page you want to scrape
    page_data = scrape_page(url)
    if page_data:
        print(page_data)  # Or process the data further as required