Mainly cleaning up to make pre-commit checks and hopefully CI pass.

This commit is contained in:
marvin8 2023-02-25 16:17:54 +10:00
parent bffb880709
commit b3d4b5e9f9
No known key found for this signature in database
GPG key ID: C29D8D75FCE14912
8 changed files with 121 additions and 69 deletions

View file

@ -41,14 +41,6 @@ repos:
args:
- "--fix"
- repo: https://github.com/econchick/interrogate
rev: 1.5.0 # or master if you're bold
hooks:
- id: interrogate
args:
- "--quiet"
- "--fail-under=95"
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.0.1
hooks:

View file

@ -3,7 +3,7 @@ Feed2Fedi
|Repo| |CI - Woodpecker| |Downloads|
|Checked against| |Checked with| |Interrogate|
|Checked against| |Checked with|
|Code style| |PyPI - Python Version| |PyPI - Wheel|
@ -12,47 +12,36 @@ Feed2Fedi
This is a Python bot that reads RSS feeds and automatically posts them to a Fediverse instance. It will support
instances running Mastodon, Takahe, and Pleroma.
It has been inspired by `feed2toot`_.
Feed2Fedi has been inspired by `feed2toot`_.
Feed2Fedi is in early development. It is not ready for general use
==================================================================
Everything below is just placeholder text for now.
Features
---------
* Feed2Fedi posts to `Mastodon`_
* Media from direct links, Gfycat, Imgur, Reddit, and Giphy is automatically attached in the social media post.
Feed2Fedi attaches up to the first 4 pictures for imgur albums and reddit gallery posts.
* Links that do not contain media can be skipped, ideal for meme accounts like `@babyelephantgifs`_
* NSFW content, spoilers, and self-posts can be filtered
* Feed2Fedi can monitor multiple subreddits at once
* Feed2Fedi posts to `Fediverse`_ instances.
* Feed2Fedi attaches a picture to the post if the feed item contains a "media_thumbnail".
* Feed2Fedi can monitor multiple RSS/ATOM feeds at once
* Feed2Fedi is fully open-source, so you don't have to give an external service full access to your social media accounts
* Feed2Fedi also checks the sha256 checksum of media files to stop posting of the same media file from different subreddits.
* Feed2Fedi can ping a `Healthchecks`_ instance for monitoring continuous operation of Feed2Fedi
* Optionally throttle down frequency of tooting when mastodon errors are detected.
**!!! Feed2Fedi no longer supports posting to Twitter. !!!**
.. Todo: Add Optional Healthchecks to Feed2Fedi
.. * Feed2Fedi can ping a `Healthchecks`_ instance for monitoring continuous operation of Feed2Fedi
If you need twitter functionality look into `reddit-twitter-bot`_ as a possible alternative.
**!!! Feed2Fedi no longer supports deleting old toots. !!!**
If you'd like to delete older toots from your Mastodon account look into `MastodonAmnesia`_ as a tool that might
If you'd like to delete older posts from your Fediverse account look into `Fedinesia`_ as a tool that might
work for you.
Disclaimer
----------
The developers of Feed2Fedi hold no liability for what you do with this script or what happens to you by using this
script. Abusing this script *can* get you banned from Mastodon, so make sure to read up on proper usage of the API
script. Abusing this script *can* get you banned from Fediverse instances, so make sure to read up on proper usage
for each site.
Setup and usage
---------------
For instructions on setting up and using Feed2Fedi, please visit `the wiki`_
.. TODO: Setup and usage
.. ---------------
..
.. For instructions on setting up and using Feed2Fedi, please visit `the wiki`_
Supporting Feed2Fedi
--------------------
@ -64,7 +53,7 @@ There are a number of ways you can support Feed2Fedi:
- You can send me small change in Monero to the address below:
Monero donation address:
`87C65WhSDMhg4GfCBoiy861XTB6DL2MwHT3SWudhjR3LMeGEJG8zeZZ9y4Exrtx5ihavXyfSEschtH4JqHFQS2k1Hmn2Lkt`
`84oC6aUX4yyRoEk6pMVVdZYZP4JGJZk4KKJq1p7n9ZqLPK8zH3W1vpFAnSxDQGbwmZAeXrE4w4ct6HqAXdM1K9LfCAxZx4u`
Changelog
---------
@ -78,7 +67,9 @@ Feed2Fedi is licences under the `GNU General Public License v3.0`_
.. _MastodonAmnesia: https://pypi.org/project/mastodonamnesia/
.. _feed2toot: https://gitlab.com/chaica/feed2toot
.. _Fediverse: https://fediverse.party/
.. _Fedinesia: https://pypi.org/project/fedinesia/
.. _Healthchecks: https://healthchecks.io/
.. _buy me a coffee: https://www.buymeacoffee.com/marvin8
.. _GNU General Public License v3.0: http://www.gnu.org/licenses/agpl-3.0.html
@ -113,7 +104,3 @@ Feed2Fedi is licences under the `GNU General Public License v3.0`_
.. |CI - Woodpecker| image:: https://ci.codeberg.org/api/badges/MarvinsMastodonTools/feed2fedi/status.svg
:target: https://ci.codeberg.org/MarvinsMastodonTools/feed2fedi
.. |Interrogate| image:: https://codeberg.org/MarvinsMastodonTools/feed2fedi/raw/branch/main/interrogate_badge.svg
:alt: Doc-string coverage
:target: https://interrogate.readthedocs.io/en/latest/

View file

@ -1,5 +1,5 @@
Welcome to Feed2Fedi's documentation!
===================================
=====================================
Setting up Feed2Fedi
--------------------

View file

@ -1,3 +1,4 @@
"""Module level constants."""
from typing import Final
__version__: Final[str] = "0.0.1"
@ -5,6 +6,3 @@ DISPLAY_NAME: Final[str] = "Feed2Fedi"
WEBSITE: Final[str] = "https://codeberg.org/MarvinsMastodonTools/feed2fedi"
POST_RECORDER_SQLITE_DB: Final[str] = "cache.sqlite"
# Todo:
# Post history in sqlite db with fields[last seen, url)

View file

@ -1,3 +1,4 @@
"""Main processing and main entry point methods for Feed2Fedi."""
import asyncio
from pathlib import Path
@ -10,10 +11,11 @@ from .publish import Fediverse
async def main():
"""Read configuration and feeds, then make posts while avoiding duplicates."""
print(f"Welcome to {DISPLAY_NAME} {__version__}")
config = await Configuration.load_config(file_name=Path("config.ini"))
config.save_config(file_name=Path("config.ini"))
config = await Configuration.load_config(config_file_path=Path("config.ini"))
config.save_config(config_file_path=Path("config.ini"))
post_recorder = PostRecorder()
await post_recorder.db_init()
@ -26,5 +28,6 @@ async def main():
await post_recorder.close_db()
def start_main():
def start_main() -> None:
"""Start processing, i.e. main entry point."""
asyncio.run(main())

View file

@ -1,9 +1,11 @@
"""Classes and methods to collect information needed by Feed2Fedi to make posts on Fediverse instance."""
import asyncio
import os
import re
from pathlib import Path
from typing import List
from typing import Optional
from typing import Tuple
from urllib.parse import urlsplit
import aiofiles
@ -12,6 +14,8 @@ import feedparser
class FeedReader:
"""Instances hold feed items for RSS/Atom feeds passed during instantiation."""
def __init__(self, feeds: List[str]) -> None:
self.items: List[feedparser.util.FeedParserDict] = []
for feed in feeds:
@ -21,15 +25,17 @@ class FeedReader:
async def get_file(
img_url: str,
) -> Optional[Path]:
) -> Tuple[Optional[Path], Optional[str]]:
"""Save a file located at img_url to a file located at filepath.
:param img_url: URL of the image to download
:returns:
file_path (string): path to downloaded image or None if no image was downloaded
Tuple containing:
file_path (string): path to downloaded image or None if no image was downloaded
mime_type (string): mimetype as returned from URL
"""
file_name = await determine_filename(img_url=img_url)
file_name, mime_type = await determine_filename(img_url=img_url)
chunk_size = 64 * 1024
try:
@ -43,23 +49,29 @@ async def get_file(
await file_out.write(data_chunk)
await asyncio.sleep(0) # allow client session to close before continuing
return file_name
return file_name, mime_type
except aiohttp.ClientError as save_image_error:
print(
"collect.py - get_file(...) -> None - download failed with: %s"
% save_image_error,
)
return None
return None, None
async def determine_filename(img_url: str) -> Optional[Path]:
async def determine_filename(img_url: str) -> Tuple[Optional[Path], Optional[str]]:
"""Determine suitable filename for an image based on URL.
:param img_url: URL to image to determine a file name for.
:returns:
Tuple with Path or None for file name and mime-type or None
"""
# First check if URL starts with http:// or https://
regex = r"^https?://"
match = re.search(regex, img_url, flags=0)
if not match:
print("Post link is not a full link: %s" % img_url)
return None
return None, None
# Acceptable image formats
image_formats = (
@ -82,13 +94,13 @@ async def determine_filename(img_url: str) -> Optional[Path]:
except (aiohttp.ClientError, asyncio.exceptions.TimeoutError) as error:
print("Error while opening URL: %s " % error)
return None
return None, None
if content_type not in image_formats:
print("URL does not point to a valid image file: %s" % img_url)
return None
return None, None
# URL appears to be an image, so determine filename
file_name = os.path.basename(urlsplit(img_url).path)
return Path(file_name)
return Path(file_name), content_type

View file

@ -1,3 +1,4 @@
"""Classes and methods to control how Feed2Fedi works."""
import sys
from configparser import ConfigParser
from dataclasses import dataclass
@ -26,15 +27,25 @@ PR = TypeVar("PR", bound="PostRecorder")
@dataclass
class Configuration:
"""Dataclass to hold configuration settings for Feed2Fedi."""
feeds: List[str]
fedi_instance: str
fedi_access_token: str
@classmethod
async def load_config(cls: Type[ConfigClass], file_name: Path) -> ConfigClass:
async def load_config(
cls: Type[ConfigClass], config_file_path: Path
) -> ConfigClass:
"""Load configuration values from file and create Configuration instance.
:param config_file_path: File name to load configuration values from
:returns:
Configuration instance with values loaded from config_file_path
"""
parser = ConfigParser()
with open(file=file_name, encoding="UTF-8") as config_file:
with config_file_path.open(mode="r", encoding="UTF-8") as config_file:
parser.read_file(f=config_file)
parsed_feeds: List[str] = []
@ -63,7 +74,11 @@ class Configuration:
fedi_access_token=access_token,
)
def save_config(self, file_name: Path) -> None:
def save_config(self, config_file_path: Path) -> None:
"""Save Configuration to file.
:param config_file_path: File name to save Configuration to
"""
parser = ConfigParser()
Configuration.add_feeds_section(
@ -76,11 +91,16 @@ class Configuration:
access_token=self.fedi_access_token,
)
with open(file=file_name, mode="w", encoding="UTF-8") as config_file:
with config_file_path.open(mode="w", encoding="UTF-8") as config_file:
parser.write(fp=config_file, space_around_delimiters=True)
@staticmethod
def add_feeds_section(parser, feeds):
def add_feeds_section(parser: ConfigParser, feeds: List[str]) -> None:
"""Add Feeds section to config parser.
:param parser: ConfigParser to add the Feeds section to
:param feeds: List of feed urls to add to Feeds section
"""
parser.add_section(section="Feeds")
feed_number = 1
for feed in feeds:
@ -88,18 +108,39 @@ class Configuration:
feed_number += 1
@staticmethod
def add_fediverse_section(parser, instance, access_token):
def add_fediverse_section(
parser: ConfigParser,
instance: str,
access_token: str,
) -> None:
"""Add Fediverse section to config parser.
:param parser: ConfigParser to add the fediverse section to
:param instance: URL of fediverse instance.
:param access_token: Access_token for authenticating at fediverse instance
"""
parser.add_section(section="Fediverse")
parser.set(section="Fediverse", option="Instance", value=instance)
parser.set(section="Fediverse", option="Access-Token", value=access_token)
@staticmethod
def get_instance() -> str:
"""Get instance URL from user.
:returns:
URL of fediverse instance.
"""
instance = input("[...] Please enter the URL for the instance to connect to: ")
return instance
@staticmethod
async def get_access_token(instance: str) -> str:
"""Get access token from fediverse instance.
:param instance: URL to fediverse instance
:returns:
Access token
"""
try:
async with aiohttp.ClientSession() as session:
# Create app
@ -144,6 +185,8 @@ class Configuration:
class PostRecorder:
"""Record posts, check for duplicates, and delete old records of posts."""
LAST_POST_TS: Final[str] = "last-post-timestamp"
def __init__(self: PR, history_db_dir: str = ".") -> None:

View file

@ -1,7 +1,8 @@
"""Classes and methods needed to publish posts on a Fediverse instance."""
import json
from typing import List
from typing import Optional
import aiofiles
import aiohttp
from feedparser import FeedParserDict
from minimal_activitypub.client_2_server import ActivityPub
@ -12,11 +13,17 @@ from .control import PostRecorder
class Fediverse:
"""Helper class to publish posts on a fediverse instance from rss feed items."""
def __init__(self, config: Configuration, post_recorder: PostRecorder) -> None:
self.config = config
self.post_recorder = post_recorder
async def publish(self, items: List[FeedParserDict]) -> None:
"""Publish posts to fediverse instance from content in the items list.
:param items: Rss feed items to post
"""
async with aiohttp.ClientSession() as session:
fediverse = ActivityPub(
instance=self.config.fedi_instance,
@ -26,6 +33,8 @@ class Fediverse:
item = items[0]
print(f"item={json.dumps(item, indent=4)}")
if await self.post_recorder.duplicate_check(identifier=item.link):
print(
f"News Item has already been posted - {item.title} at\n{item.link}"
@ -36,18 +45,26 @@ class Fediverse:
media_ids: Optional[List[str]] = None
# Post media if media_thumbnail is present with a url
if item.get("media_thumbnail") and item.get("media_thumbnail")[0].get(
"url"
):
file_name = await get_file(item.get("media_thumbnail")[0].get("url"))
async with aiofiles.open(file=str(file_name), mode="rb") as thumbnail:
media = await fediverse.post_media(
file=thumbnail, mime_type="image/jpeg"
)
media_ids = [media["id"]]
media_path, mime_type = await get_file(
item.get("media_thumbnail")[0].get("url")
)
if media_path:
with media_path.open(mode="rb") as thumbnail:
media = await fediverse.post_media(
file=thumbnail,
mime_type=mime_type,
)
media_ids = [media["id"]]
# Delete temporary file
media_path.unlink()
await fediverse.post_status(
status=f"{item.title}\n{item.link}",
status=f"{item.title}\n\n{item.link}",
media_ids=media_ids,
)