diff --git a/.flake8 b/.flake8 index 328e0a68a..8b6524be5 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] max-line-length=100 exclude = .git,__pycache__ -extend-ignore = W605, E303 \ No newline at end of file +extend-ignore = \ No newline at end of file diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..a7326b658 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,9 @@ +# Lines starting with '#' are comments. +# Each line is a file pattern followed by one or more owners. + +# These owners will be the default owners for everything in the repo. +* @obervinov + +# Order is important. The last matching pattern has the most precedence. +# You can also use email addresses if you prefer. +# docs/* obervinov@pm.me \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..dd84ea782 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md new file mode 100644 index 000000000..48d5f81fa --- /dev/null +++ b/.github/ISSUE_TEMPLATE/custom.md @@ -0,0 +1,10 @@ +--- +name: Custom issue template +about: Describe this issue template's purpose here. +title: '' +labels: '' +assignees: '' + +--- + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000..bbcbbe7d6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..0f17753a5 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates +--- +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..0d441130c --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,16 @@ +# PR-1: NAME +**Full Changelog**: https://github.com/obervinov/pyinstabot-downloader/compare/1...2 by @obervinov in https://github.com/obervinov/pyinstabot-downloader/pull/1 +## VERSION - YYYY-MM-DD +### What's Changed +#### 🐛 Bug Fixes +* +* +#### 📚 Documentation +* +* +#### 💥 Breaking Changes +* +* +#### 🚀 Features +* +* diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..d0d063d4e --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,11 @@ +--- +name: Create GitHub Release + +# yamllint disable-line rule:truthy +on: + push: + branches: [main] + +jobs: + create-release: + uses: obervinov/_templates/.github/workflows/create.release.yml@v1.0.5 diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml new file mode 100644 index 000000000..9e29b7f33 --- /dev/null +++ b/.github/workflows/workflow.yml @@ -0,0 +1,24 @@ +--- +name: Test and Build release + +# yamllint disable-line rule:truthy +on: + push: + branches: + - '*' + - '*/*' + - '**' + +env: + # yamllint disable-line rule:line-length + PROJECT_DESCRIPTION: "This project is a telegram bot that allows you to create backups of content from your Instagram profile to Dropbox or Mega clouds, as well as in the local file system." 
+ + +jobs: + pylint: + uses: obervinov/_templates/.github/workflows/test.pylint.yml@v1.0.5 + verify-changelog: + uses: obervinov/_templates/.github/workflows/verify.changelog.yml@v1.0.5 + build-release: + uses: obervinov/_templates/.github/workflows/build.docker.yml@v1.0.5 + needs: [pylint, verify-changelog] diff --git a/.gitignore b/.gitignore index 45efbd455..7b7d95216 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ +.vscode/* .DS_Store +*.tmp +vault/data/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 000000000..d9f4215c4 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,617 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. 
(This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. 
+limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.11 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. 
+#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. 
+method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. 
+valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. 
+max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. 
+confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. 
+never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. 
+spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. 
+ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. 
+init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..f584c9cc8 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,20 @@ +{ + "cSpell.words": [ + "APPROLE", + "drawio", + "instaloader", + "Pipfile", + "progressbar", + "pyinstabot", + "pylint", + "pytests", + "savepath", + "SECRETID", + "sessionfile", + "shortcode", + "shortcodes", + "storyitem", + "useragent", + "userid" + ] +} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index c883c96f3..248776992 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,46 @@ # Change Log All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](http://keepachangelog.com/) -and this project adheres to [Semantic Versioning](http://semver.org/). - -## [Unreleased] - yyyy-mm-dd - -Here we write upgrading notes for brands. It's a team effort to make them as -straightforward as possible. \ No newline at end of file +The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). 
+ + + +## v2.0.0 - 2023-09-16 +### What's Changed +**Full Changelog**: https://github.com/obervinov/pyinstabot-downloader/compare/v1.0.1...v2.0.0 by @obervinov in https://github.com/obervinov/pyinstabot-downloader/pull/7 + +In this release, the approach with issue and github project was implemented already at the very last stages of release preparation, so: +- issue contains a list of mixed issues +- these issues are duplicated in the readme sections +#### 🐛 Bug Fixes +* [Update dependencies: 2023.06.13](https://github.com/obervinov/pyinstabot-downloader/issues/6) +* [Update the project code and fix bugs](https://github.com/obervinov/pyinstabot-downloader/issues/13) +* [Redundant login and password reading from vault](https://github.com/obervinov/pyinstabot-downloader/issues/16) +* [The status is "None" when an exception occurred when uploading to mega, and the retry method](https://github.com/obervinov/pyinstabot-downloader/issues/15) +#### 📚 Documentation +* [Update project repository: 2023.06.13](https://github.com/obervinov/pyinstabot-downloader/issues/8) +#### 💥 Breaking Changes +* [Update dependencies: 2023.06.13](https://github.com/obervinov/pyinstabot-downloader/issues/6) +* [Update the project code and fix bugs](https://github.com/obervinov/pyinstabot-downloader/issues/13) +#### 🚀 Features +* [Update dependencies: 2023.06.13](https://github.com/obervinov/pyinstabot-downloader/issues/6) +* [Added the support GitHub Actions](https://github.com/obervinov/pyinstabot-downloader/issues/10) +* [Update project repository: 2023.06.13](https://github.com/obervinov/pyinstabot-downloader/issues/8) +* [Update the project code and fix bugs](https://github.com/obervinov/pyinstabot-downloader/issues/13) +* [Check the download history for the specified post](https://github.com/obervinov/pyinstabot-downloader/issues/17) + + +## v1.0.1 - 2022-11-06 +### What's Changed +**Full Changelog**: https://github.com/obervinov/pyinstabot-downloader/compare/v1.0.0...v1.0.1 
+#### 📚 Documentation +* updated the documentation in the file [README.md](https://github.com/obervinov/pyinstabot-downloader/blob/main/README.md) and changed license to `MIT` by @obervinov in https://github.com/obervinov/pyinstabot-downloader/pull/2 and https://github.com/obervinov/pyinstabot-downloader/pull/3 +#### 🚀 Features +* added `flake8` and fixed warnings by @obervinov in https://github.com/obervinov/pyinstabot-downloader/pull/1 + + + +## v1.0.0 - 2022-11-05 +### What's Changed +**Full Changelog**: https://github.com/obervinov/pyinstabot-downloader/commits/v1.0.0 +#### 💥 Breaking Changes +* project release diff --git a/Dockerfile b/Dockerfile index cec6d6fb2..c82f05cd6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,36 +1,42 @@ -FROM python:3.9.15-alpine3.16 +FROM python:3.10.7-alpine3.16 ### External argumetns ### -ARG BOT_NAME +ARG PROJECT_NAME +ARG PROJECT_DESCRIPTION +ARG PROJECT_VERSION ### Labels ### -LABEL org.opencontainers.image.source https://github.com/obervinov/${BOT_NAME} - -### Envermoment variables ### -ENV PATH=/home/python_user/.local/bin:$PATH - -### Install packages ### -RUN apk add git --no-cache +LABEL org.opencontainers.image.source https://github.com/obervinov/${PROJECT_NAME} +LABEL org.opencontainers.image.description $PROJECT_DESCRIPTION +LABEL org.opencontainers.image.title "Telegram bot: pyinstabot-downloader" +LABEL org.opencontainers.image.version $PROJECT_VERSION +LABEL org.opencontainers.image.authors github.obervinov@proton.me +LABEL org.opencontainers.image.licenses https://github.com/obervinov/pyinstabot-downloader/blob/$PROJECT_VERSION/LICENSE +LABEL org.opencontainers.image.documentation https://github.com/obervinov/pyinstabot-downloader/blob/$PROJECT_VERSION/README.md +LABEL org.opencontainers.image.source https://github.com/obervinov/pyinstabot-downloader/blob/$PROJECT_VERSION + +### Environment variables ### +ENV PATH=/home/${PROJECT_NAME}/.local/bin:$PATH ### Preparing user and dirs ### -RUN adduser -D -h 
/home/python_user -s /bin/sh python_user && \ - mkdir -p /home/python_user && \ - mkdir -p /var/log/${BOT_NAME} && \ - mkdir -p /home/python_user/${BOT_NAME} && \ - chown python_user. /home/python_user -R && \ - chown python_user. /var/log/${BOT_NAME} +RUN adduser -D -h /home/${PROJECT_NAME} -s /bin/sh ${PROJECT_NAME} && \ + mkdir -p /home/${PROJECT_NAME} && \ + mkdir -p /home/${PROJECT_NAME}/app && \ + mkdir -p /home/${PROJECT_NAME}/tmp && \ + chown ${PROJECT_NAME}. /home/${PROJECT_NAME} -R + +### Prepare git +RUN apk add git ### Switching context ### -USER python_user -WORKDIR /home/python_user/${BOT_NAME} +USER ${PROJECT_NAME} +WORKDIR /home/${PROJECT_NAME}/app ### Copy source code ### COPY requirements.txt ./ -COPY bot.py ./ COPY src/ ./ ### Installing a python dependeces - requirements.txt ### -RUN python3 -m pip install --upgrade pip RUN pip3 install -r requirements.txt -CMD [ "python3", "bot.py" ] \ No newline at end of file +CMD [ "python3", "bot.py" ] diff --git a/Pipfile b/Pipfile new file mode 100644 index 000000000..6841bed4e --- /dev/null +++ b/Pipfile @@ -0,0 +1,2 @@ +[requires] +python_version = "3.10.7" diff --git a/README.md b/README.md index 888bf6f30..4defd9533 100644 --- a/README.md +++ b/README.md @@ -1,187 +1,202 @@ # Pyinstabot-downloader +[![Release](https://github.com/obervinov/pyinstabot-downloader/actions/workflows/release.yml/badge.svg)](https://github.com/obervinov/pyinstabot-downloader/actions/workflows/release.yml) +[![CodeQL](https://github.com/obervinov/pyinstabot-downloader/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/obervinov/pyinstabot-downloader/actions/workflows/github-code-scanning/codeql) +[![Test and Build image](https://github.com/obervinov/pyinstabot-downloader/actions/workflows/workflow.yml/badge.svg?branch=main&event=pull_request)](https://github.com/obervinov/pyinstabot-downloader/actions/workflows/workflow.yml) + ![GitHub release (latest 
SemVer)](https://img.shields.io/github/v/release/obervinov/pyinstabot-downloader?style=for-the-badge) ![GitHub last commit](https://img.shields.io/github/last-commit/obervinov/pyinstabot-downloader?style=for-the-badge) ![GitHub Release Date](https://img.shields.io/github/release-date/obervinov/pyinstabot-downloader?style=for-the-badge) ![GitHub issues](https://img.shields.io/github/issues/obervinov/pyinstabot-downloader?style=for-the-badge) ![GitHub repo size](https://img.shields.io/github/repo-size/obervinov/pyinstabot-downloader?style=for-the-badge) -![PyPI - Python Version](https://img.shields.io/pypi/pyversions/instaloader?style=for-the-badge) +[![Python version](https://img.shields.io/badge/python-3.10.7-blue.svg?style=for-the-badge)](https://www.python.org/downloads/release/python-3107/) +[![License](https://img.shields.io/badge/license-MIT-green.svg?style=for-the-badge)](https://opensource.org/licenses/MIT) -## About this project -This project is a telegram boat that allows you to upload content from your Instagram profile to the Dropbox cloud.

-Main functions: -- unloading all posts from the profile -- unloading of one post +## GitHub Actions +| Name | Version | +| ------------------------ | ----------- | +| GitHub Actions Templates | [v1.0.5](https://github.com/obervinov/_templates/tree/v1.0.5) | -The vault is used for: -- storage of sensitive configuration parameters -- storing the history of already uploaded posts -- storing user authorization events +## About this project +This project is a telegram bot that allows you to create backups of content from your Instagram profile to Dropbox or Mega clouds, as well as in the local file system. +Main functions: +- a backup copy of all posts from the profile +- a backup copy of a specific post by link +- the ability to backup to the mega or dropbox cloud + +Review:

- - instagram-profile +

+

+ bot-preview-post + bot-preview-account +

+ +## Project architecture +Code logic +![Diagram](doc/diagram-logic.png) -## Repository map +Code dependencies +![Diagram](doc/diagram-code.png) + +## Repository map ```sh . -├── Dockerfile ### Manifest for building docker-image -├── LICENSE ### License info -├── README.md ### The file you're reading now -├── CHANGELOG.md ### All notable changes to this project will be documented in this file -├── bot.py ### Main file with code this project -├── docker-compose.yml ### Manifest for building and running project with all dependencies -├── requirements.txt ### List of python dependencies -└── doc ### Directory with content for documentation - ├── bot-preview.gif # Gif animation with a demonstration of the work of bots - └── instagram-profile.png # Png image with screenshot of instagram profile for demonstration -─── src ### Extended modules - ├── dropbox.py # A code file containing a class for processing and sending data to dropbox - ├── instagram.py # A code file containing a class for receiving and processing data from the instagram api - └── progressbar.py # A code file containing a class for calculating and rendering the progress bar - -2 directory, 12 files +├── CHANGELOG.md +├── Dockerfile +├── LICENSE +├── Pipfile +├── README.md +├── SECURITY.md +├── doc +│ ├── bot-preview-account.png +│ ├── bot-preview-post.png +│ ├── bot-preview.gif +│ ├── diagram-code.png +│ ├── diagram-logic.png +│ └── pyinstabot-downloader.drawio +├── docker-compose.dev.yml +├── docker-compose.prerelease.yml +├── docker-compose.release.yml +├── requirements.txt +├── src +│ ├── bot.py +│ ├── configs +│ │ └── messages.json +│ ├── constants.py +│ └── modules +│ ├── __init__.py +│ ├── downloader.py +│ └── uploader.py +└── vault + └── policy.hcl + +6 directories, 23 files ``` 
- -## Requirements -- Vault server - [a storage of secrets for bot with kv v2 engine](https://developer.hashicorp.com/vault/docs/secrets/kv/kv-v2) -- Dropbox api token - [instructions for generating a token of api](https://dropbox.tech/developers/generate-an-access-token-for-your-own-account) -- Telegram bot api token - [instructions for creating bot and getting a token of api](https://learn.microsoft.com/en-us/azure/bot-service/bot-service-channel-connect-telegram?view=azure-bot-service-4.0) -- Instagram login/password - [login and password from the instagram account, it is advisable to create a new account](https://www.instagram.com/accounts/emailsignup/) +## Requirements +- Vault server - [a storage of secrets for bot with kv v2 engine](https://developer.hashicorp.com/vault/docs/secrets/kv/kv-v2) +- Dropbox [api token](https://dropbox.tech/developers/generate-an-access-token-for-your-own-account) or Mega.nz [account](https://mega.nz) +- Telegram bot api token - [instructions for creating bot and getting a token of api](https://learn.microsoft.com/en-us/azure/bot-service/bot-service-channel-connect-telegram?view=azure-bot-service-4.0) +- Instagram username/password - [login and password from the instagram account, it is advisable to create a new account](https://www.instagram.com/accounts/emailsignup/) -## Environment variables - -| Variable | Description | Default | +## Environment variables +| Variable | Description | Default value | | ------------- | ------------- | ------------- | -| `BOT_VAULT_APPROLE_ID` | [Approve-id created during vault setup](https://developer.hashicorp.com/vault/docs/auth/approle) | `not set` | -| `BOT_VAULT_APPROLE_SECRET_ID` | [Approve-secret-id created during vault setup](https://developer.hashicorp.com/vault/docs/auth/approle) | `not set` | -| `BOT_VAULT_ADDR` | The address at which the vault server will be available to the bot | `http://vault-server:8200` | -| `BOT_INSTA_RATE_LIMIT_TIMEOUT` | Minimum pause between post uploads. 
A pause is necessary so as not to load graphql instagram with frequent queries. After each post, the value increases until it reaches BOT_INSTA_RATE_LIMIT_MAX_TIMEOUT, the value is indicated in seconds | `15` | -| `BOT_INSTA_RATE_LIMIT_MAX_TIMEOUT` | Maximum pause between post uploads. After reaching this limit, the pause counter is reset to the minimum - BOT_INSTA_RATE_LIMIT_TIMEOUT | `360` | -| `BOT_NAME` | The name of the bot | `pyinstabot-downloader` | -| `BOT_VAULT_MOUNT_PATH` | The point of mounting secrets in the vault | `secretv2` | -| `BOT_INSTAGRAM_SESSION_FILE` | The path for storing the file with the instagram session | `instaloader/.instaloader.session` | - -## How to run with docker-compose -1. Building and launching docker container with vault-server -```sh -docker-compose up -d vault-server +| `LOGGER_LEVEL` | [The logging level of the logging module](https://docs.python.org/3/library/logging.html#logging-levels) | `INFO` | +| `BOT_NAME` | The name of the bot, used to determine the unique mount point in the vault | `pyinstabot-downloader` | +| `MESSAGES_CONFIG` | The path to the message template file | `src/configs/messages.json` | +| `STORAGE_TYPE` | Type of target storage for saving uploaded content from instagram (`dropbox`, `mega` or `local`) | `mega` | +| `STORAGE_EXCLUDE_TYPE`| Types of files that you want to exclude from uploading to the cloud | `.txt` | +| `TEMPORARY_DIR` | Temporary directory for saving uploaded content from instagram | `tmp/` | +| `INSTAGRAM_SESSION` | The path for storing the file with the instagram session | `.session` | +| `INSTAGRAM_USERAGENT` | [User Agent to use for HTTP requests. 
Per default, Instaloader pretends being Chrome/92 on Linux](https://instaloader.github.io/cli-options.html#cmdoption-user-agent) | `None` | +| `VAULT_ADDR` | The address at which the vault server will be available to the bot | `None` | +| `VAULT_APPROLE_ID` | [Approle id created during vault setup](https://developer.hashicorp.com/vault/docs/auth/approle) | `None` | +| `VAULT_APPROLE_SECRETID` | [Approle secret id created during vault setup](https://developer.hashicorp.com/vault/docs/auth/approle) | `None` | + + +## Prepare +### Target storage of the content +#### If dropbox is going to be used as the target storage, you need to: +- [Create a dropbox account](https://www.dropbox.com/register) +- Generate an application token according to the instructions [here](https://dropbox.tech/developers/generate-an-access-token-for-your-own-account) and [here](https://developers.dropbox.com/ru-ru/oauth-guide) +- [More documentation](https://www.dropbox.com/developers/documentation/python#overview) + +#### If mega is going to be used as the target storage, you need to: +- [Create a mega account](https://mega.nz/register) +- Don't turn on 2fa (because the module mega.py can't work with 2fa https://github.com/odwyersoftware/mega.py/issues/19) + +#### If the local file system will be used as the target storage: +- Set to environment variable `TEMPORARY_DIR` the desired local path for saving content (ex. `/opt/backup/instagram`) + +Such a strange variable name comes from the logic of the bot. The `TEMPORARY_DIR` variable is used as an intermediate buffer between the stage of downloading content from Instagram and then uploading it to the target storage. + +If the target storage is dropbox or mega, then files from the temporary directory are simply deleted after successful upload to the cloud. + +If the target storage is a local file system, then any further steps to process the files will be unnecessary. 
The process just immediately uploads the content from Instagram to the target directory (temporary directory), after which nothing happens to the files. + +### Storing project configuration and project history + All persistent project data is stored in **Vault**: +- stores project configuration parameters +- keeps the history of already uploaded posts from instagram +- stores information about user authorization events +- stores attributes and user rights + +#### You can use an existing vault-server or launch a new one using docker-compose: +- instructions for starting and configuring a new vault-server +```bash +docker-compose -f docker-compose.dev.yml up vault-server -d +pip3 install -r requirements.txt +curl -L https://gist.githubusercontent.com/obervinov/9bd452fee681f0493da7fd0b2bfe1495/raw/bbc4aad0ed7be064e9876dde64ad8b26b185091b/setup_vault_server.py | python3 --url=http://localhost:8200 --name=pyinstabot-downloader --policy=vault/policy.hcl ``` -2. Configuration vault-server -```sh -# Go to the interactive shell of the vault container -docker exec -ti vault-server sh +- instructions for configuring an existing vault server +```bash +pip3 install -r requirements.txt +curl -L https://gist.githubusercontent.com/obervinov/9bd452fee681f0493da7fd0b2bfe1495/raw/bbc4aad0ed7be064e9876dde64ad8b26b185091b/setup_vault_server.py | python3 --url=http://localhost:8200 --name=pyinstabot-downloader --policy=vault/policy.hcl --token=hvs.123456qwerty +``` -# Init vault server -vault operator init +`setup_vault_server.py` - This script performs a quick and convenient configuration of the vault-server for this bot project: `initial` initialization of vault-server, `unseal` vault-server, creating an isolated `mount point`, loading `policy.hcl`, creating an `approle`. -# Login in vault-server with root token -# ${VAULT_ROOT_TOKEN} - Root token for vault login. Substitute your own value instead of a variable. 
The root token was received in the output at the previous step -vault login ${VAULT_ROOT_TOKEN} -address=http://0.0.0.0:8200 - -# Enabling secret engine - kv version 2 -vault secrets enable -version=2 -path=secretv2 kv - -# Enabling auth with approle method -vault auth enable approle - -### ${BOT_NAME} - your bot's name. Substitute your own value instead of a variable. For example: "pyinstabot-downloader" - -# Write policy rules to file in container -tee ${BOT_NAME}-policy.htl < How to run with docker-compose ```sh -docker logs -f ${BOT_NAME} +export VAULT_APPROLE_ID={change_me} +export VAULT_APPROLE_SECRETID={change_me} +export VAULT_ADDR={change_me} + +docker-compose -f docker-compose.dev.yml up -d +# or +docker-compose -f docker-compose.release.yml up -d ``` -## How to run a bot locally without a docker + +## How to run a bot locally without a docker **You need an already running and configured vault to use the approle and kv v2 engine** -1. Installing python requirements ```sh -python3 -m pip install --upgrade pip pip3 install -r requirements.txt -``` -2. Uploading the bot configuration containing sensitive data to the vault -```sh -# ${TELEGRAM_API_TOKEN} - your bot's api token -# ${INSTAGRAM_USER} - username for authorization in the instagram -# ${INSTAGRAM_PASSWORD} - password for authorization in the instagram -# ${YOUR_TELEGRAM_ID} - telegram id of your account for authorization of messages sent by the bot (whitelist) -# ${DROPBOX_API_TOKEN} - token for access to the dropbox api -vault kv put secretv2/${BOT_NAME}-config/config b_token="${TELEGRAM_API_TOKEN}" i_user="${INSTAGRAM_USER}" i_pass="${INSTAGRAM_PASSWORD}" whitelist="${YOUR_TELEGRAM_ID}" d_token="${DROPBOX_API_TOKEN} -``` -3. Setting environment variables in the host OS (the required values must be obtained at the vault configuration step) -```sh -expot BOT_VAULT_APPROLE_ID="change_me" -expot BOT_VAULT_APPROLE_SECRET_ID="change_me" -``` -4. 
Running bot -```sh -python3 bot.py -``` -## How to build a docker image with a bot -```sh -export BOT_VERSION=v1.0.0 -export BOT_NAME="pyinstabot-downloader" -docker build -t ghcr.io/${GITHUB_USERNAME}/${BOT_NAME}:${BOT_VERSION} . --build-arg BOT_NAME=${BOT_NAME} -docker push ghcr.io/${GITHUB_USERNAME}/${BOT_NAME}:${BOT_VERSION} +export VAULT_APPROLE_ID={change_me} +export VAULT_APPROLE_SECRETID={change_me} +export VAULT_ADDR={change_me} +export BOT_NAME=pyinstabot-downloader +export LOGGER_LEVEL=INFO +export STORAGE_TYPE=mega +export INSTAGRAM_SESSION=/home/python/.config/instaloader/.session +export STORAGE_EXCLUDE_TYPE=".txt" + +python3 src/bot.py ``` diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..9b4eb0476 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,14 @@ +# Security Policy + +## Supported Versions + +Versions supported to fix vulnerabilities + +| Version | Supported | +| ------- | ------------------ | +| 2.0.x | :white_check_mark: | +| 1.0.x | :x: | + +## Reporting a Vulnerability + +In order to inform me about the vulnerability, write the details to the mail `github.obervinov@proton.me` \ No newline at end of file diff --git a/bot.py b/bot.py deleted file mode 100644 index 60aa382e0..000000000 --- a/bot.py +++ /dev/null @@ -1,125 +0,0 @@ -# THIS MAIN FUNCTION FROM TELEGRAM BOT AND ENTRYPOINT FROM DOCKER # - -# Importing modules # -import os -from logger import log, logging -from vault import VaultClient -from users import UsersAuth -from telegram import TelegramBot -from src.instagram import InstagramDownloader -from src.dropbox import DropboxDownloader - -# Environment variables # -bot_name = os.environ.get('BOT_NAME', 'pyinstabot-downloader') -vault_mount_point = os.environ.get('BOT_VAULT_MOUNT_PATH', 'secretv2') -vault_addr = os.environ.get('BOT_VAULT_ADDR', 'http://vault-server:8200') -vault_approle_id = os.environ.get('BOT_VAULT_APPROLE_ID', 'not set') -vault_approle_secret_id = os.environ.get('BOT_VAULT_APPROLE_SECRET_ID', 
'not set') -instagram_session_file = os.environ.get('BOT_INSTAGRAM_SESSION_FILE', 'instaloader/.session') -ratelimit_timeout = int(os.environ.get('BOT_INSTA_RATE_LIMIT_TIMEOUT', 15)) -ratelimit_max_timeout = int(os.environ.get('BOT_INSTA_RATE_LIMIT_MAX_TIMEOUT', 360)) - -# Vault class # -Vault = VaultClient(vault_addr, vault_approle_id, vault_approle_secret_id, vault_mount_point) -# Secret data -instagram_user = Vault.vault_read_secrets(f"{bot_name}-config/config", "i_user") -instagram_pass = Vault.vault_read_secrets(f"{bot_name}-config/config", "i_pass") -dropbox_token = Vault.vault_read_secrets(f"{bot_name}-config/config", "d_token") - -# Telegram class # -Telegram = TelegramBot(bot_name, Vault) -telegram_bot = Telegram.telegram_bot - -# UsersAuth class # -Users_auth = UsersAuth(Vault, bot_name) - -# Dropbox class # -Dropbox = DropboxDownloader(dropbox_token, 8, 4, 120) - -# InstagramDownloader class # -Instagram = InstagramDownloader( - Vault, - instagram_user, - instagram_pass, - instagram_session_file, - bot_name, Dropbox, - telegram_bot - ) - -# Logger initialization # -logging.getLogger('bot.bot').setLevel(logging.INFO) - - -# DEBUG: Printing environment variables and classes# -log.debug(globals()) - - -# Decorators # -# Start command -@telegram_bot.message_handler(commands=['start']) -def start_message(message): - access_status = access_status = Users_auth.check_permission(message.chat.id) - - if access_status == "success": - log.info(f"sending startup message in chat {message.chat.id}") - answer = ( - f"Hi, {message.chat.username}! 
\u270B\n" - f"Access for your account - allowed \U0001F513\n" - f"\U0001F4F1Bot functions:\n" - f" \U0001F4CC Upload post content by instagram link to dropbox cloud\n" - f" \U0001F4CC Uploading all posts content by instagram profile-link to dropbox cloud\n" - f"Just send link \u270C" - ) - telegram_bot.send_message(message.chat.id, answer) - else: - log.error(f"403: Forbidden for username: {message.from_user.username}") - - -# Get all posts in instagram account regex -@telegram_bot.message_handler(regexp="^https://(www\.)?instagram.com/(?!p/)(?!reel/).*$") -def profile_get_all_posts(message): - access_status = access_status = Users_auth.check_permission(message.chat.id) - - if access_status == "success": - profile_username = str(message.text).split("/")[3].split("?")[0] - log.info("Decorator.profile_get_all_posts() --> call Instagram.download_all_posts()") - Instagram.download_all_posts( - profile_username, - ratelimit_timeout, - ratelimit_max_timeout, - message.chat.id - ) - else: - log.error(f"403: Forbidden for username: {message.from_user.username}") - - -# Download post per instagram-link by regex input text -@telegram_bot.message_handler(regexp="^https://www.instagram.com/(p|reel)/.*") -def profile_get_link_post(message): - access_status = access_status = Users_auth.check_permission(message.chat.id) - - if access_status == "success": - # Get shortcode value - shortcode = str(message.text).split("/")[4] - log.info("Decorator.profile_get_link_post() --> call Instagram.download_post()") - response = Instagram.download_post(shortcode) - telegram_bot.send_message(message.chat.id, response) - - else: - log.error(f"403: Forbidden for username: {message.from_user.username}") - - -# Starting bot # -def main(): - while True: - try: - log.info(f"Starting telegram bot: {bot_name}") - log.info(f"Home path: {os.getcwd()}") - log.info(f"Vault: {vault_addr}") - telegram_bot.polling() - except Exception as ex: - log.error(f"Strating telegram bot exception: {ex}") - - -if 
__name__ == "__main__": - main() diff --git a/doc/bot-preview-account.png b/doc/bot-preview-account.png new file mode 100644 index 000000000..0822b487f Binary files /dev/null and b/doc/bot-preview-account.png differ diff --git a/doc/bot-preview-post.png b/doc/bot-preview-post.png new file mode 100644 index 000000000..9a764cb64 Binary files /dev/null and b/doc/bot-preview-post.png differ diff --git a/doc/diagram-code.png b/doc/diagram-code.png new file mode 100644 index 000000000..15a2363b1 Binary files /dev/null and b/doc/diagram-code.png differ diff --git a/doc/diagram-logic.png b/doc/diagram-logic.png new file mode 100644 index 000000000..f5e27ed7f Binary files /dev/null and b/doc/diagram-logic.png differ diff --git a/doc/instagram-profile.png b/doc/instagram-profile.png deleted file mode 100644 index c02907d68..000000000 Binary files a/doc/instagram-profile.png and /dev/null differ diff --git a/doc/pyinstabot-downloader.drawio b/doc/pyinstabot-downloader.drawio new file mode 100644 index 000000000..ce195ccc9 --- /dev/null +++ b/doc/pyinstabot-downloader.drawio @@ -0,0 +1,378 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml new file mode 100644 index 000000000..07379c44c --- /dev/null +++ 
b/docker-compose.dev.yml @@ -0,0 +1,45 @@ +# manifest for the development process with the local version of the vault +--- +version: '3' +services: + vault-server: + image: vault + container_name: vault + command: ["server"] + environment: + # yamllint disable-line rule:line-length + - 'VAULT_LOCAL_CONFIG={"backend": {"file": {"path": "/vault/data"}}, "default_lease_ttl": "1h", "max_lease_ttl": "720h", "listener": {"tcp": {"address": "0.0.0.0:8200", "tls_disable": "1"}}' + - VAULT_API_ADDR=http://0.0.0.0:8200 + - VAULT_PORT=8200 + volumes: + - ./vault/data:/vault/data + ports: + - "0.0.0.0:8200:8200" + cap_add: + - IPC_LOCK + + pyinstabot-downloader: + build: + context: . + dockerfile: Dockerfile + args: + PROJECT_NAME: pyinstabot-downloader + # yamllint disable-line rule:line-length + PROJECT_DESCRIPTION: "This project is a telegram bot that allows you to backup content from your Instagram profile to the Dropbox/Mega cloud or to the local filesystem." + PROJECT_VERSION: v2.0.0 + container_name: pyinstabot-downloader + restart: always + environment: + - BOT_NAME=pyinstabot-downloader + - VAULT_APPROLE_ID=${PB_VAULT_APPROLE_ID} + - VAULT_APPROLE_SECRETID=${PB_VAULT_APPROLE_SECRETID} + - VAULT_ADDR=http://vault-server:8200 + - LOGGER_LEVEL=DEBUG + - STORAGE_TYPE=local + - INSTAGRAM_SESSION=${INSTAGRAM_SESSION} + - STORAGE_EXCLUDE_TYPE= + - MESSAGES_CONFIG=configs/messages.json + depends_on: + - vault-server + volumes: + - ../.config/instaloader:/home/python/.config/instaloader diff --git a/docker-compose.prerelease.yml b/docker-compose.prerelease.yml new file mode 100644 index 000000000..a877ca66a --- /dev/null +++ b/docker-compose.prerelease.yml @@ -0,0 +1,21 @@ +# manifest for launching and testing the pre-release version +--- +version: '3' +services: + pyinstabot-downloader: + image: ghcr.io/obervinov/pyinstabot-downloader:release-v2.0.0 + container_name: pyinstabot-downloader + restart: always + pull_policy: always + environment: + - 
BOT_NAME=pyinstabot-downloader + - VAULT_APPROLE_ID=${PB_VAULT_APPROLE_ID} + - VAULT_APPROLE_SECRETID=${PB_VAULT_APPROLE_SECRETID} + - VAULT_ADDR=${VAULT_ADDR} + - LOGGER_LEVEL=INFO + - STORAGE_TYPE=mega + - INSTAGRAM_SESSION=${INSTAGRAM_SESSION} + - STORAGE_EXCLUDE_TYPE=.txt + - MESSAGES_CONFIG=configs/messages.json + volumes: + - ../.config/instaloader:/home/python/.config/instaloader diff --git a/docker-compose.release.yml b/docker-compose.release.yml new file mode 100644 index 000000000..9fb2c0860 --- /dev/null +++ b/docker-compose.release.yml @@ -0,0 +1,20 @@ +# the main manifest for launching the release version of the project +--- +version: '3' +services: + pyinstabot-downloader: + image: ghcr.io/obervinov/pyinstabot-downloader:v2.0.0 + container_name: pyinstabot-downloader + restart: always + environment: + - BOT_NAME=pyinstabot-downloader + - VAULT_APPROLE_ID=${PB_VAULT_APPROLE_ID} + - VAULT_APPROLE_SECRETID=${PB_VAULT_APPROLE_SECRETID} + - VAULT_ADDR=${VAULT_ADDR} + - LOGGER_LEVEL=INFO + - STORAGE_TYPE=mega + - INSTAGRAM_SESSION=${INSTAGRAM_SESSION} + - STORAGE_EXCLUDE_TYPE=.txt + - MESSAGES_CONFIG=configs/messages.json + volumes: + - ../.config/instaloader:/home/python/.config/instaloader diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 65554e531..000000000 --- a/docker-compose.yml +++ /dev/null @@ -1,31 +0,0 @@ -version: '3' -services: - vault-server: - image: vault - container_name: vault - command: ["server"] - environment: - - 'VAULT_LOCAL_CONFIG={"backend": {"file": {"path": "/vault/data"}}, "default_lease_ttl": "168h", "max_lease_ttl": "720h", "listener": {"tcp": {"address": "0.0.0.0:8200", "tls_disable": "1"}}' - - VAULT_API_ADDR=http://0.0.0.0:8200 - - VAULT_PORT=8200 - volumes: - - ./vault/data:/vault/data - cap_add: - - IPC_LOCK - - pyinstabot-downloader: - build: - context: . 
- dockerfile: Dockerfile - args: - BOT_NAME: pyinstabot-downloader - container_name: pyinstabot-downloader - restart: always - environment: - - BOT_VAULT_APPROLE_ID=${VAULT_APPROLE_ID} - - BOT_VAULT_APPROLE_SECRET_ID=${VAULT_APPROLE_SECRET_ID} - - BOT_VAULT_ADDR=http://vault-server:8200 - - BOT_INSTA_RATE_LIMIT_TIMEOUT=15 - - BOT_INSTA_RATE_LIMIT_MAX_TIMEOUT=360 - depends_on: - - vault-server diff --git a/requirements.txt b/requirements.txt index 80bbc6689..72d8e92c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,9 @@ -instaloader==4.9.2 -pyTelegramBotAPI==4.1.1 -hvac==0.11.2 +instaloader==4.10.0 dropbox==11.21.0 -git+https://github.com/obervinov/logger-package.git#egg=logger -git+https://github.com/obervinov/users-package.git#egg=users -git+https://github.com/obervinov/vault-package.git#egg=vault -git+https://github.com/obervinov/telegram-package.git#egg=telegram \ No newline at end of file +mega.py==1.0.8 +tenacity==5.1.5 +messages @ git+https://github.com/obervinov/messages-package.git@v1.0.0 +users @ git+https://github.com/obervinov/users-package.git@v1.0.5 +logger @ git+https://github.com/obervinov/logger-package.git@v1.0.1 +telegram @ git+https://github.com/obervinov/telegram-package.git@v1.1.3 +vault @ git+https://github.com/obervinov/vault-package.git@v2.0.1 \ No newline at end of file diff --git a/src/bot.py b/src/bot.py new file mode 100644 index 000000000..cdf9763ec --- /dev/null +++ b/src/bot.py @@ -0,0 +1,225 @@ +""" +This module contains the main code for the bot +to work and contains the main logic linking the additional modules. +""" +from logger import log +import constants + + +# decorators +@constants.BOT.message_handler(commands=['start']) +def start_message( + message: constants.TELEGRAM_CLIENT.telegram_types.Message = None +) -> None: + """ + The function for intercepting the start command sent to the bot. + + Args: + :param message (telegram_types.Message): the message received by the bot. 
+ + Returns: + None + """ + if constants.AUTH_CLIENT.check_permissions( + message.chat.id + ) == "allow": + log.info( + '[%s] sending startup message in chat %s', + __name__, + message.chat.id + ) + + constants.BOT.send_message( + chat_id=message.chat.id, + text=constants.MESSAGES_GENERATOR.render_template( + template_alias='hello_message', + username=message.from_user.username, + userid=message.chat.id + ) + ) + + +@constants.BOT.message_handler(regexp=r"^https://(www\.)?instagram.com/(?!p/)(?!reel/).*$") +def get_posts_account( + message: constants.TELEGRAM_CLIENT.telegram_types.Message = None +) -> None: + """ + A function for intercepting links sent to the bot to the Instagram profile. + + Args: + :param message (telegram_types.Message): the message received by the bot. + + Returns: + None + """ + if constants.AUTH_CLIENT.check_permissions( + message.chat.id + ) == "allow": + log.info( + '[%s] starting handler for profile url %s...', + __name__, + message.text + ) + + editable_message = None + stats_message_id = None + account_name = message.text.split("/")[3].split("?")[0] + account_info = constants.DOWNLOADER_INSTANCE.get_download_info( + account=account_name + ) + + constants.BOT.send_message( + message.chat.id, + constants.MESSAGES_GENERATOR.render_template( + template_alias='account_info', + account_name=account_name, + shortcodes_count=account_info['shortcodes_total_count'] + ) + ) + + for shortcode in account_info['shortcodes_for_download']: + # download the contents of an instagram post to a temporary folder + d_response = constants.DOWNLOADER_INSTANCE.get_post_content( + shortcode=shortcode + ) + # upload the received content to the destination storage + _ = constants.UPLOADER_INSTANCE.start_upload( + sub_dir_name=d_response['owner'] + ) + # render progressbar + progressbar = constants.MESSAGES_GENERATOR.render_progressbar( + total_count=account_info['shortcodes_total_count'], + current_count=account_info['shortcodes_exist_count'] + ) + 
def _extract_shortcode(url: str) -> str:
    """
    Extracts the post shortcode from an instagram post or reel link.

    Args:
        :param url (str): link like https://www.instagram.com/(p|reel)/<shortcode>[/][?query].

    Returns:
        (str) the shortcode without query string or fragment,
              or an empty string when the link does not contain one.
    """
    parts = url.split("/")
    if len(parts) < 5:
        return ""
    # links copied from the app often look like ".../p/<code>?igshid=..." (no trailing slash),
    # so the query string and the fragment have to be cut off explicitly
    return parts[4].split("?")[0].split("#")[0].strip()


@constants.BOT.message_handler(regexp="^https://www.instagram.com/(p|reel)/.*")
def get_post_account(
    message: constants.TELEGRAM_CLIENT.telegram_types.Message = None
) -> None:
    """
    A function for intercepting links sent by a bot to an Instagram post.
    Skips posts that are already marked as 'downloaded' in the vault history,
    otherwise downloads the post, uploads it to the target storage and reports the result.

    Args:
        :param message (telegram_types.Message): the message received by the bot.

    Returns:
        None
    """
    if constants.AUTH_CLIENT.check_permissions(
        message.chat.id
    ) == "allow":
        log.info(
            '[%s] starting handler for post url %s...',
            __name__,
            message.text
        )

        shortcode = _extract_shortcode(message.text)
        if not shortcode:
            log.warning(
                '[%s] the url %s does not contain a post shortcode, skipped.',
                __name__,
                message.text
            )
            return

        # download history
        # we check the whole history instead of getting the owner by a short code to reduce
        # the frequency of requests to the instagram api
        for owner in constants.VAULT_CLIENT.list_secrets(path='history/'):
            # one read per owner: the secret is a mapping shortcode -> status,
            # so there is no need to request every key separately
            history = constants.VAULT_CLIENT.read_secret(path=f"history/{owner}") or {}
            if history.get(shortcode) == 'downloaded':
                constants.BOT.send_message(
                    chat_id=message.chat.id,
                    text=constants.MESSAGES_GENERATOR.render_template(
                        template_alias='post_already_downloaded',
                        post_id=shortcode,
                        owner=owner
                    )
                )
                log.warning(
                    '[%s] the post %s of the owner %s has already been downloaded, skipped.',
                    __name__,
                    shortcode,
                    owner
                )
                return
        # download the contents of an instagram post to a temporary folder
        d_response = constants.DOWNLOADER_INSTANCE.get_post_content(
            shortcode=shortcode
        )
        # upload the received content to the destination storage
        u_response = constants.UPLOADER_INSTANCE.start_upload(
            sub_dir_name=d_response['owner']
        )
        constants.BOT.send_message(
            chat_id=message.chat.id,
            text=constants.MESSAGES_GENERATOR.render_template(
                'post_stats_info',
                post_id=shortcode,
                download_response=d_response,
                upload_response=u_response
            )
        )


# entrypoint
def main():
    """
    The main entry point of the project.
    Logs the bot name and starts telegram polling; polling is started again
    every time it returns.

    Args:
        None

    Returns:
        None
    """
    while True:
        log.info(
            'Starting bot %s',
            constants.BOT_NAME
        )
        constants.BOT.polling()


if __name__ == "__main__":
    main()
import os
from telegram import TelegramBot
from users import UsersAuth
from messages import Messages
from vault import VaultClient
from modules.downloader import Downloader
from modules.uploader import Uploader


# environment variables (every value can be overridden from the environment)
LOGGER_LEVEL = os.getenv('LOGGER_LEVEL', 'INFO')
BOT_NAME = os.getenv('BOT_NAME', 'pyinstabot-downloader')
MESSAGES_CONFIG = os.getenv('MESSAGES_CONFIG', 'src/configs/messages.json')
STORAGE_TYPE = os.getenv('STORAGE_TYPE', 'mega')
TEMPORARY_DIR = os.getenv('TEMPORARY_DIR', 'tmp/')
INSTAGRAM_SESSION = os.getenv('INSTAGRAM_SESSION', '.session')
INSTAGRAM_USERAGENT = os.getenv('INSTAGRAM_USERAGENT', None)
STORAGE_EXCLUDE_TYPE = os.getenv('STORAGE_EXCLUDE_TYPE', '.txt')


# instances (created once at import time and shared by the whole project)
VAULT_CLIENT = VaultClient(name=BOT_NAME)

TELEGRAM_CLIENT = TelegramBot(vault=VAULT_CLIENT)

BOT = TELEGRAM_CLIENT.telegram_bot

AUTH_CLIENT = UsersAuth(vault=VAULT_CLIENT)

MESSAGES_GENERATOR = Messages(config_path=MESSAGES_CONFIG)

# the downloader writes content to TEMPORARY_DIR ...
_DOWNLOADER_AUTH = {'sessionfile': INSTAGRAM_SESSION}
_DOWNLOADER_SETTINGS = {'savepath': TEMPORARY_DIR, 'useragent': INSTAGRAM_USERAGENT}
DOWNLOADER_INSTANCE = Downloader(
    auth=_DOWNLOADER_AUTH,
    settings=_DOWNLOADER_SETTINGS,
    vault=VAULT_CLIENT
)

# ... and the uploader reads it from the same directory
_UPLOADER_STORAGE = {
    'type': STORAGE_TYPE,
    'temporary': TEMPORARY_DIR,
    'cloud_root_path': BOT_NAME,
    'exclude_type': STORAGE_EXCLUDE_TYPE
}
UPLOADER_INSTANCE = Uploader(
    storage=_UPLOADER_STORAGE,
    vault=VAULT_CLIENT
)
try: - dropbox_session = dropbox.create_session(max_connections=max_connections, proxies=None) - self.dropbox_object = dropbox.Dropbox( - dropbox_token, - max_retries_on_error=max_retries_on_error, - max_retries_on_rate_limit=None, - user_agent=None, - session=dropbox_session, - headers=None, - timeout=timeout - ) - except Exception as ex: - log.error(f"[class.{__class__.__name__}] creating dropbox session faild: {ex}") - - - def upload_file(self, - upload_dir_name: str = None, - dropbox_dir_name: str = None - ) -> None: - - files = [] - local_path_profile_content = f"{self.homepath}/{upload_dir_name}" - log.info(f"[class.{__class__.__name__}] starting upload files from {upload_dir_name}") - - for r, d, f in os.walk(local_path_profile_content): - for file in f: - files.append(os.path.join(r, file)) - - for f in files: - filename = f.split("/") - len_filename = len(filename) - dropbox_filename = "/" + dropbox_dir_name + "/" + filename[len_filename-1] - - if ".txt" in filename[len_filename-1]: - os.remove(f) - - else: - with open(f, 'rb') as file_transfer: - try: - upload_file = self.dropbox_object.files_upload( - file_transfer.read(), - dropbox_filename, - autorename=True - ) - log.info( - f"[class.{__class__.__name__}]" - f"file {upload_file.name} has been uploaded" - ) - # extract metainfo of object - id = upload_file.id - size = upload_file.size - - except Exception as ex: - log.error( - f"[class.{__class__.__name__}]" - f"uploading picture to dropbox api faild: {ex}" - ) - break - - file_transfer.close() - os.remove(f) - status = "success" - response = f"{id} successful transfering {size} bytes" - - if len(os.listdir(local_path_profile_content)) == 0: - os.rmdir(local_path_profile_content) - - return status, response diff --git a/src/instagram.py b/src/instagram.py deleted file mode 100644 index 0dd6ee561..000000000 --- a/src/instagram.py +++ /dev/null @@ -1,221 +0,0 @@ -# THIS FILE FROM INSTGRAM API # -# Importing modules # -# 
https://instaloader.github.io/module/instaloader.html -import instaloader -import time -import datetime -from logger import log -from src.progressbar import ProgressBar -from ast import Str - - -class InstagramDownloader: - - def __init__(self, - Vault=None, - user: Str = None, - password: Str = None, - sessionfile: Str = None, - bot_name: Str = None, - Dropbox=None, - Telebot=None - ) -> None: - - self.Vault = Vault - self.Dropbox = Dropbox - self.Telebot = Telebot - self.sessionfile = sessionfile - self.bot_name = bot_name - - instaloaderObject = instaloader.Instaloader( - sleep=True, - quiet=True, - user_agent=None, - dirname_pattern="temp-data/{profile}_{shortcode}", - filename_pattern="{profile}_{shortcode}_{filename}", - download_pictures=True, - download_videos=True, - download_video_thumbnails=True, - download_geotags=False, - download_comments=False, - save_metadata=False, - compress_json=True, - post_metadata_txt_pattern=None, - storyitem_metadata_txt_pattern=None, - max_connection_attempts=3, - request_timeout=300.0, - rate_controller=None, - resume_prefix='iterator', - check_resume_bbd=True, - slide=None) - - try: - instaloaderObject.load_session_from_file(user, sessionfile) - self.instaloaderObject = instaloaderObject - log.info(f"[class.{__class__.__name__}] load session file successful") - - except Exception as ex: - log.warning(f"[class.{__class__.__name__}] load session file error: {ex}") - - try: - instaloaderObject.login(user, password) - self.instaloaderObject = instaloaderObject - log.info("[class.{__class__.__name__}] login with username/password successful") - - log.info(f"[class.{__class__.__name__}] saving new session in file: {sessionfile}") - instaloaderObject.save_session_to_file(sessionfile) - - except Exception as ex: - log.error(f"[class.{__class__.__name__}] faild login with username/password: {ex}") - - - def get_posts_list(self, username: str = None): - - try: - profile = 
instaloader.Profile.from_username(self.instaloaderObject.context, username) - log.info(f"[class.{__class__.__name__}] reading profile {username} was successful") - - posts = profile.get_posts() - self.posts_count = posts.count - log.info( - f"[class.{__class__.__name__}] " - f"reading list of posts from account {username} " - f"was successful" - ) - - shortcodes_list = list() - shortcodes_list_exist = list() - profile_path = f"{self.bot_name}-data/{username}" - - log.info(f"[class.{__class__.__name__}] reading history in vault for {username} ...") - shortcodes = self.Vault.vault_read_secrets(profile_path) - - if "InvalidPath" not in shortcodes: - # Checking the status of already uploaded posts - log.info( - f"[class.{__class__.__name__}] " - f"excluding shortcodes already downloaded..." - ) - for key, value in shortcodes.items(): - if value == 'success': - shortcodes_list_exist.append(key) - - # Exclude already uploaded posts from the list - log.info( - f"[class.{__class__.__name__}] " - f"building list of shortcodes for downloaded..." 
- ) - for post in posts: - if post.shortcode not in shortcodes_list_exist: - shortcodes_list.append(post.shortcode) - - log.info( - f"[class.{__class__.__name__}] " - f"building list of posts for downloaded was be done" - ) - return shortcodes_list - - except Exception as ex: - log.error( - f"[class.{__class__.__name__}] " - f"readed posts from account {username} " - f"faild {ex}" - ) - - - def download_post(self, shortcode: str = None): - - # Find and download post content - try: - post = instaloader.Post.from_shortcode(self.instaloaderObject.context, shortcode) - owner_name = str(post.owner_username) - save_dir_name = f"temp-data/{owner_name}_{shortcode}" - - # Progressbar options for vault (dict for statistics) - vault_path = f"{self.bot_name}-data/" + owner_name - - self.instaloaderObject.download_post(post, '') - log.info( - f"[class.{__class__.__name__}] " - f"content of post {shortcode} " - f"successful downloaded in temp storage" - ) - - status, response = self.Dropbox.upload_file(save_dir_name, owner_name) - - # recording statistics on download into vault - self.Vault.vault_put_secrets(vault_path, shortcode, status) - - return response - - except Exception as ex: - log.error( - f"[class.{__class__.__name__}] " - f"content by post {shortcode} " - f"faild downloaded: {ex}" - ) - self.Vault.vault_put_secrets(vault_path, shortcode, 'faild') - - - def download_all_posts(self, - username: str = None, - ratelimit_timeout: int = 30, - ratelimit_max_timeout: int = 1800, - chat_id: str = None - ): - - # unchangeable minimum timeout value - # to be reset to default value when ratelimit_max_timeout is reached - ratelimit_timeout_default = ratelimit_timeout - # progressbar and stats options/vars - message_object_stats = '' - editable_message_bot = 0 - - shortcodes_list = self.get_posts_list(username) - - answer = ( - f"\u2B50 Reading posts from account {username} successful.\n" - f"\U0001F303 Posts count: {self.posts_count}\n" - f"\U0001F6A6 Ratelimit timings: " - 
f"min {ratelimit_timeout}s - max {ratelimit_max_timeout}s" - ) - self.Telebot.send_message(chat_id, answer) - - for shortcode in shortcodes_list: - self.download_post(shortcode) - - Progressbar = ProgressBar(self.Vault, self.bot_name, username) - response = Progressbar.get(self.posts_count, "in_progress") - - # checking the condition whether the first message with statistics has already been sent - # for its subsequent editing - if editable_message_bot == 0: - message_object_stats = self.Telebot.send_message(chat_id, response) - editable_message_bot = 1 - else: - self.Telebot.edit_message_text(response, chat_id, message_object_stats.id) - - # pause downloaded for ratelimit - log.warning( - f"[class.{__class__.__name__}] " - f"ratelimit aplied in {datetime.datetime.now().strftime('%H:%M:%S')}: " - f"{ratelimit_timeout}" - ) - time.sleep(ratelimit_timeout) - - # reset timeout value to default if value too large (4600s/50m) - if ratelimit_timeout > ratelimit_max_timeout: - ratelimit_timeout = ratelimit_timeout_default - - # we increase the timeout so as not to get into the blacklist of instagram - ratelimit_timeout = int(ratelimit_timeout * 1.1) - - Finally_Progressbar = ProgressBar(self.Vault, self.bot_name, username) - finally_response = Finally_Progressbar.get(self.posts_count, "finally") - - log.info( - f"[class.{__class__.__name__}] " - f"all available posts from account {username} " - f"has been downloaded" - ) - self.Telebot.edit_message_text(finally_response, chat_id, message_object_stats.id) diff --git a/src/modules/__init__.py b/src/modules/__init__.py new file mode 100644 index 000000000..88ae83150 --- /dev/null +++ b/src/modules/__init__.py @@ -0,0 +1,5 @@ +""" +This is just a special file that tells pip that your main module is in this folder +No need to add anything here. 
Feel free to delete this line when you make your own package +Leave it empty +""" diff --git a/src/modules/downloader.py b/src/modules/downloader.py new file mode 100644 index 000000000..a6f2adad5 --- /dev/null +++ b/src/modules/downloader.py @@ -0,0 +1,314 @@ +""" +This module interacts with the instagram api and uploads content to a temporary local directory. +Supports downloading the content of the post by link, +the entire content of posts in the account, +getting information about the account +and saving the history of already downloaded messages in the vault. +https://instaloader.github.io/module/instaloader.html +""" +import os +import instaloader +from logger import log + + +class Downloader: + """ + The Instagram api instance is created by this class + and contains a set of all the necessary posts + for uploading content from Instagram accounts to local storage. + """ + + def __init__( + self, + auth: dict = None, + settings: dict = None, + vault: object = None + ) -> None: + """ + The method for create a new instagram api client instance. + + Args: + :param auth (dict): dictionary with authorization parameters. + :param username (str): username for authentication in the instagram api. + :param password (str): password for authentication in the instagram api. + :param sessionfile (str): the path to the session file of the instagram. + :param anonymous (bool): access to open profiles without logging in to an account. + only for tests. + :param settings (dict): dictionary with settings instaloader parameters. + :param savepath (str): local directory for saving downloaded content. + :param useragent (str): user-agent header. + :param vault (object): instance of vault for recording or reading download history. 
+ + Returns: + None + + Examples: + >>> DOWNLOADER_INSTANCE = Downloader( + auth={ + 'anonymous': true + }, + settings={ + 'savepath': TEMPORARY_DIR, + 'useragent': INSTAGRAM_USERAGENT + }, + vault=VAULT_CLIENT + ) + >>> DOWNLOADER_INSTANCE = Downloader( + auth={ + 'sessionfile': INSTAGRAM_SESSION + }, + settings={ + 'savepath': TEMPORARY_DIR, + 'useragent': INSTAGRAM_USERAGENT + }, + vault=VAULT_CLIENT + ) + >>> DOWNLOADER_INSTANCE = Downloader( + auth={ + 'username': INSTAGRAM_USERNAME, + 'password': INSTAGRAM_PASSWORD + }, + settings={ + 'savepath': TEMPORARY_DIR, + 'useragent': INSTAGRAM_USERAGENT + }, + vault=VAULT_CLIENT + ) + """ + self.auth = auth + self.settings = settings + self.vault = vault + self.instaloader = instaloader.Instaloader( + quiet=True, + user_agent=self.settings['useragent'], + dirname_pattern=f"{settings['savepath']}/{{profile}}", + filename_pattern='{profile}_{shortcode}_{filename}', + download_pictures=True, + download_videos=True, + download_video_thumbnails=True, + save_metadata=False, + compress_json=True, + post_metadata_txt_pattern=None, + storyitem_metadata_txt_pattern=None, + check_resume_bbd=True, + fatal_status_codes=[400, 401, 429, 500] + ) + + if self.auth.get('anonymous'): + auth_status = self._login( + method='anonymous' + ) + elif os.path.exists( + self.auth.get('sessionfile') + ): + auth_status = self._login( + method='session' + ) + else: + auth_status = self._login( + method='password' + ) + log.info( + '[class.%s] downloader instance init with account %s: %s', + __class__.__name__, + self.auth['username'], + auth_status + ) + + def _login( + self, + method: str = None + ) -> str | None: + """ + The method for authentication in instagram api. + + Args: + :param method (str): authentication method 'password', 'session' or 'anonymous'. 
+ + Returns: + (str) success + or + None + """ + if not self.auth.get('username') or not self.auth.get('anonymous'): + self.auth['username'] = self.vault.read_secret( + 'configuration/instagram', + 'username' + ) + + if method == 'session': + self.instaloader.load_session_from_file( + self.auth['username'], + self.auth['sessionfile'] + ) + log.info( + '[class.%s] session file was load success', + __class__.__name__ + ) + return 'success' + + if method == 'password': + if not self.auth.get('password'): + self.auth['password'] = self.vault.read_secret( + 'configuration/instagram', + 'password' + ) + self.instaloader.login( + self.auth['username'], + self.auth['password'] + ) + self.instaloader.save_session_to_file( + self.auth['sessionfile'] + ) + log.info( + '[class.%s] login with password was successful. Save session in %s', + __class__.__name__, + self.auth['sessionfile'] + ) + return 'success' + + if method == 'anonymous': + log.warning( + '[class.%s] initialization without logging into an account (anonymous)', + __class__.__name__ + ) + return None + + return None + + def get_posts( + self, + username: str = None + ) -> list | None: + """ + The method for getting a list posts of instagram account. + + Args: + :param username (str): instagram account profile name. + + Returns: + (list) ['post_id_1', 'post_id_2', 'post_id_3'] + or + None + """ + posts_list = [] + profile = instaloader.Profile.from_username( + self.instaloader.context, + username + ) + log.info( + '[class.%s] the %s profile was read success', + __class__.__name__, + username + ) + for post in profile.get_posts(): + posts_list.append(post.shortcode) + + return posts_list + + def get_post_content( + self, + shortcode: str = None + ) -> dict | None: + """ + The method for getting the content of a post from a specified Instagram account. + + Args: + :param shortcode (str): the ID of the record for downloading content. 
+ + Returns: + (dict) { + 'post': shortcode, + 'owner': post.owner_username, + 'type': post.typename, + 'status': 'downloaded' + } + """ + post = instaloader.Post.from_shortcode( + self.instaloader.context, + shortcode + ) + self.instaloader.download_post(post, '') + log.info( + '[class.%s] the contents of the %s have been successfully downloaded ' + 'to the temporary storage', + __class__.__name__, + shortcode + ) + self.vault.write_secret( + f'history/{post.owner_username}', + shortcode, + "downloaded" + ) + return { + 'post': shortcode, + 'owner': post.owner_username, + 'type': post.typename, + 'status': 'downloaded' + } + + def get_download_info( + self, + account: str = None + ) -> dict | None: + """ + The method for collecting all the necessary information + to download all posts from the specified account. + Checks the history of already uploaded posts + and provides information for cyclic downloading. + + Args: + :param account (str): instagram account name to check the uploaded history. 
+ + Returns: + (dict) { + "shortcodes_for_download": fresh_shortcodes, + "shortcodes_total_count": len(account_shortcodes), + "shortcodes_exist": len(history_shortcodes), + "shortcodes_exist_count": len(history_shortcodes.keys()) + } + """ + log.info( + '[class.%s] excluding shortcodes that are already downloaded...', + __class__.__name__ + ) + # account_shortcodes - list of shortcodes received from instagram + account_shortcodes = self.get_posts( + username=account + ) + # fresh_shortcodes - list of shortcodes that have not been downloaded yet + fresh_shortcodes = [] + # history_shortcodes - dict of shortcodes that have already been previously uploaded + try: + history_shortcodes = self.vault.read_secret( + f'history/{account}' + ) + # pylint: disable=W0718 + # will be fixed after https://github.com/obervinov/vault-package/issues/31 + except Exception as secret_not_found: + history_shortcodes = {} + log.warning( + '[class.%s] secret history/%s does not exist: %s', + __class__.__name__, + account, + secret_not_found + ) + for shortcode in account_shortcodes: + if shortcode not in history_shortcodes.keys(): + fresh_shortcodes.append(shortcode) + log.info( + '[class.%s] account metadata:\n' + 'already downloaded shortcodes: %s\n' + 'fresh shortcodes: %s\n' + 'shortcodes for download: %s', + __class__.__name__, + history_shortcodes, + account_shortcodes, + fresh_shortcodes + ) + return { + "shortcodes_for_download": fresh_shortcodes, + "shortcodes_total_count": len(account_shortcodes), + "shortcodes_exist": len(history_shortcodes), + "shortcodes_exist_count": len(history_shortcodes.keys()) + } diff --git a/src/modules/uploader.py b/src/modules/uploader.py new file mode 100644 index 000000000..5280e6303 --- /dev/null +++ b/src/modules/uploader.py @@ -0,0 +1,255 @@ +""" +This module processes the content uploaded from Instagram +and uploads the found media files (image, video) to the destination storage. 
+""" +import os +import dropbox +from mega import Mega +from logger import log + + +class Uploader: + """ + This class creates an instance with a connection + to the target storage for uploading local media content. + """ + + def __init__( + self, + storage: dict = None, + vault: object = None + ) -> None: + """ + The method creates an instance with a connection + to the target storage for uploading local media content. + + Args: + :param storage (dict): dictionary with storage parameters. + :param type (str): type of storage for uploading content + 'local' or 'dropbox' or 'mega'. + :param temporary (str): the temporary directory from which you want + to read the content and delete it after uploading. + :param cloud_root_path (str): a subdirectory in the cloud storage for saving content + :param vault (object): instance of vault for reading authorization data. + + Returns: + None + + Examples: + >>> UPLOADER_INSTANCE = Uploader( + storage={ + 'type': STORAGE_TYPE, + 'temporary': TEMPORARY_DIR, + 'cloud_root_path': BOT_NAME, + 'exclude_type': STORAGE_EXCLUDE_TYPE + }, + vault=VAULT_CLIENT + ) + """ + self.storage = storage + self.temporary_dir = f"{os.getcwd()}/{self.storage['temporary']}" + self.vault = vault + + log.info( + '[class.%s] uploader instance init with "%s" target storage', + __class__.__name__, + storage['type'] + ) + + if self.storage['type'] == 'dropbox': + self.dropbox_client = dropbox.Dropbox( + oauth2_access_token=self.vault.read_secret( + 'configuration/dropbox', + 'token' + ), + timeout=60 + ) + + if self.storage['type'] == 'mega': + self.mega_client = Mega().login( + self.vault.read_secret( + 'configuration/mega', + 'username' + ), + self.vault.read_secret( + 'configuration/mega', + 'password' + ) + ) + + self._check_incomplete_transfers() + + def _check_incomplete_transfers( + self, + ) -> None: + """ + The method for checking uploads in temp storage + that for some reason could not be uploaded to the cloud. 
+ + Args: + None + + Returns: + None + """ + log.info( + '[class.%s] checking the pending uploads in the temporary directory ...', + __class__.__name__ + ) + + for _, artifacts, _ in os.walk(self.temporary_dir): + for artifact in artifacts: + log.warning( + '[class.%s] an unloaded artifact was found %s', + __class__.__name__, + artifact + ) + self.start_upload( + os.path.join(artifact) + ) + + def start_upload( + self, + sub_dir_name: str = None + ) -> dict: + """ + The method of preparing media files for transfer to the target storage (cloud or local). + + Args: + :param sub_dir_name (str): the name of the subdirectory where the content is located + is equivalent to the record ID. + + Returns: + (dict) { + '/root/path/shortcode/file1.jpeg': 'uploaded', + '/root/path/shortcode/file2.jpeg': None + } + + (explanation of values) + (str) 'uploaded' + (this means that the file has been successfully uploaded to the cloud) + (str) 'None' + (this means that an error has occurred the file is not uploaded to the cloud) + (str) 'saved' + (this means that the file must remain in the local (temporary directory)) + (and it is not required to perform any actions with it) + """ + transfers = {} + + log.info( + '[class.%s] preparing media files for transfer to the "%s"', + __class__.__name__, + self.storage['type'] + ) + + for root, _, files in os.walk( + f'{self.temporary_dir}{sub_dir_name}' + ): + for file in files: + if self.storage['exclude_type'] and self.storage['exclude_type'] in file: + os.remove( + os.path.join(root, file) + ) + else: + transfers[file] = self.file_upload( + os.path.join(root, file), + sub_dir_name + ) + if transfers[file] == 'uploaded': + os.remove( + os.path.join(root, file) + ) + + if len(os.listdir(f'{self.temporary_dir}{sub_dir_name}')) == 0: + os.rmdir(f'{self.temporary_dir}{sub_dir_name}') + + log.info( + '[class.%s] All TRANSFERS: %s', + __class__.__name__, + transfers + ) + return transfers + + def file_upload( + self, + source: str = None, + 
destination: str = None + ) -> str | None: + """ + The method of uploading the contents of the target directory + to the cloud or local directory. + + Args: + :param source (str): the path to the local file to transfer to the target storage. + :param destination (str): the name of the target directory in the destination storage. + + Returns: + (str) 'uploaded' + or + None + """ + log.info( + '[class.%s] starting upload file %s to %s//:%s', + __class__.__name__, + source, + self.storage['type'], + destination + ) + + if self.storage['type'] == "local": + return "saved" + + if self.storage['type'] == 'mega': + directory = f"{self.storage['cloud_root_path']}/{destination}" + try: + mega_folder = self.mega_client.find( + directory, + exclude_deleted=True + ) + if not mega_folder: + self.mega_client.create_folder( + directory + ) + response = self.mega_client.upload( + source, + mega_folder[0] + ) + log.info( + '[class.%s] %s successful transferred', + __class__.__name__, + response + ) + return "uploaded" + + # pylint: disable=W0718 + # because the mega library does not contain exceptions + except Exception as mega_exception: + log.error( + '[class.%s] error when uploading via the mega api: %s', + __class__.__name__, + mega_exception + ) + log.warning( + '[class.%s] trying again file_upload()', + __class__.__name__, + ) + self.file_upload( + source, + destination, + ) + + if self.storage['type'] == 'dropbox': + with open(source, 'rb') as file_transfer: + response = self.dropbox_client.files_upload( + file_transfer.read(), + f'/{destination}/{source.split("/")[-1]}' + ) + log.info( + '[class.%s] %s successful transferred', + __class__.__name__, + response + ) + file_transfer.close() + return "uploaded" + + return None diff --git a/src/progressbar.py b/src/progressbar.py deleted file mode 100644 index bc1305f30..000000000 --- a/src/progressbar.py +++ /dev/null @@ -1,70 +0,0 @@ -# THIS FILE FROM DRAW PORGRESSBAR RESPONSE # - -# Importing modules # -import math -import 
datetime - - -class ProgressBar: - - def __init__(self, - Vault=None, - bot_name: str = None, - username: str = None - ) -> None: - - self.Vault = Vault - self.bot_name = bot_name - self.username = username - self.posts_already_downloaded = 0 - self.success_count = 0 - self.faild_count = 0 - self.shortcodes_stat = list() - - - def get(self, posts_count: int = 0, state: str = 'in_progress'): - - vault_path = f"{self.bot_name}-data/{self.username}" - shortcodes_stats = self.Vault.vault_read_secrets(vault_path) - - for key, value in shortcodes_stats.items(): - - if value == 'success': - self.success_count = self.success_count + 1 - else: - self.faild_count = self.faild_count + 1 - - self.posts_already_downloaded = int(self.success_count) + int(self.faild_count) - procentage = math.ceil((self.posts_already_downloaded / posts_count) * 100) - - # generating a string for response and logging - progressbar_dowloaded_posts = ( - "\r[" - + "\u25FE" * int(procentage) - + "\u25AB" * int((100 - procentage)) - + f"]{str(procentage)}%" - ) - - if state != "finally": - response_stats = ( - f"\u23F3 Posts from account {self.username} " - f"already downloaded: {self.posts_already_downloaded} " - f"of {posts_count}\n" - f"Success: {self.success_count} / faild: {self.faild_count}\n" - f"{progressbar_dowloaded_posts}\n" - ) - response_ratelimit = ( - f"\u23F0 Ratelimit pause applied in " - f"{datetime.datetime.now().strftime('%H:%M:%S')}" - ) - response = f"{response_stats}{response_ratelimit}" - else: - response = ( - f"\u2615 {self.posts_already_downloaded} of {posts_count} " - f"posts from account {self.username} has been downloaded\n" - f"success: {self.success_count}\n" - f"faild: {self.faild_count}\n" - f"{progressbar_dowloaded_posts}" - ) - - return response diff --git a/vault/policy.hcl b/vault/policy.hcl new file mode 100644 index 000000000..bf774e4bb --- /dev/null +++ b/vault/policy.hcl @@ -0,0 +1,44 @@ +# Allowed to look up the approle token +path "auth/token/lookup" { + 
capabilities = ["read"] +} + +# Allowed to revoke the approle token +path "auth/token/revoke" { + capabilities = ["update"] +} + +# Allowed to look up its own approle token +path "auth/token/lookup-self" { + capabilities = ["read"] +} + +# Allowed to connect a mount point and update settings +path "pyinstabot-downloader/config" { + capabilities = ["update"] +} + +# Allowed to list bot configurations +path "pyinstabot-downloader/configuration/*" { + capabilities = ["read", "list"] +} + +# Allowed to read bot configuration +path "pyinstabot-downloader/data/configuration/*" { + capabilities = ["read", "list"] +} + +# Allowed to read bot history +path "pyinstabot-downloader/metadata/history/*" { + capabilities = ["read", "list"] +} + +# Allowed to create, read, update, and list bot history +path "pyinstabot-downloader/data/history/*" { + capabilities = ["create", "read", "list", "update"] +} + +# Allowed to read and record security events by a bot +path "pyinstabot-downloader/data/events/*" { + capabilities = ["read", "list", "create", "update"] +}