Skip to content

Commit

Permalink
fix workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
danyworks committed Sep 25, 2024
1 parent 8065854 commit 533e783
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 89 deletions.
17 changes: 7 additions & 10 deletions .github/workflows/build-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,12 @@ jobs:
else
echo "ChromeDriver executable not found"
fi
echo "Listing contents of Chromium directory:"
ls -l ${{ env.OUTPUT_DIR }}/ungoogled-chromium_${{ inputs.chromium_version }}_linux/
# - name: Package the app using PyInstaller
# run: pipenv run pyinstaller main.spec
- name: Package the app using PyInstaller
run: pipenv run pyinstaller main.spec

# - name: Upload artifact
# uses: actions/upload-artifact@v4
# with:
# name: Linux Executable
# path: dist/scrapegoat_linux
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: Linux Executable
path: dist/scrapegoat_linux
17 changes: 7 additions & 10 deletions .github/workflows/build-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,11 @@ jobs:
echo "ChromeDriver executable not found"
fi
echo "Listing contents of Chromium directory:"
ls -R ${{ env.OUTPUT_DIR }}
- name: Package the app using PyInstaller
run: pipenv run pyinstaller main.spec

# - name: Package the app using PyInstaller
# run: pipenv run pyinstaller main.spec

# - name: Upload artifact
# uses: actions/upload-artifact@v4
# with:
# name: macOS Executable
# path: dist/scrapegoat_macos_x64
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: macOS Executable
path: dist/scrapegoat_macos_x64
14 changes: 7 additions & 7 deletions .github/workflows/build-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,11 @@ jobs:
Write-Host "Listing contents of Chromium directory:"
Get-ChildItem -Path "D:\a\scrapegoat\scrapegoat\ungoogled_chromium" -Recurse | Select-Object FullName, Length
# - name: Package the app using PyInstaller
# run: pipenv run pyinstaller main.spec
- name: Package the app using PyInstaller
run: pipenv run pyinstaller main.spec

# - name: Upload artifact
# uses: actions/upload-artifact@v4
# with:
# name: Windows Executable
# path: "D:\\a\\scrapegoat\\scrapegoat\\dist\\scrapegoat_windows_x64.exe"
- name: Upload artifact
  uses: actions/upload-artifact@v4
  with:
    name: Windows Executable
    # Relative paths are resolved against the workspace (the checkout root),
    # so this does not depend on the runner's drive letter or the repository
    # name, and matches the Linux/macOS upload steps.
    path: dist/scrapegoat_windows_x64.exe
118 changes: 59 additions & 59 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ on:
- 'v*'

jobs:
# build-linux:
# name: Linux x64
# uses: ./.github/workflows/build-linux.yml
# with:
# python_version: '3.10'
# chromium_version: '129.0.6668.58-1'
build-linux:
name: Linux x64
uses: ./.github/workflows/build-linux.yml
with:
python_version: '3.10'
chromium_version: '129.0.6668.58-1'

build-macos:
name: macOS x64
Expand All @@ -22,63 +22,63 @@ jobs:
chromium_version: '129.0.6668.58-1.1'
chromedriver_version: '131.0.6724.0'

# build-windows:
# name: Windows x64
# uses: ./.github/workflows/build-windows.yml
# with:
# python_version: '3.10'
# chromium_version: '129.0.6668.58-1.1'
# chromedriver_version: '131.0.6724.0'
build-windows:
name: Windows x64
uses: ./.github/workflows/build-windows.yml
with:
python_version: '3.10'
chromium_version: '129.0.6668.58-1.1'
chromedriver_version: '131.0.6724.0'

# build-docker:
# name: Docker
# uses: ./.github/workflows/build-docker.yml
# with:
# repo: 'ghcr.io/${{ github.repository }}'
# context_path: '.'
# image: 'scrapegoat'
# tag: ${{ github.ref_name }}
# dockerfile_path: './GoatFile'
# secrets:
# PACKAGES_TOKEN: ${{ secrets.PACKAGES_TOKEN }}
build-docker:
name: Docker
uses: ./.github/workflows/build-docker.yml
with:
repo: 'ghcr.io/${{ github.repository }}'
context_path: '.'
image: 'scrapegoat'
tag: ${{ github.ref_name }}
dockerfile_path: './GoatFile'
secrets:
PACKAGES_TOKEN: ${{ secrets.PACKAGES_TOKEN }}

# create-release:
# needs: [build-linux, build-macos, build-windows, build-docker]
# runs-on: ubuntu-latest
# steps:
# - name: Checkout code
# uses: actions/checkout@v4
create-release:
needs: [build-linux, build-macos, build-windows, build-docker]
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

# - name: Download Linux artifact
# uses: actions/download-artifact@v4
# with:
# name: Linux Executable
# path: artifacts
- name: Download Linux artifact
uses: actions/download-artifact@v4
with:
name: Linux Executable
path: artifacts

# - name: Download macOS artifact
# uses: actions/download-artifact@v4
# with:
# name: macOS Executable
# path: artifacts
- name: Download macOS artifact
uses: actions/download-artifact@v4
with:
name: macOS Executable
path: artifacts

# - name: Download Windows artifact
# uses: actions/download-artifact@v4
# with:
# name: Windows Executable
# path: artifacts
- name: Download Windows artifact
uses: actions/download-artifact@v4
with:
name: Windows Executable
path: artifacts

# - name: Read release message
# id: release_message
# run: echo "message=$(cat RELEASE_MESSAGE.md)" >> $GITHUB_OUTPUT
- name: Read release message
  id: release_message
  # RELEASENOTES.md spans many lines. A plain `name=value` line written to
  # $GITHUB_OUTPUT only carries a single-line value, so the delimiter
  # (heredoc) form is used to expose the whole file as the `message` output.
  run: |
    {
      echo "message<<RELEASE_NOTES_EOF"
      cat RELEASENOTES.md
      echo "RELEASE_NOTES_EOF"
    } >> "$GITHUB_OUTPUT"

# - name: Create GitHub Release
# uses: softprops/action-gh-release@v1
# with:
# files: |
# artifacts/scrapegoat_linux
# artifacts/scrapegoat_macos_x64
# artifacts/scrapegoat_windows_x64.exe
# body: ${{ steps.release_message.outputs.message }}
# tag_name: ${{ github.ref_name }}
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Create GitHub Release
uses: softprops/action-gh-release@v1
with:
files: |
artifacts/scrapegoat_linux
artifacts/scrapegoat_macos_x64
artifacts/scrapegoat_windows_x64.exe
body: ${{ steps.release_message.outputs.message }}
tag_name: ${{ github.ref_name }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
49 changes: 49 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Release Notes

## Version 1.0.0 (2024-03-15)

### New Features
- Initial release of ScrapeGoat
- AI-powered scraping capabilities
- Browser automation with Selenium
- Custom scraping workflow creation
- Data export in CSV, JSON, and XML formats
- Scheduled scraping tasks
- Proxy support for IP rotation
- Advanced CAPTCHA handling

### Improvements
- Optimized performance for large-scale scraping tasks
- Enhanced user interface for easier navigation and workflow management

### Bug Fixes
- Resolved issues with certain website layouts causing scraping errors
- Fixed memory leak in long-running scraping sessions

## Version 1.0.1 (2024-03-22)

### Improvements
- Updated ChromeDriver compatibility to version 131.0.6724.0
- Enhanced error handling and logging for better troubleshooting

### Bug Fixes
- Fixed an issue with proxy rotation not working correctly in some cases
- Resolved a problem with scheduled tasks not running at the specified times

## Version 1.1.0 (2024-04-05)

### New Features
- Added support for scraping dynamic content loaded via JavaScript
- Introduced a new AI model for improved CAPTCHA solving

### Improvements
- Increased scraping speed by 20% through optimized request handling
- Enhanced data cleaning and normalization features

### Bug Fixes
- Fixed a rare crash occurring when handling malformed HTML
- Resolved an issue with incorrect data parsing for certain website structures

---

For more details on how to use the new features and improvements, please refer to the [User Manual](./docs/user_manual.md).
13 changes: 10 additions & 3 deletions src/driver/selenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,23 +92,30 @@ def _configure_chrome_options(self) -> Options:

logger.debug(f"Chrome configured with options: {options.arguments}")
return options

def _get_chrome_service(self, options: Options) -> Service:
current_platform = platform.system().lower()
if getattr(sys, 'frozen', False):
chrome_path = get_resource_path('chrome_portable')
chromedriver_path = get_resource_path('chromedriver')
else:
chrome_path = get_resource_path(os.path.join('ungoogled-chromium_128.0.6613.137-1_linux'))
chromedriver_path = get_resource_path(os.path.join('ungoogled-chromium_128.0.6613.137-1_linux'))
chrome_path = get_resource_path(os.path.join('ungoogled_chromium'))
chromedriver_path = get_resource_path(os.path.join('ungoogled_chromium'))

if current_platform.startswith('win'):
chrome_binary = 'chrome.exe'
chromedriver_binary = 'chromedriver.exe'
chrome_path = os.path.join(chrome_path, 'ungoogled-chromium_129.0.6668.58-1.1_windows')
chromedriver_path = os.path.join(chromedriver_path, 'chromedriver', 'chromedriver-win64')
elif current_platform == 'darwin':
chrome_binary = 'ungoogled-chromium_128.0.6613.137-1.1_x86-64-macos-signed'
chrome_binary = 'Chromium'
chromedriver_binary = 'chromedriver'
chrome_path = os.path.join(chrome_path, 'Chromium.app', 'Contents', 'MacOS')
else: # Linux
chrome_binary = 'chrome'
chromedriver_binary = 'chromedriver'
chrome_path = os.path.join(chrome_path, 'ungoogled-chromium_129.0.6668.58-1_linux')
chromedriver_path = chrome_path

chrome_binary_path = get_resource_path(os.path.join(chrome_path, chrome_binary))
chromedriver_binary_path = get_resource_path(os.path.join(chromedriver_path, chromedriver_binary))
Expand Down

0 comments on commit 533e783

Please sign in to comment.