mirror of
https://github.com/SrIzan10/hc-harbor.git
synced 2026-05-01 10:45:21 +00:00
Add repo pulling job for more recent events
In implementation I'm finding github's repo events are delayed by 1-3 days.
This commit is contained in:
129
app/jobs/pull_repo_commits_job.rb
Normal file
129
app/jobs/pull_repo_commits_job.rb
Normal file
@@ -0,0 +1,129 @@
|
||||
require "http"
|
||||
|
||||
class PullRepoCommitsJob < ApplicationJob
|
||||
queue_as :literally_whenever
|
||||
|
||||
# Retry on common network issues or temporary API errors
|
||||
retry_on HTTP::TimeoutError, HTTP::ConnectionError, wait: :exponentially_longer, attempts: 5
|
||||
retry_on JSON::ParserError, wait: 10.seconds, attempts: 3 # If API returns malformed JSON
|
||||
|
||||
discard_on ActiveJob::DeserializationError # If User record is gone
|
||||
|
||||
def perform(user_id, owner, repo)
|
||||
user = User.find_by(id: user_id)
|
||||
|
||||
unless user
|
||||
Rails.logger.warn "[PullRepoCommitsJob] User ##{user_id} not found. Skipping."
|
||||
return
|
||||
end
|
||||
|
||||
unless user.github_access_token.present?
|
||||
Rails.logger.warn "[PullRepoCommitsJob] User ##{user.id} missing GitHub token. Skipping."
|
||||
return
|
||||
end
|
||||
|
||||
Rails.logger.info "[PullRepoCommitsJob] Pulling commits for #{owner}/#{repo} for User ##{user.id}"
|
||||
|
||||
# Get commits from the last 3 days
|
||||
since_date = 3.days.ago.iso8601
|
||||
api_url = "https://api.github.com/repos/#{owner}/#{repo}/commits?since=#{since_date}"
|
||||
|
||||
begin
|
||||
response = HTTP.headers(
|
||||
"Accept" => "application/vnd.github.v3+json",
|
||||
"Authorization" => "Bearer #{user.github_access_token}",
|
||||
"X-GitHub-Api-Version" => "2022-11-28"
|
||||
).timeout(connect: 5, read: 10).get(api_url)
|
||||
|
||||
if response.status.success?
|
||||
commits_data = response.parse
|
||||
process_commits(user, commits_data)
|
||||
elsif response.status.code == 404
|
||||
Rails.logger.warn "[PullRepoCommitsJob] Repository #{owner}/#{repo} not found (404) for User ##{user.id}."
|
||||
elsif response.status.code == 403 # Forbidden, could be rate limit or permissions
|
||||
if response.headers["X-RateLimit-Remaining"].to_i == 0
|
||||
reset_time = Time.at(response.headers["X-RateLimit-Reset"].to_i)
|
||||
delay_seconds = [ (reset_time - Time.current).ceil, 5 ].max # at least 5s delay
|
||||
Rails.logger.warn "[PullRepoCommitsJob] GitHub API rate limit exceeded for User ##{user.id}. Retrying in #{delay_seconds}s."
|
||||
self.class.set(wait: delay_seconds.seconds).perform_later(user.id, owner, repo)
|
||||
else
|
||||
Rails.logger.error "[PullRepoCommitsJob] GitHub API forbidden (403) for User ##{user.id}. Response: #{response.body.to_s.truncate(500)}"
|
||||
end
|
||||
else
|
||||
Rails.logger.error "[PullRepoCommitsJob] GitHub API error for User ##{user.id}. Status: #{response.status}. Response: #{response.body.to_s.truncate(500)}"
|
||||
raise "GitHub API Error: Status #{response.status}" if response.status.server_error? || response.status.code == 401
|
||||
end
|
||||
|
||||
rescue HTTP::Error => e
|
||||
Rails.logger.error "[PullRepoCommitsJob] HTTP Error fetching commits for #{owner}/#{repo} (User ##{user.id}): #{e.message}"
|
||||
raise # Re-raise to allow GoodJob to retry based on retry_on
|
||||
rescue JSON::ParserError => e
|
||||
Rails.logger.error "[PullRepoCommitsJob] JSON Parse Error for #{owner}/#{repo} (User ##{user.id}): #{e.message}"
|
||||
raise # Re-raise to allow GoodJob to retry based on retry_on
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def process_commits(user, commits_data)
|
||||
return if commits_data.empty?
|
||||
|
||||
# Get existing commit SHAs to avoid duplicates
|
||||
shas_to_check = commits_data.map { |c| c["sha"] }.uniq
|
||||
existing_shas = Commit.where(sha: shas_to_check).pluck(:sha).to_set
|
||||
|
||||
processed_count = 0
|
||||
enqueued_count = 0
|
||||
|
||||
commits_data.each do |commit_data|
|
||||
processed_count += 1
|
||||
commit_sha = commit_data["sha"]
|
||||
commit_api_url = commit_data["url"]
|
||||
|
||||
# Skip if commit already exists
|
||||
next if existing_shas.include?(commit_sha)
|
||||
|
||||
# Get detailed commit info to check author
|
||||
begin
|
||||
commit_response = HTTP.headers(
|
||||
"Accept" => "application/vnd.github.v3+json",
|
||||
"Authorization" => "Bearer #{user.github_access_token}",
|
||||
"X-GitHub-Api-Version" => "2022-11-28"
|
||||
).timeout(connect: 5, read: 10).get(commit_api_url)
|
||||
|
||||
if commit_response.status.success?
|
||||
commit_details = commit_response.parse
|
||||
author = commit_details.dig("author")
|
||||
|
||||
# Check both author ID and login
|
||||
author_id = author&.dig("id")
|
||||
author_login = author&.dig("login")
|
||||
|
||||
# Process if either the ID or login matches
|
||||
if author_id == user.github_uid || author_login == user.github_username
|
||||
Rails.logger.info "[PullRepoCommitsJob] Enqueuing ProcessCommitJob for SHA #{commit_sha}, User ##{user.id}"
|
||||
ProcessCommitJob.perform_now(
|
||||
user.id,
|
||||
commit_sha,
|
||||
commit_api_url,
|
||||
"github"
|
||||
)
|
||||
enqueued_count += 1
|
||||
else
|
||||
Rails.logger.debug "[PullRepoCommitsJob] Skipping commit #{commit_sha} - author ID #{author_id}/login #{author_login} doesn't match user ID #{user.github_uid}/login #{user.github_username}"
|
||||
end
|
||||
else
|
||||
Rails.logger.warn "[PullRepoCommitsJob] Failed to fetch commit details for #{commit_sha}: #{commit_response.status}"
|
||||
end
|
||||
rescue HTTP::Error => e
|
||||
Rails.logger.error "[PullRepoCommitsJob] HTTP Error fetching commit details for #{commit_sha}: #{e.message}"
|
||||
next
|
||||
rescue JSON::ParserError => e
|
||||
Rails.logger.error "[PullRepoCommitsJob] JSON Parse Error for commit details #{commit_sha}: #{e.message}"
|
||||
next
|
||||
end
|
||||
end
|
||||
|
||||
Rails.logger.info "[PullRepoCommitsJob] Processed #{processed_count} commits. Enqueued #{enqueued_count} new ProcessCommitJob(s)."
|
||||
end
|
||||
end
|
||||
@@ -20,6 +20,8 @@ class ProjectRepoMapping < ApplicationRecord
|
||||
"<<LAST PROJECT>>"
|
||||
]
|
||||
|
||||
after_create :schedule_commit_pull
|
||||
|
||||
private
|
||||
|
||||
def repo_url_exists
|
||||
@@ -27,4 +29,15 @@ class ProjectRepoMapping < ApplicationRecord
|
||||
errors.add(:repo_url, "is not cloneable")
|
||||
end
|
||||
end
|
||||
|
||||
def schedule_commit_pull
|
||||
# Extract owner and repo name from the URL
|
||||
# Example URL: https://github.com/owner/repo
|
||||
if repo_url =~ %r{https?://[^/]+/([^/]+)/([^/]+)\z}
|
||||
owner = $1
|
||||
repo = $2
|
||||
Rails.logger.info "[ProjectRepoMapping] Scheduling commit pull for #{owner}/#{repo} for User ##{user_id}"
|
||||
PullRepoCommitsJob.perform_now(user_id, owner, repo)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user