Make Git remote URL parsing more robust (#19924)

This PR improves the parsing of Git remote URLs in order to make
features that depend on them more robust.

Previously we were just treating these as plain strings and doing
one-off shotgun parsing to massage them into the right format. This
meant that we weren't accounting for edge cases in URL structure.

One of these cases was HTTPS Git URLs containing a username, which can
arise when using GitHub Enterprise (see
https://github.com/zed-industries/zed/issues/11160).

We now have a `RemoteUrl` type to represent a parsed Git remote URL and
use the `Url` parser to parse it.

Release Notes:

- Improved the parsing of Git remote URLs to support additional
scenarios.
This commit is contained in:
Marshall Bowers 2024-10-29 16:19:05 -04:00 committed by GitHub
parent d310a1269f
commit 5b7fa05a87
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 304 additions and 161 deletions

View file

@ -1,6 +1,11 @@
use std::str::FromStr;
use url::Url;
use git::{BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, ParsedGitRemote};
use git::{
BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, ParsedGitRemote,
RemoteUrl,
};
pub struct Bitbucket;
@ -25,18 +30,22 @@ impl GitHostingProvider for Bitbucket {
format!("lines-{start_line}:{end_line}")
}
fn parse_remote_url<'a>(&self, url: &'a str) -> Option<ParsedGitRemote<'a>> {
if url.contains("bitbucket.org") {
let (_, repo_with_owner) = url.trim_end_matches(".git").split_once("bitbucket.org")?;
let (owner, repo) = repo_with_owner
.trim_start_matches('/')
.trim_start_matches(':')
.split_once('/')?;
fn parse_remote_url(&self, url: &str) -> Option<ParsedGitRemote> {
let url = RemoteUrl::from_str(url).ok()?;
return Some(ParsedGitRemote { owner, repo });
let host = url.host_str()?;
if host != "bitbucket.org" {
return None;
}
None
let mut path_segments = url.path_segments()?;
let owner = path_segments.next()?;
let repo = path_segments.next()?.trim_end_matches(".git");
Some(ParsedGitRemote {
owner: owner.into(),
repo: repo.into(),
})
}
fn build_commit_permalink(
@ -88,8 +97,8 @@ mod tests {
let url = "https://thorstenballzed@bitbucket.org/thorstenzed/testingrepo.git";
let (provider, parsed) = parse_git_remote_url(provider_registry, url).unwrap();
assert_eq!(provider.name(), "Bitbucket");
assert_eq!(parsed.owner, "thorstenzed");
assert_eq!(parsed.repo, "testingrepo");
assert_eq!(parsed.owner.as_ref(), "thorstenzed");
assert_eq!(parsed.repo.as_ref(), "testingrepo");
}
#[test]
@ -99,8 +108,8 @@ mod tests {
let url = "https://bitbucket.org/thorstenzed/testingrepo.git";
let (provider, parsed) = parse_git_remote_url(provider_registry, url).unwrap();
assert_eq!(provider.name(), "Bitbucket");
assert_eq!(parsed.owner, "thorstenzed");
assert_eq!(parsed.repo, "testingrepo");
assert_eq!(parsed.owner.as_ref(), "thorstenzed");
assert_eq!(parsed.repo.as_ref(), "testingrepo");
}
#[test]
@ -110,15 +119,15 @@ mod tests {
let url = "git@bitbucket.org:thorstenzed/testingrepo.git";
let (provider, parsed) = parse_git_remote_url(provider_registry, url).unwrap();
assert_eq!(provider.name(), "Bitbucket");
assert_eq!(parsed.owner, "thorstenzed");
assert_eq!(parsed.repo, "testingrepo");
assert_eq!(parsed.owner.as_ref(), "thorstenzed");
assert_eq!(parsed.repo.as_ref(), "testingrepo");
}
#[test]
fn test_build_bitbucket_permalink_from_ssh_url() {
let remote = ParsedGitRemote {
owner: "thorstenzed",
repo: "testingrepo",
owner: "thorstenzed".into(),
repo: "testingrepo".into(),
};
let permalink = Bitbucket.build_permalink(
remote,
@ -136,8 +145,8 @@ mod tests {
#[test]
fn test_build_bitbucket_permalink_from_ssh_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "thorstenzed",
repo: "testingrepo",
owner: "thorstenzed".into(),
repo: "testingrepo".into(),
};
let permalink = Bitbucket.build_permalink(
remote,
@ -156,8 +165,8 @@ mod tests {
#[test]
fn test_build_bitbucket_permalink_from_ssh_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "thorstenzed",
repo: "testingrepo",
owner: "thorstenzed".into(),
repo: "testingrepo".into(),
};
let permalink = Bitbucket.build_permalink(
remote,

View file

@ -1,3 +1,4 @@
use std::str::FromStr;
use std::sync::Arc;
use anyhow::{bail, Context, Result};
@ -9,6 +10,7 @@ use url::Url;
use git::{
BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, Oid, ParsedGitRemote,
RemoteUrl,
};
#[derive(Debug, Deserialize)]
@ -103,19 +105,22 @@ impl GitHostingProvider for Codeberg {
format!("L{start_line}-L{end_line}")
}
fn parse_remote_url<'a>(&self, url: &'a str) -> Option<ParsedGitRemote<'a>> {
if url.starts_with("git@codeberg.org:") || url.starts_with("https://codeberg.org/") {
let repo_with_owner = url
.trim_start_matches("git@codeberg.org:")
.trim_start_matches("https://codeberg.org/")
.trim_end_matches(".git");
fn parse_remote_url(&self, url: &str) -> Option<ParsedGitRemote> {
let url = RemoteUrl::from_str(url).ok()?;
let (owner, repo) = repo_with_owner.split_once('/')?;
return Some(ParsedGitRemote { owner, repo });
let host = url.host_str()?;
if host != "codeberg.org" {
return None;
}
None
let mut path_segments = url.path_segments()?;
let owner = path_segments.next()?;
let repo = path_segments.next()?.trim_end_matches(".git");
Some(ParsedGitRemote {
owner: owner.into(),
repo: repo.into(),
})
}
fn build_commit_permalink(
@ -175,8 +180,8 @@ mod tests {
#[test]
fn test_build_codeberg_permalink_from_ssh_url() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Codeberg.build_permalink(
remote,
@ -194,8 +199,8 @@ mod tests {
#[test]
fn test_build_codeberg_permalink_from_ssh_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Codeberg.build_permalink(
remote,
@ -213,8 +218,8 @@ mod tests {
#[test]
fn test_build_codeberg_permalink_from_ssh_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Codeberg.build_permalink(
remote,
@ -232,8 +237,8 @@ mod tests {
#[test]
fn test_build_codeberg_permalink_from_https_url() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Codeberg.build_permalink(
remote,
@ -251,8 +256,8 @@ mod tests {
#[test]
fn test_build_codeberg_permalink_from_https_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Codeberg.build_permalink(
remote,
@ -270,8 +275,8 @@ mod tests {
#[test]
fn test_build_codeberg_permalink_from_https_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Codeberg.build_permalink(
remote,

View file

@ -1,6 +1,11 @@
use std::str::FromStr;
use url::Url;
use git::{BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, ParsedGitRemote};
use git::{
BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, ParsedGitRemote,
RemoteUrl,
};
pub struct Gitee;
@ -25,19 +30,22 @@ impl GitHostingProvider for Gitee {
format!("L{start_line}-{end_line}")
}
fn parse_remote_url<'a>(&self, url: &'a str) -> Option<ParsedGitRemote<'a>> {
if url.starts_with("git@gitee.com:") || url.starts_with("https://gitee.com/") {
let repo_with_owner = url
.trim_start_matches("git@gitee.com:")
.trim_start_matches("https://gitee.com/")
.trim_end_matches(".git");
fn parse_remote_url(&self, url: &str) -> Option<ParsedGitRemote> {
let url = RemoteUrl::from_str(url).ok()?;
let (owner, repo) = repo_with_owner.split_once('/')?;
return Some(ParsedGitRemote { owner, repo });
let host = url.host_str()?;
if host != "gitee.com" {
return None;
}
None
let mut path_segments = url.path_segments()?;
let owner = path_segments.next()?;
let repo = path_segments.next()?.trim_end_matches(".git");
Some(ParsedGitRemote {
owner: owner.into(),
repo: repo.into(),
})
}
fn build_commit_permalink(
@ -81,8 +89,8 @@ mod tests {
#[test]
fn test_build_gitee_permalink_from_ssh_url() {
let remote = ParsedGitRemote {
owner: "libkitten",
repo: "zed",
owner: "libkitten".into(),
repo: "zed".into(),
};
let permalink = Gitee.build_permalink(
remote,
@ -100,8 +108,8 @@ mod tests {
#[test]
fn test_build_gitee_permalink_from_ssh_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "libkitten",
repo: "zed",
owner: "libkitten".into(),
repo: "zed".into(),
};
let permalink = Gitee.build_permalink(
remote,
@ -119,8 +127,8 @@ mod tests {
#[test]
fn test_build_gitee_permalink_from_ssh_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "libkitten",
repo: "zed",
owner: "libkitten".into(),
repo: "zed".into(),
};
let permalink = Gitee.build_permalink(
remote,
@ -138,8 +146,8 @@ mod tests {
#[test]
fn test_build_gitee_permalink_from_https_url() {
let remote = ParsedGitRemote {
owner: "libkitten",
repo: "zed",
owner: "libkitten".into(),
repo: "zed".into(),
};
let permalink = Gitee.build_permalink(
remote,
@ -157,8 +165,8 @@ mod tests {
#[test]
fn test_build_gitee_permalink_from_https_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "libkitten",
repo: "zed",
owner: "libkitten".into(),
repo: "zed".into(),
};
let permalink = Gitee.build_permalink(
remote,
@ -176,8 +184,8 @@ mod tests {
#[test]
fn test_build_gitee_permalink_from_https_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "libkitten",
repo: "zed",
owner: "libkitten".into(),
repo: "zed".into(),
};
let permalink = Gitee.build_permalink(
remote,

View file

@ -1,3 +1,4 @@
use std::str::FromStr;
use std::sync::{Arc, OnceLock};
use anyhow::{bail, Context, Result};
@ -10,7 +11,7 @@ use url::Url;
use git::{
BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, Oid, ParsedGitRemote,
PullRequest,
PullRequest, RemoteUrl,
};
fn pull_request_number_regex() -> &'static Regex {
@ -107,19 +108,22 @@ impl GitHostingProvider for Github {
format!("L{start_line}-L{end_line}")
}
fn parse_remote_url<'a>(&self, url: &'a str) -> Option<ParsedGitRemote<'a>> {
if url.starts_with("git@github.com:") || url.starts_with("https://github.com/") {
let repo_with_owner = url
.trim_start_matches("git@github.com:")
.trim_start_matches("https://github.com/")
.trim_end_matches(".git");
fn parse_remote_url(&self, url: &str) -> Option<ParsedGitRemote> {
let url = RemoteUrl::from_str(url).ok()?;
let (owner, repo) = repo_with_owner.split_once('/')?;
return Some(ParsedGitRemote { owner, repo });
let host = url.host_str()?;
if host != "github.com" {
return None;
}
None
let mut path_segments = url.path_segments()?;
let owner = path_segments.next()?;
let repo = path_segments.next()?.trim_end_matches(".git");
Some(ParsedGitRemote {
owner: owner.into(),
repo: repo.into(),
})
}
fn build_commit_permalink(
@ -198,11 +202,26 @@ mod tests {
use super::*;
#[test]
fn test_parse_remote_url_given_https_url_with_username() {
let parsed_remote = Github
.parse_remote_url("https://jlannister@github.com/some-org/some-repo.git")
.unwrap();
assert_eq!(
parsed_remote,
ParsedGitRemote {
owner: "some-org".into(),
repo: "some-repo".into(),
}
);
}
#[test]
fn test_build_github_permalink_from_ssh_url() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Github.build_permalink(
remote,
@ -220,8 +239,8 @@ mod tests {
#[test]
fn test_build_github_permalink_from_ssh_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Github.build_permalink(
remote,
@ -239,8 +258,8 @@ mod tests {
#[test]
fn test_build_github_permalink_from_ssh_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Github.build_permalink(
remote,
@ -258,8 +277,8 @@ mod tests {
#[test]
fn test_build_github_permalink_from_https_url() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Github.build_permalink(
remote,
@ -277,8 +296,8 @@ mod tests {
#[test]
fn test_build_github_permalink_from_https_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Github.build_permalink(
remote,
@ -296,8 +315,8 @@ mod tests {
#[test]
fn test_build_github_permalink_from_https_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Github.build_permalink(
remote,
@ -315,8 +334,8 @@ mod tests {
#[test]
fn test_github_pull_requests() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let message = "This does not contain a pull request";

View file

@ -1,8 +1,13 @@
use std::str::FromStr;
use anyhow::{anyhow, bail, Result};
use url::Url;
use util::maybe;
use git::{BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, ParsedGitRemote};
use git::{
BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, ParsedGitRemote,
RemoteUrl,
};
#[derive(Debug)]
pub struct Gitlab {
@ -64,21 +69,22 @@ impl GitHostingProvider for Gitlab {
format!("L{start_line}-{end_line}")
}
fn parse_remote_url<'a>(&self, url: &'a str) -> Option<ParsedGitRemote<'a>> {
let host = self.base_url.host_str()?;
fn parse_remote_url(&self, url: &str) -> Option<ParsedGitRemote> {
let url = RemoteUrl::from_str(url).ok()?;
if url.starts_with(&format!("git@{host}")) || url.starts_with(&format!("https://{host}/")) {
let repo_with_owner = url
.trim_start_matches(&format!("git@{host}:"))
.trim_start_matches(&format!("https://{host}/"))
.trim_end_matches(".git");
let (owner, repo) = repo_with_owner.split_once('/')?;
return Some(ParsedGitRemote { owner, repo });
let host = url.host_str()?;
if host != self.base_url.host_str()? {
return None;
}
None
let mut path_segments = url.path_segments()?;
let owner = path_segments.next()?;
let repo = path_segments.next()?.trim_end_matches(".git");
Some(ParsedGitRemote {
owner: owner.into(),
repo: repo.into(),
})
}
fn build_commit_permalink(
@ -127,8 +133,8 @@ mod tests {
#[test]
fn test_build_gitlab_permalink_from_ssh_url() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Gitlab::new().build_permalink(
remote,
@ -146,8 +152,8 @@ mod tests {
#[test]
fn test_build_gitlab_permalink_from_ssh_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Gitlab::new().build_permalink(
remote,
@ -165,8 +171,8 @@ mod tests {
#[test]
fn test_build_gitlab_permalink_from_ssh_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Gitlab::new().build_permalink(
remote,
@ -184,8 +190,8 @@ mod tests {
#[test]
fn test_build_gitlab_permalink_from_https_url() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Gitlab::new().build_permalink(
remote,
@ -203,8 +209,8 @@ mod tests {
#[test]
fn test_build_gitlab_permalink_from_https_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Gitlab::new().build_permalink(
remote,
@ -222,8 +228,8 @@ mod tests {
#[test]
fn test_build_gitlab_permalink_from_https_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let permalink = Gitlab::new().build_permalink(
remote,
@ -241,8 +247,8 @@ mod tests {
#[test]
fn test_build_gitlab_self_hosted_permalink_from_ssh_url() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let gitlab =
Gitlab::from_remote_url("git@gitlab.some-enterprise.com:zed-industries/zed.git")
@ -263,8 +269,8 @@ mod tests {
#[test]
fn test_build_gitlab_self_hosted_permalink_from_https_url() {
let remote = ParsedGitRemote {
owner: "zed-industries",
repo: "zed",
owner: "zed-industries".into(),
repo: "zed".into(),
};
let gitlab =
Gitlab::from_remote_url("https://gitlab-instance.big-co.com/zed-industries/zed.git")

View file

@ -1,6 +1,11 @@
use std::str::FromStr;
use url::Url;
use git::{BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, ParsedGitRemote};
use git::{
BuildCommitPermalinkParams, BuildPermalinkParams, GitHostingProvider, ParsedGitRemote,
RemoteUrl,
};
pub struct Sourcehut;
@ -25,21 +30,27 @@ impl GitHostingProvider for Sourcehut {
format!("L{start_line}-{end_line}")
}
fn parse_remote_url<'a>(&self, url: &'a str) -> Option<ParsedGitRemote<'a>> {
if url.starts_with("git@git.sr.ht:") || url.starts_with("https://git.sr.ht/") {
// sourcehut indicates a repo with '.git' suffix as a separate repo.
// For example, "git@git.sr.ht:~username/repo" and "git@git.sr.ht:~username/repo.git"
// are two distinct repositories.
let repo_with_owner = url
.trim_start_matches("git@git.sr.ht:~")
.trim_start_matches("https://git.sr.ht/~");
fn parse_remote_url(&self, url: &str) -> Option<ParsedGitRemote> {
let url = RemoteUrl::from_str(url).ok()?;
let (owner, repo) = repo_with_owner.split_once('/')?;
return Some(ParsedGitRemote { owner, repo });
let host = url.host_str()?;
if host != "git.sr.ht" {
return None;
}
None
let mut path_segments = url.path_segments()?;
let owner = path_segments.next()?;
// We don't trim the `.git` suffix here like we do elsewhere, as
// sourcehut treats a repo with `.git` suffix as a separate repo.
//
// For example, `git@git.sr.ht:~username/repo` and `git@git.sr.ht:~username/repo.git`
// are two distinct repositories.
let repo = path_segments.next()?;
Some(ParsedGitRemote {
owner: owner.into(),
repo: repo.into(),
})
}
fn build_commit_permalink(
@ -83,8 +94,8 @@ mod tests {
#[test]
fn test_build_sourcehut_permalink_from_ssh_url() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Sourcehut.build_permalink(
remote,
@ -102,8 +113,8 @@ mod tests {
#[test]
fn test_build_sourcehut_permalink_from_ssh_url_with_git_prefix() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed.git",
owner: "rajveermalviya".into(),
repo: "zed.git".into(),
};
let permalink = Sourcehut.build_permalink(
remote,
@ -121,8 +132,8 @@ mod tests {
#[test]
fn test_build_sourcehut_permalink_from_ssh_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Sourcehut.build_permalink(
remote,
@ -140,8 +151,8 @@ mod tests {
#[test]
fn test_build_sourcehut_permalink_from_ssh_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Sourcehut.build_permalink(
remote,
@ -159,8 +170,8 @@ mod tests {
#[test]
fn test_build_sourcehut_permalink_from_https_url() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Sourcehut.build_permalink(
remote,
@ -178,8 +189,8 @@ mod tests {
#[test]
fn test_build_sourcehut_permalink_from_https_url_single_line_selection() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Sourcehut.build_permalink(
remote,
@ -197,8 +208,8 @@ mod tests {
#[test]
fn test_build_sourcehut_permalink_from_https_url_multi_line_selection() {
let remote = ParsedGitRemote {
owner: "rajveermalviya",
repo: "zed",
owner: "rajveermalviya".into(),
repo: "zed".into(),
};
let permalink = Sourcehut.build_permalink(
remote,