Skip to content

Commit

Permalink
Merge branch 'thetradedesk-master'
Browse files Browse the repository at this point in the history
Closes #144
  • Loading branch information
frej committed Dec 6, 2018
2 parents 2bb173e + cadcfcb commit b51c58d
Show file tree
Hide file tree
Showing 12 changed files with 237 additions and 23 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Set the default behavior, in case people don't have core.autocrlf set.
* text=auto
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
*.orig
*.pyc
.dotest
.idea/
51 changes: 51 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,57 @@ if [ "$3" == "1" ]; then cat; else dos2unix; fi
-- End of crlf-filter.sh --
```


Plugins
-----------------

hg-fast-export supports plugins to manipulate the file data and commit
metadata. The plugins are enabled with the --plugin option. The value
of said option is a plugin name (by folder in the plugins directory),
and optionally, an equals sign followed by an initialization string.

There is a readme accompanying each of the bundled plugins, with a
description of the usage. To create a new plugin, one must simply
add a new folder under the `plugins` directory, with the name of the
new plugin. Inside, there must be an `__init__.py` file, which contains
at a minimum:

```
def build_filter(args):
return Filter(args)
class Filter:
def __init__(self, args):
pass
#Or don't pass, if you want to do some init code here
```

Beyond the boilerplate initialization, you can see the two different
defined filter methods in the [dos2unix](./plugins/dos2unix) and
[branch_name_in_commit](./plugins/branch_name_in_commit) plugins.

```
commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc}
def commit_message_filter(self,commit_data):
```
The `commit_message_filter` method is called for each commit, after parsing
from hg, but before outputting to git. The dictionary `commit_data` contains the
above attributes about the commit, and can be modified by any filter. The
values in the dictionary after filters have been run are used to create the git
commit.

```
file_data = {'filename':filename,'file_ctx':file_ctx,'data':d}
def file_data_filter(self,file_data):
```
The `file_data_filter` method is called for each file within each commit.
The dictionary `file_data` contains the above attributes about the file, and
can be modified by any filter. `file_ctx` is the filecontext from the
mercurial python library. After all filters have been run, the values
are used to add the file to the git commit.

Notes/Limitations
-----------------

Expand Down
82 changes: 59 additions & 23 deletions hg-fast-export.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import re
import sys
import os
import pluginloader

if sys.platform == "win32":
# On Windows, sys.stdout is initially opened in text mode, which means that
Expand Down Expand Up @@ -123,7 +124,7 @@ def get_author(logmessage,committer,authors):
return r
return committer

def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=None):
def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
count=0
max=len(files)
for file in files:
Expand All @@ -137,18 +138,15 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=N
filename=file
file_ctx=ctx.filectx(file)
d=file_ctx.data()
if filter_contents:
import subprocess
filter_cmd=filter_contents + [filename,node.hex(file_ctx.filenode()),'1' if file_ctx.isbinary() else '0']
try:
filter_proc=subprocess.Popen(filter_cmd,stdin=subprocess.PIPE,stdout=subprocess.PIPE)
d,_=filter_proc.communicate(d)
except:
sys.stderr.write('Running filter-contents %s:\n' % filter_cmd)
raise
filter_ret=filter_proc.poll()
if filter_ret:
raise subprocess.CalledProcessError(filter_ret,filter_cmd)

if plugins and plugins['file_data_filters']:
file_data = {'filename':filename,'file_ctx':file_ctx,'data':d}
for filter in plugins['file_data_filters']:
filter(file_data)
d=file_data['data']
filename=file_data['filename']
file_ctx=file_data['file_ctx']

wr('M %s inline %s' % (gitmode(manifest.flags(file)),
strip_leading_slash(filename)))
wr('data %d' % len(d)) # had some trouble with size()
Expand Down Expand Up @@ -198,7 +196,8 @@ def strip_leading_slash(filename):
return filename

def export_commit(ui,repo,revision,old_marks,max,count,authors,
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',filter_contents=None):
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
plugins={}):
def get_branchname(name):
if brmap.has_key(name):
return brmap[name]
Expand All @@ -211,14 +210,24 @@ def get_branchname(name):
branch=get_branchname(branch)

parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
author = get_author(desc,user,authors)

if plugins and plugins['commit_message_filters']:
commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc}
for filter in plugins['commit_message_filters']:
filter(commit_data)
branch = commit_data['branch']
parents = commit_data['parents']
author = commit_data['author']
desc = commit_data['desc']

if len(parents)==0 and revision != 0:
wr('reset refs/heads/%s' % branch)

wr('commit refs/heads/%s' % branch)
wr('mark :%d' % (revision+1))
if sob:
wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone))
wr('author %s %d %s' % (author,time,timezone))
wr('committer %s %d %s' % (user,time,timezone))
wr('data %d' % (len(desc)+1)) # wtf?
wr(desc)
Expand Down Expand Up @@ -259,8 +268,8 @@ def get_branchname(name):
removed=[strip_leading_slash(x) for x in removed]

map(lambda r: wr('D %s' % r),removed)
export_file_contents(ctx,man,added,hgtags,fn_encoding,filter_contents)
export_file_contents(ctx,man,changed,hgtags,fn_encoding,filter_contents)
export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins)
export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins)
wr()

return checkpoint(count)
Expand Down Expand Up @@ -396,7 +405,8 @@ def verify_heads(ui,repo,cache,force,branchesmap):

def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
authors={},branchesmap={},tagsmap={},
sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',filter_contents=None):
sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',
plugins={}):
def check_cache(filename, contents):
if len(contents) == 0:
sys.stderr.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename)
Expand Down Expand Up @@ -438,7 +448,8 @@ def check_cache(filename, contents):
brmap={}
for rev in range(min,max):
c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
sob,brmap,hgtags,encoding,fn_encoding,filter_contents)
sob,brmap,hgtags,encoding,fn_encoding,
plugins)
if notes:
for rev in range(min,max):
c=export_note(ui,repo,rev,c,authors, encoding, rev == min and min != 0)
Expand Down Expand Up @@ -500,6 +511,10 @@ def bail(parser,opt):
help="Assume mappings are raw <key>=<value> lines")
parser.add_option("--filter-contents",dest="filter_contents",
help="Pipe contents of each exported file through FILTER_CONTENTS <file-path> <hg-hash> <is-binary>")
parser.add_option("--plugin-path", type="string", dest="pluginpath",
help="Additional search path for plugins ")
parser.add_option("--plugin", action="append", type="string", dest="plugins",
help="Add a plugin with the given init string <name=init>")

(options,args)=parser.parse_args()

Expand Down Expand Up @@ -538,13 +553,34 @@ def bail(parser,opt):
if options.fn_encoding!=None:
fn_encoding=options.fn_encoding

filter_contents=None
plugins=[]
if options.plugins!=None:
plugins+=options.plugins

if options.filter_contents!=None:
import shlex
filter_contents=shlex.split(options.filter_contents)
plugins+=['shell_filter_file_contents='+options.filter_contents]

plugins_dict={}
plugins_dict['commit_message_filters']=[]
plugins_dict['file_data_filters']=[]

if plugins and options.pluginpath:
sys.stderr.write('Using additional plugin path: ' + options.pluginpath + '\n')

for plugin in plugins:
split = plugin.split('=')
name, opts = split[0], '='.join(split[1:])
i = pluginloader.get_plugin(name,options.pluginpath)
sys.stderr.write('Loaded plugin ' + i['name'] + ' from path: ' + i['path'] +' with opts: ' + opts + '\n')
plugin = pluginloader.load_plugin(i).build_filter(opts)
if hasattr(plugin,'file_data_filter') and callable(plugin.file_data_filter):
plugins_dict['file_data_filters'].append(plugin.file_data_filter)
if hasattr(plugin, 'commit_message_filter') and callable(plugin.commit_message_filter):
plugins_dict['commit_message_filters'].append(plugin.commit_message_filter)

sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,
options.headsfile, options.statusfile,
authors=a,branchesmap=b,tagsmap=t,
sob=options.sob,force=options.force,hgtags=options.hgtags,
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,filter_contents=filter_contents))
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,
plugins=plugins_dict))
2 changes: 2 additions & 0 deletions hg-fast-export.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ Options:
--mappings-are-raw Assume mappings are raw <key>=<value> lines
--filter-contents <cmd> Pipe contents of each exported file through <cmd>
with <file-path> <hg-hash> <is-binary> as arguments
--plugin <plugin=init> Add a plugin with the given init string (repeatable)
--plugin-path <plugin-path> Add an additional plugin lookup path
"
case "$1" in
-h|--help)
Expand Down
19 changes: 19 additions & 0 deletions pluginloader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import os
import imp
# Default plugin search directory: the "plugins" folder that sits next to the
# directory containing this file (resolved through symlinks via realpath).
PluginFolder = os.path.join(os.path.dirname(os.path.realpath(__file__)),"..","plugins")
# Base name (without the ".py" suffix) of the module that every plugin folder
# must provide; it is both the file looked for and the name it is loaded under.
MainModule = "__init__"

def get_plugin(name, plugin_path):
    """Locate the plugin called *name* and describe how to load it.

    name        -- name of the plugin, i.e. the folder that holds it.
    plugin_path -- optional extra directory to search; when given it is
                   searched before the bundled plugin folder.

    Returns a dictionary with the keys "name", "info" (the result of
    imp.find_module() for the plugin's main module) and "path" (the plugin
    folder). Raises Exception when no search directory contains a folder
    *name* with the main module file inside it.
    """
    if plugin_path:
        candidates = [plugin_path, PluginFolder]
    else:
        candidates = [PluginFolder]

    main_file = MainModule + ".py"
    for base in candidates:
        location = os.path.join(base, name)
        # A usable plugin is a directory that directly contains the main module.
        if os.path.isdir(location) and main_file in os.listdir(location):
            info = imp.find_module(MainModule, [location])
            return {"name": name, "info": info, "path": location}

    raise Exception("Could not find plugin with name " + name)

def load_plugin(plugin):
    """Import and return the module of a plugin found by get_plugin().

    plugin -- dictionary as returned by get_plugin(); its "info" entry is the
              (file, pathname, description) tuple from imp.find_module().

    Returns the loaded module object. Any exception raised while importing
    the plugin propagates to the caller; the plugin's source file is closed
    in either case.
    """
    info = plugin["info"]
    try:
        # NOTE(review): every plugin is loaded under the same module name
        # (MainModule), so a later load reuses that name -- confirm this is
        # acceptable when several plugins are enabled at once.
        return imp.load_module(MainModule, *info)
    finally:
        # imp.load_module() leaves closing the file to the caller, so do it
        # here to avoid leaking one open handle per loaded plugin.
        if info[0] is not None:
            info[0].close()
10 changes: 10 additions & 0 deletions plugins/branch_name_in_commit/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
## Branch Name in Commit Message

Mercurial has a much stronger notion of branches than Git,
and some parties may not wish to lose the branch information
during the migration to Git. You can use this plugin to either
prepend or append the branch name from the mercurial
commit into the commit message in Git.

To use the plugin, add
`--plugin branch_name_in_commit=(start|end)`.
14 changes: 14 additions & 0 deletions plugins/branch_name_in_commit/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
def build_filter(args):
    """Plugin entry point: build this plugin's Filter from its init string.

    args -- the initialization string supplied after the plugin name.
    """
    plugin_filter = Filter(args)
    return plugin_filter

class Filter:
    """Commit-message filter that adds the branch name to the description."""

    def __init__(self, args):
        """Remember where the branch name goes.

        args -- either 'start' or 'end'; anything else raises Exception.
        """
        if args not in ('start', 'end'):
            raise Exception('Cannot have branch name anywhere but start and end')
        self.pos = args

    def commit_message_filter(self, commit_data):
        """Rewrite commit_data['desc'] in place to include the branch name.

        The branch name (commit_data['branch']) is put on its own line,
        either before or after the existing description depending on the
        position chosen at construction time.
        """
        if self.pos == 'start':
            first, second = commit_data['branch'], commit_data['desc']
        elif self.pos == 'end':
            first, second = commit_data['desc'], commit_data['branch']
        else:
            # Position is neither 'start' nor 'end': leave the data untouched.
            return
        commit_data['desc'] = first + '\n' + second
9 changes: 9 additions & 0 deletions plugins/dos2unix/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
## Dos2unix filter

This plugin converts CRLF line endings to LF in text files in the repo.
It is recommended that you have a .gitattributes file that maintains
the usage of LF endings going forward, for after you have converted your
repository.

To use the plugin, add
`--plugin dos2unix`.
11 changes: 11 additions & 0 deletions plugins/dos2unix/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
def build_filter(args):
    """Plugin entry point: build this plugin's Filter from its init string.

    args -- the initialization string supplied after the plugin name.
    """
    plugin_filter = Filter(args)
    return plugin_filter

class Filter():
    """File-data filter that converts CRLF line endings to LF."""

    def __init__(self, args):
        """Accept the init string for interface compatibility; it is unused."""
        pass

    def file_data_filter(self, file_data):
        """Replace '\\r\\n' with '\\n' in file_data['data'], in place.

        Files whose file context reports them as binary are left untouched.
        """
        if file_data['file_ctx'].isbinary():
            return
        converted = file_data['data'].replace('\r\n', '\n')
        file_data['data'] = converted
30 changes: 30 additions & 0 deletions plugins/shell_filter_file_contents/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
## Shell Script File Filter

This plugin uses shell scripts in order to perform filtering of files.
If your preferred scripting is done via shell, this tool is for you.
Note, though, that this method can cause an order-of-magnitude
slowdown. For small repositories, this won't be an issue.

To use the plugin, add
`--plugin shell_filter_file_contents=path/to/shell/script.sh`.
The filter script is supplied to the plugin option after the plugin name,
which is in turn passed to the plugin initialization. hg-fast-export
runs the filter for each exported file, pipes its content to the filter's
standard input, and uses the filter's standard output in place
of the file's original content. An example use of this feature
is to convert line endings in text files from CRLF to git's preferred LF,
although this task is faster performed using the native plugin.

The script is called with the following syntax:
`FILTER_CONTENTS <file-path> <hg-hash> <is-binary>`

```
-- Start of crlf-filter.sh --
#!/bin/sh
# $1 = pathname of exported file relative to the root of the repo
# $2 = Mercurial's hash of the file
# $3 = "1" if Mercurial reports the file as binary, otherwise "0"
if [ "$3" == "1" ]; then cat; else dos2unix; fi
-- End of crlf-filter.sh --
```
28 changes: 28 additions & 0 deletions plugins/shell_filter_file_contents/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Pipe contents of each exported file through FILTER_CONTENTS <file-path> <hg-hash> <is-binary>
import subprocess
import shlex
import sys
from mercurial import node

def build_filter(args):
    """Plugin entry point: build this plugin's Filter from its init string.

    args -- the initialization string supplied after the plugin name; here
            it is the filter command line.
    """
    plugin_filter = Filter(args)
    return plugin_filter

class Filter:
    """File-data filter that pipes each file through an external command."""

    def __init__(self, args):
        """Split the init string into the command's argument list.

        args -- command line of the filter, parsed with shlex.split().
        """
        self.filter_contents = shlex.split(args)

    def file_data_filter(self, file_data):
        """Run the filter command on one file and store its output.

        The command is invoked with three extra arguments: the file name,
        the hex form of the file node, and '1' or '0' depending on whether
        the file context reports the file as binary. The file data is
        written to the command's stdin and replaced by what it prints to
        stdout.

        Raises subprocess.CalledProcessError when the command exits with a
        non-zero status; failures to start or talk to the command are
        reported on stderr and re-raised.
        """
        contents = file_data['data']
        ctx = file_data['file_ctx']
        path = file_data['filename']
        extra_args = [path, node.hex(ctx.filenode()), '1' if ctx.isbinary() else '0']
        filter_cmd = self.filter_contents + extra_args
        try:
            proc = subprocess.Popen(
                filter_cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
            )
            filtered, _ = proc.communicate(contents)
        except:
            # Say which command was being run, then let the error propagate.
            sys.stderr.write('Running filter-contents %s:\n' % filter_cmd)
            raise
        status = proc.poll()
        if status:
            raise subprocess.CalledProcessError(status, filter_cmd)
        file_data['data'] = filtered

0 comments on commit b51c58d

Please sign in to comment.