<!--
Place this command file and all input files in the same directory
as the lam_conv executable, then run it like this to convert
automatically:
./lam_conv -b -c lamarc-converter-commands.xml
or like this to explore in the GUI:
./lam_conv -c lamarc-converter-commands.xml
-->
<lamarc-converter-cmd>
<!--
Name of the lamarc input file the converter will produce.
If this element is not present, it defaults to infile.xml.
-->
<outfile>lamarc-input.xml</outfile>
<!--
The text below is placed as a comment at the top of the outfile
produced. It is a useful way to tell different lamarc infiles apart.
-->
<lamarc-header-comment>Example output for 3 chromosomes, some with multiple segments</lamarc-header-comment>
<!-- ********************************************************* -->
<!--
The <regions> section is where you specify both the type of
data you have and its relative location (and therefore its
likelihood of being co-inherited).
-->
<regions>
<!--
Each region contains a specification of data types and
relative locations of data which are "close enough" to
each other to be modeled as co-inherited.
As a rule of thumb, data samples should be in the same
region if:
(a) they are within 1/1000 of a centimorgan, or
(b) they are within 1 centimorgan and you plan
to estimate recombination.
-->
<region>
  <!-- Every region, segment, and population name must be unique. -->
  <name>chrom1</name>
  <!--
    Effective population size; the default is 1. You can probably
    leave it alone unless you are working with sex chromosomes or
    mixing mtDNA with chromosomal data.
  -->
  <effective-popsize>1</effective-popsize>
  <!--
    Within a region, a new segment is needed wherever
      (a) the data type is different,
      (b) the mutation rate is different, or
      (c) the samples are separated by unsampled stretches
          of the genome.
  -->
  <segments>
    <!--
      The region for chrom 1 is deliberately simple: it holds a
      single stretch of DNA data, the easiest and simplest case
      to model in lamarc.
      Allowed datatypes are "snp", "dna", "microsat", and "kallele".
    -->
    <segment datatype="dna">
      <name>chrom1-segment</name>
      <!-- For DNA data, <markers> is the number of sites in the data. -->
      <markers>9</markers>
    </segment>
  </segments>
</region>
<region>
  <!--
    Region "chrom2" models two sets of SNP data on the same
    chromosome, separated by other, unknown data.
  -->
  <name>chrom2</name>
  <effective-popsize>1</effective-popsize>
  <segments>
    <!--
      A SNP segment requires additional information in order
      to be modeled correctly.
    -->
    <segment datatype="snp">
      <name>chrom2-segment1</name>
      <!-- For SNP data, <markers> is the number of SNP sites in the data. -->
      <markers>5</markers>
      <!--
        Position of this segment within region chrom2, on a
        region-wide scale. Lamarc needs this information to model
        recombination events occurring between segments.
      -->
      <map-position>1000</map-position>
      <!--
        Where you started scanning for SNPs, assuming that "1" in
        segment coordinates is identical to <map-position> in
        region coordinates.
      -->
      <first-position-scanned>-5</first-position-scanned>
      <!--
        Total data length (in nucleotides) scanned, starting
        at <first-position-scanned>.
      -->
      <length>500</length>
      <!-- Locations of the SNP markers, in segment coordinates. -->
      <locations> 2 88 125 173 443 </locations>
    </segment>
    <!--
      A second SNP segment in the same region, placed further
      along the region-wide scale (map position 5000).
    -->
    <segment datatype="snp">
      <name>chrom2-segment2</name>
      <markers>7</markers>
      <map-position>5000</map-position>
      <first-position-scanned>-5</first-position-scanned>
      <length>250</length>
      <locations> 13 19 35 77 102 112 204</locations>
    </segment>
  </segments>
</region>
<region>
  <!--
    A microsat sitting next to a SNP. The SNP was found in a
    100-base region, at the 23rd site after the microsat.
    <effective-popsize> is omitted for this region, so its
    default of 1 applies.
  -->
  <name>chrom3</name>
  <segments>
    <!-- Microsatellite segment with a single marker. -->
    <segment datatype="microsat">
      <name>chrom3-micro</name>
      <markers>1</markers>
      <map-position>500</map-position>
      <first-position-scanned>1</first-position-scanned>
    </segment>
    <!-- SNP segment: one SNP at location 23 of the 100 bases scanned. -->
    <segment datatype="snp">
      <name>chrom3-snp</name>
      <markers>1</markers>
      <map-position>501</map-position>
      <length>100</length>
      <locations> 23 </locations>
      <first-position-scanned>1</first-position-scanned>
    </segment>
  </segments>
</region>
</regions>
<!-- ********************************************************* -->
<!--
  If you want your populations to have meaningful names,
  this is the place to declare them.
-->
<populations>
  <population>North</population>
  <population>South</population>
</populations>
<!-- ********************************************************* -->
<!--
You may need to include the <individuals> tag if you:
(a) have samples which include unresolved haplotypes,
(b) are combining both allelic and nucleotide
segments in a single region, or
(c) are doing trait mapping
-->
<individuals>
<individual>
  <!--
    If you specified diploid (or higher-ploidy) data in a migrate
    microsat or kallele file, your individual names are probably
    the sequence name labels from that file.
  -->
  <name>n_ind0</name>
  <!--
    If you have DNA or SNP data from a phylip or migrate file,
    your sample names are probably the sequence name labels
    from that file.
  -->
  <sample><name>n_ind0_a</name></sample>
  <sample><name>n_ind0_b</name></sample>
  <!--
    Use the <phase> tag to indicate markers for which you do not
    know which haploid (or higher-ploidy) sample carries which
    value. Positions use the same scale as the <locations> tag,
    i.e. the numbering system of the segment in question: the
    first valid position is that segment's first-position-scanned
    value, and valid positions extend from there for the length
    of the segment.
    The specification below indicates that, for this individual,
    we are not sure which of the two haplotypes the first and
    second data sample values should be assigned to.
  -->
  <phase>
    <segment-name>chrom2-segment2</segment-name>
    <unresolved-markers> 13 19 </unresolved-markers>
  </phase>
</individual>
<individual>
  <!-- Individual with two samples and no <phase> entries. -->
  <name>n_ind1</name>
  <sample><name>n_ind1_a</name></sample>
  <sample><name>n_ind1_b</name></sample>
</individual>
<individual>
  <!-- Individual with two samples and no <phase> entries. -->
  <name>n_ind2</name>
  <sample><name>n_ind2_a</name></sample>
  <sample><name>n_ind2_b</name></sample>
</individual>
<individual>
  <!-- Individual with two samples and no <phase> entries. -->
  <name>s_ind0</name>
  <sample><name>s_ind0_a</name></sample>
  <sample><name>s_ind0_b</name></sample>
</individual>
<individual>
  <!-- Individual with two samples and no <phase> entries. -->
  <name>s_ind1</name>
  <sample><name>s_ind1_a</name></sample>
  <sample><name>s_ind1_b</name></sample>
</individual>
</individuals>
<!-- ********************************************************* -->
<!--
Use the <infiles> tag to tell the converter how your data
corresponds to the <region> and <segment> elements
-->
<infiles>
<!--
All attributes given for the <infile> tag are required.
The legal values are given below
format : "migrate", "phylip"
datatype : "dna", "snp", "kallele", "microsat"
sequence-alignment : "sequential" or "interleaved"
-->
<infile format="migrate" datatype="dna" sequence-alignment="sequential">
  <!--
    The file name is relative to the directory the converter
    was invoked from.
  -->
  <name>chrom1.mig</name>
  <!--
    The <population-matching> tag tells the converter how to
    assign data samples to populations. Legal types are:
      "single" : assign all data to the single population
                 whose name is enclosed within this tag
      "byList" : a list of population names appears, each
                 enclosed in <population-name> tags; populations
                 in the file are assigned to the named
                 populations in order
      "byName" : use the name in the comment of the infile
  -->
  <population-matching type="byName"/>
  <!--
    The <segments-matching> tag tells the converter how to
    assign data samples to segments. Legal types are:
      "single" : assign all data to the single segment
                 whose name is enclosed within this tag
      "byList" : a list of segment names appears, each
                 enclosed in <segment-name> tags; segments
                 in the file are assigned to the named
                 segments in order
  -->
  <segments-matching type="byList">
    <!-- Segments from the input file are assigned in the order given here. -->
    <segment-name>chrom1-segment</segment-name>
  </segments-matching>
</infile>
<infile format="migrate" datatype="snp" sequence-alignment="sequential">
  <name>chrom2.mig</name>
  <population-matching type="byName"/>
  <!--
    This one file supplies two segments; they are matched to
    the names below in the order listed.
  -->
  <segments-matching type="byList">
    <segment-name>chrom2-segment1</segment-name>
    <segment-name>chrom2-segment2</segment-name>
  </segments-matching>
</infile>
<!--
note that while both segments in chrom2 could be specified
in a single file, the segments of chrom3 are in different
files since they have different data types.
-->
<infile format="migrate" datatype="snp" sequence-alignment="sequential">
  <!-- SNP data file, matched to the single segment named below. -->
  <name>chrom3snp.mig</name>
  <population-matching type="byName"/>
  <segments-matching type="byList">
    <segment-name>chrom3-snp</segment-name>
  </segments-matching>
</infile>
<infile format="migrate" datatype="microsat" sequence-alignment="sequential">
  <!-- Microsatellite data file, matched to the single segment named below. -->
  <name>chrom3microsat.mig</name>
  <population-matching type="byName"/>
  <segments-matching type="byList">
    <segment-name>chrom3-micro</segment-name>
  </segments-matching>
</infile>
</infiles>
</lamarc-converter-cmd>