Skip to contents

Takes a comparison object and filters it to include only bidirectional best hits, with the use of a second comparison, provided by other_direction. Both comparison objects must be filtered for best hits already (see best_hit).

Usage

bidirectional_best_hit(
  comparison,
  other_direction,
  group_by1 = "name1",
  group_by2 = "name2"
)

Arguments

comparison

A comparison object to filter.

other_direction

A comparison object, already filtered for best hits, that holds the hits in the other direction and against which the comparison to filter is checked.

group_by1

A character string referring to a column in comparison that holds its query names.

group_by2

A character string referring to a column in other_direction that holds its query names.

Value

A comparison object.

Details

The best hits from the first comparison are only kept when their query-subject combinations can also be found in the best hits from the second comparison. For example, take a best hit in the first comparison with the query name "geneA" and subject name "geneB". In the comparison provided by other_direction, the best hit for the query name "geneB" has to be the subject name "geneA". Only then is the hit kept as a bidirectional best hit. The query names are provided by group_by1 and group_by2, for the comparison to filter and the other comparison respectively.

Author

Mike Puijk

Examples

## Read example blastp results
infile1 <- system.file('extdata/blastp_example1.tab', package = 'genoPlotR')

## comparison before filtering for best hits
blast_comparison1 <- read_comparison_from_blast(infile1)
print_comparison(blast_comparison1)
#>     start1  end1 start2  end2          name1          name2 per_id aln_len
#>      <int> <int>  <int> <int>         <char>         <char>  <num>   <int>
#>  1:    117   502      1   385 HKIDPCCJ_00001 LABPAKCO_00001   58.7     387
#>  2:      1   376      1   379 HKIDPCCJ_00002 LABPAKCO_00002   56.5     379
#>  3:     19   651     12   645 HKIDPCCJ_00004 LABPAKCO_00005   74.6     634
#>  4:     23   655      8   644 HKIDPCCJ_00004 LABPAKCO_01957   51.4     642
#>  5:     11   811     10   812 HKIDPCCJ_00005 LABPAKCO_00006   62.8     803
#>  6:     11   810      6   809 HKIDPCCJ_00005 LABPAKCO_01958   40.0     817
#>  7:      1   172      1   190 HKIDPCCJ_00007 LABPAKCO_00009   55.8     197
#>  8:      1   172      1   164 HKIDPCCJ_00007 LABPAKCO_02342   50.0     174
#>  9:     93   535     67   509 HKIDPCCJ_00029 LABPAKCO_00168   28.7     453
#> 10:     87   521    107   546 HKIDPCCJ_00029 LABPAKCO_00977   26.3     452
#> 11:    328   529    335   539 HKIDPCCJ_00029 LABPAKCO_01459   33.2     205
#> 12:     88   534    100   548 HKIDPCCJ_00029 LABPAKCO_01460   23.7     451
#> 13:    304   498    312   502 HKIDPCCJ_00029 LABPAKCO_02399   31.3     201
#> 14:    328   521     28   227 HKIDPCCJ_00029 LABPAKCO_02526   29.8     208
#> 15:    328   519      4   214 HKIDPCCJ_00029 LABPAKCO_01272   29.1     213
#> 16:      1   637      1   644 HKIDPCCJ_00034 LABPAKCO_00103   56.1     645
#> 17:      2   210     11   270 HKIDPCCJ_00034 LABPAKCO_01390   31.5     260
#> 18:      8   303     12   290 HKIDPCCJ_00034 LABPAKCO_01699   30.1     306
#> 19:    100   531     78   511 HKIDPCCJ_00038 LABPAKCO_00168   24.1     456
#> 20:    250   513    267   544 HKIDPCCJ_00038 LABPAKCO_00977   26.1     284
#> 21:    267   513    290   533 HKIDPCCJ_00038 LABPAKCO_01460   27.6     254
#> 22:    325   503    331   513 HKIDPCCJ_00038 LABPAKCO_02399   28.1     185
#> 23:    109   473    105   460 HKIDPCCJ_00038 LABPAKCO_01612   22.5     374
#> 24:    359   501    368   516 HKIDPCCJ_00038 LABPAKCO_01459   29.6     152
#> 25:    320   514     22   226 HKIDPCCJ_00038 LABPAKCO_02526   25.8     209
#> 26:    323   485      2   187 HKIDPCCJ_00038 LABPAKCO_01272   22.1     204
#> 27:    451   514    331   398 HKIDPCCJ_00038 LABPAKCO_00394   32.4      68
#> 28:      8   338      4   334 HKIDPCCJ_00047 LABPAKCO_00854   54.1     333
#> 29:      4   336      1   333 HKIDPCCJ_00047 LABPAKCO_02361   27.1     336
#> 30:      1   442      1   443 HKIDPCCJ_00048 LABPAKCO_01328   33.8     444
#> 31:      1   438      1   440 HKIDPCCJ_00048 LABPAKCO_02752   32.4     442
#> 32:      1   379      1   378 HKIDPCCJ_00049 LABPAKCO_00839   72.5     382
#> 33:     15   348      9   310 HKIDPCCJ_00049 LABPAKCO_00753   32.2     342
#>     start1  end1 start2  end2          name1          name2 per_id aln_len
#>      mism  gaps   e_value bit_score direction
#>     <int> <int>     <num>     <num>     <num>
#>  1:   157     2 1.32e-153     440.0         1
#>  2:   162     1 1.18e-140     401.0         1
#>  3:   160     1  0.00e+00     946.0         1
#>  4:   298    11 9.19e-211     601.0         1
#>  5:   297     1  0.00e+00     989.0         1
#>  6:   460     7 4.55e-185     547.0         1
#>  7:    55     2  2.90e-56     172.0         1
#>  8:    75     2  8.61e-50     154.0         1
#>  9:   303    10  3.22e-49     174.0         1
#> 10:   304    13  2.30e-31     124.0         1
#> 11:   134     3  3.10e-30     120.0         1
#> 12:   338     6  4.45e-29     117.0         1
#> 13:   122     4  2.08e-22      97.1         1
#> 14:   124     6  2.37e-14      69.7         1
#> 15:   128     7  3.15e-14      71.2         1
#> 16:   274     5 7.15e-256     715.0         1
#> 17:   127     4  8.69e-31     124.0         1
#> 18:   177    10  5.54e-24     102.0         1
#> 19:   300     9  3.02e-28     114.0         1
#> 20:   184     8  3.67e-21      93.2         1
#> 21:   167     6  5.92e-19      86.3         1
#> 22:   125     2  9.99e-18      82.4         1
#> 23:   263     7  2.73e-17      80.9         1
#> 24:    95     3  1.32e-12      66.2         1
#> 25:   137     6  9.21e-11      58.9         1
#> 26:   100     7  6.56e-05      41.6         1
#> 27:    42     2  2.91e-04      39.7         1
#> 28:   149     3 6.03e-110     322.0         1
#> 29:   239     4  5.43e-29     111.0         1
#> 30:   291     3  2.02e-80     252.0         1
#> 31:   293     4  7.28e-74     235.0         1
#> 32:    98     3 5.42e-194     537.0         1
#> 33:   184    11  4.93e-43     149.0         1
#>      mism  gaps   e_value bit_score direction

## Filter for best hits and print results
bh_comparison1 <- best_hit(blast_comparison1)
print_comparison(bh_comparison1)
#>     start1  end1 start2  end2          name1          name2 per_id aln_len
#>      <int> <int>  <int> <int>         <char>         <char>  <num>   <int>
#>  1:    117   502      1   385 HKIDPCCJ_00001 LABPAKCO_00001   58.7     387
#>  2:      1   376      1   379 HKIDPCCJ_00002 LABPAKCO_00002   56.5     379
#>  3:     19   651     12   645 HKIDPCCJ_00004 LABPAKCO_00005   74.6     634
#>  4:     11   811     10   812 HKIDPCCJ_00005 LABPAKCO_00006   62.8     803
#>  5:      1   172      1   190 HKIDPCCJ_00007 LABPAKCO_00009   55.8     197
#>  6:     93   535     67   509 HKIDPCCJ_00029 LABPAKCO_00168   28.7     453
#>  7:      1   637      1   644 HKIDPCCJ_00034 LABPAKCO_00103   56.1     645
#>  8:    100   531     78   511 HKIDPCCJ_00038 LABPAKCO_00168   24.1     456
#>  9:      8   338      4   334 HKIDPCCJ_00047 LABPAKCO_00854   54.1     333
#> 10:      1   442      1   443 HKIDPCCJ_00048 LABPAKCO_01328   33.8     444
#> 11:      1   379      1   378 HKIDPCCJ_00049 LABPAKCO_00839   72.5     382
#>      mism  gaps   e_value bit_score direction
#>     <int> <int>     <num>     <num>     <num>
#>  1:   157     2 1.32e-153       440         1
#>  2:   162     1 1.18e-140       401         1
#>  3:   160     1  0.00e+00       946         1
#>  4:   297     1  0.00e+00       989         1
#>  5:    55     2  2.90e-56       172         1
#>  6:   303    10  3.22e-49       174         1
#>  7:   274     5 7.15e-256       715         1
#>  8:   300     9  3.02e-28       114         1
#>  9:   149     3 6.03e-110       322         1
#> 10:   291     3  2.02e-80       252         1
#> 11:    98     3 5.42e-194       537         1

## Repeat the steps for the BLAST results in the other direction

infile2 <- system.file('extdata/blastp_example2.tab', package = 'genoPlotR')
blast_comparison2 <- read_comparison_from_blast(infile2)
bh_comparison2 <- best_hit(blast_comparison2)

## Filter for bidirectional best hits and print results
bbh_comparison1 <- bidirectional_best_hit(comparison = bh_comparison1,
                                         other_direction = bh_comparison2)
print_comparison(bbh_comparison1)
#>    start1  end1 start2  end2          name1          name2 per_id aln_len  mism
#>     <int> <int>  <int> <int>         <char>         <char>  <num>   <int> <int>
#> 1:    117   502      1   385 HKIDPCCJ_00001 LABPAKCO_00001   58.7     387   157
#> 2:      1   376      1   379 HKIDPCCJ_00002 LABPAKCO_00002   56.5     379   162
#> 3:     19   651     12   645 HKIDPCCJ_00004 LABPAKCO_00005   74.6     634   160
#> 4:     11   811     10   812 HKIDPCCJ_00005 LABPAKCO_00006   62.8     803   297
#> 5:      1   172      1   190 HKIDPCCJ_00007 LABPAKCO_00009   55.8     197    55
#> 6:      1   637      1   644 HKIDPCCJ_00034 LABPAKCO_00103   56.1     645   274
#> 7:      8   338      4   334 HKIDPCCJ_00047 LABPAKCO_00854   54.1     333   149
#> 8:      1   442      1   443 HKIDPCCJ_00048 LABPAKCO_01328   33.8     444   291
#> 9:      1   379      1   378 HKIDPCCJ_00049 LABPAKCO_00839   72.5     382    98
#>     gaps   e_value bit_score direction
#>    <int>     <num>     <num>     <num>
#> 1:     2 1.32e-153       440         1
#> 2:     1 1.18e-140       401         1
#> 3:     1  0.00e+00       946         1
#> 4:     1  0.00e+00       989         1
#> 5:     2  2.90e-56       172         1
#> 6:     5 7.15e-256       715         1
#> 7:     3 6.03e-110       322         1
#> 8:     3  2.02e-80       252         1
#> 9:     3 5.42e-194       537         1